Diffstat (limited to 'arch')

 -rw-r--r--  arch/alpha/Kconfig                       |    1
 -rw-r--r--  arch/alpha/include/asm/atomic.h          |   88
 -rw-r--r--  arch/alpha/include/asm/param.h           |    8
 -rw-r--r--  arch/alpha/include/asm/spinlock.h        |    4
 -rw-r--r--  arch/alpha/include/asm/unistd.h          |    3
 -rw-r--r--  arch/alpha/include/uapi/asm/param.h      |    7
 -rw-r--r--  arch/alpha/include/uapi/asm/unistd.h     |    2
 -rw-r--r--  arch/alpha/kernel/entry.S                |  399
 -rw-r--r--  arch/alpha/kernel/irq_alpha.c            |    2
 -rw-r--r--  arch/alpha/kernel/smp.c                  |    5
 -rw-r--r--  arch/alpha/kernel/sys_dp264.c            |    8
 -rw-r--r--  arch/alpha/kernel/sys_marvel.c           |    3
 -rw-r--r--  arch/alpha/kernel/systbls.S              |    2
 -rw-r--r--  arch/alpha/kernel/time.c                 |    4
 -rw-r--r--  arch/alpha/kernel/traps.c                |    8
 -rw-r--r--  arch/x86/crypto/Makefile                 |    2
 -rw-r--r--  arch/x86/crypto/crct10dif-pcl-asm_64.S   |  643
 -rw-r--r--  arch/x86/crypto/crct10dif-pclmul_glue.c  |  151

 18 files changed, 374 insertions(+), 966 deletions(-)
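Two of the alpha changes below are easy to misread from the hunks alone. atomic64_add_unless() changes its return contract: it now returns true iff the add happened (that is, @v was not @u), rather than the old counter value; and a native atomic64_dec_if_positive() is added that returns old - 1 even when it stores nothing. A minimal caller-side sketch of those contracts (the refcount scenario and every name in it are illustrative, not part of the patch):

    /* Illustrative only: exercises the return-value contracts that the
     * alpha atomic.h hunk below introduces. */
    #include <linux/atomic.h>
    #include <linux/printk.h>
    #include <linux/types.h>

    static atomic64_t refs = ATOMIC64_INIT(1);

    static bool try_get_ref(void)
    {
            /* True iff refs was not already 0 -- exactly the behavior the
             * atomic64_inc_not_zero() wrapper in the patch relies on. */
            return atomic64_add_unless(&refs, 1, 0);
    }

    static void put_ref(void)
    {
            /* Returns old - 1 even if nothing was stored: 0 means we just
             * dropped the last reference; a negative result means refs was
             * already non-positive and was left untouched. */
            if (atomic64_dec_if_positive(&refs) == 0)
                    pr_debug("last reference dropped\n");
    }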
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 837a1f2d8b96..082d9b4b5472 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -15,6 +15,7 @@ config ALPHA select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_IPC_PARSE_VERSION select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select GENERIC_SMP_IDLE_THREAD select GENERIC_CMOS_UPDATE select GENERIC_STRNCPY_FROM_USER diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index c2cbe4fc391c..78b03ef39f6f 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -186,17 +186,24 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v) */ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) { - int c, old; - c = atomic_read(v); - for (;;) { - if (unlikely(c == (u))) - break; - old = atomic_cmpxchg((v), c, c + (a)); - if (likely(old == c)) - break; - c = old; - } - return c; + int c, new, old; + smp_mb(); + __asm__ __volatile__( + "1: ldl_l %[old],%[mem]\n" + " cmpeq %[old],%[u],%[c]\n" + " addl %[old],%[a],%[new]\n" + " bne %[c],2f\n" + " stl_c %[new],%[mem]\n" + " beq %[new],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c) + : [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u) + : "memory"); + smp_mb(); + return old; } @@ -207,21 +214,56 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as it was not @u. - * Returns the old value of @v. + * Returns true iff @v was not @u. */ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) { - long c, old; - c = atomic64_read(v); - for (;;) { - if (unlikely(c == (u))) - break; - old = atomic64_cmpxchg((v), c, c + (a)); - if (likely(old == c)) - break; - c = old; - } - return c != (u); + long c, tmp; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %[tmp],%[mem]\n" + " cmpeq %[tmp],%[u],%[c]\n" + " addq %[tmp],%[a],%[tmp]\n" + " bne %[c],2f\n" + " stq_c %[tmp],%[mem]\n" + " beq %[tmp],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [tmp] "=&r"(tmp), [c] "=&r"(c) + : [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u) + : "memory"); + smp_mb(); + return !c; +} + +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. 
+ */ +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + long old, tmp; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %[old],%[mem]\n" + " subq %[old],1,%[tmp]\n" + " ble %[old],2f\n" + " stq_c %[tmp],%[mem]\n" + " beq %[tmp],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [old] "=&r"(old), [tmp] "=&r"(tmp) + : [mem] "m"(*v) + : "memory"); + smp_mb(); + return old - 1; } #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h index bf46af51941b..a5b68b268bcf 100644 --- a/arch/alpha/include/asm/param.h +++ b/arch/alpha/include/asm/param.h @@ -3,7 +3,9 @@ #include <uapi/asm/param.h> -#define HZ CONFIG_HZ -#define USER_HZ HZ -# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ +# undef HZ +# define HZ CONFIG_HZ +# define USER_HZ 1024 +# define CLOCKS_PER_SEC USER_HZ /* frequency at which times() counts */ + #endif /* _ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index 3bba21e41b81..37b570d01202 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -168,8 +168,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ALPHA_SPINLOCK_H */ diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 43baee17acdf..f2c94402e2c8 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -3,8 +3,7 @@ #include <uapi/asm/unistd.h> - -#define NR_SYSCALLS 506 +#define NR_SYSCALLS 508 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h index 29daed819ebd..dbcd9834af6d 100644 --- a/arch/alpha/include/uapi/asm/param.h +++ b/arch/alpha/include/uapi/asm/param.h @@ -1,13 +1,7 @@ #ifndef _UAPI_ASM_ALPHA_PARAM_H #define _UAPI_ASM_ALPHA_PARAM_H -/* ??? Gross. I don't want to parameterize this, and supposedly the - hardware ignores reprogramming. We also need userland buy-in to the - change in HZ, since this is visible in the wait4 resources etc. */ - -#ifndef __KERNEL__ #define HZ 1024 -#endif #define EXEC_PAGESIZE 8192 @@ -17,5 +11,4 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ - #endif /* _UAPI_ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index 801d28bcea51..53ae7bb1bfd1 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -467,5 +467,7 @@ #define __NR_sendmmsg 503 #define __NR_process_vm_readv 504 #define __NR_process_vm_writev 505 +#define __NR_kcmp 506 +#define __NR_finit_module 507 #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index f62a994ef126..a969b95ee5ac 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -12,11 +12,32 @@ .text .set noat + .cfi_sections .debug_frame /* Stack offsets. 
*/ #define SP_OFF 184 #define SWITCH_STACK_SIZE 320 +.macro CFI_START_OSF_FRAME func + .align 4 + .globl \func + .type \func,@function +\func: + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 48 + .cfi_rel_offset 64, 8 + .cfi_rel_offset $gp, 16 + .cfi_rel_offset $16, 24 + .cfi_rel_offset $17, 32 + .cfi_rel_offset $18, 40 +.endm + +.macro CFI_END_OSF_FRAME func + .cfi_endproc + .size \func, . - \func +.endm + /* * This defines the normal kernel pt-regs layout. * @@ -27,100 +48,158 @@ * the palcode-provided values are available to the signal handler. */ -#define SAVE_ALL \ - subq $sp, SP_OFF, $sp; \ - stq $0, 0($sp); \ - stq $1, 8($sp); \ - stq $2, 16($sp); \ - stq $3, 24($sp); \ - stq $4, 32($sp); \ - stq $28, 144($sp); \ - lda $2, alpha_mv; \ - stq $5, 40($sp); \ - stq $6, 48($sp); \ - stq $7, 56($sp); \ - stq $8, 64($sp); \ - stq $19, 72($sp); \ - stq $20, 80($sp); \ - stq $21, 88($sp); \ - ldq $2, HAE_CACHE($2); \ - stq $22, 96($sp); \ - stq $23, 104($sp); \ - stq $24, 112($sp); \ - stq $25, 120($sp); \ - stq $26, 128($sp); \ - stq $27, 136($sp); \ - stq $2, 152($sp); \ - stq $16, 160($sp); \ - stq $17, 168($sp); \ +.macro SAVE_ALL + subq $sp, SP_OFF, $sp + .cfi_adjust_cfa_offset SP_OFF + stq $0, 0($sp) + stq $1, 8($sp) + stq $2, 16($sp) + stq $3, 24($sp) + stq $4, 32($sp) + stq $28, 144($sp) + .cfi_rel_offset $0, 0 + .cfi_rel_offset $1, 8 + .cfi_rel_offset $2, 16 + .cfi_rel_offset $3, 24 + .cfi_rel_offset $4, 32 + .cfi_rel_offset $28, 144 + lda $2, alpha_mv + stq $5, 40($sp) + stq $6, 48($sp) + stq $7, 56($sp) + stq $8, 64($sp) + stq $19, 72($sp) + stq $20, 80($sp) + stq $21, 88($sp) + ldq $2, HAE_CACHE($2) + stq $22, 96($sp) + stq $23, 104($sp) + stq $24, 112($sp) + stq $25, 120($sp) + stq $26, 128($sp) + stq $27, 136($sp) + stq $2, 152($sp) + stq $16, 160($sp) + stq $17, 168($sp) stq $18, 176($sp) + .cfi_rel_offset $5, 40 + .cfi_rel_offset $6, 48 + .cfi_rel_offset $7, 56 + .cfi_rel_offset $8, 64 + .cfi_rel_offset $19, 72 + .cfi_rel_offset $20, 80 + .cfi_rel_offset $21, 88 + .cfi_rel_offset $22, 96 + .cfi_rel_offset $23, 104 + .cfi_rel_offset $24, 112 + .cfi_rel_offset $25, 120 + .cfi_rel_offset $26, 128 + .cfi_rel_offset $27, 136 +.endm -#define RESTORE_ALL \ - lda $19, alpha_mv; \ - ldq $0, 0($sp); \ - ldq $1, 8($sp); \ - ldq $2, 16($sp); \ - ldq $3, 24($sp); \ - ldq $21, 152($sp); \ - ldq $20, HAE_CACHE($19); \ - ldq $4, 32($sp); \ - ldq $5, 40($sp); \ - ldq $6, 48($sp); \ - ldq $7, 56($sp); \ - subq $20, $21, $20; \ - ldq $8, 64($sp); \ - beq $20, 99f; \ - ldq $20, HAE_REG($19); \ - stq $21, HAE_CACHE($19); \ - stq $21, 0($20); \ -99:; \ - ldq $19, 72($sp); \ - ldq $20, 80($sp); \ - ldq $21, 88($sp); \ - ldq $22, 96($sp); \ - ldq $23, 104($sp); \ - ldq $24, 112($sp); \ - ldq $25, 120($sp); \ - ldq $26, 128($sp); \ - ldq $27, 136($sp); \ - ldq $28, 144($sp); \ +.macro RESTORE_ALL + lda $19, alpha_mv + ldq $0, 0($sp) + ldq $1, 8($sp) + ldq $2, 16($sp) + ldq $3, 24($sp) + ldq $21, 152($sp) + ldq $20, HAE_CACHE($19) + ldq $4, 32($sp) + ldq $5, 40($sp) + ldq $6, 48($sp) + ldq $7, 56($sp) + subq $20, $21, $20 + ldq $8, 64($sp) + beq $20, 99f + ldq $20, HAE_REG($19) + stq $21, HAE_CACHE($19) + stq $21, 0($20) +99: ldq $19, 72($sp) + ldq $20, 80($sp) + ldq $21, 88($sp) + ldq $22, 96($sp) + ldq $23, 104($sp) + ldq $24, 112($sp) + ldq $25, 120($sp) + ldq $26, 128($sp) + ldq $27, 136($sp) + ldq $28, 144($sp) addq $sp, SP_OFF, $sp + .cfi_restore $0 + .cfi_restore $1 + .cfi_restore $2 + .cfi_restore $3 + .cfi_restore $4 + .cfi_restore $5 + .cfi_restore $6 + .cfi_restore $7 + 
.cfi_restore $8 + .cfi_restore $19 + .cfi_restore $20 + .cfi_restore $21 + .cfi_restore $22 + .cfi_restore $23 + .cfi_restore $24 + .cfi_restore $25 + .cfi_restore $26 + .cfi_restore $27 + .cfi_restore $28 + .cfi_adjust_cfa_offset -SP_OFF +.endm + +.macro DO_SWITCH_STACK + bsr $1, do_switch_stack + .cfi_adjust_cfa_offset SWITCH_STACK_SIZE + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 + /* We don't really care about the FP registers for debugging. */ +.endm + +.macro UNDO_SWITCH_STACK + bsr $1, undo_switch_stack + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -SWITCH_STACK_SIZE +.endm /* * Non-syscall kernel entry points. */ - .align 4 - .globl entInt - .ent entInt -entInt: +CFI_START_OSF_FRAME entInt SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $19 jsr $31, do_entInt -.end entInt +CFI_END_OSF_FRAME entInt - .align 4 - .globl entArith - .ent entArith -entArith: +CFI_START_OSF_FRAME entArith SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $18 jsr $31, do_entArith -.end entArith +CFI_END_OSF_FRAME entArith - .align 4 - .globl entMM - .ent entMM -entMM: +CFI_START_OSF_FRAME entMM SAVE_ALL /* save $9 - $15 so the inline exception code can manipulate them. */ subq $sp, 56, $sp + .cfi_adjust_cfa_offset 56 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -128,6 +207,13 @@ entMM: stq $13, 32($sp) stq $14, 40($sp) stq $15, 48($sp) + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 addq $sp, 56, $19 /* handle the fault */ lda $8, 0x3fff @@ -142,28 +228,33 @@ entMM: ldq $14, 40($sp) ldq $15, 48($sp) addq $sp, 56, $sp + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -56 /* finish up the syscall as normal. 
*/ br ret_from_sys_call -.end entMM +CFI_END_OSF_FRAME entMM - .align 4 - .globl entIF - .ent entIF -entIF: +CFI_START_OSF_FRAME entIF SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $17 jsr $31, do_entIF -.end entIF +CFI_END_OSF_FRAME entIF - .align 4 - .globl entUna - .ent entUna -entUna: +CFI_START_OSF_FRAME entUna lda $sp, -256($sp) + .cfi_adjust_cfa_offset 256 stq $0, 0($sp) + .cfi_rel_offset $0, 0 + .cfi_remember_state ldq $0, 256($sp) /* get PS */ stq $1, 8($sp) stq $2, 16($sp) @@ -195,6 +286,32 @@ entUna: stq $28, 224($sp) mov $sp, $19 stq $gp, 232($sp) + .cfi_rel_offset $1, 1*8 + .cfi_rel_offset $2, 2*8 + .cfi_rel_offset $3, 3*8 + .cfi_rel_offset $4, 4*8 + .cfi_rel_offset $5, 5*8 + .cfi_rel_offset $6, 6*8 + .cfi_rel_offset $7, 7*8 + .cfi_rel_offset $8, 8*8 + .cfi_rel_offset $9, 9*8 + .cfi_rel_offset $10, 10*8 + .cfi_rel_offset $11, 11*8 + .cfi_rel_offset $12, 12*8 + .cfi_rel_offset $13, 13*8 + .cfi_rel_offset $14, 14*8 + .cfi_rel_offset $15, 15*8 + .cfi_rel_offset $19, 19*8 + .cfi_rel_offset $20, 20*8 + .cfi_rel_offset $21, 21*8 + .cfi_rel_offset $22, 22*8 + .cfi_rel_offset $23, 23*8 + .cfi_rel_offset $24, 24*8 + .cfi_rel_offset $25, 25*8 + .cfi_rel_offset $26, 26*8 + .cfi_rel_offset $27, 27*8 + .cfi_rel_offset $28, 28*8 + .cfi_rel_offset $29, 29*8 lda $8, 0x3fff stq $31, 248($sp) bic $sp, $8, $8 @@ -228,16 +345,45 @@ entUna: ldq $28, 224($sp) ldq $gp, 232($sp) lda $sp, 256($sp) + .cfi_restore $1 + .cfi_restore $2 + .cfi_restore $3 + .cfi_restore $4 + .cfi_restore $5 + .cfi_restore $6 + .cfi_restore $7 + .cfi_restore $8 + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_restore $19 + .cfi_restore $20 + .cfi_restore $21 + .cfi_restore $22 + .cfi_restore $23 + .cfi_restore $24 + .cfi_restore $25 + .cfi_restore $26 + .cfi_restore $27 + .cfi_restore $28 + .cfi_restore $29 + .cfi_adjust_cfa_offset -256 call_pal PAL_rti -.end entUna .align 4 - .ent entUnaUser entUnaUser: + .cfi_restore_state ldq $0, 0($sp) /* restore original $0 */ lda $sp, 256($sp) /* pop entUna's stack frame */ + .cfi_restore $0 + .cfi_adjust_cfa_offset -256 SAVE_ALL /* setup normal kernel stack */ lda $sp, -56($sp) + .cfi_adjust_cfa_offset 56 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -245,6 +391,13 @@ entUnaUser: stq $13, 32($sp) stq $14, 40($sp) stq $15, 48($sp) + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 lda $8, 0x3fff addq $sp, 56, $19 bic $sp, $8, $8 @@ -257,20 +410,25 @@ entUnaUser: ldq $14, 40($sp) ldq $15, 48($sp) lda $sp, 56($sp) + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -56 br ret_from_sys_call -.end entUnaUser +CFI_END_OSF_FRAME entUna - .align 4 - .globl entDbg - .ent entDbg -entDbg: +CFI_START_OSF_FRAME entDbg SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $16 jsr $31, do_entDbg -.end entDbg +CFI_END_OSF_FRAME entDbg /* * The system call entry point is special. 
Most importantly, it looks @@ -285,8 +443,12 @@ entDbg: .align 4 .globl entSys - .globl ret_from_sys_call - .ent entSys + .type entSys, @function + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 48 + .cfi_rel_offset 64, 8 + .cfi_rel_offset $gp, 16 entSys: SAVE_ALL lda $8, 0x3fff @@ -300,6 +462,9 @@ entSys: stq $17, SP_OFF+32($sp) s8addq $0, $5, $5 stq $18, SP_OFF+40($sp) + .cfi_rel_offset $16, SP_OFF+24 + .cfi_rel_offset $17, SP_OFF+32 + .cfi_rel_offset $18, SP_OFF+40 blbs $3, strace beq $4, 1f ldq $27, 0($5) @@ -310,6 +475,7 @@ entSys: stq $31, 72($sp) /* a3=0 => no error */ .align 4 + .globl ret_from_sys_call ret_from_sys_call: cmovne $26, 0, $18 /* $18 = 0 => non-restartable */ ldq $0, SP_OFF($sp) @@ -324,10 +490,12 @@ ret_to_user: and $17, _TIF_WORK_MASK, $2 bne $2, work_pending restore_all: + .cfi_remember_state RESTORE_ALL call_pal PAL_rti ret_to_kernel: + .cfi_restore_state lda $16, 7 call_pal PAL_swpipl br restore_all @@ -356,7 +524,6 @@ $ret_success: stq $0, 0($sp) stq $31, 72($sp) /* a3=0 => no error */ br ret_from_sys_call -.end entSys /* * Do all cleanup when returning from all interrupts and system calls. @@ -370,7 +537,7 @@ $ret_success: */ .align 4 - .ent work_pending + .type work_pending, @function work_pending: and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 bne $2, $work_notifysig @@ -387,23 +554,22 @@ $work_resched: $work_notifysig: mov $sp, $16 - bsr $1, do_switch_stack + DO_SWITCH_STACK jsr $26, do_work_pending - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK br restore_all -.end work_pending /* * PTRACE syscall handler */ .align 4 - .ent strace + .type strace, @function strace: /* set up signal stack, call syscall_trace */ - bsr $1, do_switch_stack + DO_SWITCH_STACK jsr $26, syscall_trace_enter /* returns the syscall number */ - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK /* get the arguments back.. */ ldq $16, SP_OFF+24($sp) @@ -431,9 +597,9 @@ ret_from_straced: $strace_success: stq $0, 0($sp) /* save return value */ - bsr $1, do_switch_stack + DO_SWITCH_STACK jsr $26, syscall_trace_leave - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK br $31, ret_from_sys_call .align 3 @@ -447,26 +613,31 @@ $strace_error: stq $0, 0($sp) stq $1, 72($sp) /* a3 for return */ - bsr $1, do_switch_stack + DO_SWITCH_STACK mov $18, $9 /* save old syscall number */ mov $19, $10 /* save old a3 */ jsr $26, syscall_trace_leave mov $9, $18 mov $10, $19 - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK mov $31, $26 /* tell "ret_from_sys_call" we can restart */ br ret_from_sys_call -.end strace +CFI_END_OSF_FRAME entSys /* * Save and restore the switch stack -- aka the balance of the user context. */ .align 4 - .ent do_switch_stack + .type do_switch_stack, @function + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 0 + .cfi_register 64, $1 do_switch_stack: lda $sp, -SWITCH_STACK_SIZE($sp) + .cfi_adjust_cfa_offset SWITCH_STACK_SIZE stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -510,10 +681,14 @@ do_switch_stack: stt $f0, 312($sp) # save fpcr in slot of $f31 ldt $f0, 64($sp) # dont let "do_switch_stack" change fp state. 
ret $31, ($1), 1 -.end do_switch_stack + .cfi_endproc + .size do_switch_stack, .-do_switch_stack .align 4 - .ent undo_switch_stack + .type undo_switch_stack, @function + .cfi_startproc simple + .cfi_def_cfa $sp, 0 + .cfi_register 64, $1 undo_switch_stack: ldq $9, 0($sp) ldq $10, 8($sp) @@ -558,7 +733,8 @@ undo_switch_stack: ldt $f30, 304($sp) lda $sp, SWITCH_STACK_SIZE($sp) ret $31, ($1), 1 -.end undo_switch_stack + .cfi_endproc + .size undo_switch_stack, .-undo_switch_stack /* * The meat of the context switch code. @@ -566,17 +742,18 @@ undo_switch_stack: .align 4 .globl alpha_switch_to - .ent alpha_switch_to + .type alpha_switch_to, @function + .cfi_startproc alpha_switch_to: - .prologue 0 - bsr $1, do_switch_stack + DO_SWITCH_STACK call_pal PAL_swpctx lda $8, 0x3fff - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK bic $sp, $8, $8 mov $17, $0 ret -.end alpha_switch_to + .cfi_endproc + .size alpha_switch_to, .-alpha_switch_to /* * New processes begin life here. diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index f433fc11877a..28e4429596f3 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -236,7 +236,7 @@ void __init init_rtc_irq(void) { irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip, - handle_simple_irq, "RTC"); + handle_percpu_irq, "RTC"); setup_irq(RTC_IRQ, &timer_irqaction); } diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 53b18a620e1c..9dbbcb3b9146 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -264,9 +264,10 @@ recv_secondary_console_msg(void) if (cnt <= 0 || cnt >= 80) strcpy(buf, "<<< BOGUS MSG >>>"); else { - cp1 = (char *) &cpu->ipc_buffer[11]; + cp1 = (char *) &cpu->ipc_buffer[1]; cp2 = buf; - strcpy(cp2, cp1); + memcpy(cp2, cp1, cnt); + cp2[cnt] = '\0'; while ((cp2 = strchr(cp2, '\r')) != 0) { *cp2 = ' '; diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 5bf401f7ea97..6c35159bc00e 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -190,9 +190,6 @@ static struct irq_chip clipper_irq_type = { static void dp264_device_interrupt(unsigned long vector) { -#if 1 - printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n"); -#else unsigned long pld; unsigned int i; @@ -210,12 +207,7 @@ dp264_device_interrupt(unsigned long vector) isa_device_interrupt(vector); else handle_irq(16 + i); -#if 0 - TSUNAMI_cchip->dir0.csr = 1UL << i; mb(); - tmp = TSUNAMI_cchip->dir0.csr; -#endif } -#endif } static void diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index 407accc80877..c92e389ff219 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -317,8 +317,9 @@ marvel_init_irq(void) } static int -marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin) { + struct pci_dev *dev = (struct pci_dev *)cdev; struct pci_controller *hose = dev->sysdata; struct io7_port *io7_port = hose->sysdata; struct io7 *io7 = io7_port->io7; diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 4284ec798ec9..dca9b3fb0071 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -524,6 +524,8 @@ sys_call_table: .quad sys_sendmmsg .quad sys_process_vm_readv .quad sys_process_vm_writev /* 505 */ + .quad sys_kcmp + .quad sys_finit_module .size sys_call_table, . 
- sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index e336694ca042..ea3395036556 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -105,9 +105,7 @@ void arch_irq_work_raise(void) static inline __u32 rpcc(void) { - __u32 result; - asm volatile ("rpcc %0" : "=r"(result)); - return result; + return __builtin_alpha_rpcc(); } int update_persistent_clock(struct timespec now) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index be1fba334bd0..bd0665cdc840 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -66,8 +66,8 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15) { printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", regs->pc, regs->r26, regs->ps, print_tainted()); - print_symbol("pc is at %s\n", regs->pc); - print_symbol("ra is at %s\n", regs->r26 ); + printk("pc is at %pSR\n", (void *)regs->pc); + printk("ra is at %pSR\n", (void *)regs->r26); printk("v0 = %016lx t0 = %016lx t1 = %016lx\n", regs->r0, regs->r1, regs->r2); printk("t2 = %016lx t3 = %016lx t4 = %016lx\n", @@ -132,9 +132,7 @@ dik_show_trace(unsigned long *sp) continue; if (tmp >= (unsigned long) &_etext) continue; - printk("[<%lx>]", tmp); - print_symbol(" %s", tmp); - printk("\n"); + printk("[<%lx>] %pSR\n", tmp, (void *)tmp); if (i > 40) { printk(" ..."); break; diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 7d6ba9db1be9..6c63c358a7e6 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -27,7 +27,6 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o -obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) @@ -82,4 +81,3 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o -crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S deleted file mode 100644 index 35e97569d05f..000000000000 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ /dev/null @@ -1,643 +0,0 @@ -######################################################################## -# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions -# -# Copyright (c) 2013, Intel Corporation -# -# Authors: -# Erdinc Ozturk <erdinc.ozturk@intel.com> -# Vinodh Gopal <vinodh.gopal@intel.com> -# James Guilford <james.guilford@intel.com> -# Tim Chen <tim.c.chen@linux.intel.com> -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# OpenIB.org BSD license below: -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# * Neither the name of the Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# -# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -######################################################################## -# Function API: -# UINT16 crc_t10dif_pcl( -# UINT16 init_crc, //initial CRC value, 16 bits -# const unsigned char *buf, //buffer pointer to calculate CRC on -# UINT64 len //buffer length in bytes (64-bit data) -# ); -# -# Reference paper titled "Fast CRC Computation for Generic -# Polynomials Using PCLMULQDQ Instruction" -# URL: http://www.intel.com/content/dam/www/public/us/en/documents -# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -# -# - -#include <linux/linkage.h> - -.text - -#define arg1 %rdi -#define arg2 %rsi -#define arg3 %rdx - -#define arg1_low32 %edi - -ENTRY(crc_t10dif_pcl) -.align 16 - - # adjust the 16-bit initial_crc value, scale it to 32 bits - shl $16, arg1_low32 - - # Allocate Stack Space - mov %rsp, %rcx - sub $16*2, %rsp - # align stack to 16 byte boundary - and $~(0x10 - 1), %rsp - - # check if smaller than 256 - cmp $256, arg3 - - # for sizes less than 128, we can't fold 64B at a time... - jl _less_than_128 - - - # load the initial crc value - movd arg1_low32, %xmm10 # initial crc - - # crc value does not need to be byte-reflected, but it needs - # to be moved to the high part of the register. - # because data will be byte-reflected and will align with - # initial crc at correct place. - pslldq $12, %xmm10 - - movdqa SHUF_MASK(%rip), %xmm11 - # receive the initial 64B data, xor the initial crc value - movdqu 16*0(arg2), %xmm0 - movdqu 16*1(arg2), %xmm1 - movdqu 16*2(arg2), %xmm2 - movdqu 16*3(arg2), %xmm3 - movdqu 16*4(arg2), %xmm4 - movdqu 16*5(arg2), %xmm5 - movdqu 16*6(arg2), %xmm6 - movdqu 16*7(arg2), %xmm7 - - pshufb %xmm11, %xmm0 - # XOR the initial_crc value - pxor %xmm10, %xmm0 - pshufb %xmm11, %xmm1 - pshufb %xmm11, %xmm2 - pshufb %xmm11, %xmm3 - pshufb %xmm11, %xmm4 - pshufb %xmm11, %xmm5 - pshufb %xmm11, %xmm6 - pshufb %xmm11, %xmm7 - - movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4 - #imm value of pclmulqdq instruction - #will determine which constant to use - - ################################################################# - # we subtract 256 instead of 128 to save one instruction from the loop - sub $256, arg3 - - # at this section of the code, there is 64*x+y (0<=y<64) bytes of - # buffer. 
The _fold_64_B_loop will fold 64B at a time - # until we have 64+y Bytes of buffer - - - # fold 64B at a time. This section of the code folds 4 xmm - # registers in parallel -_fold_64_B_loop: - - # update the buffer pointer - add $128, arg2 # buf += 64# - - movdqu 16*0(arg2), %xmm9 - movdqu 16*1(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm0, %xmm8 - movdqa %xmm1, %xmm13 - pclmulqdq $0x0 , %xmm10, %xmm0 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0 , %xmm10, %xmm1 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm0 - xorps %xmm8 , %xmm0 - pxor %xmm12, %xmm1 - xorps %xmm13, %xmm1 - - movdqu 16*2(arg2), %xmm9 - movdqu 16*3(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm2, %xmm8 - movdqa %xmm3, %xmm13 - pclmulqdq $0x0, %xmm10, %xmm2 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0, %xmm10, %xmm3 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm2 - xorps %xmm8 , %xmm2 - pxor %xmm12, %xmm3 - xorps %xmm13, %xmm3 - - movdqu 16*4(arg2), %xmm9 - movdqu 16*5(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm4, %xmm8 - movdqa %xmm5, %xmm13 - pclmulqdq $0x0, %xmm10, %xmm4 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0, %xmm10, %xmm5 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm4 - xorps %xmm8 , %xmm4 - pxor %xmm12, %xmm5 - xorps %xmm13, %xmm5 - - movdqu 16*6(arg2), %xmm9 - movdqu 16*7(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm6 , %xmm8 - movdqa %xmm7 , %xmm13 - pclmulqdq $0x0 , %xmm10, %xmm6 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0 , %xmm10, %xmm7 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm6 - xorps %xmm8 , %xmm6 - pxor %xmm12, %xmm7 - xorps %xmm13, %xmm7 - - sub $128, arg3 - - # check if there is another 64B in the buffer to be able to fold - jge _fold_64_B_loop - ################################################################## - - - add $128, arg2 - # at this point, the buffer pointer is pointing at the last y Bytes - # of the buffer the 64B of folded data is in 4 of the xmm - # registers: xmm0, xmm1, xmm2, xmm3 - - - # fold the 8 xmm registers to 1 xmm register with different constants - - movdqa rk9(%rip), %xmm10 - movdqa %xmm0, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm0 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm0, %xmm7 - - movdqa rk11(%rip), %xmm10 - movdqa %xmm1, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm1 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm1, %xmm7 - - movdqa rk13(%rip), %xmm10 - movdqa %xmm2, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm2 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm2, %xmm7 - - movdqa rk15(%rip), %xmm10 - movdqa %xmm3, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm3 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm3, %xmm7 - - movdqa rk17(%rip), %xmm10 - movdqa %xmm4, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm4 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm4, %xmm7 - - movdqa rk19(%rip), %xmm10 - movdqa %xmm5, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm5 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm5, %xmm7 - - movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2 - #imm value of pclmulqdq instruction - #will determine which constant to use - movdqa %xmm6, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm6 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm6, %xmm7 - - - # instead of 64, we add 48 to the loop counter to save 1 instruction - # from the loop instead of a cmp instruction, we use the negative - # flag with the jl instruction - 
add $128-16, arg3 - jl _final_reduction_for_128 - - # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 - # and the rest is in memory. We can fold 16 bytes at a time if y>=16 - # continue folding 16B at a time - -_16B_reduction_loop: - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm7 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - movdqu (arg2), %xmm0 - pshufb %xmm11, %xmm0 - pxor %xmm0 , %xmm7 - add $16, arg2 - sub $16, arg3 - # instead of a cmp instruction, we utilize the flags with the - # jge instruction equivalent of: cmp arg3, 16-16 - # check if there is any more 16B in the buffer to be able to fold - jge _16B_reduction_loop - - #now we have 16+z bytes left to reduce, where 0<= z < 16. - #first, we reduce the data in the xmm7 register - - -_final_reduction_for_128: - # check if any more data to fold. If not, compute the CRC of - # the final 128 bits - add $16, arg3 - je _128_done - - # here we are getting data that is less than 16 bytes. - # since we know that there was data before the pointer, we can - # offset the input pointer before the actual point, to receive - # exactly 16 bytes. after that the registers need to be adjusted. -_get_last_two_xmms: - movdqa %xmm7, %xmm2 - - movdqu -16(arg2, arg3), %xmm1 - pshufb %xmm11, %xmm1 - - # get rid of the extra data that was loaded before - # load the shift constant - lea pshufb_shf_table+16(%rip), %rax - sub arg3, %rax - movdqu (%rax), %xmm0 - - # shift xmm2 to the left by arg3 bytes - pshufb %xmm0, %xmm2 - - # shift xmm7 to the right by 16-arg3 bytes - pxor mask1(%rip), %xmm0 - pshufb %xmm0, %xmm7 - pblendvb %xmm2, %xmm1 #xmm0 is implicit - - # fold 16 Bytes - movdqa %xmm1, %xmm2 - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm7 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm2, %xmm7 - -_128_done: - # compute crc of a 128-bit value - movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10 - movdqa %xmm7, %xmm0 - - #64b fold - pclmulqdq $0x1, %xmm10, %xmm7 - pslldq $8 , %xmm0 - pxor %xmm0, %xmm7 - - #32b fold - movdqa %xmm7, %xmm0 - - pand mask2(%rip), %xmm0 - - psrldq $12, %xmm7 - pclmulqdq $0x10, %xmm10, %xmm7 - pxor %xmm0, %xmm7 - - #barrett reduction -_barrett: - movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10 - movdqa %xmm7, %xmm0 - pclmulqdq $0x01, %xmm10, %xmm7 - pslldq $4, %xmm7 - pclmulqdq $0x11, %xmm10, %xmm7 - - pslldq $4, %xmm7 - pxor %xmm0, %xmm7 - pextrd $1, %xmm7, %eax - -_cleanup: - # scale the result back to 16 bits - shr $16, %eax - mov %rcx, %rsp - ret - -######################################################################## - -.align 16 -_less_than_128: - - # check if there is enough buffer to be able to fold 16B at a time - cmp $32, arg3 - jl _less_than_32 - movdqa SHUF_MASK(%rip), %xmm11 - - # now if there is, load the constants - movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 - - movd arg1_low32, %xmm0 # get the initial crc value - pslldq $12, %xmm0 # align it to its correct place - movdqu (arg2), %xmm7 # load the plaintext - pshufb %xmm11, %xmm7 # byte-reflect the plaintext - pxor %xmm0, %xmm7 - - - # update the buffer pointer - add $16, arg2 - - # update the counter. subtract 32 instead of 16 to save one - # instruction from the loop - sub $32, arg3 - - jmp _16B_reduction_loop - - -.align 16 -_less_than_32: - # mov initial crc to the return value. this is necessary for - # zero-length buffers. 
- mov arg1_low32, %eax - test arg3, arg3 - je _cleanup - - movdqa SHUF_MASK(%rip), %xmm11 - - movd arg1_low32, %xmm0 # get the initial crc value - pslldq $12, %xmm0 # align it to its correct place - - cmp $16, arg3 - je _exact_16_left - jl _less_than_16_left - - movdqu (arg2), %xmm7 # load the plaintext - pshufb %xmm11, %xmm7 # byte-reflect the plaintext - pxor %xmm0 , %xmm7 # xor the initial crc value - add $16, arg2 - sub $16, arg3 - movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 - jmp _get_last_two_xmms - - -.align 16 -_less_than_16_left: - # use stack space to load data less than 16 bytes, zero-out - # the 16B in memory first. - - pxor %xmm1, %xmm1 - mov %rsp, %r11 - movdqa %xmm1, (%r11) - - cmp $4, arg3 - jl _only_less_than_4 - - # backup the counter value - mov arg3, %r9 - cmp $8, arg3 - jl _less_than_8_left - - # load 8 Bytes - mov (arg2), %rax - mov %rax, (%r11) - add $8, %r11 - sub $8, arg3 - add $8, arg2 -_less_than_8_left: - - cmp $4, arg3 - jl _less_than_4_left - - # load 4 Bytes - mov (arg2), %eax - mov %eax, (%r11) - add $4, %r11 - sub $4, arg3 - add $4, arg2 -_less_than_4_left: - - cmp $2, arg3 - jl _less_than_2_left - - # load 2 Bytes - mov (arg2), %ax - mov %ax, (%r11) - add $2, %r11 - sub $2, arg3 - add $2, arg2 -_less_than_2_left: - cmp $1, arg3 - jl _zero_left - - # load 1 Byte - mov (arg2), %al - mov %al, (%r11) -_zero_left: - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - # shl r9, 4 - lea pshufb_shf_table+16(%rip), %rax - sub %r9, %rax - movdqu (%rax), %xmm0 - pxor mask1(%rip), %xmm0 - - pshufb %xmm0, %xmm7 - jmp _128_done - -.align 16 -_exact_16_left: - movdqu (arg2), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - jmp _128_done - -_only_less_than_4: - cmp $3, arg3 - jl _only_less_than_3 - - # load 3 Bytes - mov (arg2), %al - mov %al, (%r11) - - mov 1(arg2), %al - mov %al, 1(%r11) - - mov 2(arg2), %al - mov %al, 2(%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $5, %xmm7 - - jmp _barrett -_only_less_than_3: - cmp $2, arg3 - jl _only_less_than_2 - - # load 2 Bytes - mov (arg2), %al - mov %al, (%r11) - - mov 1(arg2), %al - mov %al, 1(%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $6, %xmm7 - - jmp _barrett -_only_less_than_2: - - # load 1 Byte - mov (arg2), %al - mov %al, (%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $7, %xmm7 - - jmp _barrett - -ENDPROC(crc_t10dif_pcl) - -.data - -# precomputed constants -# these constants are precomputed from the poly: -# 0x8bb70000 (0x8bb7 scaled to 32 bits) -.align 16 -# Q = 0x18BB70000 -# rk1 = 2^(32*3) mod Q << 32 -# rk2 = 2^(32*5) mod Q << 32 -# rk3 = 2^(32*15) mod Q << 32 -# rk4 = 2^(32*17) mod Q << 32 -# rk5 = 2^(32*3) mod Q << 32 -# rk6 = 2^(32*2) mod Q << 32 -# rk7 = floor(2^64/Q) -# rk8 = Q -rk1: -.quad 0x2d56000000000000 -rk2: -.quad 0x06df000000000000 -rk3: -.quad 0x9d9d000000000000 -rk4: -.quad 0x7cf5000000000000 -rk5: -.quad 0x2d56000000000000 -rk6: -.quad 0x1368000000000000 -rk7: -.quad 0x00000001f65a57f8 -rk8: -.quad 0x000000018bb70000 - -rk9: -.quad 0xceae000000000000 -rk10: -.quad 0xbfd6000000000000 -rk11: -.quad 0x1e16000000000000 -rk12: -.quad 0x713c000000000000 -rk13: -.quad 0xf7f9000000000000 -rk14: -.quad 0x80a6000000000000 -rk15: -.quad 0x044c000000000000 -rk16: -.quad 0xe658000000000000 -rk17: -.quad 0xad18000000000000 -rk18: 
-.quad 0xa497000000000000 -rk19: -.quad 0x6ee3000000000000 -rk20: -.quad 0xe7b5000000000000 - - - -mask1: -.octa 0x80808080808080808080808080808080 -mask2: -.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF - -SHUF_MASK: -.octa 0x000102030405060708090A0B0C0D0E0F - -pshufb_shf_table: -# use these values for shift constants for the pshufb instruction -# different alignments result in values as shown: -# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 -# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 -# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 -# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 -# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 -# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 -# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 -# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 -# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 -# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 -# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 -# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 -# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 -# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 -# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 -.octa 0x8f8e8d8c8b8a89888786858483828100 -.octa 0x000e0d0c0b0a09080706050403020100 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c deleted file mode 100644 index 7845d7fd54c0..000000000000 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Cryptographic API. - * - * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions - * - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen <tim.c.chen@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/types.h> -#include <linux/module.h> -#include <linux/crc-t10dif.h> -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <asm/i387.h> -#include <asm/cpufeature.h> -#include <asm/cpu_device_id.h> - -asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, - size_t len); - -struct chksum_desc_ctx { - __u16 crc; -}; - -/* - * Steps through buffer one byte at at time, calculates reflected - * crc using table. 
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
-		kernel_fpu_end();
-	} else
-		ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__u16 *)out = ctx->crc;
-	return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-			u8 *out)
-{
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
-		kernel_fpu_end();
-	} else
-		*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-	.digestsize	= CRC_T10DIF_DIGEST_SIZE,
-	.init		= chksum_init,
-	.update		= chksum_update,
-	.final		= chksum_final,
-	.finup		= chksum_finup,
-	.digest		= chksum_digest,
-	.descsize	= sizeof(struct chksum_desc_ctx),
-	.base		= {
-		.cra_name		= "crct10dif",
-		.cra_driver_name	= "crct10dif-pclmul",
-		.cra_priority		= 200,
-		.cra_blocksize		= CRC_T10DIF_BLOCK_SIZE,
-		.cra_module		= THIS_MODULE,
-	}
-};
-
-static const struct x86_cpu_id crct10dif_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
-
-static int __init crct10dif_intel_mod_init(void)
-{
-	if (!x86_match_cpu(crct10dif_cpu_id))
-		return -ENODEV;
-
-	return crypto_register_shash(&alg);
-}
-
-static void __exit crct10dif_intel_mod_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_intel_mod_init);
-module_exit(crct10dif_intel_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("crct10dif");
-MODULE_ALIAS("crct10dif-pclmul");
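A closing note on the param.h pair near the top of this diff: the kernel-side HZ becomes CONFIG_HZ, but the user-visible HZ/USER_HZ stay at alpha's traditional 1024, so tick-denominated values reported through times() keep their historical scale regardless of the kernel's internal tick rate. A small userspace sketch (a hypothetical program, not from the patch) of how portable code should consume those ticks:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/times.h>

    int main(void)
    {
            /* sysconf() reports USER_HZ (1024 on alpha), which is the unit
             * for every field of struct tms -- independent of the kernel's
             * internal CONFIG_HZ tick rate. */
            long ticks_per_sec = sysconf(_SC_CLK_TCK);
            struct tms t;

            if (times(&t) == (clock_t)-1)
                    return 1;
            printf("user cpu time: %.3f s\n",
                   (double)t.tms_utime / ticks_per_sec);
            return 0;
    }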