summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/Kconfig1
-rw-r--r--arch/alpha/include/asm/atomic.h88
-rw-r--r--arch/alpha/include/asm/param.h8
-rw-r--r--arch/alpha/include/asm/spinlock.h4
-rw-r--r--arch/alpha/include/asm/unistd.h3
-rw-r--r--arch/alpha/include/uapi/asm/param.h7
-rw-r--r--arch/alpha/include/uapi/asm/unistd.h2
-rw-r--r--arch/alpha/kernel/entry.S399
-rw-r--r--arch/alpha/kernel/irq_alpha.c2
-rw-r--r--arch/alpha/kernel/smp.c5
-rw-r--r--arch/alpha/kernel/sys_dp264.c8
-rw-r--r--arch/alpha/kernel/sys_marvel.c3
-rw-r--r--arch/alpha/kernel/systbls.S2
-rw-r--r--arch/alpha/kernel/time.c4
-rw-r--r--arch/alpha/kernel/traps.c8
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/crct10dif-pcl-asm_64.S643
-rw-r--r--arch/x86/crypto/crct10dif-pclmul_glue.c151
18 files changed, 374 insertions, 966 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 837a1f2d8b96..082d9b4b5472 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -15,6 +15,7 @@ config ALPHA
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CMOS_UPDATE
select GENERIC_STRNCPY_FROM_USER
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index c2cbe4fc391c..78b03ef39f6f 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -186,17 +186,24 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
*/
static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
{
- int c, old;
- c = atomic_read(v);
- for (;;) {
- if (unlikely(c == (u)))
- break;
- old = atomic_cmpxchg((v), c, c + (a));
- if (likely(old == c))
- break;
- c = old;
- }
- return c;
+ int c, new, old;
+ smp_mb();
+ __asm__ __volatile__(
+ "1: ldl_l %[old],%[mem]\n"
+ " cmpeq %[old],%[u],%[c]\n"
+ " addl %[old],%[a],%[new]\n"
+ " bne %[c],2f\n"
+ " stl_c %[new],%[mem]\n"
+ " beq %[new],3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c)
+ : [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u)
+ : "memory");
+ smp_mb();
+ return old;
}
@@ -207,21 +214,56 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
* @u: ...unless v is equal to u.
*
* Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
+ * Returns true iff @v was not @u.
*/
static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
{
- long c, old;
- c = atomic64_read(v);
- for (;;) {
- if (unlikely(c == (u)))
- break;
- old = atomic64_cmpxchg((v), c, c + (a));
- if (likely(old == c))
- break;
- c = old;
- }
- return c != (u);
+ long c, tmp;
+ smp_mb();
+ __asm__ __volatile__(
+ "1: ldq_l %[tmp],%[mem]\n"
+ " cmpeq %[tmp],%[u],%[c]\n"
+ " addq %[tmp],%[a],%[tmp]\n"
+ " bne %[c],2f\n"
+ " stq_c %[tmp],%[mem]\n"
+ " beq %[tmp],3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : [tmp] "=&r"(tmp), [c] "=&r"(c)
+ : [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u)
+ : "memory");
+ smp_mb();
+ return !c;
+}
+
+/*
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+ long old, tmp;
+ smp_mb();
+ __asm__ __volatile__(
+ "1: ldq_l %[old],%[mem]\n"
+ " subq %[old],1,%[tmp]\n"
+ " ble %[old],2f\n"
+ " stq_c %[tmp],%[mem]\n"
+ " beq %[tmp],3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : [old] "=&r"(old), [tmp] "=&r"(tmp)
+ : [mem] "m"(*v)
+ : "memory");
+ smp_mb();
+ return old - 1;
}
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h
index bf46af51941b..a5b68b268bcf 100644
--- a/arch/alpha/include/asm/param.h
+++ b/arch/alpha/include/asm/param.h
@@ -3,7 +3,9 @@
#include <uapi/asm/param.h>
-#define HZ CONFIG_HZ
-#define USER_HZ HZ
-# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */
+# undef HZ
+# define HZ CONFIG_HZ
+# define USER_HZ 1024
+# define CLOCKS_PER_SEC USER_HZ /* frequency at which times() counts */
+
#endif /* _ASM_ALPHA_PARAM_H */
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index 3bba21e41b81..37b570d01202 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -168,8 +168,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-#define arch_spin_relax(lock) cpu_relax()
-#define arch_read_relax(lock) cpu_relax()
-#define arch_write_relax(lock) cpu_relax()
-
#endif /* _ALPHA_SPINLOCK_H */
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 43baee17acdf..f2c94402e2c8 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -3,8 +3,7 @@
#include <uapi/asm/unistd.h>
-
-#define NR_SYSCALLS 506
+#define NR_SYSCALLS 508
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h
index 29daed819ebd..dbcd9834af6d 100644
--- a/arch/alpha/include/uapi/asm/param.h
+++ b/arch/alpha/include/uapi/asm/param.h
@@ -1,13 +1,7 @@
#ifndef _UAPI_ASM_ALPHA_PARAM_H
#define _UAPI_ASM_ALPHA_PARAM_H
-/* ??? Gross. I don't want to parameterize this, and supposedly the
- hardware ignores reprogramming. We also need userland buy-in to the
- change in HZ, since this is visible in the wait4 resources etc. */
-
-#ifndef __KERNEL__
#define HZ 1024
-#endif
#define EXEC_PAGESIZE 8192
@@ -17,5 +11,4 @@
#define MAXHOSTNAMELEN 64 /* max length of hostname */
-
#endif /* _UAPI_ASM_ALPHA_PARAM_H */
diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h
index 801d28bcea51..53ae7bb1bfd1 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -467,5 +467,7 @@
#define __NR_sendmmsg 503
#define __NR_process_vm_readv 504
#define __NR_process_vm_writev 505
+#define __NR_kcmp 506
+#define __NR_finit_module 507
#endif /* _UAPI_ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index f62a994ef126..a969b95ee5ac 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -12,11 +12,32 @@
.text
.set noat
+ .cfi_sections .debug_frame
/* Stack offsets. */
#define SP_OFF 184
#define SWITCH_STACK_SIZE 320
+.macro CFI_START_OSF_FRAME func
+ .align 4
+ .globl \func
+ .type \func,@function
+\func:
+ .cfi_startproc simple
+ .cfi_return_column 64
+ .cfi_def_cfa $sp, 48
+ .cfi_rel_offset 64, 8
+ .cfi_rel_offset $gp, 16
+ .cfi_rel_offset $16, 24
+ .cfi_rel_offset $17, 32
+ .cfi_rel_offset $18, 40
+.endm
+
+.macro CFI_END_OSF_FRAME func
+ .cfi_endproc
+ .size \func, . - \func
+.endm
+
/*
* This defines the normal kernel pt-regs layout.
*
@@ -27,100 +48,158 @@
* the palcode-provided values are available to the signal handler.
*/
-#define SAVE_ALL \
- subq $sp, SP_OFF, $sp; \
- stq $0, 0($sp); \
- stq $1, 8($sp); \
- stq $2, 16($sp); \
- stq $3, 24($sp); \
- stq $4, 32($sp); \
- stq $28, 144($sp); \
- lda $2, alpha_mv; \
- stq $5, 40($sp); \
- stq $6, 48($sp); \
- stq $7, 56($sp); \
- stq $8, 64($sp); \
- stq $19, 72($sp); \
- stq $20, 80($sp); \
- stq $21, 88($sp); \
- ldq $2, HAE_CACHE($2); \
- stq $22, 96($sp); \
- stq $23, 104($sp); \
- stq $24, 112($sp); \
- stq $25, 120($sp); \
- stq $26, 128($sp); \
- stq $27, 136($sp); \
- stq $2, 152($sp); \
- stq $16, 160($sp); \
- stq $17, 168($sp); \
+.macro SAVE_ALL
+ subq $sp, SP_OFF, $sp
+ .cfi_adjust_cfa_offset SP_OFF
+ stq $0, 0($sp)
+ stq $1, 8($sp)
+ stq $2, 16($sp)
+ stq $3, 24($sp)
+ stq $4, 32($sp)
+ stq $28, 144($sp)
+ .cfi_rel_offset $0, 0
+ .cfi_rel_offset $1, 8
+ .cfi_rel_offset $2, 16
+ .cfi_rel_offset $3, 24
+ .cfi_rel_offset $4, 32
+ .cfi_rel_offset $28, 144
+ lda $2, alpha_mv
+ stq $5, 40($sp)
+ stq $6, 48($sp)
+ stq $7, 56($sp)
+ stq $8, 64($sp)
+ stq $19, 72($sp)
+ stq $20, 80($sp)
+ stq $21, 88($sp)
+ ldq $2, HAE_CACHE($2)
+ stq $22, 96($sp)
+ stq $23, 104($sp)
+ stq $24, 112($sp)
+ stq $25, 120($sp)
+ stq $26, 128($sp)
+ stq $27, 136($sp)
+ stq $2, 152($sp)
+ stq $16, 160($sp)
+ stq $17, 168($sp)
stq $18, 176($sp)
+ .cfi_rel_offset $5, 40
+ .cfi_rel_offset $6, 48
+ .cfi_rel_offset $7, 56
+ .cfi_rel_offset $8, 64
+ .cfi_rel_offset $19, 72
+ .cfi_rel_offset $20, 80
+ .cfi_rel_offset $21, 88
+ .cfi_rel_offset $22, 96
+ .cfi_rel_offset $23, 104
+ .cfi_rel_offset $24, 112
+ .cfi_rel_offset $25, 120
+ .cfi_rel_offset $26, 128
+ .cfi_rel_offset $27, 136
+.endm
-#define RESTORE_ALL \
- lda $19, alpha_mv; \
- ldq $0, 0($sp); \
- ldq $1, 8($sp); \
- ldq $2, 16($sp); \
- ldq $3, 24($sp); \
- ldq $21, 152($sp); \
- ldq $20, HAE_CACHE($19); \
- ldq $4, 32($sp); \
- ldq $5, 40($sp); \
- ldq $6, 48($sp); \
- ldq $7, 56($sp); \
- subq $20, $21, $20; \
- ldq $8, 64($sp); \
- beq $20, 99f; \
- ldq $20, HAE_REG($19); \
- stq $21, HAE_CACHE($19); \
- stq $21, 0($20); \
-99:; \
- ldq $19, 72($sp); \
- ldq $20, 80($sp); \
- ldq $21, 88($sp); \
- ldq $22, 96($sp); \
- ldq $23, 104($sp); \
- ldq $24, 112($sp); \
- ldq $25, 120($sp); \
- ldq $26, 128($sp); \
- ldq $27, 136($sp); \
- ldq $28, 144($sp); \
+.macro RESTORE_ALL
+ lda $19, alpha_mv
+ ldq $0, 0($sp)
+ ldq $1, 8($sp)
+ ldq $2, 16($sp)
+ ldq $3, 24($sp)
+ ldq $21, 152($sp)
+ ldq $20, HAE_CACHE($19)
+ ldq $4, 32($sp)
+ ldq $5, 40($sp)
+ ldq $6, 48($sp)
+ ldq $7, 56($sp)
+ subq $20, $21, $20
+ ldq $8, 64($sp)
+ beq $20, 99f
+ ldq $20, HAE_REG($19)
+ stq $21, HAE_CACHE($19)
+ stq $21, 0($20)
+99: ldq $19, 72($sp)
+ ldq $20, 80($sp)
+ ldq $21, 88($sp)
+ ldq $22, 96($sp)
+ ldq $23, 104($sp)
+ ldq $24, 112($sp)
+ ldq $25, 120($sp)
+ ldq $26, 128($sp)
+ ldq $27, 136($sp)
+ ldq $28, 144($sp)
addq $sp, SP_OFF, $sp
+ .cfi_restore $0
+ .cfi_restore $1
+ .cfi_restore $2
+ .cfi_restore $3
+ .cfi_restore $4
+ .cfi_restore $5
+ .cfi_restore $6
+ .cfi_restore $7
+ .cfi_restore $8
+ .cfi_restore $19
+ .cfi_restore $20
+ .cfi_restore $21
+ .cfi_restore $22
+ .cfi_restore $23
+ .cfi_restore $24
+ .cfi_restore $25
+ .cfi_restore $26
+ .cfi_restore $27
+ .cfi_restore $28
+ .cfi_adjust_cfa_offset -SP_OFF
+.endm
+
+.macro DO_SWITCH_STACK
+ bsr $1, do_switch_stack
+ .cfi_adjust_cfa_offset SWITCH_STACK_SIZE
+ .cfi_rel_offset $9, 0
+ .cfi_rel_offset $10, 8
+ .cfi_rel_offset $11, 16
+ .cfi_rel_offset $12, 24
+ .cfi_rel_offset $13, 32
+ .cfi_rel_offset $14, 40
+ .cfi_rel_offset $15, 48
+ /* We don't really care about the FP registers for debugging. */
+.endm
+
+.macro UNDO_SWITCH_STACK
+ bsr $1, undo_switch_stack
+ .cfi_restore $9
+ .cfi_restore $10
+ .cfi_restore $11
+ .cfi_restore $12
+ .cfi_restore $13
+ .cfi_restore $14
+ .cfi_restore $15
+ .cfi_adjust_cfa_offset -SWITCH_STACK_SIZE
+.endm
/*
* Non-syscall kernel entry points.
*/
- .align 4
- .globl entInt
- .ent entInt
-entInt:
+CFI_START_OSF_FRAME entInt
SAVE_ALL
lda $8, 0x3fff
lda $26, ret_from_sys_call
bic $sp, $8, $8
mov $sp, $19
jsr $31, do_entInt
-.end entInt
+CFI_END_OSF_FRAME entInt
- .align 4
- .globl entArith
- .ent entArith
-entArith:
+CFI_START_OSF_FRAME entArith
SAVE_ALL
lda $8, 0x3fff
lda $26, ret_from_sys_call
bic $sp, $8, $8
mov $sp, $18
jsr $31, do_entArith
-.end entArith
+CFI_END_OSF_FRAME entArith
- .align 4
- .globl entMM
- .ent entMM
-entMM:
+CFI_START_OSF_FRAME entMM
SAVE_ALL
/* save $9 - $15 so the inline exception code can manipulate them. */
subq $sp, 56, $sp
+ .cfi_adjust_cfa_offset 56
stq $9, 0($sp)
stq $10, 8($sp)
stq $11, 16($sp)
@@ -128,6 +207,13 @@ entMM:
stq $13, 32($sp)
stq $14, 40($sp)
stq $15, 48($sp)
+ .cfi_rel_offset $9, 0
+ .cfi_rel_offset $10, 8
+ .cfi_rel_offset $11, 16
+ .cfi_rel_offset $12, 24
+ .cfi_rel_offset $13, 32
+ .cfi_rel_offset $14, 40
+ .cfi_rel_offset $15, 48
addq $sp, 56, $19
/* handle the fault */
lda $8, 0x3fff
@@ -142,28 +228,33 @@ entMM:
ldq $14, 40($sp)
ldq $15, 48($sp)
addq $sp, 56, $sp
+ .cfi_restore $9
+ .cfi_restore $10
+ .cfi_restore $11
+ .cfi_restore $12
+ .cfi_restore $13
+ .cfi_restore $14
+ .cfi_restore $15
+ .cfi_adjust_cfa_offset -56
/* finish up the syscall as normal. */
br ret_from_sys_call
-.end entMM
+CFI_END_OSF_FRAME entMM
- .align 4
- .globl entIF
- .ent entIF
-entIF:
+CFI_START_OSF_FRAME entIF
SAVE_ALL
lda $8, 0x3fff
lda $26, ret_from_sys_call
bic $sp, $8, $8
mov $sp, $17
jsr $31, do_entIF
-.end entIF
+CFI_END_OSF_FRAME entIF
- .align 4
- .globl entUna
- .ent entUna
-entUna:
+CFI_START_OSF_FRAME entUna
lda $sp, -256($sp)
+ .cfi_adjust_cfa_offset 256
stq $0, 0($sp)
+ .cfi_rel_offset $0, 0
+ .cfi_remember_state
ldq $0, 256($sp) /* get PS */
stq $1, 8($sp)
stq $2, 16($sp)
@@ -195,6 +286,32 @@ entUna:
stq $28, 224($sp)
mov $sp, $19
stq $gp, 232($sp)
+ .cfi_rel_offset $1, 1*8
+ .cfi_rel_offset $2, 2*8
+ .cfi_rel_offset $3, 3*8
+ .cfi_rel_offset $4, 4*8
+ .cfi_rel_offset $5, 5*8
+ .cfi_rel_offset $6, 6*8
+ .cfi_rel_offset $7, 7*8
+ .cfi_rel_offset $8, 8*8
+ .cfi_rel_offset $9, 9*8
+ .cfi_rel_offset $10, 10*8
+ .cfi_rel_offset $11, 11*8
+ .cfi_rel_offset $12, 12*8
+ .cfi_rel_offset $13, 13*8
+ .cfi_rel_offset $14, 14*8
+ .cfi_rel_offset $15, 15*8
+ .cfi_rel_offset $19, 19*8
+ .cfi_rel_offset $20, 20*8
+ .cfi_rel_offset $21, 21*8
+ .cfi_rel_offset $22, 22*8
+ .cfi_rel_offset $23, 23*8
+ .cfi_rel_offset $24, 24*8
+ .cfi_rel_offset $25, 25*8
+ .cfi_rel_offset $26, 26*8
+ .cfi_rel_offset $27, 27*8
+ .cfi_rel_offset $28, 28*8
+ .cfi_rel_offset $29, 29*8
lda $8, 0x3fff
stq $31, 248($sp)
bic $sp, $8, $8
@@ -228,16 +345,45 @@ entUna:
ldq $28, 224($sp)
ldq $gp, 232($sp)
lda $sp, 256($sp)
+ .cfi_restore $1
+ .cfi_restore $2
+ .cfi_restore $3
+ .cfi_restore $4
+ .cfi_restore $5
+ .cfi_restore $6
+ .cfi_restore $7
+ .cfi_restore $8
+ .cfi_restore $9
+ .cfi_restore $10
+ .cfi_restore $11
+ .cfi_restore $12
+ .cfi_restore $13
+ .cfi_restore $14
+ .cfi_restore $15
+ .cfi_restore $19
+ .cfi_restore $20
+ .cfi_restore $21
+ .cfi_restore $22
+ .cfi_restore $23
+ .cfi_restore $24
+ .cfi_restore $25
+ .cfi_restore $26
+ .cfi_restore $27
+ .cfi_restore $28
+ .cfi_restore $29
+ .cfi_adjust_cfa_offset -256
call_pal PAL_rti
-.end entUna
.align 4
- .ent entUnaUser
entUnaUser:
+ .cfi_restore_state
ldq $0, 0($sp) /* restore original $0 */
lda $sp, 256($sp) /* pop entUna's stack frame */
+ .cfi_restore $0
+ .cfi_adjust_cfa_offset -256
SAVE_ALL /* setup normal kernel stack */
lda $sp, -56($sp)
+ .cfi_adjust_cfa_offset 56
stq $9, 0($sp)
stq $10, 8($sp)
stq $11, 16($sp)
@@ -245,6 +391,13 @@ entUnaUser:
stq $13, 32($sp)
stq $14, 40($sp)
stq $15, 48($sp)
+ .cfi_rel_offset $9, 0
+ .cfi_rel_offset $10, 8
+ .cfi_rel_offset $11, 16
+ .cfi_rel_offset $12, 24
+ .cfi_rel_offset $13, 32
+ .cfi_rel_offset $14, 40
+ .cfi_rel_offset $15, 48
lda $8, 0x3fff
addq $sp, 56, $19
bic $sp, $8, $8
@@ -257,20 +410,25 @@ entUnaUser:
ldq $14, 40($sp)
ldq $15, 48($sp)
lda $sp, 56($sp)
+ .cfi_restore $9
+ .cfi_restore $10
+ .cfi_restore $11
+ .cfi_restore $12
+ .cfi_restore $13
+ .cfi_restore $14
+ .cfi_restore $15
+ .cfi_adjust_cfa_offset -56
br ret_from_sys_call
-.end entUnaUser
+CFI_END_OSF_FRAME entUna
- .align 4
- .globl entDbg
- .ent entDbg
-entDbg:
+CFI_START_OSF_FRAME entDbg
SAVE_ALL
lda $8, 0x3fff
lda $26, ret_from_sys_call
bic $sp, $8, $8
mov $sp, $16
jsr $31, do_entDbg
-.end entDbg
+CFI_END_OSF_FRAME entDbg
/*
* The system call entry point is special. Most importantly, it looks
@@ -285,8 +443,12 @@ entDbg:
.align 4
.globl entSys
- .globl ret_from_sys_call
- .ent entSys
+ .type entSys, @function
+ .cfi_startproc simple
+ .cfi_return_column 64
+ .cfi_def_cfa $sp, 48
+ .cfi_rel_offset 64, 8
+ .cfi_rel_offset $gp, 16
entSys:
SAVE_ALL
lda $8, 0x3fff
@@ -300,6 +462,9 @@ entSys:
stq $17, SP_OFF+32($sp)
s8addq $0, $5, $5
stq $18, SP_OFF+40($sp)
+ .cfi_rel_offset $16, SP_OFF+24
+ .cfi_rel_offset $17, SP_OFF+32
+ .cfi_rel_offset $18, SP_OFF+40
blbs $3, strace
beq $4, 1f
ldq $27, 0($5)
@@ -310,6 +475,7 @@ entSys:
stq $31, 72($sp) /* a3=0 => no error */
.align 4
+ .globl ret_from_sys_call
ret_from_sys_call:
cmovne $26, 0, $18 /* $18 = 0 => non-restartable */
ldq $0, SP_OFF($sp)
@@ -324,10 +490,12 @@ ret_to_user:
and $17, _TIF_WORK_MASK, $2
bne $2, work_pending
restore_all:
+ .cfi_remember_state
RESTORE_ALL
call_pal PAL_rti
ret_to_kernel:
+ .cfi_restore_state
lda $16, 7
call_pal PAL_swpipl
br restore_all
@@ -356,7 +524,6 @@ $ret_success:
stq $0, 0($sp)
stq $31, 72($sp) /* a3=0 => no error */
br ret_from_sys_call
-.end entSys
/*
* Do all cleanup when returning from all interrupts and system calls.
@@ -370,7 +537,7 @@ $ret_success:
*/
.align 4
- .ent work_pending
+ .type work_pending, @function
work_pending:
and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2
bne $2, $work_notifysig
@@ -387,23 +554,22 @@ $work_resched:
$work_notifysig:
mov $sp, $16
- bsr $1, do_switch_stack
+ DO_SWITCH_STACK
jsr $26, do_work_pending
- bsr $1, undo_switch_stack
+ UNDO_SWITCH_STACK
br restore_all
-.end work_pending
/*
* PTRACE syscall handler
*/
.align 4
- .ent strace
+ .type strace, @function
strace:
/* set up signal stack, call syscall_trace */
- bsr $1, do_switch_stack
+ DO_SWITCH_STACK
jsr $26, syscall_trace_enter /* returns the syscall number */
- bsr $1, undo_switch_stack
+ UNDO_SWITCH_STACK
/* get the arguments back.. */
ldq $16, SP_OFF+24($sp)
@@ -431,9 +597,9 @@ ret_from_straced:
$strace_success:
stq $0, 0($sp) /* save return value */
- bsr $1, do_switch_stack
+ DO_SWITCH_STACK
jsr $26, syscall_trace_leave
- bsr $1, undo_switch_stack
+ UNDO_SWITCH_STACK
br $31, ret_from_sys_call
.align 3
@@ -447,26 +613,31 @@ $strace_error:
stq $0, 0($sp)
stq $1, 72($sp) /* a3 for return */
- bsr $1, do_switch_stack
+ DO_SWITCH_STACK
mov $18, $9 /* save old syscall number */
mov $19, $10 /* save old a3 */
jsr $26, syscall_trace_leave
mov $9, $18
mov $10, $19
- bsr $1, undo_switch_stack
+ UNDO_SWITCH_STACK
mov $31, $26 /* tell "ret_from_sys_call" we can restart */
br ret_from_sys_call
-.end strace
+CFI_END_OSF_FRAME entSys
/*
* Save and restore the switch stack -- aka the balance of the user context.
*/
.align 4
- .ent do_switch_stack
+ .type do_switch_stack, @function
+ .cfi_startproc simple
+ .cfi_return_column 64
+ .cfi_def_cfa $sp, 0
+ .cfi_register 64, $1
do_switch_stack:
lda $sp, -SWITCH_STACK_SIZE($sp)
+ .cfi_adjust_cfa_offset SWITCH_STACK_SIZE
stq $9, 0($sp)
stq $10, 8($sp)
stq $11, 16($sp)
@@ -510,10 +681,14 @@ do_switch_stack:
stt $f0, 312($sp) # save fpcr in slot of $f31
ldt $f0, 64($sp) # dont let "do_switch_stack" change fp state.
ret $31, ($1), 1
-.end do_switch_stack
+ .cfi_endproc
+ .size do_switch_stack, .-do_switch_stack
.align 4
- .ent undo_switch_stack
+ .type undo_switch_stack, @function
+ .cfi_startproc simple
+ .cfi_def_cfa $sp, 0
+ .cfi_register 64, $1
undo_switch_stack:
ldq $9, 0($sp)
ldq $10, 8($sp)
@@ -558,7 +733,8 @@ undo_switch_stack:
ldt $f30, 304($sp)
lda $sp, SWITCH_STACK_SIZE($sp)
ret $31, ($1), 1
-.end undo_switch_stack
+ .cfi_endproc
+ .size undo_switch_stack, .-undo_switch_stack
/*
* The meat of the context switch code.
@@ -566,17 +742,18 @@ undo_switch_stack:
.align 4
.globl alpha_switch_to
- .ent alpha_switch_to
+ .type alpha_switch_to, @function
+ .cfi_startproc
alpha_switch_to:
- .prologue 0
- bsr $1, do_switch_stack
+ DO_SWITCH_STACK
call_pal PAL_swpctx
lda $8, 0x3fff
- bsr $1, undo_switch_stack
+ UNDO_SWITCH_STACK
bic $sp, $8, $8
mov $17, $0
ret
-.end alpha_switch_to
+ .cfi_endproc
+ .size alpha_switch_to, .-alpha_switch_to
/*
* New processes begin life here.
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index f433fc11877a..28e4429596f3 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -236,7 +236,7 @@ void __init
init_rtc_irq(void)
{
irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip,
- handle_simple_irq, "RTC");
+ handle_percpu_irq, "RTC");
setup_irq(RTC_IRQ, &timer_irqaction);
}
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 53b18a620e1c..9dbbcb3b9146 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -264,9 +264,10 @@ recv_secondary_console_msg(void)
if (cnt <= 0 || cnt >= 80)
strcpy(buf, "<<< BOGUS MSG >>>");
else {
- cp1 = (char *) &cpu->ipc_buffer[11];
+ cp1 = (char *) &cpu->ipc_buffer[1];
cp2 = buf;
- strcpy(cp2, cp1);
+ memcpy(cp2, cp1, cnt);
+ cp2[cnt] = '\0';
while ((cp2 = strchr(cp2, '\r')) != 0) {
*cp2 = ' ';
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 5bf401f7ea97..6c35159bc00e 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -190,9 +190,6 @@ static struct irq_chip clipper_irq_type = {
static void
dp264_device_interrupt(unsigned long vector)
{
-#if 1
- printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n");
-#else
unsigned long pld;
unsigned int i;
@@ -210,12 +207,7 @@ dp264_device_interrupt(unsigned long vector)
isa_device_interrupt(vector);
else
handle_irq(16 + i);
-#if 0
- TSUNAMI_cchip->dir0.csr = 1UL << i; mb();
- tmp = TSUNAMI_cchip->dir0.csr;
-#endif
}
-#endif
}
static void
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 407accc80877..c92e389ff219 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -317,8 +317,9 @@ marvel_init_irq(void)
}
static int
-marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
{
+ struct pci_dev *dev = (struct pci_dev *)cdev;
struct pci_controller *hose = dev->sysdata;
struct io7_port *io7_port = hose->sysdata;
struct io7 *io7 = io7_port->io7;
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 4284ec798ec9..dca9b3fb0071 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -524,6 +524,8 @@ sys_call_table:
.quad sys_sendmmsg
.quad sys_process_vm_readv
.quad sys_process_vm_writev /* 505 */
+ .quad sys_kcmp
+ .quad sys_finit_module
.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index e336694ca042..ea3395036556 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -105,9 +105,7 @@ void arch_irq_work_raise(void)
static inline __u32 rpcc(void)
{
- __u32 result;
- asm volatile ("rpcc %0" : "=r"(result));
- return result;
+ return __builtin_alpha_rpcc();
}
int update_persistent_clock(struct timespec now)
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index be1fba334bd0..bd0665cdc840 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -66,8 +66,8 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15)
{
printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n",
regs->pc, regs->r26, regs->ps, print_tainted());
- print_symbol("pc is at %s\n", regs->pc);
- print_symbol("ra is at %s\n", regs->r26 );
+ printk("pc is at %pSR\n", (void *)regs->pc);
+ printk("ra is at %pSR\n", (void *)regs->r26);
printk("v0 = %016lx t0 = %016lx t1 = %016lx\n",
regs->r0, regs->r1, regs->r2);
printk("t2 = %016lx t3 = %016lx t4 = %016lx\n",
@@ -132,9 +132,7 @@ dik_show_trace(unsigned long *sp)
continue;
if (tmp >= (unsigned long) &_etext)
continue;
- printk("[<%lx>]", tmp);
- print_symbol(" %s", tmp);
- printk("\n");
+ printk("[<%lx>] %pSR\n", tmp, (void *)tmp);
if (i > 40) {
printk(" ...");
break;
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 7d6ba9db1be9..6c63c358a7e6 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -27,7 +27,6 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
@@ -82,4 +81,3 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
-crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
deleted file mode 100644
index 35e97569d05f..000000000000
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ /dev/null
@@ -1,643 +0,0 @@
-########################################################################
-# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
-#
-# Copyright (c) 2013, Intel Corporation
-#
-# Authors:
-# Erdinc Ozturk <erdinc.ozturk@intel.com>
-# Vinodh Gopal <vinodh.gopal@intel.com>
-# James Guilford <james.guilford@intel.com>
-# Tim Chen <tim.c.chen@linux.intel.com>
-#
-# This software is available to you under a choice of one of two
-# licenses. You may choose to be licensed under the terms of the GNU
-# General Public License (GPL) Version 2, available from the file
-# COPYING in the main directory of this source tree, or the
-# OpenIB.org BSD license below:
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the
-# distribution.
-#
-# * Neither the name of the Intel Corporation nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-#
-# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-########################################################################
-# Function API:
-# UINT16 crc_t10dif_pcl(
-# UINT16 init_crc, //initial CRC value, 16 bits
-# const unsigned char *buf, //buffer pointer to calculate CRC on
-# UINT64 len //buffer length in bytes (64-bit data)
-# );
-#
-# Reference paper titled "Fast CRC Computation for Generic
-# Polynomials Using PCLMULQDQ Instruction"
-# URL: http://www.intel.com/content/dam/www/public/us/en/documents
-# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-#
-#
-
-#include <linux/linkage.h>
-
-.text
-
-#define arg1 %rdi
-#define arg2 %rsi
-#define arg3 %rdx
-
-#define arg1_low32 %edi
-
-ENTRY(crc_t10dif_pcl)
-.align 16
-
- # adjust the 16-bit initial_crc value, scale it to 32 bits
- shl $16, arg1_low32
-
- # Allocate Stack Space
- mov %rsp, %rcx
- sub $16*2, %rsp
- # align stack to 16 byte boundary
- and $~(0x10 - 1), %rsp
-
- # check if smaller than 256
- cmp $256, arg3
-
- # for sizes less than 128, we can't fold 64B at a time...
- jl _less_than_128
-
-
- # load the initial crc value
- movd arg1_low32, %xmm10 # initial crc
-
- # crc value does not need to be byte-reflected, but it needs
- # to be moved to the high part of the register.
- # because data will be byte-reflected and will align with
- # initial crc at correct place.
- pslldq $12, %xmm10
-
- movdqa SHUF_MASK(%rip), %xmm11
- # receive the initial 64B data, xor the initial crc value
- movdqu 16*0(arg2), %xmm0
- movdqu 16*1(arg2), %xmm1
- movdqu 16*2(arg2), %xmm2
- movdqu 16*3(arg2), %xmm3
- movdqu 16*4(arg2), %xmm4
- movdqu 16*5(arg2), %xmm5
- movdqu 16*6(arg2), %xmm6
- movdqu 16*7(arg2), %xmm7
-
- pshufb %xmm11, %xmm0
- # XOR the initial_crc value
- pxor %xmm10, %xmm0
- pshufb %xmm11, %xmm1
- pshufb %xmm11, %xmm2
- pshufb %xmm11, %xmm3
- pshufb %xmm11, %xmm4
- pshufb %xmm11, %xmm5
- pshufb %xmm11, %xmm6
- pshufb %xmm11, %xmm7
-
- movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4
- #imm value of pclmulqdq instruction
- #will determine which constant to use
-
- #################################################################
- # we subtract 256 instead of 128 to save one instruction from the loop
- sub $256, arg3
-
- # at this section of the code, there is 64*x+y (0<=y<64) bytes of
- # buffer. The _fold_64_B_loop will fold 64B at a time
- # until we have 64+y Bytes of buffer
-
-
- # fold 64B at a time. This section of the code folds 4 xmm
- # registers in parallel
-_fold_64_B_loop:
-
- # update the buffer pointer
- add $128, arg2 # buf += 64#
-
- movdqu 16*0(arg2), %xmm9
- movdqu 16*1(arg2), %xmm12
- pshufb %xmm11, %xmm9
- pshufb %xmm11, %xmm12
- movdqa %xmm0, %xmm8
- movdqa %xmm1, %xmm13
- pclmulqdq $0x0 , %xmm10, %xmm0
- pclmulqdq $0x11, %xmm10, %xmm8
- pclmulqdq $0x0 , %xmm10, %xmm1
- pclmulqdq $0x11, %xmm10, %xmm13
- pxor %xmm9 , %xmm0
- xorps %xmm8 , %xmm0
- pxor %xmm12, %xmm1
- xorps %xmm13, %xmm1
-
- movdqu 16*2(arg2), %xmm9
- movdqu 16*3(arg2), %xmm12
- pshufb %xmm11, %xmm9
- pshufb %xmm11, %xmm12
- movdqa %xmm2, %xmm8
- movdqa %xmm3, %xmm13
- pclmulqdq $0x0, %xmm10, %xmm2
- pclmulqdq $0x11, %xmm10, %xmm8
- pclmulqdq $0x0, %xmm10, %xmm3
- pclmulqdq $0x11, %xmm10, %xmm13
- pxor %xmm9 , %xmm2
- xorps %xmm8 , %xmm2
- pxor %xmm12, %xmm3
- xorps %xmm13, %xmm3
-
- movdqu 16*4(arg2), %xmm9
- movdqu 16*5(arg2), %xmm12
- pshufb %xmm11, %xmm9
- pshufb %xmm11, %xmm12
- movdqa %xmm4, %xmm8
- movdqa %xmm5, %xmm13
- pclmulqdq $0x0, %xmm10, %xmm4
- pclmulqdq $0x11, %xmm10, %xmm8
- pclmulqdq $0x0, %xmm10, %xmm5
- pclmulqdq $0x11, %xmm10, %xmm13
- pxor %xmm9 , %xmm4
- xorps %xmm8 , %xmm4
- pxor %xmm12, %xmm5
- xorps %xmm13, %xmm5
-
- movdqu 16*6(arg2), %xmm9
- movdqu 16*7(arg2), %xmm12
- pshufb %xmm11, %xmm9
- pshufb %xmm11, %xmm12
- movdqa %xmm6 , %xmm8
- movdqa %xmm7 , %xmm13
- pclmulqdq $0x0 , %xmm10, %xmm6
- pclmulqdq $0x11, %xmm10, %xmm8
- pclmulqdq $0x0 , %xmm10, %xmm7
- pclmulqdq $0x11, %xmm10, %xmm13
- pxor %xmm9 , %xmm6
- xorps %xmm8 , %xmm6
- pxor %xmm12, %xmm7
- xorps %xmm13, %xmm7
-
- sub $128, arg3
-
- # check if there is another 64B in the buffer to be able to fold
- jge _fold_64_B_loop
- ##################################################################
-
-
- add $128, arg2
- # at this point, the buffer pointer is pointing at the last y Bytes
- # of the buffer the 64B of folded data is in 4 of the xmm
- # registers: xmm0, xmm1, xmm2, xmm3
-
-
- # fold the 8 xmm registers to 1 xmm register with different constants
-
- movdqa rk9(%rip), %xmm10
- movdqa %xmm0, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm0
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- xorps %xmm0, %xmm7
-
- movdqa rk11(%rip), %xmm10
- movdqa %xmm1, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm1
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- xorps %xmm1, %xmm7
-
- movdqa rk13(%rip), %xmm10
- movdqa %xmm2, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm2
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- pxor %xmm2, %xmm7
-
- movdqa rk15(%rip), %xmm10
- movdqa %xmm3, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm3
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- xorps %xmm3, %xmm7
-
- movdqa rk17(%rip), %xmm10
- movdqa %xmm4, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm4
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- pxor %xmm4, %xmm7
-
- movdqa rk19(%rip), %xmm10
- movdqa %xmm5, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm5
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- xorps %xmm5, %xmm7
-
- movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2
- #imm value of pclmulqdq instruction
- #will determine which constant to use
- movdqa %xmm6, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm6
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- pxor %xmm6, %xmm7
-
-
- # instead of 64, we add 48 to the loop counter to save 1 instruction
- # from the loop instead of a cmp instruction, we use the negative
- # flag with the jl instruction
- add $128-16, arg3
- jl _final_reduction_for_128
-
- # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
- # and the rest is in memory. We can fold 16 bytes at a time if y>=16
- # continue folding 16B at a time
-
-_16B_reduction_loop:
- movdqa %xmm7, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm7
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- movdqu (arg2), %xmm0
- pshufb %xmm11, %xmm0
- pxor %xmm0 , %xmm7
- add $16, arg2
- sub $16, arg3
- # instead of a cmp instruction, we utilize the flags with the
- # jge instruction equivalent of: cmp arg3, 16-16
- # check if there is any more 16B in the buffer to be able to fold
- jge _16B_reduction_loop
-
- #now we have 16+z bytes left to reduce, where 0<= z < 16.
- #first, we reduce the data in the xmm7 register
-
-
-_final_reduction_for_128:
- # check if any more data to fold. If not, compute the CRC of
- # the final 128 bits
- add $16, arg3
- je _128_done
-
- # here we are getting data that is less than 16 bytes.
- # since we know that there was data before the pointer, we can
- # offset the input pointer before the actual point, to receive
- # exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_xmms:
- movdqa %xmm7, %xmm2
-
- movdqu -16(arg2, arg3), %xmm1
- pshufb %xmm11, %xmm1
-
- # get rid of the extra data that was loaded before
- # load the shift constant
- lea pshufb_shf_table+16(%rip), %rax
- sub arg3, %rax
- movdqu (%rax), %xmm0
-
- # shift xmm2 to the left by arg3 bytes
- pshufb %xmm0, %xmm2
-
- # shift xmm7 to the right by 16-arg3 bytes
- pxor mask1(%rip), %xmm0
- pshufb %xmm0, %xmm7
- pblendvb %xmm2, %xmm1 #xmm0 is implicit
-
- # fold 16 Bytes
- movdqa %xmm1, %xmm2
- movdqa %xmm7, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm7
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- pxor %xmm2, %xmm7
-
-_128_done:
- # compute crc of a 128-bit value
- movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10
- movdqa %xmm7, %xmm0
-
- #64b fold
- pclmulqdq $0x1, %xmm10, %xmm7
- pslldq $8 , %xmm0
- pxor %xmm0, %xmm7
-
- #32b fold
- movdqa %xmm7, %xmm0
-
- pand mask2(%rip), %xmm0
-
- psrldq $12, %xmm7
- pclmulqdq $0x10, %xmm10, %xmm7
- pxor %xmm0, %xmm7
-
- #barrett reduction
-_barrett:
- movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10
- movdqa %xmm7, %xmm0
- pclmulqdq $0x01, %xmm10, %xmm7
- pslldq $4, %xmm7
- pclmulqdq $0x11, %xmm10, %xmm7
-
- pslldq $4, %xmm7
- pxor %xmm0, %xmm7
- pextrd $1, %xmm7, %eax
-
-_cleanup:
- # scale the result back to 16 bits
- shr $16, %eax
- mov %rcx, %rsp
- ret
-
-########################################################################
-
-.align 16
-_less_than_128:
-
- # check if there is enough buffer to be able to fold 16B at a time
- cmp $32, arg3
- jl _less_than_32
- movdqa SHUF_MASK(%rip), %xmm11
-
- # now if there is, load the constants
- movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
-
- movd arg1_low32, %xmm0 # get the initial crc value
- pslldq $12, %xmm0 # align it to its correct place
- movdqu (arg2), %xmm7 # load the plaintext
- pshufb %xmm11, %xmm7 # byte-reflect the plaintext
- pxor %xmm0, %xmm7
-
-
- # update the buffer pointer
- add $16, arg2
-
- # update the counter. subtract 32 instead of 16 to save one
- # instruction from the loop
- sub $32, arg3
-
- jmp _16B_reduction_loop
-
-
-.align 16
-_less_than_32:
- # mov initial crc to the return value. this is necessary for
- # zero-length buffers.
- mov arg1_low32, %eax
- test arg3, arg3
- je _cleanup
-
- movdqa SHUF_MASK(%rip), %xmm11
-
- movd arg1_low32, %xmm0 # get the initial crc value
- pslldq $12, %xmm0 # align it to its correct place
-
- cmp $16, arg3
- je _exact_16_left
- jl _less_than_16_left
-
- movdqu (arg2), %xmm7 # load the plaintext
- pshufb %xmm11, %xmm7 # byte-reflect the plaintext
- pxor %xmm0 , %xmm7 # xor the initial crc value
- add $16, arg2
- sub $16, arg3
- movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
- jmp _get_last_two_xmms
-
-
-.align 16
-_less_than_16_left:
- # use stack space to load data less than 16 bytes, zero-out
- # the 16B in memory first.
-
- pxor %xmm1, %xmm1
- mov %rsp, %r11
- movdqa %xmm1, (%r11)
-
- cmp $4, arg3
- jl _only_less_than_4
-
- # backup the counter value
- mov arg3, %r9
- cmp $8, arg3
- jl _less_than_8_left
-
- # load 8 Bytes
- mov (arg2), %rax
- mov %rax, (%r11)
- add $8, %r11
- sub $8, arg3
- add $8, arg2
-_less_than_8_left:
-
- cmp $4, arg3
- jl _less_than_4_left
-
- # load 4 Bytes
- mov (arg2), %eax
- mov %eax, (%r11)
- add $4, %r11
- sub $4, arg3
- add $4, arg2
-_less_than_4_left:
-
- cmp $2, arg3
- jl _less_than_2_left
-
- # load 2 Bytes
- mov (arg2), %ax
- mov %ax, (%r11)
- add $2, %r11
- sub $2, arg3
- add $2, arg2
-_less_than_2_left:
- cmp $1, arg3
- jl _zero_left
-
- # load 1 Byte
- mov (arg2), %al
- mov %al, (%r11)
-_zero_left:
- movdqa (%rsp), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- # shl r9, 4
- lea pshufb_shf_table+16(%rip), %rax
- sub %r9, %rax
- movdqu (%rax), %xmm0
- pxor mask1(%rip), %xmm0
-
- pshufb %xmm0, %xmm7
- jmp _128_done
-
-.align 16
-_exact_16_left:
- movdqu (arg2), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- jmp _128_done
-
-_only_less_than_4:
- cmp $3, arg3
- jl _only_less_than_3
-
- # load 3 Bytes
- mov (arg2), %al
- mov %al, (%r11)
-
- mov 1(arg2), %al
- mov %al, 1(%r11)
-
- mov 2(arg2), %al
- mov %al, 2(%r11)
-
- movdqa (%rsp), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- psrldq $5, %xmm7
-
- jmp _barrett
-_only_less_than_3:
- cmp $2, arg3
- jl _only_less_than_2
-
- # load 2 Bytes
- mov (arg2), %al
- mov %al, (%r11)
-
- mov 1(arg2), %al
- mov %al, 1(%r11)
-
- movdqa (%rsp), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- psrldq $6, %xmm7
-
- jmp _barrett
-_only_less_than_2:
-
- # load 1 Byte
- mov (arg2), %al
- mov %al, (%r11)
-
- movdqa (%rsp), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- psrldq $7, %xmm7
-
- jmp _barrett
-
-ENDPROC(crc_t10dif_pcl)
-
-.data
-
-# precomputed constants
-# these constants are precomputed from the poly:
-# 0x8bb70000 (0x8bb7 scaled to 32 bits)
-.align 16
-# Q = 0x18BB70000
-# rk1 = 2^(32*3) mod Q << 32
-# rk2 = 2^(32*5) mod Q << 32
-# rk3 = 2^(32*15) mod Q << 32
-# rk4 = 2^(32*17) mod Q << 32
-# rk5 = 2^(32*3) mod Q << 32
-# rk6 = 2^(32*2) mod Q << 32
-# rk7 = floor(2^64/Q)
-# rk8 = Q
-rk1:
-.quad 0x2d56000000000000
-rk2:
-.quad 0x06df000000000000
-rk3:
-.quad 0x9d9d000000000000
-rk4:
-.quad 0x7cf5000000000000
-rk5:
-.quad 0x2d56000000000000
-rk6:
-.quad 0x1368000000000000
-rk7:
-.quad 0x00000001f65a57f8
-rk8:
-.quad 0x000000018bb70000
-
-rk9:
-.quad 0xceae000000000000
-rk10:
-.quad 0xbfd6000000000000
-rk11:
-.quad 0x1e16000000000000
-rk12:
-.quad 0x713c000000000000
-rk13:
-.quad 0xf7f9000000000000
-rk14:
-.quad 0x80a6000000000000
-rk15:
-.quad 0x044c000000000000
-rk16:
-.quad 0xe658000000000000
-rk17:
-.quad 0xad18000000000000
-rk18:
-.quad 0xa497000000000000
-rk19:
-.quad 0x6ee3000000000000
-rk20:
-.quad 0xe7b5000000000000
-
-
-
-mask1:
-.octa 0x80808080808080808080808080808080
-mask2:
-.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
-
-SHUF_MASK:
-.octa 0x000102030405060708090A0B0C0D0E0F
-
-pshufb_shf_table:
-# use these values for shift constants for the pshufb instruction
-# different alignments result in values as shown:
-# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
-# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
-# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
-# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
-# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
-# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
-# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
-# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
-# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
-# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
-# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
-# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
-# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
-# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
-# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
-.octa 0x8f8e8d8c8b8a89888786858483828100
-.octa 0x000e0d0c0b0a09080706050403020100
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
deleted file mode 100644
index 7845d7fd54c0..000000000000
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
- *
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <asm/i387.h>
-#include <asm/cpufeature.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
- size_t len);
-
-struct chksum_desc_ctx {
- __u16 crc;
-};
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = 0;
-
- return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- if (irq_fpu_usable()) {
- kernel_fpu_begin();
- ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
- kernel_fpu_end();
- } else
- ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
- return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- *(__u16 *)out = ctx->crc;
- return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
- u8 *out)
-{
- if (irq_fpu_usable()) {
- kernel_fpu_begin();
- *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
- kernel_fpu_end();
- } else
- *(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
- return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
- unsigned int length, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
- .digestsize = CRC_T10DIF_DIGEST_SIZE,
- .init = chksum_init,
- .update = chksum_update,
- .final = chksum_final,
- .finup = chksum_finup,
- .digest = chksum_digest,
- .descsize = sizeof(struct chksum_desc_ctx),
- .base = {
- .cra_name = "crct10dif",
- .cra_driver_name = "crct10dif-pclmul",
- .cra_priority = 200,
- .cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static const struct x86_cpu_id crct10dif_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
-
-static int __init crct10dif_intel_mod_init(void)
-{
- if (!x86_match_cpu(crct10dif_cpu_id))
- return -ENODEV;
-
- return crypto_register_shash(&alg);
-}
-
-static void __exit crct10dif_intel_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_intel_mod_init);
-module_exit(crct10dif_intel_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("crct10dif");
-MODULE_ALIAS("crct10dif-pclmul");