Diffstat (limited to 'arch')

 -rw-r--r--  arch/alpha/Kconfig                       |    1
 -rw-r--r--  arch/alpha/include/asm/atomic.h          |   88
 -rw-r--r--  arch/alpha/include/asm/param.h           |    8
 -rw-r--r--  arch/alpha/include/asm/spinlock.h        |    4
 -rw-r--r--  arch/alpha/include/asm/unistd.h          |    3
 -rw-r--r--  arch/alpha/include/uapi/asm/param.h      |    7
 -rw-r--r--  arch/alpha/include/uapi/asm/unistd.h     |    2
 -rw-r--r--  arch/alpha/kernel/entry.S                |  399
 -rw-r--r--  arch/alpha/kernel/irq_alpha.c            |    2
 -rw-r--r--  arch/alpha/kernel/smp.c                  |    5
 -rw-r--r--  arch/alpha/kernel/sys_dp264.c            |    8
 -rw-r--r--  arch/alpha/kernel/sys_marvel.c           |    3
 -rw-r--r--  arch/alpha/kernel/systbls.S              |    2
 -rw-r--r--  arch/alpha/kernel/time.c                 |    4
 -rw-r--r--  arch/alpha/kernel/traps.c                |    8
 -rw-r--r--  arch/x86/crypto/Makefile                 |    2
 -rw-r--r--  arch/x86/crypto/crct10dif-pcl-asm_64.S   |  643
 -rw-r--r--  arch/x86/crypto/crct10dif-pclmul_glue.c  |  151

 18 files changed, 374 insertions(+), 966 deletions(-)
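Two of the alpha changes below are easy to misread from the hunks alone. atomic64_add_unless() changes its return contract: it now returns true iff the add happened (that is, @v was not @u), rather than the old counter value; and a native atomic64_dec_if_positive() is added that returns old - 1 even when it stores nothing. A minimal caller-side sketch of those contracts (the refcount scenario and every name in it are illustrative, not part of the patch):

    /* Illustrative only: exercises the return-value contracts that the
     * alpha atomic.h hunk below introduces. */
    #include <linux/atomic.h>
    #include <linux/printk.h>
    #include <linux/types.h>

    static atomic64_t refs = ATOMIC64_INIT(1);

    static bool try_get_ref(void)
    {
            /* True iff refs was not already 0 -- exactly the behavior the
             * atomic64_inc_not_zero() wrapper in the patch relies on. */
            return atomic64_add_unless(&refs, 1, 0);
    }

    static void put_ref(void)
    {
            /* Returns old - 1 even if nothing was stored: 0 means we just
             * dropped the last reference; a negative result means refs was
             * already non-positive and was left untouched. */
            if (atomic64_dec_if_positive(&refs) == 0)
                    pr_debug("last reference dropped\n");
    }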
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 837a1f2d8b96..082d9b4b5472 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -15,6 +15,7 @@ config ALPHA select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_IPC_PARSE_VERSION select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select GENERIC_SMP_IDLE_THREAD select GENERIC_CMOS_UPDATE select GENERIC_STRNCPY_FROM_USER diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index c2cbe4fc391c..78b03ef39f6f 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -186,17 +186,24 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v) */ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) { - int c, old; - c = atomic_read(v); - for (;;) { - if (unlikely(c == (u))) - break; - old = atomic_cmpxchg((v), c, c + (a)); - if (likely(old == c)) - break; - c = old; - } - return c; + int c, new, old; + smp_mb(); + __asm__ __volatile__( + "1: ldl_l %[old],%[mem]\n" + " cmpeq %[old],%[u],%[c]\n" + " addl %[old],%[a],%[new]\n" + " bne %[c],2f\n" + " stl_c %[new],%[mem]\n" + " beq %[new],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c) + : [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u) + : "memory"); + smp_mb(); + return old; } @@ -207,21 +214,56 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as it was not @u. - * Returns the old value of @v. + * Returns true iff @v was not @u. */ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) { - long c, old; - c = atomic64_read(v); - for (;;) { - if (unlikely(c == (u))) - break; - old = atomic64_cmpxchg((v), c, c + (a)); - if (likely(old == c)) - break; - c = old; - } - return c != (u); + long c, tmp; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %[tmp],%[mem]\n" + " cmpeq %[tmp],%[u],%[c]\n" + " addq %[tmp],%[a],%[tmp]\n" + " bne %[c],2f\n" + " stq_c %[tmp],%[mem]\n" + " beq %[tmp],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [tmp] "=&r"(tmp), [c] "=&r"(c) + : [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u) + : "memory"); + smp_mb(); + return !c; +} + +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. 
+ */ +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + long old, tmp; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %[old],%[mem]\n" + " subq %[old],1,%[tmp]\n" + " ble %[old],2f\n" + " stq_c %[tmp],%[mem]\n" + " beq %[tmp],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [old] "=&r"(old), [tmp] "=&r"(tmp) + : [mem] "m"(*v) + : "memory"); + smp_mb(); + return old - 1; } #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h index bf46af51941b..a5b68b268bcf 100644 --- a/arch/alpha/include/asm/param.h +++ b/arch/alpha/include/asm/param.h @@ -3,7 +3,9 @@ #include <uapi/asm/param.h> -#define HZ CONFIG_HZ -#define USER_HZ HZ -# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ +# undef HZ +# define HZ CONFIG_HZ +# define USER_HZ 1024 +# define CLOCKS_PER_SEC USER_HZ /* frequency at which times() counts */ + #endif /* _ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index 3bba21e41b81..37b570d01202 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -168,8 +168,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ALPHA_SPINLOCK_H */ diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 43baee17acdf..f2c94402e2c8 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -3,8 +3,7 @@ #include <uapi/asm/unistd.h> - -#define NR_SYSCALLS 506 +#define NR_SYSCALLS 508 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h index 29daed819ebd..dbcd9834af6d 100644 --- a/arch/alpha/include/uapi/asm/param.h +++ b/arch/alpha/include/uapi/asm/param.h @@ -1,13 +1,7 @@ #ifndef _UAPI_ASM_ALPHA_PARAM_H #define _UAPI_ASM_ALPHA_PARAM_H -/* ??? Gross. I don't want to parameterize this, and supposedly the - hardware ignores reprogramming. We also need userland buy-in to the - change in HZ, since this is visible in the wait4 resources etc. */ - -#ifndef __KERNEL__ #define HZ 1024 -#endif #define EXEC_PAGESIZE 8192 @@ -17,5 +11,4 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ - #endif /* _UAPI_ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index 801d28bcea51..53ae7bb1bfd1 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -467,5 +467,7 @@ #define __NR_sendmmsg 503 #define __NR_process_vm_readv 504 #define __NR_process_vm_writev 505 +#define __NR_kcmp 506 +#define __NR_finit_module 507 #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index f62a994ef126..a969b95ee5ac 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -12,11 +12,32 @@ .text .set noat + .cfi_sections .debug_frame /* Stack offsets. 
*/ #define SP_OFF 184 #define SWITCH_STACK_SIZE 320 +.macro CFI_START_OSF_FRAME func + .align 4 + .globl \func + .type \func,@function +\func: + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 48 + .cfi_rel_offset 64, 8 + .cfi_rel_offset $gp, 16 + .cfi_rel_offset $16, 24 + .cfi_rel_offset $17, 32 + .cfi_rel_offset $18, 40 +.endm + +.macro CFI_END_OSF_FRAME func + .cfi_endproc + .size \func, . - \func +.endm + /* * This defines the normal kernel pt-regs layout. * @@ -27,100 +48,158 @@ * the palcode-provided values are available to the signal handler. */ -#define SAVE_ALL \ - subq $sp, SP_OFF, $sp; \ - stq $0, 0($sp); \ - stq $1, 8($sp); \ - stq $2, 16($sp); \ - stq $3, 24($sp); \ - stq $4, 32($sp); \ - stq $28, 144($sp); \ - lda $2, alpha_mv; \ - stq $5, 40($sp); \ - stq $6, 48($sp); \ - stq $7, 56($sp); \ - stq $8, 64($sp); \ - stq $19, 72($sp); \ - stq $20, 80($sp); \ - stq $21, 88($sp); \ - ldq $2, HAE_CACHE($2); \ - stq $22, 96($sp); \ - stq $23, 104($sp); \ - stq $24, 112($sp); \ - stq $25, 120($sp); \ - stq $26, 128($sp); \ - stq $27, 136($sp); \ - stq $2, 152($sp); \ - stq $16, 160($sp); \ - stq $17, 168($sp); \ +.macro SAVE_ALL + subq $sp, SP_OFF, $sp + .cfi_adjust_cfa_offset SP_OFF + stq $0, 0($sp) + stq $1, 8($sp) + stq $2, 16($sp) + stq $3, 24($sp) + stq $4, 32($sp) + stq $28, 144($sp) + .cfi_rel_offset $0, 0 + .cfi_rel_offset $1, 8 + .cfi_rel_offset $2, 16 + .cfi_rel_offset $3, 24 + .cfi_rel_offset $4, 32 + .cfi_rel_offset $28, 144 + lda $2, alpha_mv + stq $5, 40($sp) + stq $6, 48($sp) + stq $7, 56($sp) + stq $8, 64($sp) + stq $19, 72($sp) + stq $20, 80($sp) + stq $21, 88($sp) + ldq $2, HAE_CACHE($2) + stq $22, 96($sp) + stq $23, 104($sp) + stq $24, 112($sp) + stq $25, 120($sp) + stq $26, 128($sp) + stq $27, 136($sp) + stq $2, 152($sp) + stq $16, 160($sp) + stq $17, 168($sp) stq $18, 176($sp) + .cfi_rel_offset $5, 40 + .cfi_rel_offset $6, 48 + .cfi_rel_offset $7, 56 + .cfi_rel_offset $8, 64 + .cfi_rel_offset $19, 72 + .cfi_rel_offset $20, 80 + .cfi_rel_offset $21, 88 + .cfi_rel_offset $22, 96 + .cfi_rel_offset $23, 104 + .cfi_rel_offset $24, 112 + .cfi_rel_offset $25, 120 + .cfi_rel_offset $26, 128 + .cfi_rel_offset $27, 136 +.endm -#define RESTORE_ALL \ - lda $19, alpha_mv; \ - ldq $0, 0($sp); \ - ldq $1, 8($sp); \ - ldq $2, 16($sp); \ - ldq $3, 24($sp); \ - ldq $21, 152($sp); \ - ldq $20, HAE_CACHE($19); \ - ldq $4, 32($sp); \ - ldq $5, 40($sp); \ - ldq $6, 48($sp); \ - ldq $7, 56($sp); \ - subq $20, $21, $20; \ - ldq $8, 64($sp); \ - beq $20, 99f; \ - ldq $20, HAE_REG($19); \ - stq $21, HAE_CACHE($19); \ - stq $21, 0($20); \ -99:; \ - ldq $19, 72($sp); \ - ldq $20, 80($sp); \ - ldq $21, 88($sp); \ - ldq $22, 96($sp); \ - ldq $23, 104($sp); \ - ldq $24, 112($sp); \ - ldq $25, 120($sp); \ - ldq $26, 128($sp); \ - ldq $27, 136($sp); \ - ldq $28, 144($sp); \ +.macro RESTORE_ALL + lda $19, alpha_mv + ldq $0, 0($sp) + ldq $1, 8($sp) + ldq $2, 16($sp) + ldq $3, 24($sp) + ldq $21, 152($sp) + ldq $20, HAE_CACHE($19) + ldq $4, 32($sp) + ldq $5, 40($sp) + ldq $6, 48($sp) + ldq $7, 56($sp) + subq $20, $21, $20 + ldq $8, 64($sp) + beq $20, 99f + ldq $20, HAE_REG($19) + stq $21, HAE_CACHE($19) + stq $21, 0($20) +99: ldq $19, 72($sp) + ldq $20, 80($sp) + ldq $21, 88($sp) + ldq $22, 96($sp) + ldq $23, 104($sp) + ldq $24, 112($sp) + ldq $25, 120($sp) + ldq $26, 128($sp) + ldq $27, 136($sp) + ldq $28, 144($sp) addq $sp, SP_OFF, $sp + .cfi_restore $0 + .cfi_restore $1 + .cfi_restore $2 + .cfi_restore $3 + .cfi_restore $4 + .cfi_restore $5 + .cfi_restore $6 + .cfi_restore $7 + 
.cfi_restore $8 + .cfi_restore $19 + .cfi_restore $20 + .cfi_restore $21 + .cfi_restore $22 + .cfi_restore $23 + .cfi_restore $24 + .cfi_restore $25 + .cfi_restore $26 + .cfi_restore $27 + .cfi_restore $28 + .cfi_adjust_cfa_offset -SP_OFF +.endm + +.macro DO_SWITCH_STACK + bsr $1, do_switch_stack + .cfi_adjust_cfa_offset SWITCH_STACK_SIZE + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 + /* We don't really care about the FP registers for debugging. */ +.endm + +.macro UNDO_SWITCH_STACK + bsr $1, undo_switch_stack + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -SWITCH_STACK_SIZE +.endm /* * Non-syscall kernel entry points. */ - .align 4 - .globl entInt - .ent entInt -entInt: +CFI_START_OSF_FRAME entInt SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $19 jsr $31, do_entInt -.end entInt +CFI_END_OSF_FRAME entInt - .align 4 - .globl entArith - .ent entArith -entArith: +CFI_START_OSF_FRAME entArith SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $18 jsr $31, do_entArith -.end entArith +CFI_END_OSF_FRAME entArith - .align 4 - .globl entMM - .ent entMM -entMM: +CFI_START_OSF_FRAME entMM SAVE_ALL /* save $9 - $15 so the inline exception code can manipulate them. */ subq $sp, 56, $sp + .cfi_adjust_cfa_offset 56 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -128,6 +207,13 @@ entMM: stq $13, 32($sp) stq $14, 40($sp) stq $15, 48($sp) + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 addq $sp, 56, $19 /* handle the fault */ lda $8, 0x3fff @@ -142,28 +228,33 @@ entMM: ldq $14, 40($sp) ldq $15, 48($sp) addq $sp, 56, $sp + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -56 /* finish up the syscall as normal. 
*/ br ret_from_sys_call -.end entMM +CFI_END_OSF_FRAME entMM - .align 4 - .globl entIF - .ent entIF -entIF: +CFI_START_OSF_FRAME entIF SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $17 jsr $31, do_entIF -.end entIF +CFI_END_OSF_FRAME entIF - .align 4 - .globl entUna - .ent entUna -entUna: +CFI_START_OSF_FRAME entUna lda $sp, -256($sp) + .cfi_adjust_cfa_offset 256 stq $0, 0($sp) + .cfi_rel_offset $0, 0 + .cfi_remember_state ldq $0, 256($sp) /* get PS */ stq $1, 8($sp) stq $2, 16($sp) @@ -195,6 +286,32 @@ entUna: stq $28, 224($sp) mov $sp, $19 stq $gp, 232($sp) + .cfi_rel_offset $1, 1*8 + .cfi_rel_offset $2, 2*8 + .cfi_rel_offset $3, 3*8 + .cfi_rel_offset $4, 4*8 + .cfi_rel_offset $5, 5*8 + .cfi_rel_offset $6, 6*8 + .cfi_rel_offset $7, 7*8 + .cfi_rel_offset $8, 8*8 + .cfi_rel_offset $9, 9*8 + .cfi_rel_offset $10, 10*8 + .cfi_rel_offset $11, 11*8 + .cfi_rel_offset $12, 12*8 + .cfi_rel_offset $13, 13*8 + .cfi_rel_offset $14, 14*8 + .cfi_rel_offset $15, 15*8 + .cfi_rel_offset $19, 19*8 + .cfi_rel_offset $20, 20*8 + .cfi_rel_offset $21, 21*8 + .cfi_rel_offset $22, 22*8 + .cfi_rel_offset $23, 23*8 + .cfi_rel_offset $24, 24*8 + .cfi_rel_offset $25, 25*8 + .cfi_rel_offset $26, 26*8 + .cfi_rel_offset $27, 27*8 + .cfi_rel_offset $28, 28*8 + .cfi_rel_offset $29, 29*8 lda $8, 0x3fff stq $31, 248($sp) bic $sp, $8, $8 @@ -228,16 +345,45 @@ entUna: ldq $28, 224($sp) ldq $gp, 232($sp) lda $sp, 256($sp) + .cfi_restore $1 + .cfi_restore $2 + .cfi_restore $3 + .cfi_restore $4 + .cfi_restore $5 + .cfi_restore $6 + .cfi_restore $7 + .cfi_restore $8 + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_restore $19 + .cfi_restore $20 + .cfi_restore $21 + .cfi_restore $22 + .cfi_restore $23 + .cfi_restore $24 + .cfi_restore $25 + .cfi_restore $26 + .cfi_restore $27 + .cfi_restore $28 + .cfi_restore $29 + .cfi_adjust_cfa_offset -256 call_pal PAL_rti -.end entUna .align 4 - .ent entUnaUser entUnaUser: + .cfi_restore_state ldq $0, 0($sp) /* restore original $0 */ lda $sp, 256($sp) /* pop entUna's stack frame */ + .cfi_restore $0 + .cfi_adjust_cfa_offset -256 SAVE_ALL /* setup normal kernel stack */ lda $sp, -56($sp) + .cfi_adjust_cfa_offset 56 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -245,6 +391,13 @@ entUnaUser: stq $13, 32($sp) stq $14, 40($sp) stq $15, 48($sp) + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 lda $8, 0x3fff addq $sp, 56, $19 bic $sp, $8, $8 @@ -257,20 +410,25 @@ entUnaUser: ldq $14, 40($sp) ldq $15, 48($sp) lda $sp, 56($sp) + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -56 br ret_from_sys_call -.end entUnaUser +CFI_END_OSF_FRAME entUna - .align 4 - .globl entDbg - .ent entDbg -entDbg: +CFI_START_OSF_FRAME entDbg SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $16 jsr $31, do_entDbg -.end entDbg +CFI_END_OSF_FRAME entDbg /* * The system call entry point is special. 
Most importantly, it looks @@ -285,8 +443,12 @@ entDbg: .align 4 .globl entSys - .globl ret_from_sys_call - .ent entSys + .type entSys, @function + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 48 + .cfi_rel_offset 64, 8 + .cfi_rel_offset $gp, 16 entSys: SAVE_ALL lda $8, 0x3fff @@ -300,6 +462,9 @@ entSys: stq $17, SP_OFF+32($sp) s8addq $0, $5, $5 stq $18, SP_OFF+40($sp) + .cfi_rel_offset $16, SP_OFF+24 + .cfi_rel_offset $17, SP_OFF+32 + .cfi_rel_offset $18, SP_OFF+40 blbs $3, strace beq $4, 1f ldq $27, 0($5) @@ -310,6 +475,7 @@ entSys: stq $31, 72($sp) /* a3=0 => no error */ .align 4 + .globl ret_from_sys_call ret_from_sys_call: cmovne $26, 0, $18 /* $18 = 0 => non-restartable */ ldq $0, SP_OFF($sp) @@ -324,10 +490,12 @@ ret_to_user: and $17, _TIF_WORK_MASK, $2 bne $2, work_pending restore_all: + .cfi_remember_state RESTORE_ALL call_pal PAL_rti ret_to_kernel: + .cfi_restore_state lda $16, 7 call_pal PAL_swpipl br restore_all @@ -356,7 +524,6 @@ $ret_success: stq $0, 0($sp) stq $31, 72($sp) /* a3=0 => no error */ br ret_from_sys_call -.end entSys /* * Do all cleanup when returning from all interrupts and system calls. @@ -370,7 +537,7 @@ $ret_success: */ .align 4 - .ent work_pending + .type work_pending, @function work_pending: and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 bne $2, $work_notifysig @@ -387,23 +554,22 @@ $work_resched: $work_notifysig: mov $sp, $16 - bsr $1, do_switch_stack + DO_SWITCH_STACK jsr $26, do_work_pending - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK br restore_all -.end work_pending /* * PTRACE syscall handler */ .align 4 - .ent strace + .type strace, @function strace: /* set up signal stack, call syscall_trace */ - bsr $1, do_switch_stack + DO_SWITCH_STACK jsr $26, syscall_trace_enter /* returns the syscall number */ - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK /* get the arguments back.. */ ldq $16, SP_OFF+24($sp) @@ -431,9 +597,9 @@ ret_from_straced: $strace_success: stq $0, 0($sp) /* save return value */ - bsr $1, do_switch_stack + DO_SWITCH_STACK jsr $26, syscall_trace_leave - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK br $31, ret_from_sys_call .align 3 @@ -447,26 +613,31 @@ $strace_error: stq $0, 0($sp) stq $1, 72($sp) /* a3 for return */ - bsr $1, do_switch_stack + DO_SWITCH_STACK mov $18, $9 /* save old syscall number */ mov $19, $10 /* save old a3 */ jsr $26, syscall_trace_leave mov $9, $18 mov $10, $19 - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK mov $31, $26 /* tell "ret_from_sys_call" we can restart */ br ret_from_sys_call -.end strace +CFI_END_OSF_FRAME entSys /* * Save and restore the switch stack -- aka the balance of the user context. */ .align 4 - .ent do_switch_stack + .type do_switch_stack, @function + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 0 + .cfi_register 64, $1 do_switch_stack: lda $sp, -SWITCH_STACK_SIZE($sp) + .cfi_adjust_cfa_offset SWITCH_STACK_SIZE stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -510,10 +681,14 @@ do_switch_stack: stt $f0, 312($sp) # save fpcr in slot of $f31 ldt $f0, 64($sp) # dont let "do_switch_stack" change fp state. 
ret $31, ($1), 1 -.end do_switch_stack + .cfi_endproc + .size do_switch_stack, .-do_switch_stack .align 4 - .ent undo_switch_stack + .type undo_switch_stack, @function + .cfi_startproc simple + .cfi_def_cfa $sp, 0 + .cfi_register 64, $1 undo_switch_stack: ldq $9, 0($sp) ldq $10, 8($sp) @@ -558,7 +733,8 @@ undo_switch_stack: ldt $f30, 304($sp) lda $sp, SWITCH_STACK_SIZE($sp) ret $31, ($1), 1 -.end undo_switch_stack + .cfi_endproc + .size undo_switch_stack, .-undo_switch_stack /* * The meat of the context switch code. @@ -566,17 +742,18 @@ undo_switch_stack: .align 4 .globl alpha_switch_to - .ent alpha_switch_to + .type alpha_switch_to, @function + .cfi_startproc alpha_switch_to: - .prologue 0 - bsr $1, do_switch_stack + DO_SWITCH_STACK call_pal PAL_swpctx lda $8, 0x3fff - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK bic $sp, $8, $8 mov $17, $0 ret -.end alpha_switch_to + .cfi_endproc + .size alpha_switch_to, .-alpha_switch_to /* * New processes begin life here. diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index f433fc11877a..28e4429596f3 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -236,7 +236,7 @@ void __init init_rtc_irq(void) { irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip, - handle_simple_irq, "RTC"); + handle_percpu_irq, "RTC"); setup_irq(RTC_IRQ, &timer_irqaction); } diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 53b18a620e1c..9dbbcb3b9146 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -264,9 +264,10 @@ recv_secondary_console_msg(void) if (cnt <= 0 || cnt >= 80) strcpy(buf, "<<< BOGUS MSG >>>"); else { - cp1 = (char *) &cpu->ipc_buffer[11]; + cp1 = (char *) &cpu->ipc_buffer[1]; cp2 = buf; - strcpy(cp2, cp1); + memcpy(cp2, cp1, cnt); + cp2[cnt] = '\0'; while ((cp2 = strchr(cp2, '\r')) != 0) { *cp2 = ' '; diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 5bf401f7ea97..6c35159bc00e 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -190,9 +190,6 @@ static struct irq_chip clipper_irq_type = { static void dp264_device_interrupt(unsigned long vector) { -#if 1 - printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n"); -#else unsigned long pld; unsigned int i; @@ -210,12 +207,7 @@ dp264_device_interrupt(unsigned long vector) isa_device_interrupt(vector); else handle_irq(16 + i); -#if 0 - TSUNAMI_cchip->dir0.csr = 1UL << i; mb(); - tmp = TSUNAMI_cchip->dir0.csr; -#endif } -#endif } static void diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index 407accc80877..c92e389ff219 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -317,8 +317,9 @@ marvel_init_irq(void) } static int -marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin) { + struct pci_dev *dev = (struct pci_dev *)cdev; struct pci_controller *hose = dev->sysdata; struct io7_port *io7_port = hose->sysdata; struct io7 *io7 = io7_port->io7; diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 4284ec798ec9..dca9b3fb0071 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -524,6 +524,8 @@ sys_call_table: .quad sys_sendmmsg .quad sys_process_vm_readv .quad sys_process_vm_writev /* 505 */ + .quad sys_kcmp + .quad sys_finit_module .size sys_call_table, . 
- sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index e336694ca042..ea3395036556 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -105,9 +105,7 @@ void arch_irq_work_raise(void) static inline __u32 rpcc(void) { - __u32 result; - asm volatile ("rpcc %0" : "=r"(result)); - return result; + return __builtin_alpha_rpcc(); } int update_persistent_clock(struct timespec now) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index be1fba334bd0..bd0665cdc840 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -66,8 +66,8 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15) { printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", regs->pc, regs->r26, regs->ps, print_tainted()); - print_symbol("pc is at %s\n", regs->pc); - print_symbol("ra is at %s\n", regs->r26 ); + printk("pc is at %pSR\n", (void *)regs->pc); + printk("ra is at %pSR\n", (void *)regs->r26); printk("v0 = %016lx t0 = %016lx t1 = %016lx\n", regs->r0, regs->r1, regs->r2); printk("t2 = %016lx t3 = %016lx t4 = %016lx\n", @@ -132,9 +132,7 @@ dik_show_trace(unsigned long *sp) continue; if (tmp >= (unsigned long) &_etext) continue; - printk("[<%lx>]", tmp); - print_symbol(" %s", tmp); - printk("\n"); + printk("[<%lx>] %pSR\n", tmp, (void *)tmp); if (i > 40) { printk(" ..."); break; diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 7d6ba9db1be9..6c63c358a7e6 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -27,7 +27,6 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o -obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) @@ -82,4 +81,3 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o -crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S deleted file mode 100644 index 35e97569d05f..000000000000 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ /dev/null @@ -1,643 +0,0 @@ -######################################################################## -# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions -# -# Copyright (c) 2013, Intel Corporation -# -# Authors: -# Erdinc Ozturk <erdinc.ozturk@intel.com> -# Vinodh Gopal <vinodh.gopal@intel.com> -# James Guilford <james.guilford@intel.com> -# Tim Chen <tim.c.chen@linux.intel.com> -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# OpenIB.org BSD license below: -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# * Neither the name of the Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# -# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -######################################################################## -# Function API: -# UINT16 crc_t10dif_pcl( -# UINT16 init_crc, //initial CRC value, 16 bits -# const unsigned char *buf, //buffer pointer to calculate CRC on -# UINT64 len //buffer length in bytes (64-bit data) -# ); -# -# Reference paper titled "Fast CRC Computation for Generic -# Polynomials Using PCLMULQDQ Instruction" -# URL: http://www.intel.com/content/dam/www/public/us/en/documents -# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -# -# - -#include <linux/linkage.h> - -.text - -#define arg1 %rdi -#define arg2 %rsi -#define arg3 %rdx - -#define arg1_low32 %edi - -ENTRY(crc_t10dif_pcl) -.align 16 - - # adjust the 16-bit initial_crc value, scale it to 32 bits - shl $16, arg1_low32 - - # Allocate Stack Space - mov %rsp, %rcx - sub $16*2, %rsp - # align stack to 16 byte boundary - and $~(0x10 - 1), %rsp - - # check if smaller than 256 - cmp $256, arg3 - - # for sizes less than 128, we can't fold 64B at a time... - jl _less_than_128 - - - # load the initial crc value - movd arg1_low32, %xmm10 # initial crc - - # crc value does not need to be byte-reflected, but it needs - # to be moved to the high part of the register. - # because data will be byte-reflected and will align with - # initial crc at correct place. - pslldq $12, %xmm10 - - movdqa SHUF_MASK(%rip), %xmm11 - # receive the initial 64B data, xor the initial crc value - movdqu 16*0(arg2), %xmm0 - movdqu 16*1(arg2), %xmm1 - movdqu 16*2(arg2), %xmm2 - movdqu 16*3(arg2), %xmm3 - movdqu 16*4(arg2), %xmm4 - movdqu 16*5(arg2), %xmm5 - movdqu 16*6(arg2), %xmm6 - movdqu 16*7(arg2), %xmm7 - - pshufb %xmm11, %xmm0 - # XOR the initial_crc value - pxor %xmm10, %xmm0 - pshufb %xmm11, %xmm1 - pshufb %xmm11, %xmm2 - pshufb %xmm11, %xmm3 - pshufb %xmm11, %xmm4 - pshufb %xmm11, %xmm5 - pshufb %xmm11, %xmm6 - pshufb %xmm11, %xmm7 - - movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4 - #imm value of pclmulqdq instruction - #will determine which constant to use - - ################################################################# - # we subtract 256 instead of 128 to save one instruction from the loop - sub $256, arg3 - - # at this section of the code, there is 64*x+y (0<=y<64) bytes of - # buffer. 
The _fold_64_B_loop will fold 64B at a time - # until we have 64+y Bytes of buffer - - - # fold 64B at a time. This section of the code folds 4 xmm - # registers in parallel -_fold_64_B_loop: - - # update the buffer pointer - add $128, arg2 # buf += 64# - - movdqu 16*0(arg2), %xmm9 - movdqu 16*1(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm0, %xmm8 - movdqa %xmm1, %xmm13 - pclmulqdq $0x0 , %xmm10, %xmm0 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0 , %xmm10, %xmm1 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm0 - xorps %xmm8 , %xmm0 - pxor %xmm12, %xmm1 - xorps %xmm13, %xmm1 - - movdqu 16*2(arg2), %xmm9 - movdqu 16*3(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm2, %xmm8 - movdqa %xmm3, %xmm13 - pclmulqdq $0x0, %xmm10, %xmm2 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0, %xmm10, %xmm3 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm2 - xorps %xmm8 , %xmm2 - pxor %xmm12, %xmm3 - xorps %xmm13, %xmm3 - - movdqu 16*4(arg2), %xmm9 - movdqu 16*5(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm4, %xmm8 - movdqa %xmm5, %xmm13 - pclmulqdq $0x0, %xmm10, %xmm4 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0, %xmm10, %xmm5 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm4 - xorps %xmm8 , %xmm4 - pxor %xmm12, %xmm5 - xorps %xmm13, %xmm5 - - movdqu 16*6(arg2), %xmm9 - movdqu 16*7(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm6 , %xmm8 - movdqa %xmm7 , %xmm13 - pclmulqdq $0x0 , %xmm10, %xmm6 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0 , %xmm10, %xmm7 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm6 - xorps %xmm8 , %xmm6 - pxor %xmm12, %xmm7 - xorps %xmm13, %xmm7 - - sub $128, arg3 - - # check if there is another 64B in the buffer to be able to fold - jge _fold_64_B_loop - ################################################################## - - - add $128, arg2 - # at this point, the buffer pointer is pointing at the last y Bytes - # of the buffer the 64B of folded data is in 4 of the xmm - # registers: xmm0, xmm1, xmm2, xmm3 - - - # fold the 8 xmm registers to 1 xmm register with different constants - - movdqa rk9(%rip), %xmm10 - movdqa %xmm0, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm0 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm0, %xmm7 - - movdqa rk11(%rip), %xmm10 - movdqa %xmm1, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm1 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm1, %xmm7 - - movdqa rk13(%rip), %xmm10 - movdqa %xmm2, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm2 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm2, %xmm7 - - movdqa rk15(%rip), %xmm10 - movdqa %xmm3, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm3 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm3, %xmm7 - - movdqa rk17(%rip), %xmm10 - movdqa %xmm4, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm4 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm4, %xmm7 - - movdqa rk19(%rip), %xmm10 - movdqa %xmm5, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm5 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm5, %xmm7 - - movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2 - #imm value of pclmulqdq instruction - #will determine which constant to use - movdqa %xmm6, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm6 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm6, %xmm7 - - - # instead of 64, we add 48 to the loop counter to save 1 instruction - # from the loop instead of a cmp instruction, we use the negative - # flag with the jl instruction - 
add $128-16, arg3 - jl _final_reduction_for_128 - - # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 - # and the rest is in memory. We can fold 16 bytes at a time if y>=16 - # continue folding 16B at a time - -_16B_reduction_loop: - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm7 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - movdqu (arg2), %xmm0 - pshufb %xmm11, %xmm0 - pxor %xmm0 , %xmm7 - add $16, arg2 - sub $16, arg3 - # instead of a cmp instruction, we utilize the flags with the - # jge instruction equivalent of: cmp arg3, 16-16 - # check if there is any more 16B in the buffer to be able to fold - jge _16B_reduction_loop - - #now we have 16+z bytes left to reduce, where 0<= z < 16. - #first, we reduce the data in the xmm7 register - - -_final_reduction_for_128: - # check if any more data to fold. If not, compute the CRC of - # the final 128 bits - add $16, arg3 - je _128_done - - # here we are getting data that is less than 16 bytes. - # since we know that there was data before the pointer, we can - # offset the input pointer before the actual point, to receive - # exactly 16 bytes. after that the registers need to be adjusted. -_get_last_two_xmms: - movdqa %xmm7, %xmm2 - - movdqu -16(arg2, arg3), %xmm1 - pshufb %xmm11, %xmm1 - - # get rid of the extra data that was loaded before - # load the shift constant - lea pshufb_shf_table+16(%rip), %rax - sub arg3, %rax - movdqu (%rax), %xmm0 - - # shift xmm2 to the left by arg3 bytes - pshufb %xmm0, %xmm2 - - # shift xmm7 to the right by 16-arg3 bytes - pxor mask1(%rip), %xmm0 - pshufb %xmm0, %xmm7 - pblendvb %xmm2, %xmm1 #xmm0 is implicit - - # fold 16 Bytes - movdqa %xmm1, %xmm2 - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm7 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm2, %xmm7 - -_128_done: - # compute crc of a 128-bit value - movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10 - movdqa %xmm7, %xmm0 - - #64b fold - pclmulqdq $0x1, %xmm10, %xmm7 - pslldq $8 , %xmm0 - pxor %xmm0, %xmm7 - - #32b fold - movdqa %xmm7, %xmm0 - - pand mask2(%rip), %xmm0 - - psrldq $12, %xmm7 - pclmulqdq $0x10, %xmm10, %xmm7 - pxor %xmm0, %xmm7 - - #barrett reduction -_barrett: - movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10 - movdqa %xmm7, %xmm0 - pclmulqdq $0x01, %xmm10, %xmm7 - pslldq $4, %xmm7 - pclmulqdq $0x11, %xmm10, %xmm7 - - pslldq $4, %xmm7 - pxor %xmm0, %xmm7 - pextrd $1, %xmm7, %eax - -_cleanup: - # scale the result back to 16 bits - shr $16, %eax - mov %rcx, %rsp - ret - -######################################################################## - -.align 16 -_less_than_128: - - # check if there is enough buffer to be able to fold 16B at a time - cmp $32, arg3 - jl _less_than_32 - movdqa SHUF_MASK(%rip), %xmm11 - - # now if there is, load the constants - movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 - - movd arg1_low32, %xmm0 # get the initial crc value - pslldq $12, %xmm0 # align it to its correct place - movdqu (arg2), %xmm7 # load the plaintext - pshufb %xmm11, %xmm7 # byte-reflect the plaintext - pxor %xmm0, %xmm7 - - - # update the buffer pointer - add $16, arg2 - - # update the counter. subtract 32 instead of 16 to save one - # instruction from the loop - sub $32, arg3 - - jmp _16B_reduction_loop - - -.align 16 -_less_than_32: - # mov initial crc to the return value. this is necessary for - # zero-length buffers. 
- mov arg1_low32, %eax - test arg3, arg3 - je _cleanup - - movdqa SHUF_MASK(%rip), %xmm11 - - movd arg1_low32, %xmm0 # get the initial crc value - pslldq $12, %xmm0 # align it to its correct place - - cmp $16, arg3 - je _exact_16_left - jl _less_than_16_left - - movdqu (arg2), %xmm7 # load the plaintext - pshufb %xmm11, %xmm7 # byte-reflect the plaintext - pxor %xmm0 , %xmm7 # xor the initial crc value - add $16, arg2 - sub $16, arg3 - movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 - jmp _get_last_two_xmms - - -.align 16 -_less_than_16_left: - # use stack space to load data less than 16 bytes, zero-out - # the 16B in memory first. - - pxor %xmm1, %xmm1 - mov %rsp, %r11 - movdqa %xmm1, (%r11) - - cmp $4, arg3 - jl _only_less_than_4 - - # backup the counter value - mov arg3, %r9 - cmp $8, arg3 - jl _less_than_8_left - - # load 8 Bytes - mov (arg2), %rax - mov %rax, (%r11) - add $8, %r11 - sub $8, arg3 - add $8, arg2 -_less_than_8_left: - - cmp $4, arg3 - jl _less_than_4_left - - # load 4 Bytes - mov (arg2), %eax - mov %eax, (%r11) - add $4, %r11 - sub $4, arg3 - add $4, arg2 -_less_than_4_left: - - cmp $2, arg3 - jl _less_than_2_left - - # load 2 Bytes - mov (arg2), %ax - mov %ax, (%r11) - add $2, %r11 - sub $2, arg3 - add $2, arg2 -_less_than_2_left: - cmp $1, arg3 - jl _zero_left - - # load 1 Byte - mov (arg2), %al - mov %al, (%r11) -_zero_left: - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - # shl r9, 4 - lea pshufb_shf_table+16(%rip), %rax - sub %r9, %rax - movdqu (%rax), %xmm0 - pxor mask1(%rip), %xmm0 - - pshufb %xmm0, %xmm7 - jmp _128_done - -.align 16 -_exact_16_left: - movdqu (arg2), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - jmp _128_done - -_only_less_than_4: - cmp $3, arg3 - jl _only_less_than_3 - - # load 3 Bytes - mov (arg2), %al - mov %al, (%r11) - - mov 1(arg2), %al - mov %al, 1(%r11) - - mov 2(arg2), %al - mov %al, 2(%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $5, %xmm7 - - jmp _barrett -_only_less_than_3: - cmp $2, arg3 - jl _only_less_than_2 - - # load 2 Bytes - mov (arg2), %al - mov %al, (%r11) - - mov 1(arg2), %al - mov %al, 1(%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $6, %xmm7 - - jmp _barrett -_only_less_than_2: - - # load 1 Byte - mov (arg2), %al - mov %al, (%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $7, %xmm7 - - jmp _barrett - -ENDPROC(crc_t10dif_pcl) - -.data - -# precomputed constants -# these constants are precomputed from the poly: -# 0x8bb70000 (0x8bb7 scaled to 32 bits) -.align 16 -# Q = 0x18BB70000 -# rk1 = 2^(32*3) mod Q << 32 -# rk2 = 2^(32*5) mod Q << 32 -# rk3 = 2^(32*15) mod Q << 32 -# rk4 = 2^(32*17) mod Q << 32 -# rk5 = 2^(32*3) mod Q << 32 -# rk6 = 2^(32*2) mod Q << 32 -# rk7 = floor(2^64/Q) -# rk8 = Q -rk1: -.quad 0x2d56000000000000 -rk2: -.quad 0x06df000000000000 -rk3: -.quad 0x9d9d000000000000 -rk4: -.quad 0x7cf5000000000000 -rk5: -.quad 0x2d56000000000000 -rk6: -.quad 0x1368000000000000 -rk7: -.quad 0x00000001f65a57f8 -rk8: -.quad 0x000000018bb70000 - -rk9: -.quad 0xceae000000000000 -rk10: -.quad 0xbfd6000000000000 -rk11: -.quad 0x1e16000000000000 -rk12: -.quad 0x713c000000000000 -rk13: -.quad 0xf7f9000000000000 -rk14: -.quad 0x80a6000000000000 -rk15: -.quad 0x044c000000000000 -rk16: -.quad 0xe658000000000000 -rk17: -.quad 0xad18000000000000 -rk18: 
-.quad 0xa497000000000000 -rk19: -.quad 0x6ee3000000000000 -rk20: -.quad 0xe7b5000000000000 - - - -mask1: -.octa 0x80808080808080808080808080808080 -mask2: -.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF - -SHUF_MASK: -.octa 0x000102030405060708090A0B0C0D0E0F - -pshufb_shf_table: -# use these values for shift constants for the pshufb instruction -# different alignments result in values as shown: -# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 -# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 -# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 -# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 -# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 -# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 -# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 -# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 -# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 -# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 -# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 -# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 -# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 -# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 -# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 -.octa 0x8f8e8d8c8b8a89888786858483828100 -.octa 0x000e0d0c0b0a09080706050403020100 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c deleted file mode 100644 index 7845d7fd54c0..000000000000 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Cryptographic API. - * - * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions - * - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen <tim.c.chen@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/types.h> -#include <linux/module.h> -#include <linux/crc-t10dif.h> -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <asm/i387.h> -#include <asm/cpufeature.h> -#include <asm/cpu_device_id.h> - -asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, - size_t len); - -struct chksum_desc_ctx { - __u16 crc; -}; - -/* - * Steps through buffer one byte at at time, calculates reflected - * crc using table. 
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
-		kernel_fpu_end();
-	} else
-		ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__u16 *)out = ctx->crc;
-	return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-			u8 *out)
-{
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
-		kernel_fpu_end();
-	} else
-		*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-	.digestsize	= CRC_T10DIF_DIGEST_SIZE,
-	.init		= chksum_init,
-	.update		= chksum_update,
-	.final		= chksum_final,
-	.finup		= chksum_finup,
-	.digest		= chksum_digest,
-	.descsize	= sizeof(struct chksum_desc_ctx),
-	.base		= {
-		.cra_name		= "crct10dif",
-		.cra_driver_name	= "crct10dif-pclmul",
-		.cra_priority		= 200,
-		.cra_blocksize		= CRC_T10DIF_BLOCK_SIZE,
-		.cra_module		= THIS_MODULE,
-	}
-};
-
-static const struct x86_cpu_id crct10dif_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
-
-static int __init crct10dif_intel_mod_init(void)
-{
-	if (!x86_match_cpu(crct10dif_cpu_id))
-		return -ENODEV;
-
-	return crypto_register_shash(&alg);
-}
-
-static void __exit crct10dif_intel_mod_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_intel_mod_init);
-module_exit(crct10dif_intel_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("crct10dif");
-MODULE_ALIAS("crct10dif-pclmul");
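A closing note on the param.h pair near the top of this diff: the kernel-side HZ becomes CONFIG_HZ, but the user-visible HZ/USER_HZ stay at alpha's traditional 1024, so tick-denominated values reported through times() keep their historical scale regardless of the kernel's internal tick rate. A small userspace sketch (a hypothetical program, not from the patch) of how portable code should consume those ticks:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/times.h>

    int main(void)
    {
            /* sysconf() reports USER_HZ (1024 on alpha), which is the unit
             * for every field of struct tms -- independent of the kernel's
             * internal CONFIG_HZ tick rate. */
            long ticks_per_sec = sysconf(_SC_CLK_TCK);
            struct tms t;

            if (times(&t) == (clock_t)-1)
                    return 1;
            printf("user cpu time: %.3f s\n",
                   (double)t.tms_utime / ticks_per_sec);
            return 0;
    }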