Diffstat (limited to 'arch')
419 files changed, 7323 insertions, 4933 deletions
diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile index cd143887380a..8399bd0e68e8 100644 --- a/arch/alpha/boot/Makefile +++ b/arch/alpha/boot/Makefile @@ -14,6 +14,9 @@ targets := vmlinux.gz vmlinux \ tools/bootpzh bootloader bootpheader bootpzheader OBJSTRIP := $(obj)/tools/objstrip +HOSTCFLAGS := -Wall -I$(objtree)/usr/include +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) + # SRM bootable image. Copy to offset 512 of a partition. $(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh ( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@ @@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE $(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE $(call if_changed,objstrip) -LDFLAGS_bootloader := -static -uvsprintf -T #-N -relax -LDFLAGS_bootpheader := -static -uvsprintf -T #-N -relax -LDFLAGS_bootpzheader := -static -uvsprintf -T #-N -relax +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootpheader := -static -T # -N -relax +LDFLAGS_bootpzheader := -static -T # -N -relax -OBJ_bootlx := $(obj)/head.o $(obj)/main.o -OBJ_bootph := $(obj)/head.o $(obj)/bootp.o -OBJ_bootpzh := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o +OBJ_bootlx := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o +OBJ_bootph := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o +OBJ_bootpzh := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o $(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE $(call if_changed,ld) diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c index 3baf2d1e908d..dd6eb4a33582 100644 --- a/arch/alpha/boot/main.c +++ b/arch/alpha/boot/main.c @@ -19,7 +19,6 @@ #include "ksize.h" -extern int vsprintf(char *, const char *, va_list); extern unsigned long switch_to_osf_pal(unsigned long nr, struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, unsigned long *vptb); diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c new file mode 100644 index 000000000000..f844dae8a54a --- /dev/null +++ b/arch/alpha/boot/stdio.c @@ -0,0 +1,306 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <stdarg.h> +#include <stddef.h> + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +# define do_div(n, base) ({ \ + unsigned int __base = (base); \ + unsigned int __rem; \ + __rem = ((unsigned long long)(n)) % __base; \ + (n) = ((unsigned long long)(n)) / __base; \ + __rem; \ +}) + + +static int skip_atoi(const char **s) +{ + int i, c; + + for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) + i = i*10 + c - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, unsigned long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if ((signed long long)num < 0) { + sign = '-'; + num = - (signed long long)num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) { + tmp[i++] = digits[do_div(num, base)]; + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. 
*/ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if ('0' <= *fmt && *fmt <= '9') + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if ('0' <= *fmt && *fmt <= '9') + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'l' && *(fmt + 1) == 'l') { + qualifier = 'q'; + fmt += 2; + } else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' + || *fmt == 'Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = "<NULL>"; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'q') { + num = va_arg(args, unsigned long long); + if (flags & SIGN) + num = (signed long long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) 
+{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c index 367d53d031fc..dee82695f48b 100644 --- a/arch/alpha/boot/tools/objstrip.c +++ b/arch/alpha/boot/tools/objstrip.c @@ -27,6 +27,9 @@ #include <linux/param.h> #ifdef __ELF__ # include <linux/elf.h> +# define elfhdr elf64_hdr +# define elf_phdr elf64_phdr +# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) #endif /* bootfile size must be multiple of BLOCK_SIZE: */ diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h index 429e8cd0d78e..e5117766529e 100644 --- a/arch/alpha/include/asm/cmpxchg.h +++ b/arch/alpha/include/asm/cmpxchg.h @@ -66,6 +66,4 @@ #undef __ASM__MB #undef ____cmpxchg -#define __HAVE_ARCH_CMPXCHG 1 - #endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h index f61e1a56c378..4cb4b6d3452c 100644 --- a/arch/alpha/include/asm/types.h +++ b/arch/alpha/include/asm/types.h @@ -2,6 +2,5 @@ #define _ALPHA_TYPES_H #include <asm-generic/int-ll64.h> -#include <uapi/asm/types.h> #endif /* _ALPHA_TYPES_H */ diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index c509d306db45..a56e608db2f9 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -3,7 +3,7 @@ #include <uapi/asm/unistd.h> -#define NR_SYSCALLS 511 +#define NR_SYSCALLS 514 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index d214a0358100..aa33bf5aacb6 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -472,5 +472,8 @@ #define __NR_sched_setattr 508 #define __NR_sched_getattr 509 #define __NR_renameat2 510 +#define __NR_getrandom 511 +#define __NR_memfd_create 512 +#define __NR_execveat 513 #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c index 253cf1a87481..51267ac5729b 100644 --- a/arch/alpha/kernel/err_ev6.c +++ b/arch/alpha/kernel/err_ev6.c @@ -6,7 +6,6 @@ * Error handling code supporting Alpha systems */ -#include <linux/init.h> #include <linux/sched.h> #include <asm/io.h> diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 7b2be251c30f..51f2c8654253 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -19,7 +19,6 @@ #include <linux/ptrace.h> #include <linux/interrupt.h> #include <linux/random.h> -#include <linux/init.h> #include <linux/irq.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index e51f578636a5..36dc91ace83a 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv, if (tv) { if (get_tv32((struct timeval *)&kts, tv)) return -EFAULT; + kts.tv_nsec *= 1000; } if (tz) { if (copy_from_user(&ktz, tz, sizeof(*tz))) return -EFAULT; } - kts.tv_nsec *= 1000; - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 1941a07b5811..84d13263ce46 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task) } /* - * Copy an alpha thread.. 
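The stdio.c added above gives the SRM boot loader its own minimal vsprintf()/sprintf() instead of pulling the kernel's in at link time (note the dropped -uvsprintf in the Makefile hunk and the removed extern in main.c). It handles the '-', '+', ' ', '#' and '0' flags, field width, precision, the h/l/ll/L qualifiers plus the kernel-legacy Z (size_t), and the c/s/p/n/%/o/x/X/d/i/u conversions. A host-side sketch, assuming only the hosted C library, whose sprintf() agrees with this subset:

	/*
	 * Host-side sketch (not part of the patch): exercises the format subset
	 * the new boot stdio.c implements. A hosted sprintf() behaves the same
	 * for these conversions; %p (zero-padded to 2*sizeof(void *) in the boot
	 * version) and the kernel-legacy %Z qualifier are the notable divergences.
	 */
	#include <stdio.h>

	int main(void)
	{
		char buf[64];

		sprintf(buf, "[%-8s][%+d][%#08x]", "srm", 42, 0xbeef);
		puts(buf);	/* [srm     ][+42][0x00beef] */

		sprintf(buf, "[%12.4d][%llu]", 7, 123456789012345ULL);
		puts(buf);	/* [        0007][123456789012345] */
		return 0;
	}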
+ * Copy architecture-specific thread state */ - int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, + unsigned long kthread_arg, struct task_struct *p) { extern void ret_from_fork(void); @@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, sizeof(struct switch_stack) + sizeof(struct pt_regs)); childstack->r26 = (unsigned long) ret_from_kernel_thread; childstack->r9 = usp; /* function */ - childstack->r10 = arg; + childstack->r10 = kthread_arg; childregs->hae = alpha_mv.hae_cache, childti->pcb.usp = 0; return 0; diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 99ac36d5de4e..2f24447fef92 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -63,7 +63,6 @@ static struct { enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, }; @@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier) return -EINVAL; } - static void send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) { @@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs) generic_smp_call_function_interrupt(); break; - case IPI_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case IPI_CPU_STOP: halt(); @@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); } static void diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 6f01d9ad7b81..72b59511e59a 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -237,8 +237,7 @@ srmcons_init(void) return -ENODEV; } - -module_init(srmcons_init); +device_initcall(srmcons_init); /* diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index f21d61fab678..24e41bd7d3c9 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin) pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); irq = intline; - msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI); + msi_loc = dev->msi_cap; msg_ctl = 0; if (msi_loc) pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl); diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 24789713f1ea..9b62e3fd4f03 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -529,6 +529,9 @@ sys_call_table: .quad sys_sched_setattr .quad sys_sched_getattr .quad sys_renameat2 /* 510 */ + .quad sys_getrandom + .quad sys_memfd_create + .quad sys_execveat .size sys_call_table, . 
- sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 9c4c189eb22f..74aceead06e9 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -14,7 +14,6 @@ #include <linux/tty.h> #include <linux/delay.h> #include <linux/module.h> -#include <linux/init.h> #include <linux/kallsyms.h> #include <linux/ratelimit.h> diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 9d0ac091a52a..4a905bd667e2 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -23,8 +23,7 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/module.h> - -#include <asm/uaccess.h> +#include <linux/uaccess.h> extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); @@ -107,7 +106,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, /* If we're in an interrupt context, or have no user context, we must not take the fault. */ - if (!mm || in_atomic()) + if (!mm || faulthandler_disabled()) goto no_context; #ifdef CONFIG_ALPHA_LARGE_VMALLOC diff --git a/arch/alpha/oprofile/op_model_ev4.c b/arch/alpha/oprofile/op_model_ev4.c index 18aa9b4f94f1..086a0d5445c5 100644 --- a/arch/alpha/oprofile/op_model_ev4.c +++ b/arch/alpha/oprofile/op_model_ev4.c @@ -8,7 +8,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/alpha/oprofile/op_model_ev5.c b/arch/alpha/oprofile/op_model_ev5.c index c32f8a0ad925..c300f5ef3482 100644 --- a/arch/alpha/oprofile/op_model_ev5.c +++ b/arch/alpha/oprofile/op_model_ev5.c @@ -8,7 +8,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/alpha/oprofile/op_model_ev6.c b/arch/alpha/oprofile/op_model_ev6.c index 1c84cc257fc7..02edf5971614 100644 --- a/arch/alpha/oprofile/op_model_ev6.c +++ b/arch/alpha/oprofile/op_model_ev6.c @@ -8,7 +8,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/alpha/oprofile/op_model_ev67.c b/arch/alpha/oprofile/op_model_ev67.c index 34a57a126553..adb1744d20f3 100644 --- a/arch/alpha/oprofile/op_model_ev67.c +++ b/arch/alpha/oprofile/op_model_ev67.c @@ -9,7 +9,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/arc/Kconfig.debug b/arch/arc/Kconfig.debug index a7fc0da25650..ff6a4b5ce927 100644 --- a/arch/arc/Kconfig.debug +++ b/arch/arc/Kconfig.debug @@ -2,19 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config EARLY_PRINTK - bool "Early printk" if EMBEDDED - default y - help - Write kernel log output directly into the VGA buffer or to a serial - port. - - This is useful for kernel debugging when your machine crashes very - early before the console code is initialized. For normal operation - it is not recommended because it looks ugly and doesn't cooperate - with klogd/syslogd or the X server. You should normally N here, - unless you want to debug such a crash. 
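The alpha fault-handler hunk above (and the arc one below) replaces the in_atomic() test with faulthandler_disabled(). This follows from decoupling pagefault_disable() from preempt_disable(), also visible in the futex comment changes below: once disabling pagefaults no longer implies disabling preemption, the fault path needs a predicate that checks both conditions. A self-contained model of that logic, with pagefault_count and preempt_count as hypothetical stand-ins for the real per-task/per-CPU counters:

	/*
	 * Self-contained model, not kernel code: the two conditions that
	 * faulthandler_disabled() combines once pagefault_disable() stops
	 * implying preempt_disable(). Neither condition implies the other
	 * any more, so the fault handler must test both.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	static int pagefault_count;	/* bumped by pagefault_disable() */
	static int preempt_count;	/* bumped by preempt_disable(), irq entry */

	static bool pagefault_disabled(void) { return pagefault_count != 0; }
	static bool in_atomic(void)          { return preempt_count != 0; }

	static bool faulthandler_disabled(void)
	{
		return pagefault_disabled() || in_atomic();
	}

	int main(void)
	{
		pagefault_count = 1;	/* e.g. inside a futex atomic op */
		if (faulthandler_disabled())
			puts("fault handler must take the no_context path");
		return 0;
	}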
- config 16KSTACKS bool "Use 16Kb for kernel stacks instead of 8Kb" help diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 067551b6920a..9917a45fc430 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -99,7 +99,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ atomic_ops_unlock(flags); \ } -#define ATOMIC_OP_RETURN(op, c_op) \ +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ unsigned long flags; \ diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h index 4dc64ddebece..05b5aaf5b0f9 100644 --- a/arch/arc/include/asm/futex.h +++ b/arch/arc/include/asm/futex.h @@ -53,7 +53,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -75,7 +75,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) ret = -ENOSYS; } - pagefault_enable(); /* subsumes preempt_enable() */ + pagefault_enable(); if (!ret) { switch (cmp) { @@ -104,7 +104,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) return ret; } -/* Compare-xchg with preemption disabled. +/* Compare-xchg with pagefaults disabled. * Notes: * -Best-Effort: Exchg happens only if compare succeeds. * If compare fails, returns; leaving retry/looping to upper layers @@ -121,7 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ + pagefault_disable(); /* TBD : can use llock/scond */ __asm__ __volatile__( @@ -142,7 +142,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, : "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT) : "cc", "memory"); - pagefault_enable(); /* subsumes preempt_enable() */ + pagefault_enable(); *uval = val; return val; diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 8c3a3e02ba92..12b2100db073 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -266,7 +266,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, * Machine specific helpers for Entire D-Cache or Per Line ops */ -static unsigned int __before_dc_op(const int op) +static inline unsigned int __before_dc_op(const int op) { unsigned int reg = reg; @@ -284,7 +284,7 @@ static unsigned int __before_dc_op(const int op) return reg; } -static void __after_dc_op(const int op, unsigned int reg) +static inline void __after_dc_op(const int op, unsigned int reg) { if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 6a2e006cbcce..d948e4e9d89c 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -86,7 +86,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 86217db2937a..992736b5229b 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \ imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \ imx25-karo-tx25.dtb \ imx25-pdk.dtb -dtb-$(CONFIG_SOC_IMX31) += \ +dtb-$(CONFIG_SOC_IMX27) += \ imx27-apf27.dtb \ imx27-apf27dev.dtb \ imx27-eukrea-mbimxsd27-baseboard.dtb \ diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi index c3255e0c90aa..dbb3f4d2bf84 100644 --- a/arch/arm/boot/dts/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -223,6 +223,25 @@ /include/ "tps65217.dtsi" &tps { + /* + * Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only + * mode") at poweroff. Most BeagleBone versions do not support RTC-only + * mode and risk hardware damage if this mode is entered. + * + * For details, see linux-omap mailing list May 2015 thread + * [PATCH] ARM: dts: am335x-bone* enable pmic-shutdown-controller + * In particular, messages: + * http://www.spinics.net/lists/linux-omap/msg118585.html + * http://www.spinics.net/lists/linux-omap/msg118615.html + * + * You can override this later with + * &tps { /delete-property/ ti,pmic-shutdown-controller; } + * if you want to use RTC-only mode and made sure you are not affected + * by the hardware problems. (Tip: double-check by performing a current + * measurement after shutdown: it should be less than 1 mA.) + */ + ti,pmic-shutdown-controller; + regulators { dcdc1_reg: regulator@0 { regulator-name = "vdds_dpr"; diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts index 5c42d259fa68..901739fcb85a 100644 --- a/arch/arm/boot/dts/am335x-boneblack.dts +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -80,7 +80,3 @@ status = "okay"; }; }; - -&rtc { - system-power-controller; -}; diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 87fc7a35e802..156d05efcb70 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -654,7 +654,7 @@ wlcore: wlcore@2 { compatible = "ti,wl1271"; reg = <2>; - interrupt-parent = <&gpio1>; + interrupt-parent = <&gpio0>; interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */ ref-clock-frequency = <38400000>; }; diff --git a/arch/arm/boot/dts/am35xx-clocks.dtsi b/arch/arm/boot/dts/am35xx-clocks.dtsi index 518b8fde88b0..18cc826e9db5 100644 --- a/arch/arm/boot/dts/am35xx-clocks.dtsi +++ b/arch/arm/boot/dts/am35xx-clocks.dtsi @@ -12,7 +12,7 @@ #clock-cells = <0>; compatible = "ti,am35xx-gate-clock"; clocks = <&ipss_ick>; - reg = <0x059c>; + reg = <0x032c>; ti,bit-shift = <1>; }; @@ -20,7 +20,7 @@ #clock-cells = <0>; compatible = "ti,gate-clock"; clocks = <&rmii_ck>; - reg = <0x059c>; + reg = <0x032c>; ti,bit-shift = <9>; }; @@ -28,7 +28,7 @@ #clock-cells = <0>; compatible = "ti,am35xx-gate-clock"; clocks = <&ipss_ick>; - reg = <0x059c>; + reg = <0x032c>; ti,bit-shift = <2>; }; @@ -36,7 +36,7 @@ #clock-cells = <0>; compatible = "ti,gate-clock"; clocks = <&pclk_ck>; - reg = <0x059c>; + reg = <0x032c>; ti,bit-shift = <10>; }; @@ -44,7 +44,7 @@ #clock-cells = <0>; compatible = "ti,am35xx-gate-clock"; clocks = <&ipss_ick>; - reg = <0x059c>; + reg = <0x032c>; ti,bit-shift = <0>; }; @@ -52,7 +52,7 @@ #clock-cells = <0>; compatible = "ti,gate-clock"; clocks = <&sys_ck>; - reg 
= <0x059c>; + reg = <0x032c>; ti,bit-shift = <8>; }; @@ -60,7 +60,7 @@ #clock-cells = <0>; compatible = "ti,am35xx-gate-clock"; clocks = <&sys_ck>; - reg = <0x059c>; + reg = <0x032c>; ti,bit-shift = <3>; }; }; diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 8ae29c955c11..c17097d2c167 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -49,7 +49,7 @@ pinctrl-0 = <&matrix_keypad_pins>; debounce-delay-ms = <5>; - col-scan-delay-us = <1500>; + col-scan-delay-us = <5>; row-gpios = <&gpio5 5 GPIO_ACTIVE_HIGH /* Bank5, pin5 */ &gpio5 6 GPIO_ACTIVE_HIGH>; /* Bank5, pin6 */ @@ -473,7 +473,7 @@ interrupt-parent = <&gpio0>; interrupts = <31 0>; - wake-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio1 28 GPIO_ACTIVE_LOW>; touchscreen-size-x = <480>; touchscreen-size-y = <272>; diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 15f198e4864d..7128fad991ac 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -18,6 +18,7 @@ aliases { rtc0 = &mcp_rtc; rtc1 = &tps659038_rtc; + rtc2 = &rtc; }; memory { @@ -83,7 +84,7 @@ gpio_fan: gpio_fan { /* Based on 5v 500mA AFB02505HHB */ compatible = "gpio-fan"; - gpios = <&tps659038_gpio 1 GPIO_ACTIVE_HIGH>; + gpios = <&tps659038_gpio 2 GPIO_ACTIVE_HIGH>; gpio-fan,speed-map = <0 0>, <13000 1>; #cooling-cells = <2>; @@ -130,8 +131,8 @@ uart3_pins_default: uart3_pins_default { pinctrl-single,pins = < - 0x248 (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_rxd.rxd */ - 0x24c (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_txd.txd */ + 0x3f8 (PIN_INPUT_SLEW | MUX_MODE2) /* uart2_ctsn.uart3_rxd */ + 0x3fc (PIN_INPUT_SLEW | MUX_MODE1) /* uart2_rtsn.uart3_txd */ >; }; @@ -455,7 +456,7 @@ mcp_rtc: rtc@6f { compatible = "microchip,mcp7941x"; reg = <0x6f>; - interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_LOW>; /* IRQ_SYS_1N */ + interrupts = <GIC_SPI 2 IRQ_TYPE_EDGE_RISING>; /* IRQ_SYS_1N */ pinctrl-names = "default"; pinctrl-0 = <&mcp79410_pins_default>; @@ -478,7 +479,7 @@ &uart3 { status = "okay"; interrupts-extended = <&crossbar_mpu GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>, - <&dra7_pmx_core 0x248>; + <&dra7_pmx_core 0x3f8>; pinctrl-names = "default"; pinctrl-0 = <&uart3_pins_default>; diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi index c675257f2377..f076ff856d8b 100644 --- a/arch/arm/boot/dts/armada-375.dtsi +++ b/arch/arm/boot/dts/armada-375.dtsi @@ -69,7 +69,7 @@ mainpll: mainpll { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <2000000000>; + clock-frequency = <1000000000>; }; /* 25 MHz reference crystal */ refclk: oscillator { diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index ed2dd8ba4080..218a2acd36e5 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -585,7 +585,7 @@ mainpll: mainpll { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <2000000000>; + clock-frequency = <1000000000>; }; /* 25 MHz reference crystal */ diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi index 0e85fc15ceda..ecd1318109ba 100644 --- a/arch/arm/boot/dts/armada-39x.dtsi +++ b/arch/arm/boot/dts/armada-39x.dtsi @@ -502,7 +502,7 @@ mainpll: mainpll { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <2000000000>; + clock-frequency = <1000000000>; }; }; }; diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts 
b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts index a2cf2154dcdb..fdd187c55aa5 100644 --- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts +++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts @@ -95,6 +95,11 @@ internal-regs { + rtc@10300 { + /* No crystal connected to the internal RTC */ + status = "disabled"; + }; + /* J10: VCC, NC, RX, NC, TX, GND */ serial@12000 { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts index e3b08fb959e5..990e8a2100f0 100644 --- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts +++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts @@ -105,6 +105,10 @@ }; internal-regs { + rtc@10300 { + /* No crystal connected to the internal RTC */ + status = "disabled"; + }; serial@12000 { status = "okay"; }; diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index de8427be830a..289806adb343 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -382,7 +382,7 @@ ti,hwmods = "usb_otg_hs"; usb0: usb@47401000 { - compatible = "ti,musb-am33xx"; + compatible = "ti,musb-dm816"; reg = <0x47401400 0x400 0x47401000 0x200>; reg-names = "mc", "control"; @@ -422,7 +422,7 @@ }; usb1: usb@47401800 { - compatible = "ti,musb-am33xx"; + compatible = "ti,musb-dm816"; reg = <0x47401c00 0x400 0x47401800 0x200>; reg-names = "mc", "control"; diff --git a/arch/arm/boot/dts/dove-cubox.dts b/arch/arm/boot/dts/dove-cubox.dts index aae7efc09b0b..e6fa251e17b9 100644 --- a/arch/arm/boot/dts/dove-cubox.dts +++ b/arch/arm/boot/dts/dove-cubox.dts @@ -87,6 +87,7 @@ /* connect xtal input to 25MHz reference */ clocks = <&ref25>; + clock-names = "xtal"; /* connect xtal input as source of pll0 and pll1 */ silabs,pll-source = <0 0>, <1 0>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 5332b57b4950..f03a091cd076 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -911,7 +911,7 @@ ti,clock-cycles = <16>; reg = <0x4ae07ddc 0x4>, <0x4ae07de0 0x4>, - <0x4ae06014 0x4>, <0x4a003b20 0x8>, + <0x4ae06014 0x4>, <0x4a003b20 0xc>, <0x4ae0c158 0x4>; reg-names = "setup-address", "control-address", "int-address", "efuse-address", @@ -944,7 +944,7 @@ ti,clock-cycles = <16>; reg = <0x4ae07e34 0x4>, <0x4ae07e24 0x4>, - <0x4ae06010 0x4>, <0x4a0025cc 0x8>, + <0x4ae06010 0x4>, <0x4a0025cc 0xc>, <0x4a002470 0x4>; reg-names = "setup-address", "control-address", "int-address", "efuse-address", @@ -977,7 +977,7 @@ ti,clock-cycles = <16>; reg = <0x4ae07e30 0x4>, <0x4ae07e20 0x4>, - <0x4ae06010 0x4>, <0x4a0025e0 0x8>, + <0x4ae06010 0x4>, <0x4a0025e0 0xc>, <0x4a00246c 0x4>; reg-names = "setup-address", "control-address", "int-address", "efuse-address", @@ -1010,7 +1010,7 @@ ti,clock-cycles = <16>; reg = <0x4ae07de4 0x4>, <0x4ae07de8 0x4>, - <0x4ae06010 0x4>, <0x4a003b08 0x8>, + <0x4ae06010 0x4>, <0x4a003b08 0xc>, <0x4ae0c154 0x4>; reg-names = "setup-address", "control-address", "int-address", "efuse-address", @@ -1203,7 +1203,7 @@ status = "disabled"; }; - rtc@48838000 { + rtc: rtc@48838000 { compatible = "ti,am3352-rtc"; reg = <0x48838000 0x100>; interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index 8de12af7c276..d6b49e5b32e9 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -9,6 +9,7 @@ #include <dt-bindings/sound/samsung-i2s.h> #include <dt-bindings/input/input.h> +#include 
<dt-bindings/clock/maxim,max77686.h> #include "exynos4412.dtsi" / { @@ -105,6 +106,8 @@ rtc@10070000 { status = "okay"; + clocks = <&clock CLK_RTC>, <&max77686 MAX77686_CLK_AP>; + clock-names = "rtc", "rtc_src"; }; g2d@10800000 { diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts index 173ffa479ad3..792394dd0f2a 100644 --- a/arch/arm/boot/dts/exynos4412-trats2.dts +++ b/arch/arm/boot/dts/exynos4412-trats2.dts @@ -736,7 +736,7 @@ display-timings { timing-0 { - clock-frequency = <0>; + clock-frequency = <57153600>; hactive = <720>; vactive = <1280>; hfront-porch = <5>; diff --git a/arch/arm/boot/dts/exynos5250-snow.dts b/arch/arm/boot/dts/exynos5250-snow.dts index 2657e842e5a5..1eca97ee4bd6 100644 --- a/arch/arm/boot/dts/exynos5250-snow.dts +++ b/arch/arm/boot/dts/exynos5250-snow.dts @@ -567,6 +567,7 @@ num-slots = <1>; broken-cd; cap-sdio-irq; + keep-power-in-suspend; card-detect-delay = <200>; samsung,dw-mshc-ciu-div = <3>; samsung,dw-mshc-sdr-timing = <2 3>; diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index 0788d08fb43e..146e71118a72 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -711,6 +711,7 @@ num-slots = <1>; broken-cd; cap-sdio-irq; + keep-power-in-suspend; card-detect-delay = <200>; clock-frequency = <400000000>; samsung,dw-mshc-ciu-div = <1>; diff --git a/arch/arm/boot/dts/exynos5420-trip-points.dtsi b/arch/arm/boot/dts/exynos5420-trip-points.dtsi index 5d31fc140823..2180a0152c9b 100644 --- a/arch/arm/boot/dts/exynos5420-trip-points.dtsi +++ b/arch/arm/boot/dts/exynos5420-trip-points.dtsi @@ -28,7 +28,7 @@ trips { type = "active"; }; cpu-crit-0 { - temperature = <1200000>; /* millicelsius */ + temperature = <120000>; /* millicelsius */ hysteresis = <0>; /* millicelsius */ type = "critical"; }; diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi index f67b23f303c3..45317538bbae 100644 --- a/arch/arm/boot/dts/exynos5420.dtsi +++ b/arch/arm/boot/dts/exynos5420.dtsi @@ -536,6 +536,7 @@ clock-names = "dp"; phys = <&dp_phy>; phy-names = "dp"; + power-domains = <&disp_pd>; }; mipi_phy: video-phy@10040714 { diff --git a/arch/arm/boot/dts/exynos5440-trip-points.dtsi b/arch/arm/boot/dts/exynos5440-trip-points.dtsi index 48adfa8f4300..356e963edf11 100644 --- a/arch/arm/boot/dts/exynos5440-trip-points.dtsi +++ b/arch/arm/boot/dts/exynos5440-trip-points.dtsi @@ -18,7 +18,7 @@ trips { type = "active"; }; cpu-crit-0 { - temperature = <1050000>; /* millicelsius */ + temperature = <105000>; /* millicelsius */ hysteresis = <0>; /* millicelsius */ type = "critical"; }; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index 412f41d62686..02eb8b15374f 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -674,6 +674,7 @@ num-slots = <1>; broken-cd; cap-sdio-irq; + keep-power-in-suspend; card-detect-delay = <200>; clock-frequency = <400000000>; samsung,dw-mshc-ciu-div = <1>; diff --git a/arch/arm/boot/dts/imx23-olinuxino.dts b/arch/arm/boot/dts/imx23-olinuxino.dts index 7e6eef2488e8..82045398bf1f 100644 --- a/arch/arm/boot/dts/imx23-olinuxino.dts +++ b/arch/arm/boot/dts/imx23-olinuxino.dts @@ -12,6 +12,7 @@ */ /dts-v1/; +#include <dt-bindings/gpio/gpio.h> #include "imx23.dtsi" / { @@ -93,6 +94,7 @@ ahb@80080000 { usb0: usb@80080000 { + dr_mode = "host"; vbus-supply = <®_usb0_vbus>; status = "okay"; }; @@ -122,7 
+124,7 @@ user { label = "green"; - gpios = <&gpio2 1 1>; + gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; }; }; }; diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index e4d3aecc4ed2..677f81d9dcd5 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -428,6 +428,7 @@ pwm4: pwm@53fc8000 { compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; + #pwm-cells = <2>; reg = <0x53fc8000 0x4000>; clocks = <&clks 108>, <&clks 52>; clock-names = "ipg", "per"; diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index 6951b66d1ab7..bc215e4b75fd 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -533,7 +533,7 @@ fec: ethernet@1002b000 { compatible = "fsl,imx27-fec"; - reg = <0x1002b000 0x4000>; + reg = <0x1002b000 0x1000>; interrupts = <50>; clocks = <&clks IMX27_CLK_FEC_IPG_GATE>, <&clks IMX27_CLK_FEC_AHB_GATE>; diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 25e25f82fbae..4e073e854742 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -913,7 +913,7 @@ 80 81 68 69 70 71 72 73 74 75 76 77>; - interrupt-names = "auart4-rx", "aurat4-tx", "spdif-tx", "empty", + interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty", "saif0", "saif1", "i2c0", "i2c1", "auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx", "auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx"; diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi index 19cc269a08d4..1ce6133b67f5 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi @@ -31,6 +31,7 @@ regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; gpio = <&gpio4 15 0>; + enable-active-high; }; reg_usb_h1_vbus: regulator@1 { @@ -40,6 +41,7 @@ regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; gpio = <&gpio1 0 0>; + enable-active-high; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index 46b2fed7c319..3b24b12651b2 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -185,7 +185,6 @@ &i2c3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c3>; - pinctrl-assert-gpios = <&gpio5 4 GPIO_ACTIVE_HIGH>; status = "okay"; max7310_a: gpio@30 { diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index 134d3f27a8ec..921de6605f07 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -110,6 +110,8 @@ nand@0,0 { reg = <0 0 4>; /* CS0, offset 0, IO size 4 */ nand-bus-width = <16>; + gpmc,device-width = <2>; + ti,nand-ecc-opt = "sw"; gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index a29315833ecd..5f5e0f3d5b64 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -498,6 +498,8 @@ DRVDD-supply = <&vmmc2>; IOVDD-supply = <&vio>; DVDD-supply = <&vio>; + + ai3x-micbias-vg = <1>; }; tlv320aic3x_aux: tlv320aic3x@19 { @@ -509,6 +511,8 @@ DRVDD-supply = <&vmmc2>; IOVDD-supply = <&vio>; DVDD-supply = <&vio>; + + ai3x-micbias-vg = <2>; }; tsl2563: tsl2563@29 { @@ -828,8 +832,8 @@ touchscreen-fuzz-x = <4>; touchscreen-fuzz-y = <7>; touchscreen-fuzz-pressure = <2>; - touchscreen-max-x = <4096>; - touchscreen-max-y = <4096>; + touchscreen-size-x = <4096>; + touchscreen-size-y = <4096>; touchscreen-max-pressure = <2048>; 
ti,x-plate-ohms = <280>; diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index d18a90f5eca3..69a40cfc1f29 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -456,6 +456,7 @@ }; mmu_isp: mmu@480bd400 { + #iommu-cells = <0>; compatible = "ti,omap2-iommu"; reg = <0x480bd400 0x80>; interrupts = <24>; @@ -464,6 +465,7 @@ }; mmu_iva: mmu@5d000000 { + #iommu-cells = <0>; compatible = "ti,omap2-iommu"; reg = <0x5d000000 0x80>; interrupts = <28>; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index efe5f737f39b..7d24ae0306b5 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -128,7 +128,7 @@ * hierarchy. */ ocp { - compatible = "ti,omap4-l3-noc", "simple-bus"; + compatible = "ti,omap5-l3-noc", "simple-bus"; #address-cells = <1>; #size-cells = <1>; ranges; diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts index 74c3212f1f11..824ddab9c3ad 100644 --- a/arch/arm/boot/dts/r8a7791-koelsch.dts +++ b/arch/arm/boot/dts/r8a7791-koelsch.dts @@ -545,7 +545,7 @@ compatible = "adi,adv7511w"; reg = <0x39>; interrupt-parent = <&gpio3>; - interrupts = <29 IRQ_TYPE_EDGE_FALLING>; + interrupts = <29 IRQ_TYPE_LEVEL_LOW>; adi,input-depth = <8>; adi,input-colorspace = "rgb"; diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index bfd3f1c734b8..2201cd5da3bb 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -1017,23 +1017,6 @@ status = "disabled"; }; - vmmci: regulator-gpio { - compatible = "regulator-gpio"; - - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <2900000>; - regulator-name = "mmci-reg"; - regulator-type = "voltage"; - - startup-delay-us = <100>; - enable-active-high; - - states = <1800000 0x1 - 2900000 0x0>; - - status = "disabled"; - }; - mcde@a0350000 { compatible = "stericsson,mcde"; reg = <0xa0350000 0x1000>, /* MCDE */ diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi index bf8f0eddc2c0..744c1e3a744d 100644 --- a/arch/arm/boot/dts/ste-href.dtsi +++ b/arch/arm/boot/dts/ste-href.dtsi @@ -111,6 +111,21 @@ pinctrl-1 = <&i2c3_sleep_mode>; }; + vmmci: regulator-gpio { + compatible = "regulator-gpio"; + + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <2900000>; + regulator-name = "mmci-reg"; + regulator-type = "voltage"; + + startup-delay-us = <100>; + enable-active-high; + + states = <1800000 0x1 + 2900000 0x0>; + }; + // External Micro SD slot sdi0_per1@80126000 { arm,primecell-periphid = <0x10480180>; diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index 206826a855c0..1bc84ebdccaa 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -146,8 +146,21 @@ }; vmmci: regulator-gpio { + compatible = "regulator-gpio"; + gpios = <&gpio7 4 0x4>; enable-gpio = <&gpio6 25 0x4>; + + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <2900000>; + regulator-name = "mmci-reg"; + regulator-type = "voltage"; + + startup-delay-us = <100>; + enable-active-high; + + states = <1800000 0x1 + 2900000 0x0>; }; // External Micro SD slot diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index cf01c818b8ea..13cc7ca5e031 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -826,7 +826,7 @@ <&tegra_car TEGRA124_CLK_PLL_U>, <&tegra_car TEGRA124_CLK_USBD>; clock-names = "reg", "pll_u", "utmi-pads"; - resets 
= <&tegra_car 59>, <&tegra_car 22>; + resets = <&tegra_car 22>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; nvidia,hssync-start-delay = <0>; nvidia,idle-wait-delay = <17>; @@ -838,6 +838,7 @@ nvidia,hssquelch-level = <2>; nvidia,hsdiscon-level = <5>; nvidia,xcvr-hsslew = <12>; + nvidia,has-utmi-pad-registers; status = "disabled"; }; @@ -862,7 +863,7 @@ <&tegra_car TEGRA124_CLK_PLL_U>, <&tegra_car TEGRA124_CLK_USBD>; clock-names = "reg", "pll_u", "utmi-pads"; - resets = <&tegra_car 22>, <&tegra_car 22>; + resets = <&tegra_car 58>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; nvidia,hssync-start-delay = <0>; nvidia,idle-wait-delay = <17>; @@ -874,7 +875,6 @@ nvidia,hssquelch-level = <2>; nvidia,hsdiscon-level = <5>; nvidia,xcvr-hsslew = <12>; - nvidia,has-utmi-pad-registers; status = "disabled"; }; @@ -899,7 +899,7 @@ <&tegra_car TEGRA124_CLK_PLL_U>, <&tegra_car TEGRA124_CLK_USBD>; clock-names = "reg", "pll_u", "utmi-pads"; - resets = <&tegra_car 58>, <&tegra_car 22>; + resets = <&tegra_car 59>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; nvidia,hssync-start-delay = <0>; nvidia,idle-wait-delay = <17>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 7a2aeacd62c0..107395c32d82 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -191,6 +191,7 @@ compatible = "arm,cortex-a15-pmu"; interrupts = <0 68 4>, <0 69 4>; + interrupt-affinity = <&cpu0>, <&cpu1>; }; oscclk6a: oscclk6a { diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts index 23662b5a5e9d..d949facba376 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts @@ -33,28 +33,28 @@ #address-cells = <1>; #size-cells = <0>; - cpu@0 { + A9_0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0>; next-level-cache = <&L2>; }; - cpu@1 { + A9_1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <1>; next-level-cache = <&L2>; }; - cpu@2 { + A9_2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <2>; next-level-cache = <&L2>; }; - cpu@3 { + A9_3: cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <3>; @@ -170,6 +170,7 @@ compatible = "arm,pl310-cache"; reg = <0x1e00a000 0x1000>; interrupts = <0 43 4>; + cache-unified; cache-level = <2>; arm,data-latency = <1 1 1>; arm,tag-latency = <1 1 1>; @@ -181,6 +182,8 @@ <0 61 4>, <0 62 4>, <0 63 4>; + interrupt-affinity = <&A9_0>, <&A9_1>, <&A9_2>, <&A9_3>; + }; dcc { diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi index a5cd2eda3edf..9ea54b3dba09 100644 --- a/arch/arm/boot/dts/zynq-7000.dtsi +++ b/arch/arm/boot/dts/zynq-7000.dtsi @@ -193,7 +193,7 @@ }; gem0: ethernet@e000b000 { - compatible = "cdns,gem"; + compatible = "cdns,zynq-gem"; reg = <0xe000b000 0x1000>; status = "disabled"; interrupts = <0 22 4>; @@ -204,7 +204,7 @@ }; gem1: ethernet@e000c000 { - compatible = "cdns,gem"; + compatible = "cdns,zynq-gem"; reg = <0xe000c000 0x1000>; status = "disabled"; interrupts = <0 45 4>; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index ab86655c1f4b..fbbb1915c6a9 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -39,11 +39,14 @@ CONFIG_ARCH_HIP04=y CONFIG_ARCH_KEYSTONE=y CONFIG_ARCH_MESON=y CONFIG_ARCH_MXC=y +CONFIG_SOC_IMX50=y CONFIG_SOC_IMX51=y CONFIG_SOC_IMX53=y CONFIG_SOC_IMX6Q=y CONFIG_SOC_IMX6SL=y +CONFIG_SOC_IMX6SX=y 
CONFIG_SOC_VF610=y +CONFIG_SOC_LS1021A=y CONFIG_ARCH_OMAP3=y CONFIG_ARCH_OMAP4=y CONFIG_SOC_OMAP5=y @@ -426,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_EHCI_TEGRA=y CONFIG_USB_EHCI_HCD_STI=y CONFIG_USB_EHCI_HCD_PLATFORM=y -CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_ISP1760=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_STI=y CONFIG_USB_OHCI_HCD_PLATFORM=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 9ff7b54b2a83..3743ca221d40 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -393,7 +393,7 @@ CONFIG_TI_EDMA=y CONFIG_DMA_OMAP=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXTCON=m -CONFIG_EXTCON_GPIO=m +CONFIG_EXTCON_USB_GPIO=m CONFIG_EXTCON_PALMAS=m CONFIG_TI_EMIF=m CONFIG_PWM=y diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index d2f81e6b8c1c..6c2327e1c732 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 8e3fcb924db6..2ef282f96651 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -25,7 +25,7 @@ struct dma_iommu_mapping { }; struct dma_iommu_mapping * -arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size); +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size); void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping); diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 4e78065a16aa..5eed82809d82 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + preempt_disable(); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" @@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = val; + preempt_enable(); + return ret; } @@ -124,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ +#ifndef CONFIG_SMP + preempt_disable(); +#endif + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -146,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) ret = -ENOSYS; } - pagefault_enable(); /* subsumes preempt_enable() */ + pagefault_enable(); +#ifndef CONFIG_SMP + preempt_enable(); +#endif if (!ret) { switch (cmp) { diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 2fe85fff5cca..370f7a732900 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -18,7 +18,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS]; #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) -#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_sibling_cpumask(cpu) 
(&cpu_topology[cpu].thread_sibling) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 2f7e6ff67d51..0b579b2f4e0e 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -110,5 +110,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool xen_arch_need_swiotlb(struct device *dev, unsigned long pfn, unsigned long mfn); +unsigned long xen_get_swiotlb_free_pages(unsigned int order); #endif /* _ASM_ARM_XEN_PAGE_H */ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f8ccc21fa032..4e7f40c577e6 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -33,7 +33,9 @@ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq @ disable interrupts - ldr r1, [tsk, #TI_FLAGS] + ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing + tst r1, #_TIF_SYSCALL_WORK + bne __sys_trace_return tst r1, #_TIF_WORK_MASK bne fast_work_pending asm_trace_hardirqs_on diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 91c7ba182dcd..3b8c2833c537 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -303,9 +303,15 @@ static int probe_current_pmu(struct arm_pmu *pmu) static int of_pmu_irq_cfg(struct platform_device *pdev) { - int i; - int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + int i, irq; + int *irqs; + /* Don't bother with PPIs; they're already affine */ + irq = platform_get_irq(pdev, 0); + if (irq >= 0 && irq_is_percpu(irq)) + return 0; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) return -ENOMEM; @@ -317,7 +323,7 @@ static int of_pmu_irq_cfg(struct platform_device *pdev) i); if (!dn) { pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", - of_node_full_name(dn), i); + of_node_full_name(pdev->dev.of_node), i); break; } diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h index acd5b560b728..5f5cd562c593 100644 --- a/arch/arm/mach-exynos/common.h +++ b/arch/arm/mach-exynos/common.h @@ -159,6 +159,8 @@ extern void exynos_enter_aftr(void); extern struct cpuidle_exynos_data cpuidle_coupled_exynos_data; +extern void exynos_set_delayed_reset_assertion(bool enable); + extern void s5p_init_cpu(void __iomem *cpuid_addr); extern unsigned int samsung_rev(void); extern void __iomem *cpu_boot_reg_base(void); diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index bcde0dd668df..5917a30eee33 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -167,6 +167,33 @@ static void __init exynos_init_io(void) } /* + * Set or clear the USE_DELAYED_RESET_ASSERTION option. Used by smp code + * and suspend. + * + * This is necessary only on Exynos4 SoCs. When system is running + * USE_DELAYED_RESET_ASSERTION should be set so the ARM CLK clock down + * feature could properly detect global idle state when secondary CPU is + * powered down. + * + * However this should not be set when such system is going into suspend. 
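In the barrier.h hunk further up, arch/arm renames set_mb() to smp_store_mb(), routing the store through WRITE_ONCE() before the full barrier. The idiom, store a value and then force global visibility before re-reading someone else's state, can be sketched in portable C11 as an analogy (this is not the kernel macro):

	/*
	 * C11 analogy, not the kernel macro: store, then publish with a full
	 * barrier, mirroring smp_store_mb()'s WRITE_ONCE() + smp_mb(). A classic
	 * user is a waiter setting its state before re-checking a wakeup flag.
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int state;

	static void smp_store_mb_int(atomic_int *var, int value)
	{
		atomic_store_explicit(var, value, memory_order_relaxed);	/* WRITE_ONCE() */
		atomic_thread_fence(memory_order_seq_cst);			/* smp_mb() */
	}

	int main(void)
	{
		smp_store_mb_int(&state, 1);
		printf("state=%d\n", atomic_load(&state));
		return 0;
	}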
+ */ +void exynos_set_delayed_reset_assertion(bool enable) +{ + if (of_machine_is_compatible("samsung,exynos4")) { + unsigned int tmp, core_id; + + for (core_id = 0; core_id < num_possible_cpus(); core_id++) { + tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id)); + if (enable) + tmp |= S5P_USE_DELAYED_RESET_ASSERTION; + else + tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION); + pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id)); + } + } +} + +/* * Apparently, these SoCs are not able to wake-up from suspend using * the PMU. Too bad. Should they suddenly become capable of such a * feat, the matches below should be moved to suspend.c. diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index ebd135bb0995..a825bca2a2b6 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -34,30 +34,6 @@ extern void exynos4_secondary_startup(void); -/* - * Set or clear the USE_DELAYED_RESET_ASSERTION option, set on Exynos4 SoCs - * during hot-(un)plugging CPUx. - * - * The feature can be cleared safely during first boot of secondary CPU. - * - * Exynos4 SoCs require setting USE_DELAYED_RESET_ASSERTION during powering - * down a CPU so the CPU idle clock down feature could properly detect global - * idle state when CPUx is off. - */ -static void exynos_set_delayed_reset_assertion(u32 core_id, bool enable) -{ - if (soc_is_exynos4()) { - unsigned int tmp; - - tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id)); - if (enable) - tmp |= S5P_USE_DELAYED_RESET_ASSERTION; - else - tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION); - pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id)); - } -} - #ifdef CONFIG_HOTPLUG_CPU static inline void cpu_leave_lowpower(u32 core_id) { @@ -73,8 +49,6 @@ static inline void cpu_leave_lowpower(u32 core_id) : "=&r" (v) : "Ir" (CR_C), "Ir" (0x40) : "cc"); - - exynos_set_delayed_reset_assertion(core_id, false); } static inline void platform_do_lowpower(unsigned int cpu, int *spurious) @@ -87,14 +61,6 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious) /* Turn the CPU off on next WFI instruction. */ exynos_cpu_power_down(core_id); - /* - * Exynos4 SoCs require setting - * USE_DELAYED_RESET_ASSERTION so the CPU idle - * clock down feature could properly detect - * global idle state when CPUx is off. 
- */ - exynos_set_delayed_reset_assertion(core_id, true); - wfi(); if (pen_release == core_id) { @@ -371,9 +337,6 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) udelay(10); } - /* No harm if this is called during first boot of secondary CPU */ - exynos_set_delayed_reset_assertion(core_id, false); - /* * now the secondary core is starting up let it run its * calibrations, then wait for it to finish @@ -420,6 +383,8 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) exynos_sysram_init(); + exynos_set_delayed_reset_assertion(true); + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) scu_enable(scu_base_addr()); diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c index cbe56b35aea0..a9686535f9ed 100644 --- a/arch/arm/mach-exynos/pm_domains.c +++ b/arch/arm/mach-exynos/pm_domains.c @@ -188,7 +188,7 @@ no_clk: args.np = np; args.args_count = 0; child_domain = of_genpd_get_from_provider(&args); - if (!child_domain) + if (IS_ERR(child_domain)) continue; if (of_parse_phandle_with_args(np, "power-domains", @@ -196,7 +196,7 @@ no_clk: continue; parent_domain = of_genpd_get_from_provider(&args); - if (!parent_domain) + if (IS_ERR(parent_domain)) continue; if (pm_genpd_add_subdomain(parent_domain, child_domain)) diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 3e6aea7f83af..7d23ce04cad5 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -87,8 +87,8 @@ static unsigned int exynos_pmu_spare3; static u32 exynos_irqwake_intmask = 0xffffffff; static const struct exynos_wkup_irq exynos3250_wkup_irq[] = { - { 105, BIT(1) }, /* RTC alarm */ - { 106, BIT(2) }, /* RTC tick */ + { 73, BIT(1) }, /* RTC alarm */ + { 74, BIT(2) }, /* RTC tick */ { /* sentinel */ }, }; @@ -342,6 +342,8 @@ static void exynos_pm_enter_sleep_mode(void) static void exynos_pm_prepare(void) { + exynos_set_delayed_reset_assertion(false); + /* Set wake-up mask registers */ exynos_pm_set_wakeup_mask(); @@ -482,6 +484,7 @@ early_wakeup: /* Clear SLEEP mode set in INFORM1 */ pmu_raw_writel(0x0, S5P_INFORM1); + exynos_set_delayed_reset_assertion(true); } static void exynos3250_pm_resume(void) @@ -723,8 +726,10 @@ void __init exynos_pm_init(void) return; } - if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) + if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) { pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + return; + } pm_data = (const struct exynos_pm_data *) match->data; diff --git a/arch/arm/mach-gemini/common.h b/arch/arm/mach-gemini/common.h index 38a45260a7c8..dd883698ff7e 100644 --- a/arch/arm/mach-gemini/common.h +++ b/arch/arm/mach-gemini/common.h @@ -12,6 +12,8 @@ #ifndef __GEMINI_COMMON_H__ #define __GEMINI_COMMON_H__ +#include <linux/reboot.h> + struct mtd_partition; extern void gemini_map_io(void); @@ -26,6 +28,6 @@ extern int platform_register_pflash(unsigned int size, struct mtd_partition *parts, unsigned int nr_parts); -extern void gemini_restart(char mode, const char *cmd); +extern void gemini_restart(enum reboot_mode mode, const char *cmd); #endif /* __GEMINI_COMMON_H__ */ diff --git a/arch/arm/mach-gemini/reset.c b/arch/arm/mach-gemini/reset.c index b26659759e27..21a6d6d4f9c4 100644 --- a/arch/arm/mach-gemini/reset.c +++ b/arch/arm/mach-gemini/reset.c @@ -14,7 +14,9 @@ #include <mach/hardware.h> #include <mach/global_reg.h> -void gemini_restart(char mode, const char *cmd) +#include "common.h" + +void gemini_restart(enum reboot_mode 
mode, const char *cmd) { __raw_writel(RESET_GLOBAL | RESET_CPU1, IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET); diff --git a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c index fb8d4a2ad48c..a5edd7d60266 100644 --- a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c +++ b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Pengutronix, Wolfram Sang <w.sang@pengutronix.de> + * Copyright (C) 2010 Pengutronix, Wolfram Sang <kernel@pengutronix.de> * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License version 2 as published by the diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 4d60005e9277..6d0893a3828e 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void) struct device_node *np; np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc"); - if (WARN_ON(!np || - !of_find_property(np, "interrupt-controller", NULL))) - pr_warn("Outdated DT detected, system is about to crash!!!\n"); + if (WARN_ON(!np)) + return; + + if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) { + pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + + /* map GPC, so that at least CPUidle and WARs keep working */ + gpc_base = of_iomap(np, 0); + } } #ifdef CONFIG_PM_GENERIC_DOMAINS @@ -443,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev) struct regulator *pu_reg; int ret; + /* bail out if DT too old and doesn't provide the necessary info */ + if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells")) + return 0; + pu_reg = devm_regulator_get_optional(&pdev->dev, "pu"); if (PTR_ERR(pu_reg) == -ENODEV) pu_reg = NULL; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 355b08936871..752969ff9de0 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -171,6 +171,12 @@ */ #define LINKS_PER_OCP_IF 2 +/* + * Address offset (in bytes) between the reset control and the reset + * status registers: 4 bytes on OMAP4 + */ +#define OMAP4_RST_CTRL_ST_OFFSET 4 + /** * struct omap_hwmod_soc_ops - fn ptrs for some SoC-specific operations * @enable_module: function to enable a module (via MODULEMODE) @@ -3016,10 +3022,12 @@ static int _omap4_deassert_hardreset(struct omap_hwmod *oh, if (ohri->st_shift) pr_err("omap_hwmod: %s: %s: hwmod data error: OMAP4 does not support st_shift\n", oh->name, ohri->name); - return omap_prm_deassert_hardreset(ohri->rst_shift, 0, + return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->rst_shift, oh->clkdm->pwrdm.ptr->prcm_partition, oh->clkdm->pwrdm.ptr->prcm_offs, - oh->prcm.omap4.rstctrl_offs, 0); + oh->prcm.omap4.rstctrl_offs, + oh->prcm.omap4.rstctrl_offs + + OMAP4_RST_CTRL_ST_OFFSET); } /** @@ -3048,27 +3056,6 @@ static int _omap4_is_hardreset_asserted(struct omap_hwmod *oh, } /** - * _am33xx_assert_hardreset - call AM33XX PRM hardreset fn with hwmod args - * @oh: struct omap_hwmod * to assert hardreset - * @ohri: hardreset line data - * - * Call am33xx_prminst_assert_hardreset() with parameters extracted - * from the hwmod @oh and the hardreset line data @ohri. Only - * intended for use as an soc_ops function pointer. Passes along the - * return value from am33xx_prminst_assert_hardreset(). XXX This - * function is scheduled for removal when the PRM code is moved into - * drivers/. 
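The rstctrl/rstst rework above is easy to miss: omap4_prminst_deassert_hardreset() used to hardcode the status register as control + 4, which apparently does not hold on AM33xx, where RSTST is not laid out next to RSTCTRL. The offset arithmetic now lives in the OMAP4 caller, so the AM33xx path can pass its own rstst_offs and share the generic implementation. A small worked example with a made-up control offset:

	/* hypothetical hwmod, for illustration only */
	u16 rstctrl_offs = 0x10;	/* invented value */
	u16 rstst_offs = rstctrl_offs + OMAP4_RST_CTRL_ST_OFFSET;	/* 0x14 on OMAP4 */

	/* AM33xx instead supplies oh->prcm.omap4.rstst_offs directly,
	 * since its status registers follow a different layout. */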
- */ -static int _am33xx_assert_hardreset(struct omap_hwmod *oh, - struct omap_hwmod_rst_info *ohri) - -{ - return omap_prm_assert_hardreset(ohri->rst_shift, 0, - oh->clkdm->pwrdm.ptr->prcm_offs, - oh->prcm.omap4.rstctrl_offs); -} - -/** * _am33xx_deassert_hardreset - call AM33XX PRM hardreset fn with hwmod args * @oh: struct omap_hwmod * to deassert hardreset * @ohri: hardreset line data @@ -3083,32 +3070,13 @@ static int _am33xx_assert_hardreset(struct omap_hwmod *oh, static int _am33xx_deassert_hardreset(struct omap_hwmod *oh, struct omap_hwmod_rst_info *ohri) { - return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift, 0, + return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift, + oh->clkdm->pwrdm.ptr->prcm_partition, oh->clkdm->pwrdm.ptr->prcm_offs, oh->prcm.omap4.rstctrl_offs, oh->prcm.omap4.rstst_offs); } -/** - * _am33xx_is_hardreset_asserted - call AM33XX PRM hardreset fn with hwmod args - * @oh: struct omap_hwmod * to test hardreset - * @ohri: hardreset line data - * - * Call am33xx_prminst_is_hardreset_asserted() with parameters - * extracted from the hwmod @oh and the hardreset line data @ohri. - * Only intended for use as an soc_ops function pointer. Passes along - * the return value from am33xx_prminst_is_hardreset_asserted(). XXX - * This function is scheduled for removal when the PRM code is moved - * into drivers/. - */ -static int _am33xx_is_hardreset_asserted(struct omap_hwmod *oh, - struct omap_hwmod_rst_info *ohri) -{ - return omap_prm_is_hardreset_asserted(ohri->rst_shift, 0, - oh->clkdm->pwrdm.ptr->prcm_offs, - oh->prcm.omap4.rstctrl_offs); -} - /* Public functions */ u32 omap_hwmod_read(struct omap_hwmod *oh, u16 reg_offs) @@ -3908,21 +3876,13 @@ void __init omap_hwmod_init(void) soc_ops.init_clkdm = _init_clkdm; soc_ops.update_context_lost = _omap4_update_context_lost; soc_ops.get_context_lost = _omap4_get_context_lost; - } else if (soc_is_am43xx()) { + } else if (cpu_is_ti816x() || soc_is_am33xx() || soc_is_am43xx()) { soc_ops.enable_module = _omap4_enable_module; soc_ops.disable_module = _omap4_disable_module; soc_ops.wait_target_ready = _omap4_wait_target_ready; soc_ops.assert_hardreset = _omap4_assert_hardreset; - soc_ops.deassert_hardreset = _omap4_deassert_hardreset; - soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted; - soc_ops.init_clkdm = _init_clkdm; - } else if (cpu_is_ti816x() || soc_is_am33xx()) { - soc_ops.enable_module = _omap4_enable_module; - soc_ops.disable_module = _omap4_disable_module; - soc_ops.wait_target_ready = _omap4_wait_target_ready; - soc_ops.assert_hardreset = _am33xx_assert_hardreset; soc_ops.deassert_hardreset = _am33xx_deassert_hardreset; - soc_ops.is_hardreset_asserted = _am33xx_is_hardreset_asserted; + soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted; soc_ops.init_clkdm = _init_clkdm; } else { WARN(1, "omap_hwmod: unknown SoC type\n"); diff --git a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c index e2223148ba4d..17e8004fc20f 100644 --- a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c @@ -544,6 +544,44 @@ static struct omap_hwmod am43xx_hdq1w_hwmod = { }, }; +static struct omap_hwmod_class_sysconfig am43xx_vpfe_sysc = { + .rev_offs = 0x0, + .sysc_offs = 0x104, + .sysc_flags = SYSC_HAS_MIDLEMODE | SYSC_HAS_SIDLEMODE, + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | + MSTANDBY_FORCE | MSTANDBY_SMART | MSTANDBY_NO), + .sysc_fields = &omap_hwmod_sysc_type2, +}; + +static struct omap_hwmod_class 
am43xx_vpfe_hwmod_class = { + .name = "vpfe", + .sysc = &am43xx_vpfe_sysc, +}; + +static struct omap_hwmod am43xx_vpfe0_hwmod = { + .name = "vpfe0", + .class = &am43xx_vpfe_hwmod_class, + .clkdm_name = "l3s_clkdm", + .prcm = { + .omap4 = { + .modulemode = MODULEMODE_SWCTRL, + .clkctrl_offs = AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET, + }, + }, +}; + +static struct omap_hwmod am43xx_vpfe1_hwmod = { + .name = "vpfe1", + .class = &am43xx_vpfe_hwmod_class, + .clkdm_name = "l3s_clkdm", + .prcm = { + .omap4 = { + .modulemode = MODULEMODE_SWCTRL, + .clkctrl_offs = AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET, + }, + }, +}; + /* Interfaces */ static struct omap_hwmod_ocp_if am43xx_l3_main__l4_hs = { .master = &am33xx_l3_main_hwmod, @@ -825,6 +863,34 @@ static struct omap_hwmod_ocp_if am43xx_l4_ls__hdq1w = { .user = OCP_USER_MPU | OCP_USER_SDMA, }; +static struct omap_hwmod_ocp_if am43xx_l3__vpfe0 = { + .master = &am43xx_vpfe0_hwmod, + .slave = &am33xx_l3_main_hwmod, + .clk = "l3_gclk", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + +static struct omap_hwmod_ocp_if am43xx_l3__vpfe1 = { + .master = &am43xx_vpfe1_hwmod, + .slave = &am33xx_l3_main_hwmod, + .clk = "l3_gclk", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + +static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe0 = { + .master = &am33xx_l4_ls_hwmod, + .slave = &am43xx_vpfe0_hwmod, + .clk = "l4ls_gclk", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + +static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe1 = { + .master = &am33xx_l4_ls_hwmod, + .slave = &am43xx_vpfe1_hwmod, + .clk = "l4ls_gclk", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = { &am33xx_l4_wkup__synctimer, &am43xx_l4_ls__timer8, @@ -925,6 +991,10 @@ static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = { &am43xx_l4_ls__dss_dispc, &am43xx_l4_ls__dss_rfbi, &am43xx_l4_ls__hdq1w, + &am43xx_l3__vpfe0, + &am43xx_l3__vpfe1, + &am43xx_l4_ls__vpfe0, + &am43xx_l4_ls__vpfe1, NULL, }; diff --git a/arch/arm/mach-omap2/prcm43xx.h b/arch/arm/mach-omap2/prcm43xx.h index 48df3b55057e..d0261996db6d 100644 --- a/arch/arm/mach-omap2/prcm43xx.h +++ b/arch/arm/mach-omap2/prcm43xx.h @@ -144,5 +144,6 @@ #define AM43XX_CM_PER_USBPHYOCP2SCP1_CLKCTRL_OFFSET 0x05C0 #define AM43XX_CM_PER_DSS_CLKCTRL_OFFSET 0x0a20 #define AM43XX_CM_PER_HDQ1W_CLKCTRL_OFFSET 0x04a0 - +#define AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET 0x0068 +#define AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET 0x0070 #endif diff --git a/arch/arm/mach-omap2/prm-regbits-34xx.h b/arch/arm/mach-omap2/prm-regbits-34xx.h index cbefbd7cfdb5..661d753df584 100644 --- a/arch/arm/mach-omap2/prm-regbits-34xx.h +++ b/arch/arm/mach-omap2/prm-regbits-34xx.h @@ -112,6 +112,7 @@ #define OMAP3430_VC_CMD_ONLP_SHIFT 16 #define OMAP3430_VC_CMD_RET_SHIFT 8 #define OMAP3430_VC_CMD_OFF_SHIFT 0 +#define OMAP3430_SREN_MASK (1 << 4) #define OMAP3430_HSEN_MASK (1 << 3) #define OMAP3430_MCODE_MASK (0x7 << 0) #define OMAP3430_VALID_MASK (1 << 24) diff --git a/arch/arm/mach-omap2/prm-regbits-44xx.h b/arch/arm/mach-omap2/prm-regbits-44xx.h index b1c7a33e00e7..e794828dee55 100644 --- a/arch/arm/mach-omap2/prm-regbits-44xx.h +++ b/arch/arm/mach-omap2/prm-regbits-44xx.h @@ -35,6 +35,7 @@ #define OMAP4430_GLOBAL_WARM_SW_RST_SHIFT 1 #define OMAP4430_GLOBAL_WUEN_MASK (1 << 16) #define OMAP4430_HSMCODE_MASK (0x7 << 0) +#define OMAP4430_SRMODEEN_MASK (1 << 4) #define OMAP4430_HSMODEEN_MASK (1 << 3) #define OMAP4430_HSSCLL_SHIFT 24 #define OMAP4430_ICEPICK_RST_SHIFT 9 diff --git a/arch/arm/mach-omap2/prminst44xx.c 
b/arch/arm/mach-omap2/prminst44xx.c index c4859c4d3646..d0b15dbafa2e 100644 --- a/arch/arm/mach-omap2/prminst44xx.c +++ b/arch/arm/mach-omap2/prminst44xx.c @@ -87,12 +87,6 @@ u32 omap4_prminst_rmw_inst_reg_bits(u32 mask, u32 bits, u8 part, s16 inst, return v; } -/* - * Address offset (in bytes) between the reset control and the reset - * status registers: 4 bytes on OMAP4 - */ -#define OMAP4_RST_CTRL_ST_OFFSET 4 - /** * omap4_prminst_is_hardreset_asserted - read the HW reset line state of * submodules contained in the hwmod module @@ -141,11 +135,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst, * omap4_prminst_deassert_hardreset - deassert a submodule hardreset line and * wait * @shift: register bit shift corresponding to the reset line to deassert - * @st_shift: status bit offset, not used for OMAP4+ + * @st_shift: status bit offset corresponding to the reset line * @part: PRM partition * @inst: PRM instance offset * @rstctrl_offs: reset register offset - * @st_offs: reset status register offset, not used for OMAP4+ + * @rstst_offs: reset status register offset * * Some IPs like dsp, ipu or iva contain processors that require an HW * reset line to be asserted / deasserted in order to fully enable the @@ -157,11 +151,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst, * of reset, or -EBUSY if the submodule did not exit reset promptly. */ int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst, - u16 rstctrl_offs, u16 st_offs) + u16 rstctrl_offs, u16 rstst_offs) { int c; u32 mask = 1 << shift; - u16 rstst_offs = rstctrl_offs + OMAP4_RST_CTRL_ST_OFFSET; + u32 st_mask = 1 << st_shift; /* Check the current status to avoid de-asserting the line twice */ if (omap4_prminst_is_hardreset_asserted(shift, part, inst, @@ -169,13 +163,13 @@ int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst, return -EEXIST; /* Clear the reset status by writing 1 to the status bit */ - omap4_prminst_rmw_inst_reg_bits(0xffffffff, mask, part, inst, + omap4_prminst_rmw_inst_reg_bits(0xffffffff, st_mask, part, inst, rstst_offs); /* de-assert the reset control line */ omap4_prminst_rmw_inst_reg_bits(mask, 0, part, inst, rstctrl_offs); /* wait the status to be set */ - omap_test_timeout(omap4_prminst_is_hardreset_asserted(shift, part, inst, - rstst_offs), + omap_test_timeout(omap4_prminst_is_hardreset_asserted(st_shift, part, + inst, rstst_offs), MAX_MODULE_HARDRESET_WAIT, c); return (c == MAX_MODULE_HARDRESET_WAIT) ? -EBUSY : 0; diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index d1dedc8195ed..eafd120b53f1 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -203,23 +203,8 @@ save_context_wfi: */ ldr r1, kernel_flush blx r1 - /* - * The kernel doesn't interwork: v7_flush_dcache_all in particluar will - * always return in Thumb state when CONFIG_THUMB2_KERNEL is enabled. - * This sequence switches back to ARM. Note that .align may insert a - * nop: bx pc needs to be word-aligned in order to work. 
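For readers following the prminst44xx.c hunk above, the de-assert sequence is worth spelling out. A rough sketch of the same protocol in plain MMIO form (illustrative only; the real code goes through omap4_prminst_rmw_inst_reg_bits() and the partition/instance lookup rather than raw readl/writel):

	/* status bits are write-1-to-clear, so flush stale state first */
	writel(st_mask, base + rstst_offs);
	/* release the reset control line */
	writel(readl(base + rstctrl_offs) & ~mask, base + rstctrl_offs);
	/* then poll until the status bit latches, or give up with -EBUSY */
	while (!(readl(base + rstst_offs) & st_mask) && --timeout)
		udelay(1);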
- */ - THUMB( .thumb ) - THUMB( .align ) - THUMB( bx pc ) - THUMB( nop ) - .arm - b omap3_do_wfi - -/* - * Local variables - */ +ENDPROC(omap34xx_cpu_suspend) omap3_do_wfi_sram_addr: .word omap3_do_wfi_sram kernel_flush: @@ -364,10 +349,7 @@ exit_nonoff_modes: * =================================== */ ldmfd sp!, {r4 - r11, pc} @ restore regs and return - -/* - * Local variables - */ +ENDPROC(omap3_do_wfi) sdrc_power: .word SDRC_POWER_V cm_idlest1_core: diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index cef67af9e9b8..cac46d852da1 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -298,14 +298,11 @@ static int __init omap_dm_timer_init_one(struct omap_dm_timer *timer, if (IS_ERR(src)) return PTR_ERR(src); - if (clk_get_parent(timer->fclk) != src) { - r = clk_set_parent(timer->fclk, src); - if (r < 0) { - pr_warn("%s: %s cannot set source\n", __func__, - oh->name); - clk_put(src); - return r; - } + r = clk_set_parent(timer->fclk, src); + if (r < 0) { + pr_warn("%s: %s cannot set source\n", __func__, oh->name); + clk_put(src); + return r; } clk_put(src); diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c index be9ef834fa81..076fd20d7e5a 100644 --- a/arch/arm/mach-omap2/vc.c +++ b/arch/arm/mach-omap2/vc.c @@ -316,7 +316,8 @@ static void __init omap3_vc_init_pmic_signaling(struct voltagedomain *voltdm) * idle. And we can also scale voltages to zero for off-idle. * Note that no actual voltage scaling during off-idle will * happen unless the board specific twl4030 PMIC scripts are - * loaded. + * loaded. See also omap_vc_i2c_init for comments regarding + * erratum i531. */ val = voltdm->read(OMAP3_PRM_VOLTCTRL_OFFSET); if (!(val & OMAP3430_PRM_VOLTCTRL_SEL_OFF)) { @@ -704,9 +705,16 @@ static void __init omap_vc_i2c_init(struct voltagedomain *voltdm) return; } + /* + * Note that for omap3 OMAP3430_SREN_MASK clears SREN to work around + * erratum i531 "Extra Power Consumed When Repeated Start Operation + * Mode Is Enabled on I2C Interface Dedicated for Smart Reflex (I2C4)". + * Otherwise I2C4 eventually leads into about 23mW extra power being + * consumed even during off idle using VMODE. 
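The new i2c_cfg_clear_mask makes the rmw() call just below clear both SREN and HSEN before re-setting HSEN alone. Assuming the usual voltdm->rmw(clear_mask, set_bits, offset) read-modify-write semantics, it expands to roughly:

	u32 val = voltdm->read(i2c_cfg_reg);
	val &= ~(OMAP3430_SREN_MASK | OMAP3430_HSEN_MASK);	/* clear bits 4 and 3 */
	val |= OMAP3430_HSEN_MASK;				/* keep high-speed mode */
	voltdm->write(val, i2c_cfg_reg);
	/* net effect: SREN (bit 4) is guaranteed 0, avoiding erratum i531 */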
+ */ i2c_high_speed = voltdm->pmic->i2c_high_speed; if (i2c_high_speed) - voltdm->rmw(vc->common->i2c_cfg_hsen_mask, + voltdm->rmw(vc->common->i2c_cfg_clear_mask, vc->common->i2c_cfg_hsen_mask, vc->common->i2c_cfg_reg); diff --git a/arch/arm/mach-omap2/vc.h b/arch/arm/mach-omap2/vc.h index cdbdd78e755e..89b83b7ff3ec 100644 --- a/arch/arm/mach-omap2/vc.h +++ b/arch/arm/mach-omap2/vc.h @@ -34,6 +34,7 @@ struct voltagedomain; * @cmd_ret_shift: RET field shift in PRM_VC_CMD_VAL_* register * @cmd_off_shift: OFF field shift in PRM_VC_CMD_VAL_* register * @i2c_cfg_reg: I2C configuration register offset + * @i2c_cfg_clear_mask: high-speed mode bit clear mask in I2C config register * @i2c_cfg_hsen_mask: high-speed mode bit field mask in I2C config register * @i2c_mcode_mask: MCODE field mask for I2C config register * @@ -52,6 +53,7 @@ struct omap_vc_common { u8 cmd_ret_shift; u8 cmd_off_shift; u8 i2c_cfg_reg; + u8 i2c_cfg_clear_mask; u8 i2c_cfg_hsen_mask; u8 i2c_mcode_mask; }; diff --git a/arch/arm/mach-omap2/vc3xxx_data.c b/arch/arm/mach-omap2/vc3xxx_data.c index 75bc4aa22b3a..71d74c9172c1 100644 --- a/arch/arm/mach-omap2/vc3xxx_data.c +++ b/arch/arm/mach-omap2/vc3xxx_data.c @@ -40,6 +40,7 @@ static struct omap_vc_common omap3_vc_common = { .cmd_onlp_shift = OMAP3430_VC_CMD_ONLP_SHIFT, .cmd_ret_shift = OMAP3430_VC_CMD_RET_SHIFT, .cmd_off_shift = OMAP3430_VC_CMD_OFF_SHIFT, + .i2c_cfg_clear_mask = OMAP3430_SREN_MASK | OMAP3430_HSEN_MASK, .i2c_cfg_hsen_mask = OMAP3430_HSEN_MASK, .i2c_cfg_reg = OMAP3_PRM_VC_I2C_CFG_OFFSET, .i2c_mcode_mask = OMAP3430_MCODE_MASK, diff --git a/arch/arm/mach-omap2/vc44xx_data.c b/arch/arm/mach-omap2/vc44xx_data.c index 085e5d6a04fd..2abd5fa8a697 100644 --- a/arch/arm/mach-omap2/vc44xx_data.c +++ b/arch/arm/mach-omap2/vc44xx_data.c @@ -42,6 +42,7 @@ static const struct omap_vc_common omap4_vc_common = { .cmd_ret_shift = OMAP4430_RET_SHIFT, .cmd_off_shift = OMAP4430_OFF_SHIFT, .i2c_cfg_reg = OMAP4_PRM_VC_CFG_I2C_MODE_OFFSET, + .i2c_cfg_clear_mask = OMAP4430_SRMODEEN_MASK | OMAP4430_HSMODEEN_MASK, .i2c_cfg_hsen_mask = OMAP4430_HSMODEEN_MASK, .i2c_mcode_mask = OMAP4430_HSMCODE_MASK, }; diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 8896e71586f5..f09683687963 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -691,4 +691,13 @@ config SHARPSL_PM_MAX1111 config PXA310_ULPI bool +config PXA_SYSTEMS_CPLDS + tristate "Motherboard cplds" + default ARCH_LUBBOCK || MACH_MAINSTONE + help + This driver supports the Lubbock and Mainstone multifunction chip + found on the pxa25x development platform system (Lubbock) and pxa27x + development platform system (Mainstone). This IO board supports the + interrupts handling, ethernet controller, flash chips, etc ... 
+ endif diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile index eb0bf7678a99..4087d334ecdf 100644 --- a/arch/arm/mach-pxa/Makefile +++ b/arch/arm/mach-pxa/Makefile @@ -90,4 +90,5 @@ obj-$(CONFIG_MACH_RAUMFELD_CONNECTOR) += raumfeld.o obj-$(CONFIG_MACH_RAUMFELD_SPEAKER) += raumfeld.o obj-$(CONFIG_MACH_ZIPIT2) += z2.o +obj-$(CONFIG_PXA_SYSTEMS_CPLDS) += pxa_cplds_irqs.o obj-$(CONFIG_TOSA_BT) += tosa-bt.o diff --git a/arch/arm/mach-pxa/include/mach/lubbock.h b/arch/arm/mach-pxa/include/mach/lubbock.h index 958cd6af9384..1eecf794acd2 100644 --- a/arch/arm/mach-pxa/include/mach/lubbock.h +++ b/arch/arm/mach-pxa/include/mach/lubbock.h @@ -37,7 +37,9 @@ #define LUB_GP __LUB_REG(LUBBOCK_FPGA_PHYS + 0x100) /* Board specific IRQs */ -#define LUBBOCK_IRQ(x) (IRQ_BOARD_START + (x)) +#define LUBBOCK_NR_IRQS IRQ_BOARD_START + +#define LUBBOCK_IRQ(x) (LUBBOCK_NR_IRQS + (x)) #define LUBBOCK_SD_IRQ LUBBOCK_IRQ(0) #define LUBBOCK_SA1111_IRQ LUBBOCK_IRQ(1) #define LUBBOCK_USB_IRQ LUBBOCK_IRQ(2) /* usb connect */ @@ -47,8 +49,7 @@ #define LUBBOCK_USB_DISC_IRQ LUBBOCK_IRQ(6) /* usb disconnect */ #define LUBBOCK_LAST_IRQ LUBBOCK_IRQ(6) -#define LUBBOCK_SA1111_IRQ_BASE (IRQ_BOARD_START + 16) -#define LUBBOCK_NR_IRQS (IRQ_BOARD_START + 16 + 55) +#define LUBBOCK_SA1111_IRQ_BASE (LUBBOCK_NR_IRQS + 32) #ifndef __ASSEMBLY__ extern void lubbock_set_misc_wr(unsigned int mask, unsigned int set); diff --git a/arch/arm/mach-pxa/include/mach/mainstone.h b/arch/arm/mach-pxa/include/mach/mainstone.h index 1bfc4e822a41..e82a7d31104e 100644 --- a/arch/arm/mach-pxa/include/mach/mainstone.h +++ b/arch/arm/mach-pxa/include/mach/mainstone.h @@ -120,7 +120,9 @@ #define MST_PCMCIA_PWR_VCC_50 0x4 /* voltage VCC = 5.0V */ /* board specific IRQs */ -#define MAINSTONE_IRQ(x) (IRQ_BOARD_START + (x)) +#define MAINSTONE_NR_IRQS IRQ_BOARD_START + +#define MAINSTONE_IRQ(x) (MAINSTONE_NR_IRQS + (x)) #define MAINSTONE_MMC_IRQ MAINSTONE_IRQ(0) #define MAINSTONE_USIM_IRQ MAINSTONE_IRQ(1) #define MAINSTONE_USBC_IRQ MAINSTONE_IRQ(2) @@ -136,6 +138,4 @@ #define MAINSTONE_S1_STSCHG_IRQ MAINSTONE_IRQ(14) #define MAINSTONE_S1_IRQ MAINSTONE_IRQ(15) -#define MAINSTONE_NR_IRQS (IRQ_BOARD_START + 16) - #endif diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index d8a1be619f21..4ac9ab80d24b 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -12,6 +12,7 @@ * published by the Free Software Foundation. 
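The lubbock.h (and matching mainstone.h) hunks above invert the old numbering scheme: LUBBOCK_NR_IRQS now stops at IRQ_BOARD_START, so the FPGA interrupts sit past the machine's .nr_irqs and get their descriptors from the new driver's irqdomain instead of a static reservation. With an invented IRQ_BOARD_START, the arithmetic works out like this:

	/* hypothetical value, for illustration only */
	#define IRQ_BOARD_START		160
	#define LUBBOCK_NR_IRQS		IRQ_BOARD_START		/* == 160 */
	#define LUBBOCK_IRQ(x)		(LUBBOCK_NR_IRQS + (x))

	/* LUBBOCK_SD_IRQ          == LUBBOCK_IRQ(0) == 160
	 * LUBBOCK_SA1111_IRQ_BASE == 160 + 32       == 192
	 * pxa_cplds_irqs maps LUBBOCK_IRQ(0)..LUBBOCK_IRQ(6) via
	 * irq_create_strict_mappings() from its second IRQ resource. */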
*/ #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -123,84 +124,6 @@ void lubbock_set_misc_wr(unsigned int mask, unsigned int set) } EXPORT_SYMBOL(lubbock_set_misc_wr); -static unsigned long lubbock_irq_enabled; - -static void lubbock_mask_irq(struct irq_data *d) -{ - int lubbock_irq = (d->irq - LUBBOCK_IRQ(0)); - LUB_IRQ_MASK_EN = (lubbock_irq_enabled &= ~(1 << lubbock_irq)); -} - -static void lubbock_unmask_irq(struct irq_data *d) -{ - int lubbock_irq = (d->irq - LUBBOCK_IRQ(0)); - /* the irq can be acknowledged only if deasserted, so it's done here */ - LUB_IRQ_SET_CLR &= ~(1 << lubbock_irq); - LUB_IRQ_MASK_EN = (lubbock_irq_enabled |= (1 << lubbock_irq)); -} - -static struct irq_chip lubbock_irq_chip = { - .name = "FPGA", - .irq_ack = lubbock_mask_irq, - .irq_mask = lubbock_mask_irq, - .irq_unmask = lubbock_unmask_irq, -}; - -static void lubbock_irq_handler(unsigned int irq, struct irq_desc *desc) -{ - unsigned long pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled; - do { - /* clear our parent irq */ - desc->irq_data.chip->irq_ack(&desc->irq_data); - if (likely(pending)) { - irq = LUBBOCK_IRQ(0) + __ffs(pending); - generic_handle_irq(irq); - } - pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled; - } while (pending); -} - -static void __init lubbock_init_irq(void) -{ - int irq; - - pxa25x_init_irq(); - - /* setup extra lubbock irqs */ - for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_LAST_IRQ; irq++) { - irq_set_chip_and_handler(irq, &lubbock_irq_chip, - handle_level_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - } - - irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), lubbock_irq_handler); - irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING); -} - -#ifdef CONFIG_PM - -static void lubbock_irq_resume(void) -{ - LUB_IRQ_MASK_EN = lubbock_irq_enabled; -} - -static struct syscore_ops lubbock_irq_syscore_ops = { - .resume = lubbock_irq_resume, -}; - -static int __init lubbock_irq_device_init(void) -{ - if (machine_is_lubbock()) { - register_syscore_ops(&lubbock_irq_syscore_ops); - return 0; - } - return -ENODEV; -} - -device_initcall(lubbock_irq_device_init); - -#endif - static int lubbock_udc_is_connected(void) { return (LUB_MISC_RD & (1 << 9)) == 0; @@ -383,11 +306,38 @@ static struct platform_device lubbock_flash_device[2] = { }, }; +static struct resource lubbock_cplds_resources[] = { + [0] = { + .start = LUBBOCK_FPGA_PHYS + 0xc0, + .end = LUBBOCK_FPGA_PHYS + 0xe0 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = PXA_GPIO_TO_IRQ(0), + .end = PXA_GPIO_TO_IRQ(0), + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE, + }, + [2] = { + .start = LUBBOCK_IRQ(0), + .end = LUBBOCK_IRQ(6), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device lubbock_cplds_device = { + .name = "pxa_cplds_irqs", + .id = -1, + .resource = &lubbock_cplds_resources[0], + .num_resources = 3, +}; + + static struct platform_device *devices[] __initdata = { &sa1111_device, &smc91x_device, &lubbock_flash_device[0], &lubbock_flash_device[1], + &lubbock_cplds_device, }; static struct pxafb_mode_info sharp_lm8v31_mode = { @@ -648,7 +598,7 @@ MACHINE_START(LUBBOCK, "Intel DBPXA250 Development Platform (aka Lubbock)") /* Maintainer: MontaVista Software Inc. 
*/ .map_io = lubbock_map_io, .nr_irqs = LUBBOCK_NR_IRQS, - .init_irq = lubbock_init_irq, + .init_irq = pxa25x_init_irq, .handle_irq = pxa25x_handle_irq, .init_time = pxa_timer_init, .init_machine = lubbock_init, diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index 78b84c0dfc79..2c0658cf6be2 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -13,6 +13,7 @@ * published by the Free Software Foundation. */ #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/init.h> #include <linux/platform_device.h> #include <linux/syscore_ops.h> @@ -122,92 +123,6 @@ static unsigned long mainstone_pin_config[] = { GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH, }; -static unsigned long mainstone_irq_enabled; - -static void mainstone_mask_irq(struct irq_data *d) -{ - int mainstone_irq = (d->irq - MAINSTONE_IRQ(0)); - MST_INTMSKENA = (mainstone_irq_enabled &= ~(1 << mainstone_irq)); -} - -static void mainstone_unmask_irq(struct irq_data *d) -{ - int mainstone_irq = (d->irq - MAINSTONE_IRQ(0)); - /* the irq can be acknowledged only if deasserted, so it's done here */ - MST_INTSETCLR &= ~(1 << mainstone_irq); - MST_INTMSKENA = (mainstone_irq_enabled |= (1 << mainstone_irq)); -} - -static struct irq_chip mainstone_irq_chip = { - .name = "FPGA", - .irq_ack = mainstone_mask_irq, - .irq_mask = mainstone_mask_irq, - .irq_unmask = mainstone_unmask_irq, -}; - -static void mainstone_irq_handler(unsigned int irq, struct irq_desc *desc) -{ - unsigned long pending = MST_INTSETCLR & mainstone_irq_enabled; - do { - /* clear useless edge notification */ - desc->irq_data.chip->irq_ack(&desc->irq_data); - if (likely(pending)) { - irq = MAINSTONE_IRQ(0) + __ffs(pending); - generic_handle_irq(irq); - } - pending = MST_INTSETCLR & mainstone_irq_enabled; - } while (pending); -} - -static void __init mainstone_init_irq(void) -{ - int irq; - - pxa27x_init_irq(); - - /* setup extra Mainstone irqs */ - for(irq = MAINSTONE_IRQ(0); irq <= MAINSTONE_IRQ(15); irq++) { - irq_set_chip_and_handler(irq, &mainstone_irq_chip, - handle_level_irq); - if (irq == MAINSTONE_IRQ(10) || irq == MAINSTONE_IRQ(14)) - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE | IRQF_NOAUTOEN); - else - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - } - set_irq_flags(MAINSTONE_IRQ(8), 0); - set_irq_flags(MAINSTONE_IRQ(12), 0); - - MST_INTMSKENA = 0; - MST_INTSETCLR = 0; - - irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), mainstone_irq_handler); - irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING); -} - -#ifdef CONFIG_PM - -static void mainstone_irq_resume(void) -{ - MST_INTMSKENA = mainstone_irq_enabled; -} - -static struct syscore_ops mainstone_irq_syscore_ops = { - .resume = mainstone_irq_resume, -}; - -static int __init mainstone_irq_device_init(void) -{ - if (machine_is_mainstone()) - register_syscore_ops(&mainstone_irq_syscore_ops); - - return 0; -} - -device_initcall(mainstone_irq_device_init); - -#endif - - static struct resource smc91x_resources[] = { [0] = { .start = (MST_ETH_PHYS + 0x300), @@ -487,11 +402,37 @@ static struct platform_device mst_gpio_keys_device = { }, }; +static struct resource mst_cplds_resources[] = { + [0] = { + .start = MST_FPGA_PHYS + 0xc0, + .end = MST_FPGA_PHYS + 0xe0 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = PXA_GPIO_TO_IRQ(0), + .end = PXA_GPIO_TO_IRQ(0), + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE, + }, + [2] = { + .start = MAINSTONE_IRQ(0), + .end = MAINSTONE_IRQ(15), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device 
mst_cplds_device = { + .name = "pxa_cplds_irqs", + .id = -1, + .resource = &mst_cplds_resources[0], + .num_resources = 3, +}; + static struct platform_device *platform_devices[] __initdata = { &smc91x_device, &mst_flash_device[0], &mst_flash_device[1], &mst_gpio_keys_device, + &mst_cplds_device, }; static struct pxaohci_platform_data mainstone_ohci_platform_data = { @@ -718,7 +659,7 @@ MACHINE_START(MAINSTONE, "Intel HCDDBBVA0 Development Platform (aka Mainstone)") .atag_offset = 0x100, /* BLOB boot parameter setting */ .map_io = mainstone_map_io, .nr_irqs = MAINSTONE_NR_IRQS, - .init_irq = mainstone_init_irq, + .init_irq = pxa27x_init_irq, .handle_irq = pxa27x_handle_irq, .init_time = pxa_timer_init, .init_machine = mainstone_init, diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c new file mode 100644 index 000000000000..2385052b0ce1 --- /dev/null +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -0,0 +1,200 @@ +/* + * Intel Reference Systems cplds + * + * Copyright (C) 2014 Robert Jarzmik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Cplds motherboard driver, supporting lubbock and mainstone SoC board. + */ + +#include <linux/bitops.h> +#include <linux/gpio.h> +#include <linux/gpio/consumer.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/mfd/core.h> +#include <linux/module.h> +#include <linux/of_platform.h> + +#define FPGA_IRQ_MASK_EN 0x0 +#define FPGA_IRQ_SET_CLR 0x10 + +#define CPLDS_NB_IRQ 32 + +struct cplds { + void __iomem *base; + int irq; + unsigned int irq_mask; + struct gpio_desc *gpio0; + struct irq_domain *irqdomain; +}; + +static irqreturn_t cplds_irq_handler(int in_irq, void *d) +{ + struct cplds *fpga = d; + unsigned long pending; + unsigned int bit; + + pending = readl(fpga->base + FPGA_IRQ_SET_CLR) & fpga->irq_mask; + for_each_set_bit(bit, &pending, CPLDS_NB_IRQ) + generic_handle_irq(irq_find_mapping(fpga->irqdomain, bit)); + + return IRQ_HANDLED; +} + +static void cplds_irq_mask_ack(struct irq_data *d) +{ + struct cplds *fpga = irq_data_get_irq_chip_data(d); + unsigned int cplds_irq = irqd_to_hwirq(d); + unsigned int set, bit = BIT(cplds_irq); + + fpga->irq_mask &= ~bit; + writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN); + set = readl(fpga->base + FPGA_IRQ_SET_CLR); + writel(set & ~bit, fpga->base + FPGA_IRQ_SET_CLR); +} + +static void cplds_irq_unmask(struct irq_data *d) +{ + struct cplds *fpga = irq_data_get_irq_chip_data(d); + unsigned int cplds_irq = irqd_to_hwirq(d); + unsigned int bit = BIT(cplds_irq); + + fpga->irq_mask |= bit; + writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN); +} + +static struct irq_chip cplds_irq_chip = { + .name = "pxa_cplds", + .irq_mask_ack = cplds_irq_mask_ack, + .irq_unmask = cplds_irq_unmask, + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE, +}; + +static int cplds_irq_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct cplds *fpga = d->host_data; + + irq_set_chip_and_handler(irq, &cplds_irq_chip, handle_level_irq); + irq_set_chip_data(irq, fpga); + + return 0; +} + +static const struct irq_domain_ops cplds_irq_domain_ops = { + .xlate = irq_domain_xlate_twocell, + .map = cplds_irq_domain_map, +}; + +static int cplds_resume(struct platform_device *pdev) +{ + 
struct cplds *fpga = platform_get_drvdata(pdev); + + writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN); + + return 0; +} + +static int cplds_probe(struct platform_device *pdev) +{ + struct resource *res; + struct cplds *fpga; + int ret; + int base_irq; + unsigned long irqflags = 0; + + fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL); + if (!fpga) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (res) { + fpga->irq = (unsigned int)res->start; + irqflags = res->flags; + } + if (!fpga->irq) + return -ENODEV; + + base_irq = platform_get_irq(pdev, 1); + if (base_irq < 0) + base_irq = 0; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + fpga->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(fpga->base)) + return PTR_ERR(fpga->base); + + platform_set_drvdata(pdev, fpga); + + writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN); + writel(0, fpga->base + FPGA_IRQ_SET_CLR); + + ret = devm_request_irq(&pdev->dev, fpga->irq, cplds_irq_handler, + irqflags, dev_name(&pdev->dev), fpga); + if (ret == -ENOSYS) + return -EPROBE_DEFER; + + if (ret) { + dev_err(&pdev->dev, "couldn't request main irq%d: %d\n", + fpga->irq, ret); + return ret; + } + + irq_set_irq_wake(fpga->irq, 1); + fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node, + CPLDS_NB_IRQ, + &cplds_irq_domain_ops, fpga); + if (!fpga->irqdomain) + return -ENODEV; + + if (base_irq) { + ret = irq_create_strict_mappings(fpga->irqdomain, base_irq, 0, + CPLDS_NB_IRQ); + if (ret) { + dev_err(&pdev->dev, "couldn't create the irq mapping %d..%d\n", + base_irq, base_irq + CPLDS_NB_IRQ); + return ret; + } + } + + return 0; +} + +static int cplds_remove(struct platform_device *pdev) +{ + struct cplds *fpga = platform_get_drvdata(pdev); + + irq_set_chip_and_handler(fpga->irq, NULL, NULL); + + return 0; +} + +static const struct of_device_id cplds_id_table[] = { + { .compatible = "intel,lubbock-cplds-irqs", }, + { .compatible = "intel,mainstone-cplds-irqs", }, + { } +}; +MODULE_DEVICE_TABLE(of, cplds_id_table); + +static struct platform_driver cplds_driver = { + .driver = { + .name = "pxa_cplds_irqs", + .of_match_table = of_match_ptr(cplds_id_table), + }, + .probe = cplds_probe, + .remove = cplds_remove, + .resume = cplds_resume, +}; + +module_platform_driver(cplds_driver); + +MODULE_DESCRIPTION("PXA Cplds interrupts driver"); +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); +MODULE_LICENSE("GPL"); diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c index b07d88602073..b0dcbe28f78c 100644 --- a/arch/arm/mach-rockchip/pm.c +++ b/arch/arm/mach-rockchip/pm.c @@ -83,6 +83,13 @@ static void rk3288_slp_mode_set(int level) SGRF_PCLK_WDT_GATE | SGRF_FAST_BOOT_EN | SGRF_PCLK_WDT_GATE_WRITE | SGRF_FAST_BOOT_EN_WRITE); + /* + * The dapswjdp can not auto reset before resume, that cause it may + * access some illegal address during resume. Let's disable it before + * suspend, and the MASKROM will enable it back. 
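The write that follows relies on a Rockchip register convention that is easy to misread: (S)GRF registers dedicate their upper 16 bits to a write-enable mask, so bit n is only modified when bit n+16 is set in the same write. A short sketch of both directions (illustrative, not part of the patch):

	/* clear DAPDEVICEEN: hi-half mask set, lo-half value left 0 */
	regmap_write(sgrf_regmap, RK3288_SGRF_CPU_CON0, SGRF_DAPDEVICEEN_WRITE);

	/* setting it back (done by the MASKROM on resume) would be: */
	regmap_write(sgrf_regmap, RK3288_SGRF_CPU_CON0,
		     SGRF_DAPDEVICEEN_WRITE | SGRF_DAPDEVICEEN);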
+ */ + regmap_write(sgrf_regmap, RK3288_SGRF_CPU_CON0, SGRF_DAPDEVICEEN_WRITE); + /* booting address of resuming system is from this register value */ regmap_write(sgrf_regmap, RK3288_SGRF_FAST_BOOT_ADDR, rk3288_bootram_phy); diff --git a/arch/arm/mach-rockchip/pm.h b/arch/arm/mach-rockchip/pm.h index 03ff31d8282d..3e8d39c0c3d5 100644 --- a/arch/arm/mach-rockchip/pm.h +++ b/arch/arm/mach-rockchip/pm.h @@ -55,6 +55,10 @@ static inline void rockchip_suspend_init(void) #define SGRF_FAST_BOOT_EN BIT(8) #define SGRF_FAST_BOOT_EN_WRITE BIT(24) +#define RK3288_SGRF_CPU_CON0 (0x40) +#define SGRF_DAPDEVICEEN BIT(0) +#define SGRF_DAPDEVICEEN_WRITE BIT(16) + #define RK3288_CRU_MODE_CON 0x50 #define RK3288_CRU_SEL0_CON 0x60 #define RK3288_CRU_SEL1_CON 0x64 diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c index d360ec044b66..b6cf3b449428 100644 --- a/arch/arm/mach-rockchip/rockchip.c +++ b/arch/arm/mach-rockchip/rockchip.c @@ -30,11 +30,30 @@ #include "pm.h" #define RK3288_GRF_SOC_CON0 0x244 +#define RK3288_TIMER6_7_PHYS 0xff810000 static void __init rockchip_timer_init(void) { if (of_machine_is_compatible("rockchip,rk3288")) { struct regmap *grf; + void __iomem *reg_base; + + /* + * Most/all uboot versions for rk3288 don't enable timer7 + * which is needed for the architected timer to work. + * So make sure it is running during early boot. + */ + reg_base = ioremap(RK3288_TIMER6_7_PHYS, SZ_16K); + if (reg_base) { + writel(0, reg_base + 0x30); + writel(0xffffffff, reg_base + 0x20); + writel(0xffffffff, reg_base + 0x24); + writel(1, reg_base + 0x30); + dsb(); + iounmap(reg_base); + } else { + pr_err("rockchip: could not map timer7 registers\n"); + } /* * Disable auto jtag/sdmmc switching that causes issues diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 09c5fe3d30c2..7e7583ddd607 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1878,7 +1878,7 @@ struct dma_map_ops iommu_coherent_ops = { * arm_iommu_attach_device function. */ struct dma_iommu_mapping * -arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size) { unsigned int bits = size >> PAGE_SHIFT; unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); @@ -1886,6 +1886,10 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) int extensions = 1; int err = -ENOMEM; + /* currently only 32-bit DMA address space is supported */ + if (size > DMA_BIT_MASK(32) + 1) + return ERR_PTR(-ERANGE); + if (!bitmap_size) return ERR_PTR(-EINVAL); @@ -2057,13 +2061,6 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, if (!iommu) return false; - /* - * currently arm_iommu_create_mapping() takes a max of size_t - * for size param. So check this limit for now. - */ - if (size > SIZE_MAX) - return false; - mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); if (IS_ERR(mapping)) { pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 6333d9c17875..0d629b8f973f 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index b98895d9fe57..ee8dfa793989 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -59,6 +59,7 @@ void *kmap_atomic(struct page *page) void *kmap; int type; + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -121,6 +122,7 @@ void __kunmap_atomic(void *kvaddr) kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -130,6 +132,7 @@ void *kmap_atomic_pfn(unsigned long pfn) int idx, type; struct page *page = pfn_to_page(pfn); + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4e6ef896c619..7186382672b5 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void) } /* - * Find the first non-section-aligned page, and point + * Find the first non-pmd-aligned page, and point * memblock_limit at it. This relies on rounding the - * limit down to be section-aligned, which happens at - * the end of this function. + * limit down to be pmd-aligned, which happens at the + * end of this function. * * With this algorithm, the start or end of almost any - * bank can be non-section-aligned. The only exception - * is that the start of the bank 0 must be section- + * bank can be non-pmd-aligned. The only exception is + * that the start of the bank 0 must be section- * aligned, since otherwise memory would need to be * allocated when mapping the start of bank 0, which * occurs before any free memory is mapped. */ if (!memblock_limit) { - if (!IS_ALIGNED(block_start, SECTION_SIZE)) + if (!IS_ALIGNED(block_start, PMD_SIZE)) memblock_limit = block_start; - else if (!IS_ALIGNED(block_end, SECTION_SIZE)) + else if (!IS_ALIGNED(block_end, PMD_SIZE)) memblock_limit = arm_lowmem_limit; } @@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void) high_memory = __va(arm_lowmem_limit - 1) + 1; /* - * Round the memblock limit down to a section size. This + * Round the memblock limit down to a pmd size. This * helps to ensure that we will allocate memory from the - * last full section, which should be mapped. + * last full pmd, which should be mapped. */ if (memblock_limit) - memblock_limit = round_down(memblock_limit, SECTION_SIZE); + memblock_limit = round_down(memblock_limit, PMD_SIZE); if (!memblock_limit) memblock_limit = arm_lowmem_limit; diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index aa0519eed698..774ef1323554 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -22,8 +22,6 @@ * * These are the low level assembler for performing cache and TLB * functions on the arm1020. - * - * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt */ #include <linux/linkage.h> #include <linux/init.h> diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index bff4c7f70fd6..ae3c27b71594 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -22,8 +22,6 @@ * * These are the low level assembler for performing cache and TLB * functions on the arm1020e. 
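The fault.c and highmem.c hunks above are two sides of the same series-wide change: pagefault_disable() no longer implies preempt_disable(), so code that also needs to stay on one CPU (such as the per-CPU fixmap slots used by kmap_atomic) must now disable preemption explicitly, and fault handlers test faulthandler_disabled() rather than in_atomic(). The required shape, sketched:

	void *kmap_atomic_shape(struct page *page)
	{
		preempt_disable();	/* fixmap slot is per-CPU */
		pagefault_disable();	/* no sleeping faults from here on */
		/* ... map and return the page ... */
	}

	void kunmap_atomic_shape(void *kvaddr)
	{
		/* ... tear the mapping down ... */
		pagefault_enable();
		preempt_enable();	/* strict reverse order */
	}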
- * - * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt */ #include <linux/linkage.h> #include <linux/init.h> diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index ede8c54ab4aa..32a47cc19076 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -441,9 +441,6 @@ ENTRY(cpu_arm925_set_pte_ext) .type __arm925_setup, #function __arm925_setup: mov r0, #0 -#if defined(CONFIG_CPU_ICACHE_STREAMING_DISABLE) - orr r0,r0,#1 << 7 -#endif /* Transparent on, D-cache clean & flush mode. See NOTE2 above */ orr r0,r0,#1 << 1 @ transparent mode on diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index e494d6d6acbe..92e08bf37aad 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -602,7 +602,6 @@ __\name\()_proc_info: PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ initfn __feroceon_setup, __\name\()_proc_info - .long __feroceon_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e1268f905026..e0e23582c8b4 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -54,6 +54,7 @@ #define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) #define FLAG_NEED_X_RESET (1 << 0) +#define FLAG_IMM_OVERFLOW (1 << 1) struct jit_ctx { const struct bpf_prog *skf; @@ -293,6 +294,15 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx) /* PC in ARM mode == address of the instruction + 8 */ imm = offset - (8 + ctx->idx * 4); + if (imm & ~0xfff) { + /* + * literal pool is too far, signal it into flags. we + * can only detect it on the second pass unfortunately. + */ + ctx->flags |= FLAG_IMM_OVERFLOW; + return 0; + } + return imm; } @@ -449,10 +459,21 @@ static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx) return; } #endif - if (rm != ARM_R0) - emit(ARM_MOV_R(ARM_R0, rm), ctx); + + /* + * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4 + * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into + * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm + * before using it as a source for ARM_R1. + * + * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is + * ARM_R5 (r_X) so there is no particular register overlap + * issues. + */ if (rn != ARM_R1) emit(ARM_MOV_R(ARM_R1, rn), ctx); + if (rm != ARM_R0) + emit(ARM_MOV_R(ARM_R0, rm), ctx); ctx->seen |= SEEN_CALL; emit_mov_i(ARM_R3, (u32)jit_udiv, ctx); @@ -855,6 +876,14 @@ b_epilogue: default: return -1; } + + if (ctx->flags & FLAG_IMM_OVERFLOW) + /* + * this instruction generated an overflow when + * trying to access the literal pool, so + * delegate this filter to the kernel interpreter. + */ + return -1; } /* compute offsets only during the first pass */ @@ -917,7 +946,14 @@ void bpf_jit_compile(struct bpf_prog *fp) ctx.idx = 0; build_prologue(&ctx); - build_body(&ctx); + if (build_body(&ctx) < 0) { +#if __LINUX_ARM_ARCH__ < 7 + if (ctx.imm_count) + kfree(ctx.imms); +#endif + bpf_jit_binary_free(header); + goto out; + } build_epilogue(&ctx); flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx)); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 224081ccc92f..7d0f07020c80 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -272,6 +272,7 @@ void xen_arch_pre_suspend(void) { } void xen_arch_post_suspend(int suspend_cancelled) { } void xen_timer_resume(void) { } void xen_arch_resume(void) { } +void xen_arch_suspend(void) { } /* In the hypervisor.S file. 
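In the bpf_jit_32.c hunk above, the magic constant deserves a note: in ARM mode the JIT reaches its literal pool with a PC-relative load whose immediate field is 12 bits, so only offsets 0..4095 are encodable by the instructions it emits. Anything else, including a negative offset whose sign bits also fail the test, sets FLAG_IMM_OVERFLOW and the filter falls back to the interpreter:

	if (imm & ~0xfff) {	/* not representable as an imm12 */
		ctx->flags |= FLAG_IMM_OVERFLOW;
		return 0;	/* build_body() then returns < 0 and the JIT is abandoned */
	}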
*/ diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 793551d15f1d..498325074a06 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -4,6 +4,7 @@ #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/export.h> +#include <linux/memblock.h> #include <linux/of_address.h> #include <linux/slab.h> #include <linux/types.h> @@ -21,6 +22,20 @@ #include <asm/xen/hypercall.h> #include <asm/xen/interface.h> +unsigned long xen_get_swiotlb_free_pages(unsigned int order) +{ + struct memblock_region *reg; + gfp_t flags = __GFP_NOWARN; + + for_each_memblock(memory, reg) { + if (reg->base < (phys_addr_t)0xffffffff) { + flags |= __GFP_DMA; + break; + } + } + return __get_free_pages(flags, order); +} + enum dma_cache_op { DMA_UNMAP, DMA_MAP, diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi index c138b95a8356..351c95bda89e 100644 --- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi @@ -21,6 +21,20 @@ clock-output-names = "juno_mb:clk25mhz"; }; + v2m_refclk1mhz: refclk1mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1000000>; + clock-output-names = "juno_mb:refclk1mhz"; + }; + + v2m_refclk32khz: refclk32khz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "juno_mb:refclk32khz"; + }; + motherboard { compatible = "arm,vexpress,v2p-p1", "simple-bus"; #address-cells = <2>; /* SMB chipselect number and offset */ @@ -66,6 +80,15 @@ #size-cells = <1>; ranges = <0 3 0 0x200000>; + v2m_sysctl: sysctl@020000 { + compatible = "arm,sp810", "arm,primecell"; + reg = <0x020000 0x1000>; + clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&mb_clk24mhz>; + clock-names = "refclk", "timclk", "apb_pclk"; + #clock-cells = <1>; + clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3"; + }; + mmci@050000 { compatible = "arm,pl180", "arm,primecell"; reg = <0x050000 0x1000>; @@ -106,16 +129,16 @@ compatible = "arm,sp804", "arm,primecell"; reg = <0x110000 0x10000>; interrupts = <9>; - clocks = <&mb_clk24mhz>, <&soc_smc50mhz>; - clock-names = "timclken1", "apb_pclk"; + clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&mb_clk24mhz>; + clock-names = "timclken1", "timclken2", "apb_pclk"; }; v2m_timer23: timer@120000 { compatible = "arm,sp804", "arm,primecell"; reg = <0x120000 0x10000>; interrupts = <9>; - clocks = <&mb_clk24mhz>, <&soc_smc50mhz>; - clock-names = "timclken1", "apb_pclk"; + clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&mb_clk24mhz>; + clock-names = "timclken1", "timclken2", "apb_pclk"; }; rtc@170000 { diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 43d54017b779..d0ab012fa379 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -16,7 +16,8 @@ #include "mt8173.dtsi" / { - model = "mediatek,mt8173-evb"; + model = "MediaTek MT8173 evaluation board"; + compatible = "mediatek,mt8173-evb", "mediatek,mt8173"; aliases { serial0 = &uart0; diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c index 9499199924ae..6a37c3c6b11d 100644 --- a/arch/arm64/crypto/crc32-arm64.c +++ b/arch/arm64/crypto/crc32-arm64.c @@ -147,13 +147,21 @@ static int chksum_final(struct shash_desc *desc, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + put_unaligned_le32(ctx->crc, out); + return 0; +} + +static int chksumc_final(struct shash_desc *desc, u8 *out) +{ + struct 
chksum_desc_ctx *ctx = shash_desc_ctx(desc); + put_unaligned_le32(~ctx->crc, out); return 0; } static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) { - put_unaligned_le32(~crc32_arm64_le_hw(crc, data, len), out); + put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out); return 0; } @@ -199,6 +207,14 @@ static int crc32_cra_init(struct crypto_tfm *tfm) { struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + mctx->key = 0; + return 0; +} + +static int crc32c_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + mctx->key = ~0; return 0; } @@ -229,7 +245,7 @@ static struct shash_alg crc32c_alg = { .setkey = chksum_setkey, .init = chksum_init, .update = chksumc_update, - .final = chksum_final, + .final = chksumc_final, .finup = chksumc_finup, .digest = chksumc_digest, .descsize = sizeof(struct chksum_desc_ctx), @@ -241,7 +257,7 @@ static struct shash_alg crc32c_alg = { .cra_alignmask = 0, .cra_ctxsize = sizeof(struct chksum_ctx), .cra_module = THIS_MODULE, - .cra_init = crc32_cra_init, + .cra_init = crc32c_cra_init, } }; diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 114e7cc5de8c..aefda9868627 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -74,6 +74,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, static int sha1_ce_final(struct shash_desc *desc, u8 *out) { + struct sha1_ce_state *sctx = shash_desc_ctx(desc); + + sctx->finalize = 0; kernel_neon_begin_partial(16); sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); kernel_neon_end(); diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 1340e44c048b..7cd587564a41 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -75,6 +75,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, static int sha256_ce_final(struct shash_desc *desc, u8 *out) { + struct sha256_ce_state *sctx = shash_desc_ctx(desc); + + sctx->finalize = 0; kernel_neon_begin_partial(28); sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); kernel_neon_end(); diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 71f19c4dc0de..0fa47c4275cb 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -114,7 +114,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define nop() asm volatile("nop"); #define smp_mb__before_atomic() smp_mb() diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 5f750dc96e0f..74069b3bd919 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -58,7 +58,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -85,7 +85,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) ret = -ENOSYS; } - pagefault_enable(); /* subsumes preempt_enable() */ + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 7ebcd31ce51c..225ec3524fbf 100644 --- a/arch/arm64/include/asm/topology.h +++ 
b/arch/arm64/include/asm/topology.h @@ -18,7 +18,7 @@ extern struct cpu_topology cpu_topology[NR_CPUS]; #define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) -#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 21033bba9390..28f8365edc4c 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -24,7 +24,6 @@ #include <asm/cacheflush.h> #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <asm/insn.h> #include <linux/stop_machine.h> extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; @@ -34,48 +33,6 @@ struct alt_region { struct alt_instr *end; }; -/* - * Decode the imm field of a b/bl instruction, and return the byte - * offset as a signed value (so it can be used when computing a new - * branch target). - */ -static s32 get_branch_offset(u32 insn) -{ - s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn); - - /* sign-extend the immediate before turning it into a byte offset */ - return (imm << 6) >> 4; -} - -static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr) -{ - u32 insn; - - aarch64_insn_read(altinsnptr, &insn); - - /* Stop the world on instructions we don't support... */ - BUG_ON(aarch64_insn_is_cbz(insn)); - BUG_ON(aarch64_insn_is_cbnz(insn)); - BUG_ON(aarch64_insn_is_bcond(insn)); - /* ... and there is probably more. */ - - if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) { - enum aarch64_insn_branch_type type; - unsigned long target; - - if (aarch64_insn_is_b(insn)) - type = AARCH64_INSN_BRANCH_NOLINK; - else - type = AARCH64_INSN_BRANCH_LINK; - - target = (unsigned long)altinsnptr + get_branch_offset(insn); - insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr, - target, type); - } - - return insn; -} - static int __apply_alternatives(void *alt_region) { struct alt_instr *alt; @@ -83,9 +40,6 @@ static int __apply_alternatives(void *alt_region) u8 *origptr, *replptr; for (alt = region->begin; alt < region->end; alt++) { - u32 insn; - int i; - if (!cpus_have_cap(alt->cpufeature)) continue; @@ -95,12 +49,7 @@ static int __apply_alternatives(void *alt_region) origptr = (u8 *)&alt->orig_offset + alt->orig_offset; replptr = (u8 *)&alt->alt_offset + alt->alt_offset; - - for (i = 0; i < alt->alt_len; i += sizeof(insn)) { - insn = get_alt_insn(origptr + i, replptr + i); - aarch64_insn_write(origptr + i, insn); - } - + memcpy(origptr, replptr, alt->alt_len); flush_icache_range((uintptr_t)origptr, (uintptr_t)(origptr + alt->alt_len)); } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 23f25acf43a9..cce18c85d2e8 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1315,15 +1315,15 @@ static int armpmu_device_probe(struct platform_device *pdev) if (!cpu_pmu) return -ENODEV; - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - if (!irqs) - return -ENOMEM; - /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) return 0; + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + for (i = 0; i < pdev->num_resources; ++i) { 
struct device_node *dn; int cpu; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 74c256744b25..f3d6221cd5bd 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -328,10 +328,12 @@ static int ptdump_init(void) for (j = 0; j < pg_level[i].num; j++) pg_level[i].mask |= pg_level[i].bits[j].mask; +#ifdef CONFIG_SPARSEMEM_VMEMMAP address_markers[VMEMMAP_START_NR].start_address = (unsigned long)virt_to_page(PAGE_OFFSET); address_markers[VMEMMAP_END_NR].start_address = (unsigned long)virt_to_page(high_memory); +#endif pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 96da13167d4a..0948d327d013 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -211,7 +211,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, * If we're in an interrupt or have no user context, we must not take * the fault. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index edba042b2325..dc6a4842683a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -487,7 +487,7 @@ emit_cond_jmp: return -EINVAL; } - imm64 = (u64)insn1.imm << 32 | imm; + imm64 = (u64)insn1.imm << 32 | (u32)imm; emit_a64_mov_i64(dst, imm64, ctx); return 1; diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h index 962a6aeab787..366bbeaeb405 100644 --- a/arch/avr32/include/asm/cmpxchg.h +++ b/arch/avr32/include/asm/cmpxchg.h @@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels( if something tries to do an invalid cmpxchg(). */ extern void __cmpxchg_called_with_bad_pointer(void); -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) { diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h index a46f7cf3e1ea..68cf638faf48 100644 --- a/arch/avr32/include/asm/uaccess.h +++ b/arch/avr32/include/asm/uaccess.h @@ -97,7 +97,8 @@ static inline __kernel_size_t __copy_from_user(void *to, * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -116,7 +117,8 @@ static inline __kernel_size_t __copy_from_user(void *to, * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -136,7 +138,8 @@ static inline __kernel_size_t __copy_from_user(void *to, * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. 
It supports simple types like char and int, but not larger @@ -158,7 +161,8 @@ static inline __kernel_size_t __copy_from_user(void *to, * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index d223a8b57c1e..c03533937a9f 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -14,11 +14,11 @@ #include <linux/pagemap.h> #include <linux/kdebug.h> #include <linux/kprobes.h> +#include <linux/uaccess.h> #include <asm/mmu_context.h> #include <asm/sysreg.h> #include <asm/tlb.h> -#include <asm/uaccess.h> #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs, int trap) @@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) * If we're in an interrupt or have no user context, we must * not take the fault... */ - if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM)) + if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM)) goto no_context; local_irq_enable(); diff --git a/arch/blackfin/include/asm/io.h b/arch/blackfin/include/asm/io.h index 4e8ad0523118..6abebe82d4e9 100644 --- a/arch/blackfin/include/asm/io.h +++ b/arch/blackfin/include/asm/io.h @@ -10,6 +10,7 @@ #include <linux/compiler.h> #include <linux/types.h> #include <asm/byteorder.h> +#include <asm/def_LPBlackfin.h> #define __raw_readb bfin_read8 #define __raw_readw bfin_read16 diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 83f12f2ed9e3..3066d40a6db1 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -8,7 +8,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/wait.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <arch/system.h> extern int find_fixup_code(struct pt_regs *); @@ -109,11 +109,11 @@ do_page_fault(unsigned long address, struct pt_regs *regs, info.si_code = SEGV_MAPERR; /* - * If we're in an interrupt or "atomic" operation or have no + * If we're in an interrupt, have pagefaults disabled or have no * user context, we must not take the fault. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index ec4917ddf678..61d99767fe16 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -19,9 +19,9 @@ #include <linux/kernel.h> #include <linux/ptrace.h> #include <linux/hardirq.h> +#include <linux/uaccess.h> #include <asm/pgtable.h> -#include <asm/uaccess.h> #include <asm/gdb-stub.h> /*****************************************************************************/ @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(__frame)) diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c index bed9a9bd3c10..785344bbdc07 100644 --- a/arch/frv/mm/highmem.c +++ b/arch/frv/mm/highmem.c @@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page) unsigned long paddr; int type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); paddr = page_to_phys(page); @@ -85,5 +86,6 @@ void __kunmap_atomic(void *kvaddr) } kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index 9e7802911a57..a6e34e2acbba 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, * looks just like atomic_cmpxchg on our arch currently with a bunch of * variable casting. */ -#define __HAVE_ARCH_CMPXCHG 1 #define cmpxchg(ptr, old, new) \ ({ \ diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h index e4127e4d6a5b..f000a382bc7f 100644 --- a/arch/hexagon/include/asm/uaccess.h +++ b/arch/hexagon/include/asm/uaccess.h @@ -36,7 +36,8 @@ * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index f6769eb2bbf9..843ba435e43b 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -77,12 +77,7 @@ do { \ ___p1; \ }) -/* - * XXX check on this ---I suspect what Linus really wants here is - * acquire vs release semantics but we can't discuss this stuff with - * Linus just yet. Grrr... - */ -#define set_mb(var, value) do { (var) = (value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) /* * The group barrier in front of the rsm & ssm are necessary to ensure diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 6437ca21f61b..3ad8f6988363 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -53,7 +53,7 @@ void build_cpu_to_node_map(void); #define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id) #define topology_core_id(cpu) (cpu_data(cpu)->core_id) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) +#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif extern void arch_fix_phys_package_id(int num, u32 slot); diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h index f35109b1d907..a0e3620f8f13 100644 --- a/arch/ia64/include/uapi/asm/cmpxchg.h +++ b/arch/ia64/include/uapi/asm/cmpxchg.h @@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void); * indicated by comparing RETURN with OLD. */ -#define __HAVE_ARCH_CMPXCHG 1 - /* * This function doesn't exist, so you'll get a linker error * if something tries to do an invalid cmpxchg(). 
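The frv highmem hunk above is one instance of a pattern repeated throughout this series (metag, microblaze, mips, mn10300, parisc and powerpc get the same treatment below): once pagefault_disable() stopped implying a preempt-count bump, every kmap_atomic()/__kunmap_atomic() pair has to pin the CPU explicitly, because the mapping lives in a per-CPU fixmap slot. Schematic of the invariant (a sketch, not any one arch verbatim):

	void *kmap_atomic(struct page *page)
	{
		preempt_disable();	/* fixmap slot is per-CPU: forbid migration */
		pagefault_disable();	/* faults on the window must fail, not sleep */
		/* ... install the page in this CPU's fixmap slot, return vaddr ... */
	}

	void __kunmap_atomic(void *kvaddr)
	{
		/* ... tear the fixmap slot down ... */
		pagefault_enable();
		preempt_enable();	/* strict LIFO with the calls above */
	}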
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 15051e9c2c6f..b054c5c6e713 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -127,7 +127,7 @@ int smp_num_siblings = 1; volatile int ia64_cpu_to_sapicid[NR_CPUS]; EXPORT_SYMBOL(ia64_cpu_to_sapicid); -static volatile cpumask_t cpu_callin_map; +static cpumask_t cpu_callin_map; struct smp_boot_data smp_boot_data __initdata; @@ -477,6 +477,7 @@ do_boot_cpu (int sapicid, int cpu, struct task_struct *idle) for (timeout = 0; timeout < 100000; timeout++) { if (cpumask_test_cpu(cpu, &cpu_callin_map)) break; /* It has booted */ + barrier(); /* Make sure we re-read cpu_callin_map */ udelay(100); } Dprintk("\n"); diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index ba5ba7accd0d..70b40d1205a6 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -11,10 +11,10 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/prefetch.h> +#include <linux/uaccess.h> #include <asm/pgtable.h> #include <asm/processor.h> -#include <asm/uaccess.h> extern int die(char *, struct pt_regs *, long); @@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re /* * If we're in an interrupt or have no user context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; #ifdef CONFIG_VIRTUAL_MEM_MAP diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index d4e162d35b34..7cc3be9fa7c6 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_controller *controller = bridge->bus->sysdata; - - ACPI_COMPANION_SET(&bridge->dev, controller->companion); + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. + */ + if (!bridge->dev.parent) { + struct pci_controller *controller = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, controller->companion); + } return 0; } diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h index de651db20b43..14bf9b739dd2 100644 --- a/arch/m32r/include/asm/cmpxchg.h +++ b/arch/m32r/include/asm/cmpxchg.h @@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size) ((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \ sizeof(*(ptr)))) -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new) { diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h index 71adff209405..cac7014daef3 100644 --- a/arch/m32r/include/asm/uaccess.h +++ b/arch/m32r/include/asm/uaccess.h @@ -91,7 +91,8 @@ static inline void set_fs(mm_segment_t s) * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -155,7 +156,8 @@ extern int fixup_exception(struct pt_regs *regs); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. 
This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -175,7 +177,8 @@ extern int fixup_exception(struct pt_regs *regs); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -194,7 +197,8 @@ extern int fixup_exception(struct pt_regs *regs); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -274,7 +278,8 @@ do { \ * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -568,7 +573,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -588,7 +594,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. * @@ -606,7 +613,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. @@ -626,7 +634,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. * @@ -677,7 +686,8 @@ unsigned long clear_user(void __user *mem, unsigned long len); * strlen_user: - Get the size of a string in user space. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. 
* diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index ce7aea34fdf4..c18ddc74ef9a 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -45,7 +45,7 @@ static volatile unsigned long flushcache_cpumask = 0; /* * For flush_tlb_others() */ -static volatile cpumask_t flush_cpumask; +static cpumask_t flush_cpumask; static struct mm_struct *flush_mm; static struct vm_area_struct *flush_vma; static volatile unsigned long flush_va; @@ -415,7 +415,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, */ send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0); - while (!cpumask_empty((cpumask_t*)&flush_cpumask)) { + while (!cpumask_empty(&flush_cpumask)) { /* nothing. lockup detection does not belong here */ mb(); } @@ -468,7 +468,7 @@ void smp_invalidate_interrupt(void) __flush_tlb_page(va); } } - cpumask_clear_cpu(cpu_id, (cpumask_t*)&flush_cpumask); + cpumask_clear_cpu(cpu_id, &flush_cpumask); } /*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/ diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index e3d4d4890104..8f9875b7933d 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -24,9 +24,9 @@ #include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/highmem.h> #include <linux/module.h> +#include <linux/uaccess.h> #include <asm/m32r.h> -#include <asm/uaccess.h> #include <asm/hardirq.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> @@ -111,10 +111,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, mm = tsk->mm; /* - * If we're in an interrupt or have no user context or are running in an - * atomic region then we must not take the fault.. + * If we're in an interrupt or have no user context or have pagefaults + * disabled then we must not take the fault. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; if (error_code & ACE_USERMODE) diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h index bc755bc620ad..83b1df80f0ac 100644 --- a/arch/m68k/include/asm/cmpxchg.h +++ b/arch/m68k/include/asm/cmpxchg.h @@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *, * indicated by comparing RETURN with OLD. */ #ifdef CONFIG_RMW_INSNS -#define __HAVE_ARCH_CMPXCHG 1 static inline unsigned long __cmpxchg(volatile void *p, unsigned long old, unsigned long new, int size) diff --git a/arch/m68k/include/asm/irqflags.h b/arch/m68k/include/asm/irqflags.h index a823cd73dc09..b5941818346f 100644 --- a/arch/m68k/include/asm/irqflags.h +++ b/arch/m68k/include/asm/irqflags.h @@ -2,9 +2,6 @@ #define _M68K_IRQFLAGS_H #include <linux/types.h> -#ifdef CONFIG_MMU -#include <linux/preempt_mask.h> -#endif #include <linux/preempt.h> #include <asm/thread_info.h> #include <asm/entry.h> diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index b2f04aee46ec..6a94cdd0c830 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -10,10 +10,10 @@ #include <linux/ptrace.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/uaccess.h> #include <asm/setup.h> #include <asm/traps.h> -#include <asm/uaccess.h> #include <asm/pgalloc.h> extern void die_if_kernel(char *, struct pt_regs *, long); @@ -81,7 +81,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * If we're in an interrupt or have no user * context, we must not take the fault.. 
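Dropping volatile from flush_cpumask above (and from cpu_callin_map in the ia64 and mips hunks) follows the kernel's usual guidance: the cpumask accessors are built on atomic bitops that carry their own ordering, and a raw polling loop should state its requirement with an explicit barrier instead. Reduced shape of the waiting side (illustrative only):

	static cpumask_t callin_map;	/* no volatile: accessors do the work */

	/* boot CPU waits for a secondary to check in */
	while (!cpumask_test_cpu(cpu, &callin_map)) {
		barrier();	/* force a re-read; this is what volatile hid */
		udelay(100);
	}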
*/ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index d703d8e26a65..5a696e507930 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -84,7 +84,7 @@ static inline void fence(void) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h index b1bc1be8540f..be29e3e44321 100644 --- a/arch/metag/include/asm/cmpxchg.h +++ b/arch/metag/include/asm/cmpxchg.h @@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return old; } -#define __HAVE_ARCH_CMPXCHG 1 - #define cmpxchg(ptr, o, n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 2de5dc695a87..f57edca63609 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -105,7 +105,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, mm = tsk->mm; - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c index d71f621a2c0b..807f1b1c4e65 100644 --- a/arch/metag/mm/highmem.c +++ b/arch/metag/mm/highmem.c @@ -43,7 +43,7 @@ void *kmap_atomic(struct page *page) unsigned long vaddr; int type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -82,6 +82,7 @@ void __kunmap_atomic(void *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -95,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn) unsigned long vaddr; int type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 62942fd12672..331b0d35f89c 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -178,7 +178,8 @@ extern long __user_bad(void); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -290,7 +291,8 @@ extern long __user_bad(void); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. 
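The metag barrier.h hunk above (mips and powerpc follow later in this diff) renames set_mb() to smp_store_mb() and routes the assignment through WRITE_ONCE(), so the compiler may neither tear nor elide the store that the full barrier then orders. A hedged usage sketch with hypothetical names:

	#define smp_store_mb(var, value) \
		do { WRITE_ONCE(var, value); smp_mb(); } while (0)

	static void signal_waiter(struct waiter *w)
	{
		/* publish first, then inspect: smp_mb() keeps the later
		 * load from being reordered before the store */
		smp_store_mb(w->wakeup_pending, 1);
		if (READ_ONCE(w->sleeping))
			wake_up_process(w->task);
	}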
It supports simple types like char and int, but not larger diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index d46a5ebb7570..177dfc003643 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -107,14 +107,14 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) is_write = 0; - if (unlikely(in_atomic() || !mm)) { + if (unlikely(faulthandler_disabled() || !mm)) { if (kernel_mode(regs)) goto bad_area_nosemaphore; - /* in_atomic() in user mode is really bad, + /* faulthandler_disabled() in user mode is really bad, as is current->mm == NULL. */ - pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n", - mm); + pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n", + mm); pr_emerg("r15 = %lx MSR = %lx\n", regs->r15, regs->msr); die("Weird page fault", regs, SIGSEGV); diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c index 5a92576fad92..2fcc5a52d84d 100644 --- a/arch/microblaze/mm/highmem.c +++ b/arch/microblaze/mm/highmem.c @@ -37,7 +37,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); + preempt_enable(); return; } @@ -84,5 +85,6 @@ void __kunmap_atomic(void *kvaddr) #endif kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 5200f649dd4e..ae2dd59050f7 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -277,7 +277,7 @@ LDFLAGS += -m $(ld-emul) ifdef CONFIG_MIPS CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \ - sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/") + sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') ifdef CONFIG_64BIT CHECKFLAGS += -m64 endif diff --git a/arch/mips/ath79/prom.c b/arch/mips/ath79/prom.c index e1fe63051136..597899ad5438 100644 --- a/arch/mips/ath79/prom.c +++ b/arch/mips/ath79/prom.c @@ -1,6 +1,7 @@ /* * Atheros AR71XX/AR724X/AR913X specific prom routines * + * Copyright (C) 2015 Laurent Fasnacht <l@libres.ch> * Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org> * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org> * @@ -25,12 +26,14 @@ void __init prom_init(void) { fw_init_cmdline(); +#ifdef CONFIG_BLK_DEV_INITRD /* Read the initrd address from the firmware environment */ initrd_start = fw_getenvl("initrd_start"); if (initrd_start) { initrd_start = KSEG0ADDR(initrd_start); initrd_end = initrd_start + fw_getenvl("initrd_size"); } +#endif } void __init prom_free_prom_memory(void) diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index a73c93c3d44a..7fc8397d16f2 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -225,7 +225,7 @@ void __init plat_time_init(void) ddr_clk_rate = ath79_get_sys_clk_rate("ddr"); ref_clk_rate = ath79_get_sys_clk_rate("ref"); - pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz", + pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz\n", cpu_clk_rate / 1000000, (cpu_clk_rate / 1000) % 1000, ddr_clk_rate / 1000000, (ddr_clk_rate / 1000) % 1000, 
ahb_clk_rate / 1000000, (ahb_clk_rate / 1000) % 1000, diff --git a/arch/mips/cobalt/Makefile b/arch/mips/cobalt/Makefile index 558e94977942..68f0c5871adc 100644 --- a/arch/mips/cobalt/Makefile +++ b/arch/mips/cobalt/Makefile @@ -2,7 +2,6 @@ # Makefile for the Cobalt micro systems family specific parts of the kernel # -obj-y := buttons.o irq.o lcd.o led.o reset.o rtc.o serial.o setup.o time.o +obj-y := buttons.o irq.o lcd.o led.o mtd.o reset.o rtc.o serial.o setup.o time.o obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_MTD_PHYSMAP) += mtd.o diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 002680648dcb..b2a577ebce0b 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m CONFIG_USB_C67X00_HCD=m CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_ISP1760_HCD=m +CONFIG_USB_ISP1760=m CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=m CONFIG_USB_R8A66597_HCD=m diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 2b8bbbcb9be0..7ecba84656d4 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -112,8 +112,8 @@ #define __WEAK_LLSC_MB " \n" #endif -#define set_mb(var, value) \ - do { var = value; smp_mb(); } while (0) +#define smp_store_mb(var, value) \ + do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 412f945f1f5e..b71ab4a5fd50 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \ }) -#define __HAVE_ARCH_CMPXCHG 1 - #define __cmpxchg_asm(ld, st, m, old, new) \ ({ \ __typeof(*(m)) __ret; \ diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index a594d8ed9698..f19e890b99d2 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -304,7 +304,7 @@ do { \ \ current->thread.abi = &mips_abi; \ \ - current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31; \ + current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \ } while (0) #endif /* CONFIG_32BIT */ @@ -366,7 +366,7 @@ do { \ else \ current->thread.abi = &mips_abi; \ \ - current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31; \ + current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \ \ p = personality(current->personality); \ if (p != PER_LINUX32 && p != PER_LINUX) \ diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h index 18ae5ddef118..c28a8499aec7 100644 --- a/arch/mips/include/asm/pgtable-bits.h +++ b/arch/mips/include/asm/pgtable-bits.h @@ -113,7 +113,7 @@ #define _PAGE_PRESENT_SHIFT 0 #define _PAGE_PRESENT (1 << _PAGE_PRESENT_SHIFT) /* R2 or later cores check for RI/XI support to determine _PAGE_READ */ -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) #define _PAGE_WRITE_SHIFT (_PAGE_PRESENT_SHIFT + 1) #define _PAGE_WRITE (1 << _PAGE_WRITE_SHIFT) #else @@ -135,16 +135,16 @@ #define _PAGE_SPLITTING (1 << _PAGE_SPLITTING_SHIFT) /* Only R2 or newer cores have the XI bit */ -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) #define _PAGE_NO_EXEC_SHIFT (_PAGE_SPLITTING_SHIFT + 1) #else #define _PAGE_GLOBAL_SHIFT (_PAGE_SPLITTING_SHIFT + 1) #define _PAGE_GLOBAL (1 << 
_PAGE_GLOBAL_SHIFT) -#endif /* CONFIG_CPU_MIPSR2 */ +#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ #endif /* CONFIG_64BIT && CONFIG_MIPS_HUGE_TLB_SUPPORT */ -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) /* XI - page cannot be executed */ #ifndef _PAGE_NO_EXEC_SHIFT #define _PAGE_NO_EXEC_SHIFT (_PAGE_MODIFIED_SHIFT + 1) @@ -160,10 +160,10 @@ #define _PAGE_GLOBAL_SHIFT (_PAGE_NO_READ_SHIFT + 1) #define _PAGE_GLOBAL (1 << _PAGE_GLOBAL_SHIFT) -#else /* !CONFIG_CPU_MIPSR2 */ +#else /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */ #define _PAGE_GLOBAL_SHIFT (_PAGE_MODIFIED_SHIFT + 1) #define _PAGE_GLOBAL (1 << _PAGE_GLOBAL_SHIFT) -#endif /* CONFIG_CPU_MIPSR2 */ +#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ #define _PAGE_VALID_SHIFT (_PAGE_GLOBAL_SHIFT + 1) #define _PAGE_VALID (1 << _PAGE_VALID_SHIFT) @@ -205,7 +205,7 @@ */ static inline uint64_t pte_to_entrylo(unsigned long pte_val) { -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (cpu_has_rixi) { int sa; #ifdef CONFIG_32BIT diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index bb02fac9b4fa..2b25d1ba1ea0 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -45,7 +45,7 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_DUMP 0x8 #define SMP_ASK_C0COUNT 0x10 -extern volatile cpumask_t cpu_callin_map; +extern cpumask_t cpu_callin_map; /* Mask of CPUs which are currently definitely operating coherently */ extern cpumask_t cpu_coherent_mask; diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index e92d6c4b5ed1..7163cd7fdd69 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -104,7 +104,6 @@ do { \ if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \ __fpsave = FP_SAVE_VECTOR; \ (last) = resume(prev, next, task_thread_info(next), __fpsave); \ - disable_msa(); \ } while (0) #define finish_arch_switch(prev) \ @@ -122,6 +121,7 @@ do { \ if (cpu_has_userlocal) \ write_c0_userlocal(current_thread_info()->tp_value); \ __restore_watch(); \ + disable_msa(); \ } while (0) #endif /* _ASM_SWITCH_TO_H */ diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h index 3e307ec2afba..7afda4150a59 100644 --- a/arch/mips/include/asm/topology.h +++ b/arch/mips/include/asm/topology.h @@ -15,7 +15,7 @@ #define topology_physical_package_id(cpu) (cpu_data[cpu].package) #define topology_core_id(cpu) (cpu_data[cpu].core) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) -#define topology_thread_cpumask(cpu) (&cpu_sibling_map[cpu]) +#define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu]) #endif #endif /* __ASM_TOPOLOGY_H */ diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index bf8b32450ef6..9722357d2854 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -103,7 +103,8 @@ extern u64 __ua_limit; * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -138,7 +139,8 @@ extern u64 __ua_limit; * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. 
* * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -157,7 +159,8 @@ extern u64 __ua_limit; * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -177,7 +180,8 @@ extern u64 __ua_limit; * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -199,7 +203,8 @@ extern u64 __ua_limit; * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -498,7 +503,8 @@ extern void __put_user_unknown(void); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -517,7 +523,8 @@ extern void __put_user_unknown(void); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -537,7 +544,8 @@ extern void __put_user_unknown(void); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -559,7 +567,8 @@ extern void __put_user_unknown(void); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -815,7 +824,8 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n); * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. 
@@ -888,7 +898,8 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n); * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. * @@ -1075,7 +1086,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. @@ -1107,7 +1119,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. * @@ -1329,7 +1342,8 @@ strncpy_from_user(char *__to, const char __user *__from, long __len) * strlen_user: - Get the size of a string in user space. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * @@ -1398,7 +1412,8 @@ static inline long __strnlen_user(const char __user *s, long n) * strnlen_user: - Get the size of a string in user space. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index e36515dcd3b2..209e5b76c1bc 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -74,13 +74,12 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c) { unsigned long sr, mask, fcsr, fcsr0, fcsr1; + fcsr = c->fpu_csr31; mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM; sr = read_c0_status(); __enable_fpu(FPU_AS_IS); - fcsr = read_32bit_cp1_register(CP1_STATUS); - fcsr0 = fcsr & mask; write_32bit_cp1_register(CP1_STATUS, fcsr0); fcsr0 = read_32bit_cp1_register(CP1_STATUS); diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index be4899f3c393..4a4d9e067c89 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -76,14 +76,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, /* Lets see if this is an O32 ELF */ if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) { - /* FR = 1 for N32 */ - if (ehdr32->e_flags & EF_MIPS_ABI2) - state->overall_fp_mode = FP_FR1; - else - /* Set a good default FPU mode for O32 */ - state->overall_fp_mode = cpu_has_mips_r6 ? - FP_FRE : FP_FR0; - if (ehdr32->e_flags & EF_MIPS_FP64) { /* * Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. 
We will override it @@ -104,9 +96,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, (char *)&abiflags, sizeof(abiflags)); } else { - /* FR=1 is really the only option for 64-bit */ - state->overall_fp_mode = FP_FR1; - if (phdr64->p_type != PT_MIPS_ABIFLAGS) return 0; if (phdr64->p_filesz < sizeof(abiflags)) @@ -137,6 +126,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, struct elf32_hdr *ehdr = _ehdr; struct mode_req prog_req, interp_req; int fp_abi, interp_fp_abi, abi0, abi1, max_abi; + bool is_mips64; if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT)) return 0; @@ -152,10 +142,22 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, abi0 = abi1 = fp_abi; } - /* ABI limits. O32 = FP_64A, N32/N64 = FP_SOFT */ - max_abi = ((ehdr->e_ident[EI_CLASS] == ELFCLASS32) && - (!(ehdr->e_flags & EF_MIPS_ABI2))) ? - MIPS_ABI_FP_64A : MIPS_ABI_FP_SOFT; + is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) || + (ehdr->e_flags & EF_MIPS_ABI2); + + if (is_mips64) { + /* MIPS64 code always uses FR=1, thus the default is easy */ + state->overall_fp_mode = FP_FR1; + + /* Disallow access to the various FPXX & FP64 ABIs */ + max_abi = MIPS_ABI_FP_SOFT; + } else { + /* Default to a mode capable of running code expecting FR=0 */ + state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0; + + /* Allow all ABIs we know about */ + max_abi = MIPS_ABI_FP_64A; + } if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) || (abi1 > max_abi && abi1 != MIPS_ABI_FP_UNKNOWN)) diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index d2bfbc2e8995..3c8a18a00a65 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -29,7 +29,7 @@ int kgdb_early_setup; #endif -static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; +static DECLARE_BITMAP(irq_map, NR_IRQS); int allocate_irqno(void) { @@ -109,7 +109,7 @@ void __init init_IRQ(void) #endif } -#ifdef DEBUG_STACKOVERFLOW +#ifdef CONFIG_DEBUG_STACKOVERFLOW static inline void check_stack_overflow(void) { unsigned long sp; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index d544e774eea6..e933a309f2ea 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -176,7 +176,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data) __get_user(value, data + 64); fcr31 = child->thread.fpu.fcr31; - mask = current_cpu_data.fpu_msk31; + mask = boot_cpu_data.fpu_msk31; child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask); /* FIR may not be written. 
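Two small correctness fixes sit in the irq.c hunk above: the DEBUG_STACKOVERFLOW guard gains its missing CONFIG_ prefix (the bare spelling is never defined, so the overflow check was dead code), and the irq bitmap drops the hand-rolled NR_IRQS / BITS_PER_LONG sizing, which truncates whenever NR_IRQS is not a multiple of the word size. DECLARE_BITMAP() rounds up; a standalone comparison mirroring the kernel macros:

	#include <stdio.h>

	#define BITS_PER_LONG	 (8 * sizeof(long))
	#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)
	#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]

	int main(void)
	{
		enum { NR_IRQS = 100 };		/* not a multiple of BITS_PER_LONG */
		DECLARE_BITMAP(irq_map, NR_IRQS);

		printf("truncated: %zu words, rounded up: %zu words\n",
		       NR_IRQS / BITS_PER_LONG, sizeof(irq_map) / sizeof(long));
		return 0;
	}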
*/ diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h index 06805e09bcd3..0b85f827cd18 100644 --- a/arch/mips/kernel/signal-common.h +++ b/arch/mips/kernel/signal-common.h @@ -28,12 +28,7 @@ extern void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, extern int fpcsr_pending(unsigned int __user *fpcsr); /* Make sure we will not lose FPU ownership */ -#ifdef CONFIG_PREEMPT -#define lock_fpu_owner() preempt_disable() -#define unlock_fpu_owner() preempt_enable() -#else -#define lock_fpu_owner() pagefault_disable() -#define unlock_fpu_owner() pagefault_enable() -#endif +#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); }) +#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); }) #endif /* __SIGNAL_COMMON_H */ diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index fd528d7ea278..336708ae5c5b 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = { static void bmips_wr_vec(unsigned long dst, char *start, char *end) { memcpy((void *)dst, start, end - start); - dma_cache_wback((unsigned long)start, end - start); + dma_cache_wback(dst, end - start); local_flush_icache_range(dst, dst + (end - start)); instruction_hazard(); } diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 7e011f95bb8e..4251d390b5b6 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -92,7 +92,7 @@ static void __init cps_smp_setup(void) #ifdef CONFIG_MIPS_MT_FPAFF /* If we have an FPU, enroll ourselves in the FPU-full mask */ if (cpu_has_fpu) - cpu_set(0, mt_fpu_cpumask); + cpumask_set_cpu(0, &mt_fpu_cpumask); #endif /* CONFIG_MIPS_MT_FPAFF */ } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 193ace7955fb..faa46ebd9dda 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -43,7 +43,7 @@ #include <asm/time.h> #include <asm/setup.h> -volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ +cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ EXPORT_SYMBOL(__cpu_number_map); @@ -218,8 +218,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) /* * Trust is futile. We should really have timeouts ... 
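signal-common.h above drops the CONFIG_PREEMPT special-casing because, after the pagefault/preempt decoupling, neither call alone is sufficient; both are taken unconditionally and released in reverse order. The invariant, restated with comments:

	/* Keeping FPU ownership stable across the sigcontext copy needs both:
	 *  - preempt_disable():   no switch away from the CPU owning the FPU
	 *  - pagefault_disable(): a faulting user access fails fast instead
	 *                         of sleeping while ownership is held
	 */
	#define lock_fpu_owner()   ({ preempt_disable(); pagefault_disable(); })
	#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); })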
*/ - while (!cpumask_test_cpu(cpu, &cpu_callin_map)) + while (!cpumask_test_cpu(cpu, &cpu_callin_map)) { udelay(100); + schedule(); + } synchronise_count_master(cpu); return 0; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index ba32e48d4697..d2d1c1933bc9 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -269,7 +269,6 @@ static void __show_regs(const struct pt_regs *regs) */ printk("epc : %0*lx %pS\n", field, regs->cp0_epc, (void *) regs->cp0_epc); - printk(" %s\n", print_tainted()); printk("ra : %0*lx %pS\n", field, regs->regs[31], (void *) regs->regs[31]); diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 6230f376a44e..d5fa3eaf39a1 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -2389,7 +2389,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, { unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr]; enum emulation_result er = EMULATE_DONE; - unsigned long curr_pc; if (run->mmio.len > sizeof(*gpr)) { kvm_err("Bad MMIO length: %d", run->mmio.len); @@ -2397,11 +2396,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, goto done; } - /* - * Update PC and hold onto current PC in case there is - * an error and we want to rollback the PC - */ - curr_pc = vcpu->arch.pc; er = update_pc(vcpu, vcpu->arch.pending_load_cause); if (er == EMULATE_FAIL) return er; @@ -2415,7 +2409,7 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, if (vcpu->mmio_needed == 2) *gpr = *(int16_t *) run->mmio.data; else - *gpr = *(int16_t *) run->mmio.data; + *gpr = *(uint16_t *)run->mmio.data; break; case 1: diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index 7d12c0dded3d..77e64942f004 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm) FEXPORT(__strnlen_\func\()_nocheck_asm) move v0, a0 PTR_ADDU a1, a0 # stop pointer -1: beq v0, a1, 1f # limit reached? +1: +#ifdef CONFIG_CPU_DADDI_WORKAROUNDS + .set noat + li AT, 1 +#endif + beq v0, a1, 1f # limit reached? 
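The kvm/emulate.c hunk above also repairs a copy-and-paste bug in the 2-byte MMIO load: both branches read through int16_t, so the path meant to zero-extend sign-extended as well. Standalone illustration (plain C):

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	int main(void)
	{
		uint8_t data[2] = { 0xff, 0xff };	/* a 16-bit MMIO read of 0xffff */
		int16_t s; uint16_t u;

		memcpy(&s, data, sizeof(s));
		memcpy(&u, data, sizeof(u));

		printf("as int16_t:  %ld\n", (long)s);	/* -1: sign-extended */
		printf("as uint16_t: %ld\n", (long)u);	/* 65535: zero-extended */
		return 0;
	}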
.ifeqs "\func", "kernel" EX(lb, t0, (v0), .Lfault\@) .else @@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm) .endif .set noreorder bnez t0, 1b -1: PTR_ADDIU v0, 1 +1: +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS + PTR_ADDIU v0, 1 +#else + PTR_ADDU v0, AT + .set at +#endif .set reorder PTR_SUBU v0, a0 jr ra diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile index e70c33fdb881..f2e8153e44f5 100644 --- a/arch/mips/loongson/common/Makefile +++ b/arch/mips/loongson/common/Makefile @@ -3,15 +3,13 @@ # obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \ - bonito-irq.o mem.o machtype.o platform.o + bonito-irq.o mem.o machtype.o platform.o serial.o obj-$(CONFIG_PCI) += pci.o # # Serial port support # obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -loongson-serial-$(CONFIG_SERIAL_8250) := serial.o -obj-y += $(loongson-serial-m) $(loongson-serial-y) obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o obj-$(CONFIG_LOONGSON_MC146818) += rtc.o diff --git a/arch/mips/loongson/loongson-3/smp.c b/arch/mips/loongson/loongson-3/smp.c index e3c68b5da18d..509877c6e9d9 100644 --- a/arch/mips/loongson/loongson-3/smp.c +++ b/arch/mips/loongson/loongson-3/smp.c @@ -272,7 +272,7 @@ void loongson3_ipi_interrupt(struct pt_regs *regs) if (action & SMP_ASK_C0COUNT) { BUG_ON(cpu != 0); c0count = read_c0_count(); - for (i = 1; i < loongson_sysconf.nr_cpus; i++) + for (i = 1; i < num_possible_cpus(); i++) per_cpu(core0_c0count, i) = c0count; } } diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index d31c537ace1d..22b9b2cb9219 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -889,7 +889,7 @@ static inline void cop1_cfc(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case FPCREG_RID: - value = current_cpu_data.fpu_id; + value = boot_cpu_data.fpu_id; break; default: @@ -921,7 +921,7 @@ static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx, (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); /* Preserve read-only bits. */ - mask = current_cpu_data.fpu_msk31; + mask = boot_cpu_data.fpu_msk31; fcr31 = (value & ~mask) | (fcr31 & mask); break; diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 0dbb65a51ce5..2e03ab173591 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1372,7 +1372,7 @@ static int probe_scache(void) scache_size = addr; c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22); c->scache.ways = 1; - c->dcache.waybit = 0; /* does not matter */ + c->scache.waybit = 0; /* does not matter */ return 1; } diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 7ff8637e530d..36c0f26fac6b 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -21,10 +21,10 @@ #include <linux/module.h> #include <linux/kprobes.h> #include <linux/perf_event.h> +#include <linux/uaccess.h> #include <asm/branch.h> #include <asm/mmu_context.h> -#include <asm/uaccess.h> #include <asm/ptrace.h> #include <asm/highmem.h> /* For VMALLOC_END */ #include <linux/kdebug.h> @@ -94,7 +94,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index da815d295239..11661cbc11a8 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -47,7 +47,7 @@ void *kmap_atomic(struct page *page) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -72,6 +72,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -92,6 +93,7 @@ void __kunmap_atomic(void *kvaddr) #endif kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -104,6 +106,7 @@ void *kmap_atomic_pfn(unsigned long pfn) unsigned long vaddr; int idx, type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index faa5c9822ecc..198a3147dd7d 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -90,6 +90,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot) BUG_ON(Page_dcache_dirty(page)); + preempt_disable(); pagefault_disable(); idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); idx += in_interrupt() ? FIX_N_COLOURS : 0; @@ -152,6 +153,7 @@ void kunmap_coherent(void) write_c0_entryhi(old_ctx); local_irq_restore(flags); pagefault_enable(); + preempt_enable(); } void copy_user_highpage(struct page *to, struct page *from, diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index a27a088e6f9f..08318ecb803a 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -495,7 +495,7 @@ static void r4k_tlb_configure(void) if (cpu_has_rixi) { /* - * Enable the no read, no exec bits, and enable large virtual + * Enable the no read, no exec bits, and enable large physical * address. */ #ifdef CONFIG_64BIT diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 5d6139390bf8..e23fdf2a9c80 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -681,11 +681,7 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx) sp_off += config_enabled(CONFIG_64BIT) ? (ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE; - /* - * Subtract the bytes for the last registers since we only care about - * the location on the stack pointer. 
- */ - return sp_off - RSIZE; + return sp_off; } static void build_prologue(struct jit_ctx *ctx) diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index e20b02e3ae28..e10d10b9e82a 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -41,7 +41,7 @@ static irqreturn_t ill_acc_irq_handler(int irq, void *_priv) addr, (type >> ILL_ACC_OFF_S) & ILL_ACC_OFF_M, type & ILL_ACC_LEN_M); - rt_memc_w32(REG_ILL_ACC_TYPE, REG_ILL_ACC_TYPE); + rt_memc_w32(ILL_INT_STATUS, REG_ILL_ACC_TYPE); return IRQ_HANDLED; } diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c index 0134db2ad0a8..5a2a82148d8d 100644 --- a/arch/mips/sgi-ip32/ip32-platform.c +++ b/arch/mips/sgi-ip32/ip32-platform.c @@ -130,9 +130,9 @@ struct platform_device ip32_rtc_device = { .resource = ip32_rtc_resources, }; -+static int __init sgio2_rtc_devinit(void) +static __init int sgio2_rtc_devinit(void) { return platform_device_register(&ip32_rtc_device); } -device_initcall(sgio2_cmos_devinit); +device_initcall(sgio2_rtc_devinit); diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h index 2fbbe4d920aa..1ddea5afba09 100644 --- a/arch/mn10300/include/asm/highmem.h +++ b/arch/mn10300/include/asm/highmem.h @@ -75,6 +75,7 @@ static inline void *kmap_atomic(struct page *page) unsigned long vaddr; int idx, type; + preempt_disable(); pagefault_disable(); if (page < highmem_start_page) return page_address(page); @@ -98,6 +99,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) if (vaddr < FIXADDR_START) { /* FIXME */ pagefault_enable(); + preempt_enable(); return; } @@ -122,6 +124,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } #endif /* __KERNEL__ */ diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 0c2cc5d39c8e..4a1d181ed32f 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -23,8 +23,8 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/vt_kern.h> /* For unblank_screen() */ +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/hardirq.h> #include <asm/cpu-regs.h> @@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 0c9b6afe69e9..b51878b0c6b8 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -77,7 +77,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, * If we're in an interrupt or have no user * context, we must not take the fault.. 
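Every do_page_fault() in this series swaps in_atomic() for faulthandler_disabled(), as in the mn10300 hunk just above. With pagefault_disable() no longer raising the preempt count, in_atomic() alone can miss a pagefault-disabled section; the combined predicate is, conceptually (a simplified model of the linux/uaccess.h helper, not a verbatim copy):

	/* sleeping in the fault path is forbidden when either condition holds */
	static inline bool faulthandler_disabled(void)
	{
		return pagefault_disabled() || in_atomic();
	}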
*/ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index de65f66ea64e..ec2df4bab302 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -142,6 +142,7 @@ static inline void kunmap(struct page *page) static inline void *kmap_atomic(struct page *page) { + preempt_disable(); pagefault_disable(); return page_address(page); } @@ -150,6 +151,7 @@ static inline void __kunmap_atomic(void *addr) { flush_kernel_dcache_page_addr(addr); pagefault_enable(); + preempt_enable(); } #define kmap_atomic_prot(page, prot) kmap_atomic(page) diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index dbd13354ec41..0a90b965cccb 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) #define xchg(ptr, x) \ ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) -#define __HAVE_ARCH_CMPXCHG 1 - /* bug catcher for when unsupported size is used - won't link */ extern void __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 3391d061eccc..78c9fd32c554 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -348,6 +348,10 @@ struct pt_regs; /* forward declaration... */ #define ELF_HWCAP 0 +#define STACK_RND_MASK (is_32bit_task() ? \ + 0x7ff >> (PAGE_SHIFT - 12) : \ + 0x3ffff >> (PAGE_SHIFT - 12)) + struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *); #define arch_randomize_brk arch_randomize_brk diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 8a488c22a99f..809905a811ed 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -181,9 +181,12 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) return 1; } +/* + * Copy architecture-specific thread state + */ int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) + unsigned long kthread_arg, struct task_struct *p) { struct pt_regs *cregs = &(p->thread.regs); void *stack = task_stack_page(p); @@ -195,11 +198,10 @@ copy_thread(unsigned long clone_flags, unsigned long usp, extern void * const child_return; if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ memset(cregs, 0, sizeof(struct pt_regs)); if (!usp) /* idle thread */ return 0; - - /* kernel thread */ /* Must exit via ret_from_kernel_thread in order * to call schedule_tail() */ @@ -215,7 +217,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, #else cregs->gr[26] = usp; #endif - cregs->gr[25] = arg; + cregs->gr[25] = kthread_arg; } else { /* user thread */ /* usp must be word aligned. This also prevents users from diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index e1ffea2f9a0b..5aba01ac457f 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -77,6 +77,9 @@ static unsigned long mmap_upper_limit(void) if (stack_base > STACK_SIZE_MAX) stack_base = STACK_SIZE_MAX; + /* Add space for stack randomization. 
*/ + stack_base += (STACK_RND_MASK << PAGE_SHIFT); + return PAGE_ALIGN(STACK_TOP - stack_base); } diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 47ee620d15d2..6548fd1d2e62 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -26,9 +26,9 @@ #include <linux/console.h> #include <linux/bug.h> #include <linux/ratelimit.h> +#include <linux/uaccess.h> #include <asm/assembly.h> -#include <asm/uaccess.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/traps.h> @@ -800,7 +800,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) * unless pagefault_disable() was called before. */ - if (fault_space == 0 && !in_atomic()) + if (fault_space == 0 && !faulthandler_disabled()) { pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); parisc_terminate("Kernel Fault", regs, code, fault_address); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index e5120e653240..15503adddf4f 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -15,8 +15,8 @@ #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/traps.h> /* Various important other fields */ @@ -207,7 +207,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, int fault; unsigned int flags; - if (in_atomic()) + if (pagefault_disabled()) goto no_context; tsk = current; diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index a3bf5be111ff..51ccc7232042 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -34,7 +34,7 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC @@ -89,5 +89,6 @@ do { \ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() +#define smp_mb__before_spinlock() smp_mb() #endif /* _ASM_POWERPC_BARRIER_H */ diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index d463c68fe7f0..ad6263cffb0f 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. 
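The mce.c fix above is a classic off-by-one: __this_cpu_inc_return() hands back the value after the increment, so using it directly as an array index skipped slot 0 and could step one past the end of the queue. Reduced to standalone C (hypothetical names):

	#include <stdio.h>

	static int counter;		/* stands in for mce_nest_count */

	static int inc_return(void)	/* like __this_cpu_inc_return() */
	{
		return ++counter;
	}

	int main(void)
	{
		int evt[4] = { 0 };
		int index = inc_return() - 1;	/* first event belongs in evt[0] */

		evt[index] = 42;
		printf("first event landed in slot %d (evt[0] = %d)\n",
		       index, evt[0]);
		return 0;
	}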
*/ -#define __HAVE_ARCH_CMPXCHG 1 static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 5f1048eaa5b6..8b3b46b7b0f2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -87,7 +87,7 @@ static inline int prrn_is_enabled(void) #include <asm/smp.h> #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) -#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 15c99b649b04..b2eb4686bd8f 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,7 +73,7 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __this_cpu_inc_return(mce_nest_count); + int index = __this_cpu_inc_return(mce_nest_count) - 1; struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); /* @@ -184,7 +184,7 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __this_cpu_inc_return(mce_queue_count); + index = __this_cpu_inc_return(mce_queue_count) - 1; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { __this_cpu_dec(mce_queue_count); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index f096e72262f4..1db685104ffc 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -213,6 +213,7 @@ SECTIONS *(.opd) } + . = ALIGN(256); .got : AT(ADDR(.got) - LOAD_OFFSET) { __toc_start = .; #ifndef CONFIG_RELOCATABLE diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 48d3c5d2ecc9..df81caab7383 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc) */ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vnext; int i; int srcu_idx; @@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if ((threads_per_core > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, + arch.run_list) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 3cf529ceec5b..ac93a3bd2730 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -27,11 +27,11 @@ int enter_vmx_usercopy(void) if (in_interrupt()) return 0; - /* This acts as preempt_disable() as well and will make - * enable_kernel_altivec(). We need to disable page faults - * as they can call schedule and thus make us lose the VMX - * context. So on page faults, we just fail which will cause - * a fallback to the normal non-vmx copy. + preempt_disable(); + /* + * We need to disable page faults as they can call schedule and + * thus make us lose the VMX context. So on page faults, we just + * fail which will cause a fallback to the normal non-vmx copy. 
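[Editor's note: the vmx-helper rewrite above makes the preemption disabling explicit rather than relying on a side effect of enable_kernel_altivec(). The resulting enter/exit pairing, sketched in kernel context (real APIs from linux/preempt.h and linux/uaccess.h; not standalone code):]

    preempt_disable();   /* keep the VMX register context on this CPU */
    pagefault_disable(); /* user-copy faults now fail fast, never schedule */

    /* ... optimized VMX copy; on a fault it returns short and the caller
     * falls back to the ordinary non-VMX copy ... */

    pagefault_enable();
    preempt_enable();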
*/ pagefault_disable(); @@ -47,6 +47,7 @@ int enter_vmx_usercopy(void) int exit_vmx_usercopy(void) { pagefault_enable(); + preempt_enable(); return 0; } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b396868d2aa7..6d535973b200 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -33,13 +33,13 @@ #include <linux/ratelimit.h> #include <linux/context_tracking.h> #include <linux/hugetlb.h> +#include <linux/uaccess.h> #include <asm/firmware.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> -#include <asm/uaccess.h> #include <asm/tlbflush.h> #include <asm/siginfo.h> #include <asm/debug.h> @@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - if (in_atomic() || mm == NULL) { + if (faulthandler_disabled() || mm == NULL) { if (!user_mode(regs)) { rc = SIGSEGV; goto bail; } - /* in_atomic() in user mode is really bad, + /* faulthandler_disabled() in user mode is really bad, as is current->mm == NULL. */ printk(KERN_EMERG "Page fault in user mode with " - "in_atomic() = %d mm = %p\n", in_atomic(), mm); + "faulthandler_disabled() = %d mm = %p\n", + faulthandler_disabled(), mm); printk(KERN_EMERG "NIP = %lx MSR = %lx\n", regs->nip, regs->msr); die("Weird page fault", regs, SIGSEGV); diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index e7450bdbe83a..e292c8a60952 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); + preempt_enable(); return; } @@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0ce968b00b7c..3385e3d0506e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -689,27 +689,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { - pte_t *ptep; - struct page *page; + pte_t *ptep, pte; unsigned shift; unsigned long mask, flags; + struct page *page = ERR_PTR(-EINVAL); + + local_irq_save(flags); + ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); + if (!ptep) + goto no_page; + pte = READ_ONCE(*ptep); /* + * Verify it is a huge page else bail. * Transparent hugepages are handled by generic code. We can skip them * here. */ - local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); + if (!shift || pmd_trans_huge(__pmd(pte_val(pte)))) + goto no_page; - /* Verify it is a huge page else bail. 
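[Editor's note: the follow_huge_addr() rewrite above reads the pte once into a local with READ_ONCE() and then tests only that snapshot, so a concurrent clear between the shift/huge/present checks cannot be half-observed. A standalone model of the snapshot pattern, using a GCC-style reconstruction of READ_ONCE():]

    #include <stdio.h>

    #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

    static unsigned long pte_slot = 0x1; /* models *ptep; bit 0 = present */

    int main(void)
    {
        /* One racy read ... */
        unsigned long pte = READ_ONCE(pte_slot);

        /* ... then every subsequent check uses the stable local copy. */
        printf("present=%lu huge=%lu\n", pte & 0x1, (pte >> 1) & 0x1);
        return 0;
    }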
*/ - if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) { - local_irq_restore(flags); - return ERR_PTR(-EINVAL); + if (!pte_present(pte)) { + page = NULL; + goto no_page; } mask = (1UL << shift) - 1; - page = pte_page(*ptep); + page = pte_page(pte); if (page) page += (address & mask) / PAGE_SIZE; +no_page: local_irq_restore(flags); return page; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 59daa5eeec25..6bfadf1aa5cb 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -839,6 +839,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm, * hash fault look at them. */ memset(pgtable, 0, PTE_FRAG_SIZE); + /* + * Serialize against find_linux_pte_or_hugepte which does lock-less + * lookup in page tables with local interrupts disabled. For huge pages + * it casts pmd_t to pte_t. Since format of pte_t is different from + * pmd_t we want to prevent transit from pmd pointing to page table + * to pmd pointing to huge page (and back) while interrupts are disabled. + * We clear pmd to possibly replace it with page table pointer in + * different code paths. So make sure we wait for the parallel + * find_linux_pte_or_hugepage to finish. + */ + kick_all_cpus_sync(); return old_pmd; } diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index cbd3d069897f..723a099f6be3 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -217,7 +217,7 @@ static DEFINE_RAW_SPINLOCK(tlbivax_lock); static int mm_is_core_local(struct mm_struct *mm) { return cpumask_subset(mm_cpumask(mm), - topology_thread_cpumask(smp_processor_id())); + topology_sibling_cpumask(smp_processor_id())); } struct tlb_flush_param { diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 7940dc90e80b..b258110da952 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -16,11 +16,12 @@ #define GHASH_DIGEST_SIZE 16 struct ghash_ctx { - u8 icv[16]; - u8 key[16]; + u8 key[GHASH_BLOCK_SIZE]; }; struct ghash_desc_ctx { + u8 icv[GHASH_BLOCK_SIZE]; + u8 key[GHASH_BLOCK_SIZE]; u8 buffer[GHASH_BLOCK_SIZE]; u32 bytes; }; @@ -28,8 +29,10 @@ struct ghash_desc_ctx { static int ghash_init(struct shash_desc *desc) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); memset(dctx, 0, sizeof(*dctx)); + memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); return 0; } @@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm, } memcpy(ctx->key, key, GHASH_BLOCK_SIZE); - memset(ctx->icv, 0, GHASH_BLOCK_SIZE); return 0; } @@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); unsigned int n; u8 *buf = dctx->buffer; int ret; @@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc, src += n; if (!dctx->bytes) { - ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, + ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; @@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc, n = srclen & ~(GHASH_BLOCK_SIZE - 1); if (n) { - ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); + ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n); if (ret != n) return -EIO; src += n; @@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc, return 0; } -static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +static int ghash_flush(struct 
ghash_desc_ctx *dctx) { u8 *buf = dctx->buffer; int ret; @@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) memset(pos, 0, dctx->bytes); - ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); + ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; + + dctx->bytes = 0; } - dctx->bytes = 0; return 0; } static int ghash_final(struct shash_desc *desc, u8 *dst) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); int ret; - ret = ghash_flush(ctx, dctx); + ret = ghash_flush(dctx); if (!ret) - memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); + memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); return ret; } diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 1f374b39a4ec..9d5192c94963 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -125,7 +125,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes) /* fill page with urandom bytes */ get_random_bytes(pg, PAGE_SIZE); /* exor page with stckf values */ - for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) { + for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) { u64 *p = ((u64 *)pg) + n; *p ^= get_tod_clock_fast(); } diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 8d724718ec21..e6f8615a11eb 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 4eadec466b8c..411464f4c97a 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -32,8 +32,6 @@ __old; \ }) -#define __HAVE_ARCH_CMPXCHG - #define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ ({ \ register __typeof__(*(p1)) __old1 asm("2") = (o1); \ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fc642399b489..ef24a212eeb7 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -494,7 +494,7 @@ static inline int pmd_large(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; } -static inline int pmd_pfn(pmd_t pmd) +static inline unsigned long pmd_pfn(pmd_t pmd) { unsigned long origin_mask; diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index b1453a2ae1ca..4990f6c66288 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -22,7 +22,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); #define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id) #define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask) +#define topology_sibling_cpumask(cpu) \ + (&per_cpu(cpu_topology, cpu).thread_mask) #define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id) #define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask) #define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d64a7a62164f..9dd4cc47ddc7 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -98,7 +98,8 @@ static 
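[Editor's note: the s390 prng.c hunk above fixes a classic sizeof() slip — sizeof(PAGE_SIZE/sizeof(u64)) is the size of the quotient's *type*, not the quotient, so only 8 of the 512 u64 words per page were xor'ed with timer values. Demonstrated standalone, assuming a 64-bit build and 4 KiB pages:]

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
        /* Broken bound: sizeof() of an integer expression is just the
         * size of its type — 8 here. */
        printf("broken: %zu\n", sizeof(PAGE_SIZE / sizeof(unsigned long long)));

        /* Intended bound: one iteration per u64 in the page — 512. */
        printf("fixed:  %zu\n", (size_t)(PAGE_SIZE / sizeof(unsigned long long)));
        return 0;
    }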
inline unsigned long extable_fixup(const struct exception_table_entry *x) * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. @@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_user(void *to, const void __user *from, * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((noreturn)); * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. * @@ -290,7 +293,8 @@ __compiletime_warning("copy_from_user() buffer size is not provably correct") * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. * @@ -348,7 +352,8 @@ static inline unsigned long strnlen_user(const char __user *src, unsigned long n * strlen_user: - Get the size of a string in user space. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 76515bcea2f1..4c8f5d7f9c23 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -399,7 +399,7 @@ static inline int do_exception(struct pt_regs *regs, int access) * user context. */ fault = VM_FAULT_BADCONTEXT; - if (unlikely(!user_space_fault(regs) || in_atomic() || !mm)) + if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h index ba8593a515ba..de156ba3bd71 100644 --- a/arch/s390/net/bpf_jit.h +++ b/arch/s390/net/bpf_jit.h @@ -48,7 +48,9 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; * We get 160 bytes stack space from calling function, but only use * 11 * 8 byte (old backchain + r15 - r6) for storing registers. 
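[Editor's note: the hunk below re-derives STK_OFF from named pieces instead of one opaque expression, and the new STK_160_UNUSED is then reused as the BPF frame-pointer offset in the prologue. The arithmetic, checked standalone — assuming MAX_BPF_STACK is 512, its value in this era's linux/bpf.h:]

    #include <stdio.h>

    #define MAX_BPF_STACK 512 /* assumed value from this kernel */

    #define STK_SPACE      (MAX_BPF_STACK + 8 + 4 + 4 + 160)
    #define STK_160_UNUSED (160 - 11 * 8)
    #define STK_OFF        (STK_SPACE - STK_160_UNUSED)

    int main(void)
    {
        /* 688 bytes total, minus the 72 reusable bytes of the caller's
         * 160-byte register save area. */
        printf("STK_OFF = %d\n", STK_OFF); /* 616 */
        return 0;
    }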
*/ -#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8)) +#define STK_SPACE (MAX_BPF_STACK + 8 + 4 + 4 + 160) +#define STK_160_UNUSED (160 - 11 * 8) +#define STK_OFF (STK_SPACE - STK_160_UNUSED) #define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */ #define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */ diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 7690dc8e1ab5..55423d8be580 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -384,13 +384,16 @@ static void bpf_jit_prologue(struct bpf_jit *jit) } /* Setup stack and backchain */ if (jit->seen & SEEN_STACK) { - /* lgr %bfp,%r15 (BPF frame pointer) */ - EMIT4(0xb9040000, BPF_REG_FP, REG_15); + if (jit->seen & SEEN_FUNC) + /* lgr %w1,%r15 (backchain) */ + EMIT4(0xb9040000, REG_W1, REG_15); + /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ + EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF); if (jit->seen & SEEN_FUNC) - /* stg %bfp,152(%r15) (backchain) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0, + /* stg %w1,152(%r15) (backchain) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, 152); } /* @@ -443,8 +446,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) /* * Compile one eBPF instruction into s390x code + * + * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of + * stack space for the large switch statement. */ -static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) +static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) { struct bpf_insn *insn = &fp->insnsi[i]; int jmp_off, last, insn_count = 1; @@ -588,8 +594,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9160000, dst_reg, rc_reg); break; } - case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */ - case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */ + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */ { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; @@ -602,10 +608,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4_IMM(0xa7090000, REG_W0, 0); /* lgr %w1,%dst */ EMIT4(0xb9040000, REG_W1, dst_reg); - /* llgfr %dst,%src (u32 cast) */ - EMIT4(0xb9160000, dst_reg, src_reg); /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, dst_reg); + EMIT4(0xb9870000, REG_W0, src_reg); /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; @@ -632,8 +636,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9160000, dst_reg, rc_reg); break; } - case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */ - case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */ + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */ { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? 
REG_W1 : REG_W0; @@ -649,7 +653,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9040000, REG_W1, dst_reg); /* dlg %w0,<d(imm)>(%l) */ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, - EMIT_CONST_U64((u32) imm)); + EMIT_CONST_U64(imm)); /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; diff --git a/arch/score/include/asm/cmpxchg.h b/arch/score/include/asm/cmpxchg.h index f384839c3ee5..cc3f6420b71c 100644 --- a/arch/score/include/asm/cmpxchg.h +++ b/arch/score/include/asm/cmpxchg.h @@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m, (unsigned long)(o), \ (unsigned long)(n))) -#define __HAVE_ARCH_CMPXCHG 1 - #include <asm-generic/cmpxchg-local.h> #endif /* _ASM_SCORE_CMPXCHG_H */ diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index ab66ddde777b..20a3591225cc 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -36,7 +36,8 @@ * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -61,7 +62,8 @@ * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -79,7 +81,8 @@ * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -98,7 +101,8 @@ * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -119,7 +123,8 @@ * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger diff --git a/arch/score/lib/string.S b/arch/score/lib/string.S index 00b7d3a2fc60..16efa3ad037f 100644 --- a/arch/score/lib/string.S +++ b/arch/score/lib/string.S @@ -175,10 +175,10 @@ ENTRY(__clear_user) br r3 .section .fixup, "ax" +99: br r3 .previous .section __ex_table, "a" .align 2 -99: .word 0b, 99b .previous diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 6860beb2a280..37a6c2e0e969 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -34,6 +34,7 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/ptrace.h> +#include <linux/uaccess.h> /* * This routine handles page faults. 
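[Editor's note: stepping back to the bpf_jit_comp.c hunks above — the old JIT truncated the divisor of a 64-bit BPF_DIV/BPF_MOD to 32 bits before dividing, which silently changes results for large divisors. A standalone demonstration of the difference:]

    #include <stdio.h>

    int main(void)
    {
        unsigned long long dst = 10, src = 0x100000003ULL;

        printf("correct 64-bit: %llu\n", dst / src);               /* 0 */
        printf("old truncated:  %llu\n", dst / (unsigned int)src); /* 10/3 = 3 */
        return 0;
    }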
It determines the address, @@ -73,7 +74,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (pagefault_disabled() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h index 43715308b068..bf91037db4e0 100644 --- a/arch/sh/include/asm/barrier.h +++ b/arch/sh/include/asm/barrier.h @@ -32,7 +32,7 @@ #define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop") #endif -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) #include <asm-generic/barrier.h> diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h index f6bd1406b897..85c97b188d71 100644 --- a/arch/sh/include/asm/cmpxchg.h +++ b/arch/sh/include/asm/cmpxchg.h @@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void); * if something tries to do an invalid cmpxchg(). */ extern void __cmpxchg_called_with_bad_pointer(void); -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, unsigned long new, int size) { diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index a58fec9b55e0..79d8276377d1 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -17,6 +17,7 @@ #include <linux/kprobes.h> #include <linux/perf_event.h> #include <linux/kdebug.h> +#include <linux/uaccess.h> #include <asm/io_trapped.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> @@ -438,9 +439,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, /* * If we're in an interrupt, have no user context or are running - * in an atomic region then we must not take the fault: + * with pagefaults disabled then we must not take the fault: */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(faulthandler_disabled() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 76648941fea7..809941e33e12 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -40,8 +40,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define dma_rmb() rmb() #define dma_wmb() wmb() -#define set_mb(__var, __value) \ - do { __var = __value; membar_safe("#StoreLoad"); } while(0) +#define smp_store_mb(__var, __value) \ + do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0) #ifdef CONFIG_SMP #define smp_mb() mb() diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h index d38b52dca216..83ffb83c5397 100644 --- a/arch/sparc/include/asm/cmpxchg_32.h +++ b/arch/sparc/include/asm/cmpxchg_32.h @@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int * * Cribbed from <asm-parisc/atomic.h> */ -#define __HAVE_ARCH_CMPXCHG 1 /* bug catcher for when unsupported size is used - won't link */ void __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index 0e1ed6cfbf68..faa2f61058c2 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, #include <asm-generic/cmpxchg-local.h> -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long 
__cmpxchg_u32(volatile int *m, int old, int new) { diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index a6e424d185d0..a6cfdabb6054 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -24,7 +24,8 @@ typedef struct { unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - int core_id; + unsigned short sock_id; + unsigned short core_id; int proc_id; } cpuinfo_sparc; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index dc165ebdf05a..2a52c91d2c8a 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -308,12 +308,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) " sllx %1, 32, %1\n" " or %0, %1, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sethi %%hi(%4), %0\n" + " .word 662b\n" + " or %1, %%ulo(%4), %1\n" + " or %0, %%lo(%4), %0\n" + " .word 663b\n" + " sllx %1, 32, %1\n" + " or %0, %1, %0\n" + " .previous\n" : "=r" (mask), "=r" (tmp) : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U), "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V), + "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | + _PAGE_CP_4V | _PAGE_E_4V | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V)); return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); @@ -342,9 +356,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) " andn %0, %4, %0\n" " or %0, %5, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " andn %0, %6, %0\n" + " or %0, %5, %0\n" + " .previous\n" : "=r" (val) : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U), - "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V)); + "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V), + "i" (_PAGE_CP_4V)); return __pgprot(val); } diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index ed8f071132e4..01d17046225a 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -40,11 +40,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #ifdef CONFIG_SMP #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) -#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) +#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; +extern cpumask_t cpu_core_sib_map[NR_CPUS]; static inline const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_core_map[cpu]; diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index 6fd4436d32f0..ec9c04de3664 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -79,6 +79,8 @@ struct sun4v_2insn_patch_entry { }; extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, __sun4v_2insn_patch_end; +extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch, + __sun_m7_2insn_patch_end; #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 
07cc49e541f4..0f679421b468 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -69,6 +69,8 @@ void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, struct sun4v_1insn_patch_entry *); void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, struct sun4v_2insn_patch_entry *); +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *, + struct sun4v_2insn_patch_entry *); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index 94e392bdee7d..814fb1729b12 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -723,7 +723,6 @@ static int grpci2_of_probe(struct platform_device *ofdev) err = -ENOMEM; goto err1; } - memset(grpci2priv, 0, sizeof(*grpci2priv)); priv->regs = regs; priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */ priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT; diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 26c80e18d7b1..6f80936e0eea 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -614,45 +614,68 @@ static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp) } } -static void mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) +static void find_back_node_value(struct mdesc_handle *hp, u64 node, + char *srch_val, + void (*func)(struct mdesc_handle *, u64, int), + u64 val, int depth) { - u64 a; - - mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { - u64 t = mdesc_arc_target(hp, a); - const char *name; - const u64 *id; + u64 arc; - name = mdesc_node_name(hp, t); - if (!strcmp(name, "cpu")) { - id = mdesc_get_property(hp, t, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } else { - u64 j; + /* Since we have an estimate of recursion depth, do a sanity check. */ + if (depth == 0) + return; - mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) { - u64 n = mdesc_arc_target(hp, j); - const char *n_name; + mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) { + u64 n = mdesc_arc_target(hp, arc); + const char *name = mdesc_node_name(hp, n); - n_name = mdesc_node_name(hp, n); - if (strcmp(n_name, "cpu")) - continue; + if (!strcmp(srch_val, name)) + (*func)(hp, n, val); - id = mdesc_get_property(hp, n, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } - } + find_back_node_value(hp, n, srch_val, func, val, depth-1); } } +static void __mark_core_id(struct mdesc_handle *hp, u64 node, + int core_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).core_id = core_id; +} + +static void __mark_sock_id(struct mdesc_handle *hp, u64 node, + int sock_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = sock_id; +} + +static void mark_core_ids(struct mdesc_handle *hp, u64 mp, + int core_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10); +} + +static void mark_sock_ids(struct mdesc_handle *hp, u64 mp, + int sock_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10); +} + static void set_core_ids(struct mdesc_handle *hp) { int idx; u64 mp; idx = 1; + + /* Identify unique cores by looking for cpus backpointed to by + * level 1 instruction caches. 
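[Editor's note: find_back_node_value() above generalizes the old hard-coded two-level loop into a bounded-depth recursive walk over machine-description back arcs, invoking a callback on every node with the wanted name. A self-contained model of the walk on a toy graph — the node type here is hypothetical, not the mdesc API:]

    #include <stdio.h>
    #include <string.h>

    struct node {
        const char *name;
        struct node *back[4]; /* "back" arcs, as in the machine description */
        int nback;
    };

    static void walk_back(struct node *n, const char *want,
                          void (*fn)(struct node *), int depth)
    {
        if (depth == 0)       /* sanity bound on recursion, as in the patch */
            return;
        for (int i = 0; i < n->nback; i++) {
            struct node *t = n->back[i];

            if (!strcmp(t->name, want))
                fn(t);
            walk_back(t, want, fn, depth - 1);
        }
    }

    static void mark(struct node *n) { printf("marking %s\n", n->name); }

    int main(void)
    {
        struct node cpu0 = { "cpu", { 0 }, 0 };
        struct node l1   = { "cache", { &cpu0 }, 1 };

        walk_back(&l1, "cpu", mark, 10); /* prints: marking cpu */
        return 0;
    }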
+ */ mdesc_for_each_node_by_name(hp, mp, "cache") { const u64 *level; const char *type; @@ -667,11 +690,72 @@ static void set_core_ids(struct mdesc_handle *hp) continue; mark_core_ids(hp, mp, idx); + idx++; + } +} + +static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) +{ + u64 mp; + int idx = 1; + int fnd = 0; + + /* Identify unique sockets by looking for cpus backpointed to by + * shared level n caches. + */ + mdesc_for_each_node_by_name(hp, mp, "cache") { + const u64 *cur_lvl; + + cur_lvl = mdesc_get_property(hp, mp, "level", NULL); + if (*cur_lvl != level) + continue; + + mark_sock_ids(hp, mp, idx); + idx++; + fnd = 1; + } + return fnd; +} + +static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp) +{ + int idx = 1; + mdesc_for_each_node_by_name(hp, mp, "socket") { + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 t = mdesc_arc_target(hp, a); + const char *name; + const u64 *id; + + name = mdesc_node_name(hp, t); + if (strcmp(name, "cpu")) + continue; + + id = mdesc_get_property(hp, t, "id", NULL); + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = idx; + } idx++; } } +static void set_sock_ids(struct mdesc_handle *hp) +{ + u64 mp; + + /* If machine description exposes sockets data use it. + * Otherwise fallback to use shared L3 or L2 caches. + */ + mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets"); + if (mp != MDESC_NODE_NULL) + return set_sock_ids_by_socket(hp, mp); + + if (!set_sock_ids_by_cache(hp, 3)) + set_sock_ids_by_cache(hp, 2); +} + static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) { u64 a; @@ -707,7 +791,6 @@ static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name) continue; mark_proc_ids(hp, mp, idx); - idx++; } } @@ -900,6 +983,7 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask) set_core_ids(hp); set_proc_ids(hp); + set_sock_ids(hp); mdesc_release(hp); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 6f7251fd2eab..c928bc64b4ba 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1002,6 +1002,38 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); #ifdef CONFIG_SYSFS + +#define SLOT_NAME_SIZE 11 /* Max decimal digits + null in u32 */ + +static void pcie_bus_slot_names(struct pci_bus *pbus) +{ + struct pci_dev *pdev; + struct pci_bus *bus; + + list_for_each_entry(pdev, &pbus->devices, bus_list) { + char name[SLOT_NAME_SIZE]; + struct pci_slot *pci_slot; + const u32 *slot_num; + int len; + + slot_num = of_get_property(pdev->dev.of_node, + "physical-slot#", &len); + + if (slot_num == NULL || len != 4) + continue; + + snprintf(name, sizeof(name), "%u", slot_num[0]); + pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL); + + if (IS_ERR(pci_slot)) + pr_err("PCI: pci_create_slot returned %ld.\n", + PTR_ERR(pci_slot)); + } + + list_for_each_entry(bus, &pbus->children, node) + pcie_bus_slot_names(bus); +} + static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus) { const struct pci_slot_names { @@ -1053,18 +1085,29 @@ static int __init of_pci_slot_init(void) while ((pbus = pci_find_next_bus(pbus)) != NULL) { struct device_node *node; + struct pci_dev *pdev; + + pdev = list_first_entry(&pbus->devices, struct pci_dev, + bus_list); - if (pbus->self) { - /* PCI->PCI bridge */ - node = pbus->self->dev.of_node; + if (pdev && pci_is_pcie(pdev)) { + pcie_bus_slot_names(pbus); } else { - struct pci_pbm_info *pbm = pbus->sysdata; - /* Host PCI controller */ - node = pbm->op->dev.of_node; - } + if (pbus->self) 
{ + + /* PCI->PCI bridge */ + node = pbus->self->dev.of_node; + + } else { + struct pci_pbm_info *pbm = pbus->sysdata; - pci_bus_slot_names(node, pbus); + /* Host PCI controller */ + node = pbm->op->dev.of_node; + } + + pci_bus_slot_names(node, pbus); + } } return 0; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index c38d19fc27ba..f7b261749383 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -255,6 +255,24 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start, } } +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start, + struct sun4v_2insn_patch_entry *end) +{ + while (start < end) { + unsigned long addr = start->addr; + + *(unsigned int *) (addr + 0) = start->insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = start->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + start++; + } +} + static void __init sun4v_patch(void) { extern void sun4v_hvapi_init(void); @@ -267,6 +285,9 @@ static void __init sun4v_patch(void) sun4v_patch_2insn_range(&__sun4v_2insn_patch, &__sun4v_2insn_patch_end); + if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7) + sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, + &__sun_m7_2insn_patch_end); sun4v_hvapi_init(); } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 61139d9924ca..19cd08d18672 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -60,8 +60,12 @@ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); +EXPORT_SYMBOL(cpu_core_sib_map); static cpumask_t smp_commenced_mask; @@ -1243,6 +1247,15 @@ void smp_fill_in_sib_core_maps(void) } } + for_each_present_cpu(i) { + unsigned int j; + + for_each_present_cpu(j) { + if (cpu_data(i).sock_id == cpu_data(j).sock_id) + cpumask_set_cpu(j, &cpu_core_sib_map[i]); + } + } + for_each_present_cpu(i) { unsigned int j; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 09243057cb0b..f1a2f688b28a 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -138,6 +138,11 @@ SECTIONS *(.pause_3insn_patch) __pause_3insn_patch_end = .; } + .sun_m7_2insn_patch : { + __sun_m7_2insn_patch = .; + *(.sun_m7_2insn_patch) + __sun_m7_2insn_patch_end = .; + } PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 70d817154fe8..c399e7b3b035 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -21,6 +21,7 @@ #include <linux/perf_event.h> #include <linux/interrupt.h> #include <linux/kdebug.h> +#include <linux/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -29,7 +30,6 @@ #include <asm/setup.h> #include <asm/smp.h> #include <asm/traps.h> -#include <asm/uaccess.h> #include "mm_32.h" @@ -196,7 +196,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, * If we're in an interrupt or have no user * context, we must not take the fault.. 
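[Editor's note: the in_atomic()-to-faulthandler_disabled()/pagefault_disabled() substitution just below is the change this whole series makes across every arch fault handler. Reconstructed from the call sites in these hunks — so treat this as a sketch, not the authoritative definition — the new helper is essentially:]

    /* True in atomic context *or* inside a pagefault_disable() section,
     * so the distinction also works on !CONFIG_PREEMPT kernels where
     * in_atomic() alone could not see pagefault_disable(). */
    #define faulthandler_disabled() (pagefault_disabled() || in_atomic())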
*/ - if (in_atomic() || !mm) + if (pagefault_disabled() || !mm) goto no_context; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 479823249429..e9268ea1a68d 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -22,12 +22,12 @@ #include <linux/kdebug.h> #include <linux/percpu.h> #include <linux/context_tracking.h> +#include <linux/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/openprom.h> #include <asm/oplib.h> -#include <asm/uaccess.h> #include <asm/asi.h> #include <asm/lsu.h> #include <asm/sections.h> @@ -330,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto intr_or_no_mm; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 449f864f0cef..a454ec5ff07a 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page) unsigned long vaddr; long idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -91,6 +91,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -126,5 +127,6 @@ void __kunmap_atomic(void *kvaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 4ca0d6ba5ec8..c5d08b89a96c 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -54,6 +54,7 @@ #include "init_64.h" unsigned long kern_linear_pte_xor[4] __read_mostly; +static unsigned long page_cache4v_flag; /* A bitmap, two bits for every 256MB of physical memory. These two * bits determine what page size we use for kernel linear @@ -1909,11 +1910,24 @@ static void __init sun4u_linear_pte_xor_finalize(void) static void __init sun4v_linear_pte_xor_finalize(void) { + unsigned long pagecv_flag; + + /* Bit 9 of TTE is no longer CV bit on M7 processor and it instead + * enables MCD error. Do not set bit 9 on M7 processor. 
+ */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + pagecv_flag = 0x00; + break; + default: + pagecv_flag = _PAGE_CV_4V; + break; + } #ifndef CONFIG_DEBUG_PAGEALLOC if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; @@ -1922,7 +1936,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; @@ -1931,7 +1945,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; @@ -1958,6 +1972,13 @@ static phys_addr_t __init available_memory(void) return available; } +#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) +#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) +#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) +#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) +#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) +#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) + /* We need to exclude reserved regions. This exclusion will include * vmlinux and initrd. To be more precise the initrd size could be used to * compute a new lower limit because it is freed later during initialization. @@ -2034,6 +2055,25 @@ void __init paging_init(void) memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); #endif + /* TTE.cv bit on sparc v9 occupies the same position as TTE.mcde + * bit on M7 processor. This is a conflicting usage of the same + * bit. Enabling TTE.cv on M7 would turn on Memory Corruption + * Detection error on all pages and this will lead to problems + * later. Kernel does not run with MCD enabled and hence rest + * of the required steps to fully configure memory corruption + * detection are not taken. We need to ensure TTE.mcde is not + * set on M7 processor. Compute the value of cacheability + * flag for use later taking this into consideration. 
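[Editor's note: both init_64.c hunks pick the cacheability bits once per chip type instead of hard-coding _PAGE_CV_4V, since on M7 that bit position arms Memory Corruption Detection rather than cacheability. The shape of the selection, sketched standalone with hypothetical bit values (the real constants live in pgtable_64.h):]

    #include <stdio.h>

    /* Hypothetical stand-ins for the real pgtable_64.h constants. */
    #define _PAGE_CP_4V 0x400UL
    #define _PAGE_CV_4V 0x200UL /* on M7, this bit position means MCD-enable */

    enum chip { CHIP_SPARC_M7, CHIP_OTHER };

    static unsigned long page_cache4v_flag(enum chip c)
    {
        switch (c) {
        case CHIP_SPARC_M7:
            return _PAGE_CP_4V;               /* never set the MCD bit */
        default:
            return _PAGE_CP_4V | _PAGE_CV_4V; /* classic cp+cv caching */
        }
    }

    int main(void)
    {
        printf("M7: 0x%lx other: 0x%lx\n",
               page_cache4v_flag(CHIP_SPARC_M7), page_cache4v_flag(CHIP_OTHER));
        return 0;
    }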
+ */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + page_cache4v_flag = _PAGE_CP_4V; + break; + default: + page_cache4v_flag = _PAGE_CACHE_4V; + break; + } + if (tlb_type == hypervisor) sun4v_pgprot_init(); else @@ -2274,13 +2314,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) -#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) -#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) -#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) -#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) -#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) - pgprot_t PAGE_KERNEL __read_mostly; EXPORT_SYMBOL(PAGE_KERNEL); @@ -2312,8 +2345,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, _PAGE_P_4U | _PAGE_W_4U); if (tlb_type == hypervisor) pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V); pte_base |= _PAGE_PMD_HUGE; @@ -2450,14 +2482,14 @@ static void __init sun4v_pgprot_init(void) int i; PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | - _PAGE_CACHE_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | __ACCESS_BITS_4V | __DIRTY_BITS_4V | _PAGE_EXEC_4V); PAGE_KERNEL_LOCKED = PAGE_KERNEL; _PAGE_IE = _PAGE_IE_4V; _PAGE_E = _PAGE_E_4V; - _PAGE_CACHE = _PAGE_CACHE_4V; + _PAGE_CACHE = page_cache4v_flag; #ifdef CONFIG_DEBUG_PAGEALLOC kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; @@ -2465,8 +2497,8 @@ static void __init sun4v_pgprot_init(void) kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ PAGE_OFFSET; #endif - kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V | + _PAGE_W_4V); for (i = 1; i < 4; i++) kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; @@ -2479,12 +2511,12 @@ static void __init sun4v_pgprot_init(void) _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); - page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; - page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); - page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_EXEC_4V); - page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_EXEC_4V); page_exec_bit = _PAGE_EXEC_4V; @@ -2542,7 +2574,7 @@ static unsigned long kern_large_tte(unsigned long paddr) _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); if (tlb_type == hypervisor) val = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | _PAGE_EXEC_4V | _PAGE_W_4V); return val | paddr; @@ -2706,7 +2738,7 @@ void hugetlb_setup(struct pt_regs *regs) struct mm_struct *mm = current->mm; struct tsb_config *tp; - if (in_atomic() || !mm) { + if (faulthandler_disabled() || !mm) { const struct exception_table_entry *entry; entry = search_exception_tables(regs->tpc); diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index 7b11c5fadd42..0496970cef82 100644 --- 
a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) -/* Define this to indicate that cmpxchg is an efficient operation. */ -#define __HAVE_ARCH_CMPXCHG - #endif /* !__ASSEMBLY__ */ #endif /* _ASM_TILE_ATOMIC_64_H */ diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index 938311844233..76b0d0ebb244 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -55,7 +55,7 @@ static inline const struct cpumask *cpumask_of_node(int node) #define topology_physical_package_id(cpu) ((void)(cpu), 0) #define topology_core_id(cpu) (cpu) #define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask) -#define topology_thread_cpumask(cpu) cpumask_of(cpu) +#define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif #endif /* _ASM_TILE_TOPOLOGY_H */ diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index f41cb53cf645..a33276bf5ca1 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -78,7 +78,8 @@ int __range_ok(unsigned long addr, unsigned long size); * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -192,7 +193,8 @@ extern int __get_user_bad(void) * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -274,7 +276,8 @@ extern int __put_user_bad(void) * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -330,7 +333,8 @@ extern int __put_user_bad(void) * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -366,7 +370,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n) * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. @@ -437,7 +442,8 @@ static inline unsigned long __must_check copy_from_user(void *to, * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. 
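[Editor's note: the kernel-doc updates in this file, like the s390 and score ones earlier, all encode the same rule — a user copy sleeps only when pagefaults are enabled. The pattern that makes the qualification matter, sketched in kernel context (not standalone; __copy_from_user_inatomic() and copy_from_user() are the real APIs):]

    static unsigned long copy_with_fallback(void *dst, const void __user *usrc,
                                            unsigned long len)
    {
        unsigned long left;

        pagefault_disable();
        left = __copy_from_user_inatomic(dst, usrc, len); /* never sleeps */
        pagefault_enable();

        if (left)
            left = copy_from_user(dst, usrc, len); /* may sleep: faults on */
        return left;
    }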
* * Copy data from user space to user space. Caller must check * the specified blocks with access_ok() before calling this function. diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index e83cc999da02..3f4f58d34a92 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_regs *regs, /* * If we're in an interrupt, have no user context or are running in an - * atomic region then we must not take the fault. + * region with pagefaults disabled then we must not take the fault. */ - if (in_atomic() || !mm) { + if (pagefault_disabled() || !mm) { vma = NULL; /* happy compiler */ goto bad_area_nosemaphore; } diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 6aa2f2625447..fcd545014e79 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) int idx, type; pte_t *pte; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); /* Avoid icache flushes by disallowing atomic executable mappings. */ @@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 8e4daf44e980..47ff9b7f3e5d 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/hardirq.h> #include <linux/module.h> +#include <linux/uaccess.h> #include <asm/current.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -35,10 +36,10 @@ int handle_page_fault(unsigned long address, unsigned long ip, *code_out = SEGV_MAPERR; /* - * If the fault was during atomic operation, don't take the fault, just + * If the fault was with pagefaults disabled, don't take the fault, just * fail. */ - if (in_atomic()) + if (faulthandler_disabled()) goto out_nosemaphore; if (is_user) diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 0dc922dba915..afccef5529cc 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -218,7 +218,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 226d5696e1d1..4e986e809861 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -127,7 +127,8 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_QUEUE_RWLOCK + select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_USE_QUEUED_RWLOCKS select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION @@ -666,7 +667,7 @@ config PARAVIRT_DEBUG config PARAVIRT_SPINLOCKS bool "Paravirtualization layer for spinlocks" depends on PARAVIRT && SMP - select UNINLINE_SPIN_UNLOCK + select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS ---help--- Paravirtualized spinlocks allow a pvops backend to replace the spinlock implementation with something virtualization-friendly diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 72484a645f05..2fd3ebbb4e33 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -332,4 +332,16 @@ config X86_DEBUG_STATIC_CPU_HAS If unsure, say N. 
+config X86_DEBUG_FPU + bool "Debug the x86 FPU code" + depends on DEBUG_KERNEL + default y + ---help--- + If this option is enabled then there will be extra sanity + checks and (boot time) debug printouts added to the kernel. + This debugging adds some small amount of runtime overhead + to the kernel. + + If unsure, say N. + endmenu diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ef17683484e9..48304b89b601 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1109,6 +1109,8 @@ struct boot_params *make_boot_params(struct efi_config *c) if (!cmdline_ptr) goto fail; hdr->cmd_line_ptr = (unsigned long)cmdline_ptr; + /* Fill in upper bits of command line address, NOP on 32 bit */ + boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32; hdr->ramdisk_image = 0; hdr->ramdisk_size = 0; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 89dd0d78013a..805d25ca5f1d 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -2,15 +2,14 @@ #define BOOT_COMPRESSED_MISC_H /* - * we have to be careful, because no indirections are allowed here, and - * paravirt_ops is a kind of one. As it will only run in baremetal anyway, - * we just keep it from happening + * Special hack: we have to be careful, because no indirections are allowed here, + * and paravirt_ops is a kind of one. As it will only run in baremetal anyway, + * we just keep it from happening. (This list needs to be extended when new + * paravirt and debugging variants are added.) */ #undef CONFIG_PARAVIRT +#undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_KASAN -#ifdef CONFIG_X86_32 -#define _ASM_X86_DESC_H 1 -#endif #include <linux/linkage.h> #include <linux/screen_info.h> diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 112cefacf2af..b419f43ce0c5 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -32,7 +32,7 @@ #include <crypto/lrw.h> #include <crypto/xts.h> #include <asm/cpu_device_id.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/crypto/aes.h> #include <crypto/ablk_helper.h> #include <crypto/scatterwalk.h> diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index baf0ac21ace5..4c65c70e628b 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -19,8 +19,7 @@ #include <crypto/ctr.h> #include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> @@ -561,16 +560,15 @@ static struct crypto_alg cmll_algs[10] = { { static int __init camellia_aesni_init(void) { - u64 xcr0; + const char *feature_name; if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { pr_info("AVX2 or AES-NI instructions are not detected.\n"); return -ENODEV; } - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX2 detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 78818a1e73e3..80a0e4389c9a 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ 
b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -19,8 +19,7 @@ #include <crypto/ctr.h> #include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> @@ -553,16 +552,10 @@ static struct crypto_alg cmll_algs[10] = { { static int __init camellia_aesni_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { - pr_info("AVX or AES-NI instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 236c80974457..be00aa48b2b5 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -31,8 +31,7 @@ #include <crypto/cast5.h> #include <crypto/cryptd.h> #include <crypto/ctr.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/glue_helper.h> #define CAST5_PARALLEL_BLOCKS 16 @@ -468,16 +467,10 @@ static struct crypto_alg cast5_algs[6] = { { static int __init cast5_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - pr_info("AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index f448810ca4ac..5dbba7224221 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -36,8 +36,7 @@ #include <crypto/ctr.h> #include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/glue_helper.h> #define CAST6_PARALLEL_BLOCKS 8 @@ -590,16 +589,10 @@ static struct crypto_alg cast6_algs[10] = { { static int __init cast6_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - pr_info("AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 1937fc1d8763..07d2c6c86a54 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -35,7 +35,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_device_id.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 28640c3d6af7..81a595d75cf5 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -32,8 +32,7 @@ #include <asm/cpufeature.h> #include 
<asm/cpu_device_id.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index b6c67bf30fdf..a3fcfc97a311 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -29,7 +29,7 @@ #include <linux/init.h> #include <linux/string.h> #include <linux/kernel.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/cpufeature.h> #include <asm/cpu_device_id.h> diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index f368ba261739..5a2f30f9f52d 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -18,7 +18,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/crypto.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> struct crypto_fpu_ctx { struct crypto_blkcipher *child; diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 2079baf06bdd..64d7cf1b50e1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,7 +19,7 @@ #include <crypto/cryptd.h> #include <crypto/gf128mul.h> #include <crypto/internal/hash.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/cpu_device_id.h> #define GHASH_BLOCK_SIZE 16 diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 2f63dc89e7a9..7d838dc4d888 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -20,8 +20,7 @@ #include <crypto/lrw.h> #include <crypto/xts.h> #include <crypto/serpent.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/serpent-avx.h> #include <asm/crypto/glue_helper.h> @@ -537,16 +536,14 @@ static struct crypto_alg srp_algs[10] = { { static int __init init(void) { - u64 xcr0; + const char *feature_name; if (!cpu_has_avx2 || !cpu_has_osxsave) { pr_info("AVX2 instructions are not detected.\n"); return -ENODEV; } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index c8d478af8456..da7dafc9b16d 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -36,8 +36,7 @@ #include <crypto/ctr.h> #include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/serpent-avx.h> #include <asm/crypto/glue_helper.h> @@ -596,16 +595,10 @@ static struct crypto_alg serpent_algs[10] = { { static int __init serpent_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - printk(KERN_INFO "AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - printk(KERN_INFO "AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index 
e510b1c5d690..f53ed1dc88ea 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -65,11 +65,8 @@ #include <crypto/mcryptd.h> #include <crypto/crypto_wq.h> #include <asm/byteorder.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> #include <linux/hardirq.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/api.h> #include "sha_mb_ctx.h" #define FLUSH_INTERVAL 1000 /* in usec */ diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 33d1b9dc14cc..7c48e8b20848 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -29,9 +29,7 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha1_base.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, @@ -123,15 +121,9 @@ static struct shash_alg alg = { #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (cpu_has_avx) + pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index ccc338881ee8..f8097fc0d1d1 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -37,9 +37,7 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha256_base.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <linux/string.h> asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, @@ -132,15 +130,9 @@ static struct shash_alg algs[] = { { #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (cpu_has_avx) + pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index d9fa4c1e063f..2edad7b81870 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -35,9 +35,7 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha512_base.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <linux/string.h> @@ -131,15 +129,9 @@ static struct shash_alg algs[] = { { #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (cpu_has_avx) + pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index b5e2d5651851..c2bd0ce718ee 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -36,9 +36,7 @@ #include <crypto/ctr.h> #include 
<crypto/lrw.h> #include <crypto/xts.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/twofish.h> #include <asm/crypto/glue_helper.h> #include <crypto/scatterwalk.h> @@ -558,16 +556,10 @@ static struct crypto_alg twofish_algs[10] = { { static int __init twofish_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - printk(KERN_INFO "AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - printk(KERN_INFO "AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index c81d35e6c7f1..ae3a29ae875b 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -21,8 +21,8 @@ #include <linux/binfmts.h> #include <asm/ucontext.h> #include <asm/uaccess.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> #include <asm/ptrace.h> #include <asm/ia32_unistd.h> #include <asm/user32.h> @@ -198,7 +198,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, buf = compat_ptr(tmp); } get_user_catch(err); - err |= restore_xstate_sig(buf, 1); + err |= fpu__restore_sig(buf, 1); force_iret(); @@ -308,6 +308,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { + struct fpu *fpu = ¤t->thread.fpu; unsigned long sp; /* Default to using normal stack */ @@ -322,12 +323,12 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (used_math()) { + if (fpu->fpstate_active) { unsigned long fx_aligned, math_size; - sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); + sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); *fpstate = (struct _fpstate_ia32 __user *) sp; - if (save_xstate_sig(*fpstate, (void __user *)fx_aligned, + if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, math_size) < 0) return (void __user *) -1L; } diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index ba32af062f61..7bfc85bbb8ff 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -52,6 +52,12 @@ struct alt_instr { u8 padlen; /* length of build-time padding */ } __packed; +/* + * Debug flag that can be tested to see whether alternative + * instructions were patched in already: + */ +extern int alternatives_patched; + extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index aaac3b2fb746..1a5da2e63aee 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -98,11 +98,22 @@ static inline u16 amd_get_node_id(struct pci_dev *pdev) return 0; } +static inline bool amd_gart_present(void) +{ + /* GART present only on Fam15h, upto model 0fh */ + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || + (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) + return true; + + return false; +} + #else #define amd_nb_num(x) 0 #define amd_nb_has_feature(x) false #define node_to_amd_nb(x) NULL +#define 
amd_gart_present(x) false #endif diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 959e45b81fe2..e51a8f803f55 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -35,12 +35,12 @@ #define smp_mb() mb() #define smp_rmb() dma_rmb() #define smp_wmb() barrier() -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #endif /* SMP */ #define read_barrier_depends() do { } while (0) diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 99c105d78b7e..ad19841eddfe 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -4,8 +4,6 @@ #include <linux/compiler.h> #include <asm/alternative.h> /* Provides LOCK_PREFIX */ -#define __HAVE_ARCH_CMPXCHG 1 - /* * Non-existant functions to indicate usage errors at link time * (or compile-time if the compiler implements __compiletime_error(). diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 1eef55596e82..03bb1065c335 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -7,7 +7,7 @@ #include <linux/kernel.h> #include <linux/crypto.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <crypto/b128ops.h> typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 808dae63eeea..1f5b7287d1ad 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -127,50 +127,14 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) -static inline void * +void * dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - void *memory; - - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - - if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) - return memory; - - if (!dev) - dev = &x86_dma_fallback_dev; - - if (!is_device_dma_capable(dev)) - return NULL; - - if (!ops->alloc) - return NULL; - - memory = ops->alloc(dev, size, dma_handle, - dma_alloc_coherent_gfp_flags(dev, gfp), attrs); - debug_dma_alloc_coherent(dev, size, *dma_handle, memory); - - return memory; -} + gfp_t gfp, struct dma_attrs *attrs); #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) -static inline void dma_free_attrs(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - WARN_ON(irqs_disabled()); /* for portability */ - - if (dma_release_from_coherent(dev, get_order(size), vaddr)) - return; - - debug_dma_free_coherent(dev, size, vaddr, bus); - if (ops->free) - ops->free(dev, size, vaddr, bus, attrs); -} +void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus, + struct dma_attrs *attrs); #endif diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3738b138b843..155162ea0e00 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -1,7 +1,7 
@@ #ifndef _ASM_X86_EFI_H #define _ASM_X86_EFI_H -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/pgtable.h> /* diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h deleted file mode 100644 index da5e96756570..000000000000 --- a/arch/x86/include/asm/fpu-internal.h +++ /dev/null @@ -1,626 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _FPU_INTERNAL_H -#define _FPU_INTERNAL_H - -#include <linux/kernel_stat.h> -#include <linux/regset.h> -#include <linux/compat.h> -#include <linux/slab.h> -#include <asm/asm.h> -#include <asm/cpufeature.h> -#include <asm/processor.h> -#include <asm/sigcontext.h> -#include <asm/user.h> -#include <asm/uaccess.h> -#include <asm/xsave.h> -#include <asm/smap.h> - -#ifdef CONFIG_X86_64 -# include <asm/sigcontext32.h> -# include <asm/user32.h> -struct ksignal; -int ia32_setup_rt_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -#else -# define user_i387_ia32_struct user_i387_struct -# define user32_fxsr_struct user_fxsr_struct -# define ia32_setup_frame __setup_frame -# define ia32_setup_rt_frame __setup_rt_frame -#endif - -extern unsigned int mxcsr_feature_mask; -extern void fpu_init(void); -extern void eager_fpu_init(void); - -DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); - -extern void convert_from_fxsr(struct user_i387_ia32_struct *env, - struct task_struct *tsk); -extern void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env); - -extern user_regset_active_fn fpregs_active, xfpregs_active; -extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, - xstateregs_get; -extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, - xstateregs_set; - -/* - * xstateregs_active == fpregs_active. Please refer to the comment - * at the definition of fpregs_active. - */ -#define xstateregs_active fpregs_active - -#ifdef CONFIG_MATH_EMULATION -extern void finit_soft_fpu(struct i387_soft_struct *soft); -#else -static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} -#endif - -/* - * Must be run with preemption disabled: this clears the fpu_owner_task, - * on this CPU. - * - * This will disable any lazy FPU state restore of the current FPU state, - * but if the current thread owns the FPU, it will still be saved by. - */ -static inline void __cpu_disable_lazy_restore(unsigned int cpu) -{ - per_cpu(fpu_owner_task, cpu) = NULL; -} - -/* - * Used to indicate that the FPU state in memory is newer than the FPU - * state in registers, and the FPU state should be reloaded next time the - * task is run. Only safe on the current task, or non-running tasks. 
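Back in the barrier.h hunk above, set_mb() became smp_store_mb(), with an explicit WRITE_ONCE() on the UP path. Its purpose is the classic sleep/wakeup ordering: the state store must be globally visible before the wait condition is re-read, or a concurrent waker can be missed. A condensed sketch, assuming a 'condition' flag supplied by the surrounding code:

static void wait_for_condition(void)
{
        for (;;) {
                /* store the state, then a full barrier before re-checking */
                smp_store_mb(current->state, TASK_INTERRUPTIBLE);
                if (condition)
                        break;
                schedule();
        }
        __set_current_state(TASK_RUNNING);
}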
- */ -static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk) -{ - tsk->thread.fpu.last_cpu = ~0; -} - -static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) -{ - return new == this_cpu_read_stable(fpu_owner_task) && - cpu == new->thread.fpu.last_cpu; -} - -static inline int is_ia32_compat_frame(void) -{ - return config_enabled(CONFIG_IA32_EMULATION) && - test_thread_flag(TIF_IA32); -} - -static inline int is_ia32_frame(void) -{ - return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); -} - -static inline int is_x32_frame(void) -{ - return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); -} - -#define X87_FSW_ES (1 << 7) /* Exception Summary */ - -static __always_inline __pure bool use_eager_fpu(void) -{ - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); -} - -static __always_inline __pure bool use_xsaveopt(void) -{ - return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); -} - -static __always_inline __pure bool use_xsave(void) -{ - return static_cpu_has_safe(X86_FEATURE_XSAVE); -} - -static __always_inline __pure bool use_fxsr(void) -{ - return static_cpu_has_safe(X86_FEATURE_FXSR); -} - -static inline void fx_finit(struct i387_fxsave_struct *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -} - -extern void __sanitize_i387_state(struct task_struct *); - -static inline void sanitize_i387_state(struct task_struct *tsk) -{ - if (!use_xsaveopt()) - return; - __sanitize_i387_state(tsk); -} - -#define user_insn(insn, output, input...) \ -({ \ - int err; \ - asm volatile(ASM_STAC "\n" \ - "1:" #insn "\n\t" \ - "2: " ASM_CLAC "\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -#define check_insn(insn, output, input...) \ -({ \ - int err; \ - asm volatile("1:" #insn "\n\t" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -static inline int fsave_user(struct i387_fsave_struct __user *fx) -{ - return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); -} - -static inline int fxsave_user(struct i387_fxsave_struct __user *fx) -{ - if (config_enabled(CONFIG_X86_32)) - return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - - /* See comment in fpu_fxsave() below. */ - return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); -} - -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) -{ - if (config_enabled(CONFIG_X86_32)) - return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in fpu_fxsave() below. */ - return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); -} - -static inline int fxrstor_user(struct i387_fxsave_struct __user *fx) -{ - if (config_enabled(CONFIG_X86_32)) - return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in fpu_fxsave() below. 
*/ - return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); -} - -static inline int frstor_checking(struct i387_fsave_struct *fx) -{ - return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int frstor_user(struct i387_fsave_struct __user *fx) -{ - return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline void fpu_fxsave(struct fpu *fpu) -{ - if (config_enabled(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave)); - else { - /* Using "rex64; fxsave %0" is broken because, if the memory - * operand uses any extended registers for addressing, a second - * REX prefix will be generated (to the assembler, rex64 - * followed by semicolon is a separate instruction), and hence - * the 64-bitness is lost. - * - * Using "fxsaveq %0" would be the ideal choice, but is only - * supported starting with gas 2.16. - * - * Using, as a workaround, the properly prefixed form below - * isn't accepted by any binutils version so far released, - * complaining that the same type of prefix is used twice if - * an extended register is needed for addressing (fix submitted - * to mainline 2005-11-21). - * - * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); - * - * This, however, we can work around by forcing the compiler to - * select an addressing mode that doesn't require extended - * registers. - */ - asm volatile( "rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); - } -} - -/* - * These must be called with preempt disabled. Returns - * 'true' if the FPU state is still intact. - */ -static inline int fpu_save_init(struct fpu *fpu) -{ - if (use_xsave()) { - fpu_xsave(fpu); - - /* - * xsave header may indicate the init state of the FP. - */ - if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) - return 1; - } else if (use_fxsr()) { - fpu_fxsave(fpu); - } else { - asm volatile("fnsave %[fx]; fwait" - : [fx] "=m" (fpu->state->fsave)); - return 0; - } - - /* - * If exceptions are pending, we need to clear them so - * that we don't randomly get exceptions later. - * - * FIXME! Is this perhaps only true for the old-style - * irq13 case? Maybe we could leave the x87 state - * intact otherwise? - */ - if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { - asm volatile("fnclex"); - return 0; - } - return 1; -} - -static inline int __save_init_fpu(struct task_struct *tsk) -{ - return fpu_save_init(&tsk->thread.fpu); -} - -static inline int fpu_restore_checking(struct fpu *fpu) -{ - if (use_xsave()) - return fpu_xrstor_checking(&fpu->state->xsave); - else if (use_fxsr()) - return fxrstor_checking(&fpu->state->fxsave); - else - return frstor_checking(&fpu->state->fsave); -} - -static inline int restore_fpu_checking(struct task_struct *tsk) -{ - /* - * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is - * pending. Clear the x87 state here by setting it to fixed values. - * "m" is a random variable that should be in L1. - */ - if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { - asm volatile( - "fnclex\n\t" - "emms\n\t" - "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (tsk->thread.fpu.has_fpu)); - } - - return fpu_restore_checking(&tsk->thread.fpu); -} - -/* - * Software FPU state helpers. Careful: these need to - * be preemption protection *and* they need to be - * properly paired with the CR0.TS changes! 
- */ -static inline int __thread_has_fpu(struct task_struct *tsk) -{ - return tsk->thread.fpu.has_fpu; -} - -/* Must be paired with an 'stts' after! */ -static inline void __thread_clear_has_fpu(struct task_struct *tsk) -{ - tsk->thread.fpu.has_fpu = 0; - this_cpu_write(fpu_owner_task, NULL); -} - -/* Must be paired with a 'clts' before! */ -static inline void __thread_set_has_fpu(struct task_struct *tsk) -{ - tsk->thread.fpu.has_fpu = 1; - this_cpu_write(fpu_owner_task, tsk); -} - -/* - * Encapsulate the CR0.TS handling together with the - * software flag. - * - * These generally need preemption protection to work, - * do try to avoid using these on their own. - */ -static inline void __thread_fpu_end(struct task_struct *tsk) -{ - __thread_clear_has_fpu(tsk); - if (!use_eager_fpu()) - stts(); -} - -static inline void __thread_fpu_begin(struct task_struct *tsk) -{ - if (!use_eager_fpu()) - clts(); - __thread_set_has_fpu(tsk); -} - -static inline void drop_fpu(struct task_struct *tsk) -{ - /* - * Forget coprocessor state.. - */ - preempt_disable(); - tsk->thread.fpu_counter = 0; - - if (__thread_has_fpu(tsk)) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(tsk); - } - - clear_stopped_child_used_math(tsk); - preempt_enable(); -} - -static inline void restore_init_xstate(void) -{ - if (use_xsave()) - xrstor_state(init_xstate_buf, -1); - else - fxrstor_checking(&init_xstate_buf->i387); -} - -/* - * Reset the FPU state in the eager case and drop it in the lazy case (later use - * will reinit it). - */ -static inline void fpu_reset_state(struct task_struct *tsk) -{ - if (!use_eager_fpu()) - drop_fpu(tsk); - else - restore_init_xstate(); -} - -/* - * FPU state switching for scheduling. - * - * This is a two-stage process: - * - * - switch_fpu_prepare() saves the old state and - * sets the new state of the CR0.TS bit. This is - * done within the context of the old process. - * - * - switch_fpu_finish() restores the new state as - * necessary. - */ -typedef struct { int preload; } fpu_switch_t; - -static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) -{ - fpu_switch_t fpu; - - /* - * If the task has used the math, pre-load the FPU on xsave processors - * or if the past 5 consecutive context-switches used math. - */ - fpu.preload = tsk_used_math(new) && - (use_eager_fpu() || new->thread.fpu_counter > 5); - - if (__thread_has_fpu(old)) { - if (!__save_init_fpu(old)) - task_disable_lazy_fpu_restore(old); - else - old->thread.fpu.last_cpu = cpu; - - /* But leave fpu_owner_task! */ - old->thread.fpu.has_fpu = 0; - - /* Don't change CR0.TS if we just switch! */ - if (fpu.preload) { - new->thread.fpu_counter++; - __thread_set_has_fpu(new); - prefetch(new->thread.fpu.state); - } else if (!use_eager_fpu()) - stts(); - } else { - old->thread.fpu_counter = 0; - task_disable_lazy_fpu_restore(old); - if (fpu.preload) { - new->thread.fpu_counter++; - if (fpu_lazy_restore(new, cpu)) - fpu.preload = 0; - else - prefetch(new->thread.fpu.state); - __thread_fpu_begin(new); - } - } - return fpu; -} - -/* - * By the time this gets called, we've already cleared CR0.TS and - * given the process the FPU if we are going to preload the FPU - * state - all we need to do is to conditionally restore the register - * state itself. 
- */ -static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) -{ - if (fpu.preload) { - if (unlikely(restore_fpu_checking(new))) - fpu_reset_state(new); - } -} - -/* - * Signal frame handlers... - */ -extern int save_xstate_sig(void __user *buf, void __user *fx, int size); -extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); - -static inline int xstate_sigframe_size(void) -{ - return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; -} - -static inline int restore_xstate_sig(void __user *buf, int ia32_frame) -{ - void __user *buf_fx = buf; - int size = xstate_sigframe_size(); - - if (ia32_frame && use_fxsr()) { - buf_fx = buf + sizeof(struct i387_fsave_struct); - size += sizeof(struct i387_fsave_struct); - } - - return __restore_xstate_sig(buf, buf_fx, size); -} - -/* - * Needs to be preemption-safe. - * - * NOTE! user_fpu_begin() must be used only immediately before restoring - * the save state. It does not do any saving/restoring on its own. In - * lazy FPU mode, it is just an optimization to avoid a #NM exception, - * the task can lose the FPU right after preempt_enable(). - */ -static inline void user_fpu_begin(void) -{ - preempt_disable(); - if (!user_has_fpu()) - __thread_fpu_begin(current); - preempt_enable(); -} - -static inline void __save_fpu(struct task_struct *tsk) -{ - if (use_xsave()) { - if (unlikely(system_state == SYSTEM_BOOTING)) - xsave_state_booting(&tsk->thread.fpu.state->xsave, -1); - else - xsave_state(&tsk->thread.fpu.state->xsave, -1); - } else - fpu_fxsave(&tsk->thread.fpu); -} - -/* - * i387 state interaction - */ -static inline unsigned short get_fpu_cwd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.cwd; - } else { - return (unsigned short)tsk->thread.fpu.state->fsave.cwd; - } -} - -static inline unsigned short get_fpu_swd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.swd; - } else { - return (unsigned short)tsk->thread.fpu.state->fsave.swd; - } -} - -static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) -{ - if (cpu_has_xmm) { - return tsk->thread.fpu.state->fxsave.mxcsr; - } else { - return MXCSR_DEFAULT; - } -} - -static bool fpu_allocated(struct fpu *fpu) -{ - return fpu->state != NULL; -} - -static inline int fpu_alloc(struct fpu *fpu) -{ - if (fpu_allocated(fpu)) - return 0; - fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); - if (!fpu->state) - return -ENOMEM; - WARN_ON((unsigned long)fpu->state & 15); - return 0; -} - -static inline void fpu_free(struct fpu *fpu) -{ - if (fpu->state) { - kmem_cache_free(task_xstate_cachep, fpu->state); - fpu->state = NULL; - } -} - -static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) -{ - if (use_eager_fpu()) { - memset(&dst->thread.fpu.state->xsave, 0, xstate_size); - __save_fpu(dst); - } else { - struct fpu *dfpu = &dst->thread.fpu; - struct fpu *sfpu = &src->thread.fpu; - - unlazy_fpu(src); - memcpy(dfpu->state, sfpu->state, xstate_size); - } -} - -static inline unsigned long -alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, - unsigned long *size) -{ - unsigned long frame_size = xstate_sigframe_size(); - - *buf_fx = sp = round_down(sp - frame_size, 64); - if (ia32_frame && use_fxsr()) { - frame_size += sizeof(struct i387_fsave_struct); - sp -= sizeof(struct i387_fsave_struct); - } - - *size = frame_size; - return sp; -} - -#endif diff --git a/arch/x86/include/asm/fpu/api.h 
b/arch/x86/include/asm/fpu/api.h new file mode 100644 index 000000000000..1429a7c736db --- /dev/null +++ b/arch/x86/include/asm/fpu/api.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_FPU_API_H +#define _ASM_X86_FPU_API_H + +/* + * Careful: __kernel_fpu_begin/end() must be called with preempt disabled + * and they don't touch the preempt state on their own. + * If you enable preemption after __kernel_fpu_begin(), preempt notifier + * should call the __kernel_fpu_end() to prevent the kernel/user FPU + * state from getting corrupted. KVM for example uses this model. + * + * All other cases use kernel_fpu_begin/end() which disable preemption + * during kernel FPU usage. + */ +extern void __kernel_fpu_begin(void); +extern void __kernel_fpu_end(void); +extern void kernel_fpu_begin(void); +extern void kernel_fpu_end(void); +extern bool irq_fpu_usable(void); + +/* + * Some instructions like VIA's padlock instructions generate a spurious + * DNA fault but don't modify SSE registers. And these instructions + * get used from interrupt context as well. To prevent these kernel instructions + * in interrupt context interacting wrongly with other user/kernel fpu usage, we + * should use them only in the context of irq_ts_save/restore() + */ +extern int irq_ts_save(void); +extern void irq_ts_restore(int TS_state); + +/* + * Query the presence of one or more xfeatures. Works on any legacy CPU as well. + * + * If 'feature_name' is set then put a human-readable description of + * the feature there as well - this can be used to print error (or success) + * messages. + */ +extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); + +#endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h new file mode 100644 index 000000000000..3c3550c3a4a3 --- /dev/null +++ b/arch/x86/include/asm/fpu/internal.h @@ -0,0 +1,694 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_FPU_INTERNAL_H +#define _ASM_X86_FPU_INTERNAL_H + +#include <linux/compat.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/user.h> +#include <asm/fpu/api.h> +#include <asm/fpu/xstate.h> + +/* + * High level FPU state handling functions: + */ +extern void fpu__activate_curr(struct fpu *fpu); +extern void fpu__activate_fpstate_read(struct fpu *fpu); +extern void fpu__activate_fpstate_write(struct fpu *fpu); +extern void fpu__save(struct fpu *fpu); +extern void fpu__restore(struct fpu *fpu); +extern int fpu__restore_sig(void __user *buf, int ia32_frame); +extern void fpu__drop(struct fpu *fpu); +extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); +extern void fpu__clear(struct fpu *fpu); +extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); + +/* + * Boot time FPU initialization functions: + */ +extern void fpu__init_cpu(void); +extern void fpu__init_system_xstate(void); +extern void fpu__init_cpu_xstate(void); +extern void fpu__init_system(struct cpuinfo_x86 *c); +extern void fpu__init_check_bugs(void); +extern void fpu__resume_cpu(void); + +/* + * Debugging facility: + */ +#ifdef 
CONFIG_X86_DEBUG_FPU +# define WARN_ON_FPU(x) WARN_ON_ONCE(x) +#else +# define WARN_ON_FPU(x) ({ (void)(x); 0; }) +#endif + +/* + * FPU related CPU feature flag helper routines: + */ +static __always_inline __pure bool use_eager_fpu(void) +{ + return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); +} + +static __always_inline __pure bool use_xsaveopt(void) +{ + return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); +} + +static __always_inline __pure bool use_xsave(void) +{ + return static_cpu_has_safe(X86_FEATURE_XSAVE); +} + +static __always_inline __pure bool use_fxsr(void) +{ + return static_cpu_has_safe(X86_FEATURE_FXSR); +} + +/* + * fpstate handling functions: + */ + +extern union fpregs_state init_fpstate; + +extern void fpstate_init(union fpregs_state *state); +#ifdef CONFIG_MATH_EMULATION +extern void fpstate_init_soft(struct swregs_state *soft); +#else +static inline void fpstate_init_soft(struct swregs_state *soft) {} +#endif +static inline void fpstate_init_fxstate(struct fxregs_state *fx) +{ + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} +extern void fpstate_sanitize_xstate(struct fpu *fpu); + +#define user_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile(ASM_STAC "\n" \ + "1:" #insn "\n\t" \ + "2: " ASM_CLAC "\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +#define check_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +static inline int copy_fregs_to_user(struct fregs_state __user *fx) +{ + return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); +} + +static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) +{ + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); + + /* See comment in copy_fxregs_to_kernel() below. */ + return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); +} + +static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) +{ + int err; + + if (config_enabled(CONFIG_X86_32)) { + err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + } else { + if (config_enabled(CONFIG_AS_FXSAVEQ)) { + err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + } else { + /* See comment in copy_fxregs_to_kernel() below. */ + err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); + } + } + /* Copying from a kernel buffer to FPU registers should never fail: */ + WARN_ON_FPU(err); +} + +static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) +{ + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + + /* See comment in copy_fxregs_to_kernel() below. 
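The new copy_*() naming encodes a deliberate asymmetry: the *_user variants return an error, since user memory may legitimately fault, while kernel-buffer variants such as copy_kernel_to_fxregs() return void and WARN_ON_FPU() internally, since a fault there is a kernel bug. An illustrative caller, with a made-up function name:

static int restore_fx_from_sigframe(struct fxregs_state __user *ubuf)
{
        if (copy_user_to_fxregs(ubuf))  /* can fault: unmapped sigframe etc. */
                return -EFAULT;
        return 0;
}

/* By contrast, copy_kernel_to_fxregs(&fpu->state.fxsave) has no return
 * value to check: a failure there is reported via WARN_ON_FPU() and
 * treated as a bug, not as an error path. */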
*/ + return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); +} + +static inline void copy_kernel_to_fregs(struct fregs_state *fx) +{ + int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + + WARN_ON_FPU(err); +} + +static inline int copy_user_to_fregs(struct fregs_state __user *fx) +{ + return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline void copy_fxregs_to_kernel(struct fpu *fpu) +{ + if (config_enabled(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); + else { + /* Using "rex64; fxsave %0" is broken because, if the memory + * operand uses any extended registers for addressing, a second + * REX prefix will be generated (to the assembler, rex64 + * followed by semicolon is a separate instruction), and hence + * the 64-bitness is lost. + * + * Using "fxsaveq %0" would be the ideal choice, but is only + * supported starting with gas 2.16. + * + * Using, as a workaround, the properly prefixed form below + * isn't accepted by any binutils version so far released, + * complaining that the same type of prefix is used twice if + * an extended register is needed for addressing (fix submitted + * to mainline 2005-11-21). + * + * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave)); + * + * This, however, we can work around by forcing the compiler to + * select an addressing mode that doesn't require extended + * registers. + */ + asm volatile( "rex64/fxsave (%[fx])" + : "=m" (fpu->state.fxsave) + : [fx] "R" (&fpu->state.fxsave)); + } +} + +/* These macros all use (%edi)/(%rdi) as the single memory argument. */ +#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" +#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" +#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" +#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" + +/* xstate instruction fault handler: */ +#define xstate_fault(__err) \ + \ + ".section .fixup,\"ax\"\n" \ + \ + "3: movl $-2,%[_err]\n" \ + " jmp 2b\n" \ + \ + ".previous\n" \ + \ + _ASM_EXTABLE(1b, 3b) \ + : [_err] "=r" (__err) + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XSAVES"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); + else + asm volatile("1:"XSAVE"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); + + /* We should never fault when copying to a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. 
+ */ +static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XRSTORS"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); + else + asm volatile("1:"XRSTOR"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); + + /* We should never fault when copying from a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * Save processor xstate to xsave area. + */ +static inline void copy_xregs_to_kernel(struct xregs_state *xstate) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(!alternatives_patched); + + /* + * If xsaves is enabled, xsaves replaces xsaveopt because + * it supports compact format and supervisor states in addition to + * modified optimization in xsaveopt. + * + * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave + * because xsaveopt supports modified optimization which is not + * supported by xsave. + * + * If none of xsaves and xsaveopt is enabled, use xsave. + */ + alternative_input_2( + "1:"XSAVE, + XSAVEOPT, + X86_FEATURE_XSAVEOPT, + XSAVES, + X86_FEATURE_XSAVES, + [xstate] "D" (xstate), "a" (lmask), "d" (hmask) : + "memory"); + asm volatile("2:\n\t" + xstate_fault(err) + : "0" (err) + : "memory"); + + /* We should never fault when copying to a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * Restore processor xstate from xsave area. + */ +static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + /* + * Use xrstors to restore context if it is enabled. xrstors supports + * compacted format of xsave area which is not supported by xrstor. + */ + alternative_input( + "1: " XRSTOR, + XRSTORS, + X86_FEATURE_XSAVES, + "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile("2:\n" + xstate_fault(err) + : "0" (err) + : "memory"); + + /* We should never fault when copying from a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * Save xstate to user space xsave area. + * + * We don't use modified optimization because xrstor/xrstors might track + * a different application. + * + * We don't use compacted format xsave area for + * backward compatibility for old applications which don't understand + * compacted format of xsave area. + */ +static inline int copy_xregs_to_user(struct xregs_state __user *buf) +{ + int err; + + /* + * Clear the xsave header first, so that reserved fields are + * initialized to zero. + */ + err = __clear_user(&buf->header, sizeof(buf->header)); + if (unlikely(err)) + return -EFAULT; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XSAVE"\n" + "2: " ASM_CLAC "\n" + xstate_fault(err) + : "D" (buf), "a" (-1), "d" (-1), "0" (err) + : "memory"); + return err; +} + +/* + * Restore xstate from user space xsave area. + */ +static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) +{ + struct xregs_state *xstate = ((__force struct xregs_state *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XRSTOR"\n" + "2: " ASM_CLAC "\n" + xstate_fault(err) + : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); /* memory required? 
*/ + return err; +} + +/* + * These must be called with preempt disabled. Returns + * 'true' if the FPU state is still intact and we can + * keep registers active. + * + * The legacy FNSAVE instruction cleared all FPU state + * unconditionally, so registers are essentially destroyed. + * Modern FPU state can be kept in registers, if there are + * no pending FP exceptions. + */ +static inline int copy_fpregs_to_fpstate(struct fpu *fpu) +{ + if (likely(use_xsave())) { + copy_xregs_to_kernel(&fpu->state.xsave); + return 1; + } + + if (likely(use_fxsr())) { + copy_fxregs_to_kernel(fpu); + return 1; + } + + /* + * Legacy FPU register saving, FNSAVE always clears FPU registers, + * so we have to mark them inactive: + */ + asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); + + return 0; +} + +static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate) +{ + if (use_xsave()) { + copy_kernel_to_xregs(&fpstate->xsave, -1); + } else { + if (use_fxsr()) + copy_kernel_to_fxregs(&fpstate->fxsave); + else + copy_kernel_to_fregs(&fpstate->fsave); + } +} + +static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) +{ + /* + * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is + * pending. Clear the x87 state here by setting it to fixed values. + * "m" is a random variable that should be in L1. + */ + if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { + asm volatile( + "fnclex\n\t" + "emms\n\t" + "fildl %P[addr]" /* set F?P to defined value */ + : : [addr] "m" (fpstate)); + } + + __copy_kernel_to_fpregs(fpstate); +} + +extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); + +/* + * FPU context switch related helper methods: + */ + +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +/* + * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, + * on this CPU. + * + * This will disable any lazy FPU state restore of the current FPU state, + * but if the current thread owns the FPU, it will still be saved by. + */ +static inline void __cpu_disable_lazy_restore(unsigned int cpu) +{ + per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; +} + +static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) +{ + return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; +} + + +/* + * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation' + * idiom, which is then paired with the sw-flag (fpregs_active) later on: + */ + +static inline void __fpregs_activate_hw(void) +{ + if (!use_eager_fpu()) + clts(); +} + +static inline void __fpregs_deactivate_hw(void) +{ + if (!use_eager_fpu()) + stts(); +} + +/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */ +static inline void __fpregs_deactivate(struct fpu *fpu) +{ + WARN_ON_FPU(!fpu->fpregs_active); + + fpu->fpregs_active = 0; + this_cpu_write(fpu_fpregs_owner_ctx, NULL); +} + +/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ +static inline void __fpregs_activate(struct fpu *fpu) +{ + WARN_ON_FPU(fpu->fpregs_active); + + fpu->fpregs_active = 1; + this_cpu_write(fpu_fpregs_owner_ctx, fpu); +} + +/* + * The question "does this thread have fpu access?" + * is slightly racy, since preemption could come in + * and revoke it immediately after the test. + * + * However, even in that very unlikely scenario, + * we can just assume we have FPU access - typically + * to save the FP state - we'll just take a #NM + * fault and get the FPU access back. 
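These activation helpers are the building blocks of kernel_fpu_begin()/kernel_fpu_end(), declared in the new fpu/api.h above, which is the interface all the converted crypto glue code uses. The canonical consumer pattern, sketched with made-up worker names (xor_stream_simd/xor_stream_generic):

static void xor_stream(u8 *dst, const u8 *src, unsigned int len)
{
        if (!irq_fpu_usable()) {
                xor_stream_generic(dst, src, len);      /* scalar fallback */
                return;
        }

        kernel_fpu_begin();             /* disables preemption, owns the FPU */
        xor_stream_simd(dst, src, len); /* free to clobber XMM/YMM state */
        kernel_fpu_end();
}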
+ */ +static inline int fpregs_active(void) +{ + return current->thread.fpu.fpregs_active; +} + +/* + * Encapsulate the CR0.TS handling together with the + * software flag. + * + * These generally need preemption protection to work, + * do try to avoid using these on their own. + */ +static inline void fpregs_activate(struct fpu *fpu) +{ + __fpregs_activate_hw(); + __fpregs_activate(fpu); +} + +static inline void fpregs_deactivate(struct fpu *fpu) +{ + __fpregs_deactivate(fpu); + __fpregs_deactivate_hw(); +} + +/* + * FPU state switching for scheduling. + * + * This is a two-stage process: + * + * - switch_fpu_prepare() saves the old state and + * sets the new state of the CR0.TS bit. This is + * done within the context of the old process. + * + * - switch_fpu_finish() restores the new state as + * necessary. + */ +typedef struct { int preload; } fpu_switch_t; + +static inline fpu_switch_t +switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) +{ + fpu_switch_t fpu; + + /* + * If the task has used the math, pre-load the FPU on xsave processors + * or if the past 5 consecutive context-switches used math. + */ + fpu.preload = new_fpu->fpstate_active && + (use_eager_fpu() || new_fpu->counter > 5); + + if (old_fpu->fpregs_active) { + if (!copy_fpregs_to_fpstate(old_fpu)) + old_fpu->last_cpu = -1; + else + old_fpu->last_cpu = cpu; + + /* But leave fpu_fpregs_owner_ctx! */ + old_fpu->fpregs_active = 0; + + /* Don't change CR0.TS if we just switch! */ + if (fpu.preload) { + new_fpu->counter++; + __fpregs_activate(new_fpu); + prefetch(&new_fpu->state); + } else { + __fpregs_deactivate_hw(); + } + } else { + old_fpu->counter = 0; + old_fpu->last_cpu = -1; + if (fpu.preload) { + new_fpu->counter++; + if (fpu_want_lazy_restore(new_fpu, cpu)) + fpu.preload = 0; + else + prefetch(&new_fpu->state); + fpregs_activate(new_fpu); + } + } + return fpu; +} + +/* + * Misc helper functions: + */ + +/* + * By the time this gets called, we've already cleared CR0.TS and + * given the process the FPU if we are going to preload the FPU + * state - all we need to do is to conditionally restore the register + * state itself. + */ +static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) +{ + if (fpu_switch.preload) + copy_kernel_to_fpregs(&new_fpu->state); +} + +/* + * Needs to be preemption-safe. + * + * NOTE! user_fpu_begin() must be used only immediately before restoring + * the save state. It does not do any saving/restoring on its own. In + * lazy FPU mode, it is just an optimization to avoid a #NM exception, + * the task can lose the FPU right after preempt_enable(). 
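The two-stage switch above is consumed from the arch context-switch path. A condensed sketch of the pairing — not a literal quote of __switch_to(), just its FPU-related skeleton:

static struct task_struct *
fpu_switch_sketch(struct task_struct *prev, struct task_struct *next, int cpu)
{
        fpu_switch_t fpu_switch;

        /* stage 1: save old state, decide whether to preload the new */
        fpu_switch = switch_fpu_prepare(&prev->thread.fpu,
                                        &next->thread.fpu, cpu);

        /* ... the actual stack/segment/register switch happens here ... */

        /* stage 2: conditionally restore the incoming task's registers */
        switch_fpu_finish(&next->thread.fpu, fpu_switch);
        return next;
}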
+ */ +static inline void user_fpu_begin(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + preempt_disable(); + if (!fpregs_active()) + fpregs_activate(fpu); + preempt_enable(); +} + +/* + * MXCSR and XCR definitions: + */ + +extern unsigned int mxcsr_feature_mask; + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 + +static inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ + : : "a" (eax), "d" (edx), "c" (index)); +} + +#endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h new file mode 100644 index 000000000000..39d3107ac6c7 --- /dev/null +++ b/arch/x86/include/asm/fpu/regset.h @@ -0,0 +1,21 @@ +/* + * FPU regset handling methods: + */ +#ifndef _ASM_X86_FPU_REGSET_H +#define _ASM_X86_FPU_REGSET_H + +#include <linux/regset.h> + +extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; +extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, + xstateregs_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, + xstateregs_set; + +/* + * xstateregs_active == regset_fpregs_active. Please refer to the comment + * at the definition of regset_fpregs_active. + */ +#define xstateregs_active regset_fpregs_active + +#endif /* _ASM_X86_FPU_REGSET_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h new file mode 100644 index 000000000000..7358e9d61f1e --- /dev/null +++ b/arch/x86/include/asm/fpu/signal.h @@ -0,0 +1,33 @@ +/* + * x86 FPU signal frame handling methods: + */ +#ifndef _ASM_X86_FPU_SIGNAL_H +#define _ASM_X86_FPU_SIGNAL_H + +#ifdef CONFIG_X86_64 +# include <asm/sigcontext32.h> +# include <asm/user32.h> +struct ksignal; +int ia32_setup_rt_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env); + +unsigned long +fpu__alloc_mathframe(unsigned long sp, int ia32_frame, + unsigned long *buf_fx, unsigned long *size); + +extern void fpu__init_prepare_fx_sw_frame(void); + +#endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h new file mode 100644 index 000000000000..0637826292de --- /dev/null +++ b/arch/x86/include/asm/fpu/types.h @@ -0,0 +1,293 @@ +/* + * FPU data structures: + */ +#ifndef _ASM_X86_FPU_H +#define _ASM_X86_FPU_H + +/* + * The legacy x87 FPU state format, as saved by FSAVE and + * restored by the FRSTOR instructions: + */ +struct fregs_state { + u32 cwd; /* FPU Control Word */ + u32 swd; /* FPU Status Word */ + u32 twd; /* FPU Tag Word */ + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Pointer Offset */ + u32 fos; /* FPU Operand Pointer Selector */ + + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + + /* Software status 
information [not touched by FSAVE]: */ + u32 status; +}; + +/* + * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and + * restored by the FXRSTOR instructions. It's similar to the FSAVE + * format, but differs in some areas, plus has extensions at + * the end for the XMM registers. + */ +struct fxregs_state { + u16 cwd; /* Control Word */ + u16 swd; /* Status Word */ + u16 twd; /* Tag Word */ + u16 fop; /* Last Instruction Opcode */ + union { + struct { + u64 rip; /* Instruction Pointer */ + u64 rdp; /* Data Pointer */ + }; + struct { + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Offset */ + u32 fos; /* FPU Operand Selector */ + }; + }; + u32 mxcsr; /* MXCSR Register State */ + u32 mxcsr_mask; /* MXCSR Mask */ + + /* 8*16 bytes for each FP-reg = 128 bytes: */ + u32 st_space[32]; + + /* 16*16 bytes for each XMM-reg = 256 bytes: */ + u32 xmm_space[64]; + + u32 padding[12]; + + union { + u32 padding1[12]; + u32 sw_reserved[12]; + }; + +} __attribute__((aligned(16))); + +/* Default value for fxregs_state.mxcsr: */ +#define MXCSR_DEFAULT 0x1f80 + +/* + * Software based FPU emulation state. This is arbitrary really, + * it matches the x87 format to make it easier to understand: + */ +struct swregs_state { + u32 cwd; + u32 swd; + u32 twd; + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + u8 ftop; + u8 changed; + u8 lookahead; + u8 no_update; + u8 rm; + u8 alimit; + struct math_emu_info *info; + u32 entry_eip; +}; + +/* + * List of XSAVE features Linux knows about: + */ +enum xfeature_bit { + XSTATE_BIT_FP, + XSTATE_BIT_SSE, + XSTATE_BIT_YMM, + XSTATE_BIT_BNDREGS, + XSTATE_BIT_BNDCSR, + XSTATE_BIT_OPMASK, + XSTATE_BIT_ZMM_Hi256, + XSTATE_BIT_Hi16_ZMM, + + XFEATURES_NR_MAX, +}; + +#define XSTATE_FP (1 << XSTATE_BIT_FP) +#define XSTATE_SSE (1 << XSTATE_BIT_SSE) +#define XSTATE_YMM (1 << XSTATE_BIT_YMM) +#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) +#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) +#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) +#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) +#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) + +#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) + +/* + * There are 16x 256-bit AVX registers named YMM0-YMM15. + * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) + * and are stored in 'struct fxregs_state::xmm_space[]'. + * + * The high 128 bits are stored here: + * 16x 128 bits == 256 bytes. + */ +struct ymmh_struct { + u8 ymmh_space[256]; +}; + +/* We don't support LWP yet: */ +struct lwp_struct { + u8 reserved[128]; +}; + +/* Intel MPX support: */ +struct bndreg { + u64 lower_bound; + u64 upper_bound; +} __packed; + +struct bndcsr { + u64 bndcfgu; + u64 bndstatus; +} __packed; + +struct mpx_struct { + struct bndreg bndreg[4]; + struct bndcsr bndcsr; +}; + +struct xstate_header { + u64 xfeatures; + u64 xcomp_bv; + u64 reserved[6]; +} __attribute__((packed)); + +/* New processor state extensions should be added here: */ +#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \ + sizeof(struct lwp_struct) + \ + sizeof(struct mpx_struct) ) +/* + * This is our most modern FPU state format, as saved by the XSAVE + * and restored by the XRSTOR instructions. + * + * It consists of a legacy fxregs portion, an xstate header and + * subsequent fixed size areas as defined by the xstate header. + * Not all CPUs support all the extensions. 
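+ * + * Rough layout sketch (offsets are illustrative; the 512/64 sizes are the + * FXSAVE_SIZE/XSAVE_HDR_SIZE constants used elsewhere in this patch): + * + * [ 0.. 511] i387: legacy fxregs_state area + * [ 512.. 575] header: struct xstate_header + * [ 576.. ] __reserved: extended feature areas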
+ */ +struct xregs_state { + struct fxregs_state i387; + struct xstate_header header; + u8 __reserved[XSTATE_RESERVE]; +} __attribute__ ((packed, aligned (64))); + +/* + * This is a union of all the possible FPU state formats + * put together, so that we can pick the right one at runtime. + * + * The size of the structure is determined by the largest + * member - which is the xsave area: + */ +union fpregs_state { + struct fregs_state fsave; + struct fxregs_state fxsave; + struct swregs_state soft; + struct xregs_state xsave; +}; + +/* + * Highest level per task FPU state data structure that + * contains the FPU register state plus various FPU + * state fields: + */ +struct fpu { + /* + * @state: + * + * In-memory copy of all FPU registers that we save/restore + * over context switches. If the task is using the FPU then + * the registers in the FPU are more recent than this state + * copy. If the task context-switches away then they get + * saved here and represent the FPU state. + * + * After context switches there may be a (short) time period + * during which the in-FPU hardware registers are unchanged + * and still perfectly match this state, if the tasks + * scheduled afterwards are not using the FPU. + * + * This is the 'lazy restore' window of optimization, which + * we track through 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. + * + * We detect whether a subsequent task uses the FPU via setting + * CR0::TS to 1, which causes any FPU use to raise a #NM fault. + * + * During this window, if the task gets scheduled again, we + * might be able to skip having to do a restore from this + * memory buffer to the hardware registers - at the cost of + * incurring the overhead of #NM fault traps. + * + * Note that on modern CPUs that support the XSAVEOPT (or other + * optimized XSAVE instructions), we don't use #NM traps anymore, + * as the hardware can track whether FPU registers need saving + * or not. On such CPUs we activate the non-lazy ('eagerfpu') + * logic, which unconditionally saves/restores all FPU state + * across context switches (if FPU state exists). + */ + union fpregs_state state; + + /* + * @last_cpu: + * + * Records the last CPU on which this context was loaded into + * FPU registers. (In the lazy-restore case we might be + * able to reuse FPU registers across multiple context switches + * this way, if no intermediate task used the FPU.) + * + * A value of -1 is used to indicate that the FPU state in context + * memory is newer than the FPU state in registers, and that the + * FPU state should be reloaded next time the task is run. + */ + unsigned int last_cpu; + + /* + * @fpstate_active: + * + * This flag indicates whether this context is active: if the task + * is not running then we can restore from this context; if the task + * is running then we should save into this context. + */ + unsigned char fpstate_active; + + /* + * @fpregs_active: + * + * This flag determines whether a given context is actively + * loaded into the FPU's registers and whether those registers + * represent the task's current FPU state. + * + * Note the interaction with fpstate_active: + * + * # task does not use the FPU: + * fpstate_active == 0 + * + * # task uses the FPU and regs are active: + * fpstate_active == 1 && fpregs_active == 1 + * + * # the regs are inactive but still match fpstate: + * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu + * + * The third state is what we use for the lazy restore optimization + * on lazy-switching CPUs.
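+ * + * switch_fpu_prepare() above probes this third state via + * fpu_want_lazy_restore() and skips the memory-to-register reload + * when it still holds.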
+ */ + unsigned char fpregs_active; + + /* + * @counter: + * + * This counter contains the number of consecutive context switches + * during which the FPU stays used. If this is over a threshold, the + * lazy FPU restore logic becomes eager, to save the trap overhead. + * This is an unsigned char so that after 256 iterations the counter + * wraps and the context switch behavior turns lazy again; this is to + * deal with bursty apps that only use the FPU for a short time: + */ + unsigned char counter; +}; + +#endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h new file mode 100644 index 000000000000..4656b25bb9a7 --- /dev/null +++ b/arch/x86/include/asm/fpu/xstate.h @@ -0,0 +1,46 @@ +#ifndef __ASM_X86_XSAVE_H +#define __ASM_X86_XSAVE_H + +#include <linux/types.h> +#include <asm/processor.h> +#include <linux/uaccess.h> + +/* Bit 63 of XCR0 is reserved for future expansion */ +#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) + +#define XSTATE_CPUID 0x0000000d + +#define FXSAVE_SIZE 512 + +#define XSAVE_HDR_SIZE 64 +#define XSAVE_HDR_OFFSET FXSAVE_SIZE + +#define XSAVE_YMM_SIZE 256 +#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) + +/* Supported features which support lazy state saving */ +#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ + | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) + +/* Supported features which require eager state saving */ +#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) + +/* All currently supported features */ +#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) + +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + +extern unsigned int xstate_size; +extern u64 xfeatures_mask; +extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; + +extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); + +void *get_xsave_addr(struct xregs_state *xsave, int xstate); +const void *get_xsave_field_ptr(int xstate_field); + +#endif diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index e42f758a0fbd..055ea9941dd5 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper; /* Recognized hypervisors */ extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_ms_hyperv; -extern const struct hypervisor_x86 x86_hyper_xen_hvm; +extern const struct hypervisor_x86 x86_hyper_xen; extern const struct hypervisor_x86 x86_hyper_kvm; extern void init_hypervisor(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h deleted file mode 100644 index 6eb6fcb83f63..000000000000 --- a/arch/x86/include/asm/i387.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _ASM_X86_I387_H -#define _ASM_X86_I387_H - -#ifndef __ASSEMBLY__ - -#include <linux/sched.h> -#include <linux/hardirq.h> - -struct pt_regs; -struct user_i387_struct; - -extern int init_fpu(struct task_struct *child); -extern void fpu_finit(struct fpu *fpu); -extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); -extern void math_state_restore(void); - -extern bool irq_fpu_usable(void); - -/* - * Careful: __kernel_fpu_begin/end() must be called with preempt disabled - * and they don't 
touch the preempt state on their own. - * If you enable preemption after __kernel_fpu_begin(), preempt notifier - * should call the __kernel_fpu_end() to prevent the kernel/user FPU - * state from getting corrupted. KVM for example uses this model. - * - * All other cases use kernel_fpu_begin/end() which disable preemption - * during kernel FPU usage. - */ -extern void __kernel_fpu_begin(void); -extern void __kernel_fpu_end(void); - -static inline void kernel_fpu_begin(void) -{ - preempt_disable(); - WARN_ON_ONCE(!irq_fpu_usable()); - __kernel_fpu_begin(); -} - -static inline void kernel_fpu_end(void) -{ - __kernel_fpu_end(); - preempt_enable(); -} - -/* Must be called with preempt disabled */ -extern void kernel_fpu_disable(void); -extern void kernel_fpu_enable(void); - -/* - * Some instructions like VIA's padlock instructions generate a spurious - * DNA fault but don't modify SSE registers. And these instructions - * get used from interrupt context as well. To prevent these kernel instructions - * in interrupt context interacting wrongly with other user/kernel fpu usage, we - * should use them only in the context of irq_ts_save/restore() - */ -static inline int irq_ts_save(void) -{ - /* - * If in process context and not atomic, we can take a spurious DNA fault. - * Otherwise, doing clts() in process context requires disabling preemption - * or some heavy lifting like kernel_fpu_begin() - */ - if (!in_atomic()) - return 0; - - if (read_cr0() & X86_CR0_TS) { - clts(); - return 1; - } - - return 0; -} - -static inline void irq_ts_restore(int TS_state) -{ - if (TS_state) - stts(); -} - -/* - * The question "does this thread have fpu access?" - * is slightly racy, since preemption could come in - * and revoke it immediately after the test. - * - * However, even in that very unlikely scenario, - * we can just assume we have FPU access - typically - * to save the FP state - we'll just take a #NM - * fault and get the FPU access back. 
- */ -static inline int user_has_fpu(void) -{ - return current->thread.fpu.has_fpu; -} - -extern void unlazy_fpu(struct task_struct *tsk); - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_I387_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dea2e7e962e3..f8c0ec3a4a97 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -207,6 +207,7 @@ union kvm_mmu_page_role { unsigned nxe:1; unsigned cr0_wp:1; unsigned smep_andnot_wp:1; + unsigned smap_andnot_wp:1; }; }; @@ -400,6 +401,7 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_header_cache; struct fpu guest_fpu; + bool eager_fpu; u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; @@ -743,6 +745,7 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + void (*fpu_activate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); @@ -999,8 +1002,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); -int fx_init(struct kvm_vcpu *vcpu); - void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 883f6b933fa4..5e8daee7c5c9 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -142,6 +142,19 @@ static inline void arch_exit_mmap(struct mm_struct *mm) paravirt_arch_exit_mmap(mm); } +#ifdef CONFIG_X86_64 +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return !config_enabled(CONFIG_IA32_EMULATION) || + !(mm->context.ia32_compat == TIF_IA32); +} +#else +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return false; +} +#endif + static inline void arch_bprm_mm_init(struct mm_struct *mm, struct vm_area_struct *vma) { diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index a952a13d59a7..7a35495275a9 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -13,55 +13,50 @@ #define MPX_BNDCFG_ENABLE_FLAG 0x1 #define MPX_BD_ENTRY_VALID_FLAG 0x1 -#ifdef CONFIG_X86_64 - -/* upper 28 bits [47:20] of the virtual address in 64-bit used to - * index into bounds directory (BD). - */ -#define MPX_BD_ENTRY_OFFSET 28 -#define MPX_BD_ENTRY_SHIFT 3 -/* bits [19:3] of the virtual address in 64-bit used to index into - * bounds table (BT). +/* + * The upper 28 bits [47:20] of the virtual address in 64-bit + * are used to index into bounds directory (BD). + * + * The directory is 2G (2^31) in size, and with 8-byte entries + * it has 2^28 entries. */ -#define MPX_BT_ENTRY_OFFSET 17 -#define MPX_BT_ENTRY_SHIFT 5 -#define MPX_IGN_BITS 3 -#define MPX_BD_ENTRY_TAIL 3 +#define MPX_BD_SIZE_BYTES_64 (1UL<<31) +#define MPX_BD_ENTRY_BYTES_64 8 +#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) -#else - -#define MPX_BD_ENTRY_OFFSET 20 -#define MPX_BD_ENTRY_SHIFT 2 -#define MPX_BT_ENTRY_OFFSET 10 -#define MPX_BT_ENTRY_SHIFT 4 -#define MPX_IGN_BITS 2 -#define MPX_BD_ENTRY_TAIL 2 +/* + * The 32-bit directory is 4MB (2^22) in size, and with 4-byte + * entries it has 2^20 entries. 
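+ * + * Worked example (illustration): (1UL << 22) bytes / 4 bytes per entry = (1UL << 20) entries, which is what MPX_BD_NR_ENTRIES_32 below evaluates to.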
+ */ +#define MPX_BD_SIZE_BYTES_32 (1UL<<22) +#define MPX_BD_ENTRY_BYTES_32 4 +#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) -#endif +/* + * A 64-bit table is 4MB total in size, and an entry is + * 4 64-bit pointers in size. + */ +#define MPX_BT_SIZE_BYTES_64 (1UL<<22) +#define MPX_BT_ENTRY_BYTES_64 32 +#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) -#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) -#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) +/* + * A 32-bit table is 16kB total in size, and an entry is + * 4 32-bit pointers in size. + */ +#define MPX_BT_SIZE_BYTES_32 (1UL<<14) +#define MPX_BT_ENTRY_BYTES_32 16 +#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) #define MPX_BNDSTA_TAIL 2 #define MPX_BNDCFG_TAIL 12 #define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) -#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1)) - -#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) #define MPX_BNDSTA_ERROR_CODE 0x3 -#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1) -#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1) -#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \ - MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT) -#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \ - MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT) - #ifdef CONFIG_X86_INTEL_MPX -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xsave_struct *xsave_buf); -int mpx_handle_bd_fault(struct xsave_struct *xsave_buf); +siginfo_t *mpx_generate_siginfo(struct pt_regs *regs); +int mpx_handle_bd_fault(void); static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); @@ -77,12 +72,11 @@ static inline void mpx_mm_init(struct mm_struct *mm) void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start, unsigned long end); #else -static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xsave_struct *xsave_buf) +static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) { return NULL; } -static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) +static inline int mpx_handle_bd_fault(void) { return -EINVAL; } diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 8957810ad7d1..d143bfad45d7 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) +#ifdef CONFIG_QUEUED_SPINLOCKS + +static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, + u32 val) +{ + PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val); +} + +static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock); +} + +static __always_inline void pv_wait(u8 *ptr, u8 val) +{ + PVOP_VCALL2(pv_lock_ops.wait, ptr, val); +} + +static __always_inline void pv_kick(int cpu) +{ + PVOP_VCALL1(pv_lock_ops.kick, cpu); +} + +#else /* !CONFIG_QUEUED_SPINLOCKS */ + static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, __ticket_t ticket) { @@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, 
ticket); } -#endif +#endif /* CONFIG_QUEUED_SPINLOCKS */ + +#endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index f7b0b5c112f2..8766c7c395c2 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -333,9 +333,19 @@ struct arch_spinlock; typedef u16 __ticket_t; #endif +struct qspinlock; + struct pv_lock_ops { +#ifdef CONFIG_QUEUED_SPINLOCKS + void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); + struct paravirt_callee_save queued_spin_unlock; + + void (*wait)(u8 *ptr, u8 val); + void (*kick)(int cpu); +#else /* !CONFIG_QUEUED_SPINLOCKS */ struct paravirt_callee_save lock_spinning; void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); +#endif /* !CONFIG_QUEUED_SPINLOCKS */ }; /* This contains all the paravirt structures: we get a convenient diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 8f3271842533..dca71714f860 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -99,11 +99,9 @@ static __always_inline bool should_resched(void) extern asmlinkage void ___preempt_schedule(void); # define __preempt_schedule() asm ("call ___preempt_schedule") extern asmlinkage void preempt_schedule(void); -# ifdef CONFIG_CONTEXT_TRACKING - extern asmlinkage void ___preempt_schedule_context(void); -# define __preempt_schedule_context() asm ("call ___preempt_schedule_context") - extern asmlinkage void preempt_schedule_context(void); -# endif + extern asmlinkage void ___preempt_schedule_notrace(void); +# define __preempt_schedule_notrace() asm ("call ___preempt_schedule_notrace") + extern asmlinkage void preempt_schedule_notrace(void); #endif #endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 23ba6765b718..43e6519df0d5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -21,6 +21,7 @@ struct mm_struct; #include <asm/desc_defs.h> #include <asm/nops.h> #include <asm/special_insns.h> +#include <asm/fpu/types.h> #include <linux/personality.h> #include <linux/cpumask.h> @@ -52,11 +53,16 @@ static inline void *current_text_addr(void) return pc; } +/* + * These alignment constraints are for performance in the vSMP case, + * but in the task_struct case we must also meet hardware imposed + * alignment requirements of the FPU state: + */ #ifdef CONFIG_X86_VSMP # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) #else -# define ARCH_MIN_TASKALIGN 16 +# define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif @@ -166,7 +172,6 @@ extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); -extern void fpu_detect(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_boot_cpu(void); @@ -313,128 +318,6 @@ struct orig_ist { unsigned long ist[7]; }; -#define MXCSR_DEFAULT 0x1f80 - -struct i387_fsave_struct { - u32 cwd; /* FPU Control Word */ - u32 swd; /* FPU Status Word */ - u32 twd; /* FPU Tag Word */ - u32 fip; /* FPU IP Offset */ - u32 fcs; /* FPU IP Selector */ - u32 foo; /* FPU Operand Pointer Offset */ - u32 fos; /* FPU Operand Pointer Selector */ - - /* 8*10 bytes for each FP-reg = 80 bytes: */ - u32 st_space[20]; 
- - /* Software status information [not touched by FSAVE ]: */ - u32 status; -}; - -struct i387_fxsave_struct { - u16 cwd; /* Control Word */ - u16 swd; /* Status Word */ - u16 twd; /* Tag Word */ - u16 fop; /* Last Instruction Opcode */ - union { - struct { - u64 rip; /* Instruction Pointer */ - u64 rdp; /* Data Pointer */ - }; - struct { - u32 fip; /* FPU IP Offset */ - u32 fcs; /* FPU IP Selector */ - u32 foo; /* FPU Operand Offset */ - u32 fos; /* FPU Operand Selector */ - }; - }; - u32 mxcsr; /* MXCSR Register State */ - u32 mxcsr_mask; /* MXCSR Mask */ - - /* 8*16 bytes for each FP-reg = 128 bytes: */ - u32 st_space[32]; - - /* 16*16 bytes for each XMM-reg = 256 bytes: */ - u32 xmm_space[64]; - - u32 padding[12]; - - union { - u32 padding1[12]; - u32 sw_reserved[12]; - }; - -} __attribute__((aligned(16))); - -struct i387_soft_struct { - u32 cwd; - u32 swd; - u32 twd; - u32 fip; - u32 fcs; - u32 foo; - u32 fos; - /* 8*10 bytes for each FP-reg = 80 bytes: */ - u32 st_space[20]; - u8 ftop; - u8 changed; - u8 lookahead; - u8 no_update; - u8 rm; - u8 alimit; - struct math_emu_info *info; - u32 entry_eip; -}; - -struct ymmh_struct { - /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ - u32 ymmh_space[64]; -}; - -/* We don't support LWP yet: */ -struct lwp_struct { - u8 reserved[128]; -}; - -struct bndreg { - u64 lower_bound; - u64 upper_bound; -} __packed; - -struct bndcsr { - u64 bndcfgu; - u64 bndstatus; -} __packed; - -struct xsave_hdr_struct { - u64 xstate_bv; - u64 xcomp_bv; - u64 reserved[6]; -} __attribute__((packed)); - -struct xsave_struct { - struct i387_fxsave_struct i387; - struct xsave_hdr_struct xsave_hdr; - struct ymmh_struct ymmh; - struct lwp_struct lwp; - struct bndreg bndreg[4]; - struct bndcsr bndcsr; - /* new processor state extensions will go here */ -} __attribute__ ((packed, aligned (64))); - -union thread_xstate { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; - struct xsave_struct xsave; -}; - -struct fpu { - unsigned int last_cpu; - unsigned int has_fpu; - union thread_xstate *state; -}; - #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); @@ -483,8 +366,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ extern unsigned int xstate_size; -extern void free_thread_xstate(struct task_struct *); -extern struct kmem_cache *task_xstate_cachep; struct perf_event; @@ -508,6 +389,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; + + /* Floating point and extended processor state */ + struct fpu fpu; + /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ @@ -518,8 +403,6 @@ struct thread_struct { unsigned long cr2; unsigned long trap_nr; unsigned long error_code; - /* floating point and extended processor state */ - struct fpu fpu; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ struct vm86_struct __user *vm86_info; @@ -535,15 +418,6 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; - /* - * fpu_counter contains the number of consecutive context switches - * that the FPU is used. If this is over a threshold, the lazy fpu - * saving becomes unlazy to save the trap. 
This is an unsigned char - * so that after 256 times the counter wraps and the behavior turns - * lazy again; this to deal with bursty apps that only use FPU for - * a short time - */ - unsigned char fpu_counter; }; /* @@ -928,24 +802,25 @@ extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); /* Register/unregister a process' MPX related resource */ -#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk)) -#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk)) +#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() +#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() #ifdef CONFIG_X86_INTEL_MPX -extern int mpx_enable_management(struct task_struct *tsk); -extern int mpx_disable_management(struct task_struct *tsk); +extern int mpx_enable_management(void); +extern int mpx_disable_management(void); #else -static inline int mpx_enable_management(struct task_struct *tsk) +static inline int mpx_enable_management(void) { return -EINVAL; } -static inline int mpx_disable_management(struct task_struct *tsk) +static inline int mpx_disable_management(void) { return -EINVAL; } #endif /* CONFIG_X86_INTEL_MPX */ extern u16 amd_get_nb_id(int cpu); +extern u32 amd_get_nodes_per_socket(void); static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) { diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 19507ffa5d28..5fabf1362942 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -107,7 +107,7 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) static inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 - return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL; + return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; #else return !!(regs->cs & 3); #endif diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h new file mode 100644 index 000000000000..9d51fae1cba3 --- /dev/null +++ b/arch/x86/include/asm/qspinlock.h @@ -0,0 +1,57 @@ +#ifndef _ASM_X86_QSPINLOCK_H +#define _ASM_X86_QSPINLOCK_H + +#include <asm/cpufeature.h> +#include <asm-generic/qspinlock_types.h> +#include <asm/paravirt.h> + +#define queued_spin_unlock queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + * + * A smp_store_release() on the least-significant byte. 
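+ * + * The release store pairs with the acquire on the locking side, so all critical-section accesses are visible before the lock reads as free (a general memory-ordering note, not new behavior added here).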
+ */ +static inline void native_queued_spin_unlock(struct qspinlock *lock) +{ + smp_store_release((u8 *)lock, 0); +} + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock_slowpath(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + pv_queued_spin_unlock(lock); +} +#else +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + native_queued_spin_unlock(lock); +} +#endif + +#define virt_queued_spin_lock virt_queued_spin_lock + +static inline bool virt_queued_spin_lock(struct qspinlock *lock) +{ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + return false; + + while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0) + cpu_relax(); + + return true; +} + +#include <asm-generic/qspinlock.h> + +#endif /* _ASM_X86_QSPINLOCK_H */ diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h new file mode 100644 index 000000000000..b002e711ba88 --- /dev/null +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -0,0 +1,6 @@ +#ifndef __ASM_QSPINLOCK_PARAVIRT_H +#define __ASM_QSPINLOCK_PARAVIRT_H + +PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); + +#endif diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 5a9856eb12ba..7d5a1929d76b 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -231,11 +231,21 @@ #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) #ifdef __KERNEL__ + +/* + * early_idt_handler_array is an array of entry points referenced in the + * early IDT. For simplicity, it's a real array with one entry point + * every nine bytes. That leaves room for an optional 'push $0' if the + * vector has no error code (two bytes), a 'push $vector_number' (two + * bytes), and a jump to the common entry code (up to five bytes). 
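+ * + * Size check (illustration): 2 + 2 + 5 = 9 bytes per entry point, which is where EARLY_IDT_HANDLER_SIZE below (and the old [2+2+5] array bound) comes from.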
+ */ +#define EARLY_IDT_HANDLER_SIZE 9 + #ifndef __ASSEMBLY__ -extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; +extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; #ifdef CONFIG_TRACING -# define trace_early_idt_handlers early_idt_handlers +# define trace_early_idt_handler_array early_idt_handler_array #endif /* diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h index ee80b92f0096..6c8a7ed13365 100644 --- a/arch/x86/include/asm/simd.h +++ b/arch/x86/include/asm/simd.h @@ -1,5 +1,5 @@ -#include <asm/i387.h> +#include <asm/fpu/api.h> /* * may_use_simd - whether it is allowable at this time to issue SIMD diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 17a8dced12da..222a6a3ca2b5 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -37,16 +37,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); -static inline struct cpumask *cpu_sibling_mask(int cpu) -{ - return per_cpu(cpu_sibling_map, cpu); -} - -static inline struct cpumask *cpu_core_mask(int cpu) -{ - return per_cpu(cpu_core_map, cpu); -} - static inline struct cpumask *cpu_llc_shared_mask(int cpu) { return per_cpu(cpu_llc_shared_map, cpu); diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index cf87de3fc390..be0a05913b91 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -42,6 +42,10 @@ extern struct static_key paravirt_ticketlocks_enabled; static __always_inline bool static_key_false(struct static_key *key); +#ifdef CONFIG_QUEUED_SPINLOCKS +#include <asm/qspinlock.h> +#else + #ifdef CONFIG_PARAVIRT_SPINLOCKS static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) @@ -169,7 +173,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock) struct __raw_tickets tmp = READ_ONCE(lock->tickets); tmp.head &= ~TICKET_SLOWPATH_FLAG; - return (tmp.tail - tmp.head) > TICKET_LOCK_INC; + return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } #define arch_spin_is_contended arch_spin_is_contended @@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) cpu_relax(); } } +#endif /* CONFIG_QUEUED_SPINLOCKS */ /* * Read-write spinlocks, allowing multiple readers diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 5f9d7572d82b..65c3e37f879a 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -23,6 +23,9 @@ typedef u32 __ticketpair_t; #define TICKET_SHIFT (sizeof(__ticket_t) * 8) +#ifdef CONFIG_QUEUED_SPINLOCKS +#include <asm-generic/qspinlock_types.h> +#else typedef struct arch_spinlock { union { __ticketpair_t head_tail; @@ -33,6 +36,7 @@ typedef struct arch_spinlock { } arch_spinlock_t; #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } +#endif /* CONFIG_QUEUED_SPINLOCKS */ #include <asm-generic/qrwlock_types.h> diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 6a998598f172..c2e00bb2a136 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -39,7 +39,9 @@ #include <asm/processor.h> #include <asm/percpu.h> #include <asm/desc.h> + #include <linux/random.h> +#include <linux/sched.h> /* * 24 byte read-only segment initializer for stack canary. 
Linker diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 552d6c90a6d4..d1793f06854d 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -7,7 +7,7 @@ #define _ASM_X86_SUSPEND_32_H #include <asm/desc.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> /* image of the saved processor state */ struct saved_context { diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index bc6232834bab..7ebf0ebe4e68 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -7,7 +7,7 @@ #define _ASM_X86_SUSPEND_64_H #include <asm/desc.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> /* * Image of the saved processor state, used by the low level ACPI suspend to diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0e8f04f2c26f..5a77593fdace 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -124,7 +124,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #ifdef ENABLE_TOPO_DEFINES #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) -#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h new file mode 100644 index 000000000000..173dd3ba108c --- /dev/null +++ b/arch/x86/include/asm/trace/mpx.h @@ -0,0 +1,132 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mpx + +#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MPX_H + +#include <linux/tracepoint.h> + +#ifdef CONFIG_X86_INTEL_MPX + +TRACE_EVENT(mpx_bounds_register_exception, + + TP_PROTO(void *addr_referenced, + const struct bndreg *bndreg), + TP_ARGS(addr_referenced, bndreg), + + TP_STRUCT__entry( + __field(void *, addr_referenced) + __field(u64, lower_bound) + __field(u64, upper_bound) + ), + + TP_fast_assign( + __entry->addr_referenced = addr_referenced; + __entry->lower_bound = bndreg->lower_bound; + __entry->upper_bound = bndreg->upper_bound; + ), + /* + * Note that we are printing out the '~' of the upper + * bounds register here. It is actually stored in its + * one's complement form so that its 'init' state + * corresponds to all 0's. But, that looks like + * gibberish when printed out, so print out the 1's + * complement instead of the actual value here. Note + * though that you still need to specify filters for the + * actual value, not the displayed one. 
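+ * + * Worked example (illustration): an upper bound of 0x1000 is stored as ~0x1000, so printing ~__entry->upper_bound displays 0x1000 again.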
+ */ + TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx", + __entry->addr_referenced, + __entry->lower_bound, + ~__entry->upper_bound + ) +); + +TRACE_EVENT(bounds_exception_mpx, + + TP_PROTO(const struct bndcsr *bndcsr), + TP_ARGS(bndcsr), + + TP_STRUCT__entry( + __field(u64, bndcfgu) + __field(u64, bndstatus) + ), + + TP_fast_assign( + /* need to get rid of the 'const' on bndcsr */ + __entry->bndcfgu = (u64)bndcsr->bndcfgu; + __entry->bndstatus = (u64)bndcsr->bndstatus; + ), + + TP_printk("bndcfgu:0x%llx bndstatus:0x%llx", + __entry->bndcfgu, + __entry->bndstatus) +); + +DECLARE_EVENT_CLASS(mpx_range_trace, + + TP_PROTO(unsigned long start, + unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field(unsigned long, start) + __field(unsigned long, end) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("[0x%p:0x%p]", + (void *)__entry->start, + (void *)__entry->end + ) +); + +DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end) +); + +DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end) +); + +TRACE_EVENT(mpx_new_bounds_table, + + TP_PROTO(unsigned long table_vaddr), + TP_ARGS(table_vaddr), + + TP_STRUCT__entry( + __field(unsigned long, table_vaddr) + ), + + TP_fast_assign( + __entry->table_vaddr = table_vaddr; + ), + + TP_printk("table vaddr:%p", (void *)__entry->table_vaddr) +); + +#else + +/* + * This gets used outside of MPX-specific code, so we need a stub. + */ +static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr) +{ +} + +#endif /* CONFIG_X86_INTEL_MPX */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/trace/ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mpx +#endif /* _TRACE_MPX_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index ace9dec050b1..a8df874f3e88 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -74,7 +74,8 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -145,7 +146,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -240,7 +242,8 @@ extern void __put_user_8(void); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -455,7 +458,8 @@ struct __large_struct { unsigned long buf[100]; }; * @x: Variable to store result. 
* @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -479,7 +483,8 @@ struct __large_struct { unsigned long buf[100]; }; * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 3c03a5de64d3..7c8ad3451988 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -70,7 +70,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -117,7 +118,8 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h index ccab4af1646d..59a54e869f15 100644 --- a/arch/x86/include/asm/user.h +++ b/arch/x86/include/asm/user.h @@ -14,8 +14,8 @@ struct user_ymmh_regs { __u32 ymmh_space[64]; }; -struct user_xsave_hdr { - __u64 xstate_bv; +struct user_xstate_header { + __u64 xfeatures; __u64 reserved1[2]; __u64 reserved2[5]; }; @@ -41,11 +41,11 @@ struct user_xsave_hdr { * particular process/thread. * * Also when the user modifies certain state FP/SSE/etc through the - * ptrace interface, they must ensure that the xsave_hdr.xstate_bv + * ptrace interface, they must ensure that the header.xfeatures * bytes[512..519] of the memory layout are updated correspondingly. * i.e., for example when FP state is modified to a non-init state, - * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to - * non-init state, xsave_hdr.xstate_bv's bit 1 must to be set to '1', etc. + * header.xfeatures's bit 0 must be set to '1', when SSE is modified to + * non-init state, header.xfeatures's bit 1 must be set to '1', etc.
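+ * + * Example (illustration only): a ptrace user that writes non-init FP state would also need to set bit 0, i.e. header.xfeatures |= 0x1.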
*/ #define USER_XSTATE_FX_SW_WORDS 6 #define USER_XSTATE_XCR0_WORD 0 @@ -55,7 +55,7 @@ struct user_xstateregs { __u64 fpx_space[58]; __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS]; } i387; - struct user_xsave_hdr xsave_hdr; + struct user_xstate_header header; struct user_ymmh_regs ymmh; /* further processor state extensions go here */ }; diff --git a/arch/x86/include/asm/xcr.h b/arch/x86/include/asm/xcr.h deleted file mode 100644 index f2cba4e79a23..000000000000 --- a/arch/x86/include/asm/xcr.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright 2008 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2 or (at your - * option) any later version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * asm-x86/xcr.h - * - * Definitions for the eXtended Control Register instructions - */ - -#ifndef _ASM_X86_XCR_H -#define _ASM_X86_XCR_H - -#define XCR_XFEATURE_ENABLED_MASK 0x00000000 - -#ifdef __KERNEL__ -# ifndef __ASSEMBLY__ - -#include <linux/types.h> - -static inline u64 xgetbv(u32 index) -{ - u32 eax, edx; - - asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ - : "=a" (eax), "=d" (edx) - : "c" (index)); - return eax + ((u64)edx << 32); -} - -static inline void xsetbv(u32 index, u64 value) -{ - u32 eax = value; - u32 edx = value >> 32; - - asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ - : : "a" (eax), "d" (edx), "c" (index)); -} - -# endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_XCR_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 358dcd338915..c44a5d53e464 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev, return false; } +static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order) +{ + return __get_free_pages(__GFP_NOWARN, order); +} + #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index d8829751b3f8..1f5c5161ead6 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -36,7 +36,7 @@ * no advantages to be gotten from x86-64 here anyways. 
*/ -#include <asm/i387.h> +#include <asm/fpu/api.h> #ifdef CONFIG_X86_32 /* reduce register pressure */ diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index ce05722e3c68..5a08bc8bff33 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -26,7 +26,7 @@ #define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" #define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" -#include <asm/i387.h> +#include <asm/fpu/api.h> static void xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 492b29802f57..7c0a517ec751 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -18,7 +18,7 @@ #ifdef CONFIG_AS_AVX #include <linux/compiler.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #define BLOCK4(i) \ BLOCK(32 * i, 0) \ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h deleted file mode 100644 index c9a6d68b8d62..000000000000 --- a/arch/x86/include/asm/xsave.h +++ /dev/null @@ -1,257 +0,0 @@ -#ifndef __ASM_X86_XSAVE_H -#define __ASM_X86_XSAVE_H - -#include <linux/types.h> -#include <asm/processor.h> - -#define XSTATE_CPUID 0x0000000d - -#define XSTATE_FP 0x1 -#define XSTATE_SSE 0x2 -#define XSTATE_YMM 0x4 -#define XSTATE_BNDREGS 0x8 -#define XSTATE_BNDCSR 0x10 -#define XSTATE_OPMASK 0x20 -#define XSTATE_ZMM_Hi256 0x40 -#define XSTATE_Hi16_ZMM 0x80 - -#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) -#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) -/* Bit 63 of XCR0 is reserved for future expansion */ -#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) - -#define FXSAVE_SIZE 512 - -#define XSAVE_HDR_SIZE 64 -#define XSAVE_HDR_OFFSET FXSAVE_SIZE - -#define XSAVE_YMM_SIZE 256 -#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) - -/* Supported features which support lazy state saving */ -#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) - -/* Supported features which require eager state saving */ -#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) - -/* All currently supported features */ -#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) - -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif - -extern unsigned int xstate_size; -extern u64 pcntxt_mask; -extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern struct xsave_struct *init_xstate_buf; - -extern void xsave_init(void); -extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); -extern int init_fpu(struct task_struct *child); - -/* These macros all use (%edi)/(%rdi) as the single memory argument. */ -#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" -#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" -#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" - -#define xstate_fault ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. 
- */ -static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XSAVES"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XSAVE"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XRSTORS"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XRSTOR"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * Save processor xstate to xsave area. - */ -static inline int xsave_state(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - /* - * If xsaves is enabled, xsaves replaces xsaveopt because - * it supports compact format and supervisor states in addition to - * modified optimization in xsaveopt. - * - * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave - * because xsaveopt supports modified optimization which is not - * supported by xsave. - * - * If none of xsaves and xsaveopt is enabled, use xsave. - */ - alternative_input_2( - "1:"XSAVE, - XSAVEOPT, - X86_FEATURE_XSAVEOPT, - XSAVES, - X86_FEATURE_XSAVES, - [fx] "D" (fx), "a" (lmask), "d" (hmask) : - "memory"); - asm volatile("2:\n\t" - xstate_fault - : "0" (0) - : "memory"); - - return err; -} - -/* - * Restore processor xstate from xsave area. - */ -static inline int xrstor_state(struct xsave_struct *fx, u64 mask) -{ - int err = 0; - u32 lmask = mask; - u32 hmask = mask >> 32; - - /* - * Use xrstors to restore context if it is enabled. xrstors supports - * compacted format of xsave area which is not supported by xrstor. - */ - alternative_input( - "1: " XRSTOR, - XRSTORS, - X86_FEATURE_XSAVES, - "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - - asm volatile("2:\n" - xstate_fault - : "0" (0) - : "memory"); - - return err; -} - -/* - * Save xstate context for old process during context switch. - */ -static inline void fpu_xsave(struct fpu *fpu) -{ - xsave_state(&fpu->state->xsave, -1); -} - -/* - * Restore xstate context for new process during context switch. - */ -static inline int fpu_xrstor_checking(struct xsave_struct *fx) -{ - return xrstor_state(fx, -1); -} - -/* - * Save xstate to user space xsave area. - * - * We don't use modified optimization because xrstor/xrstors might track - * a different application. - * - * We don't use compacted format xsave area for - * backward compatibility for old applications which don't understand - * compacted format of xsave area. - */ -static inline int xsave_user(struct xsave_struct __user *buf) -{ - int err; - - /* - * Clear the xsave header first, so that reserved fields are - * initialized to zero. 
- */ - err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); - if (unlikely(err)) - return -EFAULT; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XSAVE"\n" - "2: " ASM_CLAC "\n" - xstate_fault - : "D" (buf), "a" (-1), "d" (-1), "0" (0) - : "memory"); - return err; -} - -/* - * Restore xstate from user space xsave area. - */ -static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) -{ - int err = 0; - struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XRSTOR"\n" - "2: " ASM_CLAC "\n" - xstate_fault - : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) - : "memory"); /* memory required? */ - return err; -} - -void *get_xsave_addr(struct xsave_struct *xsave, int xstate); -void setup_xstate_comp(void); - -#endif diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c469490db4a8..3c6bb342a48f 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -140,6 +140,7 @@ #define MSR_CORE_C3_RESIDENCY 0x000003fc #define MSR_CORE_C6_RESIDENCY 0x000003fd #define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff #define MSR_PKG_C2_RESIDENCY 0x0000060d #define MSR_PKG_C8_RESIDENCY 0x00000630 #define MSR_PKG_C9_RESIDENCY 0x00000631 diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 16dc4e8a2cd3..0e8a973de9ee 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -25,7 +25,7 @@ struct _fpx_sw_bytes { __u32 extended_size; /* total size of the layout referred by * fpstate pointer in the sigcontext. */ - __u64 xstate_bv; + __u64 xfeatures; /* feature bit mask (including fp/sse/extended * state) that is present in the memory * layout. 
@@ -209,8 +209,8 @@ struct sigcontext { #endif /* !__i386__ */ -struct _xsave_hdr { - __u64 xstate_bv; +struct _header { + __u64 xfeatures; __u64 reserved1[2]; __u64 reserved2[5]; }; @@ -228,7 +228,7 @@ struct _ymmh_state { */ struct _xstate { struct _fpstate fpstate; - struct _xsave_hdr xstate_hdr; + struct _header xstate_hdr; struct _ymmh_state ymmh; /* new processor state extensions go here */ }; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9bcd0b56ca17..febaf180621b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -44,7 +44,7 @@ obj-y += pci-iommu_table.o obj-y += resource.o obj-y += process.o -obj-y += i387.o xsave.o +obj-y += fpu/ obj-y += ptrace.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef653193160..7fe097235376 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -21,6 +21,10 @@ #include <asm/io.h> #include <asm/fixmap.h> +int __read_mostly alternatives_patched; + +EXPORT_SYMBOL_GPL(alternatives_patched); + #define MAX_PATCH_LEN (255-1) static int __initdata_or_module debug_alternative; @@ -627,6 +631,7 @@ void __init alternative_instructions(void) apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); + alternatives_patched = 1; } /** diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 5caed1dd7ccf..29fa475ec518 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -89,9 +89,7 @@ int amd_cache_northbridges(void) next_northbridge(link, amd_nb_link_ids); } - /* GART present only on Fam15h upto model 0fh */ - if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || - (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) + if (amd_gart_present()) amd_northbridges.flags |= AMD_NB_GART; /* diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 76164e173a24..6e85f713641d 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -262,6 +262,9 @@ void __init early_gart_iommu_check(void) u64 aper_base = 0, last_aper_base = 0; int aper_enabled = 0, last_aper_enabled = 0, last_valid = 0; + if (!amd_gart_present()) + return; + if (!early_pci_allowed()) return; @@ -355,6 +358,9 @@ int __init gart_iommu_hole_init(void) int fix, slot, valid_agp = 0; int i, node; + if (!amd_gart_present()) + return -ENODEV; + if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) return -ENODEV; @@ -452,7 +458,7 @@ out: force_iommu || valid_agp || fallback_aper_force) { - pr_info("Your BIOS doesn't leave a aperture memory hole\n"); + pr_info("Your BIOS doesn't leave an aperture memory hole\n"); pr_info("Please enable the IOMMU option in the BIOS setup\n"); pr_info("This costs you %dMB of RAM\n", 32 << fallback_aper_order); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 9f6b9341950f..b27f6ec90caa 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -41,6 +41,25 @@ void common(void) { OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) + BLANK(); + OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax); + OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx); + OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx); + OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx); + OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si); + OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di); + 
OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp); + OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp); + OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip); + + BLANK(); + OFFSET(TI_sysenter_return, thread_info, sysenter_return); + + BLANK(); + OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); +#endif + #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 47703aed74cf..6ce39025f467 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -17,17 +17,6 @@ void foo(void); void foo(void) { - OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); - OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); - OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); - OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); - OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); - OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); - OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); - OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); - OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); - BLANK(); - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); @@ -37,10 +26,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - OFFSET(TI_cpu, thread_info, cpu); - BLANK(); - OFFSET(PT_EBX, pt_regs, bx); OFFSET(PT_ECX, pt_regs, cx); OFFSET(PT_EDX, pt_regs, dx); @@ -60,9 +45,6 @@ void foo(void) OFFSET(PT_OLDSS, pt_regs, ss); BLANK(); - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); - BLANK(); - OFFSET(saved_context_gdt_desc, saved_context, gdt_desc); BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 5ce6f2da8763..dcaab87da629 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -29,27 +29,6 @@ int main(void) BLANK(); #endif -#ifdef CONFIG_IA32_EMULATION - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - BLANK(); - -#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry) - ENTRY(ax); - ENTRY(bx); - ENTRY(cx); - ENTRY(dx); - ENTRY(si); - ENTRY(di); - ENTRY(bp); - ENTRY(sp); - ENTRY(ip); - BLANK(); -#undef ENTRY - - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); - BLANK(); -#endif - #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) ENTRY(bx); ENTRY(cx); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e4cf63301ff4..56cae1964a81 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -19,6 +19,13 @@ #include "cpu.h" +/* + * nodes_per_socket: Stores the number of nodes per socket. 
+ * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX + * Node Identifiers[10:8] + */ +static u32 nodes_per_socket = 1; + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { u32 gprs[8] = { 0 }; @@ -291,7 +298,7 @@ static int nearby_node(int apicid) #ifdef CONFIG_X86_HT static void amd_get_topology(struct cpuinfo_x86 *c) { - u32 nodes, cores_per_cu = 1; + u32 cores_per_cu = 1; u8 node_id; int cpu = smp_processor_id(); @@ -300,7 +307,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - nodes = ((ecx >> 8) & 7) + 1; + nodes_per_socket = ((ecx >> 8) & 7) + 1; node_id = ecx & 7; /* get compute unit information */ @@ -311,18 +318,18 @@ static void amd_get_topology(struct cpuinfo_x86 *c) u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes = ((value >> 3) & 7) + 1; + nodes_per_socket = ((value >> 3) & 7) + 1; node_id = value & 7; } else return; /* fixup multi-node processor information */ - if (nodes > 1) { + if (nodes_per_socket > 1) { u32 cores_per_node; u32 cus_per_node; set_cpu_cap(c, X86_FEATURE_AMD_DCM); - cores_per_node = c->x86_max_cores / nodes; + cores_per_node = c->x86_max_cores / nodes_per_socket; cus_per_node = cores_per_node / cores_per_cu; /* store NodeID, use llc_shared_map to store sibling info */ @@ -366,6 +373,12 @@ u16 amd_get_nb_id(int cpu) } EXPORT_SYMBOL_GPL(amd_get_nb_id); +u32 amd_get_nodes_per_socket(void) +{ + return nodes_per_socket; +} +EXPORT_SYMBOL_GPL(amd_get_nodes_per_socket); + static void srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA @@ -520,8 +533,16 @@ static void early_init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K6_MTRR); #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) - /* check CPU config space for extended APIC ID */ - if (cpu_has_apic && c->x86 >= 0xf) { + /* + * ApicID can always be treated as an 8-bit value for AMD APIC versions + * >= 0x10, but even old K8s came out of reset with version 0x10. So, we + * can safely set X86_FEATURE_EXTD_APICID unconditionally for families + * after 16h. + */ + if (cpu_has_apic && c->x86 > 0x16) { + set_cpu_cap(c, X86_FEATURE_EXTD_APICID); + } else if (cpu_has_apic && c->x86 >= 0xf) { + /* check CPU config space for extended APIC ID */ unsigned int val; val = read_pci_config(0, 24, 0, 0x68); if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03445346ee0a..bd17db15a2c1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,57 +12,11 @@ #include <asm/bugs.h> #include <asm/processor.h> #include <asm/processor-flags.h> -#include <asm/i387.h> +#include <asm/fpu/internal.h> #include <asm/msr.h> #include <asm/paravirt.h> #include <asm/alternative.h> -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - s32 fdiv_bug; - - kernel_fpu_begin(); - - /* - * trap_init() enabled FXSR and company _before_ testing for FP - * problems here. 
- * - * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug - */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&fdiv_bug) - : "m" (*&x), "m" (*&y)); - - kernel_fpu_end(); - - if (fdiv_bug) { - set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); - pr_warn("Hmm, FPU with FDIV bug\n"); - } -} - void __init check_bugs(void) { identify_boot_cpu(); @@ -85,10 +39,5 @@ void __init check_bugs(void) '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); - /* - * kernel_fpu_begin/end() in check_fpu() relies on the patched - * alternative instructions. - */ - if (cpu_has_fpu) - check_fpu(); + fpu__init_check_bugs(); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04dac8a..b28e5262a0a5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/percpu.h> #include <linux/string.h> +#include <linux/ctype.h> #include <linux/delay.h> #include <linux/sched.h> #include <linux/init.h> @@ -31,8 +32,7 @@ #include <asm/setup.h> #include <asm/apic.h> #include <asm/desc.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/mtrr.h> #include <linux/numa.h> #include <asm/asm.h> @@ -145,32 +145,21 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); -static int __init x86_xsave_setup(char *s) +static int __init x86_mpx_setup(char *s) { + /* require an exact match without trailing characters */ if (strlen(s)) return 0; - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - return 1; -} -__setup("noxsave", x86_xsave_setup); -static int __init x86_xsaveopt_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - return 1; -} -__setup("noxsaveopt", x86_xsaveopt_setup); + /* do not emit a message if the feature is not present */ + if (!boot_cpu_has(X86_FEATURE_MPX)) + return 1; -static int __init x86_xsaves_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); + setup_clear_cpu_cap(X86_FEATURE_MPX); + pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n"); return 1; } -__setup("noxsaves", x86_xsaves_setup); +__setup("nompx", x86_mpx_setup); #ifdef CONFIG_X86_32 static int cachesize_override = -1; @@ -183,14 +172,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - static int __init x86_sep_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_SEP); @@ -419,7 +400,7 @@ static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; - char *p, *q; + char *p, *q, *s; if (c->extended_cpuid_level < 0x80000004) return; @@ -430,19 +411,21 @@ static void get_model_name(struct cpuinfo_x86 *c) cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); c->x86_model_id[48] = 0; - /* - * Intel chips right-justify this string for some dumb reason; - * undo that brain damage: - */ - p = q = &c->x86_model_id[0]; + /* Trim whitespace */ + p = q = s = &c->x86_model_id[0]; + while (*p == ' ') p++; - if (p != q) { - while (*p) - 
*q++ = *p++; - while (q <= &c->x86_model_id[48]) - *q++ = '\0'; /* Zero-pad the rest */ + + while (*p) { + /* Note the last non-whitespace index */ + if (!isspace(*p)) + s = q; + + *q++ = *p++; } + + *(s + 1) = '\0'; } void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) @@ -759,7 +742,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); - fpu_detect(c); + fpu__init_system(c); if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -1122,7 +1105,7 @@ void print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT "%s ", vendor); if (c->x86_model_id[0]) - printk(KERN_CONT "%s", strim(c->x86_model_id)); + printk(KERN_CONT "%s", c->x86_model_id); else printk(KERN_CONT "%d86", c->x86); @@ -1183,8 +1166,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); -DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); - /* * Special IST stacks which the CPU switches to when it calls * an IST-marked descriptor entry. Up to 7 stacks (hardware @@ -1275,7 +1256,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); -DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); /* * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find @@ -1439,7 +1419,7 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - fpu_init(); + fpu__init_cpu(); if (is_uv_system()) uv_cpu_init(); @@ -1495,7 +1475,7 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - fpu_init(); + fpu__init_cpu(); } #endif diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 36ce402a3fa5..d820d8eae96b 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -27,8 +27,8 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = { -#ifdef CONFIG_XEN_PVHVM - &x86_hyper_xen_hvm, +#ifdef CONFIG_XEN + &x86_hyper_xen, #endif &x86_hyper_vmware, &x86_hyper_ms_hyperv, diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e535533d5ab8..95cf78d44ab4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -53,9 +53,12 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); #define rcu_dereference_check_mce(p) \ - rcu_dereference_index_check((p), \ - rcu_read_lock_sched_held() || \ - lockdep_is_held(&mce_chrdev_read_mutex)) +({ \ + rcu_lockdep_assert(rcu_read_lock_sched_held() || \ + lockdep_is_held(&mce_chrdev_read_mutex), \ + "suspicious rcu_dereference_check_mce() usage"); \ + smp_load_acquire(&(p)); \ +}) #define CREATE_TRACE_POINTS #include <trace/events/mce.h> @@ -708,6 +711,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, struct pt_regs *regs) { int i, ret = 0; + char *tmp; for (i = 0; i < mca_cfg.banks; i++) { m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); @@ -716,9 +720,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); } - if (mce_severity(m, mca_cfg.tolerant, msg, true) >= - MCE_PANIC_SEVERITY) + + if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + *msg = tmp; ret = 1; + } } return ret; } @@ -1884,7 +1890,7 @@ out: static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait) { poll_wait(file, 
&mce_chrdev_wait, wait); - if (rcu_access_index(mcelog.next)) + if (READ_ONCE(mcelog.next)) return POLLIN | POLLRDNORM; if (!mce_apei_read_done && apei_check_mce()) return POLLIN | POLLRDNORM; @@ -1929,8 +1935,8 @@ void register_mce_write_callback(ssize_t (*fn)(struct file *filp, } EXPORT_SYMBOL_GPL(register_mce_write_callback); -ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, - size_t usize, loff_t *off) +static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, + size_t usize, loff_t *off) { if (mce_write) return mce_write(filp, ubuf, usize, off); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 87848ebe2bb7..5801a14f7524 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -135,6 +135,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) } static atomic_t active_events; +static atomic_t pmc_refcount; static DEFINE_MUTEX(pmc_reserve_mutex); #ifdef CONFIG_X86_LOCAL_APIC @@ -190,6 +191,7 @@ static bool check_hw_exists(void) u64 val, val_fail, val_new= ~0; int i, reg, reg_fail, ret = 0; int bios_fail = 0; + int reg_safe = -1; /* * Check to see if the BIOS enabled any of the counters, if so @@ -204,6 +206,8 @@ static bool check_hw_exists(void) bios_fail = 1; val_fail = val; reg_fail = reg; + } else { + reg_safe = i; } } @@ -222,11 +226,22 @@ static bool check_hw_exists(void) } /* + * If all the counters are enabled, the below test will always + * fail. The tools will also become useless in this scenario. + * Just fail and disable the hardware counters. + */ + + if (reg_safe == -1) { + reg = reg_safe; + goto msr_fail; + } + + /* * Read the current value, change it and read it back to see if it * matches, this is needed to detect certain hardware emulators * (qemu/kvm) that don't trap on the MSR access and always return 0s. */ - reg = x86_pmu_event_addr(0); + reg = x86_pmu_event_addr(reg_safe); if (rdmsrl_safe(reg, &val)) goto msr_fail; val ^= 0xffffUL; @@ -256,11 +271,8 @@ msr_fail: static void hw_perf_event_destroy(struct perf_event *event) { - if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { - release_pmc_hardware(); - release_ds_buffers(); - mutex_unlock(&pmc_reserve_mutex); - } + x86_release_hardware(); + atomic_dec(&active_events); } void hw_perf_lbr_event_destroy(struct perf_event *event) @@ -310,6 +322,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) return x86_pmu_extra_regs(val, event); } +int x86_reserve_hardware(void) +{ + int err = 0; + + if (!atomic_inc_not_zero(&pmc_refcount)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&pmc_refcount) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + reserve_ds_buffers(); + } + if (!err) + atomic_inc(&pmc_refcount); + mutex_unlock(&pmc_reserve_mutex); + } + + return err; +} + +void x86_release_hardware(void) +{ + if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) { + release_pmc_hardware(); + release_ds_buffers(); + mutex_unlock(&pmc_reserve_mutex); + } +} + /* * Check if we can create event of a certain type (that no conflicting events * are present). 
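x86_reserve_hardware() and x86_release_hardware(), added above, factor the old __x86_pmu_event_init() logic into the classic refcount pattern: a lock-free atomic_inc_not_zero() fast path, with the mutex taken only to serialize the 0 -> 1 transition that actually reserves the hardware. A minimal user-space analogue of that idiom (names here are illustrative, not the kernel API):

#include <pthread.h>
#include <stdatomic.h>

static atomic_int refcount;
static pthread_mutex_t reserve_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for reserve_pmc_hardware(); pretend it always succeeds. */
static int acquire_hw(void)
{
        return 0;
}

static int get_ref(void)
{
        int err = 0, old = atomic_load(&refcount);

        /* Fast path: bump the count only while it is already non-zero. */
        while (old && !atomic_compare_exchange_weak(&refcount, &old, old + 1))
                ;
        if (old)
                return 0;

        /* Slow path: serialize the 0 -> 1 transition. */
        pthread_mutex_lock(&reserve_lock);
        if (atomic_load(&refcount) == 0)
                err = acquire_hw();
        if (!err)
                atomic_fetch_add(&refcount, 1);
        pthread_mutex_unlock(&reserve_lock);
        return err;
}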
@@ -322,21 +363,34 @@ int x86_add_exclusive(unsigned int what) return 0; mutex_lock(&pmc_reserve_mutex); - for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) + for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) goto out; + } atomic_inc(&x86_pmu.lbr_exclusive[what]); ret = 0; out: mutex_unlock(&pmc_reserve_mutex); + + /* + * Assuming that all exclusive events will share the PMI handler + * (which checks active_events for whether there is work to do), + * we can bump active_events counter right here, except for + * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init() + * path, which already bumps active_events for them. + */ + if (!ret && what != x86_lbr_exclusive_lbr) + atomic_inc(&active_events); + return ret; } void x86_del_exclusive(unsigned int what) { atomic_dec(&x86_pmu.lbr_exclusive[what]); + atomic_dec(&active_events); } int x86_setup_perfctr(struct perf_event *event) @@ -513,22 +567,11 @@ static int __x86_pmu_event_init(struct perf_event *event) if (!x86_pmu_initialized()) return -ENODEV; - err = 0; - if (!atomic_inc_not_zero(&active_events)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_events) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - reserve_ds_buffers(); - } - if (!err) - atomic_inc(&active_events); - mutex_unlock(&pmc_reserve_mutex); - } + err = x86_reserve_hardware(); if (err) return err; + atomic_inc(&active_events); event->destroy = hw_perf_event_destroy; event->hw.idx = -1; @@ -611,6 +654,7 @@ struct sched_state { int event; /* event index */ int counter; /* counter index */ int unassigned; /* number of events to be assigned left */ + int nr_gp; /* number of GP counters used */ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; }; @@ -620,27 +664,29 @@ struct sched_state { struct perf_sched { int max_weight; int max_events; - struct perf_event **events; - struct sched_state state; + int max_gp; int saved_states; + struct event_constraint **constraints; + struct sched_state state; struct sched_state saved[SCHED_STATES_MAX]; }; /* * Initialize iterator that runs through all events and counters.
*/ -static void perf_sched_init(struct perf_sched *sched, struct perf_event **events, - int num, int wmin, int wmax) +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints, + int num, int wmin, int wmax, int gpmax) { int idx; memset(sched, 0, sizeof(*sched)); sched->max_events = num; sched->max_weight = wmax; - sched->events = events; + sched->max_gp = gpmax; + sched->constraints = constraints; for (idx = 0; idx < num; idx++) { - if (events[idx]->hw.constraint->weight == wmin) + if (constraints[idx]->weight == wmin) break; } @@ -687,7 +733,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) if (sched->state.event >= sched->max_events) return false; - c = sched->events[sched->state.event]->hw.constraint; + c = sched->constraints[sched->state.event]; /* Prefer fixed purpose counters */ if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { idx = INTEL_PMC_IDX_FIXED; @@ -696,11 +742,16 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) goto done; } } + /* Grab the first unused counter starting with idx */ idx = sched->state.counter; for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { - if (!__test_and_set_bit(idx, sched->state.used)) + if (!__test_and_set_bit(idx, sched->state.used)) { + if (sched->state.nr_gp++ >= sched->max_gp) + return false; + goto done; + } } return false; @@ -745,7 +796,7 @@ static bool perf_sched_next_event(struct perf_sched *sched) if (sched->state.weight > sched->max_weight) return false; } - c = sched->events[sched->state.event]->hw.constraint; + c = sched->constraints[sched->state.event]; } while (c->weight != sched->state.weight); sched->state.counter = 0; /* start with first counter */ @@ -756,12 +807,12 @@ static bool perf_sched_next_event(struct perf_sched *sched) /* * Assign a counter for each event. */ -int perf_assign_events(struct perf_event **events, int n, - int wmin, int wmax, int *assign) +int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int gpmax, int *assign) { struct perf_sched sched; - perf_sched_init(&sched, events, n, wmin, wmax); + perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax); do { if (!perf_sched_find_counter(&sched)) @@ -788,9 +839,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) x86_pmu.start_scheduling(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { - hwc = &cpuc->event_list[i]->hw; + cpuc->event_constraint[i] = NULL; c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); - hwc->constraint = c; + cpuc->event_constraint[i] = c; wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); @@ -801,7 +852,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) */ for (i = 0; i < n; i++) { hwc = &cpuc->event_list[i]->hw; - c = hwc->constraint; + c = cpuc->event_constraint[i]; /* never assigned */ if (hwc->idx == -1) @@ -821,9 +872,26 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) } /* slow path */ - if (i != n) - unsched = perf_assign_events(cpuc->event_list, n, wmin, - wmax, assign); + if (i != n) { + int gpmax = x86_pmu.num_counters; + + /* + * Do not allow scheduling of more than half the available + * generic counters. + * + * This helps avoid counter starvation of sibling thread by + * ensuring at most half the counters cannot be in exclusive + * mode. There are no designated counters for the limits. Any + * N/2 counters can be used. This helps with events with + * specific counter constraints.
+ */ + if (is_ht_workaround_enabled() && !cpuc->is_fake && + READ_ONCE(cpuc->excl_cntrs->exclusive_present)) + gpmax /= 2; + + unsched = perf_assign_events(cpuc->event_constraint, n, wmin, + wmax, gpmax, assign); + } /* * In case of success (unsched = 0), mark events as committed, @@ -840,12 +908,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) e = cpuc->event_list[i]; e->hw.flags |= PERF_X86_EVENT_COMMITTED; if (x86_pmu.commit_scheduling) - x86_pmu.commit_scheduling(cpuc, e, assign[i]); + x86_pmu.commit_scheduling(cpuc, i, assign[i]); } - } - - if (!assign || unsched) { - + } else { for (i = 0; i < n; i++) { e = cpuc->event_list[i]; /* @@ -1058,13 +1123,16 @@ int x86_perf_event_set_period(struct perf_event *event) per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; - /* - * The hw event starts counting from this event offset, - * mark it to be able to extract future deltas: - */ - local64_set(&hwc->prev_count, (u64)-left); + if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) || + local64_read(&hwc->prev_count) != (u64)-left) { + /* + * The hw event starts counting from this event offset, + * mark it to be able to extract future deltas: + */ + local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + } /* * Due to an erratum on certain CPUs we need @@ -1292,8 +1360,10 @@ static void x86_pmu_del(struct perf_event *event, int flags) x86_pmu.put_event_constraints(cpuc, event); /* Delete the array entry. */ - while (++i < cpuc->n_events) + while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; + cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + } --cpuc->n_events; perf_event_update_userpage(event); @@ -1374,6 +1444,10 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) u64 finish_clock; int ret; + /* + * All PMUs/events that share this PMI handler should make sure to + * increment active_events for their events. + */ if (!atomic_read(&active_events)) return NMI_DONE; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6ac5cb7a9e14..3e7fd27dfe20 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -74,6 +74,9 @@ struct event_constraint { #define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */ #define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */ #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ +#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */ +#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */ +#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */ struct amd_nb { @@ -87,6 +90,18 @@ struct amd_nb { #define MAX_PEBS_EVENTS 8 /* + * Flags PEBS can handle without a PMI. + * + * TID can only be handled by flushing at context switch. + * + */ +#define PEBS_FREERUNNING_FLAGS \ + (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_TRANSACTION) + +/* * A debug store configuration. * * We only support architectures that use 64bit fields.
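PEBS_FREERUNNING_FLAGS is consumed later in intel_pmu_hw_config(): an event qualifies for free-running PEBS only when every requested sample_type bit is in this whitelist, i.e. when nothing in the sample needs a PMI to be reconstructed. The test !(attr.sample_type & ~PEBS_FREERUNNING_FLAGS) is the usual "no bits outside the mask" subset check; a toy version with made-up flag values:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative values only; the real PERF_SAMPLE_* constants live in
 * include/uapi/linux/perf_event.h. */
#define SAMPLE_IP   0x1ULL
#define SAMPLE_TID  0x2ULL
#define SAMPLE_TIME 0x4ULL

#define FREERUNNING_OK (SAMPLE_IP | SAMPLE_TID)

/* True iff sample_type requests nothing beyond the whitelist. */
static bool can_use_freerunning(uint64_t sample_type)
{
        return (sample_type & ~FREERUNNING_OK) == 0;
}

/*
 * can_use_freerunning(SAMPLE_IP)               -> true
 * can_use_freerunning(SAMPLE_IP | SAMPLE_TIME) -> false
 */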
@@ -132,10 +147,7 @@ enum intel_excl_state_type { }; struct intel_excl_states { - enum intel_excl_state_type init_state[X86_PMC_IDX_MAX]; enum intel_excl_state_type state[X86_PMC_IDX_MAX]; - int num_alloc_cntrs;/* #counters allocated */ - int max_alloc_cntrs;/* max #counters allowed */ bool sched_started; /* true if scheduling has started */ }; @@ -144,6 +156,11 @@ struct intel_excl_cntrs { struct intel_excl_states states[2]; + union { + u16 has_exclusive[2]; + u32 exclusive_present; + }; + int refcnt; /* per-core: #HT threads */ unsigned core_id; /* per-core: core id */ }; @@ -172,7 +189,11 @@ struct cpu_hw_events { added in the current transaction */ int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; + struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; + + int n_excl; /* the number of exclusive events */ unsigned int group_flag; int is_fake; @@ -519,12 +540,10 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); - void (*commit_scheduling)(struct cpu_hw_events *cpuc, - struct perf_event *event, - int cntr); - void (*start_scheduling)(struct cpu_hw_events *cpuc); + void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr); + void (*stop_scheduling)(struct cpu_hw_events *cpuc); struct event_constraint *event_constraints; @@ -697,6 +716,10 @@ int x86_add_exclusive(unsigned int what); void x86_del_exclusive(unsigned int what); +int x86_reserve_hardware(void); + +void x86_release_hardware(void); + void hw_perf_lbr_event_destroy(struct perf_event *event); int x86_setup_perfctr(struct perf_event *event); @@ -717,8 +740,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, void x86_pmu_enable_all(int added); -int perf_assign_events(struct perf_event **events, int n, - int wmin, int wmax, int *assign); +int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int gpmax, int *assign); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); void x86_pmu_stop(struct perf_event *event, int flags); @@ -860,6 +883,8 @@ void intel_pmu_pebs_enable_all(void); void intel_pmu_pebs_disable_all(void); +void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); + void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); @@ -929,4 +954,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu) return NULL; } +static inline int is_ht_workaround_enabled(void) +{ + return 0; +} #endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 219d3fb423a1..b9826a981fb2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, - [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, @@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(OP_READ) ] = { /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ @@ 
-1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -1904,9 +1903,8 @@ static void intel_start_scheduling(struct cpu_hw_events *cpuc) { struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct intel_excl_states *xl, *xlo; + struct intel_excl_states *xl; int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; /* sibling thread */ /* * nothing needed if in group validation mode @@ -1917,35 +1915,52 @@ intel_start_scheduling(struct cpu_hw_events *cpuc) /* * no exclusion needed */ - if (!excl_cntrs) + if (WARN_ON_ONCE(!excl_cntrs)) return; - xlo = &excl_cntrs->states[o_tid]; xl = &excl_cntrs->states[tid]; xl->sched_started = true; - xl->num_alloc_cntrs = 0; /* * lock shared state until we are done scheduling * in stop_event_scheduling() * makes scheduling appear as a transaction */ - WARN_ON_ONCE(!irqs_disabled()); raw_spin_lock(&excl_cntrs->lock); +} - /* - * save initial state of sibling thread - */ - memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state)); +static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) +{ + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct event_constraint *c = cpuc->event_constraint[idx]; + struct intel_excl_states *xl; + int tid = cpuc->excl_thread_id; + + if (cpuc->is_fake || !is_ht_workaround_enabled()) + return; + + if (WARN_ON_ONCE(!excl_cntrs)) + return; + + if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) + return; + + xl = &excl_cntrs->states[tid]; + + lockdep_assert_held(&excl_cntrs->lock); + + if (c->flags & PERF_X86_EVENT_EXCL) + xl->state[cntr] = INTEL_EXCL_EXCLUSIVE; + else + xl->state[cntr] = INTEL_EXCL_SHARED; } static void intel_stop_scheduling(struct cpu_hw_events *cpuc) { struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct intel_excl_states *xl, *xlo; + struct intel_excl_states *xl; int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; /* sibling thread */ /* * nothing needed if in group validation mode @@ -1955,17 +1970,11 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc) /* * no exclusion needed */ - if (!excl_cntrs) + if (WARN_ON_ONCE(!excl_cntrs)) return; - xlo = &excl_cntrs->states[o_tid]; xl = &excl_cntrs->states[tid]; - /* - * make new sibling thread state visible - */ - memcpy(xlo->state, xlo->init_state, sizeof(xlo->state)); - xl->sched_started = false; /* * release shared state lock (acquired in intel_start_scheduling()) @@ -1977,12 +1986,10 @@ static struct event_constraint * intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, int idx, struct event_constraint *c) { - struct event_constraint *cx; struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct intel_excl_states *xl, *xlo; - int is_excl, i; + struct intel_excl_states *xlo; int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; /* alternate */ + int is_excl, i; /* * validating a group does not require @@ -1994,34 +2001,8 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, /* * no exclusion needed */ - if (!excl_cntrs) + if (WARN_ON_ONCE(!excl_cntrs)) return c; - /* - * event requires exclusive counter access - * across HT threads - */ - is_excl = c->flags & PERF_X86_EVENT_EXCL; - - /* - * xl = state of current HT - * xlo = state of sibling HT - */ - xl = &excl_cntrs->states[tid]; 
- xlo = &excl_cntrs->states[o_tid]; - - /* - * do not allow scheduling of more than max_alloc_cntrs - * which is set to half the available generic counters. - * this helps avoid counter starvation of sibling thread - * by ensuring at most half the counters cannot be in - * exclusive mode. There is not designated counters for the - * limits. Any N/2 counters can be used. This helps with - * events with specifix counter constraints - */ - if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs) - return &emptyconstraint; - - cx = c; /* * because we modify the constraint, we need @@ -2032,10 +2013,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * been cloned (marked dynamic) */ if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { - - /* sanity check */ - if (idx < 0) - return &emptyconstraint; + struct event_constraint *cx; /* * grab pre-allocated constraint entry @@ -2046,13 +2024,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * initialize dynamic constraint * with static constraint */ - memcpy(cx, c, sizeof(*cx)); + *cx = *c; /* * mark constraint as dynamic, so we * can free it later on */ cx->flags |= PERF_X86_EVENT_DYNAMIC; + c = cx; } /* @@ -2063,6 +2042,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, */ /* + * state of sibling HT + */ + xlo = &excl_cntrs->states[tid ^ 1]; + + /* + * event requires exclusive counter access + * across HT threads + */ + is_excl = c->flags & PERF_X86_EVENT_EXCL; + if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { + event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; + if (!cpuc->n_excl++) + WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); + } + + /* * Modify static constraint with current dynamic * state of thread * @@ -2070,44 +2065,44 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * SHARED : sibling counter measuring non-exclusive event * UNUSED : sibling counter unused */ - for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) { + for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { /* * exclusive event in sibling counter * our corresponding counter cannot be used * regardless of our event */ - if (xl->state[i] == INTEL_EXCL_EXCLUSIVE) - __clear_bit(i, cx->idxmsk); + if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) + __clear_bit(i, c->idxmsk); /* * if measuring an exclusive event, sibling * measuring non-exclusive, then counter cannot * be used */ - if (is_excl && xl->state[i] == INTEL_EXCL_SHARED) - __clear_bit(i, cx->idxmsk); + if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) + __clear_bit(i, c->idxmsk); } /* * recompute actual bit weight for scheduling algorithm */ - cx->weight = hweight64(cx->idxmsk64); + c->weight = hweight64(c->idxmsk64); /* * if we return an empty mask, then switch * back to static empty constraint to avoid * the cost of freeing later on */ - if (cx->weight == 0) - cx = &emptyconstraint; + if (c->weight == 0) + c = &emptyconstraint; - return cx; + return c; } static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = event->hw.constraint; + struct event_constraint *c1 = cpuc->event_constraint[idx]; struct event_constraint *c2; /* @@ -2133,10 +2128,8 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, { struct hw_perf_event *hwc = &event->hw; struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct intel_excl_states *xlo, *xl; - unsigned long flags = 0; /* keep compiler happy */ 
int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; + struct intel_excl_states *xl; /* * nothing needed if in group validation mode @@ -2144,31 +2137,35 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, if (cpuc->is_fake) return; - WARN_ON_ONCE(!excl_cntrs); - - if (!excl_cntrs) + if (WARN_ON_ONCE(!excl_cntrs)) return; - xl = &excl_cntrs->states[tid]; - xlo = &excl_cntrs->states[o_tid]; + if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { + hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; + if (!--cpuc->n_excl) + WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); + } /* - * put_constraint may be called from x86_schedule_events() - * which already has the lock held so here make locking - * conditional + * If event was actually assigned, then mark the counter state as + * unused now. */ - if (!xl->sched_started) - raw_spin_lock_irqsave(&excl_cntrs->lock, flags); + if (hwc->idx >= 0) { + xl = &excl_cntrs->states[tid]; - /* - * if event was actually assigned, then mark the - * counter state as unused now - */ - if (hwc->idx >= 0) - xlo->state[hwc->idx] = INTEL_EXCL_UNUSED; + /* + * put_constraint may be called from x86_schedule_events() + * which already has the lock held so here make locking + * conditional. + */ + if (!xl->sched_started) + raw_spin_lock(&excl_cntrs->lock); + + xl->state[hwc->idx] = INTEL_EXCL_UNUSED; - if (!xl->sched_started) - raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags); + if (!xl->sched_started) + raw_spin_unlock(&excl_cntrs->lock); + } } static void @@ -2189,8 +2186,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - struct event_constraint *c = event->hw.constraint; - intel_put_shared_regs_event_constraints(cpuc, event); /* @@ -2198,48 +2193,8 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, * all events are subject to and must call the * put_excl_constraints() routine */ - if (c && cpuc->excl_cntrs) + if (cpuc->excl_cntrs) intel_put_excl_constraints(cpuc, event); - - /* cleanup dynamic constraint */ - if (c && (c->flags & PERF_X86_EVENT_DYNAMIC)) - event->hw.constraint = NULL; -} - -static void intel_commit_scheduling(struct cpu_hw_events *cpuc, - struct perf_event *event, int cntr) -{ - struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct event_constraint *c = event->hw.constraint; - struct intel_excl_states *xlo, *xl; - int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; - int is_excl; - - if (cpuc->is_fake || !c) - return; - - is_excl = c->flags & PERF_X86_EVENT_EXCL; - - if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) - return; - - WARN_ON_ONCE(!excl_cntrs); - - if (!excl_cntrs) - return; - - xl = &excl_cntrs->states[tid]; - xlo = &excl_cntrs->states[o_tid]; - - WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock)); - - if (cntr >= 0) { - if (is_excl) - xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; - else - xlo->init_state[cntr] = INTEL_EXCL_SHARED; - } } static void intel_pebs_aliases_core2(struct perf_event *event) @@ -2305,8 +2260,15 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; - if (event->attr.precise_ip && x86_pmu.pebs_aliases) - x86_pmu.pebs_aliases(event); + if (event->attr.precise_ip) { + if (!event->attr.freq) { + event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; + if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS)) + event->hw.flags |= PERF_X86_EVENT_FREERUNNING; + } + if (x86_pmu.pebs_aliases) + x86_pmu.pebs_aliases(event); + } if 
(needs_branch_stack(event)) { ret = intel_pmu_setup_lbr_filter(event); @@ -2533,34 +2495,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config) return x86_event_sysfs_show(page, config, event); } -static __initconst const struct x86_pmu core_pmu = { - .name = "core", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = core_pmu_enable_all, - .enable = core_pmu_enable_event, - .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic event period: - */ - .max_period = (1ULL << 31) - 1, - .get_event_constraints = intel_get_event_constraints, - .put_event_constraints = intel_put_event_constraints, - .event_constraints = intel_core_event_constraints, - .guest_get_msrs = core_guest_get_msrs, - .format_attrs = intel_arch_formats_attr, - .events_sysfs_show = intel_event_sysfs_show, -}; - struct intel_shared_regs *allocate_shared_regs(int cpu) { struct intel_shared_regs *regs; @@ -2583,19 +2517,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu) static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) { struct intel_excl_cntrs *c; - int i; c = kzalloc_node(sizeof(struct intel_excl_cntrs), GFP_KERNEL, cpu_to_node(cpu)); if (c) { raw_spin_lock_init(&c->lock); - for (i = 0; i < X86_PMC_IDX_MAX; i++) { - c->states[0].state[i] = INTEL_EXCL_UNUSED; - c->states[0].init_state[i] = INTEL_EXCL_UNUSED; - - c->states[1].state[i] = INTEL_EXCL_UNUSED; - c->states[1].init_state[i] = INTEL_EXCL_UNUSED; - } c->core_id = -1; } return c; @@ -2650,7 +2576,7 @@ static void intel_pmu_cpu_starting(int cpu) if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; - for_each_cpu(i, topology_thread_cpumask(cpu)) { + for_each_cpu(i, topology_sibling_cpumask(cpu)) { struct intel_shared_regs *pc; pc = per_cpu(cpu_hw_events, i).shared_regs; @@ -2668,9 +2594,7 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { - int h = x86_pmu.num_counters >> 1; - - for_each_cpu(i, topology_thread_cpumask(cpu)) { + for_each_cpu(i, topology_sibling_cpumask(cpu)) { struct intel_excl_cntrs *c; c = per_cpu(cpu_hw_events, i).excl_cntrs; @@ -2683,11 +2607,6 @@ static void intel_pmu_cpu_starting(int cpu) } cpuc->excl_cntrs->core_id = core_id; cpuc->excl_cntrs->refcnt++; - /* - * set hard limit to half the number of generic counters - */ - cpuc->excl_cntrs->states[0].max_alloc_cntrs = h; - cpuc->excl_cntrs->states[1].max_alloc_cntrs = h; } } @@ -2723,6 +2642,15 @@ static void intel_pmu_cpu_dying(int cpu) fini_debug_store_on_cpu(cpu); } +static void intel_pmu_sched_task(struct perf_event_context *ctx, + bool sched_in) +{ + if (x86_pmu.pebs_active) + intel_pmu_pebs_sched_task(ctx, sched_in); + if (x86_pmu.lbr_nr) + intel_pmu_lbr_sched_task(ctx, sched_in); +} + PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); @@ -2743,6 +2671,44 @@ static struct attribute *intel_arch3_formats_attr[] = { NULL, }; +static __initconst const struct x86_pmu core_pmu = { + .name = "core", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + 
.enable_all = core_pmu_enable_all, + .enable = core_pmu_enable_event, + .disable = x86_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32-bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL<<31) - 1, + .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .event_constraints = intel_core_event_constraints, + .guest_get_msrs = core_guest_get_msrs, + .format_attrs = intel_arch_formats_attr, + .events_sysfs_show = intel_event_sysfs_show, + + /* + * Virtual (or funny metal) CPU can define x86_pmu.extra_regs + * together with PMU version 1 and thus be using core_pmu with + * shared_regs. We need the following callbacks here to allocate + * it properly. + */ + .cpu_prepare = intel_pmu_cpu_prepare, + .cpu_starting = intel_pmu_cpu_starting, + .cpu_dying = intel_pmu_cpu_dying, +}; + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -2774,7 +2740,7 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, .guest_get_msrs = intel_guest_get_msrs, - .sched_task = intel_pmu_lbr_sched_task, + .sched_task = intel_pmu_sched_task, }; static __init void intel_clovertown_quirk(void) @@ -2947,8 +2913,8 @@ static __init void intel_ht_bug(void) { x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED; - x86_pmu.commit_scheduling = intel_commit_scheduling; x86_pmu.start_scheduling = intel_start_scheduling; + x86_pmu.commit_scheduling = intel_commit_scheduling; x86_pmu.stop_scheduling = intel_stop_scheduling; } @@ -3261,6 +3227,8 @@ __init int intel_pmu_init(void) case 61: /* 14nm Broadwell Core-M */ case 86: /* 14nm Broadwell Xeon D */ + case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -3330,13 +3298,13 @@ __init int intel_pmu_init(void) * counter, so do not extend mask to generic counters */ for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != FIXED_EVENT_FLAGS - || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { - continue; + if (c->cmask == FIXED_EVENT_FLAGS + && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; } - - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; - c->weight += x86_pmu.num_counters; + c->idxmsk64 &= + ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); + c->weight = hweight64(c->idxmsk64); } } @@ -3394,7 +3362,7 @@ static __init int fixup_ht_bug(void) if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_thread_cpumask(cpu)); + w = cpumask_weight(topology_sibling_cpumask(cpu)); if (w > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; @@ -3404,8 +3372,8 @@ static __init int fixup_ht_bug(void) x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); - x86_pmu.commit_scheduling = NULL; x86_pmu.start_scheduling = NULL; + x86_pmu.commit_scheduling = NULL;
x86_pmu.stop_scheduling = NULL; watchdog_nmi_enable_all(); diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index ac1f0c55f379..7795f3f8b1d5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode) static void bts_event_destroy(struct perf_event *event) { + x86_release_hardware(); x86_del_exclusive(x86_lbr_exclusive_bts); } static int bts_event_init(struct perf_event *event) { + int ret; + if (event->attr.type != bts_pmu.type) return -ENOENT; if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; + ret = x86_reserve_hardware(); + if (ret) { + x86_del_exclusive(x86_lbr_exclusive_bts); + return ret; + } + event->destroy = bts_event_destroy; return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index e4d1b8b738fa..188076161c1b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -13,16 +13,35 @@ #define MSR_IA32_QM_CTR 0x0c8e #define MSR_IA32_QM_EVTSEL 0x0c8d -static unsigned int cqm_max_rmid = -1; +static u32 cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ -struct intel_cqm_state { - raw_spinlock_t lock; - int rmid; - int cnt; +/** + * struct intel_pqr_state - State cache for the PQR MSR + * @rmid: The cached Resource Monitoring ID + * @closid: The cached Class Of Service ID + * @rmid_usecnt: The usage counter for rmid + * + * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the + * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always + * contains both parts, so we need to cache them. + * + * The cache also helps to avoid pointless updates if the value does + * not change. + */ +struct intel_pqr_state { + u32 rmid; + u32 closid; + int rmid_usecnt; }; -static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state); +/* + * The cached intel_pqr_state is strictly per CPU and can never be + * updated from a remote CPU. Both functions which modify the state + * (intel_cqm_event_start and intel_cqm_event_stop) are called with + * interrupts disabled, which is sufficient for the protection. + */ +static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); /* * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. @@ -57,7 +76,7 @@ static cpumask_t cqm_cpumask; * near-zero occupancy value, i.e. no cachelines are tagged with this * RMID, once __intel_cqm_rmid_rotate() returns. */ -static unsigned int intel_cqm_rotation_rmid; +static u32 intel_cqm_rotation_rmid; #define INVALID_RMID (-1) @@ -69,7 +88,7 @@ static unsigned int intel_cqm_rotation_rmid; * Likewise, an rmid value of -1 is used to indicate "no rmid currently * assigned" and is used as part of the rotation code. 
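The intel_pqr_state kernel-doc above captures the design constraint: MSR_IA32_PQR_ASSOC carries closid and rmid in a single 64-bit write, so both halves are cached per CPU and, per the comment, the write can be skipped when the value does not change. A hedged sketch of that caching pattern (wrmsr_u64() is a stand-in for the kernel's wrmsr() helper, and the per-CPU aspect is elided):

#include <stdint.h>

#define MSR_IA32_PQR_ASSOC 0x0c8f          /* architectural MSR number */

struct pqr_cache {
        uint32_t rmid;
        uint32_t closid;
};

static struct pqr_cache pqr;               /* conceptually per-CPU */

/* Stand-in for the kernel's wrmsr(); a real version would execute WRMSR. */
static void wrmsr_u64(uint32_t msr, uint64_t val)
{
        (void)msr;
        (void)val;
}

static void pqr_update(uint32_t rmid, uint32_t closid)
{
        /* Skip the (comparatively expensive) MSR write when nothing changed. */
        if (pqr.rmid == rmid && pqr.closid == closid)
                return;

        pqr.rmid = rmid;
        pqr.closid = closid;
        /* closid lives in the upper 32 bits, rmid in the lower bits. */
        wrmsr_u64(MSR_IA32_PQR_ASSOC, ((uint64_t)closid << 32) | rmid);
}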
*/ -static inline bool __rmid_valid(unsigned int rmid) +static inline bool __rmid_valid(u32 rmid) { if (!rmid || rmid == INVALID_RMID) return false; @@ -77,7 +96,7 @@ static inline bool __rmid_valid(unsigned int rmid) return true; } -static u64 __rmid_read(unsigned int rmid) +static u64 __rmid_read(u32 rmid) { u64 val; @@ -102,7 +121,7 @@ enum rmid_recycle_state { }; struct cqm_rmid_entry { - unsigned int rmid; + u32 rmid; enum rmid_recycle_state state; struct list_head list; unsigned long queue_time; @@ -147,7 +166,7 @@ static LIST_HEAD(cqm_rmid_limbo_lru); */ static struct cqm_rmid_entry **cqm_rmid_ptrs; -static inline struct cqm_rmid_entry *__rmid_entry(int rmid) +static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid) { struct cqm_rmid_entry *entry; @@ -162,7 +181,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(int rmid) * * We expect to be called with cache_mutex held. */ -static int __get_rmid(void) +static u32 __get_rmid(void) { struct cqm_rmid_entry *entry; @@ -177,7 +196,7 @@ static int __get_rmid(void) return entry->rmid; } -static void __put_rmid(unsigned int rmid) +static void __put_rmid(u32 rmid) { struct cqm_rmid_entry *entry; @@ -372,7 +391,7 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b) } struct rmid_read { - unsigned int rmid; + u32 rmid; atomic64_t value; }; @@ -381,12 +400,11 @@ static void __intel_cqm_event_count(void *info); /* * Exchange the RMID of a group of events. */ -static unsigned int -intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid) +static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) { struct perf_event *event; - unsigned int old_rmid = group->hw.cqm_rmid; struct list_head *head = &group->hw.cqm_group_entry; + u32 old_rmid = group->hw.cqm_rmid; lockdep_assert_held(&cache_mutex); @@ -451,7 +469,7 @@ static void intel_cqm_stable(void *arg) * If we have group events waiting for an RMID that don't conflict with * events already running, assign @rmid. 
*/ -static bool intel_cqm_sched_in_event(unsigned int rmid) +static bool intel_cqm_sched_in_event(u32 rmid) { struct perf_event *leader, *event; @@ -598,7 +616,7 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available) static void __intel_cqm_pick_and_rotate(struct perf_event *next) { struct perf_event *rotor; - unsigned int rmid; + u32 rmid; lockdep_assert_held(&cache_mutex); @@ -626,7 +644,7 @@ static void __intel_cqm_pick_and_rotate(struct perf_event *next) static void intel_cqm_sched_out_conflicting_events(struct perf_event *event) { struct perf_event *group, *g; - unsigned int rmid; + u32 rmid; lockdep_assert_held(&cache_mutex); @@ -828,8 +846,8 @@ static void intel_cqm_setup_event(struct perf_event *event, struct perf_event **group) { struct perf_event *iter; - unsigned int rmid; bool conflict = false; + u32 rmid; list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { rmid = iter->hw.cqm_rmid; @@ -860,7 +878,7 @@ static void intel_cqm_setup_event(struct perf_event *event, static void intel_cqm_event_read(struct perf_event *event) { unsigned long flags; - unsigned int rmid; + u32 rmid; u64 val; /* @@ -961,55 +979,48 @@ out: static void intel_cqm_event_start(struct perf_event *event, int mode) { - struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); - unsigned int rmid = event->hw.cqm_rmid; - unsigned long flags; + struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); + u32 rmid = event->hw.cqm_rmid; if (!(event->hw.cqm_state & PERF_HES_STOPPED)) return; event->hw.cqm_state &= ~PERF_HES_STOPPED; - raw_spin_lock_irqsave(&state->lock, flags); - - if (state->cnt++) - WARN_ON_ONCE(state->rmid != rmid); - else + if (state->rmid_usecnt++) { + if (!WARN_ON_ONCE(state->rmid != rmid)) + return; + } else { WARN_ON_ONCE(state->rmid); + } state->rmid = rmid; - wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid); - - raw_spin_unlock_irqrestore(&state->lock, flags); + wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid); } static void intel_cqm_event_stop(struct perf_event *event, int mode) { - struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); - unsigned long flags; + struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); if (event->hw.cqm_state & PERF_HES_STOPPED) return; event->hw.cqm_state |= PERF_HES_STOPPED; - raw_spin_lock_irqsave(&state->lock, flags); intel_cqm_event_read(event); - if (!--state->cnt) { + if (!--state->rmid_usecnt) { state->rmid = 0; - wrmsrl(MSR_IA32_PQR_ASSOC, 0); + wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid); } else { WARN_ON_ONCE(!state->rmid); } - - raw_spin_unlock_irqrestore(&state->lock, flags); } static int intel_cqm_event_add(struct perf_event *event, int mode) { unsigned long flags; - unsigned int rmid; + u32 rmid; raw_spin_lock_irqsave(&cache_lock, flags); @@ -1024,11 +1035,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode) return 0; } -static void intel_cqm_event_del(struct perf_event *event, int mode) -{ - intel_cqm_event_stop(event, mode); -} - static void intel_cqm_event_destroy(struct perf_event *event) { struct perf_event *group_other = NULL; @@ -1057,7 +1063,7 @@ static void intel_cqm_event_destroy(struct perf_event *event) list_replace(&event->hw.cqm_groups_entry, &group_other->hw.cqm_groups_entry); } else { - unsigned int rmid = event->hw.cqm_rmid; + u32 rmid = event->hw.cqm_rmid; if (__rmid_valid(rmid)) __put_rmid(rmid); @@ -1221,7 +1227,7 @@ static struct pmu intel_cqm_pmu = { .task_ctx_nr = perf_sw_context, .event_init = intel_cqm_event_init, .add = intel_cqm_event_add, - .del = intel_cqm_event_del, + .del = 
intel_cqm_event_stop, .start = intel_cqm_event_start, .stop = intel_cqm_event_stop, .read = intel_cqm_event_read, @@ -1243,12 +1249,12 @@ static inline void cqm_pick_event_reader(int cpu) static void intel_cqm_cpu_prepare(unsigned int cpu) { - struct intel_cqm_state *state = &per_cpu(cqm_state, cpu); + struct intel_pqr_state *state = &per_cpu(pqr_state, cpu); struct cpuinfo_x86 *c = &cpu_data(cpu); - raw_spin_lock_init(&state->lock); state->rmid = 0; - state->cnt = 0; + state->closid = 0; + state->rmid_usecnt = 0; WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid); WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 813f75d71175..71fc40238843 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -11,7 +11,7 @@ #define BTS_RECORD_SIZE 24 #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) -#define PEBS_BUFFER_SIZE PAGE_SIZE +#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) #define PEBS_FIXUP_SIZE PAGE_SIZE /* @@ -250,7 +250,7 @@ static int alloc_pebs_buffer(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; int node = cpu_to_node(cpu); - int max, thresh = 1; /* always use a single PEBS record */ + int max; void *buffer, *ibuffer; if (!x86_pmu.pebs) @@ -280,9 +280,6 @@ static int alloc_pebs_buffer(int cpu) ds->pebs_absolute_maximum = ds->pebs_buffer_base + max * x86_pmu.pebs_record_size; - ds->pebs_interrupt_threshold = ds->pebs_buffer_base + - thresh * x86_pmu.pebs_record_size; - return 0; } @@ -549,6 +546,19 @@ int intel_pmu_drain_bts_buffer(void) return 1; } +static inline void intel_pmu_drain_pebs_buffer(void) +{ + struct pt_regs regs; + + x86_pmu.drain_pebs(®s); +} + +void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + if (!sched_in) + intel_pmu_drain_pebs_buffer(); +} + /* * PEBS */ @@ -684,33 +694,81 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) return &emptyconstraint; } +static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc) +{ + return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1)); +} + void intel_pmu_pebs_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; + struct debug_store *ds = cpuc->ds; + bool first_pebs; + u64 threshold; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; + first_pebs = !pebs_is_enabled(cpuc); cpuc->pebs_enabled |= 1ULL << hwc->idx; if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled |= 1ULL << 63; + + /* + * When the event is constrained enough we can use a larger + * threshold and run the event with less frequent PMI. 
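The two thresholds the code below chooses between are simple buffer arithmetic: free-running mode interrupts only once no more than one worst-case batch of records still fits, while the classic mode interrupts after every record. Pulled out as a standalone helper (field names mirror the debug-store layout shown here; treat it as a sketch, not kernel code):

#include <stdint.h>

struct ds_layout {
        uint64_t pebs_buffer_base;
        uint64_t pebs_absolute_maximum;
};

static uint64_t pebs_threshold(const struct ds_layout *ds,
                               uint64_t record_size,
                               unsigned int max_pebs_events,
                               int freerunning)
{
        if (freerunning)
                /*
                 * Leave room for one record per active counter so the
                 * final batch written after the threshold is crossed
                 * can never overrun the buffer.
                 */
                return ds->pebs_absolute_maximum -
                       max_pebs_events * record_size;

        /* Classic mode: raise the PMI after every single record. */
        return ds->pebs_buffer_base + record_size;
}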
+ */ + if (hwc->flags & PERF_X86_EVENT_FREERUNNING) { + threshold = ds->pebs_absolute_maximum - + x86_pmu.max_pebs_events * x86_pmu.pebs_record_size; + + if (first_pebs) + perf_sched_cb_inc(event->ctx->pmu); + } else { + threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; + + /* + * If not all events can use larger buffer, + * roll back to threshold = 1 + */ + if (!first_pebs && + (ds->pebs_interrupt_threshold > threshold)) + perf_sched_cb_dec(event->ctx->pmu); + } + + /* Use auto-reload if possible to save a MSR write in the PMI */ + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { + ds->pebs_event_reset[hwc->idx] = + (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; + } + + if (first_pebs || ds->pebs_interrupt_threshold > threshold) + ds->pebs_interrupt_threshold = threshold; } void intel_pmu_pebs_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; + struct debug_store *ds = cpuc->ds; cpuc->pebs_enabled &= ~(1ULL << hwc->idx); - if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT) + if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); - else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST) + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled &= ~(1ULL << 63); + if (ds->pebs_interrupt_threshold > + ds->pebs_buffer_base + x86_pmu.pebs_record_size) { + intel_pmu_drain_pebs_buffer(); + if (!pebs_is_enabled(cpuc)) + perf_sched_cb_dec(event->ctx->pmu); + } + if (cpuc->enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); @@ -846,8 +904,10 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs) return txn; } -static void __intel_pmu_pebs_event(struct perf_event *event, - struct pt_regs *iregs, void *__pebs) +static void setup_pebs_sample_data(struct perf_event *event, + struct pt_regs *iregs, void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) { #define PERF_X86_EVENT_PEBS_HSW_PREC \ (PERF_X86_EVENT_PEBS_ST_HSW | \ @@ -859,13 +919,11 @@ static void __intel_pmu_pebs_event(struct perf_event *event, */ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct pebs_record_hsw *pebs = __pebs; - struct perf_sample_data data; - struct pt_regs regs; u64 sample_type; int fll, fst, dsrc; int fl = event->hw.flags; - if (!intel_pmu_save_and_restart(event)) + if (pebs == NULL) return; sample_type = event->attr.sample_type; @@ -874,15 +932,15 @@ static void __intel_pmu_pebs_event(struct perf_event *event, fll = fl & PERF_X86_EVENT_PEBS_LDLAT; fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(data, 0, event->hw.last_period); - data.period = event->hw.last_period; + data->period = event->hw.last_period; /* * Use latency for weight (only avail with PEBS-LL) */ if (fll && (sample_type & PERF_SAMPLE_WEIGHT)) - data.weight = pebs->lat; + data->weight = pebs->lat; /* * data.data_src encodes the data source @@ -895,7 +953,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, val = precise_datala_hsw(event, pebs->dse); else if (fst) val = precise_store_data(pebs->dse); - data.data_src.val = val; + data->data_src.val = val; } /* @@ -908,61 +966,123 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. * A possible PERF_SAMPLE_REGS will have to transfer all regs. 
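A worked example of the auto-reload reset value programmed above, assuming the usual 48-bit counter mask: with hwc->sample_period = 100000,

	ds->pebs_event_reset[hwc->idx] = (u64)(-100000) & ((1ULL << 48) - 1);	/* 0xfffffffe7960 */

so the hardware rearms the counter to overflow after another 100000 events each time it writes a PEBS record, and the PMI handler never has to rewrite the counter MSR.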
*/ - regs = *iregs; - regs.flags = pebs->flags; - set_linear_ip(&regs, pebs->ip); - regs.bp = pebs->bp; - regs.sp = pebs->sp; + *regs = *iregs; + regs->flags = pebs->flags; + set_linear_ip(regs, pebs->ip); + regs->bp = pebs->bp; + regs->sp = pebs->sp; if (sample_type & PERF_SAMPLE_REGS_INTR) { - regs.ax = pebs->ax; - regs.bx = pebs->bx; - regs.cx = pebs->cx; - regs.dx = pebs->dx; - regs.si = pebs->si; - regs.di = pebs->di; - regs.bp = pebs->bp; - regs.sp = pebs->sp; - - regs.flags = pebs->flags; + regs->ax = pebs->ax; + regs->bx = pebs->bx; + regs->cx = pebs->cx; + regs->dx = pebs->dx; + regs->si = pebs->si; + regs->di = pebs->di; + regs->bp = pebs->bp; + regs->sp = pebs->sp; + + regs->flags = pebs->flags; #ifndef CONFIG_X86_32 - regs.r8 = pebs->r8; - regs.r9 = pebs->r9; - regs.r10 = pebs->r10; - regs.r11 = pebs->r11; - regs.r12 = pebs->r12; - regs.r13 = pebs->r13; - regs.r14 = pebs->r14; - regs.r15 = pebs->r15; + regs->r8 = pebs->r8; + regs->r9 = pebs->r9; + regs->r10 = pebs->r10; + regs->r11 = pebs->r11; + regs->r12 = pebs->r12; + regs->r13 = pebs->r13; + regs->r14 = pebs->r14; + regs->r15 = pebs->r15; #endif } if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { - regs.ip = pebs->real_ip; - regs.flags |= PERF_EFLAGS_EXACT; - } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs)) - regs.flags |= PERF_EFLAGS_EXACT; + regs->ip = pebs->real_ip; + regs->flags |= PERF_EFLAGS_EXACT; + } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs)) + regs->flags |= PERF_EFLAGS_EXACT; else - regs.flags &= ~PERF_EFLAGS_EXACT; + regs->flags &= ~PERF_EFLAGS_EXACT; if ((sample_type & PERF_SAMPLE_ADDR) && x86_pmu.intel_cap.pebs_format >= 1) - data.addr = pebs->dla; + data->addr = pebs->dla; if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight.
*/ if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) - data.weight = intel_hsw_weight(pebs); + data->weight = intel_hsw_weight(pebs); if (sample_type & PERF_SAMPLE_TRANSACTION) - data.txn = intel_hsw_transaction(pebs); + data->txn = intel_hsw_transaction(pebs); } if (has_branch_stack(event)) - data.br_stack = &cpuc->lbr_stack; + data->br_stack = &cpuc->lbr_stack; +} + +static inline void * +get_next_pebs_record_by_bit(void *base, void *top, int bit) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + void *at; + u64 pebs_status; + + if (base == NULL) + return NULL; + + for (at = base; at < top; at += x86_pmu.pebs_record_size) { + struct pebs_record_nhm *p = at; - if (perf_event_overflow(event, &data, &regs)) + if (test_bit(bit, (unsigned long *)&p->status)) { + /* PEBS v3 has accurate status bits */ + if (x86_pmu.intel_cap.pebs_format >= 3) + return at; + + if (p->status == (1 << bit)) + return at; + + /* clear non-PEBS bit and re-check */ + pebs_status = p->status & cpuc->pebs_enabled; + pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1; + if (pebs_status == (1 << bit)) + return at; + } + } + return NULL; +} + +static void __intel_pmu_pebs_event(struct perf_event *event, + struct pt_regs *iregs, + void *base, void *top, + int bit, int count) +{ + struct perf_sample_data data; + struct pt_regs regs; + void *at = get_next_pebs_record_by_bit(base, top, bit); + + if (!intel_pmu_save_and_restart(event) && + !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)) + return; + + while (count > 1) { + setup_pebs_sample_data(event, iregs, at, &data, &regs); + perf_event_output(event, &data, &regs); + at += x86_pmu.pebs_record_size; + at = get_next_pebs_record_by_bit(at, top, bit); + count--; + } + + setup_pebs_sample_data(event, iregs, at, &data, &regs); + + /* + * All but the last records are processed. + * The last one is left to be able to call the overflow handler. + */ + if (perf_event_overflow(event, &data, &regs)) { x86_pmu_stop(event, 0); + return; + } + } static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) @@ -992,72 +1112,99 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) if (!event->attr.precise_ip) return; - n = top - at; + n = (top - at) / x86_pmu.pebs_record_size; if (n <= 0) return; - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ONCE(n > 1, "bad leftover pebs %d\n", n); - at += n - 1; - - __intel_pmu_pebs_event(event, iregs, at); + __intel_pmu_pebs_event(event, iregs, at, top, 0, n); } static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; - struct perf_event *event = NULL; - void *at, *top; - u64 status = 0; - int bit; + struct perf_event *event; + void *base, *at, *top; + short counts[MAX_PEBS_EVENTS] = {}; + short error[MAX_PEBS_EVENTS] = {}; + int bit, i; if (!x86_pmu.pebs_active) return; - at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; + base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; ds->pebs_index = ds->pebs_buffer_base; - if (unlikely(at > top)) + if (unlikely(base >= top)) return; - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value.
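To make the status matching above concrete, an illustrative case: a drained record carries status 0x9 (bits 0 and 3) while only counter 0 is PEBS-enabled. Then

	pebs_status = 0x9 & cpuc->pebs_enabled;		/* stray bit 3 came from a non-PEBS counter */
	pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;	/* 0x1 == (1 << 0), match */

and the record is still attributed to counter 0; only PEBS v3 (pebs_format >= 3) makes the raw status bits trustworthy without this masking.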
- */ - WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size, - "Unexpected number of pebs records %ld\n", - (long)(top - at) / x86_pmu.pebs_record_size); - - for (; at < top; at += x86_pmu.pebs_record_size) { + for (at = base; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; - for_each_set_bit(bit, (unsigned long *)&p->status, - x86_pmu.max_pebs_events) { - event = cpuc->events[bit]; - if (!test_bit(bit, cpuc->active_mask)) - continue; - - WARN_ON_ONCE(!event); + /* PEBS v3 has accurate status bits */ + if (x86_pmu.intel_cap.pebs_format >= 3) { + for_each_set_bit(bit, (unsigned long *)&p->status, + MAX_PEBS_EVENTS) + counts[bit]++; - if (!event->attr.precise_ip) - continue; + continue; + } - if (__test_and_set_bit(bit, (unsigned long *)&status)) + bit = find_first_bit((unsigned long *)&p->status, + x86_pmu.max_pebs_events); + if (bit >= x86_pmu.max_pebs_events) + continue; + if (!test_bit(bit, cpuc->active_mask)) + continue; + /* + * The PEBS hardware does not deal well with the situation + * when events happen near to each other and multiple bits + * are set. But it should happen rarely. + * + * If these events include one PEBS and multiple non-PEBS + * events, it doesn't impact PEBS record. The record will + * be handled normally. (slow path) + * + * If these events include two or more PEBS events, the + * records for the events can be collapsed into a single + * one, and it's not possible to reconstruct all events + * that caused the PEBS record. It's called collision. + * If collision happened, the record will be dropped. + * + */ + if (p->status != (1 << bit)) { + u64 pebs_status; + + /* slow path */ + pebs_status = p->status & cpuc->pebs_enabled; + pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1; + if (pebs_status != (1 << bit)) { + for_each_set_bit(i, (unsigned long *)&pebs_status, + MAX_PEBS_EVENTS) + error[i]++; continue; - - break; + } } + counts[bit]++; + } - if (!event || bit >= x86_pmu.max_pebs_events) + for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) { + if ((counts[bit] == 0) && (error[bit] == 0)) continue; + event = cpuc->events[bit]; + WARN_ON_ONCE(!event); + WARN_ON_ONCE(!event->attr.precise_ip); - __intel_pmu_pebs_event(event, iregs, at); + /* log dropped samples number */ + if (error[bit]) + perf_log_lost_samples(event, error[bit]); + + if (counts[bit]) { + __intel_pmu_pebs_event(event, iregs, base, + top, bit, counts[bit]); + } } } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 94e5b506caa6..452a7bd2dedb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -96,6 +96,7 @@ enum { X86_BR_NO_TX = 1 << 14,/* not in transaction */ X86_BR_ZERO_CALL = 1 << 15,/* zero length call */ X86_BR_CALL_STACK = 1 << 16,/* call stack */ + X86_BR_IND_JMP = 1 << 17,/* indirect jump */ }; #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) @@ -113,6 +114,7 @@ enum { X86_BR_IRQ |\ X86_BR_ABORT |\ X86_BR_IND_CALL |\ + X86_BR_IND_JMP |\ X86_BR_ZERO_CALL) #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) @@ -262,9 +264,6 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; - if (!x86_pmu.lbr_nr) - return; - /* * If LBR callstack feature is enabled and the stack was saved when * the task was scheduled out, restore the stack. 
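A concrete collision case for the counts[]/error[] accounting above: PEBS events run on counters 0 and 1 and a record arrives with status == 0x3. Masking with pebs_enabled still leaves 0x3, which matches neither (1 << 0) nor (1 << 1), so the record is dropped and

	error[0]++;
	error[1]++;
	/* later: perf_log_lost_samples(event, error[bit]) reports the drops */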
Otherwise flush @@ -523,6 +522,9 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) X86_BR_CALL_STACK; } + if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP) + mask |= X86_BR_IND_JMP; + /* * stash actual user request into reg, it may * be used by fixup code for some CPU @@ -736,7 +738,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort) break; case 4: case 5: - ret = X86_BR_JMP; + ret = X86_BR_IND_JMP; break; } break; @@ -844,6 +846,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { */ [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP, [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, }; static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { @@ -856,6 +859,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { | LBR_FAR, [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, }; static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { @@ -870,6 +874,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN | LBR_CALL_STACK, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, }; /* core */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index ffe666c2c6b5..159887c3a89d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -151,7 +151,7 @@ static int __init pt_pmu_hw_init(void) de_attr->attr.attr.name = pt_caps[i].name; - sysfs_attr_init(&de_attrs->attr.attr); + sysfs_attr_init(&de_attr->attr.attr); de_attr->attr.attr.mode = S_IRUGO; de_attr->attr.show = pt_cap_show; @@ -187,15 +187,6 @@ static bool pt_event_valid(struct perf_event *event) * These all are cpu affine and operate on a local PT */ -static bool pt_is_running(void) -{ - u64 ctl; - - rdmsrl(MSR_IA32_RTIT_CTL, ctl); - - return !!(ctl & RTIT_CTL_TRACEEN); -} - static void pt_config(struct perf_event *event) { u64 reg; @@ -609,16 +600,19 @@ static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg) * @handle: Current output handle. * * Place INT and STOP marks to prevent overwriting old data that the consumer - * hasn't yet collected. + * hasn't yet collected and waking up the consumer after a certain fraction of + * the buffer has filled up. Only needed and sensible for non-snapshot counters. + * + * This obviously relies on buf::head to figure out buffer markers, so it has + * to be called after pt_buffer_reset_offsets() and before the hardware tracing + * is enabled. 
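A worked placement for the markers described above, paraphrasing the code that follows and assuming a 16-page buffer with head on page 5 and npages = 8:

	buf->stop_pos = (5 + 8) & 15;			/* STOP mark lands on page 13 */
	buf->intr_pos = min(5 + 8 - 1, wakeup) & 15;	/* INT mark on page 12 at the latest */

so the consumer is woken at least one page before tracing is hard-stopped.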
*/ static int pt_buffer_reset_markers(struct pt_buffer *buf, struct perf_output_handle *handle) { - unsigned long idx, npages, end; - - if (buf->snapshot) - return 0; + unsigned long head = local64_read(&buf->head); + unsigned long idx, npages, wakeup; /* can't stop in the middle of an output region */ if (buf->output_off + handle->size + 1 < @@ -634,17 +628,26 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, buf->topa_index[buf->stop_pos]->stop = 0; buf->topa_index[buf->intr_pos]->intr = 0; - if (pt_cap_get(PT_CAP_topa_multiple_entries)) { - npages = (handle->size + 1) >> PAGE_SHIFT; - end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages; - /*if (end > handle->wakeup >> PAGE_SHIFT) - end = handle->wakeup >> PAGE_SHIFT;*/ - idx = end & (buf->nr_pages - 1); - buf->stop_pos = idx; - idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1; - idx &= buf->nr_pages - 1; - buf->intr_pos = idx; - } + /* how many pages till the STOP marker */ + npages = handle->size >> PAGE_SHIFT; + + /* if it's on a page boundary, fill up one more page */ + if (!offset_in_page(head + handle->size + 1)) + npages++; + + idx = (head >> PAGE_SHIFT) + npages; + idx &= buf->nr_pages - 1; + buf->stop_pos = idx; + + wakeup = handle->wakeup >> PAGE_SHIFT; + + /* in the worst case, wake up the consumer one page before hard stop */ + idx = (head >> PAGE_SHIFT) + npages - 1; + if (idx > wakeup) + idx = wakeup; + + idx &= buf->nr_pages - 1; + buf->intr_pos = idx; buf->topa_index[buf->stop_pos]->stop = 1; buf->topa_index[buf->intr_pos]->intr = 1; @@ -664,7 +667,7 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf) struct topa *cur = buf->first, *prev = buf->last; struct topa_entry *te_cur = TOPA_ENTRY(cur, 0), *te_prev = TOPA_ENTRY(prev, prev->last - 1); - int pg = 0, idx = 0, ntopa = 0; + int pg = 0, idx = 0; while (pg < buf->nr_pages) { int tidx; @@ -679,9 +682,9 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf) /* advance to next topa table */ idx = 0; cur = list_entry(cur->list.next, struct topa, list); - ntopa++; - } else + } else { idx++; + } te_cur = TOPA_ENTRY(cur, idx); } @@ -693,7 +696,14 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf) * @head: Write pointer (aux_head) from AUX buffer. * * Find the ToPA table and entry corresponding to given @head and set buffer's - * "current" pointers accordingly. + * "current" pointers accordingly. This is done after we have obtained the + * current aux_head position from a successful call to perf_aux_output_begin() + * to make sure the hardware is writing to the right place. + * + * This function modifies buf::{cur,cur_idx,output_off} that will be programmed + * into PT msrs when the tracing is enabled and buf::head and buf::data_size, + * which are used to determine INT and STOP markers' locations by a subsequent + * call to pt_buffer_reset_markers(). 
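A minimal sketch of the head-to-ToPA lookup this comment describes (same field names as the surrounding code, details elided):

	pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
	te = buf->topa_index[pg];	/* ToPA entry backing that page */
	/* buf->cur, buf->cur_idx and buf->output_off then follow from te
	   and head's offset inside that output region */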
*/ static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head) { @@ -891,6 +901,7 @@ void intel_pt_interrupt(void) } pt_buffer_reset_offsets(buf, pt->handle.head); + /* snapshot counters don't use PMI, so it's safe */ ret = pt_buffer_reset_markers(buf, &pt->handle); if (ret) { perf_aux_output_end(&pt->handle, 0, true); @@ -913,7 +924,7 @@ static void pt_event_start(struct perf_event *event, int mode) struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt_buffer *buf = perf_get_aux(&pt->handle); - if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) { + if (!buf || pt_buffer_is_full(buf, pt)) { event->hw.state = PERF_HES_STOPPED; return; } @@ -944,7 +955,6 @@ static void pt_event_stop(struct perf_event *event, int mode) event->hw.state = PERF_HES_STOPPED; if (mode & PERF_EF_UPDATE) { - struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt_buffer *buf = perf_get_aux(&pt->handle); if (!buf) diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 999289b94025..358c54ad20d4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void) break; case 60: /* Haswell */ case 69: /* Haswell-Celeron */ + case 61: /* Broadwell */ rapl_cntr_mask = RAPL_IDX_HSW; rapl_pmu_events_group.attrs = rapl_events_hsw_attr; break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index c635b8b49e93..7c1de1610178 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -365,9 +365,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { - hwc = &box->event_list[i]->hw; c = uncore_get_event_constraint(box, box->event_list[i]); - hwc->constraint = c; + box->event_constraint[i] = c; wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); } @@ -375,7 +374,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int /* fastpath, try to reuse previous register */ for (i = 0; i < n; i++) { hwc = &box->event_list[i]->hw; - c = hwc->constraint; + c = box->event_constraint[i]; /* never assigned */ if (hwc->idx == -1) @@ -395,8 +394,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int } /* slow path */ if (i != n) - ret = perf_assign_events(box->event_list, n, - wmin, wmax, assign); + ret = perf_assign_events(box->event_constraint, n, + wmin, wmax, n, assign); if (!assign || ret) { for (i = 0; i < n; i++) @@ -840,6 +839,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id box->phys_id = phys_id; box->pci_dev = pdev; box->pmu = pmu; + uncore_box_init(box); pci_set_drvdata(pdev, box); raw_spin_lock(&uncore_box_lock); @@ -922,6 +922,9 @@ static int __init uncore_pci_init(void) case 69: /* Haswell Celeron */ ret = hsw_uncore_pci_init(); break; + case 61: /* Broadwell */ + ret = bdw_uncore_pci_init(); + break; default: return 0; } @@ -1003,8 +1006,10 @@ static int uncore_cpu_starting(int cpu) pmu = &type->pmus[j]; box = *per_cpu_ptr(pmu->box, cpu); /* called by uncore_cpu_init? 
*/ - if (box && box->phys_id >= 0) + if (box && box->phys_id >= 0) { + uncore_box_init(box); continue; + } for_each_online_cpu(k) { exist = *per_cpu_ptr(pmu->box, k); @@ -1020,8 +1025,10 @@ static int uncore_cpu_starting(int cpu) } } - if (box) + if (box) { box->phys_id = phys_id; + uncore_box_init(box); + } } } return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 6c8c1e7e69d8..0f77f0a196e4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -97,6 +97,7 @@ struct intel_uncore_box { atomic_t refcnt; struct perf_event *events[UNCORE_PMC_IDX_MAX]; struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; + struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX]; unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; u64 tags[UNCORE_PMC_IDX_MAX]; struct pci_dev *pci_dev; @@ -257,14 +258,6 @@ static inline int uncore_num_counters(struct intel_uncore_box *box) return box->pmu->type->num_counters; } -static inline void uncore_box_init(struct intel_uncore_box *box) -{ - if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { - if (box->pmu->type->ops->init_box) - box->pmu->type->ops->init_box(box); - } -} - static inline void uncore_disable_box(struct intel_uncore_box *box) { if (box->pmu->type->ops->disable_box) @@ -273,8 +266,6 @@ static inline void uncore_disable_box(struct intel_uncore_box *box) static inline void uncore_enable_box(struct intel_uncore_box *box) { - uncore_box_init(box); - if (box->pmu->type->ops->enable_box) box->pmu->type->ops->enable_box(box); } @@ -297,6 +288,14 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box, return box->pmu->type->ops->read_counter(box, event); } +static inline void uncore_box_init(struct intel_uncore_box *box) +{ + if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { + if (box->pmu->type->ops->init_box) + box->pmu->type->ops->init_box(box); + } +} + static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { return (box->phys_id < 0); @@ -326,6 +325,7 @@ extern struct event_constraint uncore_constraint_empty; int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); +int bdw_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015b755c..b005a78c7012 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,14 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 +#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 @@ -472,6 +480,18 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id 
bdw_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -490,6 +510,11 @@ static struct pci_driver hsw_uncore_pci_driver = { .id_table = hsw_uncore_pci_ids, }; +static struct pci_driver bdw_uncore_pci_driver = { + .name = "bdw_uncore", + .id_table = bdw_uncore_pci_ids, +}; + struct imc_uncore_pci_dev { __u32 pci_id; struct pci_driver *driver; @@ -502,6 +527,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ + IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ { /* end marker */ } }; @@ -549,6 +576,11 @@ int hsw_uncore_pci_init(void) return imc_uncore_pci_init(); } +int bdw_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + /* end of Sandy Bridge uncore support */ /* Nehalem uncore support */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 12d9548457e7..6d6e85dd5849 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -164,8 +164,8 @@ ((1ULL << (n)) - 1))) /* Haswell-EP Ubox */ -#define HSWEP_U_MSR_PMON_CTR0 0x705 -#define HSWEP_U_MSR_PMON_CTL0 0x709 +#define HSWEP_U_MSR_PMON_CTR0 0x709 +#define HSWEP_U_MSR_PMON_CTL0 0x705 #define HSWEP_U_MSR_PMON_FILTER 0x707 #define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL 0x703 @@ -1914,7 +1914,7 @@ static struct intel_uncore_type hswep_uncore_cbox = { .name = "cbox", .num_counters = 4, .num_boxes = 18, - .perf_ctr_bits = 44, + .perf_ctr_bits = 48, .event_ctl = HSWEP_C0_MSR_PMON_CTL0, .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index e7d8c7608471..18ca99f2798b 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -12,7 +12,8 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, { #ifdef CONFIG_SMP seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpumask_weight(cpu_core_mask(cpu))); + seq_printf(m, "siblings\t: %d\n", + cpumask_weight(topology_core_cpumask(cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); seq_printf(m, "apicid\t\t: %d\n", c->apicid); diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile new file mode 100644 index 000000000000..68279efb811a --- /dev/null +++ b/arch/x86/kernel/fpu/Makefile @@ -0,0 +1,5 @@ +# +# Build rules for the FPU support code: +# + +obj-y += init.o bugs.o core.o regset.o signal.o xstate.o diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c new file mode 100644 index 000000000000..dd9ca9b60ff3 --- /dev/null +++ b/arch/x86/kernel/fpu/bugs.c @@ -0,0 +1,71 @@ +/* + * x86 FPU bug checks: + */ +#include <asm/fpu/internal.h> + +/* + * Boot time CPU/FPU FDIV bug detection code: + */ + +static double __initdata x = 4195835.0; +static double __initdata y = 3145727.0; + +/* + * This used to check for exceptions.. 
+ * However, it turns out that to support that, + * the XMM trap handlers basically had to + * be buggy. So let's have a correct XMM trap + * handler, and forget about printing out + * some status at boot. + * + * We should really only care about bugs here + * anyway. Not features. + */ +static void __init check_fpu(void) +{ + u32 cr0_saved; + s32 fdiv_bug; + + /* We might have CR0::TS set already, clear it: */ + cr0_saved = read_cr0(); + write_cr0(cr0_saved & ~X86_CR0_TS); + + kernel_fpu_begin(); + + /* + * trap_init() enabled FXSR and company _before_ testing for FP + * problems here. + * + * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug + */ + __asm__("fninit\n\t" + "fldl %1\n\t" + "fdivl %2\n\t" + "fmull %2\n\t" + "fldl %1\n\t" + "fsubp %%st,%%st(1)\n\t" + "fistpl %0\n\t" + "fwait\n\t" + "fninit" + : "=m" (*&fdiv_bug) + : "m" (*&x), "m" (*&y)); + + kernel_fpu_end(); + + write_cr0(cr0_saved); + + if (fdiv_bug) { + set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); + pr_warn("Hmm, FPU with FDIV bug\n"); + } +} + +void __init fpu__init_check_bugs(void) +{ + /* + * kernel_fpu_begin/end() in check_fpu() relies on the patched + * alternative instructions. + */ + if (cpu_has_fpu) + check_fpu(); +} diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c new file mode 100644 index 000000000000..79de954626fd --- /dev/null +++ b/arch/x86/kernel/fpu/core.c @@ -0,0 +1,523 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + */ +#include <asm/fpu/internal.h> +#include <asm/fpu/regset.h> +#include <asm/fpu/signal.h> +#include <asm/traps.h> + +#include <linux/hardirq.h> + +/* + * Represents the initial FPU state. It's mostly (but not completely) zeroes, + * depending on the FPU hardware format: + */ +union fpregs_state init_fpstate __read_mostly; + +/* + * Track whether the kernel is using the FPU state + * currently. + * + * This flag is used: + * + * - by IRQ context code to potentially use the FPU + * if it's unused. + * + * - to debug kernel_fpu_begin()/end() correctness + */ +static DEFINE_PER_CPU(bool, in_kernel_fpu); + +/* + * Track which context is using the FPU on the CPU: + */ +DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +static void kernel_fpu_disable(void) +{ + WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); + this_cpu_write(in_kernel_fpu, true); +} + +static void kernel_fpu_enable(void) +{ + WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); + this_cpu_write(in_kernel_fpu, false); +} + +static bool kernel_fpu_disabled(void) +{ + return this_cpu_read(in_kernel_fpu); +} + +/* + * Were we in an interrupt that interrupted kernel mode? + * + * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that + * pair does nothing at all: the thread must not have fpu (so + * that we don't try to save the FPU state), and TS must + * be set (so that the clts/stts pair does nothing that is + * visible in the interrupted kernel thread). + * + * Except for the eagerfpu case when we return true; in the likely case + * the thread has FPU but we are not going to set/clear TS. + */ +static bool interrupted_kernel_fpu_idle(void) +{ + if (kernel_fpu_disabled()) + return false; + + if (use_eager_fpu()) + return true; + + return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS); +} + +/* + * Were we in user mode (or vm86 mode) when we were + * interrupted? 
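The FDIV probe in check_fpu() above is self-contained enough to reproduce from user space; a stand-alone rendition (same canonical operands; a flawed Pentium leaves a residue of 256):

	#include <stdio.h>

	int main(void)
	{
		volatile double x = 4195835.0, y = 3145727.0;
		double r = x - (x / y) * y;	/* exactly 0.0 on a correct FPU */

		printf("FDIV residue: %f\n", r);
		return r != 0.0;
	}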
+ * + * Doing kernel_fpu_begin/end() is ok if we are running + * in an interrupt context from user mode - we'll just + * save the FPU state as required. + */ +static bool interrupted_user_mode(void) +{ + struct pt_regs *regs = get_irq_regs(); + return regs && user_mode(regs); +} + +/* + * Can we use the FPU in kernel mode with the + * whole "kernel_fpu_begin/end()" sequence? + * + * It's always ok in process context (ie "not interrupt") + * but it is sometimes ok even from an irq. + */ +bool irq_fpu_usable(void) +{ + return !in_interrupt() || + interrupted_user_mode() || + interrupted_kernel_fpu_idle(); +} +EXPORT_SYMBOL(irq_fpu_usable); + +void __kernel_fpu_begin(void) +{ + struct fpu *fpu = &current->thread.fpu; + + WARN_ON_FPU(!irq_fpu_usable()); + + kernel_fpu_disable(); + + if (fpu->fpregs_active) { + copy_fpregs_to_fpstate(fpu); + } else { + this_cpu_write(fpu_fpregs_owner_ctx, NULL); + __fpregs_activate_hw(); + } +} +EXPORT_SYMBOL(__kernel_fpu_begin); + +void __kernel_fpu_end(void) +{ + struct fpu *fpu = &current->thread.fpu; + + if (fpu->fpregs_active) + copy_kernel_to_fpregs(&fpu->state); + else + __fpregs_deactivate_hw(); + + kernel_fpu_enable(); +} +EXPORT_SYMBOL(__kernel_fpu_end); + +void kernel_fpu_begin(void) +{ + preempt_disable(); + __kernel_fpu_begin(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); + +/* + * CR0::TS save/restore functions: + */ +int irq_ts_save(void) +{ + /* + * If in process context and not atomic, we can take a spurious DNA fault. + * Otherwise, doing clts() in process context requires disabling preemption + * or some heavy lifting like kernel_fpu_begin() + */ + if (!in_atomic()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} +EXPORT_SYMBOL_GPL(irq_ts_save); + +void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} +EXPORT_SYMBOL_GPL(irq_ts_restore); + +/* + * Save the FPU state (mark it for reload if necessary): + * + * This only ever gets called for the current task. + */ +void fpu__save(struct fpu *fpu) +{ + WARN_ON_FPU(fpu != &current->thread.fpu); + + preempt_disable(); + if (fpu->fpregs_active) { + if (!copy_fpregs_to_fpstate(fpu)) + fpregs_deactivate(fpu); + } + preempt_enable(); +} +EXPORT_SYMBOL_GPL(fpu__save); + +/* + * Legacy x87 fpstate state init: + */ +static inline void fpstate_init_fstate(struct fregs_state *fp) +{ + fp->cwd = 0xffff037fu; + fp->swd = 0xffff0000u; + fp->twd = 0xffffffffu; + fp->fos = 0xffff0000u; +} + +void fpstate_init(union fpregs_state *state) +{ + if (!cpu_has_fpu) { + fpstate_init_soft(&state->soft); + return; + } + + memset(state, 0, xstate_size); + + if (cpu_has_fxsr) + fpstate_init_fxstate(&state->fxsave); + else + fpstate_init_fstate(&state->fsave); +} +EXPORT_SYMBOL_GPL(fpstate_init); + +/* + * Copy the current task's FPU state to a new task's FPU context. + * + * In both the 'eager' and the 'lazy' case we save hardware registers + * directly to the destination buffer. + */ +static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) +{ + WARN_ON_FPU(src_fpu != &current->thread.fpu); + + /* + * Don't let 'init optimized' areas of the XSAVE area + * leak into the child task: + */ + if (use_eager_fpu()) + memset(&dst_fpu->state.xsave, 0, xstate_size); + + /* + * Save current FPU registers directly into the child + * FPU context, without any memory-to-memory copying.
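A hypothetical caller of the pair defined above, the canonical pattern for SIMD use in kernel code (the body of the protected region is a placeholder):

	if (irq_fpu_usable()) {
		kernel_fpu_begin();	/* disables preemption, saves the user's FPU state */
		/* ... SSE/AVX-accelerated work ... */
		kernel_fpu_end();	/* restores or deactivates state, re-enables preemption */
	}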
+ * + * If the FPU context got destroyed in the process (FNSAVE + * done on old CPUs) then copy it back into the source + * context and mark the current task for lazy restore. + * + * We have to do all this with preemption disabled, + * mostly because of the FNSAVE case, because in that + * case we must not allow preemption in the window + * between the FNSAVE and us marking the context lazy. + * + * It shouldn't be an issue as even FNSAVE is plenty + * fast in terms of critical section length. + */ + preempt_disable(); + if (!copy_fpregs_to_fpstate(dst_fpu)) { + memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); + fpregs_deactivate(src_fpu); + } + preempt_enable(); +} + +int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) +{ + dst_fpu->counter = 0; + dst_fpu->fpregs_active = 0; + dst_fpu->last_cpu = -1; + + if (src_fpu->fpstate_active) + fpu_copy(dst_fpu, src_fpu); + + return 0; +} + +/* + * Activate the current task's in-memory FPU context, + * if it has not been used before: + */ +void fpu__activate_curr(struct fpu *fpu) +{ + WARN_ON_FPU(fpu != &current->thread.fpu); + + if (!fpu->fpstate_active) { + fpstate_init(&fpu->state); + + /* Safe to do for the current task: */ + fpu->fpstate_active = 1; + } +} +EXPORT_SYMBOL_GPL(fpu__activate_curr); + +/* + * This function must be called before we read a task's fpstate. + * + * If the task has not used the FPU before then initialize its + * fpstate. + * + * If the task has used the FPU before then save it. + */ +void fpu__activate_fpstate_read(struct fpu *fpu) +{ + /* + * If fpregs are active (in the current CPU), then + * copy them to the fpstate: + */ + if (fpu->fpregs_active) { + fpu__save(fpu); + } else { + if (!fpu->fpstate_active) { + fpstate_init(&fpu->state); + + /* Safe to do for current and for stopped child tasks: */ + fpu->fpstate_active = 1; + } + } +} + +/* + * This function must be called before we write a task's fpstate. + * + * If the task has used the FPU before then unlazy it. + * If the task has not used the FPU before then initialize its fpstate. + * + * After this function call, after registers in the fpstate are + * modified and the child task has woken up, the child task will + * restore the modified FPU state from the modified context. If we + * didn't clear its lazy status here then the lazy in-registers + * state pending on its former CPU could be restored, corrupting + * the modifications. + */ +void fpu__activate_fpstate_write(struct fpu *fpu) +{ + /* + * Only stopped child tasks can be used to modify the FPU + * state in the fpstate buffer: + */ + WARN_ON_FPU(fpu == &current->thread.fpu); + + if (fpu->fpstate_active) { + /* Invalidate any lazy state: */ + fpu->last_cpu = -1; + } else { + fpstate_init(&fpu->state); + + /* Safe to do for stopped child tasks: */ + fpu->fpstate_active = 1; + } +} + +/* + * 'fpu__restore()' is called to copy FPU registers from + * the FPU fpstate to the live hw registers and to activate + * access to the hardware registers, so that FPU instructions + * can be used afterwards. + * + * Must be called with kernel preemption disabled (for example + * with local interrupts disabled, as it is in the case of + * do_device_not_available()).
+ */ +void fpu__restore(struct fpu *fpu) +{ + fpu__activate_curr(fpu); + + /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ + kernel_fpu_disable(); + fpregs_activate(fpu); + copy_kernel_to_fpregs(&fpu->state); + fpu->counter++; + kernel_fpu_enable(); +} +EXPORT_SYMBOL_GPL(fpu__restore); + +/* + * Drops current FPU state: deactivates the fpregs and + * the fpstate. NOTE: it still leaves previous contents + * in the fpregs in the eager-FPU case. + * + * This function can be used in cases where we know that + * a state-restore is coming: either an explicit one, + * or a reschedule. + */ +void fpu__drop(struct fpu *fpu) +{ + preempt_disable(); + fpu->counter = 0; + + if (fpu->fpregs_active) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + fpregs_deactivate(fpu); + } + + fpu->fpstate_active = 0; + + preempt_enable(); +} + +/* + * Clear FPU registers by setting them up from + * the init fpstate: + */ +static inline void copy_init_fpstate_to_fpregs(void) +{ + if (use_xsave()) + copy_kernel_to_xregs(&init_fpstate.xsave, -1); + else + copy_kernel_to_fxregs(&init_fpstate.fxsave); +} + +/* + * Clear the FPU state back to init state. + * + * Called by sys_execve(), by the signal handler code and by various + * error paths. + */ +void fpu__clear(struct fpu *fpu) +{ + WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */ + + if (!use_eager_fpu()) { + /* FPU state will be reallocated lazily at the first use. */ + fpu__drop(fpu); + } else { + if (!fpu->fpstate_active) { + fpu__activate_curr(fpu); + user_fpu_begin(); + } + copy_init_fpstate_to_fpregs(); + } +} + +/* + * x87 math exception handling: + */ + +static inline unsigned short get_fpu_cwd(struct fpu *fpu) +{ + if (cpu_has_fxsr) { + return fpu->state.fxsave.cwd; + } else { + return (unsigned short)fpu->state.fsave.cwd; + } +} + +static inline unsigned short get_fpu_swd(struct fpu *fpu) +{ + if (cpu_has_fxsr) { + return fpu->state.fxsave.swd; + } else { + return (unsigned short)fpu->state.fsave.swd; + } +} + +static inline unsigned short get_fpu_mxcsr(struct fpu *fpu) +{ + if (cpu_has_xmm) { + return fpu->state.fxsave.mxcsr; + } else { + return MXCSR_DEFAULT; + } +} + +int fpu__exception_code(struct fpu *fpu, int trap_nr) +{ + int err; + + if (trap_nr == X86_TRAP_MF) { + unsigned short cwd, swd; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit. We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't synchronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + cwd = get_fpu_cwd(fpu); + swd = get_fpu_swd(fpu); + + err = swd & ~cwd; + } else { + /* + * The SIMD FPU exceptions are handled a little differently, as there + * is only a single status/control register. Thus, to determine which + * unmasked exception was caught we must mask the exception mask bits + * at 0x1f80, and then use these to mask the exception bits at 0x3f.
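A worked example for the MXCSR decoding below: with divide-by-zero unmasked (ZM, bit 9, clear) and the ZE flag (bit 2) set, MXCSR reads 0x1d84, so

	err = ~(0x1d84 >> 7) & 0x1d84;	/* mask bits 0x3b, flag bits 0x04 -> err & 0x004 set */

and fpu__exception_code() returns FPE_FLTDIV.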
+ */ + unsigned short mxcsr = get_fpu_mxcsr(fpu); + err = ~(mxcsr >> 7) & mxcsr; + } + + if (err & 0x001) { /* Invalid op */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ + return FPE_FLTINV; + } else if (err & 0x004) { /* Divide by Zero */ + return FPE_FLTDIV; + } else if (err & 0x008) { /* Overflow */ + return FPE_FLTOVF; + } else if (err & 0x012) { /* Denormal, Underflow */ + return FPE_FLTUND; + } else if (err & 0x020) { /* Precision */ + return FPE_FLTRES; + } + + /* + * If we're using IRQ 13, or supposedly even some trap + * X86_TRAP_MF implementations, it's possible + * we get a spurious trap, which is not an error. + */ + return 0; +} diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c new file mode 100644 index 000000000000..fc878fee6a51 --- /dev/null +++ b/arch/x86/kernel/fpu/init.c @@ -0,0 +1,354 @@ +/* + * x86 FPU boot time init code: + */ +#include <asm/fpu/internal.h> +#include <asm/tlbflush.h> + +/* + * Initialize the TS bit in CR0 according to the style of context-switches + * we are using: + */ +static void fpu__init_cpu_ctx_switch(void) +{ + if (!cpu_has_eager_fpu) + stts(); + else + clts(); +} + +/* + * Initialize the registers found in all CPUs, CR0 and CR4: + */ +static void fpu__init_cpu_generic(void) +{ + unsigned long cr0; + unsigned long cr4_mask = 0; + + if (cpu_has_fxsr) + cr4_mask |= X86_CR4_OSFXSR; + if (cpu_has_xmm) + cr4_mask |= X86_CR4_OSXMMEXCPT; + if (cr4_mask) + cr4_set_bits(cr4_mask); + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ + if (!cpu_has_fpu) + cr0 |= X86_CR0_EM; + write_cr0(cr0); + + /* Flush out any pending x87 state: */ + asm volatile ("fninit"); +} + +/* + * Enable all supported FPU features. Called when a CPU is brought online: + */ +void fpu__init_cpu(void) +{ + fpu__init_cpu_generic(); + fpu__init_cpu_xstate(); + fpu__init_cpu_ctx_switch(); +} + +/* + * The earliest FPU detection code. + * + * Set the X86_FEATURE_FPU CPU-capability bit based on + * trying to execute an actual sequence of FPU instructions: + */ +static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +{ + unsigned long cr0; + u16 fsw, fcw; + + fsw = fcw = 0xffff; + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS | X86_CR0_EM); + write_cr0(cr0); + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + if (fsw == 0 && (fcw & 0x103f) == 0x003f) + set_cpu_cap(c, X86_FEATURE_FPU); + else + clear_cpu_cap(c, X86_FEATURE_FPU); + +#ifndef CONFIG_MATH_EMULATION + if (!cpu_has_fpu) { + pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); + for (;;) + asm volatile("hlt"); + } +#endif +} + +/* + * Boot time FPU feature detection code: + */ +unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; + +static void __init fpu__init_system_mxcsr(void) +{ + unsigned int mask = 0; + + if (cpu_has_fxsr) { + struct fxregs_state fx_tmp __aligned(32) = { }; + + asm volatile("fxsave %0" : "+m" (fx_tmp)); + + mask = fx_tmp.mxcsr_mask; + + /* + * If zero then use the default features mask, + * which has all features set, except the + * denormals-are-zero feature bit: + */ + if (mask == 0) + mask = 0x0000ffbf; + } + mxcsr_feature_mask &= mask; +} + +/* + * Once per bootup FPU initialization sequences that will run on most x86 CPUs: + */ +static void __init fpu__init_system_generic(void) +{ + /* + * Set up the legacy init FPU context. 
(xstate init might overwrite this + * with a more modern format, if the CPU supports it.) + */ + fpstate_init_fxstate(&init_fpstate.fxsave); + + fpu__init_system_mxcsr(); +} + +/* + * Size of the FPU context state. All tasks in the system use the + * same context size, regardless of what portion they use. + * This is inherent to the XSAVE architecture which puts all state + * components into a single, continuous memory block: + */ +unsigned int xstate_size; +EXPORT_SYMBOL_GPL(xstate_size); + +/* + * Set up the xstate_size based on the legacy FPU context size. + * + * We set this up first, and later it will be overwritten by + * fpu__init_system_xstate() if the CPU knows about xstates. + */ +static void __init fpu__init_system_xstate_size_legacy(void) +{ + static int on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + + /* + * Note that xstate_size might be overwriten later during + * fpu__init_system_xstate(). + */ + + if (!cpu_has_fpu) { + /* + * Disable xsave as we do not support it if i387 + * emulation is enabled. + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + xstate_size = sizeof(struct swregs_state); + } else { + if (cpu_has_fxsr) + xstate_size = sizeof(struct fxregs_state); + else + xstate_size = sizeof(struct fregs_state); + } + /* + * Quirk: we don't yet handle the XSAVES* instructions + * correctly, as we don't correctly convert between + * standard and compacted format when interfacing + * with user-space - so disable it for now. + * + * The difference is small: with recent CPUs the + * compacted format is only marginally smaller than + * the standard FPU state format. + * + * ( This is easy to backport while we are fixing + * XSAVES* support. ) + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); +} + +/* + * FPU context switching strategies: + * + * Against popular belief, we don't do lazy FPU saves, due to the + * task migration complications it brings on SMP - we only do + * lazy FPU restores. + * + * 'lazy' is the traditional strategy, which is based on setting + * CR0::TS to 1 during context-switch (instead of doing a full + * restore of the FPU state), which causes the first FPU instruction + * after the context switch (whenever it is executed) to fault - at + * which point we lazily restore the FPU state into FPU registers. + * + * Tasks are of course under no obligation to execute FPU instructions, + * so it can easily happen that another context-switch occurs without + * a single FPU instruction being executed. If we eventually switch + * back to the original task (that still owns the FPU) then we have + * not only saved the restores along the way, but we also have the + * FPU ready to be used for the original task. + * + * 'eager' switching is used on modern CPUs, there we switch the FPU + * state during every context switch, regardless of whether the task + * has used FPU instructions in that time slice or not. This is done + * because modern FPU context saving instructions are able to optimize + * state saving and restoration in hardware: they can detect both + * unused and untouched FPU state and optimize accordingly. + * + * [ Note that even in 'lazy' mode we might optimize context switches + * to use 'eager' restores, if we detect that a task is using the FPU + * frequently. See the fpu->counter logic in fpu/internal.h for that. 
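The two strategies, compressed into a schematic (not actual kernel code; the real work sits in switch_fpu_prepare()/switch_fpu_finish() in fpu/internal.h):

	/* eager: save and restore around every context switch */
	copy_fpregs_to_fpstate(prev_fpu);
	copy_kernel_to_fpregs(&next_fpu->state);

	/* lazy: only arm the trap, restore on first use */
	stts();	/* set CR0::TS; the next FPU insn raises #NM and
		   do_device_not_available() -> fpu__restore() reloads the state */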
] + */ +static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; + +static int __init eager_fpu_setup(char *s) +{ + if (!strcmp(s, "on")) + eagerfpu = ENABLE; + else if (!strcmp(s, "off")) + eagerfpu = DISABLE; + else if (!strcmp(s, "auto")) + eagerfpu = AUTO; + return 1; +} +__setup("eagerfpu=", eager_fpu_setup); + +/* + * Pick the FPU context switching strategy: + */ +static void __init fpu__init_system_ctx_switch(void) +{ + static bool on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + + WARN_ON_FPU(current->thread.fpu.fpstate_active); + current_thread_info()->status = 0; + + /* Auto enable eagerfpu for xsaveopt */ + if (cpu_has_xsaveopt && eagerfpu != DISABLE) + eagerfpu = ENABLE; + + if (xfeatures_mask & XSTATE_EAGER) { + if (eagerfpu == DISABLE) { + pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", + xfeatures_mask & XSTATE_EAGER); + xfeatures_mask &= ~XSTATE_EAGER; + } else { + eagerfpu = ENABLE; + } + } + + if (eagerfpu == ENABLE) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + + printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); +} + +/* + * Called on the boot CPU once per system bootup, to set up the initial + * FPU state that is later cloned into all processes: + */ +void __init fpu__init_system(struct cpuinfo_x86 *c) +{ + fpu__init_system_early_generic(c); + + /* + * The FPU has to be operational for some of the + * later FPU init activities: + */ + fpu__init_cpu(); + + /* + * But don't leave CR0::TS set yet, as some of the FPU setup + * methods depend on being able to execute FPU instructions + * that will fault on a set TS, such as the FXSAVE in + * fpu__init_system_mxcsr(). + */ + clts(); + + fpu__init_system_generic(); + fpu__init_system_xstate_size_legacy(); + fpu__init_system_xstate(); + + fpu__init_system_ctx_switch(); +} + +/* + * Boot parameter to turn off FPU support and fall back to math-emu: + */ +static int __init no_387(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FPU); + return 1; +} +__setup("no387", no_387); + +/* + * Disable all xstate CPU features: + */ +static int __init x86_noxsave_setup(char *s) +{ + if (strlen(s)) + return 0; + + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + setup_clear_cpu_cap(X86_FEATURE_AVX); + setup_clear_cpu_cap(X86_FEATURE_AVX2); + + return 1; +} +__setup("noxsave", x86_noxsave_setup); + +/* + * Disable the XSAVEOPT instruction specifically: + */ +static int __init x86_noxsaveopt_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + + return 1; +} +__setup("noxsaveopt", x86_noxsaveopt_setup); + +/* + * Disable the XSAVES instruction: + */ +static int __init x86_noxsaves_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + return 1; +} +__setup("noxsaves", x86_noxsaves_setup); + +/* + * Disable FX save/restore and SSE support: + */ +static int __init x86_nofxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); + setup_clear_cpu_cap(X86_FEATURE_XMM); + + return 1; +} +__setup("nofxsr", x86_nofxsr_setup); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c new file mode 100644 index 000000000000..dc60810c1c74 --- /dev/null +++ b/arch/x86/kernel/fpu/regset.c @@ -0,0 +1,356 @@ +/* + * FPU register's regset abstraction, for ptrace, core dumps, etc. 
+ */ +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> +#include <asm/fpu/regset.h> + +/* + * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, + * as the "regset->n" for the xstate regset will be updated based on the feature + * capabilites supported by the xsave. + */ +int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + struct fpu *target_fpu = &target->thread.fpu; + + return target_fpu->fpstate_active ? regset->n : 0; +} + +int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + struct fpu *target_fpu = &target->thread.fpu; + + return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0; +} + +int xfpregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + + if (!cpu_has_fxsr) + return -ENODEV; + + fpu__activate_fpstate_read(fpu); + fpstate_sanitize_xstate(fpu); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &fpu->state.fxsave, 0, -1); +} + +int xfpregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + int ret; + + if (!cpu_has_fxsr) + return -ENODEV; + + fpu__activate_fpstate_write(fpu); + fpstate_sanitize_xstate(fpu); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fpu->state.fxsave, 0, -1); + + /* + * mxcsr reserved bits must be masked to zero for security reasons. + */ + fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; + + /* + * update the header bits in the xsave header, indicating the + * presence of FP and SSE state. + */ + if (cpu_has_xsave) + fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE; + + return ret; +} + +int xstateregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct xregs_state *xsave; + int ret; + + if (!cpu_has_xsave) + return -ENODEV; + + fpu__activate_fpstate_read(fpu); + + xsave = &fpu->state.xsave; + + /* + * Copy the 48bytes defined by the software first into the xstate + * memory layout in the thread struct, so that we can copy the entire + * xstateregs to the user using one user_regset_copyout(). + */ + memcpy(&xsave->i387.sw_reserved, + xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); + /* + * Copy the xstate memory layout. + */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + return ret; +} + +int xstateregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct xregs_state *xsave; + int ret; + + if (!cpu_has_xsave) + return -ENODEV; + + fpu__activate_fpstate_write(fpu); + + xsave = &fpu->state.xsave; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + /* + * mxcsr reserved bits must be masked to zero for security reasons. + */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + xsave->header.xfeatures &= xfeatures_mask; + /* + * These bits must be zero. + */ + memset(&xsave->header.reserved, 0, 48); + + return ret; +} + +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + +/* + * FPU tag word conversions. 
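A worked example for twd_i387_to_fxsr() below: the i387 tag word keeps 2 bits per register (00 valid, 01 zero, 10 special, 11 empty) while FXSR keeps a single non-empty bit per register. For twd = 0xfffe, i.e. st(0) tagged "special" and everything else empty:

	tmp = ~0xfffe;				/* low bits: 0x0001 */
	tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0x0001: one marker bit per pair */
	/* the remaining shift/mask steps compress the pairs into the low
	   byte, yielding an FXSR tag byte of 0x01: only st(0) is non-empty */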
+ */ + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + + return tmp; +} + +#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) +#define FP_EXP_TAG_VALID 0 +#define FP_EXP_TAG_ZERO 1 +#define FP_EXP_TAG_SPECIAL 2 +#define FP_EXP_TAG_EMPTY 3 + +static inline u32 twd_fxsr_to_i387(struct fxregs_state *fxsave) +{ + struct _fpxreg *st; + u32 tos = (fxsave->swd >> 11) & 7; + u32 twd = (unsigned long) fxsave->twd; + u32 tag; + u32 ret = 0xffff0000u; + int i; + + for (i = 0; i < 8; i++, twd >>= 1) { + if (twd & 0x1) { + st = FPREG_ADDR(fxsave, (i - tos) & 7); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = FP_EXP_TAG_SPECIAL; + break; + case 0x0000: + if (!st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3]) + tag = FP_EXP_TAG_ZERO; + else + tag = FP_EXP_TAG_SPECIAL; + break; + default: + if (st->significand[3] & 0x8000) + tag = FP_EXP_TAG_VALID; + else + tag = FP_EXP_TAG_SPECIAL; + break; + } + } else { + tag = FP_EXP_TAG_EMPTY; + } + ret |= tag << (2 * i); + } + return ret; +} + +/* + * FXSR floating point environment conversions. + */ + +void +convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) +{ + struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; + struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; + struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; + int i; + + env->cwd = fxsave->cwd | 0xffff0000u; + env->swd = fxsave->swd | 0xffff0000u; + env->twd = twd_fxsr_to_i387(fxsave); + +#ifdef CONFIG_X86_64 + env->fip = fxsave->rip; + env->foo = fxsave->rdp; + /* + * should be actually ds/cs at fpu exception time, but + * that information is not available in 64bit mode. 
+ */ + env->fcs = task_pt_regs(tsk)->cs; + if (tsk == current) { + savesegment(ds, env->fos); + } else { + env->fos = tsk->thread.ds; + } + env->fos |= 0xffff0000; +#else + env->fip = fxsave->fip; + env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); + env->foo = fxsave->foo; + env->fos = fxsave->fos; +#endif + + for (i = 0; i < 8; ++i) + memcpy(&to[i], &from[i], sizeof(to[0])); +} + +void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env) + +{ + struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; + struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; + struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; + int i; + + fxsave->cwd = env->cwd; + fxsave->swd = env->swd; + fxsave->twd = twd_i387_to_fxsr(env->twd); + fxsave->fop = (u16) ((u32) env->fcs >> 16); +#ifdef CONFIG_X86_64 + fxsave->rip = env->fip; + fxsave->rdp = env->foo; + /* cs and ds ignored */ +#else + fxsave->fip = env->fip; + fxsave->fcs = (env->fcs & 0xffff); + fxsave->foo = env->foo; + fxsave->fos = env->fos; +#endif + + for (i = 0; i < 8; ++i) + memcpy(&to[i], &from[i], sizeof(from[0])); +} + +int fpregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct user_i387_ia32_struct env; + + fpu__activate_fpstate_read(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); + + if (!cpu_has_fxsr) + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &fpu->state.fsave, 0, + -1); + + fpstate_sanitize_xstate(fpu); + + if (kbuf && pos == 0 && count == sizeof(env)) { + convert_from_fxsr(kbuf, target); + return 0; + } + + convert_from_fxsr(&env, target); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); +} + +int fpregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct user_i387_ia32_struct env; + int ret; + + fpu__activate_fpstate_write(fpu); + fpstate_sanitize_xstate(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); + + if (!cpu_has_fxsr) + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fpu->state.fsave, 0, + -1); + + if (pos > 0 || count < sizeof(env)) + convert_from_fxsr(&env, target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); + if (!ret) + convert_to_fxsr(target, &env); + + /* + * update the header bit in the xsave header, indicating the + * presence of FP. + */ + if (cpu_has_xsave) + fpu->state.xsave.header.xfeatures |= XSTATE_FP; + return ret; +} + +/* + * FPU state for core dumps. + * This is only used for a.out dumps now. + * It is declared generically using elf_fpregset_t (which is + * struct user_i387_struct) but is in fact only used for 32-bit + * dumps, so on 64-bit it is really struct user_i387_ia32_struct. 
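+ *
+ * (Note that dump_fpu() below reuses fpregs_get() with a NULL regset and a
+ * kernel-space buffer, so on FXSR hardware it hits fpregs_get()'s kbuf
+ * fast path, which converts the fxsave image straight into the caller's
+ * buffer.)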
+ */ +int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) +{ + struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; + int fpvalid; + + fpvalid = fpu->fpstate_active; + if (fpvalid) + fpvalid = !fpregs_get(tsk, NULL, + 0, sizeof(struct user_i387_ia32_struct), + ufpu, NULL); + + return fpvalid; +} +EXPORT_SYMBOL(dump_fpu); + +#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c new file mode 100644 index 000000000000..50ec9af1bd51 --- /dev/null +++ b/arch/x86/kernel/fpu/signal.c @@ -0,0 +1,404 @@ +/* + * FPU signal frame handling routines. + */ + +#include <linux/compat.h> +#include <linux/cpu.h> + +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> +#include <asm/fpu/regset.h> + +#include <asm/sigframe.h> + +static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; + +/* + * Check for the presence of extended state information in the + * user fpstate pointer in the sigcontext. + */ +static inline int check_for_xstate(struct fxregs_state __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw) +{ + int min_xstate_size = sizeof(struct fxregs_state) + + sizeof(struct xstate_header); + unsigned int magic2; + + if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) + return -1; + + /* Check for the first magic field and other error scenarios. */ + if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || + fx_sw->xstate_size < min_xstate_size || + fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fx_sw->extended_size) + return -1; + + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) + || magic2 != FP_XSTATE_MAGIC2) + return -1; + + return 0; +} + +/* + * Signal frame handlers. + */ +static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) +{ + if (use_fxsr()) { + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct user_i387_ia32_struct env; + struct _fpstate_ia32 __user *fp = buf; + + convert_from_fxsr(&env, tsk); + + if (__copy_to_user(buf, &env, sizeof(env)) || + __put_user(xsave->i387.swd, &fp->status) || + __put_user(X86_FXSR_MAGIC, &fp->magic)) + return -1; + } else { + struct fregs_state __user *fp = buf; + u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) + return -1; + } + + return 0; +} + +static inline int save_xstate_epilog(void __user *buf, int ia32_frame) +{ + struct xregs_state __user *x = buf; + struct _fpx_sw_bytes *sw_bytes; + u32 xfeatures; + int err; + + /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ + sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; + err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); + + if (!use_xsave()) + return err; + + err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); + + /* + * Read the xfeatures which we copied (directly from the cpu or + * from the state in task struct) to the user buffers. + */ + err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); + + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. 
This will
+ * let us capture (during sigreturn) any changes to the FP/SSE bits
+ * made by legacy applications which don't touch xfeatures in the
+ * xsave header.
+ *
+ * xsave-aware apps can change the xfeatures in the xsave
+ * header as well as change any contents in the memory layout.
+ * xrestore as part of sigreturn will capture all the changes.
+ */
+	xfeatures |= XSTATE_FPSSE;
+
+	err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures);
+
+	return err;
+}
+
+static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
+{
+	int err;
+
+	if (use_xsave())
+		err = copy_xregs_to_user(buf);
+	else if (use_fxsr())
+		err = copy_fxregs_to_user((struct fxregs_state __user *) buf);
+	else
+		err = copy_fregs_to_user((struct fregs_state __user *) buf);
+
+	if (unlikely(err) && __clear_user(buf, xstate_size))
+		err = -EFAULT;
+	return err;
+}
+
+/*
+ * Save the fpu, extended register state to the user signal frame.
+ *
+ * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
+ * state is copied.
+ * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
+ *
+ *	buf == buf_fx for 64-bit frames and 32-bit fsave frame.
+ *	buf != buf_fx for 32-bit frames with fxstate.
+ *
+ * If the fpu, extended register state is live, save the state directly
+ * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
+ * copy the thread's fpu state to the user frame starting at 'buf_fx'.
+ *
+ * If this is a 32-bit frame with fxstate, put a fsave header before
+ * the aligned state at 'buf_fx'.
+ *
+ * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
+ * indicating the absence/presence of the extended state to the user.
+ */
+int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
+{
+	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+	struct task_struct *tsk = current;
+	int ia32_fxstate = (buf != buf_fx);
+
+	ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
+			 config_enabled(CONFIG_IA32_EMULATION));
+
+	if (!access_ok(VERIFY_WRITE, buf, size))
+		return -EACCES;
+
+	if (!static_cpu_has(X86_FEATURE_FPU))
+		return fpregs_soft_get(current, NULL, 0,
+			sizeof(struct user_i387_ia32_struct), NULL,
+			(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
+
+	if (fpregs_active()) {
+		/* Save the live register state to the user directly. */
+		if (copy_fpregs_to_sigframe(buf_fx))
+			return -1;
+		/* Update the thread's fxstate to save the fsave header. */
+		if (ia32_fxstate)
+			copy_fxregs_to_kernel(&tsk->thread.fpu);
+	} else {
+		fpstate_sanitize_xstate(&tsk->thread.fpu);
+		if (__copy_to_user(buf_fx, xsave, xstate_size))
+			return -1;
+	}
+
+	/* Save the fsave header for the 32-bit frames. */
+	if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
+		return -1;
+
+	if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
+		return -1;
+
+	return 0;
+}
+
+static inline void
+sanitize_restored_xstate(struct task_struct *tsk,
+			 struct user_i387_ia32_struct *ia32_env,
+			 u64 xfeatures, int fx_only)
+{
+	struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+	struct xstate_header *header = &xsave->header;
+
+	if (use_xsave()) {
+		/* These bits must be zero. */
+		memset(header->reserved, 0, 48);
+
+		/*
+		 * Init the state that is not present in the memory
+		 * layout and not enabled by the OS.
+		 */
+		if (fx_only)
+			header->xfeatures = XSTATE_FPSSE;
+		else
+			header->xfeatures &= (xfeatures_mask & xfeatures);
+	}
+
+	if (use_fxsr()) {
+		/*
+		 * mxcsr reserved bits must be masked to zero for security
+		 * reasons.
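+		 * (E.g. with the common mxcsr_feature_mask of 0x0000ffbf, a
+		 * user-supplied mxcsr of 0xffffffff is cut down to 0x0000ffbf;
+		 * reserved MXCSR bits would otherwise #GP on the next FXRSTOR.)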
+ */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + + convert_to_fxsr(tsk, ia32_env); + } +} + +/* + * Restore the extended state if present. Otherwise, restore the FP/SSE state. + */ +static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) +{ + if (use_xsave()) { + if ((unsigned long)buf % 64 || fx_only) { + u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + return copy_user_to_fxregs(buf); + } else { + u64 init_bv = xfeatures_mask & ~xbv; + if (unlikely(init_bv)) + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + return copy_user_to_xregs(buf, xbv); + } + } else if (use_fxsr()) { + return copy_user_to_fxregs(buf); + } else + return copy_user_to_fregs(buf); +} + +static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) +{ + int ia32_fxstate = (buf != buf_fx); + struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; + int state_size = xstate_size; + u64 xfeatures = 0; + int fx_only = 0; + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); + + if (!buf) { + fpu__clear(fpu); + return 0; + } + + if (!access_ok(VERIFY_READ, buf, size)) + return -EACCES; + + fpu__activate_curr(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_set(current, NULL, + 0, sizeof(struct user_i387_ia32_struct), + NULL, buf) != 0; + + if (use_xsave()) { + struct _fpx_sw_bytes fx_sw_user; + if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { + /* + * Couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + state_size = sizeof(struct fxregs_state); + fx_only = 1; + } else { + state_size = fx_sw_user.xstate_size; + xfeatures = fx_sw_user.xfeatures; + } + } + + if (ia32_fxstate) { + /* + * For 32-bit frames with fxstate, copy the user state to the + * thread's fpu state, reconstruct fxstate from the fsave + * header. Sanitize the copied state etc. + */ + struct fpu *fpu = &tsk->thread.fpu; + struct user_i387_ia32_struct env; + int err = 0; + + /* + * Drop the current fpu which clears fpu->fpstate_active. This ensures + * that any context-switch during the copy of the new state, + * avoids the intermediate state from getting restored/saved. + * Thus avoiding the new restored state from getting corrupted. + * We will be ready to restore/save the state only after + * fpu->fpstate_active is again set. + */ + fpu__drop(fpu); + + if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || + __copy_from_user(&env, buf, sizeof(env))) { + fpstate_init(&fpu->state); + err = -1; + } else { + sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); + } + + fpu->fpstate_active = 1; + if (use_eager_fpu()) { + preempt_disable(); + fpu__restore(fpu); + preempt_enable(); + } + + return err; + } else { + /* + * For 64-bit frames and 32-bit fsave frames, restore the user + * state to the registers directly (with exceptions handled). + */ + user_fpu_begin(); + if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { + fpu__clear(fpu); + return -1; + } + } + + return 0; +} + +static inline int xstate_sigframe_size(void) +{ + return use_xsave() ? 
xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; +} + +/* + * Restore FPU state from a sigframe: + */ +int fpu__restore_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct fregs_state); + size += sizeof(struct fregs_state); + } + + return __fpu__restore_sig(buf, buf_fx, size); +} + +unsigned long +fpu__alloc_mathframe(unsigned long sp, int ia32_frame, + unsigned long *buf_fx, unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct fregs_state); + sp -= sizeof(struct fregs_state); + } + + *size = frame_size; + + return sp; +} +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed by the fpstate pointer in the sigcontext. + * This will be saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. + */ +void fpu__init_prepare_fx_sw_frame(void) +{ + int fsave_header_size = sizeof(struct fregs_state); + int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; + + if (config_enabled(CONFIG_X86_32)) + size += fsave_header_size; + + fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; + fx_sw_reserved.extended_size = size; + fx_sw_reserved.xfeatures = xfeatures_mask; + fx_sw_reserved.xstate_size = xstate_size; + + if (config_enabled(CONFIG_IA32_EMULATION)) { + fx_sw_reserved_ia32 = fx_sw_reserved; + fx_sw_reserved_ia32.extended_size += fsave_header_size; + } +} + diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c new file mode 100644 index 000000000000..62fc001c7846 --- /dev/null +++ b/arch/x86/kernel/fpu/xstate.c @@ -0,0 +1,461 @@ +/* + * xsave/xrstor support. + * + * Author: Suresh Siddha <suresh.b.siddha@intel.com> + */ +#include <linux/compat.h> +#include <linux/cpu.h> + +#include <asm/fpu/api.h> +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> +#include <asm/fpu/regset.h> + +#include <asm/tlbflush.h> + +static const char *xfeature_names[] = +{ + "x87 floating point registers" , + "SSE registers" , + "AVX registers" , + "MPX bounds registers" , + "MPX CSR" , + "AVX-512 opmask" , + "AVX-512 Hi256" , + "AVX-512 ZMM_Hi256" , + "unknown xstate feature" , +}; + +/* + * Mask of xstate features supported by the CPU and the kernel: + */ +u64 xfeatures_mask __read_mostly; + +static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; +static unsigned int xstate_sizes[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; +static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; + +/* The number of supported xfeatures in xfeatures_mask: */ +static unsigned int xfeatures_nr; + +/* + * Return whether the system supports a given xfeature. + * + * Also return the name of the (most advanced) feature that the caller requested: + */ +int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) +{ + u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask; + + if (unlikely(feature_name)) { + long xfeature_idx, max_idx; + u64 xfeatures_print; + /* + * So we use FLS here to be able to print the most advanced + * feature that was requested but is missing. 
So if a driver + * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the + * missing AVX feature - this is the most informative message + * to users: + */ + if (xfeatures_missing) + xfeatures_print = xfeatures_missing; + else + xfeatures_print = xfeatures_needed; + + xfeature_idx = fls64(xfeatures_print)-1; + max_idx = ARRAY_SIZE(xfeature_names)-1; + xfeature_idx = min(xfeature_idx, max_idx); + + *feature_name = xfeature_names[xfeature_idx]; + } + + if (xfeatures_missing) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(cpu_has_xfeatures); + +/* + * When executing XSAVEOPT (or other optimized XSAVE instructions), if + * a processor implementation detects that an FPU state component is still + * (or is again) in its initialized state, it may clear the corresponding + * bit in the header.xfeatures field, and can skip the writeout of registers + * to the corresponding memory layout. + * + * This means that when the bit is zero, the state component might still contain + * some previous - non-initialized register state. + * + * Before writing xstate information to user-space we sanitize those components, + * to always ensure that the memory layout of a feature will be in the init state + * if the corresponding header bit is zero. This is to ensure that user-space doesn't + * see some stale state in the memory layout during signal handling, debugging etc. + */ +void fpstate_sanitize_xstate(struct fpu *fpu) +{ + struct fxregs_state *fx = &fpu->state.fxsave; + int feature_bit; + u64 xfeatures; + + if (!use_xsaveopt()) + return; + + xfeatures = fpu->state.xsave.header.xfeatures; + + /* + * None of the feature bits are in init state. So nothing else + * to do for us, as the memory layout is up to date. + */ + if ((xfeatures & xfeatures_mask) == xfeatures_mask) + return; + + /* + * FP is in init state + */ + if (!(xfeatures & XSTATE_FP)) { + fx->cwd = 0x37f; + fx->swd = 0; + fx->twd = 0; + fx->fop = 0; + fx->rip = 0; + fx->rdp = 0; + memset(&fx->st_space[0], 0, 128); + } + + /* + * SSE is in init state + */ + if (!(xfeatures & XSTATE_SSE)) + memset(&fx->xmm_space[0], 0, 256); + + /* + * First two features are FPU and SSE, which above we handled + * in a special way already: + */ + feature_bit = 0x2; + xfeatures = (xfeatures_mask & ~xfeatures) >> 2; + + /* + * Update all the remaining memory layouts according to their + * standard xstate layout, if their header bit is in the init + * state: + */ + while (xfeatures) { + if (xfeatures & 0x1) { + int offset = xstate_offsets[feature_bit]; + int size = xstate_sizes[feature_bit]; + + memcpy((void *)fx + offset, + (void *)&init_fpstate.xsave + offset, + size); + } + + xfeatures >>= 1; + feature_bit++; + } +} + +/* + * Enable the extended processor state save/restore feature. + * Called once per CPU onlining. + */ +void fpu__init_cpu_xstate(void) +{ + if (!cpu_has_xsave || !xfeatures_mask) + return; + + cr4_set_bits(X86_CR4_OSXSAVE); + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); +} + +/* + * Record the offsets and sizes of various xstates contained + * in the XSAVE state memory layout. + * + * ( Note that certain features might be non-present, for them + * we'll have 0 offset and 0 size. 
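+ *
+ *   A concrete example, with illustrative values for a typical
+ *   AVX-capable CPU: CPUID(0xD, ecx=2) reports the ymm-high state as
+ *   256 bytes (eax) at offset 576 (ebx), i.e. immediately after the
+ *   512-byte legacy area and the 64-byte xsave header.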
) + */ +static void __init setup_xstate_features(void) +{ + u32 eax, ebx, ecx, edx, leaf; + + xfeatures_nr = fls64(xfeatures_mask); + + for (leaf = 2; leaf < xfeatures_nr; leaf++) { + cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); + + xstate_offsets[leaf] = ebx; + xstate_sizes[leaf] = eax; + + printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax); + } +} + +static void __init print_xstate_feature(u64 xstate_mask) +{ + const char *feature_name; + + if (cpu_has_xfeatures(xstate_mask, &feature_name)) + pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name); +} + +/* + * Print out all the supported xstate features: + */ +static void __init print_xstate_features(void) +{ + print_xstate_feature(XSTATE_FP); + print_xstate_feature(XSTATE_SSE); + print_xstate_feature(XSTATE_YMM); + print_xstate_feature(XSTATE_BNDREGS); + print_xstate_feature(XSTATE_BNDCSR); + print_xstate_feature(XSTATE_OPMASK); + print_xstate_feature(XSTATE_ZMM_Hi256); + print_xstate_feature(XSTATE_Hi16_ZMM); +} + +/* + * This function sets up offsets and sizes of all extended states in + * xsave area. This supports both standard format and compacted format + * of the xsave aread. + */ +static void __init setup_xstate_comp(void) +{ + unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; + int i; + + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. + */ + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); + + if (!cpu_has_xsaves) { + for (i = 2; i < xfeatures_nr; i++) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) { + xstate_comp_offsets[i] = xstate_offsets[i]; + xstate_comp_sizes[i] = xstate_sizes[i]; + } + } + return; + } + + xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; + + for (i = 2; i < xfeatures_nr; i++) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) + xstate_comp_sizes[i] = xstate_sizes[i]; + else + xstate_comp_sizes[i] = 0; + + if (i > 2) + xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + + xstate_comp_sizes[i-1]; + + } +} + +/* + * setup the xstate image representing the init state + */ +static void __init setup_init_fpu_buf(void) +{ + static int on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + + if (!cpu_has_xsave) + return; + + setup_xstate_features(); + print_xstate_features(); + + if (cpu_has_xsaves) { + init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; + init_fpstate.xsave.header.xfeatures = xfeatures_mask; + } + + /* + * Init all the features state with header_bv being 0x0 + */ + copy_kernel_to_xregs_booting(&init_fpstate.xsave); + + /* + * Dump the init state again. This is to identify the init state + * of any feature which is not represented by all zero's. + */ + copy_xregs_to_kernel_booting(&init_fpstate.xsave); +} + +/* + * Calculate total size of enabled xstates in XCR0/xfeatures_mask. + */ +static void __init init_xstate_size(void) +{ + unsigned int eax, ebx, ecx, edx; + int i; + + if (!cpu_has_xsaves) { + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + xstate_size = ebx; + return; + } + + xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + for (i = 2; i < 64; i++) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) { + cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); + xstate_size += eax; + } + } +} + +/* + * Enable and initialize the xsave feature. + * Called once per system bootup. 
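+ *
+ * (Sequence below, in short: read the supported feature mask from
+ * CPUID(0xD, 0), clamp it to the states the kernel knows about
+ * (XCNTXT_MASK), enable the XSAVE instructions via fpu__init_cpu_xstate(),
+ * recompute xstate_size for the enabled features, then set up the regset
+ * info, the signal frame's SW-reserved bytes, the init-state buffer and
+ * the compacted-format offsets.)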
+ */
+void __init fpu__init_system_xstate(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	static int on_boot_cpu = 1;
+
+	WARN_ON_FPU(!on_boot_cpu);
+	on_boot_cpu = 0;
+
+	if (!cpu_has_xsave) {
+		pr_info("x86/fpu: Legacy x87 FPU detected.\n");
+		return;
+	}
+
+	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
+		WARN_ON_FPU(1);
+		return;
+	}
+
+	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+	xfeatures_mask = eax + ((u64)edx << 32);
+
+	if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
+		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
+		BUG();
+	}
+
+	/* Support only the state known to the OS: */
+	xfeatures_mask = xfeatures_mask & XCNTXT_MASK;
+
+	/* Enable xstate instructions to be able to continue with initialization: */
+	fpu__init_cpu_xstate();
+
+	/* Recompute the context size for enabled features: */
+	init_xstate_size();
+
+	update_regset_xstate_info(xstate_size, xfeatures_mask);
+	fpu__init_prepare_fx_sw_frame();
+	setup_init_fpu_buf();
+	setup_xstate_comp();
+
+	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n",
+		xfeatures_mask,
+		xstate_size,
+		cpu_has_xsaves ? "compacted" : "standard");
+}
+
+/*
+ * Restore minimal FPU state after suspend:
+ */
+void fpu__resume_cpu(void)
+{
+	/*
+	 * Restore XCR0 on xsave capable CPUs:
+	 */
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+}
+
+/*
+ * Given the xsave area and a state inside, this function returns the
+ * address of the state.
+ *
+ * This is the API that is called to get xstate address in either
+ * standard format or compacted format of xsave area.
+ *
+ * Note that if there is no data for the field in the xsave buffer
+ * this will return NULL.
+ *
+ * Inputs:
+ *	xstate: the thread's storage area for all FPU data
+ *	xstate_feature: state which is defined in xsave.h (e.g.
+ *	XSTATE_FP, XSTATE_SSE, etc...)
+ * Output:
+ *	address of the state in the xsave area, or NULL if the
+ *	field is not present in the xsave buffer.
+ */
+void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
+{
+	int feature_nr = fls64(xstate_feature) - 1;
+	/*
+	 * Do we even *have* xsave state?
+	 */
+	if (!boot_cpu_has(X86_FEATURE_XSAVE))
+		return NULL;
+
+	xsave = &current->thread.fpu.state.xsave;
+	/*
+	 * We should not ever be requesting features that we
+	 * have not enabled. Remember that xfeatures_mask is
+	 * what we write to the XCR0 register.
+	 */
+	WARN_ONCE(!(xfeatures_mask & xstate_feature),
+		  "get of unsupported state");
+	/*
+	 * This assumes the last 'xsave*' instruction to
+	 * have requested that 'xstate_feature' be saved.
+	 * If it did not, we might be seeing an old value
+	 * of the field in the buffer.
+	 *
+	 * This can happen because the last 'xsave' did not
+	 * request that this feature be saved (unlikely)
+	 * or because the "init optimization" caused it
+	 * to not be saved.
+	 */
+	if (!(xsave->header.xfeatures & xstate_feature))
+		return NULL;
+
+	return (void *)xsave + xstate_comp_offsets[feature_nr];
+}
+EXPORT_SYMBOL_GPL(get_xsave_addr);
+
+/*
+ * This wraps up the common operations that need to occur when retrieving
+ * data from xsave state. It first ensures that the current task was
+ * using the FPU and retrieves the data into a buffer. It then calculates
+ * the offset of the requested field in the buffer.
+ *
+ * This function is safe to call whether the FPU is in use or not.
+ *
+ * Note that this only works on the current task.
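+ *
+ * Illustrative caller (a sketch with assumed MPX-style types -- a NULL
+ * return means the feature is in its init state or the task has no
+ * FPU state):
+ *
+ *	const struct bndcsr *bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
+ *	if (!bndcsr)
+ *		return;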
+ *
+ * Inputs:
+ *	@xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP,
+ *	XSTATE_SSE, etc...)
+ * Output:
+ *	address of the state in the xsave area or NULL if the state
+ *	is not present or is in its 'init state'.
+ */
+const void *get_xsave_field_ptr(int xsave_state)
+{
+	struct fpu *fpu = &current->thread.fpu;
+
+	if (!fpu->fpstate_active)
+		return NULL;
+	/*
+	 * fpu__save() takes the CPU's xstate registers
+	 * and saves them off to the fpu memory buffer.
+	 */
+	fpu__save(fpu);
+
+	return get_xsave_addr(&fpu->state.xsave, xsave_state);
+}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2b55ee6db053..5a4668136e98 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	clear_bss();
 
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
-		set_intr_gate(i, early_idt_handlers[i]);
+		set_intr_gate(i, early_idt_handler_array[i]);
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	copy_bootdata(__va(real_mode_data));
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d031bad9e07e..53eeb226657c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -478,21 +478,22 @@ is486:
 __INIT
 setup_once:
 	/*
-	 * Set up a idt with 256 entries pointing to ignore_int,
-	 * interrupt gates. It doesn't actually load idt - that needs
-	 * to be done on each CPU. Interrupts are enabled elsewhere,
-	 * when we can be relatively sure everything is ok.
+	 * Set up an idt with 256 interrupt gates that push zero if there
+	 * is no error code and then jump to early_idt_handler_common.
+	 * It doesn't actually load the idt - that needs to be done on
+	 * each CPU. Interrupts are enabled elsewhere, when we can be
+	 * relatively sure everything is ok.
 	 */
 
 	movl $idt_table,%edi
-	movl $early_idt_handlers,%eax
+	movl $early_idt_handler_array,%eax
 	movl $NUM_EXCEPTION_VECTORS,%ecx
 1:
 	movl %eax,(%edi)
 	movl %eax,4(%edi)
 	/* interrupt gate, dpl=0, present */
 	movl $(0x8E000000 + __KERNEL_CS),2(%edi)
-	addl $9,%eax
+	addl $EARLY_IDT_HANDLER_SIZE,%eax
 	addl $8,%edi
 	loop 1b
 
@@ -524,26 +525,28 @@ setup_once:
 	andl $0,setup_once_ref	/* Once is enough, thanks */
 	ret
 
-ENTRY(early_idt_handlers)
+ENTRY(early_idt_handler_array)
 	# 36(%esp) %eflags
 	# 32(%esp) %cs
 	# 28(%esp) %eip
 	# 24(%rsp) error code
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
-	.if (EXCEPTION_ERRCODE_MASK >> i) & 1
-	ASM_NOP2
-	.else
+	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
 	pushl $0		# Dummy error code, to make stack frame uniform
 	.endif
 	pushl $i		# 20(%esp) Vector number
-	jmp early_idt_handler
+	jmp early_idt_handler_common
 	i = i + 1
+	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
 	.endr
-ENDPROC(early_idt_handlers)
+ENDPROC(early_idt_handler_array)
 
-	/* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+	/*
+	 * The stack is the hardware frame, an error code or zero, and the
+	 * vector number.
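+	 *
+	 * (Each stub emitted by the .rept above is padded out to exactly
+	 * EARLY_IDT_HANDLER_SIZE bytes by the .fill directive, which is what
+	 * lets setup_once step through the array with a constant
+	 * "addl $EARLY_IDT_HANDLER_SIZE,%eax" when it builds the IDT.)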
+ */ cld cmpl $2,(%esp) # X86_TRAP_NMI @@ -603,7 +606,7 @@ ex_entry: is_nmi: addl $8,%esp /* drop vector number and error code */ iret -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ ALIGN diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ae6588b301c2..df7e78057ae0 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -321,26 +321,28 @@ bad_address: jmp bad_address __INIT - .globl early_idt_handlers -early_idt_handlers: +ENTRY(early_idt_handler_array) # 104(%rsp) %rflags # 96(%rsp) %cs # 88(%rsp) %rip # 80(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .if (EXCEPTION_ERRCODE_MASK >> i) & 1 - ASM_NOP2 - .else + .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 pushq $0 # Dummy error code, to make stack frame uniform .endif pushq $i # 72(%rsp) Vector number - jmp early_idt_handler + jmp early_idt_handler_common i = i + 1 + .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr +ENDPROC(early_idt_handler_array) -/* This is global to keep gas from relaxing the jumps */ -ENTRY(early_idt_handler) +early_idt_handler_common: + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. + */ cld cmpl $2,(%rsp) # X86_TRAP_NMI @@ -412,7 +414,7 @@ ENTRY(early_idt_handler) is_nmi: addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common) __INITDATA diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 05fd74f537d6..64341aa485ae 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -40,7 +40,5 @@ EXPORT_SYMBOL(empty_zero_page); #ifdef CONFIG_PREEMPT EXPORT_SYMBOL(___preempt_schedule); -#ifdef CONFIG_CONTEXT_TRACKING -EXPORT_SYMBOL(___preempt_schedule_context); -#endif +EXPORT_SYMBOL(___preempt_schedule_notrace); #endif diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c deleted file mode 100644 index 009183276bb7..000000000000 --- a/arch/x86/kernel/i387.c +++ /dev/null @@ -1,656 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - */ -#include <linux/module.h> -#include <linux/regset.h> -#include <linux/sched.h> -#include <linux/slab.h> - -#include <asm/sigcontext.h> -#include <asm/processor.h> -#include <asm/math_emu.h> -#include <asm/tlbflush.h> -#include <asm/uaccess.h> -#include <asm/ptrace.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/user.h> - -static DEFINE_PER_CPU(bool, in_kernel_fpu); - -void kernel_fpu_disable(void) -{ - WARN_ON(this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, true); -} - -void kernel_fpu_enable(void) -{ - this_cpu_write(in_kernel_fpu, false); -} - -/* - * Were we in an interrupt that interrupted kernel mode? - * - * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that - * pair does nothing at all: the thread must not have fpu (so - * that we don't try to save the FPU state), and TS must - * be set (so that the clts/stts pair does nothing that is - * visible in the interrupted kernel thread). - * - * Except for the eagerfpu case when we return true; in the likely case - * the thread has FPU but we are not going to set/clear TS. 
- */ -static inline bool interrupted_kernel_fpu_idle(void) -{ - if (this_cpu_read(in_kernel_fpu)) - return false; - - if (use_eager_fpu()) - return true; - - return !__thread_has_fpu(current) && - (read_cr0() & X86_CR0_TS); -} - -/* - * Were we in user mode (or vm86 mode) when we were - * interrupted? - * - * Doing kernel_fpu_begin/end() is ok if we are running - * in an interrupt context from user mode - we'll just - * save the FPU state as required. - */ -static inline bool interrupted_user_mode(void) -{ - struct pt_regs *regs = get_irq_regs(); - return regs && user_mode(regs); -} - -/* - * Can we use the FPU in kernel mode with the - * whole "kernel_fpu_begin/end()" sequence? - * - * It's always ok in process context (ie "not interrupt") - * but it is sometimes ok even from an irq. - */ -bool irq_fpu_usable(void) -{ - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); -} -EXPORT_SYMBOL(irq_fpu_usable); - -void __kernel_fpu_begin(void) -{ - struct task_struct *me = current; - - this_cpu_write(in_kernel_fpu, true); - - if (__thread_has_fpu(me)) { - __save_init_fpu(me); - } else { - this_cpu_write(fpu_owner_task, NULL); - if (!use_eager_fpu()) - clts(); - } -} -EXPORT_SYMBOL(__kernel_fpu_begin); - -void __kernel_fpu_end(void) -{ - struct task_struct *me = current; - - if (__thread_has_fpu(me)) { - if (WARN_ON(restore_fpu_checking(me))) - fpu_reset_state(me); - } else if (!use_eager_fpu()) { - stts(); - } - - this_cpu_write(in_kernel_fpu, false); -} -EXPORT_SYMBOL(__kernel_fpu_end); - -void unlazy_fpu(struct task_struct *tsk) -{ - preempt_disable(); - if (__thread_has_fpu(tsk)) { - if (use_eager_fpu()) { - __save_fpu(tsk); - } else { - __save_init_fpu(tsk); - __thread_fpu_end(tsk); - } - } - preempt_enable(); -} -EXPORT_SYMBOL(unlazy_fpu); - -unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); -static struct i387_fxsave_struct fx_scratch; - -static void mxcsr_feature_mask_init(void) -{ - unsigned long mask = 0; - - if (cpu_has_fxsr) { - memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : "+m" (fx_scratch)); - mask = fx_scratch.mxcsr_mask; - if (mask == 0) - mask = 0x0000ffbf; - } - mxcsr_feature_mask &= mask; -} - -static void init_thread_xstate(void) -{ - /* - * Note that xstate_size might be overwriten later during - * xsave_init(). - */ - - if (!cpu_has_fpu) { - /* - * Disable xsave as we do not support it if i387 - * emulation is enabled. - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct i387_soft_struct); - return; - } - - if (cpu_has_fxsr) - xstate_size = sizeof(struct i387_fxsave_struct); - else - xstate_size = sizeof(struct i387_fsave_struct); -} - -/* - * Called at bootup to set up the initial FPU state that is later cloned - * into all processes. 
- */ - -void fpu_init(void) -{ - unsigned long cr0; - unsigned long cr4_mask = 0; - -#ifndef CONFIG_MATH_EMULATION - if (!cpu_has_fpu) { - pr_emerg("No FPU found and no math emulation present\n"); - pr_emerg("Giving up\n"); - for (;;) - asm volatile("hlt"); - } -#endif - if (cpu_has_fxsr) - cr4_mask |= X86_CR4_OSFXSR; - if (cpu_has_xmm) - cr4_mask |= X86_CR4_OSXMMEXCPT; - if (cr4_mask) - cr4_set_bits(cr4_mask); - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ - if (!cpu_has_fpu) - cr0 |= X86_CR0_EM; - write_cr0(cr0); - - /* - * init_thread_xstate is only called once to avoid overriding - * xstate_size during boot time or during CPU hotplug. - */ - if (xstate_size == 0) - init_thread_xstate(); - - mxcsr_feature_mask_init(); - xsave_init(); - eager_fpu_init(); -} - -void fpu_finit(struct fpu *fpu) -{ - if (!cpu_has_fpu) { - finit_soft_fpu(&fpu->state->soft); - return; - } - - memset(fpu->state, 0, xstate_size); - - if (cpu_has_fxsr) { - fx_finit(&fpu->state->fxsave); - } else { - struct i387_fsave_struct *fp = &fpu->state->fsave; - fp->cwd = 0xffff037fu; - fp->swd = 0xffff0000u; - fp->twd = 0xffffffffu; - fp->fos = 0xffff0000u; - } -} -EXPORT_SYMBOL_GPL(fpu_finit); - -/* - * The _current_ task is using the FPU for the first time - * so initialize it and set the mxcsr to its default - * value at reset if we support XMM instructions and then - * remember the current task has used the FPU. - */ -int init_fpu(struct task_struct *tsk) -{ - int ret; - - if (tsk_used_math(tsk)) { - if (cpu_has_fpu && tsk == current) - unlazy_fpu(tsk); - task_disable_lazy_fpu_restore(tsk); - return 0; - } - - /* - * Memory allocation at the first usage of the FPU and other state. - */ - ret = fpu_alloc(&tsk->thread.fpu); - if (ret) - return ret; - - fpu_finit(&tsk->thread.fpu); - - set_stopped_child_used_math(tsk); - return 0; -} -EXPORT_SYMBOL_GPL(init_fpu); - -/* - * The xstateregs_active() routine is the same as the fpregs_active() routine, - * as the "regset->n" for the xstate regset will be updated based on the feature - * capabilites supported by the xsave. - */ -int fpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - return tsk_used_math(target) ? regset->n : 0; -} - -int xfpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0; -} - -int xfpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_fxsr) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); -} - -int xfpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_fxsr) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); - - /* - * mxcsr reserved bits must be masked to zero for security reasons. - */ - target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - - /* - * update the header bits in the xsave header, indicating the - * presence of FP and SSE state. 
- */ - if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - - return ret; -} - -int xstateregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct xsave_struct *xsave; - int ret; - - if (!cpu_has_xsave) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - xsave = &target->thread.fpu.state->xsave; - - /* - * Copy the 48bytes defined by the software first into the xstate - * memory layout in the thread struct, so that we can copy the entire - * xstateregs to the user using one user_regset_copyout(). - */ - memcpy(&xsave->i387.sw_reserved, - xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - /* - * Copy the xstate memory layout. - */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - return ret; -} - -int xstateregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct xsave_struct *xsave; - int ret; - - if (!cpu_has_xsave) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - xsave = &target->thread.fpu.state->xsave; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - /* - * mxcsr reserved bits must be masked to zero for security reasons. - */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - xsave->xsave_hdr.xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - memset(&xsave->xsave_hdr.reserved, 0, 48); - return ret; -} - -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr(unsigned short twd) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. */ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - - return tmp; -} - -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) -#define FP_EXP_TAG_VALID 0 -#define FP_EXP_TAG_ZERO 1 -#define FP_EXP_TAG_SPECIAL 2 -#define FP_EXP_TAG_EMPTY 3 - -static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) -{ - struct _fpxreg *st; - u32 tos = (fxsave->swd >> 11) & 7; - u32 twd = (unsigned long) fxsave->twd; - u32 tag; - u32 ret = 0xffff0000u; - int i; - - for (i = 0; i < 8; i++, twd >>= 1) { - if (twd & 0x1) { - st = FPREG_ADDR(fxsave, (i - tos) & 7); - - switch (st->exponent & 0x7fff) { - case 0x7fff: - tag = FP_EXP_TAG_SPECIAL; - break; - case 0x0000: - if (!st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3]) - tag = FP_EXP_TAG_ZERO; - else - tag = FP_EXP_TAG_SPECIAL; - break; - default: - if (st->significand[3] & 0x8000) - tag = FP_EXP_TAG_VALID; - else - tag = FP_EXP_TAG_SPECIAL; - break; - } - } else { - tag = FP_EXP_TAG_EMPTY; - } - ret |= tag << (2 * i); - } - return ret; -} - -/* - * FXSR floating point environment conversions. 
- */ - -void -convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; - struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - env->cwd = fxsave->cwd | 0xffff0000u; - env->swd = fxsave->swd | 0xffff0000u; - env->twd = twd_fxsr_to_i387(fxsave); - -#ifdef CONFIG_X86_64 - env->fip = fxsave->rip; - env->foo = fxsave->rdp; - /* - * should be actually ds/cs at fpu exception time, but - * that information is not available in 64bit mode. - */ - env->fcs = task_pt_regs(tsk)->cs; - if (tsk == current) { - savesegment(ds, env->fos); - } else { - env->fos = tsk->thread.ds; - } - env->fos |= 0xffff0000; -#else - env->fip = fxsave->fip; - env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); - env->foo = fxsave->foo; - env->fos = fxsave->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(to[0])); -} - -void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env) - -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; - struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - fxsave->cwd = env->cwd; - fxsave->swd = env->swd; - fxsave->twd = twd_i387_to_fxsr(env->twd); - fxsave->fop = (u16) ((u32) env->fcs >> 16); -#ifdef CONFIG_X86_64 - fxsave->rip = env->fip; - fxsave->rdp = env->foo; - /* cs and ds ignored */ -#else - fxsave->fip = env->fip; - fxsave->fcs = (env->fcs & 0xffff); - fxsave->foo = env->foo; - fxsave->fos = env->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(from[0])); -} - -int fpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct user_i387_ia32_struct env; - int ret; - - ret = init_fpu(target); - if (ret) - return ret; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, - -1); - - sanitize_i387_state(target); - - if (kbuf && pos == 0 && count == sizeof(env)) { - convert_from_fxsr(kbuf, target); - return 0; - } - - convert_from_fxsr(&env, target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); -} - -int fpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct user_i387_ia32_struct env; - int ret; - - ret = init_fpu(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, - -1); - - if (pos > 0 || count < sizeof(env)) - convert_from_fxsr(&env, target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); - if (!ret) - convert_to_fxsr(target, &env); - - /* - * update the header bit in the xsave header, indicating the - * presence of FP. - */ - if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; - return ret; -} - -/* - * FPU state for core dumps. - * This is only used for a.out dumps now. 
- * It is declared generically using elf_fpregset_t (which is - * struct user_i387_struct) but is in fact only used for 32-bit - * dumps, so on 64-bit it is really struct user_i387_ia32_struct. - */ -int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) -{ - struct task_struct *tsk = current; - int fpvalid; - - fpvalid = !!used_math(); - if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, - 0, sizeof(struct user_i387_ia32_struct), - fpu, NULL); - - return fpvalid; -} -EXPORT_SYMBOL(dump_fpu); - -#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ - -static int __init no_387(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FPU); - return 1; -} - -__setup("no387", no_387); - -void fpu_detect(struct cpuinfo_x86 *c) -{ - unsigned long cr0; - u16 fsw, fcw; - - fsw = fcw = 0xffff; - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS | X86_CR0_EM); - write_cr0(cr0); - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); - else - clear_cpu_cap(c, X86_FEATURE_FPU); - - /* The final cr0 value is set in fpu_init() */ -} diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9435620062df..1681504e44a4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu) kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); } + +#ifdef CONFIG_QUEUED_SPINLOCKS + +#include <asm/qspinlock.h> + +static void kvm_wait(u8 *ptr, u8 val) +{ + unsigned long flags; + + if (in_nmi()) + return; + + local_irq_save(flags); + + if (READ_ONCE(*ptr) != val) + goto out; + + /* + * halt until it's our turn and kicked. Note that we do safe halt + * for irq enabled case to avoid hang when lock info is overwritten + * in irq spinlock slowpath and no spurious interrupt occur to save us. + */ + if (arch_irqs_disabled_flags(flags)) + halt(); + else + safe_halt(); + +out: + local_irq_restore(flags); +} + +#else /* !CONFIG_QUEUED_SPINLOCKS */ + enum kvm_contention_stat { TAKEN_SLOW, TAKEN_SLOW_PICKUP, @@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) } } +#endif /* !CONFIG_QUEUED_SPINLOCKS */ + /* * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. 
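 *
 * (With CONFIG_QUEUED_SPINLOCKS this now wires up the queued-spinlock
 * slow path instead: a waiting vCPU parks in kvm_wait(), halting until
 * its lock byte changes or an interrupt arrives, and the unlocking side
 * wakes it via kvm_kick_cpu(), i.e. a KVM_HC_KICK_CPU hypercall.)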
*/ @@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; +#ifdef CONFIG_QUEUED_SPINLOCKS + __pv_init_lock_hash(); + pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_lock_ops.wait = kvm_wait; + pv_lock_ops.kick = kvm_kick_cpu; +#else /* !CONFIG_QUEUED_SPINLOCKS */ pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); pv_lock_ops.unlock_kick = kvm_unlock_kick; +#endif } static __init int kvm_spinlock_init_jump(void) diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index bbb6c7316341..33ee3e0efd65 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -8,11 +8,33 @@ #include <asm/paravirt.h> +#ifdef CONFIG_QUEUED_SPINLOCKS +__visible void __native_queued_spin_unlock(struct qspinlock *lock) +{ + native_queued_spin_unlock(lock); +} + +PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock); + +bool pv_is_native_spin_unlock(void) +{ + return pv_lock_ops.queued_spin_unlock.func == + __raw_callee_save___native_queued_spin_unlock; +} +#endif + struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP +#ifdef CONFIG_QUEUED_SPINLOCKS + .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, + .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), + .wait = paravirt_nop, + .kick = paravirt_nop, +#else /* !CONFIG_QUEUED_SPINLOCKS */ .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), .unlock_kick = paravirt_nop, -#endif +#endif /* !CONFIG_QUEUED_SPINLOCKS */ +#endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index d9f32e6d6ab6..e1b013696dde 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) +DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); +#endif + unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) { /* arg in %eax, return in %eax */ @@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) return 0; } +extern bool pv_is_native_spin_unlock(void); + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { @@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_cpu_ops, clts); PATCH_SITE(pv_cpu_ops, read_tsc); - - patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); - break; +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) + case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): + if (pv_is_native_spin_unlock()) { + start = start_pv_lock_ops_queued_spin_unlock; + end = end_pv_lock_ops_queued_spin_unlock; + goto patch_site; + } +#endif default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; + +patch_site: + ret = paravirt_patch_insns(ibuf, len, start, end); + break; } #undef PATCH_SITE return ret; diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index a1da6737ba5b..a1fa86782186 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); DEF_NATIVE(, mov32, 
"mov %edi, %eax"); DEF_NATIVE(, mov64, "mov %rdi, %rax"); +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) +DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); +#endif + unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) { return paravirt_patch_insns(insnbuf, len, @@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) start__mov64, end__mov64); } +extern bool pv_is_native_spin_unlock(void); + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { @@ -59,14 +65,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_cpu_ops, clts); PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_cpu_ops, wbinvd); - - patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); - break; +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) + case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): + if (pv_is_native_spin_unlock()) { + start = start_pv_lock_ops_queued_spin_unlock; + end = end_pv_lock_ops_queued_spin_unlock; + goto patch_site; + } +#endif default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; + +patch_site: + ret = paravirt_patch_insns(ibuf, len, start, end); + break; } #undef PATCH_SITE return ret; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a25e202bb319..353972c1946c 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -140,6 +140,51 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *memory; + + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) + return memory; + + if (!dev) + dev = &x86_dma_fallback_dev; + + if (!is_device_dma_capable(dev)) + return NULL; + + if (!ops->alloc) + return NULL; + + memory = ops->alloc(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp), attrs); + debug_dma_alloc_coherent(dev, size, *dma_handle, memory); + + return memory; +} +EXPORT_SYMBOL(dma_alloc_attrs); + +void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) + return; + + debug_dma_free_coherent(dev, size, vaddr, bus); + if (ops->free) + ops->free(dev, size, vaddr, bus, attrs); +} +EXPORT_SYMBOL(dma_free_attrs); + /* * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel * parameter documentation. diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 77dd0ad58be4..adf0392d549a 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -20,6 +20,13 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, { void *vaddr; + /* + * Don't print a warning when the first allocation attempt fails. + * swiotlb_alloc_coherent() will print a warning when the DMA + * memory allocation ultimately failed. 
+ */ + flags |= __GFP_NOWARN; + vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags, attrs); if (vaddr) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8213da62b1b7..9cad694ed7c4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -25,8 +25,7 @@ #include <asm/idle.h> #include <asm/uaccess.h> #include <asm/mwait.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/debugreg.h> #include <asm/nmi.h> #include <asm/tlbflush.h> @@ -57,7 +56,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif }; -EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss); +EXPORT_PER_CPU_SYMBOL(cpu_tss); #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); @@ -76,9 +75,6 @@ void idle_notifier_unregister(struct notifier_block *n) EXPORT_SYMBOL_GPL(idle_notifier_unregister); #endif -struct kmem_cache *task_xstate_cachep; -EXPORT_SYMBOL_GPL(task_xstate_cachep); - /* * this gets called so that we can store lazy state into memory and copy the * current task into the new thread. @@ -87,36 +83,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; - dst->thread.fpu_counter = 0; - dst->thread.fpu.has_fpu = 0; - dst->thread.fpu.state = NULL; - task_disable_lazy_fpu_restore(dst); - if (tsk_used_math(src)) { - int err = fpu_alloc(&dst->thread.fpu); - if (err) - return err; - fpu_copy(dst, src); - } - return 0; -} - -void free_thread_xstate(struct task_struct *tsk) -{ - fpu_free(&tsk->thread.fpu); -} - -void arch_release_task_struct(struct task_struct *tsk) -{ - free_thread_xstate(tsk); -} - -void arch_task_cache_init(void) -{ - task_xstate_cachep = - kmem_cache_create("task_xstate", xstate_size, - __alignof__(union thread_xstate), - SLAB_PANIC | SLAB_NOTRACK, NULL); - setup_xstate_comp(); + return fpu__copy(&dst->thread.fpu, &src->thread.fpu); } /* @@ -127,6 +94,7 @@ void exit_thread(void) struct task_struct *me = current; struct thread_struct *t = &me->thread; unsigned long *bp = t->io_bitmap_ptr; + struct fpu *fpu = &t->fpu; if (bp) { struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); @@ -142,7 +110,7 @@ void exit_thread(void) kfree(bp); } - drop_fpu(me); + fpu__drop(fpu); } void flush_thread(void) @@ -152,17 +120,7 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - if (!use_eager_fpu()) { - /* FPU state will be reallocated lazily at the first use. */ - drop_fpu(tsk); - free_thread_xstate(tsk); - } else if (!used_math()) { - /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(init_fpu(tsk))) - force_sig(SIGKILL, tsk); - user_fpu_begin(); - restore_init_xstate(); - } + fpu__clear(&tsk->thread.fpu); } static void hard_disable_TSC(void) @@ -443,11 +401,10 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) } /* - * MONITOR/MWAIT with no hints, used for default default C1 state. - * This invokes MWAIT with interrutps enabled and no flags, - * which is backwards compatible with the original MWAIT implementation. + * MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT + * with interrupts enabled and no flags, which is backwards compatible with the + * original MWAIT implementation. 
*/ - static void mwait_idle(void) { if (!current_set_polling_and_test()) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106b06da..deff651835b4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -39,8 +39,7 @@ #include <asm/pgtable.h> #include <asm/ldt.h> #include <asm/processor.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/desc.h> #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> @@ -242,14 +241,16 @@ __visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, - *next = &next_p->thread; + *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(cpu_tss, cpu); - fpu_switch_t fpu; + fpu_switch_t fpu_switch; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - fpu = switch_fpu_prepare(prev_p, next_p, cpu); + fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -296,7 +297,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up + * done before fpu__restore(), so the TS bit is up * to date. */ arch_end_context_switch(next_p); @@ -319,7 +320,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) lazy_load_gs(next->gs); - switch_fpu_finish(next_p, fpu); + switch_fpu_finish(next_fpu, fpu_switch); this_cpu_write(current_task, next_p); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f174..c50e013b57d2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -38,8 +38,7 @@ #include <asm/pgtable.h> #include <asm/processor.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/mmu_context.h> #include <asm/prctl.h> #include <asm/desc.h> @@ -274,12 +273,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(cpu_tss, cpu); unsigned fsindex, gsindex; - fpu_switch_t fpu; + fpu_switch_t fpu_switch; - fpu = switch_fpu_prepare(prev_p, next_p, cpu); + fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -299,7 +300,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Leave lazy mode, flushing any hypercalls made here. This * must be done after loading TLS entries in the GDT but before * loading segments that might reference them, and and it must - * be done before math_state_restore, so the TS bit is up to + * be done before fpu__restore(), so the TS bit is up to * date. 
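[Annotation] Both __switch_to() flavors keep the same two-phase shape, only rephrased in terms of struct fpu; the pattern, condensed (not verbatim):

	fpu_switch_t fpu_switch;

	/* Phase 1: save (or lazily abandon) the outgoing task's registers. */
	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);

	/* ... stack, TLS and segment switching happens in between ... */

	/* Phase 2: load the incoming task's registers when worthwhile. */
	switch_fpu_finish(next_fpu, fpu_switch);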
*/ arch_end_context_switch(next_p); @@ -391,7 +392,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); prev->gsindex = gsindex; - switch_fpu_finish(next_p, fpu); + switch_fpu_finish(next_fpu, fpu_switch); /* * Switch the PDA and FPU contexts. diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a7bc79480719..9be72bc3613f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -11,7 +11,6 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/ptrace.h> -#include <linux/regset.h> #include <linux/tracehook.h> #include <linux/user.h> #include <linux/elf.h> @@ -28,8 +27,9 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/processor.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> +#include <asm/fpu/regset.h> #include <asm/debugreg.h> #include <asm/ldt.h> #include <asm/desc.h> @@ -1297,7 +1297,7 @@ static struct user_regset x86_64_regsets[] __read_mostly = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, @@ -1338,13 +1338,13 @@ static struct user_regset x86_32_regsets[] __read_mostly = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = fpregs_active, .get = fpregs_get, .set = fpregs_set + .active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d74ac33290ae..cba828892790 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -531,12 +531,14 @@ static void __init reserve_crashkernel_low(void) if (ret != 0) { /* * two parts from lib/swiotlb.c: - * swiotlb size: user specified with swiotlb= or default. - * swiotlb overflow buffer: now is hardcoded to 32k. - * We round it to 8M for other buffers that - * may need to stay low too. + * -swiotlb size: user-specified with swiotlb= or default. + * + * -swiotlb overflow buffer: now hardcoded to 32k. We round it + * to 8M for other buffers that may need to stay low too. Also + * make sure we allocate enough extra low memory so that we + * don't run out of DMA buffers for 32-bit devices. */ - low_size = swiotlb_size_or_default() + (8UL<<20); + low_size = max(swiotlb_size_or_default() + (8UL<<20), 256UL<<20); auto_set = true; } else { /* passed with crashkernel=0,low ? 
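[Annotation] Worked numbers for the reserve_crashkernel_low() change above, assuming the default 64 MiB swiotlb: the old formula reserved 64M + 8M = 72 MiB of low memory, which could still starve 32-bit DMA; the new clamp guarantees a floor:

	low_size = max(swiotlb_size_or_default() + (8UL << 20),	/* 72 MiB  */
		       256UL << 20);					/* 256 MiB */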
*/ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 1ea14fd53933..206996c1669d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -26,8 +26,8 @@ #include <asm/processor.h> #include <asm/ucontext.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> #include <asm/vdso.h> #include <asm/mce.h> #include <asm/sighandling.h> @@ -103,7 +103,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) get_user_ex(buf, &sc->fpstate); } get_user_catch(err); - err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); + err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32)); force_iret(); @@ -199,6 +199,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, unsigned long sp = regs->sp; unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); + struct fpu *fpu = ¤t->thread.fpu; /* redzone */ if (config_enabled(CONFIG_X86_64)) @@ -218,9 +219,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } } - if (used_math()) { - sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), - &buf_fx, &math_size); + if (fpu->fpstate_active) { + sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32), + &buf_fx, &math_size); *fpstate = (void __user *)sp; } @@ -234,8 +235,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; /* save i387 and extended state */ - if (used_math() && - save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) + if (fpu->fpstate_active && + copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; return (void __user *)sp; @@ -593,6 +594,22 @@ badframe: return 0; } +static inline int is_ia32_compat_frame(void) +{ + return config_enabled(CONFIG_IA32_EMULATION) && + test_thread_flag(TIF_IA32); +} + +static inline int is_ia32_frame(void) +{ + return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); +} + +static inline int is_x32_frame(void) +{ + return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); +} + static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) { @@ -617,6 +634,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { bool stepping, failed; + struct fpu *fpu = ¤t->thread.fpu; /* Are we from a system call? */ if (syscall_get_nr(current, regs) >= 0) { @@ -665,8 +683,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. 
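[Annotation] The frame-flavor helpers just added feed the dispatch in setup_rt_frame(); condensed for orientation (argument marshalling elided):

	if (is_ia32_frame())
		return ia32_setup_rt_frame(usig, ksig, cset, regs);
	else if (is_x32_frame())
		return x32_setup_rt_frame(ksig, cset, regs);
	else
		return __setup_rt_frame(ksig->sig, ksig, set, regs);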
*/ - if (used_math()) - fpu_reset_state(current); + if (fpu->fpstate_active) + fpu__clear(fpu); } signal_setup_done(failed, ksig, stepping); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 50e547eac8cd..6d4bfea25874 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -68,8 +68,7 @@ #include <asm/mwait.h> #include <asm/apic.h> #include <asm/io_apic.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/setup.h> #include <asm/uv/uv.h> #include <linux/mc146818rtc.h> @@ -314,10 +313,10 @@ topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name) cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2)); } -#define link_mask(_m, c1, c2) \ +#define link_mask(mfunc, c1, c2) \ do { \ - cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \ - cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \ + cpumask_set_cpu((c1), mfunc(c2)); \ + cpumask_set_cpu((c2), mfunc(c1)); \ } while (0) static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) @@ -398,9 +397,9 @@ void set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, cpu_sibling_setup_mask); if (!has_mp) { - cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); + cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); - cpumask_set_cpu(cpu, cpu_core_mask(cpu)); + cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); c->booted_cores = 1; return; } @@ -409,32 +408,34 @@ void set_cpu_sibling_map(int cpu) o = &cpu_data(i); if ((i == cpu) || (has_smt && match_smt(c, o))) - link_mask(sibling, cpu, i); + link_mask(topology_sibling_cpumask, cpu, i); if ((i == cpu) || (has_mp && match_llc(c, o))) - link_mask(llc_shared, cpu, i); + link_mask(cpu_llc_shared_mask, cpu, i); } /* * This needs a separate iteration over the cpus because we rely on all - * cpu_sibling_mask links to be set-up. + * topology_sibling_cpumask links to be set-up. */ for_each_cpu(i, cpu_sibling_setup_mask) { o = &cpu_data(i); if ((i == cpu) || (has_mp && match_die(c, o))) { - link_mask(core, cpu, i); + link_mask(topology_core_cpumask, cpu, i); /* * Does this new cpu bringup a new core? 
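[Annotation] The reworked link_mask() takes the accessor function itself instead of pasting a cpu_##_m##_mask name, so it works unchanged with the topology_*_cpumask() helpers. One expansion, for illustration:

	link_mask(topology_sibling_cpumask, cpu, i);
	/* expands to: */
	do {
		cpumask_set_cpu(cpu, topology_sibling_cpumask(i));
		cpumask_set_cpu(i, topology_sibling_cpumask(cpu));
	} while (0);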
*/ - if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) { + if (cpumask_weight( + topology_sibling_cpumask(cpu)) == 1) { /* * for each core in package, increment * the booted_cores for this new cpu */ - if (cpumask_first(cpu_sibling_mask(i)) == i) + if (cpumask_first( + topology_sibling_cpumask(i)) == i) c->booted_cores++; /* * increment the core count for all @@ -1009,8 +1010,8 @@ static __init void disable_smp(void) physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); else physid_set_mask_of_physid(0, &phys_cpu_present_map); - cpumask_set_cpu(0, cpu_sibling_mask(0)); - cpumask_set_cpu(0, cpu_core_mask(0)); + cpumask_set_cpu(0, topology_sibling_cpumask(0)); + cpumask_set_cpu(0, topology_core_cpumask(0)); } enum { @@ -1293,22 +1294,22 @@ static void remove_siblinginfo(int cpu) int sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); - for_each_cpu(sibling, cpu_core_mask(cpu)) { - cpumask_clear_cpu(cpu, cpu_core_mask(sibling)); + for_each_cpu(sibling, topology_core_cpumask(cpu)) { + cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); /*/ * last thread sibling in this cpu core going down */ - if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) + if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1) cpu_data(sibling).booted_cores--; } - for_each_cpu(sibling, cpu_sibling_mask(cpu)) - cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); + for_each_cpu(sibling, topology_sibling_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); cpumask_clear(cpu_llc_shared_mask(cpu)); - cpumask_clear(cpu_sibling_mask(cpu)); - cpumask_clear(cpu_core_mask(cpu)); + cpumask_clear(topology_sibling_cpumask(cpu)); + cpumask_clear(topology_core_cpumask(cpu)); c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..36cb15b7b367 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -54,12 +54,13 @@ #include <asm/ftrace.h> #include <asm/traps.h> #include <asm/desc.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/mce.h> #include <asm/fixmap.h> #include <asm/mach_traps.h> #include <asm/alternative.h> +#include <asm/fpu/xstate.h> +#include <asm/trace/mpx.h> #include <asm/mpx.h> #ifdef CONFIG_X86_64 @@ -371,10 +372,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { - struct task_struct *tsk = current; - struct xsave_struct *xsave_buf; enum ctx_state prev_state; - struct bndcsr *bndcsr; + const struct bndcsr *bndcsr; siginfo_t *info; prev_state = exception_enter(); @@ -393,15 +392,15 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) /* * We need to look at BNDSTATUS to resolve this exception. - * It is not directly accessible, though, so we need to - * do an xsave and then pull it out of the xsave buffer. + * A NULL here might mean that it is in its 'init state', + * which is all zeros which indicates MPX was not + * responsible for the exception. 
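[Annotation] get_xsave_field_ptr() bundles the save-then-lookup dance that do_bounds() used to do by hand. Its contract, sketched from the 4.2-era fpu/xstate.c (an assumption about the new core, not part of this hunk):

const void *get_xsave_field_ptr(int xsave_field)
{
	struct fpu *fpu = &current->thread.fpu;

	if (!fpu->fpstate_active)
		return NULL;
	fpu__save(fpu);		/* flush live registers into fpu->state */
	return get_xsave_addr(&fpu->state.xsave, xsave_field);
}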
*/ - fpu_save_init(&tsk->thread.fpu); - xsave_buf = &(tsk->thread.fpu.state->xsave); - bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) goto exit_trap; + trace_bounds_exception_mpx(bndcsr); /* * The error code field of the BNDSTATUS register communicates status * information of a bound range exception #BR or operation involving @@ -409,11 +408,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) */ switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { case 2: /* Bound directory has invalid entry. */ - if (mpx_handle_bd_fault(xsave_buf)) + if (mpx_handle_bd_fault()) goto exit_trap; break; /* Success, it was handled */ case 1: /* Bound violation. */ - info = mpx_generate_siginfo(regs, xsave_buf); + info = mpx_generate_siginfo(regs); if (IS_ERR(info)) { /* * We failed to decode the MPX instruction. Act as if @@ -709,8 +708,8 @@ NOKPROBE_SYMBOL(do_debug); static void math_error(struct pt_regs *regs, int error_code, int trapnr) { struct task_struct *task = current; + struct fpu *fpu = &task->thread.fpu; siginfo_t info; - unsigned short err; char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : "simd exception"; @@ -718,8 +717,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) return; conditional_sti(regs); - if (!user_mode(regs)) - { + if (!user_mode(regs)) { if (!fixup_exception(regs)) { task->thread.error_code = error_code; task->thread.trap_nr = trapnr; @@ -731,62 +729,20 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) /* * Save the info for the exception handler and clear the error. */ - unlazy_fpu(task); - task->thread.trap_nr = trapnr; + fpu__save(fpu); + + task->thread.trap_nr = trapnr; task->thread.error_code = error_code; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *)uprobe_get_trap_addr(regs); - if (trapnr == X86_TRAP_MF) { - unsigned short cwd, swd; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. We should only be taking one exception at a time, - * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't synchronizing its FPU usage - * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception - */ - cwd = get_fpu_cwd(task); - swd = get_fpu_swd(task); + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); - err = swd & ~cwd; - } else { - /* - * The SIMD FPU exceptions are handled a little differently, as there - * is only a single status/control register. Thus, to determine which - * unmasked exception was caught we must mask the exception mask bits - * at 0x1f80, and then use these to mask the exception bits at 0x3f. 
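[Annotation] The FPE_* decode being deleted here is not lost: it moves wholesale into fpu__exception_code(), which handles both the x87 case (err = swd & ~cwd) and the SIMD case (err = ~(mxcsr >> 7) & mxcsr) and returns 0 for a spurious trap, which math_error() above now turns into a silent retry. The core mapping, for reference:

	if (err & 0x001)	return FPE_FLTINV;	/* invalid op / stack fault */
	if (err & 0x004)	return FPE_FLTDIV;	/* divide by zero           */
	if (err & 0x008)	return FPE_FLTOVF;	/* overflow                 */
	if (err & 0x012)	return FPE_FLTUND;	/* denormal, underflow      */
	if (err & 0x020)	return FPE_FLTRES;	/* precision                */
	return 0;					/* spurious trap: retry     */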
- */ - unsigned short mxcsr = get_fpu_mxcsr(task); - err = ~(mxcsr >> 7) & mxcsr; - } + info.si_code = fpu__exception_code(fpu, trapnr); - if (err & 0x001) { /* Invalid op */ - /* - * swd & 0x240 == 0x040: Stack Underflow - * swd & 0x240 == 0x240: Stack Overflow - * User must clear the SF bit (0x40) if set - */ - info.si_code = FPE_FLTINV; - } else if (err & 0x004) { /* Divide by Zero */ - info.si_code = FPE_FLTDIV; - } else if (err & 0x008) { /* Overflow */ - info.si_code = FPE_FLTOVF; - } else if (err & 0x012) { /* Denormal, Underflow */ - info.si_code = FPE_FLTUND; - } else if (err & 0x020) { /* Precision */ - info.si_code = FPE_FLTRES; - } else { - /* - * If we're using IRQ 13, or supposedly even some trap - * X86_TRAP_MF implementations, it's possible - * we get a spurious trap, which is not an error. - */ + /* Retry when we get spurious exceptions: */ + if (!info.si_code) return; - } + force_sig_info(SIGFPE, &info, task); } @@ -827,48 +783,6 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void) { } -/* - * 'math_state_restore()' saves the current math information in the - * old math state array, and gets the new ones from the current task - * - * Careful.. There are problems with IBM-designed IRQ13 behaviour. - * Don't touch unless you *really* know how it works. - * - * Must be called with kernel preemption disabled (eg with local - * local interrupts as in the case of do_device_not_available). - */ -void math_state_restore(void) -{ - struct task_struct *tsk = current; - - if (!tsk_used_math(tsk)) { - local_irq_enable(); - /* - * does a slab alloc which can sleep - */ - if (init_fpu(tsk)) { - /* - * ran out of memory! - */ - do_group_exit(SIGKILL); - return; - } - local_irq_disable(); - } - - /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ - kernel_fpu_disable(); - __thread_fpu_begin(tsk); - if (unlikely(restore_fpu_checking(tsk))) { - fpu_reset_state(tsk); - force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); - } else { - tsk->thread.fpu_counter++; - } - kernel_fpu_enable(); -} -EXPORT_SYMBOL_GPL(math_state_restore); - dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { @@ -889,7 +803,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) return; } #endif - math_state_restore(); /* interrupts still off */ + fpu__restore(¤t->thread.fpu); /* interrupts still off */ #ifdef CONFIG_X86_32 conditional_sti(regs); #endif diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 26488487bc61..dd8d0791dfb5 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -113,7 +113,7 @@ static void check_tsc_warp(unsigned int timeout) */ static inline unsigned int loop_timeout(int cpu) { - return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20; + return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20; } /* diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 0b81ad67da07..66476244731e 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -29,6 +29,7 @@ #include <linux/kdebug.h> #include <asm/processor.h> #include <asm/insn.h> +#include <asm/mmu_context.h> /* Post-execution fixups. 
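[Annotation] The two per-file is_64bit_mm() copies deleted below are not dropped functionality: this series hoists the 64-bit variant into <asm/mmu_context.h> (hence the new include above) so uprobes and MPX can share it. The shared definition is equivalent to:

static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return !config_enabled(CONFIG_IA32_EMULATION) ||
	       !(mm->context.ia32_compat == TIF_IA32);
}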
*/ @@ -312,11 +313,6 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool } #ifdef CONFIG_X86_64 -static inline bool is_64bit_mm(struct mm_struct *mm) -{ - return !config_enabled(CONFIG_IA32_EMULATION) || - !(mm->context.ia32_compat == TIF_IA32); -} /* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses @@ -497,10 +493,6 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) } } #else /* 32-bit: */ -static inline bool is_64bit_mm(struct mm_struct *mm) -{ - return false; -} /* * No RIP-relative addressing on 32-bit */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 37d8fa4438f0..a0695be19864 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -75,7 +75,5 @@ EXPORT_SYMBOL(native_load_gs_index); #ifdef CONFIG_PREEMPT EXPORT_SYMBOL(___preempt_schedule); -#ifdef CONFIG_CONTEXT_TRACKING -EXPORT_SYMBOL(___preempt_schedule_context); -#endif +EXPORT_SYMBOL(___preempt_schedule_notrace); #endif diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c deleted file mode 100644 index 87a815b85f3e..000000000000 --- a/arch/x86/kernel/xsave.c +++ /dev/null @@ -1,724 +0,0 @@ -/* - * xsave/xrstor support. - * - * Author: Suresh Siddha <suresh.b.siddha@intel.com> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/bootmem.h> -#include <linux/compat.h> -#include <linux/cpu.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/sigframe.h> -#include <asm/tlbflush.h> -#include <asm/xcr.h> - -/* - * Supported feature mask by the CPU and the kernel. - */ -u64 pcntxt_mask; - -/* - * Represents init state for the supported extended state. - */ -struct xsave_struct *init_xstate_buf; - -static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; -static unsigned int *xstate_offsets, *xstate_sizes; -static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8]; -static unsigned int xstate_features; - -/* - * If a processor implementation discern that a processor state component is - * in its initialized state it may modify the corresponding bit in the - * xsave_hdr.xstate_bv as '0', with out modifying the corresponding memory - * layout in the case of xsaveopt. While presenting the xstate information to - * the user, we always ensure that the memory layout of a feature will be in - * the init state if the corresponding header bit is zero. This is to ensure - * that the user doesn't see some stale state in the memory layout during - * signal handling, debugging etc. - */ -void __sanitize_i387_state(struct task_struct *tsk) -{ - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - int feature_bit = 0x2; - u64 xstate_bv; - - if (!fx) - return; - - xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; - - /* - * None of the feature bits are in init state. So nothing else - * to do for us, as the memory layout is up to date. 
- */ - if ((xstate_bv & pcntxt_mask) == pcntxt_mask) - return; - - /* - * FP is in init state - */ - if (!(xstate_bv & XSTATE_FP)) { - fx->cwd = 0x37f; - fx->swd = 0; - fx->twd = 0; - fx->fop = 0; - fx->rip = 0; - fx->rdp = 0; - memset(&fx->st_space[0], 0, 128); - } - - /* - * SSE is in init state - */ - if (!(xstate_bv & XSTATE_SSE)) - memset(&fx->xmm_space[0], 0, 256); - - xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2; - - /* - * Update all the other memory layouts for which the corresponding - * header bit is in the init state. - */ - while (xstate_bv) { - if (xstate_bv & 0x1) { - int offset = xstate_offsets[feature_bit]; - int size = xstate_sizes[feature_bit]; - - memcpy(((void *) fx) + offset, - ((void *) init_xstate_buf) + offset, - size); - } - - xstate_bv >>= 1; - feature_bit++; - } -} - -/* - * Check for the presence of extended state information in the - * user fpstate pointer in the sigcontext. - */ -static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw) -{ - int min_xstate_size = sizeof(struct i387_fxsave_struct) + - sizeof(struct xsave_hdr_struct); - unsigned int magic2; - - if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) - return -1; - - /* Check for the first magic field and other error scenarios. */ - if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || - fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > xstate_size || - fx_sw->xstate_size > fx_sw->extended_size) - return -1; - - /* - * Check for the presence of second magic word at the end of memory - * layout. This detects the case where the user just copied the legacy - * fpstate layout with out copying the extended state information - * in the memory layout. - */ - if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) - || magic2 != FP_XSTATE_MAGIC2) - return -1; - - return 0; -} - -/* - * Signal frame handlers. - */ -static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) -{ - if (use_fxsr()) { - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; - struct user_i387_ia32_struct env; - struct _fpstate_ia32 __user *fp = buf; - - convert_from_fxsr(&env, tsk); - - if (__copy_to_user(buf, &env, sizeof(env)) || - __put_user(xsave->i387.swd, &fp->status) || - __put_user(X86_FXSR_MAGIC, &fp->magic)) - return -1; - } else { - struct i387_fsave_struct __user *fp = buf; - u32 swd; - if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) - return -1; - } - - return 0; -} - -static inline int save_xstate_epilog(void __user *buf, int ia32_frame) -{ - struct xsave_struct __user *x = buf; - struct _fpx_sw_bytes *sw_bytes; - u32 xstate_bv; - int err; - - /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ - sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; - err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); - - if (!use_xsave()) - return err; - - err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); - - /* - * Read the xstate_bv which we copied (directly from the cpu or - * from the state in task struct) to the user buffers. - */ - err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. 
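[Annotation] For orientation while reading these deleted signal-frame helpers (they reappear essentially unchanged in the new arch/x86/kernel/fpu/signal.c): the user-stack frame that check_for_xstate() validates and save_xstate_epilog() finishes is bracketed by two magic words:

/*
 * fpstate -> +--------------------------------------+
 *            | fxsave image; sw_reserved carries    |
 *            |   FP_XSTATE_MAGIC1, extended_size,   |
 *            |   xstate_bv, xstate_size             |
 *            +--------------------------------------+
 *            | xsave header + extended components   |
 *            +--------------------------------------+
 *            | FP_XSTATE_MAGIC2 (u32)               |  <- end-of-frame check
 *            +--------------------------------------+
 */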
- * - * xsave aware apps can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - xstate_bv |= XSTATE_FPSSE; - - err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - - return err; -} - -static inline int save_user_xstate(struct xsave_struct __user *buf) -{ - int err; - - if (use_xsave()) - err = xsave_user(buf); - else if (use_fxsr()) - err = fxsave_user((struct i387_fxsave_struct __user *) buf); - else - err = fsave_user((struct i387_fsave_struct __user *) buf); - - if (unlikely(err) && __clear_user(buf, xstate_size)) - err = -EFAULT; - return err; -} - -/* - * Save the fpu, extended register state to the user signal frame. - * - * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save - * state is copied. - * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. - * - * buf == buf_fx for 64-bit frames and 32-bit fsave frame. - * buf != buf_fx for 32-bit frames with fxstate. - * - * If the fpu, extended register state is live, save the state directly - * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, - * copy the thread's fpu state to the user frame starting at 'buf_fx'. - * - * If this is a 32-bit frame with fxstate, put a fsave header before - * the aligned state at 'buf_fx'. - * - * For [f]xsave state, update the SW reserved fields in the [f]xsave frame - * indicating the absence/presence of the extended state to the user. - */ -int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) -{ - struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; - struct task_struct *tsk = current; - int ia32_fxstate = (buf != buf_fx); - - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); - - if (!access_ok(VERIFY_WRITE, buf, size)) - return -EACCES; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(current, NULL, 0, - sizeof(struct user_i387_ia32_struct), NULL, - (struct _fpstate_ia32 __user *) buf) ? -1 : 1; - - if (user_has_fpu()) { - /* Save the live register state to the user directly. */ - if (save_user_xstate(buf_fx)) - return -1; - /* Update the thread's fxstate to save the fsave header. */ - if (ia32_fxstate) - fpu_fxsave(&tsk->thread.fpu); - } else { - sanitize_i387_state(tsk); - if (__copy_to_user(buf_fx, xsave, xstate_size)) - return -1; - } - - /* Save the fsave header for the 32-bit frames. */ - if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) - return -1; - - if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) - return -1; - - return 0; -} - -static inline void -sanitize_restored_xstate(struct task_struct *tsk, - struct user_i387_ia32_struct *ia32_env, - u64 xstate_bv, int fx_only) -{ - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; - struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; - - if (use_xsave()) { - /* These bits must be zero. */ - memset(xsave_hdr->reserved, 0, 48); - - /* - * Init the state that is not present in the memory - * layout and not enabled by the OS. - */ - if (fx_only) - xsave_hdr->xstate_bv = XSTATE_FPSSE; - else - xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); - } - - if (use_fxsr()) { - /* - * mscsr reserved bits must be masked to zero for security - * reasons. - */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - - convert_to_fxsr(tsk, ia32_env); - } -} - -/* - * Restore the extended state if present. Otherwise, restore the FP/SSE state. 
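[Annotation] The key decision in the deleted save_xstate_sig() survives in its successor, copy_fpstate_to_sigframe() (see the signal.c hunk earlier); condensed from the code above:

	if (user_has_fpu())			/* registers live: xsave straight to user frame */
		err = save_user_xstate(buf_fx);
	else					/* state already in memory: copy it out          */
		err = __copy_to_user(buf_fx, xsave, xstate_size);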
- */ -static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) -{ - if (use_xsave()) { - if ((unsigned long)buf % 64 || fx_only) { - u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; - xrstor_state(init_xstate_buf, init_bv); - return fxrstor_user(buf); - } else { - u64 init_bv = pcntxt_mask & ~xbv; - if (unlikely(init_bv)) - xrstor_state(init_xstate_buf, init_bv); - return xrestore_user(buf, xbv); - } - } else if (use_fxsr()) { - return fxrstor_user(buf); - } else - return frstor_user(buf); -} - -int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) -{ - int ia32_fxstate = (buf != buf_fx); - struct task_struct *tsk = current; - int state_size = xstate_size; - u64 xstate_bv = 0; - int fx_only = 0; - - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); - - if (!buf) { - fpu_reset_state(tsk); - return 0; - } - - if (!access_ok(VERIFY_READ, buf, size)) - return -EACCES; - - if (!used_math() && init_fpu(tsk)) - return -1; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; - - if (use_xsave()) { - struct _fpx_sw_bytes fx_sw_user; - if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { - /* - * Couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - state_size = sizeof(struct i387_fxsave_struct); - fx_only = 1; - } else { - state_size = fx_sw_user.xstate_size; - xstate_bv = fx_sw_user.xstate_bv; - } - } - - if (ia32_fxstate) { - /* - * For 32-bit frames with fxstate, copy the user state to the - * thread's fpu state, reconstruct fxstate from the fsave - * header. Sanitize the copied state etc. - */ - struct fpu *fpu = &tsk->thread.fpu; - struct user_i387_ia32_struct env; - int err = 0; - - /* - * Drop the current fpu which clears used_math(). This ensures - * that any context-switch during the copy of the new state, - * avoids the intermediate state from getting restored/saved. - * Thus avoiding the new restored state from getting corrupted. - * We will be ready to restore/save the state only after - * set_used_math() is again set. - */ - drop_fpu(tsk); - - if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { - fpu_finit(fpu); - err = -1; - } else { - sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); - } - - set_used_math(); - if (use_eager_fpu()) { - preempt_disable(); - math_state_restore(); - preempt_enable(); - } - - return err; - } else { - /* - * For 64-bit frames and 32-bit fsave frames, restore the user - * state to the registers directly (with exceptions handled). - */ - user_fpu_begin(); - if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { - fpu_reset_state(tsk); - return -1; - } - } - - return 0; -} - -/* - * Prepare the SW reserved portion of the fxsave memory layout, indicating - * the presence of the extended state information in the memory layout - * pointed by the fpstate pointer in the sigcontext. - * This will be saved when ever the FP and extended state context is - * saved on the user stack during the signal handler delivery to the user. 
- */ -static void prepare_fx_sw_frame(void) -{ - int fsave_header_size = sizeof(struct i387_fsave_struct); - int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - - if (config_enabled(CONFIG_X86_32)) - size += fsave_header_size; - - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = size; - fx_sw_reserved.xstate_bv = pcntxt_mask; - fx_sw_reserved.xstate_size = xstate_size; - - if (config_enabled(CONFIG_IA32_EMULATION)) { - fx_sw_reserved_ia32 = fx_sw_reserved; - fx_sw_reserved_ia32.extended_size += fsave_header_size; - } -} - -/* - * Enable the extended processor state save/restore feature - */ -static inline void xstate_enable(void) -{ - cr4_set_bits(X86_CR4_OSXSAVE); - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); -} - -/* - * Record the offsets and sizes of different state managed by the xsave - * memory layout. - */ -static void __init setup_xstate_features(void) -{ - int eax, ebx, ecx, edx, leaf = 0x2; - - xstate_features = fls64(pcntxt_mask); - xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); - xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); - - do { - cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); - - if (eax == 0) - break; - - xstate_offsets[leaf] = ebx; - xstate_sizes[leaf] = eax; - - leaf++; - } while (1); -} - -/* - * This function sets up offsets and sizes of all extended states in - * xsave area. This supports both standard format and compacted format - * of the xsave aread. - * - * Input: void - * Output: void - */ -void setup_xstate_comp(void) -{ - unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8]; - int i; - - /* - * The FP xstates and SSE xstates are legacy states. They are always - * in the fixed offsets in the xsave area in either compacted form - * or standard form. - */ - xstate_comp_offsets[0] = 0; - xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); - - if (!cpu_has_xsaves) { - for (i = 2; i < xstate_features; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) { - xstate_comp_offsets[i] = xstate_offsets[i]; - xstate_comp_sizes[i] = xstate_sizes[i]; - } - } - return; - } - - xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; - - for (i = 2; i < xstate_features; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) - xstate_comp_sizes[i] = xstate_sizes[i]; - else - xstate_comp_sizes[i] = 0; - - if (i > 2) - xstate_comp_offsets[i] = xstate_comp_offsets[i-1] - + xstate_comp_sizes[i-1]; - - } -} - -/* - * setup the xstate image representing the init state - */ -static void __init setup_init_fpu_buf(void) -{ - /* - * Setup init_xstate_buf to represent the init state of - * all the features managed by the xsave - */ - init_xstate_buf = alloc_bootmem_align(xstate_size, - __alignof__(struct xsave_struct)); - fx_finit(&init_xstate_buf->i387); - - if (!cpu_has_xsave) - return; - - setup_xstate_features(); - - if (cpu_has_xsaves) { - init_xstate_buf->xsave_hdr.xcomp_bv = - (u64)1 << 63 | pcntxt_mask; - init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask; - } - - /* - * Init all the features state with header_bv being 0x0 - */ - xrstor_state_booting(init_xstate_buf, -1); - /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. 
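[Annotation] A worked example of what setup_xstate_comp() computes above, assuming FP, SSE and one extended feature (bit 2, e.g. AVX) are enabled:

	/* standard format: offsets come straight from CPUID leaf 0xd */
	xstate_comp_offsets[2] = xstate_offsets[2];		/* 576 for AVX */

	/* compacted format (XSAVES): features pack back-to-back after the
	 * 512-byte legacy area plus the 64-byte xsave header */
	xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;	/* 512 + 64 = 576 */
	xstate_comp_offsets[3] = xstate_comp_offsets[2] + xstate_comp_sizes[2];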
- */ - xsave_state_booting(init_xstate_buf, -1); -} - -static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; -static int __init eager_fpu_setup(char *s) -{ - if (!strcmp(s, "on")) - eagerfpu = ENABLE; - else if (!strcmp(s, "off")) - eagerfpu = DISABLE; - else if (!strcmp(s, "auto")) - eagerfpu = AUTO; - return 1; -} -__setup("eagerfpu=", eager_fpu_setup); - - -/* - * Calculate total size of enabled xstates in XCR0/pcntxt_mask. - */ -static void __init init_xstate_size(void) -{ - unsigned int eax, ebx, ecx, edx; - int i; - - if (!cpu_has_xsaves) { - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; - return; - } - - xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = 2; i < 64; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) { - cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); - xstate_size += eax; - } - } -} - -/* - * Enable and initialize the xsave feature. - */ -static void __init xstate_enable_boot_cpu(void) -{ - unsigned int eax, ebx, ecx, edx; - - if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { - WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); - return; - } - - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - pcntxt_mask = eax + ((u64)edx << 32); - - if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - pr_err("FP/SSE not shown under xsave features 0x%llx\n", - pcntxt_mask); - BUG(); - } - - /* - * Support only the state known to OS. - */ - pcntxt_mask = pcntxt_mask & XCNTXT_MASK; - - xstate_enable(); - - /* - * Recompute the context size for enabled features - */ - init_xstate_size(); - - update_regset_xstate_info(xstate_size, pcntxt_mask); - prepare_fx_sw_frame(); - setup_init_fpu_buf(); - - /* Auto enable eagerfpu for xsaveopt */ - if (cpu_has_xsaveopt && eagerfpu != DISABLE) - eagerfpu = ENABLE; - - if (pcntxt_mask & XSTATE_EAGER) { - if (eagerfpu == DISABLE) { - pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n", - pcntxt_mask & XSTATE_EAGER); - pcntxt_mask &= ~XSTATE_EAGER; - } else { - eagerfpu = ENABLE; - } - } - - pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n", - pcntxt_mask, xstate_size, - cpu_has_xsaves ? "compacted form" : "standard form"); -} - -/* - * For the very first instance, this calls xstate_enable_boot_cpu(); - * for all subsequent instances, this calls xstate_enable(). - * - * This is somewhat obfuscated due to the lack of powerful enough - * overrides for the section checks. - */ -void xsave_init(void) -{ - static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; - void (*this_func)(void); - - if (!cpu_has_xsave) - return; - - this_func = next_func; - next_func = xstate_enable; - this_func(); -} - -/* - * setup_init_fpu_buf() is __init and it is OK to call it here because - * init_xstate_buf will be unset only once during boot. - */ -void __init_refok eager_fpu_init(void) -{ - WARN_ON(used_math()); - current_thread_info()->status = 0; - - if (eagerfpu == ENABLE) - setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); - - if (!cpu_has_eager_fpu) { - stts(); - return; - } - - if (!init_xstate_buf) - setup_init_fpu_buf(); -} - -/* - * Given the xsave area and a state inside, this function returns the - * address of the state. - * - * This is the API that is called to get xstate address in either - * standard format or compacted format of xsave area. - * - * Inputs: - * xsave: base address of the xsave area; - * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, - * etc.) - * Output: - * address of the state in the xsave area. 
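[Annotation] To be clear about the wholesale deletion of xsave.c: none of this is lost. The series splits it across the new arch/x86/kernel/fpu/ directory (xstate.c, signal.c, init.c), renaming structures along the way (xsave_struct -> xregs_state, xsave_hdr.xstate_bv -> header.xfeatures, visible in the KVM hunks below). The lookup documented above survives with a tightened signature, roughly:

void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature);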
- */ -void *get_xsave_addr(struct xsave_struct *xsave, int xstate) -{ - int feature = fls64(xstate) - 1; - if (!test_bit(feature, (unsigned long *)&pcntxt_mask)) - return NULL; - - return (void *)xsave + xstate_comp_offsets[feature]; -} -EXPORT_SYMBOL_GPL(get_xsave_addr); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59b69f6a2844..9f705e618af5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -17,7 +17,7 @@ #include <linux/vmalloc.h> #include <linux/uaccess.h> #include <asm/user.h> -#include <asm/xsave.h> +#include <asm/fpu/xstate.h> #include "cpuid.h" #include "lapic.h" #include "mmu.h" @@ -95,6 +95,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); + vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu); + /* * The existing code assumes virtual address is 48-bit in the canonical * address checks; exit if it is ever changed. diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c3b1ad9fca81..496b3695d3d3 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry(vcpu, 7, 0); return best && (best->ebx & bit(X86_FEATURE_RTM)); } + +static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_MPX)); +} #endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 629af0f1c5c4..4c7deb4f78a1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1090,6 +1090,17 @@ static void update_divide_count(struct kvm_lapic *apic) apic->divide_count); } +static void apic_update_lvtt(struct kvm_lapic *apic) +{ + u32 timer_mode = kvm_apic_get_reg(apic, APIC_LVTT) & + apic->lapic_timer.timer_mode_mask; + + if (apic->lapic_timer.timer_mode != timer_mode) { + apic->lapic_timer.timer_mode = timer_mode; + hrtimer_cancel(&apic->lapic_timer.timer); + } +} + static void apic_timer_expired(struct kvm_lapic *apic) { struct kvm_vcpu *vcpu = apic->vcpu; @@ -1298,6 +1309,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) apic_set_reg(apic, APIC_LVTT + 0x10 * i, lvt_val | APIC_LVT_MASKED); } + apic_update_lvtt(apic); atomic_set(&apic->lapic_timer.pending, 0); } @@ -1330,20 +1342,13 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; - case APIC_LVTT: { - u32 timer_mode = val & apic->lapic_timer.timer_mode_mask; - - if (apic->lapic_timer.timer_mode != timer_mode) { - apic->lapic_timer.timer_mode = timer_mode; - hrtimer_cancel(&apic->lapic_timer.timer); - } - + case APIC_LVTT: if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); apic_set_reg(apic, APIC_LVTT, val); + apic_update_lvtt(apic); break; - } case APIC_TMICT: if (apic_lvtt_tscdeadline(apic)) @@ -1576,7 +1581,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) for (i = 0; i < APIC_LVT_NUM; i++) apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); - apic->lapic_timer.timer_mode = 0; + apic_update_lvtt(apic); apic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); @@ -1802,6 +1807,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); + apic_update_lvtt(apic); update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; diff --git a/arch/x86/kvm/mmu.c 
b/arch/x86/kvm/mmu.c index d43867c33bc4..b73337634214 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, } } -void update_permission_bitmask(struct kvm_vcpu *vcpu, - struct kvm_mmu *mmu, bool ept) +static void update_permission_bitmask(struct kvm_vcpu *vcpu, + struct kvm_mmu *mmu, bool ept) { unsigned bit, byte, pfec; u8 map; @@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) { bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); struct kvm_mmu *context = &vcpu->arch.mmu; MMU_WARN_ON(VALID_PAGE(context->root_hpa)); @@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) context->base_role.cr0_wp = is_write_protection(vcpu); context->base_role.smep_andnot_wp = smep && !is_write_protection(vcpu); + context->base_role.smap_andnot_wp + = smap && !is_write_protection(vcpu); } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); @@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; - union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; bool remote_flush, local_flush, zap_page; + union kvm_mmu_page_role mask = { }; + + mask.cr0_wp = 1; + mask.cr4_pae = 1; + mask.nxe = 1; + mask.smep_andnot_wp = 1; + mask.smap_andnot_wp = 1; /* * If we don't have indirect shadow pages, it means no page is @@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); - mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index c7d65637c851..0ada65ecddcf 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -71,8 +71,6 @@ enum { int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); -void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - bool ept); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { @@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index = (pfec >> 1) + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); + WARN_ON(pfec & PFERR_RSVD_MASK); + return (mmu->permissions[index] >> pte_access) & 1; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index fd49c867b25a..6e6d115fe9b5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, mmu_is_nested(vcpu)); if (likely(r != RET_MMIO_PF_INVALID)) return r; + + /* + * page fault with PFEC.RSVD = 1 is caused by shadow + * page fault, should not be used to walk guest page + * table. 
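[Annotation] Two halves of one invariant meet in these MMU hunks: permission_fault() (mmu.h above) now warns if a PFERR_RSVD_MASK bit ever reaches it, and the guest page-table walk below strips that bit first, since it was synthesized for shadow/MMIO handling and means nothing to the guest walker. The index computation shows why a stray bit would corrupt the lookup:

	/* from permission_fault(): pfec is folded into a bitmap index */
	int index = (pfec >> 1) +
		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));

	return (mmu->permissions[index] >> pte_access) & 1;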
+ */ + error_code &= ~PFERR_RSVD_MASK; }; r = mmu_topup_memory_caches(vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce741b8650f6..9afa233b5482 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, + .fpu_activate = svm_fpu_activate, .fpu_deactivate = svm_fpu_deactivate, .tlb_flush = svm_flush_tlb, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7b61687bd79..e11dd59398f1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -40,8 +40,7 @@ #include <asm/vmx.h> #include <asm/virtext.h> #include <asm/mce.h> -#include <asm/i387.h> -#include <asm/xcr.h> +#include <asm/fpu/internal.h> #include <asm/perf_event.h> #include <asm/debugreg.h> #include <asm/kexec.h> @@ -1883,7 +1882,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) * If the FPU is not active (through the host task or * the guest vcpu), then restore the cr0.TS bit. */ - if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) + if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded) stts(); load_gdt(this_cpu_ptr(&host_gdt)); } @@ -10185,6 +10184,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, + .fpu_activate = vmx_fpu_activate, .fpu_deactivate = vmx_fpu_deactivate, .tlb_flush = vmx_flush_tlb, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c73efcd03e29..26eaeb522cab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -59,9 +59,8 @@ #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/mce.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> /* Ugh! */ -#include <asm/xcr.h> +#include <linux/kernel_stat.h> +#include <asm/fpu/internal.h> /* Ugh! */ #include <asm/pvclock.h> #include <asm/div64.h> @@ -702,8 +701,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | - X86_CR4_PAE | X86_CR4_SMEP; + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP; + if (cr4 & CR4_RESERVED_BITS) return 1; @@ -744,9 +744,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); - if ((cr4 ^ old_cr4) & X86_CR4_SMAP) - update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false); - if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) kvm_update_cpuid(vcpu); @@ -3196,8 +3193,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) { - struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; - u64 xstate_bv = xsave->xsave_hdr.xstate_bv; + struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; + u64 xstate_bv = xsave->header.xfeatures; u64 valid; /* @@ -3232,7 +3229,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) { - struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); u64 valid; @@ -3243,9 +3240,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) memcpy(xsave, src, XSAVE_HDR_OFFSET); /* Set XSTATE_BV and possibly XCOMP_BV. 
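[Annotation] The state-> to state. churn through kvm/x86.c follows from the FPU rework's container change: the separately allocated state pointer in struct fpu becomes an embedded union, and the i387_* structs are renamed. A sketch of the 4.2-era types for reading these hunks (field layout abbreviated):

union fpregs_state {
	struct fregs_state	fsave;		/* legacy FNSAVE  */
	struct fxregs_state	fxsave;		/* FXSAVE         */
	struct swregs_state	soft;		/* math emulation */
	struct xregs_state	xsave;		/* XSAVE*         */
};

struct fpu {
	unsigned char		fpstate_active;
	union fpregs_state	state;	/* was: union thread_xstate *state */
	/* ... */
};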
*/ - xsave->xsave_hdr.xstate_bv = xstate_bv; + xsave->header.xfeatures = xstate_bv; if (cpu_has_xsaves) - xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; + xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; /* * Copy each region from the non-compacted offset to the @@ -3277,8 +3274,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, fill_xsave((u8 *) guest_xsave->region, vcpu); } else { memcpy(guest_xsave->region, - &vcpu->arch.guest_fpu.state->fxsave, - sizeof(struct i387_fxsave_struct)); + &vcpu->arch.guest_fpu.state.fxsave, + sizeof(struct fxregs_state)); *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = XSTATE_FPSSE; } @@ -3302,8 +3299,8 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, } else { if (xstate_bv & ~XSTATE_FPSSE) return -EINVAL; - memcpy(&vcpu->arch.guest_fpu.state->fxsave, - guest_xsave->region, sizeof(struct i387_fxsave_struct)); + memcpy(&vcpu->arch.guest_fpu.state.fxsave, + guest_xsave->region, sizeof(struct fxregs_state)); } return 0; } @@ -6197,6 +6194,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) return; page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (is_error_page(page)) + return; kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); /* @@ -6597,11 +6596,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct fpu *fpu = ¤t->thread.fpu; int r; sigset_t sigsaved; - if (!tsk_used_math(current) && init_fpu(current)) - return -ENOMEM; + fpu__activate_curr(fpu); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -6971,8 +6970,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct i387_fxsave_struct *fxsave = - &vcpu->arch.guest_fpu.state->fxsave; + struct fxregs_state *fxsave = + &vcpu->arch.guest_fpu.state.fxsave; memcpy(fpu->fpr, fxsave->st_space, 128); fpu->fcw = fxsave->cwd; @@ -6988,8 +6987,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct i387_fxsave_struct *fxsave = - &vcpu->arch.guest_fpu.state->fxsave; + struct fxregs_state *fxsave = + &vcpu->arch.guest_fpu.state.fxsave; memcpy(fxsave->st_space, fpu->fpr, 128); fxsave->cwd = fpu->fcw; @@ -7003,17 +7002,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } -int fx_init(struct kvm_vcpu *vcpu) +static void fx_init(struct kvm_vcpu *vcpu) { - int err; - - err = fpu_alloc(&vcpu->arch.guest_fpu); - if (err) - return err; - - fpu_finit(&vcpu->arch.guest_fpu); + fpstate_init(&vcpu->arch.guest_fpu.state); if (cpu_has_xsaves) - vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = + vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; /* @@ -7022,14 +7015,6 @@ int fx_init(struct kvm_vcpu *vcpu) vcpu->arch.xcr0 = XSTATE_FP; vcpu->arch.cr0 |= X86_CR0_ET; - - return 0; -} -EXPORT_SYMBOL_GPL(fx_init); - -static void fx_free(struct kvm_vcpu *vcpu) -{ - fpu_free(&vcpu->arch.guest_fpu); } void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) @@ -7045,7 +7030,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); - fpu_restore_checking(&vcpu->arch.guest_fpu); + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); 
trace_kvm_fpu(1); } @@ -7057,10 +7042,12 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 0; - fpu_save_init(&vcpu->arch.guest_fpu); + copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); __kernel_fpu_end(); ++vcpu->stat.fpu_reload; - kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); + if (!vcpu->arch.eager_fpu) + kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); + trace_kvm_fpu(0); } @@ -7069,18 +7056,27 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvmclock_reset(vcpu); free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); - fx_free(vcpu); kvm_x86_ops->vcpu_free(vcpu); } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { + struct kvm_vcpu *vcpu; + if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) printk_once(KERN_WARNING "kvm: SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); - return kvm_x86_ops->vcpu_create(kvm, id); + + vcpu = kvm_x86_ops->vcpu_create(kvm, id); + + /* + * Activate fpu unconditionally in case the guest needs eager FPU. It will be + * deactivated soon if it doesn't. + */ + kvm_x86_ops->fpu_activate(vcpu); + return vcpu; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) @@ -7125,7 +7121,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); vcpu_put(vcpu); - fx_free(vcpu); kvm_x86_ops->vcpu_free(vcpu); } @@ -7351,9 +7346,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_mce_banks; } - r = fx_init(vcpu); - if (r) - goto fail_free_wbinvd_dirty_mask; + fx_init(vcpu); vcpu->arch.ia32_tsc_adjust_msr = 0x0; vcpu->arch.pv_time_enabled = false; @@ -7367,8 +7360,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_pmu_init(vcpu); return 0; -fail_free_wbinvd_dirty_mask: - free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); + fail_free_mce_banks: kfree(vcpu->arch.mce_banks); fail_free_lapic: diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133cc099..27f8eea0d6eb 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -70,7 +70,7 @@ #include <asm/e820.h> #include <asm/mce.h> #include <asm/io.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/stackprotector.h> #include <asm/reboot.h> /* for struct machine_ops */ #include <asm/kvm_para.h> diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index c9f2d9ba8dd8..e5e3ed8dc079 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -22,7 +22,7 @@ #include <linux/sched.h> #include <linux/types.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/asm.h> void *_mmx_memcpy(void *to, const void *from, size_t len) diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 5eb715087b80..e407941d0488 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -38,8 +38,6 @@ #ifdef CONFIG_PREEMPT THUNK ___preempt_schedule, preempt_schedule -#ifdef CONFIG_CONTEXT_TRACKING - THUNK ___preempt_schedule_context, preempt_schedule_context -#endif + THUNK ___preempt_schedule_notrace, preempt_schedule_notrace #endif diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index f89ba4e93025..2198902329b5 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -49,9 +49,7 @@ #ifdef CONFIG_PREEMPT THUNK ___preempt_schedule, preempt_schedule -#ifdef CONFIG_CONTEXT_TRACKING - THUNK ___preempt_schedule_context, preempt_schedule_context -#endif + THUNK ___preempt_schedule_notrace, preempt_schedule_notrace #endif #if defined(CONFIG_TRACE_IRQFLAGS) \ diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 
e2f5e21c03b3..91d93b95bd86 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -647,7 +647,8 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. * @@ -668,7 +669,8 @@ EXPORT_SYMBOL(_copy_to_user); * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. * diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index dc8adad10a2f..dd76a05729b0 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c @@ -30,7 +30,7 @@ static void fclex(void) } /* Needs to be externally visible */ -void finit_soft_fpu(struct i387_soft_struct *soft) +void fpstate_init_soft(struct swregs_state *soft) { struct address *oaddr, *iaddr; memset(soft, 0, sizeof(*soft)); @@ -52,7 +52,7 @@ void finit_soft_fpu(struct i387_soft_struct *soft) void finit(void) { - finit_soft_fpu(&current->thread.fpu.state->soft); + fpstate_init_soft(&current->thread.fpu.state.soft); } /* diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9b868124128d..f37e84ab49f3 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -31,7 +31,7 @@ #include <asm/traps.h> #include <asm/desc.h> #include <asm/user.h> -#include <asm/i387.h> +#include <asm/fpu/internal.h> #include "fpu_system.h" #include "fpu_emu.h" @@ -147,13 +147,9 @@ void math_emulate(struct math_emu_info *info) unsigned long code_base = 0; unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ struct desc_struct code_descriptor; + struct fpu *fpu = &current->thread.fpu; - if (!used_math()) { - if (init_fpu(current)) { - do_group_exit(SIGKILL); - return; - } - } + fpu__activate_curr(fpu); #ifdef RE_ENTRANT_CHECKING if (emulating) { @@ -673,7 +669,7 @@ void math_abort(struct math_emu_info *info, unsigned int signal) #endif /* PARANOID */ } -#define S387 ((struct i387_soft_struct *)s387) +#define S387 ((struct swregs_state *)s387) #define sstatus_word() \ ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) @@ -682,14 +678,14 @@ int fpregs_soft_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; + struct swregs_state *s387 = &target->thread.fpu.state.soft; void *space = s387->st_space; int ret; int offset, other, i, tags, regnr, tag, newtop; RE_ENTRANT_CHECK_OFF; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0, - offsetof(struct i387_soft_struct, st_space)); + offsetof(struct swregs_state, st_space)); RE_ENTRANT_CHECK_ON; if (ret) @@ -734,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; + struct swregs_state *s387 = &target->thread.fpu.state.soft; const void *space = s387->st_space; int ret; int offset = (S387->ftop & 7) * 10, other = 80 - offset; @@ -752,7 +748,7 @@ int fpregs_soft_get(struct task_struct *target, #endif /* PECULIAR_486 */ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0, -
offsetof(struct i387_soft_struct, st_space)); + offsetof(struct swregs_state, st_space)); /* Copy all registers in stack order. */ if (!ret) diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 2c614410a5f3..9ccecb61a4fa 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -31,7 +31,7 @@ #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ == (1 << 10)) -#define I387 (current->thread.fpu.state) +#define I387 (&current->thread.fpu.state) #define FPU_info (I387->soft.info) #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 181c53bac3a7..9dc909841739 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -13,6 +13,7 @@ #include <linux/hugetlb.h> /* hstate_index_to_shift */ #include <linux/prefetch.h> /* prefetchw */ #include <linux/context_tracking.h> /* exception_enter(), ... */ +#include <linux/uaccess.h> /* faulthandler_disabled() */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ @@ -1126,9 +1127,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, /* * If we're in an interrupt, have no user context or are running - * in an atomic region then we must not take the fault: + * in a region with pagefaults disabled then we must not take the fault */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(faulthandler_disabled() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 4500142bc4aa..eecb207a2037 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -35,7 +35,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) @@ -100,6 +100,7 @@ void __kunmap_atomic(void *kvaddr) #endif pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 9ca35fc60cfe..2b7ece0e103a 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -59,6 +59,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) unsigned long vaddr; int idx, type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); @@ -117,5 +118,6 @@ iounmap_atomic(void __iomem *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 5ead4d6cf3a7..27ff21216dfa 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -351,18 +351,20 @@ int arch_ioremap_pmd_supported(void) */ void *xlate_dev_mem_ptr(phys_addr_t phys) { - void *addr; - unsigned long start = phys & PAGE_MASK; + unsigned long start = phys & PAGE_MASK; + unsigned long offset = phys & ~PAGE_MASK; + void *vaddr; /* If page is RAM, we can use __va. Otherwise ioremap and unmap. 
*/ if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void __force *)ioremap_cache(start, PAGE_SIZE); - if (addr) - addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); + vaddr = ioremap_cache(start, PAGE_SIZE); + /* Only add the offset on success and return NULL if the ioremap() failed: */ + if (vaddr) + vaddr += offset; - return addr; + return vaddr; } void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) @@ -371,7 +373,6 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) return; iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); - return; } static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c439ec478216..7a657f58bbea 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -10,13 +10,15 @@ #include <linux/syscalls.h> #include <linux/sched/sysctl.h> -#include <asm/i387.h> #include <asm/insn.h> #include <asm/mman.h> #include <asm/mmu_context.h> #include <asm/mpx.h> #include <asm/processor.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> + +#define CREATE_TRACE_POINTS +#include <asm/trace/mpx.h> static const char *mpx_mapping_name(struct vm_area_struct *vma) { @@ -32,6 +34,22 @@ static int is_mpx_vma(struct vm_area_struct *vma) return (vma->vm_ops == &mpx_vma_ops); } +static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BD_SIZE_BYTES_64; + else + return MPX_BD_SIZE_BYTES_32; +} + +static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BT_SIZE_BYTES_64; + else + return MPX_BT_SIZE_BYTES_32; +} + /* * This is really a simplified "vm_mmap". it only handles MPX * bounds tables (the bounds directory is user-allocated). @@ -47,8 +65,8 @@ static unsigned long mpx_mmap(unsigned long len) vm_flags_t vm_flags; struct vm_area_struct *vma; - /* Only bounds table and bounds directory can be allocated here */ - if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES) + /* Only bounds table can be allocated here */ + if (len != mpx_bt_size_bytes(mm)) return -EINVAL; down_write(&mm->mmap_sem); @@ -272,10 +290,9 @@ bad_opcode: * * The caller is expected to kfree() the returned siginfo_t. */ -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xsave_struct *xsave_buf) +siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) { - struct bndreg *bndregs, *bndreg; + const struct bndreg *bndregs, *bndreg; siginfo_t *info = NULL; struct insn insn; uint8_t bndregno; @@ -295,8 +312,8 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, err = -EINVAL; goto err_out; } - /* get the bndregs _area_ of the xsave structure */ - bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS); + /* get bndregs field from current task's xsave area */ + bndregs = get_xsave_field_ptr(XSTATE_BNDREGS); if (!bndregs) { err = -EINVAL; goto err_out; @@ -334,6 +351,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, err = -EINVAL; goto err_out; } + trace_mpx_bounds_register_exception(info->si_addr, bndreg); return info; err_out: /* info might be NULL, but kfree() handles that */ @@ -341,25 +359,18 @@ err_out: return ERR_PTR(err); } -static __user void *task_get_bounds_dir(struct task_struct *tsk) +static __user void *mpx_get_bounds_dir(void) { - struct bndcsr *bndcsr; + const struct bndcsr *bndcsr; if (!cpu_feature_enabled(X86_FEATURE_MPX)) return MPX_INVALID_BOUNDS_DIR; /* - * 32-bit binaries on 64-bit kernels are currently - * unsupported. 
- */ - if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32)) - return MPX_INVALID_BOUNDS_DIR; - /* * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. */ - fpu_save_init(&tsk->thread.fpu); - bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; @@ -378,10 +389,10 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk) (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); } -int mpx_enable_management(struct task_struct *tsk) +int mpx_enable_management(void) { void __user *bd_base = MPX_INVALID_BOUNDS_DIR; - struct mm_struct *mm = tsk->mm; + struct mm_struct *mm = current->mm; int ret = 0; /* @@ -390,11 +401,12 @@ int mpx_enable_management(struct task_struct *tsk) * directory into XSAVE/XRSTOR Save Area and enable MPX through * XRSTOR instruction. * - * fpu_xsave() is expected to be very expensive. Storing the bounds - * directory here means that we do not have to do xsave in the unmap - * path; we can just use mm->bd_addr instead. + * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is + * expected to be relatively expensive. Storing the bounds + * directory here means that we do not have to do xsave in the + * unmap path; we can just use mm->bd_addr instead. */ - bd_base = task_get_bounds_dir(tsk); + bd_base = mpx_get_bounds_dir(); down_write(&mm->mmap_sem); mm->bd_addr = bd_base; if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) @@ -404,7 +416,7 @@ int mpx_enable_management(struct task_struct *tsk) return ret; } -int mpx_disable_management(struct task_struct *tsk) +int mpx_disable_management(void) { struct mm_struct *mm = current->mm; @@ -417,29 +429,59 @@ int mpx_disable_management(struct task_struct *tsk) return 0; } +static int mpx_cmpxchg_bd_entry(struct mm_struct *mm, + unsigned long *curval, + unsigned long __user *addr, + unsigned long old_val, unsigned long new_val) +{ + int ret; + /* + * user_atomic_cmpxchg_inatomic() actually uses sizeof() + * the pointer that we pass to it to figure out how much + * data to cmpxchg. We have to be careful here not to + * pass a pointer to a 64-bit data type when we only want + * a 32-bit copy. + */ + if (is_64bit_mm(mm)) { + ret = user_atomic_cmpxchg_inatomic(curval, + addr, old_val, new_val); + } else { + u32 uninitialized_var(curval_32); + u32 old_val_32 = old_val; + u32 new_val_32 = new_val; + u32 __user *addr_32 = (u32 __user *)addr; + + ret = user_atomic_cmpxchg_inatomic(&curval_32, + addr_32, old_val_32, new_val_32); + *curval = curval_32; + } + return ret; +} + /* - * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each - * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, + * With 32-bit mode, a bounds directory is 4MB, and the size of each + * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB, * and the size of each bounds table is 4MB. 
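The sizeof()-driven width selection in mpx_cmpxchg_bd_entry() above is the crux of the new 32-bit-on-64-bit support. A minimal userspace sketch of the same idea, using GCC's __atomic builtins (illustrative only; is_64bit stands in for is_64bit_mm(), and the kernel version additionally returns a fault code):

#include <stdint.h>
#include <stdbool.h>

/*
 * Compare-and-swap one directory slot using an operand size that
 * matches the task's pointer width.  The builtin derives the access
 * width from the pointer type, just as user_atomic_cmpxchg_inatomic()
 * derives it from sizeof(*ptr) -- so handing it a pointer to a 64-bit
 * slot for a 32-bit task would silently touch 8 bytes instead of 4.
 */
static void cmpxchg_slot(void *slot, bool is_64bit,
			 unsigned long *curval,
			 unsigned long old_val, unsigned long new_val)
{
	if (is_64bit) {
		uint64_t expected = old_val;
		__atomic_compare_exchange_n((uint64_t *)slot, &expected,
					    (uint64_t)new_val, false,
					    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
		*curval = expected;
	} else {
		uint32_t expected = old_val;	/* deliberately 32-bit */
		__atomic_compare_exchange_n((uint32_t *)slot, &expected,
					    (uint32_t)new_val, false,
					    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
		*curval = expected;
	}
}

The caller detects a lost race the same way the kernel code does: by comparing *curval against the value it expected to find.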
*/ -static int allocate_bt(long __user *bd_entry) +static int allocate_bt(struct mm_struct *mm, long __user *bd_entry) { unsigned long expected_old_val = 0; unsigned long actual_old_val = 0; unsigned long bt_addr; + unsigned long bd_new_entry; int ret = 0; /* * Carve the virtual space out of userspace for the new * bounds table: */ - bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES); + bt_addr = mpx_mmap(mpx_bt_size_bytes(mm)); if (IS_ERR((void *)bt_addr)) return PTR_ERR((void *)bt_addr); /* * Set the valid flag (kinda like _PAGE_PRESENT in a pte) */ - bt_addr = bt_addr | MPX_BD_ENTRY_VALID_FLAG; + bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG; /* * Go poke the address of the new bounds table in to the @@ -452,8 +494,8 @@ static int allocate_bt(long __user *bd_entry) * mmap_sem at this point, unlike some of the other part * of the MPX code that have to pagefault_disable(). */ - ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, - expected_old_val, bt_addr); + ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry, + expected_old_val, bd_new_entry); if (ret) goto out_unmap; @@ -481,9 +523,10 @@ static int allocate_bt(long __user *bd_entry) ret = -EINVAL; goto out_unmap; } + trace_mpx_new_bounds_table(bt_addr); return 0; out_unmap: - vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES); + vm_munmap(bt_addr, mpx_bt_size_bytes(mm)); return ret; } @@ -498,12 +541,13 @@ out_unmap: * bound table is 16KB. With 64-bit mode, the size of BD is 2GB, * and the size of each bound table is 4MB. */ -static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) +static int do_mpx_bt_fault(void) { unsigned long bd_entry, bd_base; - struct bndcsr *bndcsr; + const struct bndcsr *bndcsr; + struct mm_struct *mm = current->mm; - bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) return -EINVAL; /* @@ -520,13 +564,13 @@ static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) * the directory is. */ if ((bd_entry < bd_base) || - (bd_entry >= bd_base + MPX_BD_SIZE_BYTES)) + (bd_entry >= bd_base + mpx_bd_size_bytes(mm))) return -EINVAL; - return allocate_bt((long __user *)bd_entry); + return allocate_bt(mm, (long __user *)bd_entry); } -int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) +int mpx_handle_bd_fault(void) { /* * Userspace never asked us to manage the bounds tables, @@ -535,7 +579,7 @@ int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) if (!kernel_managing_mpx_tables(current->mm)) return -EINVAL; - if (do_mpx_bt_fault(xsave_buf)) { + if (do_mpx_bt_fault()) { force_sig(SIGSEGV, current); /* * The force_sig() is essentially "handling" this @@ -572,29 +616,55 @@ static int mpx_resolve_fault(long __user *addr, int write) return 0; } +static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, + unsigned long bd_entry) +{ + unsigned long bt_addr = bd_entry; + int align_to_bytes; + /* + * Bit 0 in a bt_entry is always the valid bit. + */ + bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG; + /* + * Tables are naturally aligned at 8-byte boundaries + * on 64-bit and 4-byte boundaries on 32-bit. The + * documentation makes it appear that the low bits + * are ignored by the hardware, so we do the same. + */ + if (is_64bit_mm(mm)) + align_to_bytes = 8; + else + align_to_bytes = 4; + bt_addr &= ~(align_to_bytes-1); + return bt_addr; +} + /* * Get the base of bounds tables pointed by specific bounds * directory entry. 
*/ static int get_bt_addr(struct mm_struct *mm, - long __user *bd_entry, unsigned long *bt_addr) + long __user *bd_entry_ptr, + unsigned long *bt_addr_result) { int ret; int valid_bit; + unsigned long bd_entry; + unsigned long bt_addr; - if (!access_ok(VERIFY_READ, (bd_entry), sizeof(*bd_entry))) + if (!access_ok(VERIFY_READ, (bd_entry_ptr), sizeof(*bd_entry_ptr))) return -EFAULT; while (1) { int need_write = 0; pagefault_disable(); - ret = get_user(*bt_addr, bd_entry); + ret = get_user(bd_entry, bd_entry_ptr); pagefault_enable(); if (!ret) break; if (ret == -EFAULT) - ret = mpx_resolve_fault(bd_entry, need_write); + ret = mpx_resolve_fault(bd_entry_ptr, need_write); /* * If we could not resolve the fault, consider it * userspace's fault and error out. @@ -603,8 +673,8 @@ static int get_bt_addr(struct mm_struct *mm, return ret; } - valid_bit = *bt_addr & MPX_BD_ENTRY_VALID_FLAG; - *bt_addr &= MPX_BT_ADDR_MASK; + valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG; + bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry); /* * When the kernel is managing bounds tables, a bounds directory @@ -613,7 +683,7 @@ static int get_bt_addr(struct mm_struct *mm, * data in the address field, we know something is wrong. This * -EINVAL return will cause a SIGSEGV. */ - if (!valid_bit && *bt_addr) + if (!valid_bit && bt_addr) return -EINVAL; /* * Do we have a completely zeroed bt entry? That is OK. It @@ -624,19 +694,100 @@ static int get_bt_addr(struct mm_struct *mm, if (!valid_bit) return -ENOENT; + *bt_addr_result = bt_addr; return 0; } +static inline int bt_entry_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BT_ENTRY_BYTES_64; + else + return MPX_BT_ENTRY_BYTES_32; +} + +/* + * Takes a virtual address and turns it into the offset in bytes + * inside of the bounds table where the bounds table entry + * controlling 'addr' can be found. + */ +static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, + unsigned long addr) +{ + unsigned long bt_table_nr_entries; + unsigned long offset = addr; + + if (is_64bit_mm(mm)) { + /* Bottom 3 bits are ignored on 64-bit */ + offset >>= 3; + bt_table_nr_entries = MPX_BT_NR_ENTRIES_64; + } else { + /* Bottom 2 bits are ignored on 32-bit */ + offset >>= 2; + bt_table_nr_entries = MPX_BT_NR_ENTRIES_32; + } + /* + * We know the size of the table in to which we are + * indexing, and we have eliminated all the low bits + * which are ignored for indexing. + * + * Mask out all the high bits which we do not need + * to index in to the table. Note that the tables + * are always powers of two so this gives us a proper + * mask. + */ + offset &= (bt_table_nr_entries-1); + /* + * We now have an entry offset in terms of *entries* in + * the table. We need to scale it back up to bytes. + */ + offset *= bt_entry_size_bytes(mm); + return offset; +} + +/* + * How much virtual address space does a single bounds + * directory entry cover? + * + * Note, we need a long long because 4GB doesn't fit in + * to a long on 32-bit. + */ +static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) +{ + unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits); + if (is_64bit_mm(mm)) + return virt_space / MPX_BD_NR_ENTRIES_64; + else + return virt_space / MPX_BD_NR_ENTRIES_32; +} + /* * Free the backing physical pages of bounds table 'bt_addr'. * Assume start...end is within that bounds table. 
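To make the indexing arithmetic in mpx_get_bt_entry_offset_bytes() concrete, here is a worked, standalone sketch of the 64-bit branch. The constants are derived from the geometry in the surrounding comments (a 4MB table of 32-byte entries gives 2^17 entries); treat them as illustrative assumptions, not the kernel's MPX_* definitions:

#include <stdio.h>

/* Assumed geometry: 4MB bounds table / 32-byte entries = 2^17 entries. */
#define BT_NR_ENTRIES_64	(1UL << 17)
#define BT_ENTRY_BYTES_64	32

static unsigned long bt_entry_offset_64(unsigned long addr)
{
	unsigned long offset = addr >> 3;	/* bottom 3 bits ignored */

	offset &= (BT_NR_ENTRIES_64 - 1);	/* index within one table */
	return offset * BT_ENTRY_BYTES_64;	/* entries -> bytes */
}

int main(void)
{
	/* 0x1008 >> 3 = 0x201, mask is a no-op, times 32 = 0x4020 */
	printf("%#lx\n", bt_entry_offset_64(0x1008UL));
	return 0;
}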
*/ -static int zap_bt_entries(struct mm_struct *mm, +static noinline int zap_bt_entries_mapping(struct mm_struct *mm, unsigned long bt_addr, - unsigned long start, unsigned long end) + unsigned long start_mapping, unsigned long end_mapping) { struct vm_area_struct *vma; unsigned long addr, len; + unsigned long start; + unsigned long end; + + /* + * if we 'end' on a boundary, the offset will be 0 which + * is not what we want. Back it up a byte to get the + * last bt entry. Then once we have the entry itself, + * move 'end' back up by the table entry size. + */ + start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping); + end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1); + /* + * Move end back up by one entry. Among other things + * this ensures that it remains page-aligned and does + * not screw up zap_page_range() + */ + end += bt_entry_size_bytes(mm); /* * Find the first overlapping vma. If vma->vm_start > start, there @@ -648,7 +799,7 @@ static int zap_bt_entries(struct mm_struct *mm, return -EINVAL; /* - * A NUMA policy on a VM_MPX VMA could cause this bouds table to + * A NUMA policy on a VM_MPX VMA could cause this bounds table to * be split. So we need to look across the entire 'start -> end' * range of this bounds table, find all of the VM_MPX VMAs, and * zap only those. @@ -666,27 +817,65 @@ static int zap_bt_entries(struct mm_struct *mm, len = min(vma->vm_end, end) - addr; zap_page_range(vma, addr, len, NULL); + trace_mpx_unmap_zap(addr, addr+len); vma = vma->vm_next; addr = vma->vm_start; } - return 0; } -static int unmap_single_bt(struct mm_struct *mm, +static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, + unsigned long addr) +{ + /* + * There are several ways to derive the bd offsets. We + * use the following approach here: + * 1. We know the size of the virtual address space + * 2. We know the number of entries in a bounds table + * 3. We know that each entry covers a fixed amount of + * virtual address space. + * So, we can just divide the virtual address by the + * virtual space used by one entry to determine which + * entry "controls" the given virtual address. + */ + if (is_64bit_mm(mm)) { + int bd_entry_size = 8; /* 64-bit pointer */ + /* + * Take the 64-bit addressing hole in to account. + */ + addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1); + return (addr / bd_entry_virt_space(mm)) * bd_entry_size; + } else { + int bd_entry_size = 4; /* 32-bit pointer */ + /* + * 32-bit has no hole so this case needs no mask + */ + return (addr / bd_entry_virt_space(mm)) * bd_entry_size; + } + /* + * The two return calls above are exact copies. If we + * pull out a single copy and put it in here, gcc won't + * realize that we're doing a power-of-2 divide and use + * shifts. It uses a real divide. If we put them up + * there, it manages to figure it out (gcc 4.8.3). 
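Similarly, mpx_get_bd_entry_offset() above reduces to "virtual address divided by the span of one directory entry, scaled by the entry size". A hedged standalone version of its 64-bit branch; 48 implemented virtual bits and 2^28 directory entries are assumptions that match the commentary, not the kernel constants:

/* Assumed: 48 virtual bits, 2^28 directory entries, 8-byte entries. */
#define VIRT_BITS		48
#define BD_NR_ENTRIES_64	(1ULL << 28)

static unsigned long long bd_entry_offset_64(unsigned long long addr)
{
	/* Each entry controls 2^48 / 2^28 = 2^20 bytes (1MB) of VA. */
	unsigned long long space_per_entry =
		(1ULL << VIRT_BITS) / BD_NR_ENTRIES_64;

	addr &= (1ULL << VIRT_BITS) - 1;	/* mask the canonical hole */
	return (addr / space_per_entry) * 8;	/* scale to a byte offset */
}
/* Example: addr = 3MB lands in entry 3, so the offset is 3 * 8 = 24. */

Because both divisors are powers of two, the compiler can lower the divides to shifts, which is exactly the gcc behavior the comment above is working around.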
+ */ +} + +static int unmap_entire_bt(struct mm_struct *mm, long __user *bd_entry, unsigned long bt_addr) { unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; - unsigned long actual_old_val = 0; + unsigned long uninitialized_var(actual_old_val); int ret; while (1) { int need_write = 1; + unsigned long cleared_bd_entry = 0; pagefault_disable(); - ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, - expected_old_val, 0); + ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, + bd_entry, expected_old_val, cleared_bd_entry); pagefault_enable(); if (!ret) break; @@ -705,9 +894,8 @@ static int unmap_single_bt(struct mm_struct *mm, if (actual_old_val != expected_old_val) { /* * Someone else raced with us to unmap the table. - * There was no bounds table pointed to by the - * directory, so declare success. Somebody freed - * it. + * That is OK, since we were both trying to do + * the same thing. Declare success. */ if (!actual_old_val) return 0; @@ -720,176 +908,113 @@ static int unmap_single_bt(struct mm_struct *mm, */ return -EINVAL; } - /* * Note, we are likely being called under do_munmap() already. To * avoid recursion, do_munmap() will check whether it comes * from one bounds table through VM_MPX flag. */ - return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES); + return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); } -/* - * If the bounds table pointed by bounds directory 'bd_entry' is - * not shared, unmap this whole bounds table. Otherwise, only free - * those backing physical pages of bounds table entries covered - * in this virtual address region start...end. - */ -static int unmap_shared_bt(struct mm_struct *mm, - long __user *bd_entry, unsigned long start, - unsigned long end, bool prev_shared, bool next_shared) +static int try_unmap_single_bt(struct mm_struct *mm, + unsigned long start, unsigned long end) { - unsigned long bt_addr; - int ret; - - ret = get_bt_addr(mm, bd_entry, &bt_addr); + struct vm_area_struct *next; + struct vm_area_struct *prev; /* - * We could see an "error" ret for not-present bounds - * tables (not really an error), or actual errors, but - * stop unmapping either way. + * "bta" == Bounds Table Area: the area controlled by the + * bounds table that we are unmapping. */ - if (ret) - return ret; - - if (prev_shared && next_shared) - ret = zap_bt_entries(mm, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); - else if (prev_shared) - ret = zap_bt_entries(mm, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), - bt_addr+MPX_BT_SIZE_BYTES); - else if (next_shared) - ret = zap_bt_entries(mm, bt_addr, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); - else - ret = unmap_single_bt(mm, bd_entry, bt_addr); - - return ret; -} - -/* - * A virtual address region being munmap()ed might share bounds table - * with adjacent VMAs. We only need to free the backing physical - * memory of these shared bounds tables entries covered in this virtual - * address region. 
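The race handling in unmap_entire_bt() follows a common lock-free pattern: atomically swing the entry from "ours" to empty, and treat "somebody already emptied it" as success. A compressed userspace sketch (GCC __sync builtin; the error code is illustrative):

#include <errno.h>

/*
 * Clear *entry if it still holds the value we expect.  Three outcomes:
 *   - we cleared it ourselves: success;
 *   - a racing thread cleared it first (old == 0): also success,
 *     since both parties wanted the same end state;
 *   - the entry changed to something else entirely: corruption.
 */
static int clear_entry(unsigned long *entry, unsigned long expected)
{
	unsigned long old = __sync_val_compare_and_swap(entry, expected, 0UL);

	if (old == expected || old == 0)
		return 0;
	return -EINVAL;
}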
- */ -static int unmap_edge_bts(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ + unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1); + unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm); + unsigned long uninitialized_var(bt_addr); + void __user *bde_vaddr; int ret; - long __user *bde_start, *bde_end; - struct vm_area_struct *prev, *next; - bool prev_shared = false, next_shared = false; - - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); - /* - * Check whether bde_start and bde_end are shared with adjacent - * VMAs. - * - * We already unliked the VMAs from the mm's rbtree so 'start' + * We already unlinked the VMAs from the mm's rbtree so 'start' * is guaranteed to be in a hole. This gets us the first VMA * before the hole in to 'prev' and the next VMA after the hole * in to 'next'. */ next = find_vma_prev(mm, start, &prev); - if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1)) - == bde_start) - prev_shared = true; - if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start)) - == bde_end) - next_shared = true; - /* - * This virtual address region being munmap()ed is only - * covered by one bounds table. - * - * In this case, if this table is also shared with adjacent - * VMAs, only part of the backing physical memory of the bounds - * table need be freeed. Otherwise the whole bounds table need - * be unmapped. - */ - if (bde_start == bde_end) { - return unmap_shared_bt(mm, bde_start, start, end, - prev_shared, next_shared); + * Do not count other MPX bounds table VMAs as neighbors. + * Although theoretically possible, we do not allow bounds + * tables for bounds tables so our heads do not explode. + * If we count them as neighbors here, we may end up with + * lots of tables even though we have no actual table + * entries in use. + */ + while (next && is_mpx_vma(next)) + next = next->vm_next; + while (prev && is_mpx_vma(prev)) + prev = prev->vm_prev; + /* + * We know 'start' and 'end' lie within an area controlled + * by a single bounds table. See if there are any other + * VMAs controlled by that bounds table. If there are not + * then we can "expand" the area we are unmapping to possibly + * cover the entire table. + */ + next = find_vma_prev(mm, start, &prev); + if ((!prev || prev->vm_end <= bta_start_vaddr) && + (!next || next->vm_start >= bta_end_vaddr)) { + /* + * No neighbor VMAs controlled by same bounds + * table. Try to unmap the whole thing + */ + start = bta_start_vaddr; + end = bta_end_vaddr; } + bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); + ret = get_bt_addr(mm, bde_vaddr, &bt_addr); /* - * If more than one bounds tables are covered in this virtual - * address region being munmap()ed, we need to separately check - * whether bde_start and bde_end are shared with adjacent VMAs. + * No bounds table there, so nothing to unmap. */ - ret = unmap_shared_bt(mm, bde_start, start, end, prev_shared, false); - if (ret) - return ret; - ret = unmap_shared_bt(mm, bde_end, start, end, false, next_shared); + if (ret == -ENOENT) { + ret = 0; + return 0; + } if (ret) return ret; - - return 0; + /* + * We are unmapping an entire table. Either because the + * unmap that started this whole process was large enough + * to cover an entire table, or that the unmap was small + * but was the area covered by a bounds table. 
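The rewritten mpx_unmap_tables() just below walks an arbitrary [start, end) range one aligned, bounds-table-sized chunk at a time and hands each chunk to try_unmap_single_bt(). The chunking idiom in isolation (span must be a power of two; the names are hypothetical):

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Visit [start, end) in pieces that never cross a span-aligned boundary. */
static void for_each_aligned_chunk(unsigned long start, unsigned long end,
				   unsigned long span,
				   void (*visit)(unsigned long, unsigned long))
{
	while (start < end) {
		unsigned long next = ALIGN_UP(start + 1, span);
		unsigned long stop = (next < end) ? next : end;

		visit(start, stop);
		start = next;
	}
}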
+ */ + if ((start == bta_start_vaddr) && + (end == bta_end_vaddr)) + return unmap_entire_bt(mm, bde_vaddr, bt_addr); + return zap_bt_entries_mapping(mm, bt_addr, start, end); } static int mpx_unmap_tables(struct mm_struct *mm, unsigned long start, unsigned long end) { - int ret; - long __user *bd_entry, *bde_start, *bde_end; - unsigned long bt_addr; - - /* - * "Edge" bounds tables are those which are being used by the region - * (start -> end), but that may be shared with adjacent areas. If they - * turn out to be completely unshared, they will be freed. If they are - * shared, we will free the backing store (like an MADV_DONTNEED) for - * areas used by this region. - */ - ret = unmap_edge_bts(mm, start, end); - switch (ret) { - /* non-present tables are OK */ - case 0: - case -ENOENT: - /* Success, or no tables to unmap */ - break; - case -EINVAL: - case -EFAULT: - default: - return ret; - } - - /* - * Only unmap the bounds table that are - * 1. fully covered - * 2. not at the edges of the mapping, even if full aligned - */ - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); - for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) { - ret = get_bt_addr(mm, bd_entry, &bt_addr); - switch (ret) { - case 0: - break; - case -ENOENT: - /* No table here, try the next one */ - continue; - case -EINVAL: - case -EFAULT: - default: - /* - * Note: we are being strict here. - * Any time we run in to an issue - * unmapping tables, we stop and - * SIGSEGV. - */ - return ret; - } - - ret = unmap_single_bt(mm, bd_entry, bt_addr); + unsigned long one_unmap_start; + trace_mpx_unmap_search(start, end); + + one_unmap_start = start; + while (one_unmap_start < end) { + int ret; + unsigned long next_unmap_start = ALIGN(one_unmap_start+1, + bd_entry_virt_space(mm)); + unsigned long one_unmap_end = end; + /* + * if the end is beyond the current bounds table, + * move it back so we only deal with a single one + * at a time + */ + if (one_unmap_end > next_unmap_start) + one_unmap_end = next_unmap_start; + ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end); if (ret) return ret; - } + one_unmap_start = next_unmap_start; + } return 0; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 987514396c1e..ddeff4844a10 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -559,6 +559,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, if (is_ereg(dst_reg)) EMIT1(0x41); EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); + + /* emit 'movzwl eax, ax' */ + if (is_ereg(dst_reg)) + EMIT3(0x45, 0x0F, 0xB7); + else + EMIT2(0x0F, 0xB7); + EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); break; case 32: /* emit 'bswap eax' to swap lower 4 bytes */ @@ -577,6 +584,27 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm32) { + case 16: + /* emit 'movzwl eax, ax' to zero extend 16-bit + * into 64 bit + */ + if (is_ereg(dst_reg)) + EMIT3(0x45, 0x0F, 0xB7); + else + EMIT2(0x0F, 0xB7); + EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); + break; + case 32: + /* emit 'mov eax, eax' to clear upper 32-bits */ + if (is_ereg(dst_reg)) + EMIT1(0x45); + EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); + break; + case 64: + /* nop */ + break; + } break; /* ST: *(u8*)(dst_reg + off) = imm */ @@ -938,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog) } ctx.cleanup_addr = proglen; - for (pass = 0; pass < 10; pass++) { + /* JITed image 
shrinks with every pass and the loop iterates + * until the image stops shrinking. Very large bpf programs + * may converge on the last pass. In such a case, do one more + * pass to emit the final image + */ + for (pass = 0; pass < 10 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { image = NULL; diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index e4695985f9de..14a63ed6fe09 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge) kfree(info); } +/* + * An IO port or MMIO resource assigned to a PCI host bridge may be + * consumed by the host bridge itself or available to its child + * bus/devices. The ACPI specification defines a bit (Producer/Consumer) + * to tell whether the resource is consumed by the host bridge itself, + * but firmware hasn't used that bit consistently, so we can't rely on it. + * + * On x86 and IA64 platforms, all IO port and MMIO resources are assumed + * to be available to child bus/devices except one special case: + * IO port [0xCF8-0xCFF] is consumed by the host bridge itself + * to access PCI configuration space. + * + * So explicitly filter out PCI CFG IO ports [0xCF8-0xCFF]. + */ +static bool resource_is_pcicfg_ioport(struct resource *res) +{ + return (res->flags & IORESOURCE_IO) && + res->start == 0xCF8 && res->end == 0xCFF; +} + static void probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, int busnum, int domain, @@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info, "no IO and memory resources present in _CRS\n"); else resource_list_for_each_entry_safe(entry, tmp, list) { - if ((entry->res->flags & IORESOURCE_WINDOW) == 0 || - (entry->res->flags & IORESOURCE_DISABLED)) + if ((entry->res->flags & IORESOURCE_DISABLED) || + resource_is_pcicfg_ioport(entry->res)) resource_list_destroy_entry(entry); else entry->res->name = info->name; @@ -462,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_sysdata *sd = bridge->bus->sysdata; - - ACPI_COMPANION_SET(&bridge->dev, sd->companion); + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. 
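The new loop condition "pass < 10 || image" in the BPF JIT encodes a two-phase scheme: up to ten measuring passes (image == NULL) until the emitted length stops shrinking, then exactly one emitting pass into the allocated buffer. A toy model of the control flow (jit_pass() is a hypothetical stand-in for do_jit(); error paths elided):

#include <stdlib.h>

/* Hypothetical: returns the byte length a pass would produce; writes
 * instructions only when image is non-NULL. */
extern int jit_pass(unsigned char *image, int oldlen);

static unsigned char *jit_converge(int *final_len)
{
	unsigned char *image = NULL;
	int oldlen = 0, len = 0, pass;

	for (pass = 0; pass < 10 || image; pass++) {
		len = jit_pass(image, oldlen);
		if (image)		/* the emitting pass ran: done */
			break;
		if (len == oldlen)	/* size stable: next pass emits */
			image = malloc(len);
		oldlen = len;
	}
	*final_len = len;
	return image;
}

Keeping "|| image" in the condition is what guarantees the final emitting pass still happens for a very large program that only converges on pass ten.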
+ */ + if (!bridge->dev.parent) { + struct pci_sysdata *sd = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, sd->companion); + } return 0; } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 02744df576d5..3b984c3aa1b0 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -501,6 +501,8 @@ void __init efi_init(void) if (efi_enabled(EFI_DBG)) print_efi_memmap(); + + efi_esrt_init(); } void __init efi_late_init(void) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 757678fb26e1..0d7dd1f5ac36 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -18,10 +18,9 @@ #include <asm/mtrr.h> #include <asm/page.h> #include <asm/mce.h> -#include <asm/xcr.h> #include <asm/suspend.h> +#include <asm/fpu/internal.h> #include <asm/debugreg.h> -#include <asm/fpu-internal.h> /* pcntxt_mask */ #include <asm/cpu.h> #ifdef CONFIG_X86_32 @@ -155,6 +154,8 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(&current->active_mm->context); /* This does lldt */ + + fpu__resume_cpu(); } /** @@ -221,12 +222,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); #endif - /* - * restore XCR0 for xsave capable cpu's. - */ - if (cpu_has_xsave) - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); - fix_processor_context(); do_fpu_end(); diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 7e8a1a650435..b9531d343134 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -39,7 +39,8 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) + +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 275a3a8b78af..e97032069f88 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi +HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 94578efd3067..98088bf5906a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1423,7 +1423,7 @@ static void xen_pvh_set_cr_flags(int cpu) return; /* * For BSP, PSE PGE are set in probe_page_size_mask(), for APs - * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init. + * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu(). 
*/ if (cpu_has_pse) cr4_set_bits_and_update_boot(X86_CR4_PSE); @@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = { static void __init xen_hvm_guest_init(void) { + if (xen_pv_domain()) + return; + init_hvm_pv_info(); xen_hvm_init_shared_info(); @@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); } +#endif static bool xen_nopv = false; static __init int xen_parse_nopv(char *arg) @@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); -static uint32_t __init xen_hvm_platform(void) +static uint32_t __init xen_platform(void) { if (xen_nopv) return 0; - if (xen_pv_domain()) - return 0; - return xen_cpuid_base(); } @@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void) } EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); -const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { - .name = "Xen HVM", - .detect = xen_hvm_platform, +static void xen_set_cpu_features(struct cpuinfo_x86 *c) +{ + if (xen_pv_domain()) + clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); +} + +const struct hypervisor_x86 x86_hyper_xen = { + .name = "Xen", + .detect = xen_platform, +#ifdef CONFIG_XEN_PVHVM .init_platform = xen_hvm_guest_init, +#endif .x2apic_available = xen_x2apic_para_available, + .set_cpu_features = xen_set_cpu_features, }; -EXPORT_SYMBOL(x86_hyper_xen_hvm); -#endif +EXPORT_SYMBOL(x86_hyper_xen); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 956374c1edbc..9e2ba5c6e1dd 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -17,6 +17,56 @@ #include "xen-ops.h" #include "debugfs.h" +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; +static DEFINE_PER_CPU(char *, irq_name); +static bool xen_pvspin = true; + +#ifdef CONFIG_QUEUED_SPINLOCKS + +#include <asm/qspinlock.h> + +static void xen_qlock_kick(int cpu) +{ + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); +} + +/* + * Halt the current CPU & release it back to the host + */ +static void xen_qlock_wait(u8 *byte, u8 val) +{ + int irq = __this_cpu_read(lock_kicker_irq); + + /* If kicker interrupts not initialized yet, just spin */ + if (irq == -1) + return; + + /* clear pending */ + xen_clear_irq_pending(irq); + barrier(); + + /* + * We check the byte value after clearing pending IRQ to make sure + * that we won't miss a wakeup event because of the clearing. + * + * The sync_clear_bit() call in xen_clear_irq_pending() is atomic. + * So it is effectively a memory barrier for x86. + */ + if (READ_ONCE(*byte) != val) + return; + + /* + * If an interrupt happens here, it will leave the wakeup irq + * pending, which will cause xen_poll_irq() to return + * immediately. 
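xen_qlock_wait() above is a textbook instance of the "clear event, re-check condition, then block" sequence that avoids a lost wakeup. The shape of the argument, with the Xen event channel abstracted behind two hypothetical helpers:

extern void clear_pending_event(void);	/* stand-in for xen_clear_irq_pending() */
extern void block_until_event(void);	/* stand-in for xen_poll_irq(); returns
					 * at once if the event is already pending */

static void wait_for_byte(volatile unsigned char *byte, unsigned char val)
{
	/* 1) Discard any stale event left over from a previous round. */
	clear_pending_event();

	/* 2) Re-check after clearing: if the lock word already moved
	 *    on, the kick we are about to wait for may never arrive. */
	if (*byte != val)
		return;

	/* 3) Block.  A kick landing between (2) and (3) re-marks the
	 *    event pending, so this returns instead of sleeping forever. */
	block_until_event();
}

Doing step (1) before step (2) is the whole point: clearing after the check would reopen the window in which a wakeup can be lost.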
+ */ + + /* Block until irq becomes pending (or perhaps a spurious wakeup) */ + xen_poll_irq(irq); +} + +#else /* CONFIG_QUEUED_SPINLOCKS */ + enum xen_contention_stat { TAKEN_SLOW, TAKEN_SLOW_PICKUP, @@ -100,12 +150,9 @@ struct xen_lock_waiting { __ticket_t want; }; -static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; -static DEFINE_PER_CPU(char *, irq_name); static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); static cpumask_t waiting_cpus; -static bool xen_pvspin = true; __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) { int irq = __this_cpu_read(lock_kicker_irq); @@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) } } } +#endif /* CONFIG_QUEUED_SPINLOCKS */ static irqreturn_t dummy_handler(int irq, void *dev_id) { @@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void) return; } printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); +#ifdef CONFIG_QUEUED_SPINLOCKS + __pv_init_lock_hash(); + pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_lock_ops.wait = xen_qlock_wait; + pv_lock_ops.kick = xen_qlock_kick; +#else pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; +#endif } /* @@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg) } early_param("xen_nopvspin", xen_parse_nopvspin); -#ifdef CONFIG_XEN_DEBUG_FS +#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS) static struct dentry *d_spin_debug; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9497698645a..53b4c0811f4f 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data) tick_resume_local(); } +static void xen_vcpu_notify_suspend(void *data) +{ + tick_suspend_local(); +} + void xen_arch_resume(void) { on_each_cpu(xen_vcpu_notify_restore, NULL, 1); } + +void xen_arch_suspend(void) +{ + on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); +} diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 172a02a6ad14..ba78ccf651e7 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt, return -EINVAL; } +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + return NULL; +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ +} + #endif /* _XTENSA_DMA_MAPPING_H */ diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 9e3571a6535c..83a44a33cfa1 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -15,10 +15,10 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/hardirq.h> +#include <linux/uaccess.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> #include <asm/hardirq.h> -#include <asm/uaccess.h> #include <asm/pgalloc.h> DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST; @@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs) /* If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) { + if (faulthandler_disabled() || !mm) { bad_page_fault(regs, address, SIGSEGV); return; } diff --git a/arch/xtensa/mm/highmem.c b/arch/xtensa/mm/highmem.c index 8cfb71ec0937..184ceadccc1a 100644 --- a/arch/xtensa/mm/highmem.c +++ b/arch/xtensa/mm/highmem.c @@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page) enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -79,6 +80,7 @@ void __kunmap_atomic(void *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic);
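With preempt_disable() folded into kmap_atomic() on x86 and xtensa alike, the caller-visible contract is unchanged but is now enforced even on !CONFIG_PREEMPT kernels: between map and unmap the task is pinned to one CPU's fixmap slot and must not sleep. Typical usage for reference (standard kernel API, shown only to illustrate the window these hunks are tightening; len is assumed to be at most PAGE_SIZE):

#include <linux/highmem.h>
#include <linux/string.h>

static void copy_into_highpage(struct page *page, const void *src, size_t len)
{
	void *dst = kmap_atomic(page);	/* preemption + pagefaults now off */

	memcpy(dst, src, len);		/* no sleeping allowed in here */
	kunmap_atomic(dst);		/* pagefault_enable(), then preempt_enable() */
}

Note the strict reverse ordering on teardown: pagefaults are re-enabled before preemption, mirroring the enable/disable pairs added throughout this series.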