388 files changed, 11416 insertions, 22653 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index fe44b2494609..df94ac1f75b6 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -428,3 +428,4 @@ source "arch/arc/Kconfig.debug"
 source "security/Kconfig"
 source "crypto/Kconfig"
 source "lib/Kconfig"
+source "kernel/power/Kconfig"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 10bc3d4e8a44..db72fec0e160 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -12,7 +12,7 @@ ifeq ($(CROSS_COMPILE),)
 CROSS_COMPILE := arc-linux-uclibc-
 endif
 
-KBUILD_DEFCONFIG := fpga_defconfig
+KBUILD_DEFCONFIG := nsim_700_defconfig
 
 cflags-y	+= -mA7 -fno-common -pipe -fno-builtin -D__linux__
 
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index cfaedd9c61c9..1c169dc74ad1 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -20,7 +20,7 @@
 		/* this is for console on PGU */
 		/* bootargs = "console=tty0 consoleblank=0"; */
 		/* this is for console on serial */
-		bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
 	};
 
 	aliases {
@@ -41,9 +41,9 @@
 			#interrupt-cells = <1>;
 		};
 
-		uart0: serial@c0000000 {
+		uart0: serial@f0000000 {
 			compatible = "ns8250";
-			reg = <0xc0000000 0x2000>;
+			reg = <0xf0000000 0x2000>;
 			interrupts = <11>;
 			clock-frequency = <3686400>;
 			baud = <115200>;
@@ -52,21 +52,21 @@
 			no-loopback-test = <1>;
 		};
 
-		pgu0: pgu@c9000000 {
+		pgu0: pgu@f9000000 {
 			compatible = "snps,arcpgufb";
-			reg = <0xc9000000 0x400>;
+			reg = <0xf9000000 0x400>;
 		};
 
-		ps2: ps2@c9001000 {
+		ps2: ps2@f9001000 {
 			compatible = "snps,arc_ps2";
-			reg = <0xc9000400 0x14>;
+			reg = <0xf9000400 0x14>;
 			interrupts = <13>;
 			interrupt-names = "arc_ps2_irq";
 		};
 
-		eth0: ethernet@c0003000 {
+		eth0: ethernet@f0003000 {
 			compatible = "snps,oscilan";
-			reg = <0xc0003000 0x44>;
+			reg = <0xf0003000 0x44>;
 			interrupts = <7>, <8>;
 			interrupt-names = "rx", "tx";
 		};
diff --git a/arch/arc/configs/fpga_noramfs_defconfig b/arch/arc/configs/fpga_noramfs_defconfig
deleted file mode 100644
index 49c93011ab96..000000000000
--- a/arch/arc/configs/fpga_noramfs_defconfig
+++ /dev/null
@@ -1,63 +0,0 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
-# CONFIG_SWAP is not set
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NAMESPACES=y
-# CONFIG_UTS_NS is not set
-# CONFIG_PID_NS is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-# CONFIG_SLUB_DEBUG is not set
-# CONFIG_COMPAT_BRK is not set
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
-CONFIG_PREEMPT=y
-# CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_UNIX_DIAG=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-# CONFIG_IPV6 is not set
-# CONFIG_STANDALONE is not set
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_FIRMWARE_IN_KERNEL is not set
-# CONFIG_BLK_DEV is not set
-CONFIG_NETDEVICES=y
-CONFIG_ARC_EMAC=y
-CONFIG_LXT_PHY=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_NFS_FS=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/nsim_700_defconfig
index ef4d3bc7b6c0..ef4d3bc7b6c0 100644
--- a/arch/arc/configs/fpga_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index 742816f1b210..27ecc6975a58 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -41,6 +41,15 @@
 
 /******************************************************************
  * IRQ Control Macros
+ *
+ * All of them have "memory" clobber (compiler barrier) which is needed to
+ * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available)
+ * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
+ *
+ * Noted at the time of Abilis Timer List corruption
+ * 	Orig Bug + Rejected solution	: https://lkml.org/lkml/2013/3/29/67
+ * 	Reasoning			: https://lkml.org/lkml/2013/4/8/15
+ *
  ******************************************************************/
 
 /*
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index d01df0c517a2..20ebb602ea2f 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -26,8 +26,10 @@
 #include <asm/setup.h>
 #include <asm/mach_desc.h>
 
+#ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+#endif
 
 struct plat_smp_ops  plat_smp_ops;
 
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 6a3d9a6c4497..91bd5bd62857 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -177,6 +177,9 @@ dtb-$(CONFIG_MACH_KIRKWOOD) += kirkwood-b3.dtb \
 dtb-$(CONFIG_ARCH_LPC32XX) += ea3250.dtb phy3250.dtb
 dtb-$(CONFIG_ARCH_MARCO) += marco-evb.dtb
 dtb-$(CONFIG_MACH_MESON6) += meson6-atv1200.dtb
+dtb-$(CONFIG_ARCH_MMP) += pxa168-aspenite.dtb \
+	pxa910-dkb.dtb \
+	mmp2-brownstone.dtb
 dtb-$(CONFIG_ARCH_MOXART) += moxart-uc7112lx.dtb
 dtb-$(CONFIG_ARCH_MXC) += \
 	imx1-ads.dtb \
diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts
index 87aa4f3b8b3d..53bbfc90b26a 100644
--- a/arch/arm/boot/dts/am437x-sk-evm.dts
+++ b/arch/arm/boot/dts/am437x-sk-evm.dts
@@ -100,7 +100,7 @@
 	};
 
 	lcd0: display {
-		compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
+		compatible = "newhaven,nhd-4.3-480272ef-atxl", "panel-dpi";
 		label = "lcd";
 
 		pinctrl-names = "default";
@@ -112,11 +112,11 @@
 			clock-frequency = <9000000>;
 			hactive = <480>;
 			vactive = <272>;
-			hfront-porch = <8>;
-			hback-porch = <43>;
-			hsync-len = <4>;
-			vback-porch = <12>;
-			vfront-porch = <4>;
+			hfront-porch = <2>;
+			hback-porch = <2>;
+			hsync-len = <41>;
+			vfront-porch = <2>;
+			vback-porch = <2>;
 			vsync-len = <10>;
 			hsync-active = <0>;
 			vsync-active = <0>;
@@ -320,8 +320,7 @@
 
 	lcd_pins: lcd_pins {
 		pinctrl-single,pins = <
-			/* GPIO 5_8 to select LCD / HDMI */
-			0x238 (PIN_OUTPUT_PULLUP | MUX_MODE7)
+			0x1c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpcm_ad7.gpio1_7 */
 		>;
 	};
 };
diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi
index 9721e55384ce..50096d3427eb 100644
--- a/arch/arm/boot/dts/armada-375.dtsi
+++ b/arch/arm/boot/dts/armada-375.dtsi
@@ -14,6 +14,7 @@
 #include "skeleton.dtsi"
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/phy/phy.h>
 
 #define MBUS_ID(target,attributes) (((target) << 24) | ((attributes) << 16))
 
@@ -348,6 +349,12 @@
 				#clock-cells = <1>;
 			};
 
+			usbcluster: usb-cluster@18400 {
+				compatible = "marvell,armada-375-usb-cluster";
+				reg = <0x18400 0x4>;
+				#phy-cells = <1>;
+			};
+
 			mbusc: mbus-controller@20000 {
 				compatible = "marvell,mbus-controller";
 				reg = <0x20000 0x100>, <0x20180 0x20>;
@@ -398,6 +405,8 @@
 				reg = <0x50000 0x500>;
 				interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&gateclk 18>;
+				phys = <&usbcluster PHY_TYPE_USB2>;
+				phy-names = "usb";
 				status = "disabled";
 			};
 
@@ -414,6 +423,8 @@
 				reg = <0x58000 0x20000>,<0x5b880 0x80>;
 				interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&gateclk 16>;
+				phys = <&usbcluster PHY_TYPE_USB3>;
+				phy-names = "usb";
 				status = "disabled";
 			};
 
diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts
index b5b84006469e..9198b719d0ef 100644
--- a/arch/arm/boot/dts/at91-sama5d4ek.dts
+++ b/arch/arm/boot/dts/at91-sama5d4ek.dts
@@ -9,12 +9,12 @@
  * licensing only applies to this file, and not this project as a
  * whole.
  *
- *  a) This library is free software; you can redistribute it and/or
+ *  a) This file is free software; you can redistribute it and/or
  *     modify it under the terms of the GNU General Public License as
  *     published by the Free Software Foundation; either version 2 of the
  *     License, or (at your option) any later version.
  *
- *     This library is distributed in the hope that it will be useful,
+ *     This file is distributed in the hope that it will be useful,
  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *     GNU General Public License for more details.
diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi
index cb100b03a362..dd1313cbc314 100644
--- a/arch/arm/boot/dts/at91sam9260.dtsi
+++ b/arch/arm/boot/dts/at91sam9260.dtsi
@@ -956,6 +956,14 @@
 				};
 			};
 
+			rtc@fffffd20 {
+				compatible = "atmel,at91sam9260-rtt";
+				reg = <0xfffffd20 0x10>;
+				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+				clocks = <&clk32k>;
+				status = "disabled";
+			};
+
 			watchdog@fffffd40 {
 				compatible = "atmel,at91sam9260-wdt";
 				reg = <0xfffffd40 0x10>;
@@ -966,6 +974,12 @@
 				atmel,idle-halt;
 				status = "disabled";
 			};
+
+			gpbr: syscon@fffffd50 {
+				compatible = "atmel,at91sam9260-gpbr", "syscon";
+				reg = <0xfffffd50 0x10>;
+				status = "disabled";
+			};
 		};
 
 		nand0: nand@40000000 {
diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi
index a81aab4281a7..cdb9ed612109 100644
--- a/arch/arm/boot/dts/at91sam9261.dtsi
+++ b/arch/arm/boot/dts/at91sam9261.dtsi
@@ -828,12 +828,26 @@
 				clocks = <&mck>;
 			};
 
+			rtc@fffffd20 {
+				compatible = "atmel,at91sam9260-rtt";
+				reg = <0xfffffd20 0x10>;
+				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+				clocks = <&slow_xtal>;
+				status = "disabled";
+			};
+
 			watchdog@fffffd40 {
 				compatible = "atmel,at91sam9260-wdt";
 				reg = <0xfffffd40 0x10>;
 				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
 				status = "disabled";
 			};
+
+			gpbr: syscon@fffffd50 {
+				compatible = "atmel,at91sam9260-gpbr", "syscon";
+				reg = <0xfffffd50 0x10>;
+				status = "disabled";
+			};
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
index 653e4395b7cb..1467750e3377 100644
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@@ -922,6 +922,27 @@
 				pinctrl-0 = <&pinctrl_can_rx_tx>;
 				clocks = <&can_clk>;
 				clock-names = "can_clk";
+			};
+
+			rtc@fffffd20 {
+				compatible = "atmel,at91sam9260-rtt";
+				reg = <0xfffffd20 0x10>;
+				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+				clocks = <&slow_xtal>;
+				status = "disabled";
+			};
+
+			rtc@fffffd50 {
+				compatible = "atmel,at91sam9260-rtt";
+				reg = <0xfffffd50 0x10>;
+				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+				clocks = <&slow_xtal>;
+				status = "disabled";
+			};
+
+			gpbr: syscon@fffffd60 {
+				compatible = "atmel,at91sam9260-gpbr", "syscon";
+				reg = <0xfffffd60 0x50>;
 				status = "disabled";
 			};
 		};
diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
index d2919108e92d..dfaacb113f2e 100644
--- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
+++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
@@ -112,9 +112,23 @@
 				};
 			};
 
+			shdwc@fffffd10 {
+				atmel,wakeup-counter = <10>;
+				atmel,wakeup-rtt-timer;
+			};
+
+			rtc@fffffd20 {
+				atmel,rtt-rtc-time-reg = <&gpbr 0x0>;
+				status = "okay";
+			};
+
 			watchdog@fffffd40 {
 				status = "okay";
 			};
+
+			gpbr: syscon@fffffd50 {
+				status = "okay";
+			};
 		};
 
 		nand0: nand@40000000 {
diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
index 6c0637a4bda5..2a8da8a884b4 100644
--- a/arch/arm/boot/dts/at91sam9g45.dtsi
+++ b/arch/arm/boot/dts/at91sam9g45.dtsi
@@ -492,6 +492,27 @@
 					};
 				};
 
+				isi {
+					pinctrl_isi: isi-0 {
+						atmel,pins = <AT91_PIOB 8 AT91_PERIPH_B AT91_PINCTRL_NONE /* D8 */
+							      AT91_PIOB 9 AT91_PERIPH_B AT91_PINCTRL_NONE /* D9 */
+							      AT91_PIOB 10 AT91_PERIPH_B AT91_PINCTRL_NONE /* D10 */
+							      AT91_PIOB 11 AT91_PERIPH_B AT91_PINCTRL_NONE /* D11 */
+							      AT91_PIOB 20 AT91_PERIPH_A AT91_PINCTRL_NONE /* D0 */
+							      AT91_PIOB 21 AT91_PERIPH_A AT91_PINCTRL_NONE /* D1 */
+							      AT91_PIOB 22 AT91_PERIPH_A AT91_PINCTRL_NONE /* D2 */
+							      AT91_PIOB 23 AT91_PERIPH_A AT91_PINCTRL_NONE /* D3 */
+							      AT91_PIOB 24 AT91_PERIPH_A AT91_PINCTRL_NONE /* D4 */
+							      AT91_PIOB 25 AT91_PERIPH_A AT91_PINCTRL_NONE /* D5 */
+							      AT91_PIOB 26 AT91_PERIPH_A AT91_PINCTRL_NONE /* D6 */
+							      AT91_PIOB 27 AT91_PERIPH_A AT91_PINCTRL_NONE /* D7 */
+							      AT91_PIOB 28 AT91_PERIPH_A AT91_PINCTRL_NONE /* PCK */
+							      AT91_PIOB 29 AT91_PERIPH_A AT91_PINCTRL_NONE /* VSYNC */
+							      AT91_PIOB 30 AT91_PERIPH_A AT91_PINCTRL_NONE /* HSYNC */
+							      AT91_PIOB 31 AT91_PERIPH_A AT91_PINCTRL_NONE /* MCK */>;
+					};
+				};
+
 				usart0 {
 					pinctrl_usart0: usart0-0 {
 						atmel,pins =
@@ -1035,6 +1056,17 @@
 				};
 			};
 
+			isi@fffb4000 {
+				compatible = "atmel,at91sam9g45-isi";
+				reg = <0xfffb4000 0x4000>;
+				interrupts = <26 IRQ_TYPE_LEVEL_HIGH 5>;
+				clocks = <&isi_clk>;
+				clock-names = "isi_clk";
+				pinctrl-names = "default";
+				pinctrl-0 = <&pinctrl_isi>;
+				status = "disabled";
+			};
+
 			pwm0: pwm@fffb8000 {
 				compatible = "atmel,at91sam9rl-pwm";
 				reg = <0xfffb8000 0x300>;
@@ -1199,12 +1231,26 @@
 				};
 			};
 
+			rtc@fffffd20 {
+				compatible = "atmel,at91sam9260-rtt";
+				reg = <0xfffffd20 0x10>;
+				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+				clocks = <&clk32k>;
+				status = "disabled";
+			};
+
 			rtc@fffffdb0 {
 				compatible = "atmel,at91rm9200-rtc";
 				reg = <0xfffffdb0 0x30>;
 				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
 				status = "disabled";
 			};
+
+			gpbr: syscon@fffffd60 {
+				compatible = "atmel,at91sam9260-gpbr", "syscon";
+				reg = <0xfffffd60 0x10>;
+				status = "disabled";
+			};
 		};
 
 		fb0: fb@0x00500000 {
diff --git a/arch/arm/boot/dts/at91sam9m10g45ek.dts b/arch/arm/boot/dts/at91sam9m10g45ek.dts
index d8dd22651090..33ce7ca2c404 100644
--- a/arch/arm/boot/dts/at91sam9m10g45ek.dts
+++ b/arch/arm/boot/dts/at91sam9m10g45ek.dts
@@ -161,6 +161,15 @@
 				pinctrl-0 = <&pinctrl_pwm_leds>;
 			};
 
+			rtc@fffffd20 {
+				atmel,rtt-rtc-time-reg = <&gpbr 0x0>;
+				status = "okay";
+			};
+
+			gpbr: syscon@fffffd60 {
+				status = "okay";
+			};
+
 			rtc@fffffdb0 {
 				status = "okay";
 			};
diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi
index f0b4352650ed..72424371413e 100644
--- a/arch/arm/boot/dts/at91sam9rl.dtsi
+++ b/arch/arm/boot/dts/at91sam9rl.dtsi
@@ -1059,6 +1059,27 @@
 					clocks = <&slow_rc_osc &slow_osc>;
 				};
 			};
+
+			rtc@fffffeb0 {
+				compatible = "atmel,at91rm9200-rtc";
+				reg = <0xfffffeb0 0x40>;
+				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+				status = "disabled";
+			};
+
+			rtc@fffffd20 {
+				compatible = "atmel,at91sam9260-rtt";
+				reg = <0xfffffd20 0x10>;
+				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+				clocks = <&clk32k>;
+				status = "disabled";
+			};
+
+			gpbr: syscon@fffffd60 {
+				compatible = "atmel,at91sam9260-gpbr", "syscon";
+				reg = <0xfffffd60 0x10>;
+				status = "disabled";
+			};
 		};
 	};
 
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index 736092b1a535..10b725c7bfc0 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -304,7 +304,7 @@
 					/* VDD_GPU - over VDD_SMPS6 */
 					regulator-name = "smps6";
 					regulator-min-microvolt = <850000>;
-					regulator-max-microvolt = <12500000>;
+					regulator-max-microvolt = <1250000>;
 					regulator-always-on;
 					regulator-boot-on;
 				};
@@ -313,7 +313,7 @@
 					/* CORE_VDD */
 					regulator-name = "smps7";
 					regulator-min-microvolt = <850000>;
-					regulator-max-microvolt = <1030000>;
+					regulator-max-microvolt = <1060000>;
 					regulator-always-on;
 					regulator-boot-on;
 				};
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 63bf99be1762..22771bc1643a 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -742,7 +742,7 @@
 		};
 
 		wdt2: wdt@4ae14000 {
-			compatible = "ti,omap4-wdt";
+			compatible = "ti,omap3-wdt";
 			reg = <0x4ae14000 0x80>;
 			interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "wd_timer2";
diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts
index afc74fd4bb5e..89085d066c65 100644
--- a/arch/arm/boot/dts/dra72-evm.dts
+++ b/arch/arm/boot/dts/dra72-evm.dts
@@ -160,7 +160,7 @@
 					/* VDD_CORE */
 					regulator-name = "smps2";
 					regulator-min-microvolt = <850000>;
-					regulator-max-microvolt = <1030000>;
+					regulator-max-microvolt = <1060000>;
 					regulator-boot-on;
 					regulator-always-on;
 				};
diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi
index 2c05b3f017fa..4bdcbd61ce47 100644
--- a/arch/arm/boot/dts/dra7xx-clocks.dtsi
+++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi
@@ -1042,7 +1042,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
 		clocks = <&sys_clkin1>, <&sys_clkin2>;
-		reg = <0x01a4>;
+		reg = <0x0164>;
 	};
 
 	mlb_clk: mlb_clk {
@@ -1084,14 +1084,14 @@
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
 		clocks = <&sys_clkin1>, <&sys_clkin2>;
-		reg = <0x01d0>;
+		reg = <0x0168>;
 	};
 
 	video2_dpll_clk_mux: video2_dpll_clk_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
 		clocks = <&sys_clkin1>, <&sys_clkin2>;
-		reg = <0x01d4>;
+		reg = <0x016c>;
 	};
 
 	wkupaon_iclk_mux: wkupaon_iclk_mux {
diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi
index 242ddda0a8cd..22465494b796 100644
--- a/arch/arm/boot/dts/exynos3250.dtsi
+++ b/arch/arm/boot/dts/exynos3250.dtsi
@@ -311,12 +311,13 @@
 		adc: adc@126C0000 {
 			compatible = "samsung,exynos3250-adc",
 				     "samsung,exynos-adc-v2";
-			reg = <0x126C0000 0x100>, <0x10020718 0x4>;
+			reg = <0x126C0000 0x100>;
 			interrupts = <0 137 0>;
 			clock-names = "adc", "sclk";
 			clocks = <&cmu CLK_TSADC>, <&cmu CLK_SCLK_TSADC>;
 			#io-channel-cells = <1>;
 			io-channel-ranges;
+			samsung,syscon-phandle = <&pmu_system_controller>;
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/exynos4x12.dtsi b/arch/arm/boot/dts/exynos4x12.dtsi
index 2e9f1f7be77b..93b70402e943 100644
--- a/arch/arm/boot/dts/exynos4x12.dtsi
+++ b/arch/arm/boot/dts/exynos4x12.dtsi
@@ -108,13 +108,14 @@
 
 	adc: adc@126C0000 {
 		compatible = "samsung,exynos-adc-v1";
-		reg = <0x126C0000 0x100>, <0x10020718 0x4>;
+		reg = <0x126C0000 0x100>;
 		interrupt-parent = <&combiner>;
 		interrupts = <10 3>;
 		clocks = <&clock CLK_TSADC>;
 		clock-names = "adc";
 		#io-channel-cells = <1>;
 		io-channel-ranges;
+		samsung,syscon-phandle = <&pmu_system_controller>;
 		status = "disabled";
 	};
 
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index d45a07ea3402..0a229fcd7acf 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -754,12 +754,13 @@
 
 	adc: adc@12D10000 {
 		compatible = "samsung,exynos-adc-v1";
-		reg = <0x12D10000 0x100>, <0x10040718 0x4>;
+		reg = <0x12D10000 0x100>;
 		interrupts = <0 106 0>;
 		clocks = <&clock CLK_ADC>;
 		clock-names = "adc";
 		#io-channel-cells = <1>;
 		io-channel-ranges;
+		samsung,syscon-phandle = <&pmu_system_controller>;
 		status = "disabled";
 	};
 
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index 90bf4011e319..517e50f6760b 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -541,12 +541,13 @@
 
 	adc: adc@12D10000 {
 		compatible = "samsung,exynos-adc-v2";
-		reg = <0x12D10000 0x100>, <0x10040720 0x4>;
+		reg = <0x12D10000 0x100>;
 		interrupts = <0 106 0>;
 		clocks = <&clock CLK_TSADC>;
 		clock-names = "adc";
 		#io-channel-cells = <1>;
 		io-channel-ranges;
+		samsung,syscon-phandle = <&pmu_system_controller>;
 		status = "disabled";
 	};
 
diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
index 009abd69385d..327d362fe275 100644
--- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
@@ -67,6 +67,7 @@
 	phy-mode = "rgmii";
 	interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>,
 			      <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>;
+	fsl,magic-packet;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
index f1cd2147421d..6bfd0bc6e658 100644
--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
@@ -160,6 +160,7 @@
 	pinctrl-0 = <&pinctrl_enet>;
 	phy-mode = "rgmii";
 	phy-reset-gpios = <&gpio1 25 0>;
+	fsl,magic-packet;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/mmp2-brownstone.dts b/arch/arm/boot/dts/mmp2-brownstone.dts
index 7f70a39459f6..350208c5e1ed 100644
--- a/arch/arm/boot/dts/mmp2-brownstone.dts
+++ b/arch/arm/boot/dts/mmp2-brownstone.dts
@@ -8,7 +8,7 @@
  */
 
 /dts-v1/;
-/include/ "mmp2.dtsi"
+#include "mmp2.dtsi"
 
 / {
 	model = "Marvell MMP2 Brownstone Development Board";
diff --git a/arch/arm/boot/dts/mmp2.dtsi b/arch/arm/boot/dts/mmp2.dtsi
index 4e8b08c628c7..766bbb8495b6 100644
--- a/arch/arm/boot/dts/mmp2.dtsi
+++ b/arch/arm/boot/dts/mmp2.dtsi
@@ -7,7 +7,8 @@
  *  publishhed by the Free Software Foundation.
  */
 
-/include/ "skeleton.dtsi"
+#include "skeleton.dtsi"
+#include <dt-bindings/clock/marvell,mmp2.h>
 
 / {
 	aliases {
@@ -135,6 +136,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4030000 0x1000>;
 				interrupts = <27>;
+				clocks = <&soc_clocks MMP2_CLK_UART0>;
+				resets = <&soc_clocks MMP2_CLK_UART0>;
 				status = "disabled";
 			};
 
@@ -142,6 +145,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4017000 0x1000>;
 				interrupts = <28>;
+				clocks = <&soc_clocks MMP2_CLK_UART1>;
+				resets = <&soc_clocks MMP2_CLK_UART1>;
 				status = "disabled";
 			};
 
@@ -149,6 +154,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4018000 0x1000>;
 				interrupts = <24>;
+				clocks = <&soc_clocks MMP2_CLK_UART2>;
+				resets = <&soc_clocks MMP2_CLK_UART2>;
 				status = "disabled";
 			};
 
@@ -156,6 +163,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4016000 0x1000>;
 				interrupts = <46>;
+				clocks = <&soc_clocks MMP2_CLK_UART3>;
+				resets = <&soc_clocks MMP2_CLK_UART3>;
 				status = "disabled";
 			};
 
@@ -168,6 +177,8 @@
 				#gpio-cells = <2>;
 				interrupts = <49>;
 				interrupt-names = "gpio_mux";
+				clocks = <&soc_clocks MMP2_CLK_GPIO>;
+				resets = <&soc_clocks MMP2_CLK_GPIO>;
 				interrupt-controller;
 				#interrupt-cells = <1>;
 				ranges;
@@ -201,6 +212,8 @@
 				compatible = "mrvl,mmp-twsi";
 				reg = <0xd4011000 0x1000>;
 				interrupts = <7>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI0>;
+				resets = <&soc_clocks MMP2_CLK_TWSI0>;
 				#address-cells = <1>;
 				#size-cells = <0>;
 				mrvl,i2c-fast-mode;
@@ -211,6 +224,8 @@
 				compatible = "mrvl,mmp-twsi";
 				reg = <0xd4025000 0x1000>;
 				interrupts = <58>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI1>;
+				resets = <&soc_clocks MMP2_CLK_TWSI1>;
 				status = "disabled";
 			};
 
@@ -220,8 +235,20 @@
 				interrupts = <1 0>;
 				interrupt-names = "rtc 1Hz", "rtc alarm";
 				interrupt-parent = <&intcmux5>;
+				clocks = <&soc_clocks MMP2_CLK_RTC>;
+				resets = <&soc_clocks MMP2_CLK_RTC>;
 				status = "disabled";
 			};
 		};
+
+		soc_clocks: clocks{
+			compatible = "marvell,mmp2-clock";
+			reg = <0xd4050000 0x1000>,
+			      <0xd4282800 0x400>,
+			      <0xd4015000 0x1000>;
+			reg-names = "mpmu", "apmu", "apbc";
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/omap2430-sdp.dts b/arch/arm/boot/dts/omap2430-sdp.dts
index 05eca2e4430f..6b36ede58488 100644
--- a/arch/arm/boot/dts/omap2430-sdp.dts
+++ b/arch/arm/boot/dts/omap2430-sdp.dts
@@ -48,22 +48,22 @@
 		gpmc,device-width = <1>;
 		gpmc,cycle2cycle-samecsen = <1>;
 		gpmc,cycle2cycle-diffcsen = <1>;
-		gpmc,cs-on-ns = <7>;
-		gpmc,cs-rd-off-ns = <233>;
-		gpmc,cs-wr-off-ns = <233>;
-		gpmc,adv-on-ns = <22>;
-		gpmc,adv-rd-off-ns = <60>;
-		gpmc,adv-wr-off-ns = <60>;
-		gpmc,oe-on-ns = <67>;
-		gpmc,oe-off-ns = <210>;
-		gpmc,we-on-ns = <67>;
-		gpmc,we-off-ns = <210>;
-		gpmc,rd-cycle-ns = <233>;
-		gpmc,wr-cycle-ns = <233>;
-		gpmc,access-ns = <233>;
-		gpmc,page-burst-access-ns = <30>;
-		gpmc,bus-turnaround-ns = <30>;
-		gpmc,cycle2cycle-delay-ns = <30>;
+		gpmc,cs-on-ns = <6>;
+		gpmc,cs-rd-off-ns = <187>;
+		gpmc,cs-wr-off-ns = <187>;
+		gpmc,adv-on-ns = <18>;
+		gpmc,adv-rd-off-ns = <48>;
+		gpmc,adv-wr-off-ns = <48>;
+		gpmc,oe-on-ns = <60>;
+		gpmc,oe-off-ns = <169>;
+		gpmc,we-on-ns = <66>;
+		gpmc,we-off-ns = <169>;
+		gpmc,rd-cycle-ns = <187>;
+		gpmc,wr-cycle-ns = <187>;
+		gpmc,access-ns = <187>;
+		gpmc,page-burst-access-ns = <24>;
+		gpmc,bus-turnaround-ns = <24>;
+		gpmc,cycle2cycle-delay-ns = <24>;
 		gpmc,wait-monitoring-ns = <0>;
 		gpmc,clk-activation-ns = <0>;
 		gpmc,wr-data-mux-bus-ns = <0>;
diff --git a/arch/arm/boot/dts/pxa168-aspenite.dts b/arch/arm/boot/dts/pxa168-aspenite.dts
index e762facb3fa4..0a988b3fb248 100644
--- a/arch/arm/boot/dts/pxa168-aspenite.dts
+++ b/arch/arm/boot/dts/pxa168-aspenite.dts
@@ -8,7 +8,7 @@
  */
 
 /dts-v1/;
-/include/ "pxa168.dtsi"
+#include "pxa168.dtsi"
 
 / {
 	model = "Marvell PXA168 Aspenite Development Board";
diff --git a/arch/arm/boot/dts/pxa168.dtsi b/arch/arm/boot/dts/pxa168.dtsi
index 975dad21ac38..b899e25cbb1b 100644
--- a/arch/arm/boot/dts/pxa168.dtsi
+++ b/arch/arm/boot/dts/pxa168.dtsi
@@ -7,7 +7,8 @@
  *  publishhed by the Free Software Foundation.
  */
 
-/include/ "skeleton.dtsi"
+#include "skeleton.dtsi"
+#include <dt-bindings/clock/marvell,pxa168.h>
 
 / {
 	aliases {
@@ -59,6 +60,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4017000 0x1000>;
 				interrupts = <27>;
+				clocks = <&soc_clocks PXA168_CLK_UART0>;
+				resets = <&soc_clocks PXA168_CLK_UART0>;
 				status = "disabled";
 			};
 
@@ -66,6 +69,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4018000 0x1000>;
 				interrupts = <28>;
+				clocks = <&soc_clocks PXA168_CLK_UART1>;
+				resets = <&soc_clocks PXA168_CLK_UART1>;
 				status = "disabled";
 			};
 
@@ -73,6 +78,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4026000 0x1000>;
 				interrupts = <29>;
+				clocks = <&soc_clocks PXA168_CLK_UART2>;
+				resets = <&soc_clocks PXA168_CLK_UART2>;
 				status = "disabled";
 			};
 
@@ -84,6 +91,8 @@
 				gpio-controller;
 				#gpio-cells = <2>;
 				interrupts = <49>;
+				clocks = <&soc_clocks PXA168_CLK_GPIO>;
+				resets = <&soc_clocks PXA168_CLK_GPIO>;
 				interrupt-names = "gpio_mux";
 				interrupt-controller;
 				#interrupt-cells = <1>;
@@ -110,6 +119,8 @@
 				compatible = "mrvl,mmp-twsi";
 				reg = <0xd4011000 0x1000>;
 				interrupts = <7>;
+				clocks = <&soc_clocks PXA168_CLK_TWSI0>;
+				resets = <&soc_clocks PXA168_CLK_TWSI0>;
 				mrvl,i2c-fast-mode;
 				status = "disabled";
 			};
@@ -118,6 +129,8 @@
 				compatible = "mrvl,mmp-twsi";
 				reg = <0xd4025000 0x1000>;
 				interrupts = <58>;
+				clocks = <&soc_clocks PXA168_CLK_TWSI1>;
+				resets = <&soc_clocks PXA168_CLK_TWSI1>;
 				status = "disabled";
 			};
 
@@ -126,8 +139,20 @@
 				reg = <0xd4010000 0x1000>;
 				interrupts = <5 6>;
 				interrupt-names = "rtc 1Hz", "rtc alarm";
+				clocks = <&soc_clocks PXA168_CLK_RTC>;
+				resets = <&soc_clocks PXA168_CLK_RTC>;
 				status = "disabled";
 			};
 		};
+
+		soc_clocks: clocks{
+			compatible = "marvell,pxa168-clock";
+			reg = <0xd4050000 0x1000>,
+			      <0xd4282800 0x400>,
+			      <0xd4015000 0x1000>;
+			reg-names = "mpmu", "apmu", "apbc";
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/pxa910-dkb.dts b/arch/arm/boot/dts/pxa910-dkb.dts
index 595492aa5053..c82f2810ec73 100644
--- a/arch/arm/boot/dts/pxa910-dkb.dts
+++ b/arch/arm/boot/dts/pxa910-dkb.dts
@@ -8,7 +8,7 @@
  */
 
 /dts-v1/;
-/include/ "pxa910.dtsi"
+#include "pxa910.dtsi"
 
 / {
 	model = "Marvell PXA910 DKB Development Board";
diff --git a/arch/arm/boot/dts/pxa910.dtsi b/arch/arm/boot/dts/pxa910.dtsi
index 0247c622f580..0868f6729be1 100644
--- a/arch/arm/boot/dts/pxa910.dtsi
+++ b/arch/arm/boot/dts/pxa910.dtsi
@@ -7,7 +7,8 @@
  *  publishhed by the Free Software Foundation.
  */
 
-/include/ "skeleton.dtsi"
+#include "skeleton.dtsi"
+#include <dt-bindings/clock/marvell,pxa910.h>
 
 / {
 	aliases {
@@ -71,6 +72,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4017000 0x1000>;
 				interrupts = <27>;
+				clocks = <&soc_clocks PXA910_CLK_UART0>;
+				resets = <&soc_clocks PXA910_CLK_UART0>;
 				status = "disabled";
 			};
 
@@ -78,6 +81,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4018000 0x1000>;
 				interrupts = <28>;
+				clocks = <&soc_clocks PXA910_CLK_UART1>;
+				resets = <&soc_clocks PXA910_CLK_UART1>;
 				status = "disabled";
 			};
 
@@ -85,6 +90,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4036000 0x1000>;
 				interrupts = <59>;
+				clocks = <&soc_clocks PXA910_CLK_UART2>;
+				resets = <&soc_clocks PXA910_CLK_UART2>;
 				status = "disabled";
 			};
 
@@ -97,6 +104,8 @@
 				#gpio-cells = <2>;
 				interrupts = <49>;
 				interrupt-names = "gpio_mux";
+				clocks = <&soc_clocks PXA910_CLK_GPIO>;
+				resets = <&soc_clocks PXA910_CLK_GPIO>;
 				interrupt-controller;
 				#interrupt-cells = <1>;
 				ranges;
@@ -124,6 +133,8 @@
 				#size-cells = <0>;
 				reg = <0xd4011000 0x1000>;
 				interrupts = <7>;
+				clocks = <&soc_clocks PXA910_CLK_TWSI0>;
+				resets = <&soc_clocks PXA910_CLK_TWSI0>;
 				mrvl,i2c-fast-mode;
 				status = "disabled";
 			};
@@ -134,6 +145,8 @@
 				#size-cells = <0>;
 				reg = <0xd4037000 0x1000>;
 				interrupts = <54>;
+				clocks = <&soc_clocks PXA910_CLK_TWSI1>;
+				resets = <&soc_clocks PXA910_CLK_TWSI1>;
 				status = "disabled";
 			};
 
@@ -142,8 +155,21 @@
 				reg = <0xd4010000 0x1000>;
 				interrupts = <5 6>;
 				interrupt-names = "rtc 1Hz", "rtc alarm";
+				clocks = <&soc_clocks PXA910_CLK_RTC>;
+				resets = <&soc_clocks PXA910_CLK_RTC>;
 				status = "disabled";
 			};
 		};
+
+		soc_clocks: clocks{
+			compatible = "marvell,pxa910-clock";
+			reg = <0xd4050000 0x1000>,
+			      <0xd4282800 0x400>,
+			      <0xd4015000 0x1000>,
+			      <0xd403b000 0x1000>;
+			reg-names = "mpmu", "apmu", "apbc", "apbcp";
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/rk3288-evb-rk808.dts b/arch/arm/boot/dts/rk3288-evb-rk808.dts
index d8c775e6d5fe..831a7aa85136 100644
--- a/arch/arm/boot/dts/rk3288-evb-rk808.dts
+++ b/arch/arm/boot/dts/rk3288-evb-rk808.dts
@@ -15,6 +15,13 @@
 
 / {
 	compatible = "rockchip,rk3288-evb-rk808", "rockchip,rk3288";
+
+	ext_gmac: external-gmac-clock {
+		compatible = "fixed-clock";
+		clock-frequency = <125000000>;
+		clock-output-names = "ext_gmac";
+		#clock-cells = <0>;
+	};
 };
 
 &cpu0 {
@@ -152,3 +159,19 @@
 		};
 	};
 };
+
+&gmac {
+	phy_regulator = "vcc_phy";
+	phy-mode = "rgmii";
+	clock_in_out = "input";
+	snps,reset-gpio = <&gpio4 7 0>;
+	snps,reset-active-low;
+	snps,reset-delays-us = <0 10000 1000000>;
+	assigned-clocks = <&cru SCLK_MAC>;
+	assigned-clock-parents = <&ext_gmac>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&rgmii_pins>;
+	tx_delay = <0x30>;
+	rx_delay = <0x10>;
+	status = "ok";
+};
diff --git a/arch/arm/boot/dts/rk3288-evb.dtsi b/arch/arm/boot/dts/rk3288-evb.dtsi
index 3e067dd65d0c..048cb170c884 100644
--- a/arch/arm/boot/dts/rk3288-evb.dtsi
+++ b/arch/arm/boot/dts/rk3288-evb.dtsi
@@ -90,6 +90,17 @@
 		regulator-always-on;
 		regulator-boot-on;
 	};
+
+	vcc_phy: vcc-phy-regulator {
+		compatible = "regulator-fixed";
+		enable-active-high;
+		gpio = <&gpio0 6 GPIO_ACTIVE_HIGH>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&eth_phy_pwr>;
+		regulator-name = "vcc_phy";
+		regulator-always-on;
+		regulator-boot-on;
+	};
 };
 
 &emmc {
@@ -178,6 +189,12 @@
 			rockchip,pins = <0 14 RK_FUNC_GPIO &pcfg_pull_none>;
 		};
 	};
+
+	eth_phy {
+		eth_phy_pwr: eth-phy-pwr {
+			rockchip,pins = <0 6 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
 };
 
 &usb_host0_ehci {
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index fd19f00784bd..910dcad2088a 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -380,6 +380,22 @@
 		status = "disabled";
 	};
 
+	gmac: ethernet@ff290000 {
+		compatible = "rockchip,rk3288-gmac";
+		reg = <0xff290000 0x10000>;
+		interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "macirq";
+		rockchip,grf = <&grf>;
+		clocks = <&cru SCLK_MAC>,
+			<&cru SCLK_MAC_RX>, <&cru SCLK_MAC_TX>,
+			<&cru SCLK_MACREF>, <&cru SCLK_MACREF_OUT>,
+			<&cru ACLK_GMAC>, <&cru PCLK_GMAC>;
+		clock-names = "stmmaceth",
+			"mac_clk_rx", "mac_clk_tx",
+			"clk_mac_ref", "clk_mac_refout",
+			"aclk_mac", "pclk_mac";
+	};
+
 	usb_host0_ehci: usb@ff500000 {
 		compatible = "generic-ehci";
 		reg = <0xff500000 0x100>;
@@ -725,6 +741,11 @@
 			bias-disable;
 		};
 
+		pcfg_pull_none_12ma: pcfg-pull-none-12ma {
+			bias-disable;
+			drive-strength = <12>;
+		};
+
 		i2c0 {
 			i2c0_xfer: i2c0-xfer {
 				rockchip,pins = <0 15 RK_FUNC_1 &pcfg_pull_none>,
@@ -1068,5 +1089,38 @@
 				rockchip,pins = <7 23 3 &pcfg_pull_none>;
 			};
 		};
+
+		gmac {
+			rgmii_pins: rgmii-pins {
+				rockchip,pins = <3 30 3 &pcfg_pull_none>,
+						<3 31 3 &pcfg_pull_none>,
+						<3 26 3 &pcfg_pull_none>,
+						<3 27 3 &pcfg_pull_none>,
+						<3 28 3 &pcfg_pull_none_12ma>,
+						<3 29 3 &pcfg_pull_none_12ma>,
+						<3 24 3 &pcfg_pull_none_12ma>,
+						<3 25 3 &pcfg_pull_none_12ma>,
+						<4 0 3 &pcfg_pull_none>,
+						<4 5 3 &pcfg_pull_none>,
+						<4 6 3 &pcfg_pull_none>,
+						<4 9 3 &pcfg_pull_none_12ma>,
+						<4 4 3 &pcfg_pull_none_12ma>,
+						<4 1 3 &pcfg_pull_none>,
+						<4 3 3 &pcfg_pull_none>;
+			};
+
+			rmii_pins: rmii-pins {
+				rockchip,pins = <3 30 3 &pcfg_pull_none>,
+						<3 31 3 &pcfg_pull_none>,
+						<3 28 3 &pcfg_pull_none>,
+						<3 29 3 &pcfg_pull_none>,
+						<4 0 3 &pcfg_pull_none>,
+						<4 5 3 &pcfg_pull_none>,
+						<4 4 3 &pcfg_pull_none>,
+						<4 1 3 &pcfg_pull_none>,
+						<4 2 3 &pcfg_pull_none>,
+						<4 3 3 &pcfg_pull_none>;
+			};
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
index e0157b0f075c..1b0f30c2c4a5 100644
--- a/arch/arm/boot/dts/sama5d4.dtsi
+++ b/arch/arm/boot/dts/sama5d4.dtsi
@@ -9,12 +9,12 @@
  * licensing only applies to this file, and not this project as a
  * whole.
  *
- *  a) This library is free software; you can redistribute it and/or
+ *  a) This file is free software; you can redistribute it and/or
  *     modify it under the terms of the GNU General Public License as
  *     published by the Free Software Foundation; either version 2 of the
  *     License, or (at your option) any later version.
  *
- *     This library is distributed in the hope that it will be useful,
+ *     This file is distributed in the hope that it will be useful,
  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *     GNU General Public License for more details.
@@ -45,6 +45,7 @@
 
 #include "skeleton.dtsi"
 #include <dt-bindings/clock/at91.h>
+#include <dt-bindings/dma/at91.h>
 #include <dt-bindings/pinctrl/at91.h>
 #include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/gpio/gpio.h>
@@ -302,6 +303,15 @@
 			#size-cells = <1>;
 			ranges;
 
+			dma1: dma-controller@f0004000 {
+				compatible = "atmel,sama5d4-dma";
+				reg = <0xf0004000 0x200>;
+				interrupts = <50 IRQ_TYPE_LEVEL_HIGH 0>;
+				#dma-cells = <1>;
+				clocks = <&dma1_clk>;
+				clock-names = "dma_clk";
+			};
+
 			ramc0: ramc@f0010000 {
 				compatible = "atmel,sama5d3-ddramc";
 				reg = <0xf0010000 0x200>;
@@ -309,6 +319,15 @@
 				clock-names = "ddrck", "mpddr";
 			};
 
+			dma0: dma-controller@f0014000 {
+				compatible = "atmel,sama5d4-dma";
+				reg = <0xf0014000 0x200>;
+				interrupts = <8 IRQ_TYPE_LEVEL_HIGH 0>;
+				#dma-cells = <1>;
+				clocks = <&dma0_clk>;
+				clock-names = "dma_clk";
+			};
+
 			pmc: pmc@f0018000 {
 				compatible = "atmel,sama5d3-pmc";
 				reg = <0xf0018000 0x120>;
@@ -761,6 +780,10 @@
 				compatible = "atmel,hsmci";
 				reg = <0xf8000000 0x600>;
 				interrupts = <35 IRQ_TYPE_LEVEL_HIGH 0>;
+				dmas = <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(0))>;
+				dma-names = "rxtx";
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_mmc0_clk_cmd_dat0 &pinctrl_mmc0_dat1_3>;
 				status = "disabled";
@@ -776,6 +799,13 @@
 				compatible = "atmel,at91rm9200-spi";
 				reg = <0xf8010000 0x100>;
 				interrupts = <37 IRQ_TYPE_LEVEL_HIGH 3>;
+				dmas = <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(10))>,
+				       <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(11))>;
+				dma-names = "tx", "rx";
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_spi0>;
 				clocks = <&spi0_clk>;
@@ -787,6 +817,13 @@
 				compatible = "atmel,at91sam9x5-i2c";
 				reg = <0xf8014000 0x4000>;
 				interrupts = <32 IRQ_TYPE_LEVEL_HIGH 6>;
+				dmas = <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(2))>,
+				       <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(3))>;
+				dma-names = "tx", "rx";
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_i2c0>;
 				#address-cells = <1>;
@@ -817,7 +854,14 @@
 			i2c2: i2c@f8024000 {
 				compatible = "atmel,at91sam9x5-i2c";
 				reg = <0xf8024000 0x4000>;
-				interrupts = <34 4 6>;
+				interrupts = <34 IRQ_TYPE_LEVEL_HIGH 6>;
+				dmas = <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(6))>,
+				       <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(7))>;
+				dma-names = "tx", "rx";
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_i2c2>;
 				#address-cells = <1>;
@@ -830,6 +874,10 @@
 				compatible = "atmel,hsmci";
 				reg = <0xfc000000 0x600>;
 				interrupts = <36 IRQ_TYPE_LEVEL_HIGH 0>;
+				dmas = <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(1))>;
+				dma-names = "rxtx";
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_mmc1_clk_cmd_dat0 &pinctrl_mmc1_dat1_3>;
 				status = "disabled";
@@ -843,6 +891,13 @@
 				compatible = "atmel,at91sam9260-usart";
 				reg = <0xfc008000 0x100>;
 				interrupts = <29 IRQ_TYPE_LEVEL_HIGH 5>;
+				dmas = <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(16))>,
+				       <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(17))>;
+				dma-names = "tx", "rx";
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_usart2 &pinctrl_usart2_rts &pinctrl_usart2_cts>;
 				clocks = <&usart2_clk>;
@@ -854,6 +909,13 @@
 				compatible = "atmel,at91sam9260-usart";
 				reg = <0xfc00c000 0x100>;
 				interrupts = <30 IRQ_TYPE_LEVEL_HIGH 5>;
+				dmas = <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(18))>,
+				       <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(19))>;
+				dma-names = "tx", "rx";
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_usart3>;
 				clocks = <&usart3_clk>;
@@ -865,6 +927,13 @@
 				compatible = "atmel,at91sam9260-usart";
 				reg = <0xfc010000 0x100>;
 				interrupts = <31 IRQ_TYPE_LEVEL_HIGH 5>;
+				dmas = <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(20))>,
+				       <&dma1
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(21))>;
+				dma-names = "tx", "rx";
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_usart4>;
 				clocks = <&usart4_clk>;
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index e3ab942fd148..7b4099fcf817 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -188,19 +188,11 @@
 				"apb0_ir1", "apb0_keypad";
 		};
 
-		apb1_mux: apb1_mux@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-			clock-output-names = "apb1_mux";
-		};
-
-		apb1: apb1@01c20058 {
+		apb1: clk@01c20058 {
 			#clock-cells = <0>;
 			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
-			clocks = <&apb1_mux>;
+			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
 			clock-output-names = "apb1";
 		};
 
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index 81ad4b94e812..1b76667f3182 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -176,19 +176,11 @@
 				"apb0_ir", "apb0_keypad";
 		};
 
-		apb1_mux: apb1_mux@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-			clock-output-names = "apb1_mux";
-		};
-
-		apb1: apb1@01c20058 {
+		apb1: clk@01c20058 {
 			#clock-cells = <0>;
 			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
-			clocks = <&apb1_mux>;
+			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
 			clock-output-names = "apb1";
 		};
 
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index b131068f4f35..c35217ea1f64 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -161,19 +161,11 @@
 			clock-output-names = "apb0_codec", "apb0_pio", "apb0_ir";
 		};
 
-		apb1_mux: apb1_mux@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-			clock-output-names = "apb1_mux";
-		};
-
-		apb1: apb1@01c20058 {
+		apb1: clk@01c20058 {
 			#clock-cells = <0>;
 			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
-			clocks = <&apb1_mux>;
+			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
 			clock-output-names = "apb1";
 		};
 
diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index a400172a8a52..f47156b6572b 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi
@@ -229,19 +229,11 @@
 					"apb1_daudio1";
 		};
 
-		apb2_mux: apb2_mux@01c20058 {
+		apb2: clk@01c20058 {
 			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
+			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
 			clocks = <&osc32k>, <&osc24M>, <&pll6 0>, <&pll6 0>;
-			clock-output-names = "apb2_mux";
-		};
-
-		apb2: apb2@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun6i-a31-apb2-div-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&apb2_mux>;
 			clock-output-names = "apb2";
 		};
 
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 82a524ce28ad..e21ce5992d56 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -236,19 +236,11 @@
 				"apb0_iis2", "apb0_keypad";
 		};
 
-		apb1_mux: apb1_mux@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-			clock-output-names = "apb1_mux";
-		};
-
-		apb1: apb1@01c20058 {
+		apb1: clk@01c20058 {
 			#clock-cells = <0>;
 			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
-			clocks = <&apb1_mux>;
+			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
 			clock-output-names = "apb1";
 		};
 
diff --git a/arch/arm/boot/dts/sun8i-a23.dtsi b/arch/arm/boot/dts/sun8i-a23.dtsi
index 6086adbf9d74..0746cd1024d7 100644
--- a/arch/arm/boot/dts/sun8i-a23.dtsi
+++ b/arch/arm/boot/dts/sun8i-a23.dtsi
@@ -189,19 +189,11 @@
 					"apb1_daudio0",	"apb1_daudio1";
 		};
 
-		apb2_mux: apb2_mux_clk@01c20058 {
+		apb2: clk@01c20058 {
 			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
+			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
 			clocks = <&osc32k>, <&osc24M>, <&pll6>, <&pll6>;
-			clock-output-names = "apb2_mux";
-		};
-
-		apb2: apb2_clk@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun6i-a31-apb2-div-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&apb2_mux>;
 			clock-output-names = "apb2";
 		};
 
diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi
index 222f3b3f4dd5..4296b5398bf5 100644
--- a/arch/arm/boot/dts/tegra114.dtsi
+++ b/arch/arm/boot/dts/tegra114.dtsi
@@ -1,5 +1,6 @@
 #include <dt-bindings/clock/tegra114-car.h>
 #include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra114-mc.h>
 #include <dt-bindings/pinctrl/pinctrl-tegra.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 
@@ -50,6 +51,8 @@
 			resets = <&tegra_car 27>;
 			reset-names = "dc";
 
+			iommus = <&mc TEGRA_SWGROUP_DC>;
+
 			nvidia,head = <0>;
 
 			rgb {
@@ -67,6 +70,8 @@
 			resets = <&tegra_car 26>;
 			reset-names = "dc";
 
+			iommus = <&mc TEGRA_SWGROUP_DCB>;
+
 			nvidia,head = <1>;
 
 			rgb {
@@ -498,15 +503,15 @@
 		reset-names = "fuse";
 	};
 
-	iommu@70019010 {
-		compatible = "nvidia,tegra114-smmu", "nvidia,tegra30-smmu";
-		reg = <0x70019010 0x02c
-		       0x700191f0 0x010
-		       0x70019228 0x074>;
-		nvidia,#asids = <4>;
-		dma-window = <0 0x40000000>;
-		nvidia,swgroups = <0x18659fe>;
-		nvidia,ahb = <&ahb>;
+	mc: memory-controller@70019000 {
+		compatible = "nvidia,tegra114-mc";
+		reg = <0x70019000 0x1000>;
+		clocks = <&tegra_car TEGRA114_CLK_MC>;
+		clock-names = "mc";
+
+		interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+		#iommu-cells = <1>;
 	};
 
 	ahub@70080000 {
diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts
index 51b373ff1065..4eb540be368f 100644
--- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts
+++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts
@@ -1942,4 +1942,48 @@
 			 <&tegra_car TEGRA124_CLK_EXTERN1>;
 		clock-names = "pll_a", "pll_a_out0", "mclk";
 	};
+
+	thermal-zones {
+		cpu {
+			trips {
+				trip@0 {
+					temperature = <101000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				/* There are currently no cooling maps because there are no cooling devices */
+			};
+		};
+
+		mem {
+			trips {
+				trip@0 {
+					temperature = <101000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				/* There are currently no cooling maps because there are no cooling devices */
+			};
+		};
+
+		gpu {
+			trips {
+				trip@0 {
+					temperature = <101000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				/* There are currently no cooling maps because there are no cooling devices */
+			};
+		};
+	};
 };
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index df2b06b29985..4be06c6ea0c8 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -1,8 +1,10 @@
 #include <dt-bindings/clock/tegra124-car.h>
 #include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra124-mc.h>
 #include <dt-bindings/pinctrl/pinctrl-tegra.h>
 #include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/thermal/tegra124-soctherm.h>
 
 #include "skeleton.dtsi"
 
@@ -102,6 +104,8 @@
 			resets = <&tegra_car 27>;
 			reset-names = "dc";
 
+			iommus = <&mc TEGRA_SWGROUP_DC>;
+
 			nvidia,head = <0>;
 		};
 
@@ -115,6 +119,8 @@
 			resets = <&tegra_car 26>;
 			reset-names = "dc";
 
+			iommus = <&mc TEGRA_SWGROUP_DCB>;
+
 			nvidia,head = <1>;
 		};
 
@@ -275,7 +281,8 @@
 	pinmux: pinmux@0,70000868 {
 		compatible = "nvidia,tegra124-pinmux";
 		reg = <0x0 0x70000868 0x0 0x164>, /* Pad control registers */
-		      <0x0 0x70003000 0x0 0x434>; /* Mux registers */
+		      <0x0 0x70003000 0x0 0x434>, /* Mux registers */
+		      <0x0 0x70000820 0x0 0x008>; /* MIPI pad control */
 	};
 
 	/*
@@ -551,6 +558,17 @@
 		reset-names = "fuse";
 	};
 
+	mc: memory-controller@0,70019000 {
+		compatible = "nvidia,tegra124-mc";
+		reg = <0x0 0x70019000 0x0 0x1000>;
+		clocks = <&tegra_car TEGRA124_CLK_MC>;
+		clock-names = "mc";
+
+		interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+		#iommu-cells = <1>;
+	};
+
 	sata@0,70020000 {
 		compatible = "nvidia,tegra124-ahci";
 
@@ -640,6 +658,18 @@
 		status = "disabled";
 	};
 
+	soctherm: thermal-sensor@0,700e2000 {
+		compatible = "nvidia,tegra124-soctherm";
+		reg = <0x0 0x700e2000 0x0 0x1000>;
+		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
+			<&tegra_car TEGRA124_CLK_SOC_THERM>;
+		clock-names = "tsensor", "soctherm";
+		resets = <&tegra_car 78>;
+		reset-names = "soctherm";
+		#thermal-sensor-cells = <1>;
+	};
+
 	ahub@0,70300000 {
 		compatible = "nvidia,tegra124-ahub";
 		reg = <0x0 0x70300000 0x0 0x200>,
@@ -881,6 +911,40 @@
 		};
 	};
 
+	thermal-zones {
+		cpu {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors =
+				<&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
+		};
+
+		mem {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors =
+				<&soctherm TEGRA124_SOCTHERM_SENSOR_MEM>;
+		};
+
+		gpu {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors =
+				<&soctherm TEGRA124_SOCTHERM_SENSOR_GPU>;
+		};
+
+		pllx {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors =
+				<&soctherm TEGRA124_SOCTHERM_SENSOR_PLLX>;
+		};
+	};
+
 	timer {
 		compatible = "arm,armv7-timer";
 		interrupts = <GIC_PPI 13
diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi
index b270b9e3d455..99475f6e76a3 100644
--- a/arch/arm/boot/dts/tegra30.dtsi
+++ b/arch/arm/boot/dts/tegra30.dtsi
@@ -1,5 +1,6 @@
 #include <dt-bindings/clock/tegra30-car.h>
 #include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra30-mc.h>
 #include <dt-bindings/pinctrl/pinctrl-tegra.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 
@@ -166,6 +167,8 @@
 			resets = <&tegra_car 27>;
 			reset-names = "dc";
 
+			iommus = <&mc TEGRA_SWGROUP_DC>;
+
 			nvidia,head = <0>;
 
 			rgb {
@@ -183,6 +186,8 @@
 			resets = <&tegra_car 26>;
 			reset-names = "dc";
 
+			iommus = <&mc TEGRA_SWGROUP_DCB>;
+
 			nvidia,head = <1>;
 
 			rgb {
@@ -615,23 +620,15 @@
 		clock-names = "pclk", "clk32k_in";
 	};
 
-	memory-controller@7000f000 {
+	mc: memory-controller@7000f000 {
 		compatible = "nvidia,tegra30-mc";
-		reg = <0x7000f000 0x010
-		       0x7000f03c 0x1b4
-		       0x7000f200 0x028
-		       0x7000f284 0x17c>;
+		reg = <0x7000f000 0x400>;
+		clocks = <&tegra_car TEGRA30_CLK_MC>;
+		clock-names = "mc";
+
 		interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
-	};
 
-	iommu@7000f010 {
-		compatible = "nvidia,tegra30-smmu";
-		reg = <0x7000f010 0x02c
-		       0x7000f1f0 0x010
-		       0x7000f228 0x05c>;
-		nvidia,#asids = <4>;		/* # of ASIDs */
-		dma-window = <0 0x40000000>;	/* IOVA start & length */
-		nvidia,ahb = <&ahb>;
+		#iommu-cells = <1>;
 	};
 
 	fuse@7000f800 {
diff --git a/arch/arm/configs/ape6evm_defconfig b/arch/arm/configs/ape6evm_defconfig
index db81d8ce4c03..9e9a72e3d30f 100644
--- a/arch/arm/configs/ape6evm_defconfig
+++ b/arch/arm/configs/ape6evm_defconfig
@@ -33,7 +33,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_BINFMT_MISC=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/armadillo800eva_defconfig b/arch/arm/configs/armadillo800eva_defconfig
index d9675c68a399..5666e3700a82 100644
--- a/arch/arm/configs/armadillo800eva_defconfig
+++ b/arch/arm/configs/armadillo800eva_defconfig
@@ -43,7 +43,7 @@ CONFIG_KEXEC=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/bcm_defconfig b/arch/arm/configs/bcm_defconfig
index 83a87e48901c..7117662bab2e 100644
--- a/arch/arm/configs/bcm_defconfig
+++ b/arch/arm/configs/bcm_defconfig
@@ -39,7 +39,7 @@ CONFIG_CPU_IDLE=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y
diff --git a/arch/arm/configs/bockw_defconfig b/arch/arm/configs/bockw_defconfig
index 1dde5daa84f9..3125e00f05ab 100644
--- a/arch/arm/configs/bockw_defconfig
+++ b/arch/arm/configs/bockw_defconfig
@@ -29,7 +29,7 @@ CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_VFP=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index 759f9b0053e2..235842c9ba96 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -49,7 +49,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=m
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_ONDEMAND=m
 CONFIG_CPU_IDLE=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index c41990729024..5ef14de00a29 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -27,7 +27,7 @@ CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x41000000,8M console=ttySAC1,115200 init=/linuxrc mem=256M"
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index eb440aae4283..ea316c4b890e 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig
@@ -39,7 +39,6 @@ CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 CONFIG_PM=y
 CONFIG_APM_EMULATION=y
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/hisi_defconfig b/arch/arm/configs/hisi_defconfig
index 1fe3621faf65..112543665dd7 100644
--- a/arch/arm/configs/hisi_defconfig
+++ b/arch/arm/configs/hisi_defconfig
@@ -18,7 +18,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_NEON=y
 CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index 182e54692664..18e59feaa307 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig
@@ -31,7 +31,6 @@ CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 CONFIG_PM=y
 CONFIG_APM_EMULATION=y
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index f707cd2691cf..7c2075a07eba 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -54,7 +54,7 @@ CONFIG_ARM_IMX6Q_CPUFREQ=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_BINFMT_MISC=m
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_TEST_SUSPEND=y
 CONFIG_NET=y
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 20a3ff99fae2..a2067cbfe173 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -30,7 +30,7 @@ CONFIG_HIGHMEM=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_SUSPEND is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/kzm9g_defconfig b/arch/arm/configs/kzm9g_defconfig
index 8cb115d74fdf..5d63fc5d2d48 100644
--- a/arch/arm/configs/kzm9g_defconfig
+++ b/arch/arm/configs/kzm9g_defconfig
@@ -43,7 +43,7 @@ CONFIG_KEXEC=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/lager_defconfig b/arch/arm/configs/lager_defconfig
index 929c571ea29b..a82afc916a89 100644
--- a/arch/arm/configs/lager_defconfig
+++ b/arch/arm/configs/lager_defconfig
@@ -37,7 +37,7 @@ CONFIG_AUTO_ZRELADDR=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/mackerel_defconfig b/arch/arm/configs/mackerel_defconfig
index 57ececba2ae6..05a529311b4d 100644
--- a/arch/arm/configs/mackerel_defconfig
+++ b/arch/arm/configs/mackerel_defconfig
@@ -28,7 +28,6 @@ CONFIG_KEXEC=y
 CONFIG_VFP=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_PM=y
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/marzen_defconfig b/arch/arm/configs/marzen_defconfig
index ff91630d34e1..3c8b6d823189 100644
--- a/arch/arm/configs/marzen_defconfig
+++ b/arch/arm/configs/marzen_defconfig
@@ -33,7 +33,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_VFP=y
 CONFIG_KEXEC=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index d7896580f3bb..2328fe752e9c 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -479,4 +479,4 @@ CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_LOCKUP_DETECTOR=y
 CONFIG_CRYPTO_DEV_TEGRA_AES=y
-CONFIG_GENERIC_CPUFREQ_CPU0=y
+CONFIG_CPUFREQ_DT=y
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index 115cda9f3260..a7dce674f1be 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -63,7 +63,6 @@ CONFIG_FPE_NWFPE=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 # CONFIG_SUSPEND is not set
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 3e09286f7ff1..c2c3a852af9f 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -127,6 +127,8 @@ CONFIG_SRAM=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI_PLATFORM=y
 CONFIG_MD=y
 CONFIG_NETDEVICES=y
 # CONFIG_NET_VENDOR_ARC is not set
diff --git a/arch/arm/configs/prima2_defconfig b/arch/arm/configs/prima2_defconfig
index 23591dba47a0..f610230b9c1f 100644
--- a/arch/arm/configs/prima2_defconfig
+++ b/arch/arm/configs/prima2_defconfig
@@ -18,7 +18,7 @@ CONFIG_PREEMPT=y
 CONFIG_AEABI=y
 CONFIG_KEXEC=y
 CONFIG_BINFMT_MISC=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index b58fb32770a0..afa24799477a 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -32,7 +32,7 @@ CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_KERNEL_MODE_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_ADVANCED_DEBUG=y
 CONFIG_NET=y
diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig
index 63fb5316ff02..3df6ca0c1d1f 100644
--- a/arch/arm/configs/shmobile_defconfig
+++ b/arch/arm/configs/shmobile_defconfig
@@ -39,7 +39,7 @@ CONFIG_KEXEC=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -146,7 +146,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_S35390A=y
 CONFIG_DMADEVICES=y
 CONFIG_SH_DMAE=y
-CONFIG_RCAR_AUDMAC_PP=y
 CONFIG_RCAR_DMAC=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PWM=y
@@ -178,5 +177,5 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
 CONFIG_CPU_THERMAL=y
-CONFIG_GENERIC_CPUFREQ_CPU0=y
+CONFIG_CPUFREQ_DT=y
 CONFIG_REGULATOR_DA9210=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index f7ac0379850f..7a342d2780a8 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -11,7 +11,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 40750f93aa83..3ea9c3377ccb 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -46,7 +46,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_IDLE=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig
index d219d6a43238..6a1c9898fd03 100644
--- a/arch/arm/configs/u8500_defconfig
+++ b/arch/arm/configs/u8500_defconfig
@@ -25,7 +25,7 @@ CONFIG_CPU_IDLE=y
 CONFIG_ARM_U8500_CPUIDLE=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/vt8500_v6_v7_defconfig b/arch/arm/configs/vt8500_v6_v7_defconfig
index 9e7a25639690..1bfaa7bfc392 100644
--- a/arch/arm/configs/vt8500_v6_v7_defconfig
+++ b/arch/arm/configs/vt8500_v6_v7_defconfig
@@ -16,7 +16,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index e6e3446abdf6..b52101d37ec7 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -121,13 +121,12 @@ static inline unsigned long dma_max_pfn(struct device *dev)
 }
 #define dma_max_pfn(dev) dma_max_pfn(dev)
 
-static inline int set_arch_dma_coherent_ops(struct device *dev)
-{
-	dev->archdata.dma_coherent = true;
-	set_dma_ops(dev, &arm_coherent_dma_ops);
-	return 0;
-}
-#define set_arch_dma_coherent_ops(dev)	set_arch_dma_coherent_ops(dev)
+#define arch_setup_dma_ops arch_setup_dma_ops
+extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			       struct iommu_ops *iommu, bool coherent);
+
+#define arch_teardown_dma_ops arch_teardown_dma_ops
+extern void arch_teardown_dma_ops(struct device *dev);
 
 /* do not use this function in a driver */
 static inline bool is_device_dma_coherent(struct device *dev)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index b9db269c6e61..66ce17655bb9 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr = HCR_GUEST_MASK;
+}
+
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
 	return 1;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 53036e21756b..254e0650e48b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -150,8 +150,6 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index acb0d5712716..63e0ecc04901 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+					     unsigned long size,
+					     bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
 		kvm_flush_dcache_to_poc((void *)hva, size);
 	
 	/*
diff --git a/arch/arm/include/asm/mach/irda.h b/arch/arm/include/asm/mach/irda.h
deleted file mode 100644
index 38f77b5e56cf..000000000000
--- a/arch/arm/include/asm/mach/irda.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- *  arch/arm/include/asm/mach/irda.h
- *
- *  Copyright (C) 2004 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __ASM_ARM_MACH_IRDA_H
-#define __ASM_ARM_MACH_IRDA_H
-
-struct irda_platform_data {
-	int (*startup)(struct device *);
-	void (*shutdown)(struct device *);
-	int (*set_power)(struct device *, unsigned int state);
-	void (*set_speed)(struct device *, unsigned int speed);
-};
-
-#endif
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index ac4bfae26702..0fa418463f49 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -120,12 +120,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
+	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	struct __raw_tickets tickets = READ_ONCE(lock->tickets);
 	return (tickets.next - tickets.owner) > 1;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index e34934f63a49..f7c65adaa428 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -484,7 +484,7 @@ static void armpmu_disable(struct pmu *pmu)
 	armpmu->stop(armpmu);
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int armpmu_runtime_resume(struct device *dev)
 {
 	struct arm_pmu_platdata *plat = dev_get_platdata(dev);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 8361652b6dab..f9c863911038 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/seq_file.h>
 #include <linux/screen_info.h>
+#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
@@ -806,6 +807,7 @@ static int __init customize_machine(void)
 	 * machine from the device tree, if no callback is provided,
 	 * otherwise we would always need an init_machine callback.
 	 */
+	of_iommu_init();
 	if (machine_desc->init_machine)
 		machine_desc->init_machine();
 #ifdef CONFIG_OF
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8a959e..2d6d91001062 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	int err;
 	struct kvm_vcpu *vcpu;
 
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+		err = -EBUSY;
+		goto out;
+	}
+
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu) {
 		err = -ENOMEM;
@@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
+	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
@@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
 	int ret;
 
 	if (likely(vcpu->arch.has_run_once))
@@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.has_run_once = true;
 
 	/*
-	 * Initialize the VGIC before running a vcpu the first time on
-	 * this VM.
+	 * Map the VGIC hardware resources before running a vcpu the first
+	 * time on this VM.
 	 */
-	if (unlikely(!vgic_initialized(vcpu->kvm))) {
-		ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_ready(kvm))) {
+		ret = kvm_vgic_map_resources(kvm);
 		if (ret)
 			return ret;
 	}
 
+	/*
+	 * Enable the arch timers only if we have an in-kernel VGIC
+	 * and it has been properly initialized, since we cannot handle
+	 * interrupts from the virtual timer with a userspace gic.
+	 */
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+		kvm_timer_enable(kvm);
+
 	return 0;
 }
 
@@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 	return -EINVAL;
 }
 
+static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			       const struct kvm_vcpu_init *init)
+{
+	unsigned int i;
+	int phys_target = kvm_target_cpu();
+
+	if (init->target != phys_target)
+		return -EINVAL;
+
+	/*
+	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+	 * use the same target.
+	 */
+	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+		return -EINVAL;
+
+	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+	for (i = 0; i < sizeof(init->features) * 8; i++) {
+		bool set = (init->features[i / 32] & (1 << (i % 32)));
+
+		if (set && i >= KVM_VCPU_MAX_FEATURES)
+			return -ENOENT;
+
+		/*
+		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+		 * use the same feature set.
+		 */
+		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+		    test_bit(i, vcpu->arch.features) != set)
+			return -EINVAL;
+
+		if (set)
+			set_bit(i, vcpu->arch.features);
+	}
+
+	vcpu->arch.target = phys_target;
+
+	/* Now we know what it is, we can reset it. */
+	return kvm_reset_vcpu(vcpu);
+}
+
+
 static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 					 struct kvm_vcpu_init *init)
 {
@@ -659,10 +716,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 		return ret;
 
 	/*
+	 * Ensure a rebooted VM will fault in RAM pages and detect if the
+	 * guest MMU is turned off and flush the caches as needed.
+	 */
+	if (vcpu->arch.has_run_once)
+		stage2_unmap_vm(vcpu->kvm);
+
+	vcpu_reset_hcr(vcpu);
+
+	/*
 	 * Handle the "start in power-off" case by marking the VCPU as paused.
 	 */
-	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
 		vcpu->arch.pause = true;
+	else
+		vcpu->arch.pause = false;
 
 	return 0;
 }
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index cc0b78769bd8..384bab67c462 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
@@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void)
 	}
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-
-	/* We can only cope with guest==host and only on A15/A7 (for now). */
-	if (init->target != kvm_target_cpu())
-		return -EINVAL;
-
-	vcpu->arch.target = init->target;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		if (test_bit(i, (void *)init->features)) {
-			if (i >= KVM_VCPU_MAX_FEATURES)
-				return -ENOENT;
-			set_bit(i, vcpu->arch.features);
-		}
-	}
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	int target = kvm_target_cpu();
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 4cb5a93182e9..5d3bfc0eb3f0 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	}
 
 	rt = vcpu->arch.mmio_decode.rt;
-	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
 
-	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
-					 KVM_TRACE_MMIO_READ_UNSATISFIED,
-			mmio.len, fault_ipa,
-			(mmio.is_write) ? data : 0);
+	if (mmio.is_write) {
+		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
+					       mmio.len);
 
-	if (mmio.is_write)
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
+			       fault_ipa, data);
 		mmio_write_buf(mmio.data, mmio.len, data);
+	} else {
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+			       fault_ipa, 0);
+	}
 
 	if (vgic_handle_mmio(vcpu, run, &mmio))
 		return 1;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 8664ff17cbbe..1dc9778a00af 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -612,6 +612,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	unmap_range(kvm, kvm->arch.pgd, start, size);
 }
 
+static void stage2_unmap_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	hva_t hva = memslot->userspace_addr;
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = PAGE_SIZE * memslot->npages;
+	hva_t reg_end = hva + size;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we should
+	 * unmap any of them.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (!(vma->vm_flags & VM_PFNMAP)) {
+			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any reguler RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_unmap_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:	The KVM struct pointer for the VM.
@@ -853,6 +918,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	struct vm_area_struct *vma;
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
+	bool fault_ipa_uncached;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -919,6 +985,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (!hugetlb && !force_pte)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
+	fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
+
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
@@ -926,7 +994,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -934,7 +1003,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
@@ -1294,11 +1364,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		hva = vm_end;
 	} while (hva < reg_end);
 
-	if (ret) {
-		spin_lock(&kvm->mmu_lock);
+	spin_lock(&kvm->mmu_lock);
+	if (ret)
 		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
-		spin_unlock(&kvm->mmu_lock);
-	}
+	else
+		stage2_flush_memslot(kvm, memslot);
+	spin_unlock(&kvm->mmu_lock);
 	return ret;
 }
 
@@ -1310,6 +1381,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
+	/*
+	 * Readonly memslots are not incoherent with the caches by definition,
+	 * but in practice, they are used mostly to emulate ROMs or NOR flashes
+	 * that the guest may consider devices and hence map as uncached.
+	 * To prevent incoherency issues in these cases, tag all readonly
+	 * regions as incoherent.
+	 */
+	if (slot->flags & KVM_MEM_READONLY)
+		slot->flags |= KVM_MEMSLOT_INCOHERENT;
 	return 0;
 }
 
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf37737ee2..58cb3248d277 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/preempt.h>
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
@@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
+	int i;
+	struct kvm_vcpu *tmp;
+
+	/*
+	 * The KVM ABI specifies that a system event exit may call KVM_RUN
+	 * again and may perform shutdown/reboot at a later time that when the
+	 * actual request is made.  Since we are implementing PSCI and a
+	 * caller of PSCI reboot and shutdown expects that the system shuts
+	 * down or reboots immediately, let's make sure that VCPUs are not run
+	 * after this call is handled and before the VCPUs have been
+	 * re-initialized.
+	 */
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		tmp->arch.pause = true;
+		kvm_vcpu_kick(tmp);
+	}
+
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
 	vcpu->run->system_event.type = type;
 	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
diff --git a/arch/arm/mach-davinci/pm_domain.c b/arch/arm/mach-davinci/pm_domain.c
index 6b98413cebd6..641edc313938 100644
--- a/arch/arm/mach-davinci/pm_domain.c
+++ b/arch/arm/mach-davinci/pm_domain.c
@@ -14,7 +14,7 @@
 #include <linux/pm_clock.h>
 #include <linux/platform_device.h>
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int davinci_pm_runtime_suspend(struct device *dev)
 {
 	int ret;
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index e4a00bafffc1..603820e5aba7 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -21,7 +21,7 @@ menuconfig ARCH_EXYNOS
 	select HAVE_S3C_RTC if RTC_CLASS
 	select PINCTRL
 	select PINCTRL_EXYNOS
-	select PM_GENERIC_DOMAINS if PM_RUNTIME
+	select PM_GENERIC_DOMAINS if PM
 	select S5P_DEV_MFC
 	select SRAM
 	select MFD_SYSCON
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index 5057d61298b7..2f7616889c3f 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -31,6 +31,8 @@
 #include <linux/micrel_phy.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+#include <linux/fec.h>
+#include <linux/netdevice.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/system_misc.h>
@@ -39,6 +41,35 @@
 #include "cpuidle.h"
 #include "hardware.h"
 
+static struct fec_platform_data fec_pdata;
+
+static void imx6q_fec_sleep_enable(int enabled)
+{
+	struct regmap *gpr;
+
+	gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
+	if (!IS_ERR(gpr)) {
+		if (enabled)
+			regmap_update_bits(gpr, IOMUXC_GPR13,
+					   IMX6Q_GPR13_ENET_STOP_REQ,
+					   IMX6Q_GPR13_ENET_STOP_REQ);
+
+		else
+			regmap_update_bits(gpr, IOMUXC_GPR13,
+					   IMX6Q_GPR13_ENET_STOP_REQ, 0);
+	} else
+		pr_err("failed to find fsl,imx6q-iomux-gpr regmap\n");
+}
+
+static void __init imx6q_enet_plt_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_node_by_path("/soc/aips-bus@02100000/ethernet@02188000");
+	if (np && of_get_property(np, "fsl,magic-packet", NULL))
+		fec_pdata.sleep_mode_enable = imx6q_fec_sleep_enable;
+}
+
 /* For imx6q sabrelite board: set KSZ9021RN RGMII pad skew */
 static int ksz9021rn_phy_fixup(struct phy_device *phydev)
 {
@@ -261,6 +292,12 @@ static void __init imx6q_axi_init(void)
 	}
 }
 
+/* Add auxdata to pass platform data */
+static const struct of_dev_auxdata imx6q_auxdata_lookup[] __initconst = {
+	OF_DEV_AUXDATA("fsl,imx6q-fec", 0x02188000, NULL, &fec_pdata),
+	{ /* sentinel */ }
+};
+
 static void __init imx6q_init_machine(void)
 {
 	struct device *parent;
@@ -274,11 +311,13 @@ static void __init imx6q_init_machine(void)
 
 	imx6q_enet_phy_init();
 
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, parent);
+	of_platform_populate(NULL, of_default_bus_match_table,
+			     imx6q_auxdata_lookup, parent);
 
 	imx_anatop_init();
 	cpu_is_imx6q() ?  imx6q_pm_init() : imx6dl_pm_init();
 	imx6q_1588_init();
+	imx6q_enet_plt_init();
 	imx6q_axi_init();
 }
 
diff --git a/arch/arm/mach-imx/mach-imx6sx.c b/arch/arm/mach-imx/mach-imx6sx.c
index 7a96c6577234..747b012665f5 100644
--- a/arch/arm/mach-imx/mach-imx6sx.c
+++ b/arch/arm/mach-imx/mach-imx6sx.c
@@ -12,12 +12,62 @@
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+#include <linux/fec.h>
+#include <linux/netdevice.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
 #include "common.h"
 #include "cpuidle.h"
 
+static struct fec_platform_data fec_pdata[2];
+
+static void imx6sx_fec1_sleep_enable(int enabled)
+{
+	struct regmap *gpr;
+
+	gpr = syscon_regmap_lookup_by_compatible("fsl,imx6sx-iomuxc-gpr");
+	if (!IS_ERR(gpr)) {
+		if (enabled)
+			regmap_update_bits(gpr, IOMUXC_GPR4,
+					   IMX6SX_GPR4_FEC_ENET1_STOP_REQ,
+					   IMX6SX_GPR4_FEC_ENET1_STOP_REQ);
+		else
+			regmap_update_bits(gpr, IOMUXC_GPR4,
+					   IMX6SX_GPR4_FEC_ENET1_STOP_REQ, 0);
+	} else
+		pr_err("failed to find fsl,imx6sx-iomux-gpr regmap\n");
+}
+
+static void imx6sx_fec2_sleep_enable(int enabled)
+{
+	struct regmap *gpr;
+
+	gpr = syscon_regmap_lookup_by_compatible("fsl,imx6sx-iomuxc-gpr");
+	if (!IS_ERR(gpr)) {
+		if (enabled)
+			regmap_update_bits(gpr, IOMUXC_GPR4,
+					   IMX6SX_GPR4_FEC_ENET2_STOP_REQ,
+					   IMX6SX_GPR4_FEC_ENET2_STOP_REQ);
+		else
+			regmap_update_bits(gpr, IOMUXC_GPR4,
+					   IMX6SX_GPR4_FEC_ENET2_STOP_REQ, 0);
+	} else
+		pr_err("failed to find fsl,imx6sx-iomux-gpr regmap\n");
+}
+
+static void __init imx6sx_enet_plt_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_node_by_path("/soc/aips-bus@02100000/ethernet@02188000");
+	if (np && of_get_property(np, "fsl,magic-packet", NULL))
+		fec_pdata[0].sleep_mode_enable = imx6sx_fec1_sleep_enable;
+	np = of_find_node_by_path("/soc/aips-bus@02100000/ethernet@021b4000");
+	if (np && of_get_property(np, "fsl,magic-packet", NULL))
+		fec_pdata[1].sleep_mode_enable = imx6sx_fec2_sleep_enable;
+}
+
 static int ar8031_phy_fixup(struct phy_device *dev)
 {
 	u16 val;
diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c
index ca79ddac38bc..ef6041e7e675 100644
--- a/arch/arm/mach-keystone/pm_domain.c
+++ b/arch/arm/mach-keystone/pm_domain.c
@@ -19,7 +19,7 @@
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int keystone_pm_runtime_suspend(struct device *dev)
 {
 	int ret;
diff --git a/arch/arm/mach-mmp/Kconfig b/arch/arm/mach-mmp/Kconfig
index ebdba87b9671..fdbfadf00c84 100644
--- a/arch/arm/mach-mmp/Kconfig
+++ b/arch/arm/mach-mmp/Kconfig
@@ -86,11 +86,12 @@ config MACH_GPLUGD
 
 config MACH_MMP_DT
 	bool "Support MMP (ARMv5) platforms from device tree"
-	select CPU_PXA168
-	select CPU_PXA910
 	select USE_OF
 	select PINCTRL
 	select PINCTRL_SINGLE
+	select COMMON_CLK
+	select ARCH_HAS_RESET_CONTROLLER
+	select CPU_MOHAWK
 	help
 	  Include support for Marvell MMP2 based platforms using
 	  the device tree. Needn't select any other machine while
@@ -99,10 +100,12 @@ config MACH_MMP_DT
 config MACH_MMP2_DT
 	bool "Support MMP2 (ARMv7) platforms from device tree"
 	depends on !CPU_MOHAWK
-	select CPU_MMP2
 	select USE_OF
 	select PINCTRL
 	select PINCTRL_SINGLE
+	select COMMON_CLK
+	select ARCH_HAS_RESET_CONTROLLER
+	select CPU_PJ4
 	help
 	  Include support for Marvell MMP2 based platforms using
 	  the device tree.
@@ -111,21 +114,18 @@ endmenu
 
 config CPU_PXA168
 	bool
-	select COMMON_CLK
 	select CPU_MOHAWK
 	help
 	  Select code specific to PXA168
 
 config CPU_PXA910
 	bool
-	select COMMON_CLK
 	select CPU_MOHAWK
 	help
 	  Select code specific to PXA910
 
 config CPU_MMP2
 	bool
-	select COMMON_CLK
 	select CPU_PJ4
 	help
 	  Select code specific to MMP2. MMP2 is ARMv7 compatible.
diff --git a/arch/arm/mach-mmp/mmp-dt.c b/arch/arm/mach-mmp/mmp-dt.c
index cca529ceecb7..b2296c9309b8 100644
--- a/arch/arm/mach-mmp/mmp-dt.c
+++ b/arch/arm/mach-mmp/mmp-dt.c
@@ -11,63 +11,42 @@
 
 #include <linux/irqchip.h>
 #include <linux/of_platform.h>
+#include <linux/clk-provider.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
+#include <asm/hardware/cache-tauros2.h>
 
 #include "common.h"
 
 extern void __init mmp_dt_init_timer(void);
 
-static const struct of_dev_auxdata pxa168_auxdata_lookup[] __initconst = {
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.1", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4026000, "pxa2xx-uart.2", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4011000, "pxa2xx-i2c.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4025000, "pxa2xx-i2c.1", NULL),
-	OF_DEV_AUXDATA("marvell,mmp-gpio", 0xd4019000, "mmp-gpio", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-rtc", 0xd4010000, "sa1100-rtc", NULL),
-	{}
+static const char *pxa168_dt_board_compat[] __initdata = {
+	"mrvl,pxa168-aspenite",
+	NULL,
 };
 
-static const struct of_dev_auxdata pxa910_auxdata_lookup[] __initconst = {
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.1", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4036000, "pxa2xx-uart.2", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4011000, "pxa2xx-i2c.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4037000, "pxa2xx-i2c.1", NULL),
-	OF_DEV_AUXDATA("marvell,mmp-gpio", 0xd4019000, "mmp-gpio", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-rtc", 0xd4010000, "sa1100-rtc", NULL),
-	{}
+static const char *pxa910_dt_board_compat[] __initdata = {
+	"mrvl,pxa910-dkb",
+	NULL,
 };
 
-static void __init pxa168_dt_init(void)
-{
-	of_platform_populate(NULL, of_default_bus_match_table,
-			     pxa168_auxdata_lookup, NULL);
-}
-
-static void __init pxa910_dt_init(void)
+static void __init mmp_init_time(void)
 {
-	of_platform_populate(NULL, of_default_bus_match_table,
-			     pxa910_auxdata_lookup, NULL);
+#ifdef CONFIG_CACHE_TAUROS2
+	tauros2_init(0);
+#endif
+	mmp_dt_init_timer();
+	of_clk_init(NULL);
 }
 
-static const char *mmp_dt_board_compat[] __initdata = {
-	"mrvl,pxa168-aspenite",
-	"mrvl,pxa910-dkb",
-	NULL,
-};
-
 DT_MACHINE_START(PXA168_DT, "Marvell PXA168 (Device Tree Support)")
 	.map_io		= mmp_map_io,
-	.init_time	= mmp_dt_init_timer,
-	.init_machine	= pxa168_dt_init,
-	.dt_compat	= mmp_dt_board_compat,
+	.init_time	= mmp_init_time,
+	.dt_compat	= pxa168_dt_board_compat,
 MACHINE_END
 
 DT_MACHINE_START(PXA910_DT, "Marvell PXA910 (Device Tree Support)")
 	.map_io		= mmp_map_io,
-	.init_time	= mmp_dt_init_timer,
-	.init_machine	= pxa910_dt_init,
-	.dt_compat	= mmp_dt_board_compat,
+	.init_time	= mmp_init_time,
+	.dt_compat	= pxa910_dt_board_compat,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/mmp2-dt.c b/arch/arm/mach-mmp/mmp2-dt.c
index 023cb453f157..998c0f533abc 100644
--- a/arch/arm/mach-mmp/mmp2-dt.c
+++ b/arch/arm/mach-mmp/mmp2-dt.c
@@ -12,29 +12,22 @@
 #include <linux/io.h>
 #include <linux/irqchip.h>
 #include <linux/of_platform.h>
+#include <linux/clk-provider.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
+#include <asm/hardware/cache-tauros2.h>
 
 #include "common.h"
 
 extern void __init mmp_dt_init_timer(void);
 
-static const struct of_dev_auxdata mmp2_auxdata_lookup[] __initconst = {
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4030000, "pxa2xx-uart.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.1", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.2", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4016000, "pxa2xx-uart.3", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4011000, "pxa2xx-i2c.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4025000, "pxa2xx-i2c.1", NULL),
-	OF_DEV_AUXDATA("marvell,mmp-gpio", 0xd4019000, "mmp2-gpio", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-rtc", 0xd4010000, "sa1100-rtc", NULL),
-	{}
-};
-
-static void __init mmp2_dt_init(void)
+static void __init mmp_init_time(void)
 {
-	of_platform_populate(NULL, of_default_bus_match_table,
-			     mmp2_auxdata_lookup, NULL);
+#ifdef CONFIG_CACHE_TAUROS2
+	tauros2_init(0);
+#endif
+	mmp_dt_init_timer();
+	of_clk_init(NULL);
 }
 
 static const char *mmp2_dt_board_compat[] __initdata = {
@@ -44,7 +37,6 @@ static const char *mmp2_dt_board_compat[] __initdata = {
 
 DT_MACHINE_START(MMP2_DT, "Marvell MMP2 (Device Tree Support)")
 	.map_io		= mmp_map_io,
-	.init_time	= mmp_dt_init_timer,
-	.init_machine	= mmp2_dt_init,
+	.init_time	= mmp_init_time,
 	.dt_compat	= mmp2_dt_board_compat,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c
index 3f2d39672393..c40e209de65c 100644
--- a/arch/arm/mach-omap1/pm_bus.c
+++ b/arch/arm/mach-omap1/pm_bus.c
@@ -21,7 +21,7 @@
 
 #include "soc.h"
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int omap1_pm_runtime_suspend(struct device *dev)
 {
 	int ret;
@@ -59,7 +59,7 @@ static struct dev_pm_domain default_pm_domain = {
 #define OMAP1_PM_DOMAIN (&default_pm_domain)
 #else
 #define OMAP1_PM_DOMAIN NULL
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static struct pm_clk_notifier_block platform_bus_notifier = {
 	.pm_domain = OMAP1_PM_DOMAIN,
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index f0edec199cd4..6ab656cc4f16 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -15,7 +15,7 @@ config ARCH_OMAP3
 	select ARM_CPU_SUSPEND if PM
 	select OMAP_INTERCONNECT
 	select PM_OPP if PM
-	select PM_RUNTIME if CPU_IDLE
+	select PM if CPU_IDLE
 	select SOC_HAS_OMAP2_SDRC
 
 config ARCH_OMAP4
@@ -32,7 +32,7 @@ config ARCH_OMAP4
 	select PL310_ERRATA_588369 if CACHE_L2X0
 	select PL310_ERRATA_727915 if CACHE_L2X0
 	select PM_OPP if PM
-	select PM_RUNTIME if CPU_IDLE
+	select PM if CPU_IDLE
 	select ARM_ERRATA_754322
 	select ARM_ERRATA_775420
 
@@ -103,7 +103,7 @@ config ARCH_OMAP2PLUS_TYPICAL
 	select I2C_OMAP
 	select MENELAUS if ARCH_OMAP2
 	select NEON if CPU_V7
-	select PM_RUNTIME
+	select PM
 	select REGULATOR
 	select TWL4030_CORE if ARCH_OMAP3 || ARCH_OMAP4
 	select TWL4030_POWER if ARCH_OMAP3 || ARCH_OMAP4
diff --git a/arch/arm/mach-omap2/cclock3xxx_data.c b/arch/arm/mach-omap2/cclock3xxx_data.c
index 5c5ebb4db5f7..644ff3231bb8 100644
--- a/arch/arm/mach-omap2/cclock3xxx_data.c
+++ b/arch/arm/mach-omap2/cclock3xxx_data.c
@@ -111,6 +111,7 @@ static struct clk dpll3_ck;
 
 static const char *dpll3_ck_parent_names[] = {
 	"sys_ck",
+	"sys_ck",
 };
 
 static const struct clk_ops dpll3_ck_ops = {
@@ -733,6 +734,10 @@ static const char *corex2_fck_parent_names[] = {
 DEFINE_STRUCT_CLK_HW_OMAP(corex2_fck, NULL);
 DEFINE_STRUCT_CLK(corex2_fck, corex2_fck_parent_names, core_ck_ops);
 
+static const char *cpefuse_fck_parent_names[] = {
+	"sys_ck",
+};
+
 static struct clk cpefuse_fck;
 
 static struct clk_hw_omap cpefuse_fck_hw = {
@@ -744,7 +749,7 @@ static struct clk_hw_omap cpefuse_fck_hw = {
 	.clkdm_name	= "core_l4_clkdm",
 };
 
-DEFINE_STRUCT_CLK(cpefuse_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(cpefuse_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk csi2_96m_fck;
 
@@ -775,7 +780,7 @@ static struct clk_hw_omap d2d_26m_fck_hw = {
 	.clkdm_name	= "d2d_clkdm",
 };
 
-DEFINE_STRUCT_CLK(d2d_26m_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(d2d_26m_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk des1_ick;
 
@@ -1046,7 +1051,7 @@ static struct clk_hw_omap dss2_alwon_fck_hw = {
 	.clkdm_name	= "dss_clkdm",
 };
 
-DEFINE_STRUCT_CLK(dss2_alwon_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(dss2_alwon_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk dss_96m_fck;
 
@@ -1368,7 +1373,7 @@ DEFINE_STRUCT_CLK(gpio1_dbck, gpio1_dbck_parent_names, aes2_ick_ops);
 static struct clk wkup_l4_ick;
 
 DEFINE_STRUCT_CLK_HW_OMAP(wkup_l4_ick, "wkup_clkdm");
-DEFINE_STRUCT_CLK(wkup_l4_ick, dpll3_ck_parent_names, core_l4_ick_ops);
+DEFINE_STRUCT_CLK(wkup_l4_ick, cpefuse_fck_parent_names, core_l4_ick_ops);
 
 static struct clk gpio1_ick;
 
@@ -1862,7 +1867,7 @@ static struct clk_hw_omap hecc_ck_hw = {
 	.clkdm_name	= "core_l3_clkdm",
 };
 
-DEFINE_STRUCT_CLK(hecc_ck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(hecc_ck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk hsotgusb_fck_am35xx;
 
@@ -1875,7 +1880,7 @@ static struct clk_hw_omap hsotgusb_fck_am35xx_hw = {
 	.clkdm_name	= "core_l3_clkdm",
 };
 
-DEFINE_STRUCT_CLK(hsotgusb_fck_am35xx, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(hsotgusb_fck_am35xx, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk hsotgusb_ick_3430es1;
 
@@ -2411,7 +2416,7 @@ static struct clk_hw_omap modem_fck_hw = {
 	.clkdm_name	= "d2d_clkdm",
 };
 
-DEFINE_STRUCT_CLK(modem_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(modem_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk mspro_fck;
 
@@ -2710,7 +2715,7 @@ static struct clk_hw_omap sr1_fck_hw = {
 	.clkdm_name	= "wkup_clkdm",
 };
 
-DEFINE_STRUCT_CLK(sr1_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(sr1_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk sr2_fck;
 
@@ -2724,7 +2729,7 @@ static struct clk_hw_omap sr2_fck_hw = {
 	.clkdm_name	= "wkup_clkdm",
 };
 
-DEFINE_STRUCT_CLK(sr2_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(sr2_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk sr_l4_ick;
 
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index 641337c6cde9..a4282e79143e 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -270,8 +270,6 @@ extern const struct clksel_rate div31_1to31_rates[];
 
 extern void __iomem *clk_memmaps[];
 
-extern int am33xx_clk_init(void);
-
 extern int omap2_clkops_enable_clkdm(struct clk_hw *hw);
 extern void omap2_clkops_disable_clkdm(struct clk_hw *hw);
 
diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c
index 20e120d071dd..c2da2a0fe5ad 100644
--- a/arch/arm/mach-omap2/dpll3xxx.c
+++ b/arch/arm/mach-omap2/dpll3xxx.c
@@ -474,7 +474,7 @@ void omap3_noncore_dpll_disable(struct clk_hw *hw)
  */
 long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long rate,
 				       unsigned long *best_parent_rate,
-				       struct clk **best_parent_clk)
+				       struct clk_hw **best_parent_clk)
 {
 	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
 	struct dpll_data *dd;
@@ -488,10 +488,10 @@ long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long rate,
 
 	if (__clk_get_rate(dd->clk_bypass) == rate &&
 	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		*best_parent_clk = dd->clk_bypass;
+		*best_parent_clk = __clk_get_hw(dd->clk_bypass);
 	} else {
 		rate = omap2_dpll_round_rate(hw, rate, best_parent_rate);
-		*best_parent_clk = dd->clk_ref;
+		*best_parent_clk = __clk_get_hw(dd->clk_ref);
 	}
 
 	*best_parent_rate = rate;
diff --git a/arch/arm/mach-omap2/dpll44xx.c b/arch/arm/mach-omap2/dpll44xx.c
index 535822fcf4bb..0e58e5a85d53 100644
--- a/arch/arm/mach-omap2/dpll44xx.c
+++ b/arch/arm/mach-omap2/dpll44xx.c
@@ -223,7 +223,7 @@ out:
  */
 long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk)
+					struct clk_hw **best_parent_clk)
 {
 	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
 	struct dpll_data *dd;
@@ -237,11 +237,11 @@ long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long rate,
 
 	if (__clk_get_rate(dd->clk_bypass) == rate &&
 	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		*best_parent_clk = dd->clk_bypass;
+		*best_parent_clk = __clk_get_hw(dd->clk_bypass);
 	} else {
 		rate = omap4_dpll_regm4xen_round_rate(hw, rate,
 						      best_parent_rate);
-		*best_parent_clk = dd->clk_ref;
+		*best_parent_clk = __clk_get_hw(dd->clk_ref);
 	}
 
 	*best_parent_rate = rate;
diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c
index 53841dea80ea..c25feba05818 100644
--- a/arch/arm/mach-omap2/id.c
+++ b/arch/arm/mach-omap2/id.c
@@ -471,11 +471,15 @@ void __init omap3xxx_check_revision(void)
 			cpu_rev = "1.0";
 			break;
 		case 1:
-		/* FALLTHROUGH */
-		default:
 			omap_revision = AM437X_REV_ES1_1;
 			cpu_rev = "1.1";
 			break;
+		case 2:
+		/* FALLTHROUGH */
+		default:
+			omap_revision = AM437X_REV_ES1_2;
+			cpu_rev = "1.2";
+			break;
 		}
 		break;
 	case 0xb8f2:
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 4fc838354e31..a1bd6affb508 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -361,7 +361,7 @@ static void __init omap_hwmod_init_postsetup(void)
 	u8 postsetup_state;
 
 	/* Set the default postsetup state for all hwmods */
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 	postsetup_state = _HWMOD_STATE_IDLE;
 #else
 	postsetup_state = _HWMOD_STATE_ENABLED;
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 8c58b71c2727..be9541e18650 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -588,7 +588,7 @@ odbs_exit:
 	return ERR_PTR(ret);
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int _od_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
diff --git a/arch/arm/mach-omap2/soc.h b/arch/arm/mach-omap2/soc.h
index 4376f59626d1..c1a3b4416311 100644
--- a/arch/arm/mach-omap2/soc.h
+++ b/arch/arm/mach-omap2/soc.h
@@ -446,6 +446,7 @@ IS_OMAP_TYPE(3430, 0x3430)
 #define AM437X_CLASS		0x43700000
 #define AM437X_REV_ES1_0	(AM437X_CLASS | (0x10 << 8))
 #define AM437X_REV_ES1_1	(AM437X_CLASS | (0x11 << 8))
+#define AM437X_REV_ES1_2	(AM437X_CLASS | (0x12 << 8))
 
 #define OMAP443X_CLASS		0x44300044
 #define OMAP4430_REV_ES1_0	(OMAP443X_CLASS | (0x10 << 8))
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 7dd894ece9ae..d28ecb9ef172 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -37,7 +37,7 @@
 
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
-#include <asm/mach/irda.h>
+#include <linux/platform_data/irda-sa11x0.h>
 #include <asm/mach/map.h>
 #include <mach/assabet.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index b90c7d828391..7fcbe3d119c7 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -43,7 +43,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/irda.h>
+#include <linux/platform_data/irda-sa11x0.h>
 
 #include <asm/hardware/scoop.h>
 #include <asm/mach/sharpsl_param.h>
diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c
index 3c43219bc881..c6b412054a3c 100644
--- a/arch/arm/mach-sa1100/h3100.c
+++ b/arch/arm/mach-sa1100/h3100.c
@@ -18,7 +18,7 @@
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
-#include <asm/mach/irda.h>
+#include <linux/platform_data/irda-sa11x0.h>
 
 #include <mach/h3xxx.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c
index 5be54c214c7c..118338efd790 100644
--- a/arch/arm/mach-sa1100/h3600.c
+++ b/arch/arm/mach-sa1100/h3600.c
@@ -18,7 +18,7 @@
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
-#include <asm/mach/irda.h>
+#include <linux/platform_data/irda-sa11x0.h>
 
 #include <mach/h3xxx.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c
index b47262afb240..f8197eb6e566 100644
--- a/arch/arm/mach-shmobile/board-lager.c
+++ b/arch/arm/mach-shmobile/board-lager.c
@@ -32,7 +32,6 @@
 #include <linux/pinctrl/machine.h>
 #include <linux/platform_data/camera-rcar.h>
 #include <linux/platform_data/gpio-rcar.h>
-#include <linux/platform_data/rcar-du.h>
 #include <linux/platform_data/usb-rcar-gen2-phy.h>
 #include <linux/platform_device.h>
 #include <linux/phy.h>
@@ -83,61 +82,6 @@
  *
  */
 
-/* DU */
-static struct rcar_du_encoder_data lager_du_encoders[] = {
-	{
-		.type = RCAR_DU_ENCODER_VGA,
-		.output = RCAR_DU_OUTPUT_DPAD0,
-	}, {
-		.type = RCAR_DU_ENCODER_NONE,
-		.output = RCAR_DU_OUTPUT_LVDS1,
-		.connector.lvds.panel = {
-			.width_mm = 210,
-			.height_mm = 158,
-			.mode = {
-				.pixelclock = 65000000,
-				.hactive = 1024,
-				.hfront_porch = 20,
-				.hback_porch = 160,
-				.hsync_len = 136,
-				.vactive = 768,
-				.vfront_porch = 3,
-				.vback_porch = 29,
-				.vsync_len = 6,
-			},
-		},
-	},
-};
-
-static const struct rcar_du_platform_data lager_du_pdata __initconst = {
-	.encoders = lager_du_encoders,
-	.num_encoders = ARRAY_SIZE(lager_du_encoders),
-};
-
-static const struct resource du_resources[] __initconst = {
-	DEFINE_RES_MEM(0xfeb00000, 0x70000),
-	DEFINE_RES_MEM_NAMED(0xfeb90000, 0x1c, "lvds.0"),
-	DEFINE_RES_MEM_NAMED(0xfeb94000, 0x1c, "lvds.1"),
-	DEFINE_RES_IRQ(gic_spi(256)),
-	DEFINE_RES_IRQ(gic_spi(268)),
-	DEFINE_RES_IRQ(gic_spi(269)),
-};
-
-static void __init lager_add_du_device(void)
-{
-	struct platform_device_info info = {
-		.name = "rcar-du-r8a7790",
-		.id = -1,
-		.res = du_resources,
-		.num_res = ARRAY_SIZE(du_resources),
-		.data = &lager_du_pdata,
-		.size_data = sizeof(lager_du_pdata),
-		.dma_mask = DMA_BIT_MASK(32),
-	};
-
-	platform_device_register_full(&info);
-}
-
 /* LEDS */
 static struct gpio_led lager_leds[] = {
 	{
@@ -800,8 +744,6 @@ static void __init lager_add_standard_devices(void)
 
 	platform_device_register_full(&ether_info);
 
-	lager_add_du_device();
-
 	platform_device_register_resndata(NULL, "qspi", 0,
 					  qspi_resources,
 					  ARRAY_SIZE(qspi_resources),
diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c
index 994dc7d86ae2..598f704f76ae 100644
--- a/arch/arm/mach-shmobile/board-marzen.c
+++ b/arch/arm/mach-shmobile/board-marzen.c
@@ -27,7 +27,6 @@
 #include <linux/pinctrl/machine.h>
 #include <linux/platform_data/camera-rcar.h>
 #include <linux/platform_data/gpio-rcar.h>
-#include <linux/platform_data/rcar-du.h>
 #include <linux/platform_data/usb-rcar-phy.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
@@ -171,62 +170,6 @@ static struct platform_device hspi_device = {
 	.num_resources	= ARRAY_SIZE(hspi_resources),
 };
 
-/*
- * DU
- *
- * The panel only specifies the [hv]display and [hv]total values. The position
- * and width of the sync pulses don't matter, they're copied from VESA timings.
- */
-static struct rcar_du_encoder_data du_encoders[] = {
-	{
-		.type = RCAR_DU_ENCODER_VGA,
-		.output = RCAR_DU_OUTPUT_DPAD0,
-	}, {
-		.type = RCAR_DU_ENCODER_LVDS,
-		.output = RCAR_DU_OUTPUT_DPAD1,
-		.connector.lvds.panel = {
-			.width_mm = 210,
-			.height_mm = 158,
-			.mode = {
-				.pixelclock = 65000000,
-				.hactive = 1024,
-				.hfront_porch = 20,
-				.hback_porch = 160,
-				.hsync_len = 136,
-				.vactive = 768,
-				.vfront_porch = 3,
-				.vback_porch = 29,
-				.vsync_len = 6,
-			},
-		},
-	},
-};
-
-static const struct rcar_du_platform_data du_pdata __initconst = {
-	.encoders = du_encoders,
-	.num_encoders = ARRAY_SIZE(du_encoders),
-};
-
-static const struct resource du_resources[] __initconst = {
-	DEFINE_RES_MEM(0xfff80000, 0x40000),
-	DEFINE_RES_IRQ(gic_iid(0x3f)),
-};
-
-static void __init marzen_add_du_device(void)
-{
-	struct platform_device_info info = {
-		.name = "rcar-du-r8a7779",
-		.id = -1,
-		.res = du_resources,
-		.num_res = ARRAY_SIZE(du_resources),
-		.data = &du_pdata,
-		.size_data = sizeof(du_pdata),
-		.dma_mask = DMA_BIT_MASK(32),
-	};
-
-	platform_device_register_full(&info);
-}
-
 /* LEDS */
 static struct gpio_led marzen_leds[] = {
 	{
@@ -385,7 +328,6 @@ static void __init marzen_init(void)
 	platform_device_register_full(&vin1_info);
 	platform_device_register_full(&vin3_info);
 	platform_add_devices(marzen_devices, ARRAY_SIZE(marzen_devices));
-	marzen_add_du_device();
 }
 
 static const char *marzen_boards_compat_dt[] __initdata = {
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e8907117861e..7864797609b3 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1947,9 +1947,8 @@ EXPORT_SYMBOL_GPL(arm_iommu_release_mapping);
  *	arm_iommu_create_mapping)
  *
  * Attaches specified io address space mapping to the provided device,
- * this replaces the dma operations (dma_map_ops pointer) with the
- * IOMMU aware version. More than one client might be attached to
- * the same io address space mapping.
+ * More than one client might be attached to the same io address space
+ * mapping.
  */
 int arm_iommu_attach_device(struct device *dev,
 			    struct dma_iommu_mapping *mapping)
@@ -1962,7 +1961,6 @@ int arm_iommu_attach_device(struct device *dev,
 
 	kref_get(&mapping->kref);
 	dev->archdata.mapping = mapping;
-	set_dma_ops(dev, &iommu_ops);
 
 	pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
 	return 0;
@@ -1974,7 +1972,6 @@ EXPORT_SYMBOL_GPL(arm_iommu_attach_device);
  * @dev: valid struct device pointer
  *
  * Detaches the provided device from a previously attached map.
- * This voids the dma operations (dma_map_ops pointer)
  */
 void arm_iommu_detach_device(struct device *dev)
 {
@@ -1989,10 +1986,83 @@ void arm_iommu_detach_device(struct device *dev)
 	iommu_detach_device(mapping->domain, dev);
 	kref_put(&mapping->kref, release_iommu_mapping);
 	dev->archdata.mapping = NULL;
-	set_dma_ops(dev, NULL);
 
 	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
 }
 EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
 
-#endif
+static struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
+{
+	return coherent ? &iommu_coherent_ops : &iommu_ops;
+}
+
+static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				    struct iommu_ops *iommu)
+{
+	struct dma_iommu_mapping *mapping;
+
+	if (!iommu)
+		return false;
+
+	mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
+	if (IS_ERR(mapping)) {
+		pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
+				size, dev_name(dev));
+		return false;
+	}
+
+	if (arm_iommu_attach_device(dev, mapping)) {
+		pr_warn("Failed to attached device %s to IOMMU_mapping\n",
+				dev_name(dev));
+		arm_iommu_release_mapping(mapping);
+		return false;
+	}
+
+	return true;
+}
+
+static void arm_teardown_iommu_dma_ops(struct device *dev)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+
+	arm_iommu_detach_device(dev);
+	arm_iommu_release_mapping(mapping);
+}
+
+#else
+
+static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				    struct iommu_ops *iommu)
+{
+	return false;
+}
+
+static void arm_teardown_iommu_dma_ops(struct device *dev) { }
+
+#define arm_get_iommu_dma_map_ops arm_get_dma_map_ops
+
+#endif	/* CONFIG_ARM_DMA_USE_IOMMU */
+
+static struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
+{
+	return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
+}
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			struct iommu_ops *iommu, bool coherent)
+{
+	struct dma_map_ops *dma_ops;
+
+	dev->archdata.dma_coherent = coherent;
+	if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu))
+		dma_ops = arm_get_iommu_dma_map_ops(coherent);
+	else
+		dma_ops = arm_get_dma_map_ops(coherent);
+
+	set_dma_ops(dev, dma_ops);
+}
+
+void arch_teardown_dma_ops(struct device *dev)
+{
+	arm_teardown_iommu_dma_ops(dev);
+}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 688db03ef5b8..b1f9a20a3677 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -14,7 +14,9 @@ config ARM64
 	select ARM_ARCH_TIMER
 	select ARM_GIC
 	select AUDIT_ARCH_COMPAT_GENERIC
+	select ARM_GIC_V2M if PCI_MSI
 	select ARM_GIC_V3
+	select ARM_GIC_V3_ITS if PCI_MSI
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index dd301be89ecc..5376d908eabe 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1,6 +1,7 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
 CONFIG_AUDIT=y
 CONFIG_NO_HZ_IDLE=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -13,14 +14,12 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_RESOURCE_COUNTERS=y
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_HUGETLB=y
 # CONFIG_UTS_NS is not set
 # CONFIG_IPC_NS is not set
-# CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
@@ -92,7 +91,6 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-# CONFIG_HMC_DRV is not set
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
 CONFIG_GPIO_PL061=y
@@ -133,6 +131,8 @@ CONFIG_EXT3_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA=y
+CONFIG_AUTOFS4_FS=y
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=y
 CONFIG_VFAT_FS=y
@@ -152,14 +152,15 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
 # CONFIG_FTRACE is not set
+CONFIG_KEYS=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
-CONFIG_CRYPTO_AES_ARM64_CE=y
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
 CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 6b61091c7f4c..55103e50c51b 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -27,6 +27,7 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mman.h
 generic-y += msgbuf.h
+generic-y += msi.h
 generic-y += mutex.h
 generic-y += pci.h
 generic-y += pci-bridge.h
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index d34189bceff7..9ce3e680ae1c 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -52,13 +52,14 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
 	dev->archdata.dma_ops = ops;
 }
 
-static inline int set_arch_dma_coherent_ops(struct device *dev)
+static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				      struct iommu_ops *iommu, bool coherent)
 {
-	dev->archdata.dma_coherent = true;
-	set_dma_ops(dev, &coherent_swiotlb_dma_ops);
-	return 0;
+	dev->archdata.dma_coherent = coherent;
+	if (coherent)
+		set_dma_ops(dev, &coherent_swiotlb_dma_ops);
 }
-#define set_arch_dma_coherent_ops	set_arch_dma_coherent_ops
+#define arch_setup_dma_ops	arch_setup_dma_ops
 
 /* do not use this function in a driver */
 static inline bool is_device_dma_coherent(struct device *dev)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5674a55b5518..8127e45e2637 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+}
+
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 {
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2012c4ba8d67..0b7dfdb931df 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -165,8 +165,6 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
@@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
+void force_vm_exit(const cpumask_t *mask);
 
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 0caf7a59f6a1..14a74f136272 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+					     unsigned long size,
+					     bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
 		kvm_flush_dcache_to_poc((void *)hva, size);
 
 	if (!icache_is_aliasing()) {		/* PIPT */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index df22314f57cf..210d632aa5ad 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -298,7 +298,6 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
 #define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
 #define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
 
-#define pmd_page(pmd)           pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 #define pud_write(pud)		pte_write(pud_pte(pud))
 #define pud_pfn(pud)		(((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
 
@@ -401,7 +400,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
 }
 
-#define pud_page(pud)           pmd_page(pud_pmd(pud))
+#define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
 #endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
 
@@ -437,6 +436,8 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
 	return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
 }
 
+#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
+
 #endif  /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index c45b7b1b7197..cee128732435 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -99,12 +99,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
+	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-	arch_spinlock_t lockval = ACCESS_ONCE(*lock);
+	arch_spinlock_t lockval = READ_ONCE(*lock);
 	return (lockval.next - lockval.owner) > 1;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 3425f311c49e..f1dbca7d5c96 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -540,6 +540,8 @@ const struct cpu_operations cpu_psci_ops = {
 	.name		= "psci",
 #ifdef CONFIG_CPU_IDLE
 	.cpu_init_idle	= cpu_psci_cpu_init_idle,
+#endif
+#ifdef CONFIG_ARM64_CPU_SUSPEND
 	.cpu_suspend	= cpu_psci_cpu_suspend,
 #endif
 #ifdef CONFIG_SMP
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 3771b72b6569..2d6b6065fe7f 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -5,6 +5,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/pgtable.h>
 #include <asm/memory.h>
+#include <asm/mmu_context.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
@@ -98,7 +99,18 @@ int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	 */
 	ret = __cpu_suspend_enter(arg, fn);
 	if (ret == 0) {
-		cpu_switch_mm(mm->pgd, mm);
+		/*
+		 * We are resuming from reset with TTBR0_EL1 set to the
+		 * idmap to enable the MMU; restore the active_mm mappings in
+		 * TTBR0_EL1 unless the active_mm == &init_mm, in which case
+		 * the thread entered __cpu_suspend with TTBR0_EL1 set to
+		 * reserved TTBR0 page tables and should be restored as such.
+		 */
+		if (mm == &init_mm)
+			cpu_set_reserved_ttbr0();
+		else
+			cpu_switch_mm(mm->pgd, mm);
+
 		flush_tlb_all();
 
 		/*
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 76794692c20b..9535bd555d1d 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
 	return 0;
 }
 
@@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void)
 	return -EINVAL;
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-	int phys_target = kvm_target_cpu();
-
-	if (init->target != phys_target)
-		return -EINVAL;
-
-	vcpu->arch.target = phys_target;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		if (init->features[i / 32] & (1 << (i % 32))) {
-			if (i >= KVM_VCPU_MAX_FEATURES)
-				return -ENOENT;
-			set_bit(i, vcpu->arch.features);
-		}
-	}
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	int target = kvm_target_cpu();
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index bf69601be546..cf33f33333cc 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -182,9 +182,6 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 	static const char units[] = "KMGTPE";
 	u64 prot = val & pg_level[level].mask;
 
-	if (addr < LOWEST_ADDR)
-		return;
-
 	if (!st->level) {
 		st->level = level;
 		st->current_prot = prot;
@@ -272,7 +269,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 
 static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start)
 {
-	pgd_t *pgd = pgd_offset(mm, 0);
+	pgd_t *pgd = pgd_offset(mm, 0UL);
 	unsigned i;
 	unsigned long addr;
 
diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c
index b0a608da7bd1..b964c667aced 100644
--- a/arch/cris/arch-v10/lib/usercopy.c
+++ b/arch/cris/arch-v10/lib/usercopy.c
@@ -30,8 +30,7 @@
 /* Copy to userspace.  This is based on the memcpy used for
    kernel-to-kernel copying; see "string.c".  */
 
-unsigned long
-__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
+unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -187,13 +186,14 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn)
 
   return retn;
 }
+EXPORT_SYMBOL(__copy_user);
 
 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
    userland.  The return-value is the number of bytes that were
    inaccessible.  */
 
-unsigned long
-__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -369,11 +369,10 @@ copy_exception_bytes:
 
   return retn + n;
 }
+EXPORT_SYMBOL(__copy_user_zeroing);
 
 /* Zero userspace.  */
-
-unsigned long
-__do_clear_user (void __user *pto, unsigned long pn)
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -521,3 +520,4 @@ __do_clear_user (void __user *pto, unsigned long pn)
 
   return retn;
 }
+EXPORT_SYMBOL(__do_clear_user);
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 15a9ed1d579c..4fc16b44fff2 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -108,6 +108,7 @@ config ETRAX_AXISFLASHMAP
 	select MTD_JEDECPROBE
 	select MTD_BLOCK
 	select MTD_COMPLEX_MAPPINGS
+	select MTD_MTDRAM
 	help
 	  This option enables MTD mapping of flash devices.  Needed to use
 	  flash memories.  If unsure, say Y.
@@ -358,13 +359,6 @@ config ETRAX_SPI_MMC
 	default MMC
 	select SPI
 	select MMC_SPI
-	select ETRAX_SPI_MMC_BOARD
-
-# For the parts that can't be a module (due to restrictions in
-# framework elsewhere).
-config ETRAX_SPI_MMC_BOARD
-       boolean
-       default n
 
 # While the board info is MMC_SPI only, the drivers are written to be
 # independent of MMC_SPI, so we'll keep SPI non-dependent on the
diff --git a/arch/cris/arch-v32/drivers/Makefile b/arch/cris/arch-v32/drivers/Makefile
index 39aa3c117a86..15fbfefced2c 100644
--- a/arch/cris/arch-v32/drivers/Makefile
+++ b/arch/cris/arch-v32/drivers/Makefile
@@ -10,4 +10,3 @@ obj-$(CONFIG_ETRAX_IOP_FW_LOAD)         += iop_fw_load.o
 obj-$(CONFIG_ETRAX_I2C)			+= i2c.o
 obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL)	+= sync_serial.o
 obj-$(CONFIG_PCI)			+= pci/
-obj-$(CONFIG_ETRAX_SPI_MMC_BOARD)	+= board_mmcspi.o
diff --git a/arch/cris/arch-v32/drivers/i2c.h b/arch/cris/arch-v32/drivers/i2c.h
index c073cf4ba016..d9cc856f89fb 100644
--- a/arch/cris/arch-v32/drivers/i2c.h
+++ b/arch/cris/arch-v32/drivers/i2c.h
@@ -2,7 +2,6 @@
 #include <linux/init.h>
 
 /* High level I2C actions */
-int __init i2c_init(void);
 int i2c_write(unsigned char theSlave, void *data, size_t nbytes);
 int i2c_read(unsigned char theSlave, void *data, size_t nbytes);
 int i2c_writereg(unsigned char theSlave, unsigned char theReg, unsigned char theValue);
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index 5a149134cfb5..08a313fc2241 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -1,8 +1,7 @@
 /*
- * Simple synchronous serial port driver for ETRAX FS and Artpec-3.
- *
- * Copyright (c) 2005 Axis Communications AB
+ * Simple synchronous serial port driver for ETRAX FS and ARTPEC-3.
  *
+ * Copyright (c) 2005, 2008 Axis Communications AB
  * Author: Mikael Starvik
  *
  */
@@ -16,16 +15,17 @@
 #include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
 #include <linux/wait.h>
 
 #include <asm/io.h>
-#include <dma.h>
+#include <mach/dma.h>
 #include <pinmux.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/sser_defs.h>
+#include <hwregs/timer_defs.h>
 #include <hwregs/dma_defs.h>
 #include <hwregs/dma.h>
 #include <hwregs/intr_vect_defs.h>
@@ -59,22 +59,23 @@
 /* the rest of the data pointed out by Descr1 and set readp to the start */
 /* of Descr2                                                             */
 
-#define SYNC_SERIAL_MAJOR 125
-
 /* IN_BUFFER_SIZE should be a multiple of 6 to make sure that 24 bit */
 /* words can be handled */
-#define IN_BUFFER_SIZE 12288
-#define IN_DESCR_SIZE 256
-#define NBR_IN_DESCR (IN_BUFFER_SIZE/IN_DESCR_SIZE)
+#define IN_DESCR_SIZE SSP_INPUT_CHUNK_SIZE
+#define NBR_IN_DESCR (8*6)
+#define IN_BUFFER_SIZE (IN_DESCR_SIZE * NBR_IN_DESCR)
 
-#define OUT_BUFFER_SIZE 1024*8
 #define NBR_OUT_DESCR 8
+#define OUT_BUFFER_SIZE (1024 * NBR_OUT_DESCR)
 
 #define DEFAULT_FRAME_RATE 0
 #define DEFAULT_WORD_RATE 7
 
+/* To be removed when we move to pure udev. */
+#define SYNC_SERIAL_MAJOR 125
+
 /* NOTE: Enabling some debug will likely cause overrun or underrun,
- * especially if manual mode is use.
+ * especially if manual mode is used.
  */
 #define DEBUG(x)
 #define DEBUGREAD(x)
@@ -85,11 +86,28 @@
 #define DEBUGTRDMA(x)
 #define DEBUGOUTBUF(x)
 
-typedef struct sync_port
-{
-	reg_scope_instances regi_sser;
-	reg_scope_instances regi_dmain;
-	reg_scope_instances regi_dmaout;
+enum syncser_irq_setup {
+	no_irq_setup = 0,
+	dma_irq_setup = 1,
+	manual_irq_setup = 2,
+};
+
+struct sync_port {
+	unsigned long regi_sser;
+	unsigned long regi_dmain;
+	unsigned long regi_dmaout;
+
+	/* Interrupt vectors. */
+	unsigned long dma_in_intr_vect; /* Used for DMA in. */
+	unsigned long dma_out_intr_vect; /* Used for DMA out. */
+	unsigned long syncser_intr_vect; /* Used when no DMA. */
+
+	/* DMA number for in and out. */
+	unsigned int dma_in_nbr;
+	unsigned int dma_out_nbr;
+
+	/* DMA owner. */
+	enum dma_owner req_dma;
 
 	char started; /* 1 if port has been started */
 	char port_nbr; /* Port 0 or 1 */
@@ -99,22 +117,29 @@ typedef struct sync_port
 	char use_dma;  /* 1 if port uses dma */
 	char tr_running;
 
-	char init_irqs;
+	enum syncser_irq_setup init_irqs;
 	int output;
 	int input;
 
 	/* Next byte to be read by application */
-	volatile unsigned char *volatile readp;
+	unsigned char *readp;
 	/* Next byte to be written by etrax */
-	volatile unsigned char *volatile writep;
+	unsigned char *writep;
 
 	unsigned int in_buffer_size;
+	unsigned int in_buffer_len;
 	unsigned int inbufchunk;
-	unsigned char out_buffer[OUT_BUFFER_SIZE] __attribute__ ((aligned(32)));
-	unsigned char in_buffer[IN_BUFFER_SIZE]__attribute__ ((aligned(32)));
-	unsigned char flip[IN_BUFFER_SIZE] __attribute__ ((aligned(32)));
-	struct dma_descr_data* next_rx_desc;
-	struct dma_descr_data* prev_rx_desc;
+	/* Data buffers for in and output. */
+	unsigned char out_buffer[OUT_BUFFER_SIZE] __aligned(32);
+	unsigned char in_buffer[IN_BUFFER_SIZE] __aligned(32);
+	unsigned char flip[IN_BUFFER_SIZE] __aligned(32);
+	struct timespec timestamp[NBR_IN_DESCR];
+	struct dma_descr_data *next_rx_desc;
+	struct dma_descr_data *prev_rx_desc;
+
+	struct timeval last_timestamp;
+	int read_ts_idx;
+	int write_ts_idx;
 
 	/* Pointer to the first available descriptor in the ring,
 	 * unless active_tr_descr == catch_tr_descr and a dma
@@ -135,114 +160,138 @@ typedef struct sync_port
 	/* Number of bytes currently locked for being read by DMA */
 	int out_buf_count;
 
-	dma_descr_data in_descr[NBR_IN_DESCR] __attribute__ ((__aligned__(16)));
-	dma_descr_context in_context __attribute__ ((__aligned__(32)));
-	dma_descr_data out_descr[NBR_OUT_DESCR]
-		__attribute__ ((__aligned__(16)));
-	dma_descr_context out_context __attribute__ ((__aligned__(32)));
+	dma_descr_context in_context __aligned(32);
+	dma_descr_context out_context __aligned(32);
+	dma_descr_data in_descr[NBR_IN_DESCR] __aligned(16);
+	dma_descr_data out_descr[NBR_OUT_DESCR] __aligned(16);
+
 	wait_queue_head_t out_wait_q;
 	wait_queue_head_t in_wait_q;
 
 	spinlock_t lock;
-} sync_port;
+};
 
 static DEFINE_MUTEX(sync_serial_mutex);
 static int etrax_sync_serial_init(void);
 static void initialize_port(int portnbr);
 static inline int sync_data_avail(struct sync_port *port);
 
-static int sync_serial_open(struct inode *, struct file*);
-static int sync_serial_release(struct inode*, struct file*);
+static int sync_serial_open(struct inode *, struct file *);
+static int sync_serial_release(struct inode *, struct file *);
 static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
 
-static int sync_serial_ioctl(struct file *,
-			     unsigned int cmd, unsigned long arg);
-static ssize_t sync_serial_write(struct file * file, const char * buf,
+static long sync_serial_ioctl(struct file *file,
+			      unsigned int cmd, unsigned long arg);
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg);
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos);
-static ssize_t sync_serial_read(struct file *file, char *buf,
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos);
 
-#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
-     defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
-    (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
-     defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))
+#if ((defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
+	(defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)))
 #define SYNC_SER_DMA
+#else
+#define SYNC_SER_MANUAL
 #endif
 
-static void send_word(sync_port* port);
-static void start_dma_out(struct sync_port *port, const char *data, int count);
-static void start_dma_in(sync_port* port);
 #ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count);
+static void start_dma_in(struct sync_port *port);
 static irqreturn_t tr_interrupt(int irq, void *dev_id);
 static irqreturn_t rx_interrupt(int irq, void *dev_id);
 #endif
-
-#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
-     !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
-    (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
-     !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))
-#define SYNC_SER_MANUAL
-#endif
 #ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port);
 static irqreturn_t manual_interrupt(int irq, void *dev_id);
 #endif
 
-#ifdef CONFIG_ETRAXFS	/* ETRAX FS */
-#define OUT_DMA_NBR 4
-#define IN_DMA_NBR 5
-#define PINMUX_SSER pinmux_sser0
-#define SYNCSER_INST regi_sser0
-#define SYNCSER_INTR_VECT SSER0_INTR_VECT
-#define OUT_DMA_INST regi_dma4
-#define IN_DMA_INST regi_dma5
-#define DMA_OUT_INTR_VECT DMA4_INTR_VECT
-#define DMA_IN_INTR_VECT DMA5_INTR_VECT
-#define REQ_DMA_SYNCSER dma_sser0
-#else			/* Artpec-3 */
-#define OUT_DMA_NBR 6
-#define IN_DMA_NBR 7
-#define PINMUX_SSER pinmux_sser
-#define SYNCSER_INST regi_sser
-#define SYNCSER_INTR_VECT SSER_INTR_VECT
-#define OUT_DMA_INST regi_dma6
-#define IN_DMA_INST regi_dma7
-#define DMA_OUT_INTR_VECT DMA6_INTR_VECT
-#define DMA_IN_INTR_VECT DMA7_INTR_VECT
-#define REQ_DMA_SYNCSER dma_sser
+#define artpec_pinmux_alloc_fixed crisv32_pinmux_alloc_fixed
+#define artpec_request_dma crisv32_request_dma
+#define artpec_free_dma crisv32_free_dma
+
+#ifdef CONFIG_ETRAXFS
+/* ETRAX FS */
+#define DMA_OUT_NBR0		SYNC_SER0_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER0_RX_DMA_NBR
+#define DMA_OUT_NBR1		SYNC_SER1_TX_DMA_NBR
+#define DMA_IN_NBR1		SYNC_SER1_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser0
+#define PINMUX_SSER1		pinmux_sser1
+#define SYNCSER_INST0		regi_sser0
+#define SYNCSER_INST1		regi_sser1
+#define SYNCSER_INTR_VECT0	SSER0_INTR_VECT
+#define SYNCSER_INTR_VECT1	SSER1_INTR_VECT
+#define OUT_DMA_INST0		regi_dma4
+#define IN_DMA_INST0		regi_dma5
+#define DMA_OUT_INTR_VECT0	DMA4_INTR_VECT
+#define DMA_OUT_INTR_VECT1	DMA7_INTR_VECT
+#define DMA_IN_INTR_VECT0	DMA5_INTR_VECT
+#define DMA_IN_INTR_VECT1	DMA6_INTR_VECT
+#define REQ_DMA_SYNCSER0	dma_sser0
+#define REQ_DMA_SYNCSER1	dma_sser1
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)
+#define PORT1_DMA 1
+#else
+#define PORT1_DMA 0
+#endif
+#elif defined(CONFIG_CRIS_MACH_ARTPEC3)
+/* ARTPEC-3 */
+#define DMA_OUT_NBR0		SYNC_SER_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser
+#define SYNCSER_INST0		regi_sser
+#define SYNCSER_INTR_VECT0	SSER_INTR_VECT
+#define OUT_DMA_INST0		regi_dma6
+#define IN_DMA_INST0		regi_dma7
+#define DMA_OUT_INTR_VECT0	DMA6_INTR_VECT
+#define DMA_IN_INTR_VECT0	DMA7_INTR_VECT
+#define REQ_DMA_SYNCSER0	dma_sser
+#define REQ_DMA_SYNCSER1	dma_sser
 #endif
 
-/* The ports */
-static struct sync_port ports[]=
-{
-	{
-		.regi_sser             = SYNCSER_INST,
-		.regi_dmaout           = OUT_DMA_INST,
-		.regi_dmain            = IN_DMA_INST,
 #if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)
-                .use_dma               = 1,
+#define PORT0_DMA 1
 #else
-                .use_dma               = 0,
+#define PORT0_DMA 0
 #endif
-	}
-#ifdef CONFIG_ETRAXFS
-	,
 
+/* The ports */
+static struct sync_port ports[] = {
 	{
-		.regi_sser             = regi_sser1,
-		.regi_dmaout           = regi_dma6,
-		.regi_dmain            = regi_dma7,
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)
-                .use_dma               = 1,
-#else
-                .use_dma               = 0,
-#endif
-	}
+		.regi_sser		= SYNCSER_INST0,
+		.regi_dmaout		= OUT_DMA_INST0,
+		.regi_dmain		= IN_DMA_INST0,
+		.use_dma		= PORT0_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT0,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT0,
+		.dma_in_nbr		= DMA_IN_NBR0,
+		.dma_out_nbr		= DMA_OUT_NBR0,
+		.req_dma		= REQ_DMA_SYNCSER0,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT0,
+	},
+#ifdef CONFIG_ETRAXFS
+	{
+		.regi_sser		= SYNCSER_INST1,
+		.regi_dmaout		= regi_dma6,
+		.regi_dmain		= regi_dma7,
+		.use_dma		= PORT1_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT1,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT1,
+		.dma_in_nbr		= DMA_IN_NBR1,
+		.dma_out_nbr		= DMA_OUT_NBR1,
+		.req_dma		= REQ_DMA_SYNCSER1,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT1,
+	},
 #endif
 };
 
 #define NBR_PORTS ARRAY_SIZE(ports)
 
-static const struct file_operations sync_serial_fops = {
+static const struct file_operations syncser_fops = {
 	.owner		= THIS_MODULE,
 	.write		= sync_serial_write,
 	.read		= sync_serial_read,
@@ -253,61 +302,40 @@ static const struct file_operations sync_serial_fops = {
 	.llseek		= noop_llseek,
 };
 
-static int __init etrax_sync_serial_init(void)
-{
-	ports[0].enabled = 0;
-#ifdef CONFIG_ETRAXFS
-	ports[1].enabled = 0;
-#endif
-	if (register_chrdev(SYNC_SERIAL_MAJOR, "sync serial",
-			&sync_serial_fops) < 0) {
-		printk(KERN_WARNING
-			"Unable to get major for synchronous serial port\n");
-		return -EBUSY;
-	}
-
-	/* Initialize Ports */
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0)
-	if (crisv32_pinmux_alloc_fixed(PINMUX_SSER)) {
-		printk(KERN_WARNING
-			"Unable to alloc pins for synchronous serial port 0\n");
-		return -EIO;
-	}
-	ports[0].enabled = 1;
-	initialize_port(0);
-#endif
-
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1)
-	if (crisv32_pinmux_alloc_fixed(pinmux_sser1)) {
-		printk(KERN_WARNING
-			"Unable to alloc pins for synchronous serial port 0\n");
-		return -EIO;
-	}
-	ports[1].enabled = 1;
-	initialize_port(1);
-#endif
+static dev_t syncser_first;
+static int minor_count = NBR_PORTS;
+#define SYNCSER_NAME "syncser"
+static struct cdev *syncser_cdev;
+static struct class *syncser_class;
 
-#ifdef CONFIG_ETRAXFS
-	printk(KERN_INFO "ETRAX FS synchronous serial port driver\n");
-#else
-	printk(KERN_INFO "Artpec-3 synchronous serial port driver\n");
-#endif
-	return 0;
+static void sync_serial_start_port(struct sync_port *port)
+{
+	reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
+	reg_sser_rw_tr_cfg tr_cfg =
+		REG_RD(sser, port->regi_sser, rw_tr_cfg);
+	reg_sser_rw_rec_cfg rec_cfg =
+		REG_RD(sser, port->regi_sser, rw_rec_cfg);
+	cfg.en = regk_sser_yes;
+	tr_cfg.tr_en = regk_sser_yes;
+	rec_cfg.rec_en = regk_sser_yes;
+	REG_WR(sser, port->regi_sser, rw_cfg, cfg);
+	REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
+	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
+	port->started = 1;
 }
 
 static void __init initialize_port(int portnbr)
 {
-	int __attribute__((unused)) i;
 	struct sync_port *port = &ports[portnbr];
-	reg_sser_rw_cfg cfg = {0};
-	reg_sser_rw_frm_cfg frm_cfg = {0};
-	reg_sser_rw_tr_cfg tr_cfg = {0};
-	reg_sser_rw_rec_cfg rec_cfg = {0};
+	reg_sser_rw_cfg cfg = { 0 };
+	reg_sser_rw_frm_cfg frm_cfg = { 0 };
+	reg_sser_rw_tr_cfg tr_cfg = { 0 };
+	reg_sser_rw_rec_cfg rec_cfg = { 0 };
 
-	DEBUG(printk(KERN_DEBUG "Init sync serial port %d\n", portnbr));
+	DEBUG(pr_info("Init sync serial port %d\n", portnbr));
 
 	port->port_nbr = portnbr;
-	port->init_irqs = 1;
+	port->init_irqs = no_irq_setup;
 
 	port->out_rd_ptr = port->out_buffer;
 	port->out_buf_count = 0;
@@ -318,10 +346,11 @@ static void __init initialize_port(int portnbr)
 	port->readp = port->flip;
 	port->writep = port->flip;
 	port->in_buffer_size = IN_BUFFER_SIZE;
+	port->in_buffer_len = 0;
 	port->inbufchunk = IN_DESCR_SIZE;
-	port->next_rx_desc = &port->in_descr[0];
-	port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR-1];
-	port->prev_rx_desc->eol = 1;
+
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
 
 	init_waitqueue_head(&port->out_wait_q);
 	init_waitqueue_head(&port->in_wait_q);
@@ -368,14 +397,18 @@ static void __init initialize_port(int portnbr)
 	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
 
 #ifdef SYNC_SER_DMA
-	/* Setup the descriptor ring for dma out/transmit. */
-	for (i = 0; i < NBR_OUT_DESCR; i++) {
-		port->out_descr[i].wait = 0;
-		port->out_descr[i].intr = 1;
-		port->out_descr[i].eol = 0;
-		port->out_descr[i].out_eop = 0;
-		port->out_descr[i].next =
-			(dma_descr_data *)virt_to_phys(&port->out_descr[i+1]);
+	{
+		int i;
+		/* Setup the descriptor ring for dma out/transmit. */
+		for (i = 0; i < NBR_OUT_DESCR; i++) {
+			dma_descr_data *descr = &port->out_descr[i];
+			descr->wait = 0;
+			descr->intr = 1;
+			descr->eol = 0;
+			descr->out_eop = 0;
+			descr->next =
+				(dma_descr_data *)virt_to_phys(&descr[i+1]);
+		}
 	}
 
 	/* Create a ring from the list. */
@@ -391,201 +424,116 @@ static void __init initialize_port(int portnbr)
 
 static inline int sync_data_avail(struct sync_port *port)
 {
-	int avail;
-	unsigned char *start;
-	unsigned char *end;
-
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	/* 0123456789  0123456789
-	 *  -----      -    -----
-	 *  ^rp  ^wp    ^wp ^rp
-	 */
-
-	if (end >= start)
-		avail = end - start;
-	else
-		avail = port->in_buffer_size - (start - end);
-	return avail;
-}
-
-static inline int sync_data_avail_to_end(struct sync_port *port)
-{
-	int avail;
-	unsigned char *start;
-	unsigned char *end;
-
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	/* 0123456789  0123456789
-	 *  -----           -----
-	 *  ^rp  ^wp    ^wp ^rp
-	 */
-
-	if (end >= start)
-		avail = end - start;
-	else
-		avail = port->flip + port->in_buffer_size - start;
-	return avail;
+	return port->in_buffer_len;
 }
 
 static int sync_serial_open(struct inode *inode, struct file *file)
 {
+	int ret = 0;
 	int dev = iminor(inode);
-	int ret = -EBUSY;
-	sync_port *port;
-	reg_dma_rw_cfg cfg = {.en = regk_dma_yes};
-	reg_dma_rw_intr_mask intr_mask = {.data = regk_dma_yes};
+	struct sync_port *port;
+#ifdef SYNC_SER_DMA
+	reg_dma_rw_cfg cfg = { .en = regk_dma_yes };
+	reg_dma_rw_intr_mask intr_mask = { .data = regk_dma_yes };
+#endif
 
-	mutex_lock(&sync_serial_mutex);
-	DEBUG(printk(KERN_DEBUG "Open sync serial port %d\n", dev));
+	DEBUG(pr_debug("Open sync serial port %d\n", dev));
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk(KERN_DEBUG "Invalid minor %d\n", dev));
-		ret = -ENODEV;
-		goto out;
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
 	}
 	port = &ports[dev];
 	/* Allow open this device twice (assuming one reader and one writer) */
-	if (port->busy == 2)
-	{
-		DEBUG(printk(KERN_DEBUG "Device is busy.. \n"));
-		goto out;
+	if (port->busy == 2) {
+		DEBUG(pr_info("syncser%d is busy\n", dev));
+		return -EBUSY;
 	}
 
+	mutex_lock(&sync_serial_mutex);
 
-	if (port->init_irqs) {
-		if (port->use_dma) {
-			if (port == &ports[0]) {
-#ifdef SYNC_SER_DMA
-				if (request_irq(DMA_OUT_INTR_VECT,
-						tr_interrupt,
-						0,
-						"synchronous serial 0 dma tr",
-						&ports[0])) {
-					printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ");
-					goto out;
-				} else if (request_irq(DMA_IN_INTR_VECT,
-						rx_interrupt,
-						0,
-						"synchronous serial 1 dma rx",
-						&ports[0])) {
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ");
-					goto out;
-				} else if (crisv32_request_dma(OUT_DMA_NBR,
-						"synchronous serial 0 dma tr",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						REQ_DMA_SYNCSER)) {
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					free_irq(DMA_IN_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 0 TX DMA channel");
-					goto out;
-				} else if (crisv32_request_dma(IN_DMA_NBR,
-						"synchronous serial 0 dma rec",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						REQ_DMA_SYNCSER)) {
-					crisv32_free_dma(OUT_DMA_NBR);
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					free_irq(DMA_IN_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 1 RX DMA channel");
-					goto out;
-				}
-#endif
-			}
-#ifdef CONFIG_ETRAXFS
-			else if (port == &ports[1]) {
+	/* Clear any stale date left in the flip buffer */
+	port->readp = port->writep = port->flip;
+	port->in_buffer_len = 0;
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
+
+	if (port->init_irqs != no_irq_setup) {
+		/* Init only on first call. */
+		port->busy++;
+		mutex_unlock(&sync_serial_mutex);
+		return 0;
+	}
+	if (port->use_dma) {
 #ifdef SYNC_SER_DMA
-				if (request_irq(DMA6_INTR_VECT,
-						tr_interrupt,
-						0,
-						"synchronous serial 1 dma tr",
-						&ports[1])) {
-					printk(KERN_CRIT "Can't allocate sync serial port 1 IRQ");
-					goto out;
-				} else if (request_irq(DMA7_INTR_VECT,
-						       rx_interrupt,
-						       0,
-						       "synchronous serial 1 dma rx",
-						       &ports[1])) {
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 IRQ");
-					goto out;
-				} else if (crisv32_request_dma(
-						SYNC_SER1_TX_DMA_NBR,
-						"synchronous serial 1 dma tr",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						dma_sser1)) {
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					free_irq(DMA7_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 TX DMA channel");
-					goto out;
-				} else if (crisv32_request_dma(
-						SYNC_SER1_RX_DMA_NBR,
-						"synchronous serial 3 dma rec",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						dma_sser1)) {
-					crisv32_free_dma(SYNC_SER1_TX_DMA_NBR);
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					free_irq(DMA7_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 RX DMA channel");
-					goto out;
-				}
-#endif
-			}
+		const char *tmp;
+		DEBUG(pr_info("Using DMA for syncser%d\n", dev));
+
+		tmp = dev == 0 ? "syncser0 tx" : "syncser1 tx";
+		if (request_irq(port->dma_out_intr_vect, tr_interrupt, 0,
+				tmp, port)) {
+			pr_err("Can't alloc syncser%d TX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		if (artpec_request_dma(port->dma_out_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d TX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		tmp = dev == 0 ? "syncser0 rx" : "syncser1 rx";
+		if (request_irq(port->dma_in_intr_vect, rx_interrupt, 0,
+				tmp, port)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		if (artpec_request_dma(port->dma_in_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		/* Enable DMAs */
+		REG_WR(dma, port->regi_dmain, rw_cfg, cfg);
+		REG_WR(dma, port->regi_dmaout, rw_cfg, cfg);
+		/* Enable DMA IRQs */
+		REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask);
+		REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask);
+		/* Set up wordsize = 1 for DMAs. */
+		DMA_WR_CMD(port->regi_dmain, regk_dma_set_w_size1);
+		DMA_WR_CMD(port->regi_dmaout, regk_dma_set_w_size1);
+
+		start_dma_in(port);
+		port->init_irqs = dma_irq_setup;
 #endif
-                        /* Enable DMAs */
-			REG_WR(dma, port->regi_dmain, rw_cfg, cfg);
-			REG_WR(dma, port->regi_dmaout, rw_cfg, cfg);
-			/* Enable DMA IRQs */
-			REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask);
-			REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask);
-			/* Set up wordsize = 1 for DMAs. */
-			DMA_WR_CMD (port->regi_dmain, regk_dma_set_w_size1);
-			DMA_WR_CMD (port->regi_dmaout, regk_dma_set_w_size1);
-
-			start_dma_in(port);
-			port->init_irqs = 0;
-		} else { /* !port->use_dma */
+	} else { /* !port->use_dma */
 #ifdef SYNC_SER_MANUAL
-			if (port == &ports[0]) {
-				if (request_irq(SYNCSER_INTR_VECT,
-						manual_interrupt,
-						0,
-						"synchronous serial manual irq",
-						&ports[0])) {
-					printk("Can't allocate sync serial manual irq");
-					goto out;
-				}
-			}
-#ifdef CONFIG_ETRAXFS
-			else if (port == &ports[1]) {
-				if (request_irq(SSER1_INTR_VECT,
-						manual_interrupt,
-						0,
-						"synchronous serial manual irq",
-						&ports[1])) {
-					printk(KERN_CRIT "Can't allocate sync serial manual irq");
-					goto out;
-				}
-			}
-#endif
-			port->init_irqs = 0;
+		const char *tmp = dev == 0 ? "syncser0 manual irq" :
+					     "syncser1 manual irq";
+		if (request_irq(port->syncser_intr_vect, manual_interrupt,
+				0, tmp, port)) {
+			pr_err("Can't alloc syncser%d manual irq",
+				dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		port->init_irqs = manual_irq_setup;
 #else
-			panic("sync_serial: Manual mode not supported.\n");
+		panic("sync_serial: Manual mode not supported\n");
 #endif /* SYNC_SER_MANUAL */
-		}
-
-	} /* port->init_irqs */
-
+	}
 	port->busy++;
 	ret = 0;
-out:
+
+unlock_and_exit:
 	mutex_unlock(&sync_serial_mutex);
 	return ret;
 }
@@ -593,18 +541,17 @@ out:
 static int sync_serial_release(struct inode *inode, struct file *file)
 {
 	int dev = iminor(inode);
-	sync_port *port;
+	struct sync_port *port;
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -ENODEV;
 	}
 	port = &ports[dev];
 	if (port->busy)
 		port->busy--;
 	if (!port->busy)
-          /* XXX */ ;
+		/* XXX */;
 	return 0;
 }
 
@@ -612,21 +559,15 @@ static unsigned int sync_serial_poll(struct file *file, poll_table *wait)
 {
 	int dev = iminor(file_inode(file));
 	unsigned int mask = 0;
-	sync_port *port;
-	DEBUGPOLL( static unsigned int prev_mask = 0; );
+	struct sync_port *port;
+	DEBUGPOLL(
+	static unsigned int prev_mask;
+	);
 
 	port = &ports[dev];
 
-	if (!port->started) {
-		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_rec_cfg rec_cfg =
-			REG_RD(sser, port->regi_sser, rw_rec_cfg);
-		cfg.en = regk_sser_yes;
-		rec_cfg.rec_en = port->input;
-		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
-		REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
-		port->started = 1;
-	}
+	if (!port->started)
+		sync_serial_start_port(port);
 
 	poll_wait(file, &port->out_wait_q, wait);
 	poll_wait(file, &port->in_wait_q, wait);
@@ -645,33 +586,175 @@ static unsigned int sync_serial_poll(struct file *file, poll_table *wait)
 	if (port->input && sync_data_avail(port) >= port->inbufchunk)
 		mask |= POLLIN | POLLRDNORM;
 
-	DEBUGPOLL(if (mask != prev_mask)
-	      printk("sync_serial_poll: mask 0x%08X %s %s\n", mask,
-		     mask&POLLOUT?"POLLOUT":"", mask&POLLIN?"POLLIN":"");
-	      prev_mask = mask;
-	      );
+	DEBUGPOLL(
+	if (mask != prev_mask)
+		pr_info("sync_serial_poll: mask 0x%08X %s %s\n",
+			mask,
+			mask & POLLOUT ? "POLLOUT" : "",
+			mask & POLLIN ? "POLLIN" : "");
+		prev_mask = mask;
+	);
 	return mask;
 }
 
-static int sync_serial_ioctl(struct file *file,
-		  unsigned int cmd, unsigned long arg)
+static ssize_t __sync_serial_read(struct file *file,
+				  char __user *buf,
+				  size_t count,
+				  loff_t *ppos,
+				  struct timespec *ts)
+{
+	unsigned long flags;
+	int dev = MINOR(file->f_dentry->d_inode->i_rdev);
+	int avail;
+	struct sync_port *port;
+	unsigned char *start;
+	unsigned char *end;
+
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
+	}
+	port = &ports[dev];
+
+	if (!port->started)
+		sync_serial_start_port(port);
+
+	/* Calculate number of available bytes */
+	/* Save pointers to avoid that they are modified by interrupt */
+	spin_lock_irqsave(&port->lock, flags);
+	start = port->readp;
+	end = port->writep;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	while ((start == end) && !port->in_buffer_len) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		wait_event_interruptible(port->in_wait_q,
+					 !(start == end && !port->full));
+
+		if (signal_pending(current))
+			return -EINTR;
+
+		spin_lock_irqsave(&port->lock, flags);
+		start = port->readp;
+		end = port->writep;
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	DEBUGREAD(pr_info("R%d c %d ri %u wi %u /%u\n",
+			  dev, count,
+			  start - port->flip, end - port->flip,
+			  port->in_buffer_size));
+
+	/* Lazy read, never return wrapped data. */
+	if (end > start)
+		avail = end - start;
+	else
+		avail = port->flip + port->in_buffer_size - start;
+
+	count = count > avail ? avail : count;
+	if (copy_to_user(buf, start, count))
+		return -EFAULT;
+
+	/* If timestamp requested, find timestamp of first returned byte
+	 * and copy it.
+	 * N.B: Applications that request timstamps MUST read data in
+	 * chunks that are multiples of IN_DESCR_SIZE.
+	 * Otherwise the timestamps will not be aligned to the data read.
+	 */
+	if (ts != NULL) {
+		int idx = port->read_ts_idx;
+		memcpy(ts, &port->timestamp[idx], sizeof(struct timespec));
+		port->read_ts_idx += count / IN_DESCR_SIZE;
+		if (port->read_ts_idx >= NBR_IN_DESCR)
+			port->read_ts_idx = 0;
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->readp += count;
+	/* Check for wrap */
+	if (port->readp >= port->flip + port->in_buffer_size)
+		port->readp = port->flip;
+	port->in_buffer_len -= count;
+	port->full = 0;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	DEBUGREAD(pr_info("r %d\n", count));
+
+	return count;
+}
+
+static ssize_t sync_serial_input(struct file *file, unsigned long arg)
+{
+	struct ssp_request req;
+	int count;
+	int ret;
+
+	/* Copy the request structure from user-mode. */
+	ret = copy_from_user(&req, (struct ssp_request __user *)arg,
+		sizeof(struct ssp_request));
+
+	if (ret) {
+		DEBUG(pr_info("sync_serial_input copy from user failed\n"));
+		return -EFAULT;
+	}
+
+	/* To get the timestamps aligned, make sure that 'len'
+	 * is a multiple of IN_DESCR_SIZE.
+	 */
+	if ((req.len % IN_DESCR_SIZE) != 0) {
+		DEBUG(pr_info("sync_serial: req.len %x, IN_DESCR_SIZE %x\n",
+			      req.len, IN_DESCR_SIZE));
+		return -EFAULT;
+	}
+
+	/* Do the actual read. */
+	/* Note that req.buf is actually a pointer to user space. */
+	count = __sync_serial_read(file, req.buf, req.len,
+				   NULL, &req.ts);
+
+	if (count < 0) {
+		DEBUG(pr_info("sync_serial_input read failed\n"));
+		return count;
+	}
+
+	/* Copy the request back to user-mode. */
+	ret = copy_to_user((struct ssp_request __user *)arg, &req,
+		sizeof(struct ssp_request));
+
+	if (ret) {
+		DEBUG(pr_info("syncser input copy2user failed\n"));
+		return -EFAULT;
+	}
+
+	/* Return the number of bytes read. */
+	return count;
+}
+
+
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg)
 {
 	int return_val = 0;
 	int dma_w_size = regk_dma_set_w_size1;
 	int dev = iminor(file_inode(file));
-	sync_port *port;
+	struct sync_port *port;
 	reg_sser_rw_tr_cfg tr_cfg;
 	reg_sser_rw_rec_cfg rec_cfg;
 	reg_sser_rw_frm_cfg frm_cfg;
 	reg_sser_rw_cfg gen_cfg;
 	reg_sser_rw_intr_mask intr_mask;
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -1;
 	}
-        port = &ports[dev];
+
+	if (cmd == SSP_INPUT)
+		return sync_serial_input(file, arg);
+
+	port = &ports[dev];
 	spin_lock_irq(&port->lock);
 
 	tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
@@ -680,11 +763,9 @@ static int sync_serial_ioctl(struct file *file,
 	gen_cfg = REG_RD(sser, port->regi_sser, rw_cfg);
 	intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
 
-	switch(cmd)
-	{
+	switch (cmd) {
 	case SSP_SPEED:
-		if (GET_SPEED(arg) == CODEC)
-		{
+		if (GET_SPEED(arg) == CODEC) {
 			unsigned int freq;
 
 			gen_cfg.base_freq = regk_sser_f32;
@@ -701,15 +782,25 @@ static int sync_serial_ioctl(struct file *file,
 			case FREQ_256kHz:
 				gen_cfg.clk_div = 125 *
 					(1 << (freq - FREQ_256kHz)) - 1;
-			break;
+				break;
 			case FREQ_512kHz:
 				gen_cfg.clk_div = 62;
-			break;
+				break;
 			case FREQ_1MHz:
 			case FREQ_2MHz:
 			case FREQ_4MHz:
 				gen_cfg.clk_div = 8 * (1 << freq) - 1;
-			break;
+				break;
+			}
+		} else if (GET_SPEED(arg) == CODEC_f32768) {
+			gen_cfg.base_freq = regk_sser_f32_768;
+			switch (GET_FREQ(arg)) {
+			case FREQ_4096kHz:
+				gen_cfg.clk_div = 7;
+				break;
+			default:
+				spin_unlock_irq(&port->lock);
+				return -EINVAL;
 			}
 		} else {
 			gen_cfg.base_freq = regk_sser_f29_493;
@@ -767,62 +858,64 @@ static int sync_serial_ioctl(struct file *file,
 
 		break;
 	case SSP_MODE:
-		switch(arg)
-		{
-			case MASTER_OUTPUT:
-				port->output = 1;
-				port->input = 0;
-				frm_cfg.out_on = regk_sser_tr;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_OUTPUT:
-				port->output = 1;
-				port->input = 0;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			case MASTER_INPUT:
-				port->output = 0;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				frm_cfg.out_on = regk_sser_intern_tb;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_INPUT:
-				port->output = 0;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			case MASTER_BIDIR:
-				port->output = 1;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				frm_cfg.out_on = regk_sser_intern_tb;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_BIDIR:
-				port->output = 1;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			default:
-				spin_unlock_irq(&port->lock);
-				return -EINVAL;
+		switch (arg) {
+		case MASTER_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.out_on = regk_sser_tr;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		default:
+			spin_unlock_irq(&port->lock);
+			return -EINVAL;
 		}
-		if (!port->use_dma || (arg == MASTER_OUTPUT || arg == SLAVE_OUTPUT))
+		if (!port->use_dma || arg == MASTER_OUTPUT ||
+				arg == SLAVE_OUTPUT)
 			intr_mask.rdav = regk_sser_yes;
 		break;
 	case SSP_FRAME_SYNC:
 		if (arg & NORMAL_SYNC) {
 			frm_cfg.rec_delay = 1;
 			frm_cfg.tr_delay = 1;
-		}
-		else if (arg & EARLY_SYNC)
+		} else if (arg & EARLY_SYNC)
 			frm_cfg.rec_delay = frm_cfg.tr_delay = 0;
-		else if (arg & SECOND_WORD_SYNC) {
+		else if (arg & LATE_SYNC) {
+			frm_cfg.tr_delay = 2;
+			frm_cfg.rec_delay = 2;
+		} else if (arg & SECOND_WORD_SYNC) {
 			frm_cfg.rec_delay = 7;
 			frm_cfg.tr_delay = 1;
 		}
@@ -914,15 +1007,12 @@ static int sync_serial_ioctl(struct file *file,
 		frm_cfg.type = regk_sser_level;
 		frm_cfg.tr_delay = 1;
 		frm_cfg.level = regk_sser_neg_lo;
-		if (arg & SPI_SLAVE)
-		{
+		if (arg & SPI_SLAVE) {
 			rec_cfg.clk_pol = regk_sser_neg;
 			gen_cfg.clk_dir = regk_sser_in;
 			port->input = 1;
 			port->output = 0;
-		}
-		else
-		{
+		} else {
 			gen_cfg.out_clk_pol = regk_sser_pos;
 			port->input = 0;
 			port->output = 1;
@@ -965,19 +1055,19 @@ static int sync_serial_ioctl(struct file *file,
 }
 
 static long sync_serial_ioctl(struct file *file,
-                             unsigned int cmd, unsigned long arg)
+		unsigned int cmd, unsigned long arg)
 {
-       long ret;
+	long ret;
 
-       mutex_lock(&sync_serial_mutex);
-       ret = sync_serial_ioctl_unlocked(file, cmd, arg);
-       mutex_unlock(&sync_serial_mutex);
+	mutex_lock(&sync_serial_mutex);
+	ret = sync_serial_ioctl_unlocked(file, cmd, arg);
+	mutex_unlock(&sync_serial_mutex);
 
-       return ret;
+	return ret;
 }
 
 /* NOTE: sync_serial_write does not support concurrency */
-static ssize_t sync_serial_write(struct file *file, const char *buf,
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
 {
 	int dev = iminor(file_inode(file));
@@ -993,7 +1083,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	unsigned char *buf_stop_ptr; /* Last byte + 1 */
 
 	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
-		DEBUG(printk("Invalid minor %d\n", dev));
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -ENODEV;
 	}
 	port = &ports[dev];
@@ -1006,9 +1096,9 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	 * |_________|___________________|________________________|
 	 *           ^ rd_ptr            ^ wr_ptr
 	 */
-	DEBUGWRITE(printk(KERN_DEBUG "W d%d c %lu a: %p c: %p\n",
-			  port->port_nbr, count, port->active_tr_descr,
-			  port->catch_tr_descr));
+	DEBUGWRITE(pr_info("W d%d c %u a: %p c: %p\n",
+			   port->port_nbr, count, port->active_tr_descr,
+			   port->catch_tr_descr));
 
 	/* Read variables that may be updated by interrupts */
 	spin_lock_irqsave(&port->lock, flags);
@@ -1020,7 +1110,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	if (port->tr_running &&
 	    ((port->use_dma && port->active_tr_descr == port->catch_tr_descr) ||
 	     out_buf_count >= OUT_BUFFER_SIZE)) {
-		DEBUGWRITE(printk(KERN_DEBUG "sser%d full\n", dev));
+		DEBUGWRITE(pr_info("sser%d full\n", dev));
 		return -EAGAIN;
 	}
 
@@ -1043,15 +1133,16 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	if (copy_from_user(wr_ptr, buf, trunc_count))
 		return -EFAULT;
 
-	DEBUGOUTBUF(printk(KERN_DEBUG "%-4d + %-4d = %-4d     %p %p %p\n",
-			   out_buf_count, trunc_count,
-			   port->out_buf_count, port->out_buffer,
-			   wr_ptr, buf_stop_ptr));
+	DEBUGOUTBUF(pr_info("%-4d + %-4d = %-4d     %p %p %p\n",
+			    out_buf_count, trunc_count,
+			    port->out_buf_count, port->out_buffer,
+			    wr_ptr, buf_stop_ptr));
 
 	/* Make sure transmitter/receiver is running */
 	if (!port->started) {
 		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
+		reg_sser_rw_rec_cfg rec_cfg =
+			REG_RD(sser, port->regi_sser, rw_rec_cfg);
 		cfg.en = regk_sser_yes;
 		rec_cfg.rec_en = port->input;
 		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
@@ -1068,8 +1159,11 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	spin_lock_irqsave(&port->lock, flags);
 	port->out_buf_count += trunc_count;
 	if (port->use_dma) {
+#ifdef SYNC_SER_DMA
 		start_dma_out(port, wr_ptr, trunc_count);
+#endif
 	} else if (!port->tr_running) {
+#ifdef SYNC_SER_MANUAL
 		reg_sser_rw_intr_mask intr_mask;
 		intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
 		/* Start sender by writing data */
@@ -1077,14 +1171,15 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 		/* and enable transmitter ready IRQ */
 		intr_mask.trdy = 1;
 		REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
+#endif
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	/* Exit if non blocking */
 	if (file->f_flags & O_NONBLOCK) {
-		DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu  %08x\n",
-				  port->port_nbr, trunc_count,
-				  REG_RD_INT(dma, port->regi_dmaout, r_intr)));
+		DEBUGWRITE(pr_info("w d%d c %u  %08x\n",
+				   port->port_nbr, trunc_count,
+				   REG_RD_INT(dma, port->regi_dmaout, r_intr)));
 		return trunc_count;
 	}
 
@@ -1094,105 +1189,32 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	if (signal_pending(current))
 		return -EINTR;
 
-	DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu\n",
-			  port->port_nbr, trunc_count));
+	DEBUGWRITE(pr_info("w d%d c %u\n", port->port_nbr, trunc_count));
 	return trunc_count;
 }
 
-static ssize_t sync_serial_read(struct file * file, char * buf,
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	int dev = iminor(file_inode(file));
-	int avail;
-	sync_port *port;
-	unsigned char* start;
-	unsigned char* end;
-	unsigned long flags;
-
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
-		return -ENODEV;
-	}
-	port = &ports[dev];
-
-	DEBUGREAD(printk("R%d c %d ri %lu wi %lu /%lu\n", dev, count, port->readp - port->flip, port->writep - port->flip, port->in_buffer_size));
-
-	if (!port->started)
-	{
-		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
-		reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
-		cfg.en = regk_sser_yes;
-		tr_cfg.tr_en = regk_sser_yes;
-		rec_cfg.rec_en = regk_sser_yes;
-		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
-		REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
-		REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
-		port->started = 1;
-	}
-
-	/* Calculate number of available bytes */
-	/* Save pointers to avoid that they are modified by interrupt */
-	spin_lock_irqsave(&port->lock, flags);
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	spin_unlock_irqrestore(&port->lock, flags);
-	while ((start == end) && !port->full) /* No data */
-	{
-		DEBUGREAD(printk(KERN_DEBUG "&"));
-		if (file->f_flags & O_NONBLOCK)
-			return -EAGAIN;
-
-		wait_event_interruptible(port->in_wait_q,
-					 !(start == end && !port->full));
-		if (signal_pending(current))
-			return -EINTR;
-
-		spin_lock_irqsave(&port->lock, flags);
-		start = (unsigned char*)port->readp; /* cast away volatile */
-		end = (unsigned char*)port->writep;  /* cast away volatile */
-		spin_unlock_irqrestore(&port->lock, flags);
-	}
-
-	/* Lazy read, never return wrapped data. */
-	if (port->full)
-		avail = port->in_buffer_size;
-	else if (end > start)
-		avail = end - start;
-	else
-		avail = port->flip + port->in_buffer_size - start;
-
-	count = count > avail ? avail : count;
-	if (copy_to_user(buf, start, count))
-		return -EFAULT;
-	/* Disable interrupts while updating readp */
-	spin_lock_irqsave(&port->lock, flags);
-	port->readp += count;
-	if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */
-		port->readp = port->flip;
-	port->full = 0;
-	spin_unlock_irqrestore(&port->lock, flags);
-	DEBUGREAD(printk("r %d\n", count));
-	return count;
+	return __sync_serial_read(file, buf, count, ppos, NULL);
 }
 
-static void send_word(sync_port* port)
+#ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port)
 {
 	reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
 	reg_sser_rw_tr_data tr_data =  {0};
 
-	switch(tr_cfg.sample_size)
+	switch (tr_cfg.sample_size) {
+	case 8:
+		port->out_buf_count--;
+		tr_data.data = *port->out_rd_ptr++;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
+			port->out_rd_ptr = port->out_buffer;
+		break;
+	case 12:
 	{
-	 case 8:
-		 port->out_buf_count--;
-		 tr_data.data = *port->out_rd_ptr++;
-		 REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
-		 if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
-			 port->out_rd_ptr = port->out_buffer;
-		 break;
-	 case 12:
-	 {
 		int data = (*port->out_rd_ptr++) << 8;
 		data |= *port->out_rd_ptr++;
 		port->out_buf_count -= 2;
@@ -1200,8 +1222,8 @@ static void send_word(sync_port* port)
 		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
 		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
 			port->out_rd_ptr = port->out_buffer;
+		break;
 	}
-	break;
 	case 16:
 		port->out_buf_count -= 2;
 		tr_data.data = *(unsigned short *)port->out_rd_ptr;
@@ -1233,27 +1255,28 @@ static void send_word(sync_port* port)
 		break;
 	}
 }
+#endif
 
-static void start_dma_out(struct sync_port *port,
-			  const char *data, int count)
+#ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count)
 {
-	port->active_tr_descr->buf = (char *) virt_to_phys((char *) data);
+	port->active_tr_descr->buf = (char *)virt_to_phys((char *)data);
 	port->active_tr_descr->after = port->active_tr_descr->buf + count;
 	port->active_tr_descr->intr = 1;
 
 	port->active_tr_descr->eol = 1;
 	port->prev_tr_descr->eol = 0;
 
-	DEBUGTRDMA(printk(KERN_DEBUG "Inserting eolr:%p eol@:%p\n",
+	DEBUGTRDMA(pr_info("Inserting eolr:%p eol@:%p\n",
 		port->prev_tr_descr, port->active_tr_descr));
 	port->prev_tr_descr = port->active_tr_descr;
-	port->active_tr_descr = phys_to_virt((int) port->active_tr_descr->next);
+	port->active_tr_descr = phys_to_virt((int)port->active_tr_descr->next);
 
 	if (!port->tr_running) {
 		reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser,
 			rw_tr_cfg);
 
-		port->out_context.next = 0;
+		port->out_context.next = NULL;
 		port->out_context.saved_data =
 			(dma_descr_data *)virt_to_phys(port->prev_tr_descr);
 		port->out_context.saved_data_buf = port->prev_tr_descr->buf;
@@ -1263,57 +1286,58 @@ static void start_dma_out(struct sync_port *port,
 
 		tr_cfg.tr_en = regk_sser_yes;
 		REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
-		DEBUGTRDMA(printk(KERN_DEBUG "dma s\n"););
+		DEBUGTRDMA(pr_info(KERN_INFO "dma s\n"););
 	} else {
 		DMA_CONTINUE_DATA(port->regi_dmaout);
-		DEBUGTRDMA(printk(KERN_DEBUG "dma c\n"););
+		DEBUGTRDMA(pr_info("dma c\n"););
 	}
 
 	port->tr_running = 1;
 }
 
-static void start_dma_in(sync_port *port)
+static void start_dma_in(struct sync_port *port)
 {
 	int i;
 	char *buf;
+	unsigned long flags;
+	spin_lock_irqsave(&port->lock, flags);
 	port->writep = port->flip;
+	spin_unlock_irqrestore(&port->lock, flags);
 
-	if (port->writep > port->flip + port->in_buffer_size) {
-		panic("Offset too large in sync serial driver\n");
-		return;
-	}
-	buf = (char*)virt_to_phys(port->in_buffer);
+	buf = (char *)virt_to_phys(port->in_buffer);
 	for (i = 0; i < NBR_IN_DESCR; i++) {
 		port->in_descr[i].buf = buf;
 		port->in_descr[i].after = buf + port->inbufchunk;
 		port->in_descr[i].intr = 1;
-		port->in_descr[i].next = (dma_descr_data*)virt_to_phys(&port->in_descr[i+1]);
+		port->in_descr[i].next =
+			(dma_descr_data *)virt_to_phys(&port->in_descr[i+1]);
 		port->in_descr[i].buf = buf;
 		buf += port->inbufchunk;
 	}
 	/* Link the last descriptor to the first */
-	port->in_descr[i-1].next = (dma_descr_data*)virt_to_phys(&port->in_descr[0]);
+	port->in_descr[i-1].next =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
 	port->in_descr[i-1].eol = regk_sser_yes;
 	port->next_rx_desc = &port->in_descr[0];
 	port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR - 1];
-	port->in_context.saved_data = (dma_descr_data*)virt_to_phys(&port->in_descr[0]);
+	port->in_context.saved_data =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
 	port->in_context.saved_data_buf = port->in_descr[0].buf;
 	DMA_START_CONTEXT(port->regi_dmain, virt_to_phys(&port->in_context));
 }
 
-#ifdef SYNC_SER_DMA
 static irqreturn_t tr_interrupt(int irq, void *dev_id)
 {
 	reg_dma_r_masked_intr masked;
-	reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes};
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
 	reg_dma_rw_stat stat;
 	int i;
 	int found = 0;
 	int stop_sser = 0;
 
 	for (i = 0; i < NBR_PORTS; i++) {
-		sync_port *port = &ports[i];
-		if (!port->enabled  || !port->use_dma)
+		struct sync_port *port = &ports[i];
+		if (!port->enabled || !port->use_dma)
 			continue;
 
 		/* IRQ active for the port? */
@@ -1338,19 +1362,20 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 			int sent;
 			sent = port->catch_tr_descr->after -
 				port->catch_tr_descr->buf;
-			DEBUGTXINT(printk(KERN_DEBUG "%-4d - %-4d = %-4d\t"
-					  "in descr %p (ac: %p)\n",
-					  port->out_buf_count, sent,
-					  port->out_buf_count - sent,
-					  port->catch_tr_descr,
-					  port->active_tr_descr););
+			DEBUGTXINT(pr_info("%-4d - %-4d = %-4d\t"
+					   "in descr %p (ac: %p)\n",
+					   port->out_buf_count, sent,
+					   port->out_buf_count - sent,
+					   port->catch_tr_descr,
+					   port->active_tr_descr););
 			port->out_buf_count -= sent;
 			port->catch_tr_descr =
 				phys_to_virt((int) port->catch_tr_descr->next);
 			port->out_rd_ptr =
 				phys_to_virt((int) port->catch_tr_descr->buf);
 		} else {
-			int i, sent;
+			reg_sser_rw_tr_cfg tr_cfg;
+			int j, sent;
 			/* EOL handler.
 			 * Note that if an EOL was encountered during the irq
 			 * locked section of sync_ser_write the DMA will be
@@ -1358,11 +1383,11 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 			 * The remaining descriptors will be traversed by
 			 * the descriptor interrupts as usual.
 			 */
-			i = 0;
+			j = 0;
 			while (!port->catch_tr_descr->eol) {
 				sent = port->catch_tr_descr->after -
 					port->catch_tr_descr->buf;
-				DEBUGOUTBUF(printk(KERN_DEBUG
+				DEBUGOUTBUF(pr_info(
 					"traversing descr %p -%d (%d)\n",
 					port->catch_tr_descr,
 					sent,
@@ -1370,16 +1395,15 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 				port->out_buf_count -= sent;
 				port->catch_tr_descr = phys_to_virt(
 					(int)port->catch_tr_descr->next);
-				i++;
-				if (i >= NBR_OUT_DESCR) {
+				j++;
+				if (j >= NBR_OUT_DESCR) {
 					/* TODO: Reset and recover */
 					panic("sync_serial: missing eol");
 				}
 			}
 			sent = port->catch_tr_descr->after -
 				port->catch_tr_descr->buf;
-			DEBUGOUTBUF(printk(KERN_DEBUG
-				"eol at descr %p -%d (%d)\n",
+			DEBUGOUTBUF(pr_info("eol at descr %p -%d (%d)\n",
 				port->catch_tr_descr,
 				sent,
 				port->out_buf_count));
@@ -1394,15 +1418,13 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 					OUT_BUFFER_SIZE)
 				port->out_rd_ptr = port->out_buffer;
 
-			reg_sser_rw_tr_cfg tr_cfg =
-				REG_RD(sser, port->regi_sser, rw_tr_cfg);
-			DEBUGTXINT(printk(KERN_DEBUG
+			tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
+			DEBUGTXINT(pr_info(
 				"tr_int DMA stop %d, set catch @ %p\n",
 				port->out_buf_count,
 				port->active_tr_descr));
 			if (port->out_buf_count != 0)
-				printk(KERN_CRIT "sync_ser: buffer not "
-					"empty after eol.\n");
+				pr_err("sync_ser: buf not empty after eol\n");
 			port->catch_tr_descr = port->active_tr_descr;
 			port->tr_running = 0;
 			tr_cfg.tr_en = regk_sser_no;
@@ -1414,62 +1436,79 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 	return IRQ_RETVAL(found);
 } /* tr_interrupt */
 
+
+static inline void handle_rx_packet(struct sync_port *port)
+{
+	int idx;
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
+	unsigned long flags;
+
+	DEBUGRXINT(pr_info(KERN_INFO "!"));
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* If we overrun the user experience is crap regardless if we
+	 * drop new or old data. Its much easier to get it right when
+	 * dropping new data so lets do that.
+	 */
+	if ((port->writep + port->inbufchunk <=
+	     port->flip + port->in_buffer_size) &&
+	    (port->in_buffer_len + port->inbufchunk < IN_BUFFER_SIZE)) {
+		memcpy(port->writep,
+		       phys_to_virt((unsigned)port->next_rx_desc->buf),
+		       port->inbufchunk);
+		port->writep += port->inbufchunk;
+		if (port->writep >= port->flip + port->in_buffer_size)
+			port->writep = port->flip;
+
+		/* Timestamp the new data chunk. */
+		if (port->write_ts_idx == NBR_IN_DESCR)
+			port->write_ts_idx = 0;
+		idx = port->write_ts_idx++;
+		do_posix_clock_monotonic_gettime(&port->timestamp[idx]);
+		port->in_buffer_len += port->inbufchunk;
+	}
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	port->next_rx_desc->eol = 1;
+	port->prev_rx_desc->eol = 0;
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 0);
+	port->prev_rx_desc = port->next_rx_desc;
+	port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next);
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 1);
+	/* wake up the waiting process */
+	wake_up_interruptible(&port->in_wait_q);
+	DMA_CONTINUE(port->regi_dmain);
+	REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr);
+
+}
+
 static irqreturn_t rx_interrupt(int irq, void *dev_id)
 {
 	reg_dma_r_masked_intr masked;
-	reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes};
 
 	int i;
 	int found = 0;
 
-	for (i = 0; i < NBR_PORTS; i++)
-	{
-		sync_port *port = &ports[i];
+	DEBUG(pr_info("rx_interrupt\n"));
+
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
 
-		if (!port->enabled || !port->use_dma )
+		if (!port->enabled || !port->use_dma)
 			continue;
 
 		masked = REG_RD(dma, port->regi_dmain, r_masked_intr);
 
-		if (masked.data) /* Descriptor interrupt */
-		{
-			found = 1;
-			while (REG_RD(dma, port->regi_dmain, rw_data) !=
-			       virt_to_phys(port->next_rx_desc)) {
-				DEBUGRXINT(printk(KERN_DEBUG "!"));
-				if (port->writep + port->inbufchunk > port->flip + port->in_buffer_size) {
-					int first_size = port->flip + port->in_buffer_size - port->writep;
-					memcpy((char*)port->writep, phys_to_virt((unsigned)port->next_rx_desc->buf), first_size);
-					memcpy(port->flip, phys_to_virt((unsigned)port->next_rx_desc->buf+first_size), port->inbufchunk - first_size);
-					port->writep = port->flip + port->inbufchunk - first_size;
-				} else {
-					memcpy((char*)port->writep,
-					       phys_to_virt((unsigned)port->next_rx_desc->buf),
-					       port->inbufchunk);
-					port->writep += port->inbufchunk;
-					if (port->writep >= port->flip + port->in_buffer_size)
-						port->writep = port->flip;
-				}
-                                if (port->writep == port->readp)
-                                {
-				  port->full = 1;
-                                }
-
-				port->next_rx_desc->eol = 1;
-				port->prev_rx_desc->eol = 0;
-				/* Cache bug workaround */
-				flush_dma_descr(port->prev_rx_desc, 0);
-				port->prev_rx_desc = port->next_rx_desc;
-				port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next);
-				/* Cache bug workaround */
-				flush_dma_descr(port->prev_rx_desc, 1);
-				/* wake up the waiting process */
-				wake_up_interruptible(&port->in_wait_q);
-				DMA_CONTINUE(port->regi_dmain);
-				REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr);
+		if (!masked.data)
+			continue;
 
-			}
-		}
+		/* Descriptor interrupt */
+		found = 1;
+		while (REG_RD(dma, port->regi_dmain, rw_data) !=
+				virt_to_phys(port->next_rx_desc))
+			handle_rx_packet(port);
 	}
 	return IRQ_RETVAL(found);
 } /* rx_interrupt */
@@ -1478,75 +1517,83 @@ static irqreturn_t rx_interrupt(int irq, void *dev_id)
 #ifdef SYNC_SER_MANUAL
 static irqreturn_t manual_interrupt(int irq, void *dev_id)
 {
+	unsigned long flags;
 	int i;
 	int found = 0;
 	reg_sser_r_masked_intr masked;
 
-	for (i = 0; i < NBR_PORTS; i++)
-	{
-		sync_port *port = &ports[i];
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
 
 		if (!port->enabled || port->use_dma)
-		{
 			continue;
-		}
 
 		masked = REG_RD(sser, port->regi_sser, r_masked_intr);
-		if (masked.rdav)	/* Data received? */
-		{
-			reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
-			reg_sser_r_rec_data data = REG_RD(sser, port->regi_sser, r_rec_data);
+		/* Data received? */
+		if (masked.rdav) {
+			reg_sser_rw_rec_cfg rec_cfg =
+				REG_RD(sser, port->regi_sser, rw_rec_cfg);
+			reg_sser_r_rec_data data = REG_RD(sser,
+				port->regi_sser, r_rec_data);
 			found = 1;
 			/* Read data */
-			switch(rec_cfg.sample_size)
-			{
+			spin_lock_irqsave(&port->lock, flags);
+			switch (rec_cfg.sample_size) {
 			case 8:
 				*port->writep++ = data.data & 0xff;
 				break;
 			case 12:
 				*port->writep = (data.data & 0x0ff0) >> 4;
 				*(port->writep + 1) = data.data & 0x0f;
-				port->writep+=2;
+				port->writep += 2;
 				break;
 			case 16:
-				*(unsigned short*)port->writep = data.data;
-				port->writep+=2;
+				*(unsigned short *)port->writep = data.data;
+				port->writep += 2;
 				break;
 			case 24:
-				*(unsigned int*)port->writep = data.data;
-				port->writep+=3;
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 3;
 				break;
 			case 32:
-				*(unsigned int*)port->writep = data.data;
-				port->writep+=4;
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 4;
 				break;
 			}
 
-			if (port->writep >= port->flip + port->in_buffer_size) /* Wrap? */
+			/* Wrap? */
+			if (port->writep >= port->flip + port->in_buffer_size)
 				port->writep = port->flip;
 			if (port->writep == port->readp) {
-				/* receive buffer overrun, discard oldest data
-				 */
+				/* Receive buf overrun, discard oldest data */
 				port->readp++;
-				if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */
+				/* Wrap? */
+				if (port->readp >= port->flip +
+						port->in_buffer_size)
 					port->readp = port->flip;
 			}
+			spin_unlock_irqrestore(&port->lock, flags);
 			if (sync_data_avail(port) >= port->inbufchunk)
-				wake_up_interruptible(&port->in_wait_q); /* Wake up application */
+				/* Wake up application */
+				wake_up_interruptible(&port->in_wait_q);
 		}
 
-		if (masked.trdy) /* Transmitter ready? */
-		{
+		/* Transmitter ready? */
+		if (masked.trdy) {
 			found = 1;
-			if (port->out_buf_count > 0) /* More data to send */
+			/* More data to send */
+			if (port->out_buf_count > 0)
 				send_word(port);
-			else /* transmission finished */
-			{
+			else {
+				/* Transmission finished */
 				reg_sser_rw_intr_mask intr_mask;
-				intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
+				intr_mask = REG_RD(sser, port->regi_sser,
+					rw_intr_mask);
 				intr_mask.trdy = 0;
-				REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
-				wake_up_interruptible(&port->out_wait_q); /* Wake up application */
+				REG_WR(sser, port->regi_sser,
+					rw_intr_mask, intr_mask);
+				/* Wake up application */
+				wake_up_interruptible(&port->out_wait_q);
 			}
 		}
 	}
@@ -1554,4 +1601,109 @@ static irqreturn_t manual_interrupt(int irq, void *dev_id)
 }
 #endif
 
+static int __init etrax_sync_serial_init(void)
+{
+#if 1
+	/* This code will be removed when we move to udev for all devices. */
+	syncser_first = MKDEV(SYNC_SERIAL_MAJOR, 0);
+	if (register_chrdev_region(syncser_first, minor_count, SYNCSER_NAME)) {
+		pr_err("Failed to register major %d\n", SYNC_SERIAL_MAJOR);
+		return -1;
+	}
+#else
+	/* Allocate dynamic major number. */
+	if (alloc_chrdev_region(&syncser_first, 0, minor_count, SYNCSER_NAME)) {
+		pr_err("Failed to allocate character device region\n");
+		return -1;
+	}
+#endif
+	syncser_cdev = cdev_alloc();
+	if (!syncser_cdev) {
+		pr_err("Failed to allocate cdev for syncser\n");
+		unregister_chrdev_region(syncser_first, minor_count);
+		return -1;
+	}
+	cdev_init(syncser_cdev, &syncser_fops);
+
+	/* Create a sysfs class for syncser */
+	syncser_class = class_create(THIS_MODULE, "syncser_class");
+
+	/* Initialize Ports */
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER0)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 0\n");
+		unregister_chrdev_region(syncser_first, minor_count);
+		return -EIO;
+	}
+	initialize_port(0);
+	ports[0].enabled = 1;
+	/* Register with sysfs so udev can pick it up. */
+	device_create(syncser_class, NULL, syncser_first, NULL,
+		      "%s%d", SYNCSER_NAME, 0);
+#endif
+
+#if defined(CONFIG_ETRAXFS) && defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER1)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 1\n");
+		unregister_chrdev_region(syncser_first, minor_count);
+		class_destroy(syncser_class);
+		return -EIO;
+	}
+	initialize_port(1);
+	ports[1].enabled = 1;
+	/* Register with sysfs so udev can pick it up. */
+	device_create(syncser_class, NULL, syncser_first, NULL,
+		      "%s%d", SYNCSER_NAME, 0);
+#endif
+
+	/* Add it to system */
+	if (cdev_add(syncser_cdev, syncser_first, minor_count) < 0) {
+		pr_err("Failed to add syncser as char device\n");
+		device_destroy(syncser_class, syncser_first);
+		class_destroy(syncser_class);
+		cdev_del(syncser_cdev);
+		unregister_chrdev_region(syncser_first, minor_count);
+		return -1;
+	}
+
+
+	pr_info("ARTPEC synchronous serial port (%s: %d, %d)\n",
+		SYNCSER_NAME, MAJOR(syncser_first), MINOR(syncser_first));
+
+	return 0;
+}
+
+static void __exit etrax_sync_serial_exit(void)
+{
+	int i;
+	device_destroy(syncser_class, syncser_first);
+	class_destroy(syncser_class);
+
+	if (syncser_cdev) {
+		cdev_del(syncser_cdev);
+		unregister_chrdev_region(syncser_first, minor_count);
+	}
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+		if (port->init_irqs == dma_irq_setup) {
+			/* Free dma irqs and dma channels. */
+#ifdef SYNC_SER_DMA
+			artpec_free_dma(port->dma_in_nbr);
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+#endif
+		} else if (port->init_irqs == manual_irq_setup) {
+			/* Free manual irq. */
+			free_irq(port->syncser_intr_vect, port);
+		}
+	}
+
+	pr_info("ARTPEC synchronous serial port unregistered\n");
+}
+
 module_init(etrax_sync_serial_init);
+module_exit(etrax_sync_serial_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/arch/cris/arch-v32/kernel/debugport.c b/arch/cris/arch-v32/kernel/debugport.c
index 610909b003f6..02e33ebe51ec 100644
--- a/arch/cris/arch-v32/kernel/debugport.c
+++ b/arch/cris/arch-v32/kernel/debugport.c
@@ -3,7 +3,9 @@
  */
 
 #include <linux/console.h>
+#include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/string.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/reg_map.h>
 #include <hwregs/ser_defs.h>
@@ -65,6 +67,7 @@ struct dbg_port ports[] =
   },
 #endif
 };
+
 static struct dbg_port *port =
 #if defined(CONFIG_ETRAX_DEBUG_PORT0)
 	&ports[0];
@@ -97,14 +100,19 @@ static struct dbg_port *kgdb_port =
 #endif
 #endif
 
-static void
-start_port(struct dbg_port* p)
+static void start_port(struct dbg_port *p)
 {
-	if (!p)
-		return;
+	/* Set up serial port registers */
+	reg_ser_rw_tr_ctrl tr_ctrl = {0};
+	reg_ser_rw_tr_dma_en tr_dma_en = {0};
 
-	if (p->started)
+	reg_ser_rw_rec_ctrl rec_ctrl = {0};
+	reg_ser_rw_tr_baud_div tr_baud_div = {0};
+	reg_ser_rw_rec_baud_div rec_baud_div = {0};
+
+	if (!p || p->started)
 		return;
+
 	p->started = 1;
 
 	if (p->nbr == 1)
@@ -118,36 +126,24 @@ start_port(struct dbg_port* p)
 		crisv32_pinmux_alloc_fixed(pinmux_ser4);
 #endif
 
-	/* Set up serial port registers */
-	reg_ser_rw_tr_ctrl tr_ctrl = {0};
-	reg_ser_rw_tr_dma_en tr_dma_en = {0};
-
-	reg_ser_rw_rec_ctrl rec_ctrl = {0};
-	reg_ser_rw_tr_baud_div tr_baud_div = {0};
-	reg_ser_rw_rec_baud_div rec_baud_div = {0};
-
 	tr_ctrl.base_freq = rec_ctrl.base_freq = regk_ser_f29_493;
 	tr_dma_en.en = rec_ctrl.dma_mode = regk_ser_no;
 	tr_baud_div.div = rec_baud_div.div = 29493000 / p->baudrate / 8;
 	tr_ctrl.en = rec_ctrl.en = 1;
 
-	if (p->parity == 'O')
-	{
+	if (p->parity == 'O') {
 		tr_ctrl.par_en = regk_ser_yes;
 		tr_ctrl.par = regk_ser_odd;
 		rec_ctrl.par_en = regk_ser_yes;
 		rec_ctrl.par = regk_ser_odd;
-	}
-	else if (p->parity == 'E')
-	{
+	} else if (p->parity == 'E') {
 		tr_ctrl.par_en = regk_ser_yes;
 		tr_ctrl.par = regk_ser_even;
 		rec_ctrl.par_en = regk_ser_yes;
 		rec_ctrl.par = regk_ser_odd;
 	}
 
-	if (p->bits == 7)
-	{
+	if (p->bits == 7) {
 		tr_ctrl.data_bits = regk_ser_bits7;
 		rec_ctrl.data_bits = regk_ser_bits7;
 	}
@@ -161,8 +157,7 @@ start_port(struct dbg_port* p)
 
 #ifdef CONFIG_ETRAX_KGDB
 /* Use polling to get a single character from the kernel debug port */
-int
-getDebugChar(void)
+int getDebugChar(void)
 {
 	reg_ser_rs_stat_din stat;
 	reg_ser_rw_ack_intr ack_intr = { 0 };
@@ -179,8 +174,7 @@ getDebugChar(void)
 }
 
 /* Use polling to put a single character to the kernel debug port */
-void
-putDebugChar(int val)
+void putDebugChar(int val)
 {
 	reg_ser_r_stat_din stat;
 	do {
@@ -190,12 +184,48 @@ putDebugChar(int val)
 }
 #endif /* CONFIG_ETRAX_KGDB */
 
+static void __init early_putch(int c)
+{
+	reg_ser_r_stat_din stat;
+	/* Wait until transmitter is ready and send. */
+	do
+		stat = REG_RD(ser, port->instance, r_stat_din);
+	while (!stat.tr_rdy);
+	REG_WR_INT(ser, port->instance, rw_dout, c);
+}
+
+static void __init
+early_console_write(struct console *con, const char *s, unsigned n)
+{
+	extern void reset_watchdog(void);
+	int i;
+
+	/* Send data. */
+	for (i = 0; i < n; i++) {
+		/* TODO: the '\n' -> '\n\r' translation should be done at the
+		   receiver. Remove it when the serial driver removes it.   */
+		if (s[i] == '\n')
+			early_putch('\r');
+		early_putch(s[i]);
+		reset_watchdog();
+	}
+}
+
+static struct console early_console_dev __initdata = {
+	.name   = "early",
+	.write  = early_console_write,
+	.flags  = CON_PRINTBUFFER | CON_BOOT,
+	.index  = -1
+};
+
 /* Register console for printk's, etc. */
-int __init
-init_etrax_debug(void)
+int __init init_etrax_debug(void)
 {
         start_port(port);
 
+	/* Register an early console if a debug port was chosen.  */
+	register_console(&early_console_dev);
+
 #ifdef CONFIG_ETRAX_KGDB
 	start_port(kgdb_port);
 #endif /* CONFIG_ETRAX_KGDB */
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index ee66866538f8..eb74dabbeb96 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/threads.h>
 #include <linux/cpufreq.h>
+#include <linux/mm.h>
 #include <asm/types.h>
 #include <asm/signal.h>
 #include <asm/io.h>
@@ -56,7 +57,6 @@ static int __init etrax_init_cont_rotime(void)
 }
 arch_initcall(etrax_init_cont_rotime);
 
-
 unsigned long timer_regs[NR_CPUS] =
 {
 	regi_timer0,
@@ -68,9 +68,8 @@ unsigned long timer_regs[NR_CPUS] =
 extern int set_rtc_mmss(unsigned long nowtime);
 
 #ifdef CONFIG_CPU_FREQ
-static int
-cris_time_freq_notifier(struct notifier_block *nb, unsigned long val,
-			void *data);
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data);
 
 static struct notifier_block cris_time_freq_notifier_block = {
 	.notifier_call = cris_time_freq_notifier,
@@ -87,7 +86,6 @@ unsigned long get_ns_in_jiffie(void)
 	return ns;
 }
 
-
 /* From timer MDS describing the hardware watchdog:
  * 4.3.1 Watchdog Operation
  * The watchdog timer is an 8-bit timer with a configurable start value.
@@ -109,11 +107,18 @@ static short int watchdog_key = 42;  /* arbitrary 7 bit number */
  * is used though, so set this really low. */
 #define WATCHDOG_MIN_FREE_PAGES 8
 
+/* for reliable NICE_DOGGY behaviour */
+static int bite_in_progress;
+
 void reset_watchdog(void)
 {
 #if defined(CONFIG_ETRAX_WATCHDOG)
 	reg_timer_rw_wd_ctrl wd_ctrl = { 0 };
 
+#if defined(CONFIG_ETRAX_WATCHDOG_NICE_DOGGY)
+	if (unlikely(bite_in_progress))
+		return;
+#endif
 	/* Only keep watchdog happy as long as we have memory left! */
 	if(nr_free_pages() > WATCHDOG_MIN_FREE_PAGES) {
 		/* Reset the watchdog with the inverse of the old key */
@@ -148,7 +153,9 @@ void handle_watchdog_bite(struct pt_regs *regs)
 #if defined(CONFIG_ETRAX_WATCHDOG)
 	extern int cause_of_death;
 
+	nmi_enter();
 	oops_in_progress = 1;
+	bite_in_progress = 1;
 	printk(KERN_WARNING "Watchdog bite\n");
 
 	/* Check if forced restart or unexpected watchdog */
@@ -170,6 +177,7 @@ void handle_watchdog_bite(struct pt_regs *regs)
 	printk(KERN_WARNING "Oops: bitten by watchdog\n");
 	show_registers(regs);
 	oops_in_progress = 0;
+	printk("\n"); /* Flush mtdoops.  */
 #ifndef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	reset_watchdog();
 #endif
@@ -202,7 +210,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
 	/* Reset watchdog otherwise it resets us! */
 	reset_watchdog();
 
-        /* Update statistics. */
+	/* Update statistics. */
 	update_process_times(user_mode(regs));
 
 	cris_do_profile(regs); /* Save profiling information */
@@ -213,7 +221,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
 
 	/* Call the real timer interrupt handler */
 	xtime_update(1);
-        return IRQ_HANDLED;
+	return IRQ_HANDLED;
 }
 
 /* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */
@@ -293,14 +301,13 @@ void __init time_init(void)
 
 #ifdef CONFIG_CPU_FREQ
 	cpufreq_register_notifier(&cris_time_freq_notifier_block,
-		CPUFREQ_TRANSITION_NOTIFIER);
+				  CPUFREQ_TRANSITION_NOTIFIER);
 #endif
 }
 
 #ifdef CONFIG_CPU_FREQ
-static int
-cris_time_freq_notifier(struct notifier_block *nb, unsigned long val,
-			void *data)
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data)
 {
 	struct cpufreq_freqs *freqs = data;
 	if (val == CPUFREQ_POSTCHANGE) {
diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c
index 0b5b70d5f58a..f0f335d8aa79 100644
--- a/arch/cris/arch-v32/lib/usercopy.c
+++ b/arch/cris/arch-v32/lib/usercopy.c
@@ -26,8 +26,7 @@
 /* Copy to userspace.  This is based on the memcpy used for
    kernel-to-kernel copying; see "string.c".  */
 
-unsigned long
-__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
+unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -155,13 +154,13 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn)
 
   return retn;
 }
+EXPORT_SYMBOL(__copy_user);
 
 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
    userland.  The return-value is the number of bytes that were
    inaccessible.  */
-
-unsigned long
-__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -321,11 +320,10 @@ copy_exception_bytes:
 
   return retn + n;
 }
+EXPORT_SYMBOL(__copy_user_zeroing);
 
 /* Zero userspace.  */
-
-unsigned long
-__do_clear_user (void __user *pto, unsigned long pn)
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -468,3 +466,4 @@ __do_clear_user (void __user *pto, unsigned long pn)
 
   return retn;
 }
+EXPORT_SYMBOL(__do_clear_user);
diff --git a/arch/cris/arch-v32/mach-fs/pinmux.c b/arch/cris/arch-v32/mach-fs/pinmux.c
index 38f29eec14a6..05a04708b8eb 100644
--- a/arch/cris/arch-v32/mach-fs/pinmux.c
+++ b/arch/cris/arch-v32/mach-fs/pinmux.c
@@ -26,7 +26,29 @@ static DEFINE_SPINLOCK(pinmux_lock);
 
 static void crisv32_pinmux_set(int port);
 
-int crisv32_pinmux_init(void)
+static int __crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+				 enum pin_mode mode)
+{
+	int i;
+
+	for (i = first_pin; i <= last_pin; i++) {
+		if ((pins[port][i] != pinmux_none)
+		    && (pins[port][i] != pinmux_gpio)
+		    && (pins[port][i] != mode)) {
+#ifdef DEBUG
+			panic("Pinmux alloc failed!\n");
+#endif
+			return -EPERM;
+		}
+	}
+
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port][i] = mode;
+
+	crisv32_pinmux_set(port);
+}
+
+static int crisv32_pinmux_init(void)
 {
 	static int initialized;
 
@@ -37,20 +59,20 @@ int crisv32_pinmux_init(void)
 		pa.pa0 = pa.pa1 = pa.pa2 = pa.pa3 =
 		    pa.pa4 = pa.pa5 = pa.pa6 = pa.pa7 = regk_pinmux_yes;
 		REG_WR(pinmux, regi_pinmux, rw_pa, pa);
-		crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio);
 	}
 
 	return 0;
 }
 
-int
-crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode mode)
+int crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+			 enum pin_mode mode)
 {
-	int i;
 	unsigned long flags;
+	int ret;
 
 	crisv32_pinmux_init();
 
@@ -59,26 +81,11 @@ crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode mode)
 
 	spin_lock_irqsave(&pinmux_lock, flags);
 
-	for (i = first_pin; i <= last_pin; i++) {
-		if ((pins[port][i] != pinmux_none)
-		    && (pins[port][i] != pinmux_gpio)
-		    && (pins[port][i] != mode)) {
-			spin_unlock_irqrestore(&pinmux_lock, flags);
-#ifdef DEBUG
-			panic("Pinmux alloc failed!\n");
-#endif
-			return -EPERM;
-		}
-	}
-
-	for (i = first_pin; i <= last_pin; i++)
-		pins[port][i] = mode;
-
-	crisv32_pinmux_set(port);
+	ret = __crisv32_pinmux_alloc(port, first_pin, last_pin, mode);
 
 	spin_unlock_irqrestore(&pinmux_lock, flags);
 
-	return 0;
+	return ret;
 }
 
 int crisv32_pinmux_alloc_fixed(enum fixed_function function)
@@ -98,58 +105,58 @@ int crisv32_pinmux_alloc_fixed(enum fixed_function function)
 
 	switch (function) {
 	case pinmux_ser1:
-		ret = crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed);
 		hwprot.ser1 = regk_pinmux_yes;
 		break;
 	case pinmux_ser2:
-		ret = crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed);
 		hwprot.ser2 = regk_pinmux_yes;
 		break;
 	case pinmux_ser3:
-		ret = crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed);
 		hwprot.ser3 = regk_pinmux_yes;
 		break;
 	case pinmux_sser0:
-		ret = crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
 		hwprot.sser0 = regk_pinmux_yes;
 		break;
 	case pinmux_sser1:
-		ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
 		hwprot.sser1 = regk_pinmux_yes;
 		break;
 	case pinmux_ata0:
-		ret = crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed);
 		hwprot.ata0 = regk_pinmux_yes;
 		break;
 	case pinmux_ata1:
-		ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed);
 		hwprot.ata1 = regk_pinmux_yes;
 		break;
 	case pinmux_ata2:
-		ret = crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed);
 		hwprot.ata2 = regk_pinmux_yes;
 		break;
 	case pinmux_ata3:
-		ret = crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed);
 		hwprot.ata2 = regk_pinmux_yes;
 		break;
 	case pinmux_ata:
-		ret = crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed);
 		hwprot.ata = regk_pinmux_yes;
 		break;
 	case pinmux_eth1:
-		ret = crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed);
 		hwprot.eth1 = regk_pinmux_yes;
 		hwprot.eth1_mgm = regk_pinmux_yes;
 		break;
 	case pinmux_timer:
-		ret = crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
 		hwprot.timer = regk_pinmux_yes;
 		spin_unlock_irqrestore(&pinmux_lock, flags);
 		return ret;
@@ -188,9 +195,19 @@ void crisv32_pinmux_set(int port)
 #endif
 }
 
-int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+static int __crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
 {
 	int i;
+
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port][i] = pinmux_none;
+
+	crisv32_pinmux_set(port);
+	return 0;
+}
+
+int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+{
 	unsigned long flags;
 
 	crisv32_pinmux_init();
@@ -199,11 +216,7 @@ int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
 		return -EINVAL;
 
 	spin_lock_irqsave(&pinmux_lock, flags);
-
-	for (i = first_pin; i <= last_pin; i++)
-		pins[port][i] = pinmux_none;
-
-	crisv32_pinmux_set(port);
+	__crisv32_pinmux_dealloc(port, first_pin, last_pin);
 	spin_unlock_irqrestore(&pinmux_lock, flags);
 
 	return 0;
@@ -226,58 +239,58 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function)
 
 	switch (function) {
 	case pinmux_ser1:
-		ret = crisv32_pinmux_dealloc(PORT_C, 4, 7);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 4, 7);
 		hwprot.ser1 = regk_pinmux_no;
 		break;
 	case pinmux_ser2:
-		ret = crisv32_pinmux_dealloc(PORT_C, 8, 11);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 11);
 		hwprot.ser2 = regk_pinmux_no;
 		break;
 	case pinmux_ser3:
-		ret = crisv32_pinmux_dealloc(PORT_C, 12, 15);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 12, 15);
 		hwprot.ser3 = regk_pinmux_no;
 		break;
 	case pinmux_sser0:
-		ret = crisv32_pinmux_dealloc(PORT_C, 0, 3);
-		ret |= crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 0, 3);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 16, 16);
 		hwprot.sser0 = regk_pinmux_no;
 		break;
 	case pinmux_sser1:
-		ret = crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
 		hwprot.sser1 = regk_pinmux_no;
 		break;
 	case pinmux_ata0:
-		ret = crisv32_pinmux_dealloc(PORT_D, 5, 7);
-		ret |= crisv32_pinmux_dealloc(PORT_D, 15, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 5, 7);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 15, 17);
 		hwprot.ata0 = regk_pinmux_no;
 		break;
 	case pinmux_ata1:
-		ret = crisv32_pinmux_dealloc(PORT_D, 0, 4);
-		ret |= crisv32_pinmux_dealloc(PORT_E, 17, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 17, 17);
 		hwprot.ata1 = regk_pinmux_no;
 		break;
 	case pinmux_ata2:
-		ret = crisv32_pinmux_dealloc(PORT_C, 11, 15);
-		ret |= crisv32_pinmux_dealloc(PORT_E, 3, 3);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 11, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 3, 3);
 		hwprot.ata2 = regk_pinmux_no;
 		break;
 	case pinmux_ata3:
-		ret = crisv32_pinmux_dealloc(PORT_C, 8, 10);
-		ret |= crisv32_pinmux_dealloc(PORT_C, 0, 2);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 10);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 0, 2);
 		hwprot.ata2 = regk_pinmux_no;
 		break;
 	case pinmux_ata:
-		ret = crisv32_pinmux_dealloc(PORT_B, 0, 15);
-		ret |= crisv32_pinmux_dealloc(PORT_D, 8, 15);
+		ret = __crisv32_pinmux_dealloc(PORT_B, 0, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 8, 15);
 		hwprot.ata = regk_pinmux_no;
 		break;
 	case pinmux_eth1:
-		ret = crisv32_pinmux_dealloc(PORT_E, 0, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_E, 0, 17);
 		hwprot.eth1 = regk_pinmux_no;
 		hwprot.eth1_mgm = regk_pinmux_no;
 		break;
 	case pinmux_timer:
-		ret = crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 16, 16);
 		hwprot.timer = regk_pinmux_no;
 		spin_unlock_irqrestore(&pinmux_lock, flags);
 		return ret;
@@ -293,7 +306,8 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function)
 	return ret;
 }
 
-void crisv32_pinmux_dump(void)
+#ifdef DEBUG
+static void crisv32_pinmux_dump(void)
 {
 	int i, j;
 
@@ -305,5 +319,5 @@ void crisv32_pinmux_dump(void)
 			printk(KERN_DEBUG "  Pin %d = %d\n", j, pins[i][j]);
 	}
 }
-
+#endif
 __initcall(crisv32_pinmux_init);
diff --git a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
index c2b3036779df..09bf0c90d2d3 100644
--- a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
+++ b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
@@ -28,11 +28,9 @@ enum fixed_function {
   pinmux_timer
 };
 
-int crisv32_pinmux_init(void);
 int crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode);
 int crisv32_pinmux_alloc_fixed(enum fixed_function function);
 int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin);
 int crisv32_pinmux_dealloc_fixed(enum fixed_function function);
-void crisv32_pinmux_dump(void);
 
 #endif
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index d5f124832fd1..889f2de050a3 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -1,8 +1,4 @@
 
-header-y += arch-v10/
-header-y += arch-v32/
-
-
 generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += cputime.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index 7d47b366ad82..01f66b8f15e5 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -1,8 +1,8 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += arch-v10/
-header-y += arch-v32/
+header-y += ../arch-v10/arch/
+header-y += ../arch-v32/arch/
 header-y += auxvec.h
 header-y += bitsperlong.h
 header-y += byteorder.h
diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c
index 5868cee20ebd..3908b942fd4c 100644
--- a/arch/cris/kernel/crisksyms.c
+++ b/arch/cris/kernel/crisksyms.c
@@ -47,16 +47,16 @@ EXPORT_SYMBOL(__negdi2);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 
-/* Userspace access functions */
-EXPORT_SYMBOL(__copy_user_zeroing);
-EXPORT_SYMBOL(__copy_user);
-
 #undef memcpy
 #undef memset
 extern void * memset(void *, int, __kernel_size_t);
 extern void * memcpy(void *, const void *, __kernel_size_t);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
+#ifdef CONFIG_ETRAX_ARCH_V32
+#undef strcmp
+EXPORT_SYMBOL(strcmp);
+#endif
 
 #ifdef CONFIG_ETRAX_FAST_TIMER
 /* Fast timer functions */
@@ -66,3 +66,4 @@ EXPORT_SYMBOL(del_fast_timer);
 EXPORT_SYMBOL(schedule_usleep);
 #endif
 EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_from_user);
diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c
index 0ffda73734f5..da4c72401e27 100644
--- a/arch/cris/kernel/traps.c
+++ b/arch/cris/kernel/traps.c
@@ -14,6 +14,10 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/utsname.h>
+#ifdef CONFIG_KALLSYMS
+#include <linux/kallsyms.h>
+#endif
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -34,25 +38,24 @@ static int kstack_depth_to_print = 24;
 
 void (*nmi_handler)(struct pt_regs *);
 
-void
-show_trace(unsigned long *stack)
+void show_trace(unsigned long *stack)
 {
 	unsigned long addr, module_start, module_end;
 	extern char _stext, _etext;
 	int i;
 
-	printk("\nCall Trace: ");
+	pr_err("\nCall Trace: ");
 
 	i = 1;
 	module_start = VMALLOC_START;
 	module_end = VMALLOC_END;
 
-	while (((long)stack & (THREAD_SIZE-1)) != 0) {
+	while (((long)stack & (THREAD_SIZE - 1)) != 0) {
 		if (__get_user(addr, stack)) {
 			/* This message matches "failing address" marked
 			   s390 in ksymoops, so lines containing it will
 			   not be filtered out by ksymoops.  */
-			printk("Failing address 0x%lx\n", (unsigned long)stack);
+			pr_err("Failing address 0x%lx\n", (unsigned long)stack);
 			break;
 		}
 		stack++;
@@ -68,10 +71,14 @@ show_trace(unsigned long *stack)
 		if (((addr >= (unsigned long)&_stext) &&
 		     (addr <= (unsigned long)&_etext)) ||
 		    ((addr >= module_start) && (addr <= module_end))) {
+#ifdef CONFIG_KALLSYMS
+			print_ip_sym(addr);
+#else
 			if (i && ((i % 8) == 0))
-				printk("\n       ");
-			printk("[<%08lx>] ", addr);
+				pr_err("\n       ");
+			pr_err("[<%08lx>] ", addr);
 			i++;
+#endif
 		}
 	}
 }
@@ -111,21 +118,21 @@ show_stack(struct task_struct *task, unsigned long *sp)
 
 	stack = sp;
 
-	printk("\nStack from %08lx:\n       ", (unsigned long)stack);
+	pr_err("\nStack from %08lx:\n       ", (unsigned long)stack);
 	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (((long)stack & (THREAD_SIZE-1)) == 0)
 			break;
 		if (i && ((i % 8) == 0))
-			printk("\n       ");
+			pr_err("\n       ");
 		if (__get_user(addr, stack)) {
 			/* This message matches "failing address" marked
 			   s390 in ksymoops, so lines containing it will
 			   not be filtered out by ksymoops.  */
-			printk("Failing address 0x%lx\n", (unsigned long)stack);
+			pr_err("Failing address 0x%lx\n", (unsigned long)stack);
 			break;
 		}
 		stack++;
-		printk("%08lx ", addr);
+		pr_err("%08lx ", addr);
 	}
 	show_trace(sp);
 }
@@ -139,33 +146,32 @@ show_stack(void)
 	unsigned long *sp = (unsigned long *)rdusp();
 	int i;
 
-	printk("Stack dump [0x%08lx]:\n", (unsigned long)sp);
+	pr_err("Stack dump [0x%08lx]:\n", (unsigned long)sp);
 	for (i = 0; i < 16; i++)
-		printk("sp + %d: 0x%08lx\n", i*4, sp[i]);
+		pr_err("sp + %d: 0x%08lx\n", i*4, sp[i]);
 	return 0;
 }
 #endif
 
-void
-set_nmi_handler(void (*handler)(struct pt_regs *))
+void set_nmi_handler(void (*handler)(struct pt_regs *))
 {
 	nmi_handler = handler;
 	arch_enable_nmi();
 }
 
 #ifdef CONFIG_DEBUG_NMI_OOPS
-void
-oops_nmi_handler(struct pt_regs *regs)
+void oops_nmi_handler(struct pt_regs *regs)
 {
 	stop_watchdog();
 	oops_in_progress = 1;
-	printk("NMI!\n");
+	pr_err("NMI!\n");
 	show_registers(regs);
 	oops_in_progress = 0;
+	oops_exit();
+	pr_err("\n"); /* Flush mtdoops.  */
 }
 
-static int __init
-oops_nmi_register(void)
+static int __init oops_nmi_register(void)
 {
 	set_nmi_handler(oops_nmi_handler);
 	return 0;
@@ -180,8 +186,7 @@ __initcall(oops_nmi_register);
  * similar to an Oops dump, and if the kernel is configured to be a nice
  * doggy, then halt instead of reboot.
  */
-void
-watchdog_bite_hook(struct pt_regs *regs)
+void watchdog_bite_hook(struct pt_regs *regs)
 {
 #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	local_irq_disable();
@@ -196,8 +201,7 @@ watchdog_bite_hook(struct pt_regs *regs)
 }
 
 /* This is normally the Oops function. */
-void
-die_if_kernel(const char *str, struct pt_regs *regs, long err)
+void die_if_kernel(const char *str, struct pt_regs *regs, long err)
 {
 	if (user_mode(regs))
 		return;
@@ -211,13 +215,17 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
 	stop_watchdog();
 #endif
 
+	oops_enter();
 	handle_BUG(regs);
 
-	printk("%s: %04lx\n", str, err & 0xffff);
+	pr_err("Linux %s %s\n", utsname()->release, utsname()->version);
+	pr_err("%s: %04lx\n", str, err & 0xffff);
 
 	show_registers(regs);
 
+	oops_exit();
 	oops_in_progress = 0;
+	pr_err("\n"); /* Flush mtdoops.  */
 
 #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	reset_watchdog();
@@ -225,8 +233,7 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
 	do_exit(SIGSEGV);
 }
 
-void __init
-trap_init(void)
+void __init trap_init(void)
 {
 	/* Nothing needs to be done */
 }
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index c81af5bd9167..1e7fd45b60f8 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -11,13 +11,15 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
+#include <linux/proc_fs.h>
+#include <linux/kcore.h>
 #include <asm/tlb.h>
 #include <asm/sections.h>
 
 unsigned long empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
 
-void __init
-mem_init(void)
+void __init mem_init(void)
 {
 	BUG_ON(!mem_map);
 
@@ -31,10 +33,36 @@ mem_init(void)
 	mem_init_print_info(NULL);
 }
 
-/* free the pages occupied by initialization code */
+/* Free a range of init pages. Virtual addresses. */
 
-void 
-free_initmem(void)
+void free_init_pages(const char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		free_page(addr);
+		totalram_pages++;
+	}
+
+	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+}
+
+/* Free the pages occupied by initialization code. */
+
+void free_initmem(void)
 {
 	free_initmem_default(-1);
 }
+
+/* Free the pages occupied by initrd code. */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	free_init_pages("initrd memory",
+	                start,
+	                end);
+}
+#endif
diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c
index f9ca44bdea20..80fdb995a8ce 100644
--- a/arch/cris/mm/ioremap.c
+++ b/arch/cris/mm/ioremap.c
@@ -76,10 +76,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
  * Must be freed with iounmap.
  */
 
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
         return __ioremap(phys_addr | MEM_NON_CACHEABLE, size, 0);
 }
+EXPORT_SYMBOL(ioremap_nocache);
 
 void iounmap(volatile void __iomem *addr)
 {
diff --git a/arch/hexagon/include/asm/cache.h b/arch/hexagon/include/asm/cache.h
index 263511719a4a..69952c184207 100644
--- a/arch/hexagon/include/asm/cache.h
+++ b/arch/hexagon/include/asm/cache.h
@@ -1,7 +1,7 @@
 /*
  * Cache definitions for the Hexagon architecture
  *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2011,2014 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -25,6 +25,8 @@
 #define L1_CACHE_SHIFT		(5)
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+
 #define __cacheline_aligned	__aligned(L1_CACHE_BYTES)
 #define ____cacheline_aligned	__aligned(L1_CACHE_BYTES)
 
diff --git a/arch/hexagon/include/asm/cacheflush.h b/arch/hexagon/include/asm/cacheflush.h
index 49e0896ec240..b86f9f300e94 100644
--- a/arch/hexagon/include/asm/cacheflush.h
+++ b/arch/hexagon/include/asm/cacheflush.h
@@ -21,10 +21,7 @@
 #ifndef _ASM_CACHEFLUSH_H
 #define _ASM_CACHEFLUSH_H
 
-#include <linux/cache.h>
-#include <linux/mm.h>
-#include <asm/string.h>
-#include <asm-generic/cacheflush.h>
+#include <linux/mm_types.h>
 
 /* Cache flushing:
  *
@@ -41,6 +38,20 @@
 #define LINESIZE	32
 #define LINEBITS	5
 
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_icache_page(vma, pg)		do { } while (0)
+#define flush_icache_user_range(vma, pg, adr, len)	do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
 /*
  * Flush Dcache range through current map.
  */
@@ -49,7 +60,6 @@ extern void flush_dcache_range(unsigned long start, unsigned long end);
 /*
  * Flush Icache range through current map.
  */
-#undef flush_icache_range
 extern void flush_icache_range(unsigned long start, unsigned long end);
 
 /*
@@ -79,19 +89,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	/*  generic_ptrace_pokedata doesn't wind up here, does it?  */
 }
 
-#undef copy_to_user_page
-static inline void copy_to_user_page(struct vm_area_struct *vma,
-					     struct page *page,
-					     unsigned long vaddr,
-					     void *dst, void *src, int len)
-{
-	memcpy(dst, src, len);
-	if (vma->vm_flags & VM_EXEC) {
-		flush_icache_range((unsigned long) dst,
-		(unsigned long) dst + len);
-	}
-}
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, void *src, int len);
 
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
 
 extern void hexagon_inv_dcache_range(unsigned long start, unsigned long end);
 extern void hexagon_clean_dcache_range(unsigned long start, unsigned long end);
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index 70298996e9b2..66f5e9a61efc 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -24,14 +24,9 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/vmalloc.h>
-#include <asm/string.h>
-#include <asm/mem-layout.h>
 #include <asm/iomap.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
 
 /*
  * We don't have PCI yet.
diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c
index 0e7c1dbb37b2..6981949f5df3 100644
--- a/arch/hexagon/kernel/setup.c
+++ b/arch/hexagon/kernel/setup.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/bootmem.h>
 #include <linux/mmzone.h>
 #include <linux/mm.h>
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index 7858663352b9..110dab152f82 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -1,7 +1,7 @@
 /*
  * Kernel traps/events for Hexagon processor
  *
- * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -423,7 +423,7 @@ void do_trap0(struct pt_regs *regs)
 			 */
 			info.si_code = TRAP_BRKPT;
 			info.si_addr = (void __user *) pt_elr(regs);
-			send_sig_info(SIGTRAP, &info, current);
+			force_sig_info(SIGTRAP, &info, current);
 		} else {
 #ifdef CONFIG_KGDB
 			kgdb_handle_exception(pt_cause(regs), SIGTRAP,
diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S
index 44d8c47bae2f..5f268c1071b3 100644
--- a/arch/hexagon/kernel/vmlinux.lds.S
+++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -1,7 +1,7 @@
 /*
  * Linker script for Hexagon kernel
  *
- * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -59,7 +59,7 @@ SECTIONS
 	INIT_DATA_SECTION(PAGE_SIZE)
 
 	_sdata = .;
-		RW_DATA_SECTION(32,PAGE_SIZE,PAGE_SIZE)
+		RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE)
 		RO_DATA_SECTION(PAGE_SIZE)
 	_edata = .;
 
diff --git a/arch/hexagon/mm/cache.c b/arch/hexagon/mm/cache.c
index 0c76c802e31c..a7c6d827d8b6 100644
--- a/arch/hexagon/mm/cache.c
+++ b/arch/hexagon/mm/cache.c
@@ -127,3 +127,13 @@ void flush_cache_all_hexagon(void)
 	local_irq_restore(flags);
 	mb();
 }
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, void *src, int len)
+{
+	memcpy(dst, src, len);
+	if (vma->vm_flags & VM_EXEC) {
+		flush_icache_range((unsigned long) dst,
+		(unsigned long) dst + len);
+	}
+}
diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c
index 5905fd5f97f6..d27d67224046 100644
--- a/arch/hexagon/mm/ioremap.c
+++ b/arch/hexagon/mm/ioremap.c
@@ -20,6 +20,7 @@
 
 #include <linux/io.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 
 void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 536d13b0bea6..074e52bf815c 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -20,7 +20,6 @@ config IA64
 	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
 	select HAVE_FUNCTION_TRACER
 	select HAVE_DMA_ATTRS
-	select HAVE_KVM
 	select TTY
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
@@ -232,7 +231,7 @@ config IA64_SGI_UV
 config IA64_HP_SIM
 	bool "Ski-simulator"
 	select SWIOTLB
-	depends on !PM_RUNTIME
+	depends on !PM
 
 endchoice
 
@@ -640,8 +639,6 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
-source "arch/ia64/kvm/Kconfig"
-
 source "lib/Kconfig"
 
 config IOMMU_HELPER
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 5441b14994fc..970d0bd99621 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -53,7 +53,6 @@ core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
 core-$(CONFIG_IA64_SGI_UV)	+= arch/ia64/uv/
-core-$(CONFIG_KVM) 		+= arch/ia64/kvm/
 
 drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
deleted file mode 100644
index 4729752b7256..000000000000
--- a/arch/ia64/include/asm/kvm_host.h
+++ /dev/null
@@ -1,609 +0,0 @@
-/*
- * kvm_host.h: used for kvm module, and hold ia64-specific sections.
- *
- * Copyright (C) 2007, Intel Corporation.
- *
- * Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef __ASM_KVM_HOST_H
-#define __ASM_KVM_HOST_H
-
-#define KVM_USER_MEM_SLOTS 32
-
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
-
-/* define exit reasons from vmm to kvm*/
-#define EXIT_REASON_VM_PANIC		0
-#define EXIT_REASON_MMIO_INSTRUCTION	1
-#define EXIT_REASON_PAL_CALL		2
-#define EXIT_REASON_SAL_CALL		3
-#define EXIT_REASON_SWITCH_RR6		4
-#define EXIT_REASON_VM_DESTROY		5
-#define EXIT_REASON_EXTERNAL_INTERRUPT	6
-#define EXIT_REASON_IPI			7
-#define EXIT_REASON_PTC_G		8
-#define EXIT_REASON_DEBUG		20
-
-/*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
-#define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000
-#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * Define vm_buffer, used by PAL Services, base address.
- * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
- */
-#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * kvm guest's data area looks as follow:
- *
- *            +----------------------+	-------	KVM_VM_DATA_SIZE
- *	      |	    vcpu[n]'s data   |	 |     ___________________KVM_STK_OFFSET
- *     	      |			     |	 |    /			  |
- *     	      |	       ..........    |	 |   /vcpu's struct&stack |
- *     	      |	       ..........    |	 |  /---------------------|---- 0
- *	      |	    vcpu[5]'s data   |	 | /	   vpd		  |
- *	      |	    vcpu[4]'s data   |	 |/-----------------------|
- *	      |	    vcpu[3]'s data   |	 /	   vtlb		  |
- *	      |	    vcpu[2]'s data   |	/|------------------------|
- *	      |	    vcpu[1]'s data   |/  |	   vhpt		  |
- *	      |	    vcpu[0]'s data   |____________________________|
- *            +----------------------+	 |
- *	      |	   memory dirty log  |	 |
- *            +----------------------+	 |
- *	      |	   vm's data struct  |	 |
- *            +----------------------+	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |	  vm's p2m table  |	 |
- *	      |			     |	 |
- *            |			     |	 |
- *	      |			     |	 |  |
- * vm's data->|			     |   |  |
- *	      +----------------------+ ------- 0
- * To support large memory, needs to increase the size of p2m.
- * To support more vcpus, needs to ensure it has enough space to
- * hold vcpus' data.
- */
-
-#define KVM_VM_DATA_SHIFT	26
-#define KVM_VM_DATA_SIZE	(__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE	(KVM_VMM_BASE + KVM_VM_DATA_SIZE)
-
-#define KVM_P2M_BASE		KVM_VM_DATA_BASE
-#define KVM_P2M_SIZE		(__IA64_UL_CONST(24) << 20)
-
-#define VHPT_SHIFT		16
-#define VHPT_SIZE		(__IA64_UL_CONST(1) << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES	(__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
-
-#define VTLB_SHIFT		16
-#define VTLB_SIZE		(__IA64_UL_CONST(1) << VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES	(1UL << (VHPT_SHIFT-5))
-
-#define VPD_SHIFT		16
-#define VPD_SIZE		(__IA64_UL_CONST(1) << VPD_SHIFT)
-
-#define VCPU_STRUCT_SHIFT	16
-#define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-
-/*
- * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
- */
-#define KVM_STK_SHIFT		16
-#define KVM_STK_OFFSET		(__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
-
-#define KVM_VM_STRUCT_SHIFT	19
-#define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
-
-#define KVM_MEM_DIRY_LOG_SHIFT	19
-#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
-
-#ifndef __ASSEMBLY__
-
-/*Define the max vcpus and memory for Guests.*/
-#define KVM_MAX_VCPUS	(KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
-			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
-#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
-
-#define VMM_LOG_LEN 256
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/kvm.h>
-#include <linux/kvm_para.h>
-#include <linux/kvm_types.h>
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/page.h>
-
-struct kvm_vcpu_data {
-	char vcpu_vhpt[VHPT_SIZE];
-	char vcpu_vtlb[VTLB_SIZE];
-	char vcpu_vpd[VPD_SIZE];
-	char vcpu_struct[VCPU_STRUCT_SIZE];
-};
-
-struct kvm_vm_data {
-	char kvm_p2m[KVM_P2M_SIZE];
-	char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
-	char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
-	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
-};
-
-#define VCPU_BASE(n)	(KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, vcpu_data[n]))
-#define KVM_VM_BASE	(KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, kvm_vm_struct))
-#define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
-
-#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
-#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
-#define VPD_BASE(n)  (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
-#define VCPU_STRUCT_BASE(n)	(VCPU_BASE(n) + \
-				offsetof(struct kvm_vcpu_data, vcpu_struct))
-
-/*IO section definitions*/
-#define IOREQ_READ      1
-#define IOREQ_WRITE     0
-
-#define STATE_IOREQ_NONE        0
-#define STATE_IOREQ_READY       1
-#define STATE_IOREQ_INPROCESS   2
-#define STATE_IORESP_READY      3
-
-/*Guest Physical address layout.*/
-#define GPFN_MEM        (0UL << 60) /* Guest pfn is normal mem */
-#define GPFN_FRAME_BUFFER   (1UL << 60) /* VGA framebuffer */
-#define GPFN_LOW_MMIO       (2UL << 60) /* Low MMIO range */
-#define GPFN_PIB        (3UL << 60) /* PIB base */
-#define GPFN_IOSAPIC        (4UL << 60) /* IOSAPIC base */
-#define GPFN_LEGACY_IO      (5UL << 60) /* Legacy I/O base */
-#define GPFN_GFW        (6UL << 60) /* Guest Firmware */
-#define GPFN_PHYS_MMIO      (7UL << 60) /* Directed MMIO Range */
-
-#define GPFN_IO_MASK        (7UL << 60) /* Guest pfn is I/O type */
-#define GPFN_INV_MASK       (1UL << 63) /* Guest pfn is invalid */
-#define INVALID_MFN       (~0UL)
-#define MEM_G   (1UL << 30)
-#define MEM_M   (1UL << 20)
-#define MMIO_START       (3 * MEM_G)
-#define MMIO_SIZE        (512 * MEM_M)
-#define VGA_IO_START     0xA0000UL
-#define VGA_IO_SIZE      0x20000
-#define LEGACY_IO_START  (MMIO_START + MMIO_SIZE)
-#define LEGACY_IO_SIZE   (64 * MEM_M)
-#define IO_SAPIC_START   0xfec00000UL
-#define IO_SAPIC_SIZE    0x100000
-#define PIB_START 0xfee00000UL
-#define PIB_SIZE 0x200000
-#define GFW_START        (4 * MEM_G - 16 * MEM_M)
-#define GFW_SIZE         (16 * MEM_M)
-
-/*Deliver mode, defined for ioapic.c*/
-#define dest_Fixed IOSAPIC_FIXED
-#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY
-
-#define NMI_VECTOR      		2
-#define ExtINT_VECTOR       		0
-#define NULL_VECTOR     		(-1)
-#define IA64_SPURIOUS_INT_VECTOR    	0x0f
-
-#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24)
-
-/*
- *Delivery mode
- */
-#define SAPIC_DELIV_SHIFT      8
-#define SAPIC_FIXED            0x0
-#define SAPIC_LOWEST_PRIORITY  0x1
-#define SAPIC_PMI              0x2
-#define SAPIC_NMI              0x4
-#define SAPIC_INIT             0x5
-#define SAPIC_EXTINT           0x7
-
-/*
- * vcpu->requests bit members for arch
- */
-#define KVM_REQ_PTC_G		32
-#define KVM_REQ_RESUME		33
-
-struct kvm_mmio_req {
-	uint64_t addr;          /*  physical address		*/
-	uint64_t size;          /*  size in bytes		*/
-	uint64_t data;          /*  data (or paddr of data)     */
-	uint8_t state:4;
-	uint8_t dir:1;          /*  1=read, 0=write             */
-};
-
-/*Pal data struct */
-struct kvm_pal_call{
-	/*In area*/
-	uint64_t gr28;
-	uint64_t gr29;
-	uint64_t gr30;
-	uint64_t gr31;
-	/*Out area*/
-	struct ia64_pal_retval ret;
-};
-
-/* Sal data structure */
-struct kvm_sal_call{
-	/*In area*/
-	uint64_t in0;
-	uint64_t in1;
-	uint64_t in2;
-	uint64_t in3;
-	uint64_t in4;
-	uint64_t in5;
-	uint64_t in6;
-	uint64_t in7;
-	struct sal_ret_values ret;
-};
-
-/*Guest change rr6*/
-struct kvm_switch_rr6 {
-	uint64_t old_rr;
-	uint64_t new_rr;
-};
-
-union ia64_ipi_a{
-	unsigned long val;
-	struct {
-		unsigned long rv  : 3;
-		unsigned long ir  : 1;
-		unsigned long eid : 8;
-		unsigned long id  : 8;
-		unsigned long ib_base : 44;
-	};
-};
-
-union ia64_ipi_d {
-	unsigned long val;
-	struct {
-		unsigned long vector : 8;
-		unsigned long dm  : 3;
-		unsigned long ig  : 53;
-	};
-};
-
-/*ipi check exit data*/
-struct kvm_ipi_data{
-	union ia64_ipi_a addr;
-	union ia64_ipi_d data;
-};
-
-/*global purge data*/
-struct kvm_ptc_g {
-	unsigned long vaddr;
-	unsigned long rr;
-	unsigned long ps;
-	struct kvm_vcpu *vcpu;
-};
-
-/*Exit control data */
-struct exit_ctl_data{
-	uint32_t exit_reason;
-	uint32_t vm_status;
-	union {
-		struct kvm_mmio_req	ioreq;
-		struct kvm_pal_call	pal_data;
-		struct kvm_sal_call	sal_data;
-		struct kvm_switch_rr6	rr_data;
-		struct kvm_ipi_data	ipi_data;
-		struct kvm_ptc_g	ptc_g_data;
-	} u;
-};
-
-union pte_flags {
-	unsigned long val;
-	struct {
-		unsigned long p    :  1; /*0      */
-		unsigned long      :  1; /* 1     */
-		unsigned long ma   :  3; /* 2-4   */
-		unsigned long a    :  1; /* 5     */
-		unsigned long d    :  1; /* 6     */
-		unsigned long pl   :  2; /* 7-8   */
-		unsigned long ar   :  3; /* 9-11  */
-		unsigned long ppn  : 38; /* 12-49 */
-		unsigned long      :  2; /* 50-51 */
-		unsigned long ed   :  1; /* 52    */
-	};
-};
-
-union ia64_pta {
-	unsigned long val;
-	struct {
-		unsigned long ve : 1;
-		unsigned long reserved0 : 1;
-		unsigned long size : 6;
-		unsigned long vf : 1;
-		unsigned long reserved1 : 6;
-		unsigned long base : 49;
-	};
-};
-
-struct thash_cb {
-	/* THASH base information */
-	struct thash_data	*hash; /* hash table pointer */
-	union ia64_pta		pta;
-	int           num;
-};
-
-struct kvm_vcpu_stat {
-	u32 halt_wakeup;
-};
-
-struct kvm_vcpu_arch {
-	int launched;
-	int last_exit;
-	int last_run_cpu;
-	int vmm_tr_slot;
-	int vm_tr_slot;
-	int sn_rtc_tr_slot;
-
-#define KVM_MP_STATE_RUNNABLE          0
-#define KVM_MP_STATE_UNINITIALIZED     1
-#define KVM_MP_STATE_INIT_RECEIVED     2
-#define KVM_MP_STATE_HALTED            3
-	int mp_state;
-
-#define MAX_PTC_G_NUM			3
-	int ptc_g_count;
-	struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM];
-
-	/*halt timer to wake up sleepy vcpus*/
-	struct hrtimer hlt_timer;
-	long ht_active;
-
-	struct kvm_lapic *apic;    /* kernel irqchip context */
-	struct vpd *vpd;
-
-	/* Exit data for vmm_transition*/
-	struct exit_ctl_data exit_data;
-
-	cpumask_t cache_coherent_map;
-
-	unsigned long vmm_rr;
-	unsigned long host_rr6;
-	unsigned long psbits[8];
-	unsigned long cr_iipa;
-	unsigned long cr_isr;
-	unsigned long vsa_base;
-	unsigned long dirty_log_lock_pa;
-	unsigned long __gp;
-	/* TR and TC.  */
-	struct thash_data itrs[NITRS];
-	struct thash_data dtrs[NDTRS];
-	/* Bit is set if there is a tr/tc for the region.  */
-	unsigned char itr_regions;
-	unsigned char dtr_regions;
-	unsigned char tc_regions;
-	/* purge all */
-	unsigned long ptce_base;
-	unsigned long ptce_count[2];
-	unsigned long ptce_stride[2];
-	/* itc/itm */
-	unsigned long last_itc;
-	long itc_offset;
-	unsigned long itc_check;
-	unsigned long timer_check;
-	unsigned int timer_pending;
-	unsigned int timer_fired;
-
-	unsigned long vrr[8];
-	unsigned long ibr[8];
-	unsigned long dbr[8];
-	unsigned long insvc[4];		/* Interrupt in service.  */
-	unsigned long xtp;
-
-	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-	unsigned long fp_psr;       /*used for lazy float register */
-	unsigned long saved_gp;
-	/*for phycial  emulation */
-	int mode_flags;
-	struct thash_cb vtlb;
-	struct thash_cb vhpt;
-	char irq_check;
-	char irq_new_pending;
-
-	unsigned long opcode;
-	unsigned long cause;
-	char log_buf[VMM_LOG_LEN];
-	union context host;
-	union context guest;
-
-	char mmio_data[8];
-};
-
-struct kvm_vm_stat {
-	u64 remote_tlb_flush;
-};
-
-struct kvm_sal_data {
-	unsigned long boot_ip;
-	unsigned long boot_gp;
-};
-
-struct kvm_arch_memory_slot {
-};
-
-struct kvm_arch {
-	spinlock_t dirty_log_lock;
-
-	unsigned long	vm_base;
-	unsigned long	metaphysical_rr0;
-	unsigned long	metaphysical_rr4;
-	unsigned long	vmm_init_rr;
-
-	int		is_sn2;
-
-	struct kvm_ioapic *vioapic;
-	struct kvm_vm_stat stat;
-	struct kvm_sal_data rdv_sal_data;
-
-	struct list_head assigned_dev_head;
-	struct iommu_domain *iommu_domain;
-	bool iommu_noncoherent;
-
-	unsigned long irq_sources_bitmap;
-	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
-};
-
-union cpuid3_t {
-	u64 value;
-	struct {
-		u64 number : 8;
-		u64 revision : 8;
-		u64 model : 8;
-		u64 family : 8;
-		u64 archrev : 8;
-		u64 rv : 24;
-	};
-};
-
-struct kvm_pt_regs {
-	/* The following registers are saved by SAVE_MIN: */
-	unsigned long b6;  /* scratch */
-	unsigned long b7;  /* scratch */
-
-	unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
-	unsigned long ar_ssd; /* reserved for future use (scratch) */
-
-	unsigned long r8;  /* scratch (return value register 0) */
-	unsigned long r9;  /* scratch (return value register 1) */
-	unsigned long r10; /* scratch (return value register 2) */
-	unsigned long r11; /* scratch (return value register 3) */
-
-	unsigned long cr_ipsr; /* interrupted task's psr */
-	unsigned long cr_iip;  /* interrupted task's instruction pointer */
-	unsigned long cr_ifs;  /* interrupted task's function state */
-
-	unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
-	unsigned long ar_pfs;  /* prev function state  */
-	unsigned long ar_rsc;  /* RSE configuration */
-	/* The following two are valid only if cr_ipsr.cpl > 0: */
-	unsigned long ar_rnat;  /* RSE NaT */
-	unsigned long ar_bspstore; /* RSE bspstore */
-
-	unsigned long pr;  /* 64 predicate registers (1 bit each) */
-	unsigned long b0;  /* return pointer (bp) */
-	unsigned long loadrs;  /* size of dirty partition << 16 */
-
-	unsigned long r1;  /* the gp pointer */
-	unsigned long r12; /* interrupted task's memory stack pointer */
-	unsigned long r13; /* thread pointer */
-
-	unsigned long ar_fpsr;  /* floating point status (preserved) */
-	unsigned long r15;  /* scratch */
-
-	/* The remaining registers are NOT saved for system calls.  */
-	unsigned long r14;  /* scratch */
-	unsigned long r2;  /* scratch */
-	unsigned long r3;  /* scratch */
-	unsigned long r16;  /* scratch */
-	unsigned long r17;  /* scratch */
-	unsigned long r18;  /* scratch */
-	unsigned long r19;  /* scratch */
-	unsigned long r20;  /* scratch */
-	unsigned long r21;  /* scratch */
-	unsigned long r22;  /* scratch */
-	unsigned long r23;  /* scratch */
-	unsigned long r24;  /* scratch */
-	unsigned long r25;  /* scratch */
-	unsigned long r26;  /* scratch */
-	unsigned long r27;  /* scratch */
-	unsigned long r28;  /* scratch */
-	unsigned long r29;  /* scratch */
-	unsigned long r30;  /* scratch */
-	unsigned long r31;  /* scratch */
-	unsigned long ar_ccv;  /* compare/exchange value (scratch) */
-
-	/*
-	 * Floating point registers that the kernel considers scratch:
-	 */
-	struct ia64_fpreg f6;  /* scratch */
-	struct ia64_fpreg f7;  /* scratch */
-	struct ia64_fpreg f8;  /* scratch */
-	struct ia64_fpreg f9;  /* scratch */
-	struct ia64_fpreg f10;  /* scratch */
-	struct ia64_fpreg f11;  /* scratch */
-
-	unsigned long r4;  /* preserved */
-	unsigned long r5;  /* preserved */
-	unsigned long r6;  /* preserved */
-	unsigned long r7;  /* preserved */
-	unsigned long eml_unat;    /* used for emulating instruction */
-	unsigned long pad0;     /* alignment pad */
-};
-
-static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
-{
-	return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
-}
-
-typedef int kvm_vmm_entry(void);
-typedef void kvm_tramp_entry(union context *host, union context *guest);
-
-struct kvm_vmm_info{
-	struct module	*module;
-	kvm_vmm_entry 	*vmm_entry;
-	kvm_tramp_entry *tramp_entry;
-	unsigned long 	vmm_ivt;
-	unsigned long	patch_mov_ar;
-	unsigned long	patch_mov_ar_sn2;
-};
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
-int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
-void kvm_sal_emul(struct kvm_vcpu *vcpu);
-
-#define __KVM_HAVE_ARCH_VM_ALLOC 1
-struct kvm *kvm_arch_alloc_vm(void);
-void kvm_arch_free_vm(struct kvm *kvm);
-
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_free_memslot(struct kvm *kvm,
-		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
-static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
-		struct kvm_userspace_memory_region *mem,
-		const struct kvm_memory_slot *old,
-		enum kvm_mr_change change) {}
-static inline void kvm_arch_hardware_unsetup(void) {}
-
-#endif /* __ASSEMBLY__*/
-
-#endif
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 14aa1c58912b..0ec484d2dcbc 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h
@@ -35,8 +35,8 @@ extern void *per_cpu_init(void);
 
 /*
  * Be extremely careful when taking the address of this variable!  Due to virtual
- * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
- * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
+ * remapping, it is different from the canonical address returned by this_cpu_ptr(&var)!
+ * On the positive side, using __ia64_per_cpu_var() instead of this_cpu_ptr() is slightly
  * more efficient.
  */
 #define __ia64_per_cpu_var(var) (*({					\
diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h
deleted file mode 100644
index 42b233bedeb5..000000000000
--- a/arch/ia64/include/asm/pvclock-abi.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * same structure to x86's
- * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic.
- * For now, define same duplicated definitions.
- */
-
-#ifndef _ASM_IA64__PVCLOCK_ABI_H
-#define _ASM_IA64__PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
-
-/*
- * These structs MUST NOT be changed.
- * They are the ABI between hypervisor and guest OS.
- * KVM is using this.
- *
- * pvclock_vcpu_time_info holds the system time and the tsc timestamp
- * of the last update. So the guest can use the tsc delta to get a
- * more precise system time.  There is one per virtual cpu.
- *
- * pvclock_wall_clock references the point in time when the system
- * time was zero (usually boot time), thus the guest calculates the
- * current wall clock by adding the system time.
- *
- * Protocol for the "version" fields is: hypervisor raises it (making
- * it uneven) before it starts updating the fields and raises it again
- * (making it even) when it is done.  Thus the guest can make sure the
- * time values it got are consistent by checking the version before
- * and after reading them.
- */
-
-struct pvclock_vcpu_time_info {
-	u32   version;
-	u32   pad0;
-	u64   tsc_timestamp;
-	u64   system_time;
-	u32   tsc_to_system_mul;
-	s8    tsc_shift;
-	u8    pad[3];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct pvclock_wall_clock {
-	u32   version;
-	u32   sec;
-	u32   nsec;
-} __attribute__((__packed__));
-
-#endif /* __ASSEMBLY__ */
-#endif /* _ASM_IA64__PVCLOCK_ABI_H */
diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h
deleted file mode 100644
index 99503c284400..000000000000
--- a/arch/ia64/include/uapi/asm/kvm.h
+++ /dev/null
@@ -1,268 +0,0 @@
-#ifndef __ASM_IA64_KVM_H
-#define __ASM_IA64_KVM_H
-
-/*
- * kvm structure definitions  for ia64
- *
- * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-/* Select x86 specific features in <linux/kvm.h> */
-#define __KVM_HAVE_IOAPIC
-#define __KVM_HAVE_IRQ_LINE
-
-/* Architectural interrupt line count. */
-#define KVM_NR_INTERRUPTS 256
-
-#define KVM_IOAPIC_NUM_PINS  48
-
-struct kvm_ioapic_state {
-	__u64 base_address;
-	__u32 ioregsel;
-	__u32 id;
-	__u32 irr;
-	__u32 pad;
-	union {
-		__u64 bits;
-		struct {
-			__u8 vector;
-			__u8 delivery_mode:3;
-			__u8 dest_mode:1;
-			__u8 delivery_status:1;
-			__u8 polarity:1;
-			__u8 remote_irr:1;
-			__u8 trig_mode:1;
-			__u8 mask:1;
-			__u8 reserve:7;
-			__u8 reserved[4];
-			__u8 dest_id;
-		} fields;
-	} redirtbl[KVM_IOAPIC_NUM_PINS];
-};
-
-#define KVM_IRQCHIP_PIC_MASTER   0
-#define KVM_IRQCHIP_PIC_SLAVE    1
-#define KVM_IRQCHIP_IOAPIC       2
-#define KVM_NR_IRQCHIPS          3
-
-#define KVM_CONTEXT_SIZE	8*1024
-
-struct kvm_fpreg {
-	union {
-		unsigned long bits[2];
-		long double __dummy;	/* force 16-byte alignment */
-	} u;
-};
-
-union context {
-	/* 8K size */
-	char	dummy[KVM_CONTEXT_SIZE];
-	struct {
-		unsigned long       psr;
-		unsigned long       pr;
-		unsigned long       caller_unat;
-		unsigned long       pad;
-		unsigned long       gr[32];
-		unsigned long       ar[128];
-		unsigned long       br[8];
-		unsigned long       cr[128];
-		unsigned long       rr[8];
-		unsigned long       ibr[8];
-		unsigned long       dbr[8];
-		unsigned long       pkr[8];
-		struct kvm_fpreg   fr[128];
-	};
-};
-
-struct thash_data {
-	union {
-		struct {
-			unsigned long p    :  1; /* 0 */
-			unsigned long rv1  :  1; /* 1 */
-			unsigned long ma   :  3; /* 2-4 */
-			unsigned long a    :  1; /* 5 */
-			unsigned long d    :  1; /* 6 */
-			unsigned long pl   :  2; /* 7-8 */
-			unsigned long ar   :  3; /* 9-11 */
-			unsigned long ppn  : 38; /* 12-49 */
-			unsigned long rv2  :  2; /* 50-51 */
-			unsigned long ed   :  1; /* 52 */
-			unsigned long ig1  : 11; /* 53-63 */
-		};
-		struct {
-			unsigned long __rv1 : 53;     /* 0-52 */
-			unsigned long contiguous : 1; /*53 */
-			unsigned long tc : 1;         /* 54 TR or TC */
-			unsigned long cl : 1;
-			/* 55 I side or D side cache line */
-			unsigned long len  :  4;      /* 56-59 */
-			unsigned long io  : 1;	/* 60 entry is for io or not */
-			unsigned long nomap : 1;
-			/* 61 entry cann't be inserted into machine TLB.*/
-			unsigned long checked : 1;
-			/* 62 for VTLB/VHPT sanity check */
-			unsigned long invalid : 1;
-			/* 63 invalid entry */
-		};
-		unsigned long page_flags;
-	};                  /* same for VHPT and TLB */
-
-	union {
-		struct {
-			unsigned long rv3  :  2;
-			unsigned long ps   :  6;
-			unsigned long key  : 24;
-			unsigned long rv4  : 32;
-		};
-		unsigned long itir;
-	};
-	union {
-		struct {
-			unsigned long ig2  :  12;
-			unsigned long vpn  :  49;
-			unsigned long vrn  :   3;
-		};
-		unsigned long ifa;
-		unsigned long vadr;
-		struct {
-			unsigned long tag  :  63;
-			unsigned long ti   :  1;
-		};
-		unsigned long etag;
-	};
-	union {
-		struct thash_data *next;
-		unsigned long rid;
-		unsigned long gpaddr;
-	};
-};
-
-#define	NITRS	8
-#define NDTRS	8
-
-struct saved_vpd {
-	unsigned long  vhpi;
-	unsigned long  vgr[16];
-	unsigned long  vbgr[16];
-	unsigned long  vnat;
-	unsigned long  vbnat;
-	unsigned long  vcpuid[5];
-	unsigned long  vpsr;
-	unsigned long  vpr;
-	union {
-		unsigned long  vcr[128];
-		struct {
-			unsigned long dcr;
-			unsigned long itm;
-			unsigned long iva;
-			unsigned long rsv1[5];
-			unsigned long pta;
-			unsigned long rsv2[7];
-			unsigned long ipsr;
-			unsigned long isr;
-			unsigned long rsv3;
-			unsigned long iip;
-			unsigned long ifa;
-			unsigned long itir;
-			unsigned long iipa;
-			unsigned long ifs;
-			unsigned long iim;
-			unsigned long iha;
-			unsigned long rsv4[38];
-			unsigned long lid;
-			unsigned long ivr;
-			unsigned long tpr;
-			unsigned long eoi;
-			unsigned long irr[4];
-			unsigned long itv;
-			unsigned long pmv;
-			unsigned long cmcv;
-			unsigned long rsv5[5];
-			unsigned long lrr0;
-			unsigned long lrr1;
-			unsigned long rsv6[46];
-		};
-	};
-};
-
-struct kvm_regs {
-	struct saved_vpd vpd;
-	/*Arch-regs*/
-	int mp_state;
-	unsigned long vmm_rr;
-	/* TR and TC.  */
-	struct thash_data itrs[NITRS];
-	struct thash_data dtrs[NDTRS];
-	/* Bit is set if there is a tr/tc for the region.  */
-	unsigned char itr_regions;
-	unsigned char dtr_regions;
-	unsigned char tc_regions;
-
-	char irq_check;
-	unsigned long saved_itc;
-	unsigned long itc_check;
-	unsigned long timer_check;
-	unsigned long timer_pending;
-	unsigned long last_itc;
-
-	unsigned long vrr[8];
-	unsigned long ibr[8];
-	unsigned long dbr[8];
-	unsigned long insvc[4];		/* Interrupt in service.  */
-	unsigned long xtp;
-
-	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-	unsigned long fp_psr;       /*used for lazy float register */
-	unsigned long saved_gp;
-	/*for phycial  emulation */
-
-	union context saved_guest;
-
-	unsigned long reserved[64];	/* for future use */
-};
-
-struct kvm_sregs {
-};
-
-struct kvm_fpu {
-};
-
-#define KVM_IA64_VCPU_STACK_SHIFT	16
-#define KVM_IA64_VCPU_STACK_SIZE	(1UL << KVM_IA64_VCPU_STACK_SHIFT)
-
-struct kvm_ia64_vcpu_stack {
-	unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
-};
-
-struct kvm_debug_exit_arch {
-};
-
-/* for KVM_SET_GUEST_DEBUG */
-struct kvm_guest_debug_arch {
-};
-
-/* definition of registers in kvm_run */
-struct kvm_sync_regs {
-};
-
-#endif
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index dc063fe6646a..5f4243f0acfa 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2145,22 +2145,12 @@ doit:
 	return 0;
 }
 
-static int
-pfm_no_open(struct inode *irrelevant, struct file *dontcare)
-{
-	DPRINT(("pfm_no_open called\n"));
-	return -ENXIO;
-}
-
-
-
 static const struct file_operations pfm_file_ops = {
 	.llseek		= no_llseek,
 	.read		= pfm_read,
 	.write		= pfm_write,
 	.poll		= pfm_poll,
 	.unlocked_ioctl = pfm_ioctl,
-	.open		= pfm_no_open,	/* special open code to disallow open via /proc */
 	.fasync		= pfm_fasync,
 	.release	= pfm_close,
 	.flush		= pfm_flush
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
deleted file mode 100644
index 3d50ea955c4c..000000000000
--- a/arch/ia64/kvm/Kconfig
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# KVM configuration
-#
-
-source "virt/kvm/Kconfig"
-
-menuconfig VIRTUALIZATION
-	bool "Virtualization"
-	depends on HAVE_KVM || IA64
-	default y
-	---help---
-	  Say Y here to get to see options for using your Linux host to run other
-	  operating systems inside virtual machines (guests).
-	  This option alone does not add any kernel code.
-
-	  If you say N, all options in this submenu will be skipped and disabled.
-
-if VIRTUALIZATION
-
-config KVM
-	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on BROKEN
-	depends on HAVE_KVM && MODULES
-	depends on BROKEN
-	select PREEMPT_NOTIFIERS
-	select ANON_INODES
-	select HAVE_KVM_IRQCHIP
-	select HAVE_KVM_IRQFD
-	select HAVE_KVM_IRQ_ROUTING
-	select KVM_APIC_ARCHITECTURE
-	select KVM_MMIO
-	---help---
-	  Support hosting fully virtualized guest machines using hardware
-	  virtualization extensions.  You will need a fairly recent
-	  processor equipped with virtualization extensions. You will also
-	  need to select one or more of the processor modules below.
-
-	  This module provides access to the hardware capabilities through
-	  a character device node named /dev/kvm.
-
-	  To compile this as a module, choose M here: the module
-	  will be called kvm.
-
-	  If unsure, say N.
-
-config KVM_INTEL
-	tristate "KVM for Intel Itanium 2 processors support"
-	depends on KVM && m
-	---help---
-	  Provides support for KVM on Itanium 2 processors equipped with the VT
-	  extensions.
-
-config KVM_DEVICE_ASSIGNMENT
-	bool "KVM legacy PCI device assignment support"
-	depends on KVM && PCI && IOMMU_API
-	default y
-	---help---
-	  Provide support for legacy PCI device assignment through KVM.  The
-	  kernel now also supports a full featured userspace device driver
-	  framework through VFIO, which supersedes much of this support.
-
-	  If unsure, say Y.
-
-source drivers/vhost/Kconfig
-
-endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
deleted file mode 100644
index 18e45ec49bbf..000000000000
--- a/arch/ia64/kvm/Makefile
+++ /dev/null
@@ -1,67 +0,0 @@
-#This Make file is to generate asm-offsets.h and build source.
-#
-
-#Generate asm-offsets.h for vmm module build
-offsets-file := asm-offsets.h
-
-always  := $(offsets-file)
-targets := $(offsets-file)
-targets += arch/ia64/kvm/asm-offsets.s
-
-# Default sed regexp - multiline due to syntax constraints
-define sed-y
-	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
-endef
-
-quiet_cmd_offsets = GEN     $@
-define cmd_offsets
-	(set -e; \
-	 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
-	 echo "#define __ASM_KVM_OFFSETS_H__"; \
-	 echo "/*"; \
-	 echo " * DO NOT MODIFY."; \
-	 echo " *"; \
-	 echo " * This file was generated by Makefile"; \
-	 echo " *"; \
-	 echo " */"; \
-	 echo ""; \
-	 sed -ne $(sed-y) $<; \
-	 echo ""; \
-	 echo "#endif" ) > $@
-endef
-
-# We use internal rules to avoid the "is up to date" message from make
-arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \
-			$(wildcard $(srctree)/arch/ia64/include/asm/*.h)\
-			$(wildcard $(srctree)/include/linux/*.h)
-	$(call if_changed_dep,cc_s_c)
-
-$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
-	$(call cmd,offsets)
-
-FORCE : $(obj)/$(offsets-file)
-
-#
-# Makefile for Kernel-based Virtual Machine module
-#
-
-ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-KVM := ../../../virt/kvm
-
-common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-		$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
-
-ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
-endif
-
-kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
-obj-$(CONFIG_KVM) += kvm.o
-
-CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
-kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-	vtlb.o process.o kvm_lib.o
-#Add link memcpy and memset to avoid possible structure assignment error
-kvm-intel-objs += memcpy.o memset.o
-obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
deleted file mode 100644
index 9324c875caf5..000000000000
--- a/arch/ia64/kvm/asm-offsets.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * asm-offsets.c Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- *
- * Anthony Xu    <anthony.xu@intel.com>
- * Xiantao Zhang <xiantao.zhang@intel.com>
- * Copyright (c) 2007 Intel Corporation  KVM support.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kbuild.h>
-
-#include "vcpu.h"
-
-void foo(void)
-{
-	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
-	DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
-
-	BLANK();
-
-	DEFINE(VMM_VCPU_META_RR0_OFFSET,
-			offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
-	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-			offsetof(struct kvm_vcpu,
-				arch.metaphysical_saved_rr0));
-	DEFINE(VMM_VCPU_VRR0_OFFSET,
-			offsetof(struct kvm_vcpu, arch.vrr[0]));
-	DEFINE(VMM_VPD_IRR0_OFFSET,
-			offsetof(struct vpd, irr[0]));
-	DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
-			offsetof(struct kvm_vcpu, arch.itc_check));
-	DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
-			offsetof(struct kvm_vcpu, arch.irq_check));
-	DEFINE(VMM_VPD_VHPI_OFFSET,
-			offsetof(struct vpd, vhpi));
-	DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
-			offsetof(struct kvm_vcpu, arch.vsa_base));
-	DEFINE(VMM_VCPU_VPD_OFFSET,
-			offsetof(struct kvm_vcpu, arch.vpd));
-	DEFINE(VMM_VCPU_IRQ_CHECK,
-			offsetof(struct kvm_vcpu, arch.irq_check));
-	DEFINE(VMM_VCPU_TIMER_PENDING,
-			offsetof(struct kvm_vcpu, arch.timer_pending));
-	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-			offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
-	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-			offsetof(struct kvm_vcpu, arch.mode_flags));
-	DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
-			offsetof(struct kvm_vcpu, arch.itc_offset));
-	DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
-			offsetof(struct kvm_vcpu, arch.last_itc));
-	DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
-			offsetof(struct kvm_vcpu, arch.saved_gp));
-
-	BLANK();
-
-	DEFINE(VMM_PT_REGS_B6_OFFSET,
-				offsetof(struct kvm_pt_regs, b6));
-	DEFINE(VMM_PT_REGS_B7_OFFSET,
-				offsetof(struct kvm_pt_regs, b7));
-	DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_csd));
-	DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_ssd));
-	DEFINE(VMM_PT_REGS_R8_OFFSET,
-				offsetof(struct kvm_pt_regs, r8));
-	DEFINE(VMM_PT_REGS_R9_OFFSET,
-				offsetof(struct kvm_pt_regs, r9));
-	DEFINE(VMM_PT_REGS_R10_OFFSET,
-				offsetof(struct kvm_pt_regs, r10));
-	DEFINE(VMM_PT_REGS_R11_OFFSET,
-				offsetof(struct kvm_pt_regs, r11));
-	DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
-				offsetof(struct kvm_pt_regs, cr_ipsr));
-	DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
-				offsetof(struct kvm_pt_regs, cr_iip));
-	DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
-				offsetof(struct kvm_pt_regs, cr_ifs));
-	DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_unat));
-	DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_pfs));
-	DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_rsc));
-	DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_rnat));
-
-	DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_bspstore));
-	DEFINE(VMM_PT_REGS_PR_OFFSET,
-				offsetof(struct kvm_pt_regs, pr));
-	DEFINE(VMM_PT_REGS_B0_OFFSET,
-				offsetof(struct kvm_pt_regs, b0));
-	DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
-				offsetof(struct kvm_pt_regs, loadrs));
-	DEFINE(VMM_PT_REGS_R1_OFFSET,
-				offsetof(struct kvm_pt_regs, r1));
-	DEFINE(VMM_PT_REGS_R12_OFFSET,
-				offsetof(struct kvm_pt_regs, r12));
-	DEFINE(VMM_PT_REGS_R13_OFFSET,
-				offsetof(struct kvm_pt_regs, r13));
-	DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_fpsr));
-	DEFINE(VMM_PT_REGS_R15_OFFSET,
-				offsetof(struct kvm_pt_regs, r15));
-	DEFINE(VMM_PT_REGS_R14_OFFSET,
-				offsetof(struct kvm_pt_regs, r14));
-	DEFINE(VMM_PT_REGS_R2_OFFSET,
-				offsetof(struct kvm_pt_regs, r2));
-	DEFINE(VMM_PT_REGS_R3_OFFSET,
-				offsetof(struct kvm_pt_regs, r3));
-	DEFINE(VMM_PT_REGS_R16_OFFSET,
-				offsetof(struct kvm_pt_regs, r16));
-	DEFINE(VMM_PT_REGS_R17_OFFSET,
-				offsetof(struct kvm_pt_regs, r17));
-	DEFINE(VMM_PT_REGS_R18_OFFSET,
-				offsetof(struct kvm_pt_regs, r18));
-	DEFINE(VMM_PT_REGS_R19_OFFSET,
-				offsetof(struct kvm_pt_regs, r19));
-	DEFINE(VMM_PT_REGS_R20_OFFSET,
-				offsetof(struct kvm_pt_regs, r20));
-	DEFINE(VMM_PT_REGS_R21_OFFSET,
-				offsetof(struct kvm_pt_regs, r21));
-	DEFINE(VMM_PT_REGS_R22_OFFSET,
-				offsetof(struct kvm_pt_regs, r22));
-	DEFINE(VMM_PT_REGS_R23_OFFSET,
-				offsetof(struct kvm_pt_regs, r23));
-	DEFINE(VMM_PT_REGS_R24_OFFSET,
-				offsetof(struct kvm_pt_regs, r24));
-	DEFINE(VMM_PT_REGS_R25_OFFSET,
-				offsetof(struct kvm_pt_regs, r25));
-	DEFINE(VMM_PT_REGS_R26_OFFSET,
-				offsetof(struct kvm_pt_regs, r26));
-	DEFINE(VMM_PT_REGS_R27_OFFSET,
-				offsetof(struct kvm_pt_regs, r27));
-	DEFINE(VMM_PT_REGS_R28_OFFSET,
-				offsetof(struct kvm_pt_regs, r28));
-	DEFINE(VMM_PT_REGS_R29_OFFSET,
-				offsetof(struct kvm_pt_regs, r29));
-	DEFINE(VMM_PT_REGS_R30_OFFSET,
-				offsetof(struct kvm_pt_regs, r30));
-	DEFINE(VMM_PT_REGS_R31_OFFSET,
-				offsetof(struct kvm_pt_regs, r31));
-	DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_ccv));
-	DEFINE(VMM_PT_REGS_F6_OFFSET,
-				offsetof(struct kvm_pt_regs, f6));
-	DEFINE(VMM_PT_REGS_F7_OFFSET,
-				offsetof(struct kvm_pt_regs, f7));
-	DEFINE(VMM_PT_REGS_F8_OFFSET,
-				offsetof(struct kvm_pt_regs, f8));
-	DEFINE(VMM_PT_REGS_F9_OFFSET,
-				offsetof(struct kvm_pt_regs, f9));
-	DEFINE(VMM_PT_REGS_F10_OFFSET,
-				offsetof(struct kvm_pt_regs, f10));
-	DEFINE(VMM_PT_REGS_F11_OFFSET,
-				offsetof(struct kvm_pt_regs, f11));
-	DEFINE(VMM_PT_REGS_R4_OFFSET,
-				offsetof(struct kvm_pt_regs, r4));
-	DEFINE(VMM_PT_REGS_R5_OFFSET,
-				offsetof(struct kvm_pt_regs, r5));
-	DEFINE(VMM_PT_REGS_R6_OFFSET,
-				offsetof(struct kvm_pt_regs, r6));
-	DEFINE(VMM_PT_REGS_R7_OFFSET,
-				offsetof(struct kvm_pt_regs, r7));
-	DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
-				offsetof(struct kvm_pt_regs, eml_unat));
-	DEFINE(VMM_VCPU_IIPA_OFFSET,
-				offsetof(struct kvm_vcpu, arch.cr_iipa));
-	DEFINE(VMM_VCPU_OPCODE_OFFSET,
-				offsetof(struct kvm_vcpu, arch.opcode));
-	DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
-	DEFINE(VMM_VCPU_ISR_OFFSET,
-				offsetof(struct kvm_vcpu, arch.cr_isr));
-	DEFINE(VMM_PT_REGS_R16_SLOT,
-				(((offsetof(struct kvm_pt_regs, r16)
-				- sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
-	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-				offsetof(struct kvm_vcpu, arch.mode_flags));
-	DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
-	BLANK();
-
-	DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
-	DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
-	DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
-			offsetof(struct kvm_vcpu, arch.insvc[0]));
-	DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
-	DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
-
-	DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
-	DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
-	DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
-	DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
-	DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
-	DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
-	DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
-	DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
-	DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
-	DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
-	DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
-	DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
-	DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
-	DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
-	DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
-	DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
-	DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
-	DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
-	DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
-	DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
-	DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
-	DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
-	DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
-	DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
-	DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
-	DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
-	DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
-	DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
-	DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
-	DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
-	DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
-	DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
-	DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
-	DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
-	BLANK();
-}
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
deleted file mode 100644
index c0785a728271..000000000000
--- a/arch/ia64/kvm/irq.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * irq.h: In-kernel interrupt controller related definitions
- * Copyright (c) 2008, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Authors:
- *   Xiantao Zhang <xiantao.zhang@intel.com>
- *
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include "lapic.h"
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return 1;
-}
-
-#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
deleted file mode 100644
index dbe46f43884d..000000000000
--- a/arch/ia64/kvm/kvm-ia64.c
+++ /dev/null
@@ -1,1942 +0,0 @@
-/*
- * kvm_ia64.c: Basic KVM support On Itanium series processors
- *
- *
- * 	Copyright (C) 2007, Intel Corporation.
- *  	Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/percpu.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/bitops.h>
-#include <linux/hrtimer.h>
-#include <linux/uaccess.h>
-#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
-#include <linux/pci.h>
-
-#include <asm/pgtable.h>
-#include <asm/gcc_intrin.h>
-#include <asm/pal.h>
-#include <asm/cacheflush.h>
-#include <asm/div64.h>
-#include <asm/tlb.h>
-#include <asm/elf.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "misc.h"
-#include "vti.h"
-#include "iodev.h"
-#include "ioapic.h"
-#include "lapic.h"
-#include "irq.h"
-
-static unsigned long kvm_vmm_base;
-static unsigned long kvm_vsa_base;
-static unsigned long kvm_vm_buffer;
-static unsigned long kvm_vm_buffer_size;
-unsigned long kvm_vmm_gp;
-
-static long vp_env_info;
-
-static struct kvm_vmm_info *kvm_vmm_info;
-
-static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	{ NULL }
-};
-
-static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	if (vcpu->kvm->arch.is_sn2)
-		return rtc_time();
-	else
-#endif
-		return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-static void kvm_flush_icache(unsigned long start, unsigned long len)
-{
-	int l;
-
-	for (l = 0; l < (len + 32); l += 32)
-		ia64_fc((void *)(start + l));
-
-	ia64_sync_i();
-	ia64_srlz_i();
-}
-
-static void kvm_flush_tlb_all(void)
-{
-	unsigned long i, j, count0, count1, stride0, stride1, addr;
-	long flags;
-
-	addr    = local_cpu_data->ptce_base;
-	count0  = local_cpu_data->ptce_count[0];
-	count1  = local_cpu_data->ptce_count[1];
-	stride0 = local_cpu_data->ptce_stride[0];
-	stride1 = local_cpu_data->ptce_stride[1];
-
-	local_irq_save(flags);
-	for (i = 0; i < count0; ++i) {
-		for (j = 0; j < count1; ++j) {
-			ia64_ptce(addr);
-			addr += stride1;
-		}
-		addr += stride0;
-	}
-	local_irq_restore(flags);
-	ia64_srlz_i();			/* srlz.i implies srlz.d */
-}
-
-long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
-			(u64)opt_handler);
-
-	return iprv.status;
-}
-
-static  DEFINE_SPINLOCK(vp_lock);
-
-int kvm_arch_hardware_enable(void)
-{
-	long  status;
-	long  tmp_base;
-	unsigned long pte;
-	unsigned long saved_psr;
-	int slot;
-
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-	local_irq_save(saved_psr);
-	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-	local_irq_restore(saved_psr);
-	if (slot < 0)
-		return -EINVAL;
-
-	spin_lock(&vp_lock);
-	status = ia64_pal_vp_init_env(kvm_vsa_base ?
-				VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
-			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
-	if (status != 0) {
-		spin_unlock(&vp_lock);
-		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-		return -EINVAL;
-	}
-
-	if (!kvm_vsa_base) {
-		kvm_vsa_base = tmp_base;
-		printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
-	}
-	spin_unlock(&vp_lock);
-	ia64_ptr_entry(0x3, slot);
-
-	return 0;
-}
-
-void kvm_arch_hardware_disable(void)
-{
-
-	long status;
-	int slot;
-	unsigned long pte;
-	unsigned long saved_psr;
-	unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
-
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-				PAGE_KERNEL));
-
-	local_irq_save(saved_psr);
-	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-	local_irq_restore(saved_psr);
-	if (slot < 0)
-		return;
-
-	status = ia64_pal_vp_exit_env(host_iva);
-	if (status)
-		printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
-				status);
-	ia64_ptr_entry(0x3, slot);
-}
-
-void kvm_arch_check_processor_compat(void *rtn)
-{
-	*(int *)rtn = 0;
-}
-
-int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
-{
-
-	int r;
-
-	switch (ext) {
-	case KVM_CAP_IRQCHIP:
-	case KVM_CAP_MP_STATE:
-	case KVM_CAP_IRQ_INJECT_STATUS:
-	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
-		r = 1;
-		break;
-	case KVM_CAP_COALESCED_MMIO:
-		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
-		break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-	case KVM_CAP_IOMMU:
-		r = iommu_present(&pci_bus_type);
-		break;
-#endif
-	default:
-		r = 0;
-	}
-	return r;
-
-}
-
-static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-	kvm_run->hw.hardware_exit_reason = 1;
-	return 0;
-}
-
-static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct kvm_mmio_req *p;
-	struct kvm_io_device *mmio_dev;
-	int r;
-
-	p = kvm_get_vcpu_ioreq(vcpu);
-
-	if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
-		goto mmio;
-	vcpu->mmio_needed = 1;
-	vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
-	vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
-	vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
-
-	if (vcpu->mmio_is_write)
-		memcpy(vcpu->arch.mmio_data, &p->data, p->size);
-	memcpy(kvm_run->mmio.data, &p->data, p->size);
-	kvm_run->exit_reason = KVM_EXIT_MMIO;
-	return 0;
-mmio:
-	if (p->dir)
-		r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-				    p->size, &p->data);
-	else
-		r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-				     p->size, &p->data);
-	if (r)
-		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
-	p->state = STATE_IORESP_READY;
-
-	return 1;
-}
-
-static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-
-	if (p->exit_reason == EXIT_REASON_PAL_CALL)
-		return kvm_pal_emul(vcpu, kvm_run);
-	else {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = 2;
-		return 0;
-	}
-}
-
-static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		kvm_sal_emul(vcpu);
-		return 1;
-	} else {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = 3;
-		return 0;
-	}
-
-}
-
-static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
-{
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (!test_and_set_bit(vector, &vpd->irr[0])) {
-		vcpu->arch.irq_new_pending = 1;
-		kvm_vcpu_kick(vcpu);
-		return 1;
-	}
-	return 0;
-}
-
-/*
- *  offset: address offset to IPI space.
- *  value:  deliver value.
- */
-static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
-				uint64_t vector)
-{
-	switch (dm) {
-	case SAPIC_FIXED:
-		break;
-	case SAPIC_NMI:
-		vector = 2;
-		break;
-	case SAPIC_EXTINT:
-		vector = 0;
-		break;
-	case SAPIC_INIT:
-	case SAPIC_PMI:
-	default:
-		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-		return;
-	}
-	__apic_accept_irq(vcpu, vector);
-}
-
-static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
-			unsigned long eid)
-{
-	union ia64_lid lid;
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		lid.val = VCPU_LID(vcpu);
-		if (lid.id == id && lid.eid == eid)
-			return vcpu;
-	}
-
-	return NULL;
-}
-
-static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-	struct kvm_vcpu *target_vcpu;
-	struct kvm_pt_regs *regs;
-	union ia64_ipi_a addr = p->u.ipi_data.addr;
-	union ia64_ipi_d data = p->u.ipi_data.data;
-
-	target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
-	if (!target_vcpu)
-		return handle_vm_error(vcpu, kvm_run);
-
-	if (!target_vcpu->arch.launched) {
-		regs = vcpu_regs(target_vcpu);
-
-		regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
-		regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
-
-		target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-		if (waitqueue_active(&target_vcpu->wq))
-			wake_up_interruptible(&target_vcpu->wq);
-	} else {
-		vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
-		if (target_vcpu != vcpu)
-			kvm_vcpu_kick(target_vcpu);
-	}
-
-	return 1;
-}
-
-struct call_data {
-	struct kvm_ptc_g ptc_g_data;
-	struct kvm_vcpu *vcpu;
-};
-
-static void vcpu_global_purge(void *info)
-{
-	struct call_data *p = (struct call_data *)info;
-	struct kvm_vcpu *vcpu = p->vcpu;
-
-	if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
-		return;
-
-	set_bit(KVM_REQ_PTC_G, &vcpu->requests);
-	if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
-		vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
-							p->ptc_g_data;
-	} else {
-		clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
-		vcpu->arch.ptc_g_count = 0;
-		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
-	}
-}
-
-static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-	struct kvm *kvm = vcpu->kvm;
-	struct call_data call_data;
-	int i;
-	struct kvm_vcpu *vcpui;
-
-	call_data.ptc_g_data = p->u.ptc_g_data;
-
-	kvm_for_each_vcpu(i, vcpui, kvm) {
-		if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
-				vcpu == vcpui)
-			continue;
-
-		if (waitqueue_active(&vcpui->wq))
-			wake_up_interruptible(&vcpui->wq);
-
-		if (vcpui->cpu != -1) {
-			call_data.vcpu = vcpui;
-			smp_call_function_single(vcpui->cpu,
-					vcpu_global_purge, &call_data, 1);
-		} else
-			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
-
-	}
-	return 1;
-}
-
-static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	return 1;
-}
-
-static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
-{
-	unsigned long pte, rtc_phys_addr, map_addr;
-	int slot;
-
-	map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
-	rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
-	pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
-	slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
-	vcpu->arch.sn_rtc_tr_slot = slot;
-	if (slot < 0) {
-		printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
-		slot = 0;
-	}
-	return slot;
-}
-
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
-{
-
-	ktime_t kt;
-	long itc_diff;
-	unsigned long vcpu_now_itc;
-	unsigned long expires;
-	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-	unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (irqchip_in_kernel(vcpu->kvm)) {
-
-		vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
-
-		if (time_after(vcpu_now_itc, vpd->itm)) {
-			vcpu->arch.timer_check = 1;
-			return 1;
-		}
-		itc_diff = vpd->itm - vcpu_now_itc;
-		if (itc_diff < 0)
-			itc_diff = -itc_diff;
-
-		expires = div64_u64(itc_diff, cyc_per_usec);
-		kt = ktime_set(0, 1000 * expires);
-
-		vcpu->arch.ht_active = 1;
-		hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
-
-		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-		kvm_vcpu_block(vcpu);
-		hrtimer_cancel(p_ht);
-		vcpu->arch.ht_active = 0;
-
-		if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
-				kvm_cpu_has_pending_timer(vcpu))
-			if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
-				vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-			return -EINTR;
-		return 1;
-	} else {
-		printk(KERN_ERR"kvm: Unsupported userspace halt!");
-		return 0;
-	}
-}
-
-static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
-		struct kvm_run *kvm_run)
-{
-	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
-	return 0;
-}
-
-static int handle_external_interrupt(struct kvm_vcpu *vcpu,
-		struct kvm_run *kvm_run)
-{
-	return 1;
-}
-
-static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
-				struct kvm_run *kvm_run)
-{
-	printk("VMM: %s", vcpu->arch.log_buf);
-	return 1;
-}
-
-static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
-		struct kvm_run *kvm_run) = {
-	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
-	[EXIT_REASON_MMIO_INSTRUCTION]      = handle_mmio,
-	[EXIT_REASON_PAL_CALL]              = handle_pal_call,
-	[EXIT_REASON_SAL_CALL]              = handle_sal_call,
-	[EXIT_REASON_SWITCH_RR6]            = handle_switch_rr6,
-	[EXIT_REASON_VM_DESTROY]            = handle_vm_shutdown,
-	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
-	[EXIT_REASON_IPI]		    = handle_ipi,
-	[EXIT_REASON_PTC_G]		    = handle_global_purge,
-	[EXIT_REASON_DEBUG]		    = handle_vcpu_debug,
-
-};
-
-static const int kvm_vti_max_exit_handlers =
-		sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
-
-static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p_exit_data;
-
-	p_exit_data = kvm_get_exit_data(vcpu);
-	return p_exit_data->exit_reason;
-}
-
-/*
- * The guest has exited.  See if we can fix it or if we need userspace
- * assistance.
- */
-static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
-{
-	u32 exit_reason = kvm_get_exit_reason(vcpu);
-	vcpu->arch.last_exit = exit_reason;
-
-	if (exit_reason < kvm_vti_max_exit_handlers
-			&& kvm_vti_exit_handlers[exit_reason])
-		return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
-	else {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = exit_reason;
-	}
-	return 0;
-}
-
-static inline void vti_set_rr6(unsigned long rr6)
-{
-	ia64_set_rr(RR6, rr6);
-	ia64_srlz_i();
-}
-
-static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-	unsigned long pte;
-	struct kvm *kvm = vcpu->kvm;
-	int r;
-
-	/*Insert a pair of tr to map vmm*/
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-	r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-	if (r < 0)
-		goto out;
-	vcpu->arch.vmm_tr_slot = r;
-	/*Insert a pairt of tr to map data of vm*/
-	pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
-	r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
-					pte, KVM_VM_DATA_SHIFT);
-	if (r < 0)
-		goto out;
-	vcpu->arch.vm_tr_slot = r;
-
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	if (kvm->arch.is_sn2) {
-		r = kvm_sn2_setup_mappings(vcpu);
-		if (r < 0)
-			goto out;
-	}
-#endif
-
-	r = 0;
-out:
-	return r;
-}
-
-static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = vcpu->kvm;
-	ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
-	ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	if (kvm->arch.is_sn2)
-		ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
-#endif
-}
-
-static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-	int r;
-	int cpu = smp_processor_id();
-
-	if (vcpu->arch.last_run_cpu != cpu ||
-			per_cpu(last_vcpu, cpu) != vcpu) {
-		per_cpu(last_vcpu, cpu) = vcpu;
-		vcpu->arch.last_run_cpu = cpu;
-		kvm_flush_tlb_all();
-	}
-
-	vcpu->arch.host_rr6 = ia64_get_rr(RR6);
-	vti_set_rr6(vcpu->arch.vmm_rr);
-	local_irq_save(psr);
-	r = kvm_insert_vmm_mapping(vcpu);
-	local_irq_restore(psr);
-	return r;
-}
-
-static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
-{
-	kvm_purge_vmm_mapping(vcpu);
-	vti_set_rr6(vcpu->arch.host_rr6);
-}
-
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	union context *host_ctx, *guest_ctx;
-	int r, idx;
-
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-again:
-	if (signal_pending(current)) {
-		r = -EINTR;
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		goto out;
-	}
-
-	preempt_disable();
-	local_irq_disable();
-
-	/*Get host and guest context with guest address space.*/
-	host_ctx = kvm_get_host_context(vcpu);
-	guest_ctx = kvm_get_guest_context(vcpu);
-
-	clear_bit(KVM_REQ_KICK, &vcpu->requests);
-
-	r = kvm_vcpu_pre_transition(vcpu);
-	if (r < 0)
-		goto vcpu_run_fail;
-
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-	vcpu->mode = IN_GUEST_MODE;
-	kvm_guest_enter();
-
-	/*
-	 * Transition to the guest
-	 */
-	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
-
-	kvm_vcpu_post_transition(vcpu);
-
-	vcpu->arch.launched = 1;
-	set_bit(KVM_REQ_KICK, &vcpu->requests);
-	local_irq_enable();
-
-	/*
-	 * We must have an instruction between local_irq_enable() and
-	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
-	 * the interrupt shadow.  The stat.exits increment will do nicely.
-	 * But we need to prevent reordering, hence this barrier():
-	 */
-	barrier();
-	kvm_guest_exit();
-	vcpu->mode = OUTSIDE_GUEST_MODE;
-	preempt_enable();
-
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-	r = kvm_handle_exit(kvm_run, vcpu);
-
-	if (r > 0) {
-		if (!need_resched())
-			goto again;
-	}
-
-out:
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-	if (r > 0) {
-		cond_resched();
-		idx = srcu_read_lock(&vcpu->kvm->srcu);
-		goto again;
-	}
-
-	return r;
-
-vcpu_run_fail:
-	local_irq_enable();
-	preempt_enable();
-	kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-	goto out;
-}
-
-static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
-
-	if (!vcpu->mmio_is_write)
-		memcpy(&p->data, vcpu->arch.mmio_data, 8);
-	p->state = STATE_IORESP_READY;
-}
-
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	int r;
-	sigset_t sigsaved;
-
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
-	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
-		kvm_vcpu_block(vcpu);
-		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
-		r = -EAGAIN;
-		goto out;
-	}
-
-	if (vcpu->mmio_needed) {
-		memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
-		kvm_set_mmio_data(vcpu);
-		vcpu->mmio_read_completed = 1;
-		vcpu->mmio_needed = 0;
-	}
-	r = __vcpu_run(vcpu, kvm_run);
-out:
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-	return r;
-}
-
-struct kvm *kvm_arch_alloc_vm(void)
-{
-
-	struct kvm *kvm;
-	uint64_t  vm_base;
-
-	BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
-
-	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
-
-	if (!vm_base)
-		return NULL;
-
-	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-	kvm = (struct kvm *)(vm_base +
-			offsetof(struct kvm_vm_data, kvm_vm_struct));
-	kvm->arch.vm_base = vm_base;
-	printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
-
-	return kvm;
-}
-
-struct kvm_ia64_io_range {
-	unsigned long start;
-	unsigned long size;
-	unsigned long type;
-};
-
-static const struct kvm_ia64_io_range io_ranges[] = {
-	{VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
-	{MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
-	{LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
-	{IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
-	{PIB_START, PIB_SIZE, GPFN_PIB},
-};
-
-static void kvm_build_io_pmt(struct kvm *kvm)
-{
-	unsigned long i, j;
-
-	/* Mark I/O ranges */
-	for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
-							i++) {
-		for (j = io_ranges[i].start;
-				j < io_ranges[i].start + io_ranges[i].size;
-				j += PAGE_SIZE)
-			kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
-					io_ranges[i].type, 0);
-	}
-
-}
-
-/*Use unused rids to virtualize guest rid.*/
-#define GUEST_PHYSICAL_RR0	0x1739
-#define GUEST_PHYSICAL_RR4	0x2739
-#define VMM_INIT_RR		0x1660
-
-int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
-{
-	BUG_ON(!kvm);
-
-	if (type)
-		return -EINVAL;
-
-	kvm->arch.is_sn2 = ia64_platform_is("sn2");
-
-	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
-	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
-	kvm->arch.vmm_init_rr = VMM_INIT_RR;
-
-	/*
-	 *Fill P2M entries for MMIO/IO ranges
-	 */
-	kvm_build_io_pmt(kvm);
-
-	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
-
-	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
-	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
-
-	return 0;
-}
-
-static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
-					struct kvm_irqchip *chip)
-{
-	int r;
-
-	r = 0;
-	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
-		break;
-	default:
-		r = -EINVAL;
-		break;
-	}
-	return r;
-}
-
-static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
-{
-	int r;
-
-	r = 0;
-	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
-		break;
-	default:
-		r = -EINVAL;
-		break;
-	}
-	return r;
-}
-
-#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
-
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-	int i;
-
-	for (i = 0; i < 16; i++) {
-		vpd->vgr[i] = regs->vpd.vgr[i];
-		vpd->vbgr[i] = regs->vpd.vbgr[i];
-	}
-	for (i = 0; i < 128; i++)
-		vpd->vcr[i] = regs->vpd.vcr[i];
-	vpd->vhpi = regs->vpd.vhpi;
-	vpd->vnat = regs->vpd.vnat;
-	vpd->vbnat = regs->vpd.vbnat;
-	vpd->vpsr = regs->vpd.vpsr;
-
-	vpd->vpr = regs->vpd.vpr;
-
-	memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
-
-	RESTORE_REGS(mp_state);
-	RESTORE_REGS(vmm_rr);
-	memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
-	memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
-	RESTORE_REGS(itr_regions);
-	RESTORE_REGS(dtr_regions);
-	RESTORE_REGS(tc_regions);
-	RESTORE_REGS(irq_check);
-	RESTORE_REGS(itc_check);
-	RESTORE_REGS(timer_check);
-	RESTORE_REGS(timer_pending);
-	RESTORE_REGS(last_itc);
-	for (i = 0; i < 8; i++) {
-		vcpu->arch.vrr[i] = regs->vrr[i];
-		vcpu->arch.ibr[i] = regs->ibr[i];
-		vcpu->arch.dbr[i] = regs->dbr[i];
-	}
-	for (i = 0; i < 4; i++)
-		vcpu->arch.insvc[i] = regs->insvc[i];
-	RESTORE_REGS(xtp);
-	RESTORE_REGS(metaphysical_rr0);
-	RESTORE_REGS(metaphysical_rr4);
-	RESTORE_REGS(metaphysical_saved_rr0);
-	RESTORE_REGS(metaphysical_saved_rr4);
-	RESTORE_REGS(fp_psr);
-	RESTORE_REGS(saved_gp);
-
-	vcpu->arch.irq_new_pending = 1;
-	vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
-	set_bit(KVM_REQ_RESUME, &vcpu->requests);
-
-	return 0;
-}
-
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
-		bool line_status)
-{
-	if (!irqchip_in_kernel(kvm))
-		return -ENXIO;
-
-	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event->irq, irq_event->level,
-					line_status);
-	return 0;
-}
-
-long kvm_arch_vm_ioctl(struct file *filp,
-		unsigned int ioctl, unsigned long arg)
-{
-	struct kvm *kvm = filp->private_data;
-	void __user *argp = (void __user *)arg;
-	int r = -ENOTTY;
-
-	switch (ioctl) {
-	case KVM_CREATE_IRQCHIP:
-		r = -EFAULT;
-		r = kvm_ioapic_init(kvm);
-		if (r)
-			goto out;
-		r = kvm_setup_default_irq_routing(kvm);
-		if (r) {
-			mutex_lock(&kvm->slots_lock);
-			kvm_ioapic_destroy(kvm);
-			mutex_unlock(&kvm->slots_lock);
-			goto out;
-		}
-		break;
-	case KVM_GET_IRQCHIP: {
-		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip chip;
-
-		r = -EFAULT;
-		if (copy_from_user(&chip, argp, sizeof chip))
-				goto out;
-		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
-			goto out;
-		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
-		if (r)
-			goto out;
-		r = -EFAULT;
-		if (copy_to_user(argp, &chip, sizeof chip))
-				goto out;
-		r = 0;
-		break;
-		}
-	case KVM_SET_IRQCHIP: {
-		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip chip;
-
-		r = -EFAULT;
-		if (copy_from_user(&chip, argp, sizeof chip))
-				goto out;
-		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
-			goto out;
-		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
-		if (r)
-			goto out;
-		r = 0;
-		break;
-		}
-	default:
-		;
-	}
-out:
-	return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-		struct kvm_sregs *sregs)
-{
-	return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-		struct kvm_sregs *sregs)
-{
-	return -EINVAL;
-
-}
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-		struct kvm_translation *tr)
-{
-
-	return -EINVAL;
-}
-
-static int kvm_alloc_vmm_area(void)
-{
-	if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
-		kvm_vmm_base = __get_free_pages(GFP_KERNEL,
-				get_order(KVM_VMM_SIZE));
-		if (!kvm_vmm_base)
-			return -ENOMEM;
-
-		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-		kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
-
-		printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
-				kvm_vmm_base, kvm_vm_buffer);
-	}
-
-	return 0;
-}
-
-static void kvm_free_vmm_area(void)
-{
-	if (kvm_vmm_base) {
-		/*Zero this area before free to avoid bits leak!!*/
-		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-		free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
-		kvm_vmm_base  = 0;
-		kvm_vm_buffer = 0;
-		kvm_vsa_base = 0;
-	}
-}
-
-static int vti_init_vpd(struct kvm_vcpu *vcpu)
-{
-	int i;
-	union cpuid3_t cpuid3;
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (IS_ERR(vpd))
-		return PTR_ERR(vpd);
-
-	/* CPUID init */
-	for (i = 0; i < 5; i++)
-		vpd->vcpuid[i] = ia64_get_cpuid(i);
-
-	/* Limit the CPUID number to 5 */
-	cpuid3.value = vpd->vcpuid[3];
-	cpuid3.number = 4;	/* 5 - 1 */
-	vpd->vcpuid[3] = cpuid3.value;
-
-	/*Set vac and vdc fields*/
-	vpd->vac.a_from_int_cr = 1;
-	vpd->vac.a_to_int_cr = 1;
-	vpd->vac.a_from_psr = 1;
-	vpd->vac.a_from_cpuid = 1;
-	vpd->vac.a_cover = 1;
-	vpd->vac.a_bsw = 1;
-	vpd->vac.a_int = 1;
-	vpd->vdc.d_vmsw = 1;
-
-	/*Set virtual buffer*/
-	vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
-
-	return 0;
-}
-
-static int vti_create_vp(struct kvm_vcpu *vcpu)
-{
-	long ret;
-	struct vpd *vpd = vcpu->arch.vpd;
-	unsigned long  vmm_ivt;
-
-	vmm_ivt = kvm_vmm_info->vmm_ivt;
-
-	printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
-
-	ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
-
-	if (ret) {
-		printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void init_ptce_info(struct kvm_vcpu *vcpu)
-{
-	ia64_ptce_info_t ptce = {0};
-
-	ia64_get_ptce(&ptce);
-	vcpu->arch.ptce_base = ptce.base;
-	vcpu->arch.ptce_count[0] = ptce.count[0];
-	vcpu->arch.ptce_count[1] = ptce.count[1];
-	vcpu->arch.ptce_stride[0] = ptce.stride[0];
-	vcpu->arch.ptce_stride[1] = ptce.stride[1];
-}
-
-static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
-{
-	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-
-	if (hrtimer_cancel(p_ht))
-		hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
-}
-
-static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
-{
-	struct kvm_vcpu *vcpu;
-	wait_queue_head_t *q;
-
-	vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
-	q = &vcpu->wq;
-
-	if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
-		goto out;
-
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-out:
-	vcpu->arch.timer_fired = 1;
-	vcpu->arch.timer_check = 1;
-	return HRTIMER_NORESTART;
-}
-
-#define PALE_RESET_ENTRY    0x80000000ffffffb0UL
-
-bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
-{
-	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct kvm_vcpu *v;
-	int r;
-	int i;
-	long itc_offset;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	union context *p_ctx = &vcpu->arch.guest;
-	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
-
-	/*Init vcpu context for first run.*/
-	if (IS_ERR(vmm_vcpu))
-		return PTR_ERR(vmm_vcpu);
-
-	if (kvm_vcpu_is_bsp(vcpu)) {
-		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-		/*Set entry address for first run.*/
-		regs->cr_iip = PALE_RESET_ENTRY;
-
-		/*Initialize itc offset for vcpus*/
-		itc_offset = 0UL - kvm_get_itc(vcpu);
-		for (i = 0; i < KVM_MAX_VCPUS; i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu +
-					sizeof(struct kvm_vcpu_data) * i);
-			v->arch.itc_offset = itc_offset;
-			v->arch.last_itc = 0;
-		}
-	} else
-		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
-
-	r = -ENOMEM;
-	vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
-	if (!vcpu->arch.apic)
-		goto out;
-	vcpu->arch.apic->vcpu = vcpu;
-
-	p_ctx->gr[1] = 0;
-	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
-	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
-	p_ctx->psr = 0x1008522000UL;
-	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
-	p_ctx->caller_unat = 0;
-	p_ctx->pr = 0x0;
-	p_ctx->ar[36] = 0x0; /*unat*/
-	p_ctx->ar[19] = 0x0; /*rnat*/
-	p_ctx->ar[18] = (unsigned long)vmm_vcpu +
-				((sizeof(struct kvm_vcpu)+15) & ~15);
-	p_ctx->ar[64] = 0x0; /*pfs*/
-	p_ctx->cr[0] = 0x7e04UL;
-	p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
-	p_ctx->cr[8] = 0x3c;
-
-	/*Initialize region register*/
-	p_ctx->rr[0] = 0x30;
-	p_ctx->rr[1] = 0x30;
-	p_ctx->rr[2] = 0x30;
-	p_ctx->rr[3] = 0x30;
-	p_ctx->rr[4] = 0x30;
-	p_ctx->rr[5] = 0x30;
-	p_ctx->rr[7] = 0x30;
-
-	/*Initialize branch register 0*/
-	p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
-
-	vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
-	vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
-	vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
-
-	hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	vcpu->arch.hlt_timer.function = hlt_timer_fn;
-
-	vcpu->arch.last_run_cpu = -1;
-	vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
-	vcpu->arch.vsa_base = kvm_vsa_base;
-	vcpu->arch.__gp = kvm_vmm_gp;
-	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
-	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
-	init_ptce_info(vcpu);
-
-	r = 0;
-out:
-	return r;
-}
-
-static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
-{
-	unsigned long psr;
-	int r;
-
-	local_irq_save(psr);
-	r = kvm_insert_vmm_mapping(vcpu);
-	local_irq_restore(psr);
-	if (r)
-		goto fail;
-	r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
-	if (r)
-		goto fail;
-
-	r = vti_init_vpd(vcpu);
-	if (r) {
-		printk(KERN_DEBUG"kvm: vpd init error!!\n");
-		goto uninit;
-	}
-
-	r = vti_create_vp(vcpu);
-	if (r)
-		goto uninit;
-
-	kvm_purge_vmm_mapping(vcpu);
-
-	return 0;
-uninit:
-	kvm_vcpu_uninit(vcpu);
-fail:
-	return r;
-}
-
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
-		unsigned int id)
-{
-	struct kvm_vcpu *vcpu;
-	unsigned long vm_base = kvm->arch.vm_base;
-	int r;
-	int cpu;
-
-	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
-
-	r = -EINVAL;
-	if (id >= KVM_MAX_VCPUS) {
-		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
-				KVM_MAX_VCPUS);
-		goto fail;
-	}
-
-	r = -ENOMEM;
-	if (!vm_base) {
-		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
-		goto fail;
-	}
-	vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
-					vcpu_data[id].vcpu_struct));
-	vcpu->kvm = kvm;
-
-	cpu = get_cpu();
-	r = vti_vcpu_setup(vcpu, id);
-	put_cpu();
-
-	if (r) {
-		printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
-		goto fail;
-	}
-
-	return vcpu;
-fail:
-	return ERR_PTR(r);
-}
-
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-					struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
-void kvm_arch_free_vm(struct kvm *kvm)
-{
-	unsigned long vm_base = kvm->arch.vm_base;
-
-	if (vm_base) {
-		memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-		free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
-	}
-
-}
-
-static void kvm_release_vm_pages(struct kvm *kvm)
-{
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *memslot;
-	int j;
-
-	slots = kvm_memslots(kvm);
-	kvm_for_each_memslot(memslot, slots) {
-		for (j = 0; j < memslot->npages; j++) {
-			if (memslot->rmap[j])
-				put_page((struct page *)memslot->rmap[j]);
-		}
-	}
-}
-
-void kvm_arch_destroy_vm(struct kvm *kvm)
-{
-	kvm_iommu_unmap_guest(kvm);
-	kvm_free_all_assigned_devices(kvm);
-	kfree(kvm->arch.vioapic);
-	kvm_release_vm_pages(kvm);
-}
-
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-	if (cpu != vcpu->cpu) {
-		vcpu->cpu = cpu;
-		if (vcpu->arch.ht_active)
-			kvm_migrate_hlt_timer(vcpu);
-	}
-}
-
-#define SAVE_REGS(_x) 	regs->_x = vcpu->arch._x
-
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-	int i;
-
-	vcpu_load(vcpu);
-
-	for (i = 0; i < 16; i++) {
-		regs->vpd.vgr[i] = vpd->vgr[i];
-		regs->vpd.vbgr[i] = vpd->vbgr[i];
-	}
-	for (i = 0; i < 128; i++)
-		regs->vpd.vcr[i] = vpd->vcr[i];
-	regs->vpd.vhpi = vpd->vhpi;
-	regs->vpd.vnat = vpd->vnat;
-	regs->vpd.vbnat = vpd->vbnat;
-	regs->vpd.vpsr = vpd->vpsr;
-	regs->vpd.vpr = vpd->vpr;
-
-	memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
-
-	SAVE_REGS(mp_state);
-	SAVE_REGS(vmm_rr);
-	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
-	memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
-	SAVE_REGS(itr_regions);
-	SAVE_REGS(dtr_regions);
-	SAVE_REGS(tc_regions);
-	SAVE_REGS(irq_check);
-	SAVE_REGS(itc_check);
-	SAVE_REGS(timer_check);
-	SAVE_REGS(timer_pending);
-	SAVE_REGS(last_itc);
-	for (i = 0; i < 8; i++) {
-		regs->vrr[i] = vcpu->arch.vrr[i];
-		regs->ibr[i] = vcpu->arch.ibr[i];
-		regs->dbr[i] = vcpu->arch.dbr[i];
-	}
-	for (i = 0; i < 4; i++)
-		regs->insvc[i] = vcpu->arch.insvc[i];
-	regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
-	SAVE_REGS(xtp);
-	SAVE_REGS(metaphysical_rr0);
-	SAVE_REGS(metaphysical_rr4);
-	SAVE_REGS(metaphysical_saved_rr0);
-	SAVE_REGS(metaphysical_saved_rr4);
-	SAVE_REGS(fp_psr);
-	SAVE_REGS(saved_gp);
-
-	vcpu_put(vcpu);
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
-				  struct kvm_ia64_vcpu_stack *stack)
-{
-	memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
-				  struct kvm_ia64_vcpu_stack *stack)
-{
-	memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
-	       sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
-
-	vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
-	return 0;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-
-	hrtimer_cancel(&vcpu->arch.hlt_timer);
-	kfree(vcpu->arch.apic);
-}
-
-long kvm_arch_vcpu_ioctl(struct file *filp,
-			 unsigned int ioctl, unsigned long arg)
-{
-	struct kvm_vcpu *vcpu = filp->private_data;
-	void __user *argp = (void __user *)arg;
-	struct kvm_ia64_vcpu_stack *stack = NULL;
-	long r;
-
-	switch (ioctl) {
-	case KVM_IA64_VCPU_GET_STACK: {
-		struct kvm_ia64_vcpu_stack __user *user_stack;
-	        void __user *first_p = argp;
-
-		r = -EFAULT;
-		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-			goto out;
-
-		if (!access_ok(VERIFY_WRITE, user_stack,
-			       sizeof(struct kvm_ia64_vcpu_stack))) {
-			printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
-			       "Illegal user destination address for stack\n");
-			goto out;
-		}
-		stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-		if (!stack) {
-			r = -ENOMEM;
-			goto out;
-		}
-
-		r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
-		if (r)
-			goto out;
-
-		if (copy_to_user(user_stack, stack,
-				 sizeof(struct kvm_ia64_vcpu_stack))) {
-			r = -EFAULT;
-			goto out;
-		}
-
-		break;
-	}
-	case KVM_IA64_VCPU_SET_STACK: {
-		struct kvm_ia64_vcpu_stack __user *user_stack;
-	        void __user *first_p = argp;
-
-		r = -EFAULT;
-		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-			goto out;
-
-		if (!access_ok(VERIFY_READ, user_stack,
-			    sizeof(struct kvm_ia64_vcpu_stack))) {
-			printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
-			       "Illegal user address for stack\n");
-			goto out;
-		}
-		stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-		if (!stack) {
-			r = -ENOMEM;
-			goto out;
-		}
-		if (copy_from_user(stack, user_stack,
-				   sizeof(struct kvm_ia64_vcpu_stack)))
-			goto out;
-
-		r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
-		break;
-	}
-
-	default:
-		r = -EINVAL;
-	}
-
-out:
-	kfree(stack);
-	return r;
-}
-
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
-{
-	return VM_FAULT_SIGBUS;
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    unsigned long npages)
-{
-	return 0;
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-		struct kvm_memory_slot *memslot,
-		struct kvm_userspace_memory_region *mem,
-		enum kvm_mr_change change)
-{
-	unsigned long i;
-	unsigned long pfn;
-	int npages = memslot->npages;
-	unsigned long base_gfn = memslot->base_gfn;
-
-	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
-		return -ENOMEM;
-
-	for (i = 0; i < npages; i++) {
-		pfn = gfn_to_pfn(kvm, base_gfn + i);
-		if (!kvm_is_reserved_pfn(pfn)) {
-			kvm_set_pmt_entry(kvm, base_gfn + i,
-					pfn << PAGE_SHIFT,
-				_PAGE_AR_RWX | _PAGE_MA_WB);
-			memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
-		} else {
-			kvm_set_pmt_entry(kvm, base_gfn + i,
-					GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
-					_PAGE_MA_UC);
-			memslot->rmap[i] = 0;
-			}
-	}
-
-	return 0;
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-	kvm_flush_remote_tlbs(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-	kvm_arch_flush_shadow_all();
-}
-
-long kvm_arch_dev_ioctl(struct file *filp,
-			unsigned int ioctl, unsigned long arg)
-{
-	return -EINVAL;
-}
-
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-	kvm_vcpu_uninit(vcpu);
-}
-
-static int vti_cpu_has_kvm_support(void)
-{
-	long  avail = 1, status = 1, control = 1;
-	long ret;
-
-	ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
-	if (ret)
-		goto out;
-
-	if (!(avail & PAL_PROC_VM_BIT))
-		goto out;
-
-	printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
-
-	ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
-	if (ret)
-		goto out;
-	printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
-
-	if (!(vp_env_info & VP_OPCODE)) {
-		printk(KERN_WARNING"kvm: No opcode ability on hardware, "
-				"vm_env_info:0x%lx\n", vp_env_info);
-	}
-
-	return 1;
-out:
-	return 0;
-}
-
-
-/*
- * On SN2, the ITC isn't stable, so copy in fast path code to use the
- * SN2 RTC, replacing the ITC based default verion.
- */
-static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
-			  struct module *module)
-{
-	unsigned long new_ar, new_ar_sn2;
-	unsigned long module_base;
-
-	if (!ia64_platform_is("sn2"))
-		return;
-
-	module_base = (unsigned long)module->module_core;
-
-	new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
-	new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
-
-	printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
-	       "as source\n");
-
-	/*
-	 * Copy the SN2 version of mov_ar into place. They are both
-	 * the same size, so 6 bundles is sufficient (6 * 0x10).
-	 */
-	memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
-}
-
-static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
-			    struct module *module)
-{
-	unsigned long module_base;
-	unsigned long vmm_size;
-
-	unsigned long vmm_offset, func_offset, fdesc_offset;
-	struct fdesc *p_fdesc;
-
-	BUG_ON(!module);
-
-	if (!kvm_vmm_base) {
-		printk("kvm: kvm area hasn't been initialized yet!!\n");
-		return -EFAULT;
-	}
-
-	/*Calculate new position of relocated vmm module.*/
-	module_base = (unsigned long)module->module_core;
-	vmm_size = module->core_size;
-	if (unlikely(vmm_size > KVM_VMM_SIZE))
-		return -EFAULT;
-
-	memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
-	kvm_patch_vmm(vmm_info, module);
-	kvm_flush_icache(kvm_vmm_base, vmm_size);
-
-	/*Recalculate kvm_vmm_info based on new VMM*/
-	vmm_offset = vmm_info->vmm_ivt - module_base;
-	kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
-	printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
-			kvm_vmm_info->vmm_ivt);
-
-	fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
-	kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
-							fdesc_offset);
-	func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
-	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-	p_fdesc->ip = KVM_VMM_BASE + func_offset;
-	p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
-
-	printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
-			KVM_VMM_BASE+func_offset);
-
-	fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
-	kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
-			fdesc_offset);
-	func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
-	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-	p_fdesc->ip = KVM_VMM_BASE + func_offset;
-	p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
-
-	kvm_vmm_gp = p_fdesc->gp;
-
-	printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
-						kvm_vmm_info->vmm_entry);
-	printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
-						KVM_VMM_BASE + func_offset);
-
-	return 0;
-}
-
-int kvm_arch_init(void *opaque)
-{
-	int r;
-	struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
-
-	if (!vti_cpu_has_kvm_support()) {
-		printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
-		r = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (kvm_vmm_info) {
-		printk(KERN_ERR "kvm: Already loaded VMM module!\n");
-		r = -EEXIST;
-		goto out;
-	}
-
-	r = -ENOMEM;
-	kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
-	if (!kvm_vmm_info)
-		goto out;
-
-	if (kvm_alloc_vmm_area())
-		goto out_free0;
-
-	r = kvm_relocate_vmm(vmm_info, vmm_info->module);
-	if (r)
-		goto out_free1;
-
-	return 0;
-
-out_free1:
-	kvm_free_vmm_area();
-out_free0:
-	kfree(kvm_vmm_info);
-out:
-	return r;
-}
-
-void kvm_arch_exit(void)
-{
-	kvm_free_vmm_area();
-	kfree(kvm_vmm_info);
-	kvm_vmm_info = NULL;
-}
-
-static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
-				    struct kvm_memory_slot *memslot)
-{
-	int i;
-	long base;
-	unsigned long n;
-	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
-			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
-
-	n = kvm_dirty_bitmap_bytes(memslot);
-	base = memslot->base_gfn / BITS_PER_LONG;
-
-	spin_lock(&kvm->arch.dirty_log_lock);
-	for (i = 0; i < n/sizeof(long); ++i) {
-		memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
-		dirty_bitmap[base + i] = 0;
-	}
-	spin_unlock(&kvm->arch.dirty_log_lock);
-}
-
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-		struct kvm_dirty_log *log)
-{
-	int r;
-	unsigned long n;
-	struct kvm_memory_slot *memslot;
-	int is_dirty = 0;
-
-	mutex_lock(&kvm->slots_lock);
-
-	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
-		goto out;
-
-	memslot = id_to_memslot(kvm->memslots, log->slot);
-	r = -ENOENT;
-	if (!memslot->dirty_bitmap)
-		goto out;
-
-	kvm_ia64_sync_dirty_log(kvm, memslot);
-	r = kvm_get_dirty_log(kvm, log, &is_dirty);
-	if (r)
-		goto out;
-
-	/* If nothing is dirty, don't bother messing with page tables. */
-	if (is_dirty) {
-		kvm_flush_remote_tlbs(kvm);
-		n = kvm_dirty_bitmap_bytes(memslot);
-		memset(memslot->dirty_bitmap, 0, n);
-	}
-	r = 0;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
-int kvm_arch_hardware_setup(void)
-{
-	return 0;
-}
-
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
-{
-	return __apic_accept_irq(vcpu, irq->vector);
-}
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
-{
-	return apic->vcpu->vcpu_id == dest;
-}
-
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
-{
-	return 0;
-}
-
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
-{
-	return vcpu1->arch.xtp - vcpu2->arch.xtp;
-}
-
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-		int short_hand, int dest, int dest_mode)
-{
-	struct kvm_lapic *target = vcpu->arch.apic;
-	return (dest_mode == 0) ?
-		kvm_apic_match_physical_addr(target, dest) :
-		kvm_apic_match_logical_addr(target, dest);
-}
-
-static int find_highest_bits(int *dat)
-{
-	u32  bits, bitnum;
-	int i;
-
-	/* loop for all 256 bits */
-	for (i = 7; i >= 0 ; i--) {
-		bits = dat[i];
-		if (bits) {
-			bitnum = fls(bits);
-			return i * 32 + bitnum - 1;
-		}
-	}
-
-	return -1;
-}
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-    if (vpd->irr[0] & (1UL << NMI_VECTOR))
-		return NMI_VECTOR;
-    if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
-		return ExtINT_VECTOR;
-
-    return find_highest_bits((int *)&vpd->irr[0]);
-}
-
-int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.timer_fired;
-}
-
-int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
-{
-	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
-		(kvm_highest_pending_irq(vcpu) != -1);
-}
-
-int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
-{
-	return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
-}
-
-int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
-				    struct kvm_mp_state *mp_state)
-{
-	mp_state->mp_state = vcpu->arch.mp_state;
-	return 0;
-}
-
-static int vcpu_reset(struct kvm_vcpu *vcpu)
-{
-	int r;
-	long psr;
-	local_irq_save(psr);
-	r = kvm_insert_vmm_mapping(vcpu);
-	local_irq_restore(psr);
-	if (r)
-		goto fail;
-
-	vcpu->arch.launched = 0;
-	kvm_arch_vcpu_uninit(vcpu);
-	r = kvm_arch_vcpu_init(vcpu);
-	if (r)
-		goto fail;
-
-	kvm_purge_vmm_mapping(vcpu);
-	r = 0;
-fail:
-	return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
-				    struct kvm_mp_state *mp_state)
-{
-	int r = 0;
-
-	vcpu->arch.mp_state = mp_state->mp_state;
-	if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
-		r = vcpu_reset(vcpu);
-	return r;
-}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
deleted file mode 100644
index cb548ee9fcae..000000000000
--- a/arch/ia64/kvm/kvm_fw.c
+++ /dev/null
@@ -1,674 +0,0 @@
-/*
- * PAL/SAL call delegation
- *
- * Copyright (c) 2004 Li Susie <susie.li@intel.com>
- * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
- * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/smp.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "vti.h"
-#include "misc.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/tlb.h>
-
-/*
- * Handy macros to make sure that the PAL return values start out
- * as something meaningful.
- */
-#define INIT_PAL_STATUS_UNIMPLEMENTED(x)		\
-	{						\
-		x.status = PAL_STATUS_UNIMPLEMENTED;	\
-		x.v0 = 0;				\
-		x.v1 = 0;				\
-		x.v2 = 0;				\
-	}
-
-#define INIT_PAL_STATUS_SUCCESS(x)			\
-	{						\
-		x.status = PAL_STATUS_SUCCESS;		\
-		x.v0 = 0;				\
-		x.v1 = 0;				\
-		x.v2 = 0;				\
-    }
-
-static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
-		u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
-	struct exit_ctl_data *p;
-
-	if (vcpu) {
-		p = &vcpu->arch.exit_data;
-		if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-			*gr28 = p->u.pal_data.gr28;
-			*gr29 = p->u.pal_data.gr29;
-			*gr30 = p->u.pal_data.gr30;
-			*gr31 = p->u.pal_data.gr31;
-			return ;
-		}
-	}
-	printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
-}
-
-static void set_pal_result(struct kvm_vcpu *vcpu,
-		struct ia64_pal_retval result) {
-
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-		p->u.pal_data.ret = result;
-		return ;
-	}
-	INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
-}
-
-static void set_sal_result(struct kvm_vcpu *vcpu,
-		struct sal_ret_values result) {
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		p->u.sal_data.ret = result;
-		return ;
-	}
-	printk(KERN_WARNING"Failed to set sal result!!\n");
-}
-
-struct cache_flush_args {
-	u64 cache_type;
-	u64 operation;
-	u64 progress;
-	long status;
-};
-
-cpumask_t cpu_cache_coherent_map;
-
-static void remote_pal_cache_flush(void *data)
-{
-	struct cache_flush_args *args = data;
-	long status;
-	u64 progress = args->progress;
-
-	status = ia64_pal_cache_flush(args->cache_type, args->operation,
-					&progress, NULL);
-	if (status != 0)
-	args->status = status;
-}
-
-static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
-{
-	u64 gr28, gr29, gr30, gr31;
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	struct cache_flush_args args = {0, 0, 0, 0};
-	long psr;
-
-	gr28 = gr29 = gr30 = gr31 = 0;
-	kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
-
-	if (gr31 != 0)
-		printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
-
-	/* Always call Host Pal in int=1 */
-	gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
-	args.cache_type = gr29;
-	args.operation = gr30;
-	smp_call_function(remote_pal_cache_flush,
-				(void *)&args, 1);
-	if (args.status != 0)
-		printk(KERN_ERR"pal_cache_flush error!,"
-				"status:0x%lx\n", args.status);
-	/*
-	 * Call Host PAL cache flush
-	 * Clear psr.ic when call PAL_CACHE_FLUSH
-	 */
-	local_irq_save(psr);
-	result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
-						&result.v0);
-	local_irq_restore(psr);
-	if (result.status != 0)
-		printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
-				"in1:%lx,in2:%lx\n",
-				vcpu, result.status, gr29, gr30);
-
-#if 0
-	if (gr29 == PAL_CACHE_TYPE_COHERENT) {
-		cpus_setall(vcpu->arch.cache_coherent_map);
-		cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
-		cpus_setall(cpu_cache_coherent_map);
-		cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
-	}
-#endif
-	return result;
-}
-
-struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
-	return result;
-}
-
-static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
-
-	/*
-	 * PAL_FREQ_BASE may not be implemented in some platforms,
-	 * call SAL instead.
-	 */
-	if (result.v0 == 0) {
-		result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-							&result.v0,
-							&result.v1);
-		result.v2 = 0;
-	}
-
-	return result;
-}
-
-/*
- * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
- * RTC is used instead. This function patches the ratios from SAL
- * to match the RTC before providing them to the guest.
- */
-static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
-{
-	struct pal_freq_ratio *ratio;
-	unsigned long sal_freq, sal_drift, factor;
-
-	result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-					    &sal_freq, &sal_drift);
-	ratio = (struct pal_freq_ratio *)&result->v2;
-	factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
-		sn_rtc_cycles_per_second;
-
-	ratio->num = 3;
-	ratio->den = factor;
-}
-
-static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
-
-	if (vcpu->kvm->arch.is_sn2)
-		sn2_patch_itc_freq_ratios(&result);
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result;
-
-	INIT_PAL_STATUS_UNIMPLEMENTED(result);
-	return result;
-}
-
-static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result;
-
-	INIT_PAL_STATUS_SUCCESS(result);
-	return result;
-}
-
-static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
-			&result.v2, in2);
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
-{
-
-	pal_cache_config_info_t ci;
-	long status;
-	unsigned long in0, in1, in2, in3, r9, r10;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	status = ia64_pal_cache_config_info(in1, in2, &ci);
-	r9 = ci.pcci_info_1.pcci1_data;
-	r10 = ci.pcci_info_2.pcci2_data;
-	return ((struct ia64_pal_retval){status, r9, r10, 0});
-}
-
-#define GUEST_IMPL_VA_MSB	59
-#define GUEST_RID_BITS		18
-
-static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
-{
-
-	pal_vm_info_1_u_t vminfo1;
-	pal_vm_info_2_u_t vminfo2;
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
-	if (!result.status) {
-		vminfo1.pvi1_val = result.v0;
-		vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
-		vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
-		result.v0 = vminfo1.pvi1_val;
-		vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
-		vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
-		result.v1 = vminfo2.pvi2_val;
-	}
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result;
-	unsigned long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-	result.status = ia64_pal_vm_info(in1, in2,
-			(pal_tc_info_u_t *)&result.v1, &result.v2);
-
-	return result;
-}
-
-static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
-{
-	u64 index = 0;
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-	if (p->exit_reason == EXIT_REASON_PAL_CALL)
-		index = p->u.pal_data.gr28;
-
-	return index;
-}
-
-static void prepare_for_halt(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.timer_pending = 1;
-	vcpu->arch.timer_fired = 0;
-}
-
-static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
-{
-	long status;
-	unsigned long in0, in1, in2, in3, r9;
-	unsigned long pm_buffer[16];
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	status = ia64_pal_perf_mon_info(pm_buffer,
-				(pal_perf_mon_info_u_t *) &r9);
-	if (status != 0) {
-		printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
-	} else {
-		if (in1)
-			memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
-		else {
-			status = PAL_STATUS_EINVAL;
-			printk(KERN_WARNING"Invalid parameters "
-						"for PAL call:0x%lx!\n", in0);
-		}
-	}
-	return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
-{
-	unsigned long in0, in1, in2, in3;
-	long status;
-	unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
-					| (1UL << 61) | (1UL << 60);
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	if (in1) {
-		memcpy((void *)in1, &res, sizeof(res));
-		status = 0;
-	} else{
-		status = PAL_STATUS_EINVAL;
-		printk(KERN_WARNING"Invalid parameters "
-					"for PAL call:0x%lx!\n", in0);
-	}
-
-	return (struct ia64_pal_retval){status, 0, 0, 0};
-}
-
-static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
-{
-	unsigned long r9;
-	long status;
-
-	status = ia64_pal_mem_attrib(&r9);
-
-	return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static void remote_pal_prefetch_visibility(void *v)
-{
-	s64 trans_type = (s64)v;
-	ia64_pal_prefetch_visibility(trans_type);
-}
-
-static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	unsigned long in0, in1, in2, in3;
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	result.status = ia64_pal_prefetch_visibility(in1);
-	if (result.status == 0) {
-		/* Must be performed on all remote processors
-		in the coherence domain. */
-		smp_call_function(remote_pal_prefetch_visibility,
-					(void *)in1, 1);
-		/* Unnecessary on remote processor for other vcpus!*/
-		result.status = 1;
-	}
-	return result;
-}
-
-static void remote_pal_mc_drain(void *v)
-{
-	ia64_pal_mc_drain();
-}
-
-static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	unsigned long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-	if (in1 == 0 && in2) {
-		char brand_info[128];
-		result.status = ia64_pal_get_brand_info(brand_info);
-		if (result.status == PAL_STATUS_SUCCESS)
-			memcpy((void *)in2, brand_info, 128);
-	} else {
-		result.status = PAL_STATUS_REQUIRES_MEMORY;
-		printk(KERN_WARNING"Invalid parameters for "
-					"PAL call:0x%lx!\n", in0);
-	}
-
-	return result;
-}
-
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-
-	u64 gr28;
-	struct ia64_pal_retval result;
-	int ret = 1;
-
-	gr28 = kvm_get_pal_call_index(vcpu);
-	switch (gr28) {
-	case PAL_CACHE_FLUSH:
-		result = pal_cache_flush(vcpu);
-		break;
-	case PAL_MEM_ATTRIB:
-		result = pal_mem_attrib(vcpu);
-		break;
-	case PAL_CACHE_SUMMARY:
-		result = pal_cache_summary(vcpu);
-		break;
-	case PAL_PERF_MON_INFO:
-		result = pal_perf_mon_info(vcpu);
-		break;
-	case PAL_HALT_INFO:
-		result = pal_halt_info(vcpu);
-		break;
-	case PAL_HALT_LIGHT:
-	{
-		INIT_PAL_STATUS_SUCCESS(result);
-		prepare_for_halt(vcpu);
-		if (kvm_highest_pending_irq(vcpu) == -1)
-			ret = kvm_emulate_halt(vcpu);
-	}
-		break;
-
-	case PAL_PREFETCH_VISIBILITY:
-		result = pal_prefetch_visibility(vcpu);
-		break;
-	case PAL_MC_DRAIN:
-		result.status = ia64_pal_mc_drain();
-		/* FIXME: All vcpus likely call PAL_MC_DRAIN.
-		   That causes the congestion. */
-		smp_call_function(remote_pal_mc_drain, NULL, 1);
-		break;
-
-	case PAL_FREQ_RATIOS:
-		result = pal_freq_ratios(vcpu);
-		break;
-
-	case PAL_FREQ_BASE:
-		result = pal_freq_base(vcpu);
-		break;
-
-	case PAL_LOGICAL_TO_PHYSICAL :
-		result = pal_logical_to_physica(vcpu);
-		break;
-
-	case PAL_VM_SUMMARY :
-		result = pal_vm_summary(vcpu);
-		break;
-
-	case PAL_VM_INFO :
-		result = pal_vm_info(vcpu);
-		break;
-	case PAL_PLATFORM_ADDR :
-		result = pal_platform_addr(vcpu);
-		break;
-	case PAL_CACHE_INFO:
-		result = pal_cache_info(vcpu);
-		break;
-	case PAL_PTCE_INFO:
-		INIT_PAL_STATUS_SUCCESS(result);
-		result.v1 = (1L << 32) | 1L;
-		break;
-	case PAL_REGISTER_INFO:
-		result = pal_register_info(vcpu);
-		break;
-	case PAL_VM_PAGE_SIZE:
-		result.status = ia64_pal_vm_page_size(&result.v0,
-							&result.v1);
-		break;
-	case PAL_RSE_INFO:
-		result.status = ia64_pal_rse_info(&result.v0,
-					(pal_hints_u_t *)&result.v1);
-		break;
-	case PAL_PROC_GET_FEATURES:
-		result = pal_proc_get_features(vcpu);
-		break;
-	case PAL_DEBUG_INFO:
-		result.status = ia64_pal_debug_info(&result.v0,
-							&result.v1);
-		break;
-	case PAL_VERSION:
-		result.status = ia64_pal_version(
-				(pal_version_u_t *)&result.v0,
-				(pal_version_u_t *)&result.v1);
-		break;
-	case PAL_FIXED_ADDR:
-		result.status = PAL_STATUS_SUCCESS;
-		result.v0 = vcpu->vcpu_id;
-		break;
-	case PAL_BRAND_INFO:
-		result = pal_get_brand_info(vcpu);
-		break;
-	case PAL_GET_PSTATE:
-	case PAL_CACHE_SHARED_INFO:
-		INIT_PAL_STATUS_UNIMPLEMENTED(result);
-		break;
-	default:
-		INIT_PAL_STATUS_UNIMPLEMENTED(result);
-		printk(KERN_WARNING"kvm: Unsupported pal call,"
-					" index:0x%lx\n", gr28);
-	}
-	set_pal_result(vcpu, result);
-	return ret;
-}
-
-static struct sal_ret_values sal_emulator(struct kvm *kvm,
-				long index, unsigned long in1,
-				unsigned long in2, unsigned long in3,
-				unsigned long in4, unsigned long in5,
-				unsigned long in6, unsigned long in7)
-{
-	unsigned long r9  = 0;
-	unsigned long r10 = 0;
-	long r11 = 0;
-	long status;
-
-	status = 0;
-	switch (index) {
-	case SAL_FREQ_BASE:
-		status = ia64_sal_freq_base(in1, &r9, &r10);
-		break;
-	case SAL_PCI_CONFIG_READ:
-		printk(KERN_WARNING"kvm: Not allowed to call here!"
-			" SAL_PCI_CONFIG_READ\n");
-		break;
-	case SAL_PCI_CONFIG_WRITE:
-		printk(KERN_WARNING"kvm: Not allowed to call here!"
-			" SAL_PCI_CONFIG_WRITE\n");
-		break;
-	case SAL_SET_VECTORS:
-		if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
-			if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
-				status = -2;
-			} else {
-				kvm->arch.rdv_sal_data.boot_ip = in2;
-				kvm->arch.rdv_sal_data.boot_gp = in3;
-			}
-			printk("Rendvous called! iip:%lx\n\n", in2);
-		} else
-			printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
-							"ignored...\n", in1);
-		break;
-	case SAL_GET_STATE_INFO:
-		/* No more info.  */
-		status = -5;
-		r9 = 0;
-		break;
-	case SAL_GET_STATE_INFO_SIZE:
-		/* Return a dummy size.  */
-		status = 0;
-		r9 = 128;
-		break;
-	case SAL_CLEAR_STATE_INFO:
-		/* Noop.  */
-		break;
-	case SAL_MC_RENDEZ:
-		printk(KERN_WARNING
-			"kvm: called SAL_MC_RENDEZ. ignored...\n");
-		break;
-	case SAL_MC_SET_PARAMS:
-		printk(KERN_WARNING
-			"kvm: called  SAL_MC_SET_PARAMS.ignored!\n");
-		break;
-	case SAL_CACHE_FLUSH:
-		if (1) {
-			/*Flush using SAL.
-			This method is faster but has a side
-			effect on other vcpu running on
-			this cpu.  */
-			status = ia64_sal_cache_flush(in1);
-		} else {
-			/*Maybe need to implement the method
-			without side effect!*/
-			status = 0;
-		}
-		break;
-	case SAL_CACHE_INIT:
-		printk(KERN_WARNING
-			"kvm: called SAL_CACHE_INIT.  ignored...\n");
-		break;
-	case SAL_UPDATE_PAL:
-		printk(KERN_WARNING
-			"kvm: CALLED SAL_UPDATE_PAL.  ignored...\n");
-		break;
-	default:
-		printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
-						" index:%ld\n", index);
-		status = -1;
-		break;
-	}
-	return ((struct sal_ret_values) {status, r9, r10, r11});
-}
-
-static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
-		u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
-
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		*in0 = p->u.sal_data.in0;
-		*in1 = p->u.sal_data.in1;
-		*in2 = p->u.sal_data.in2;
-		*in3 = p->u.sal_data.in3;
-		*in4 = p->u.sal_data.in4;
-		*in5 = p->u.sal_data.in5;
-		*in6 = p->u.sal_data.in6;
-		*in7 = p->u.sal_data.in7;
-		return ;
-	}
-	*in0 = 0;
-}
-
-void kvm_sal_emul(struct kvm_vcpu *vcpu)
-{
-
-	struct sal_ret_values result;
-	u64 index, in1, in2, in3, in4, in5, in6, in7;
-
-	kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
-			&in3, &in4, &in5, &in6, &in7);
-	result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
-					in4, in5, in6, in7);
-	set_sal_result(vcpu, result);
-}
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
deleted file mode 100644
index f1268b8e6f9e..000000000000
--- a/arch/ia64/kvm/kvm_lib.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * kvm_lib.c: Compile some libraries for kvm-intel module.
- *
- *	Just include kernel's library, and disable symbols export.
- * 	Copyright (C) 2008, Intel Corporation.
- *  	Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#undef CONFIG_MODULES
-#include <linux/module.h>
-#undef CONFIG_KALLSYMS
-#undef EXPORT_SYMBOL
-#undef EXPORT_SYMBOL_GPL
-#define EXPORT_SYMBOL(sym)
-#define EXPORT_SYMBOL_GPL(sym)
-#include "../../../lib/vsprintf.c"
-#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
deleted file mode 100644
index b2bcaa2787aa..000000000000
--- a/arch/ia64/kvm/kvm_minstate.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- *  kvm_minstate.h: min save macros
- *  Copyright (c) 2007, Intel Corporation.
- *
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/types.h>
-#include <asm/kregs.h>
-#include <asm/kvm_host.h>
-
-#include "asm-offsets.h"
-
-#define KVM_MINSTATE_START_SAVE_MIN	     					\
-	mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
-	;;									\
-	mov.m r28 = ar.rnat;                                  			\
-	addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */	\
-	;;									\
-	lfetch.fault.excl.nt1 [r22];						\
-	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;  \
-	mov r23 = ar.bspstore;			/* save ar.bspstore */          \
-	;;									\
-	mov ar.bspstore = r22;				/* switch to kernel RBS */\
-	;;									\
-	mov r18 = ar.bsp;							\
-	mov ar.rsc = 0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */
-
-
-
-#define KVM_MINSTATE_END_SAVE_MIN						\
-	bsw.1;          /* switch back to bank 1 (must be last in insn group) */\
-	;;
-
-
-#define PAL_VSA_SYNC_READ						\
-	/* begin to call pal vps sync_read */				\
-{.mii;									\
-	add r25 = VMM_VPD_BASE_OFFSET, r21;				\
-	nop 0x0;							\
-	mov r24=ip;							\
-	;;								\
-}									\
-{.mmb									\
-	add r24=0x20, r24;						\
-	ld8 r25 = [r25];      /* read vpd base */			\
-	br.cond.sptk kvm_vps_sync_read;		/*call the service*/	\
-	;;								\
-};									\
-
-
-#define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
-
-/*
- * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
- * the minimum state necessary that allows us to turn psr.ic back
- * on.
- *
- * Assumed state upon entry:
- *  psr.ic: off
- *  r31:	contains saved predicates (pr)
- *
- * Upon exit, the state is as follows:
- *  psr.ic: off
- *   r2 = points to &pt_regs.r16
- *   r8 = contents of ar.ccv
- *   r9 = contents of ar.csd
- *  r10 = contents of ar.ssd
- *  r11 = FPSR_DEFAULT
- *  r12 = kernel sp (kernel virtual address)
- *  r13 = points to current task_struct (kernel virtual address)
- *  p15 = TRUE if psr.i is set in cr.ipsr
- *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
- *	  preserved
- *
- * Note that psr.ic is NOT turned on by this macro.  This is so that
- * we can pass interruption state as arguments to a handler.
- */
-
-
-#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
-
-#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)			\
-	KVM_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */	\
-	mov r27 = ar.rsc;         /* M */			\
-	mov r20 = r1;         /* A */				\
-	mov r25 = ar.unat;        /* M */			\
-	mov r29 = cr.ipsr;        /* M */			\
-	mov r26 = ar.pfs;         /* I */			\
-	mov r18 = cr.isr;         				\
-	COVER;              /* B;; (or nothing) */		\
-	;;							\
-	tbit.z p0,p15 = r29,IA64_PSR_I_BIT;			\
-	mov r1 = r16;						\
-/*	mov r21=r16;	*/					\
-	/* switch from user to kernel RBS: */			\
-	;;							\
-	invala;             /* M */				\
-	SAVE_IFS;						\
-	;;							\
-	KVM_MINSTATE_START_SAVE_MIN				\
-	adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */	\
-	adds r16 = PT(CR_IPSR),r1;				\
-	;;							\
-	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;		\
-	st8 [r16] = r29;      /* save cr.ipsr */		\
-	;;							\
-	lfetch.fault.excl.nt1 [r17];				\
-	tbit.nz p15,p0 = r29,IA64_PSR_I_BIT;			\
-	mov r29 = b0						\
-	;;							\
-	adds r16 = PT(R8),r1; /* initialize first base pointer */\
-	adds r17 = PT(R9),r1; /* initialize second base pointer */\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r8,16;			\
-.mem.offset 8,0; st8.spill [r17] = r9,16;			\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r10,24;			\
-.mem.offset 8,0; st8.spill [r17] = r11,24;			\
-	;;							\
-	mov r9 = cr.iip;         /* M */			\
-	mov r10 = ar.fpsr;        /* M */			\
-	;;							\
-	st8 [r16] = r9,16;    /* save cr.iip */			\
-	st8 [r17] = r30,16;   /* save cr.ifs */			\
-	sub r18 = r18,r22;    /* r18=RSE.ndirty*8 */		\
-	;;							\
-	st8 [r16] = r25,16;   /* save ar.unat */		\
-	st8 [r17] = r26,16;    /* save ar.pfs */		\
-	shl r18 = r18,16;     /* calu ar.rsc used for "loadrs" */\
-	;;							\
-	st8 [r16] = r27,16;   /* save ar.rsc */			\
-	st8 [r17] = r28,16;   /* save ar.rnat */		\
-	;;          /* avoid RAW on r16 & r17 */		\
-	st8 [r16] = r23,16;   /* save ar.bspstore */		\
-	st8 [r17] = r31,16;   /* save predicates */		\
-	;;							\
-	st8 [r16] = r29,16;   /* save b0 */			\
-	st8 [r17] = r18,16;   /* save ar.rsc value for "loadrs" */\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */  \
-.mem.offset 8,0; st8.spill [r17] = r12,16;			\
-	adds r12 = -16,r1;    /* switch to kernel memory stack */  \
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r13,16;			\
-.mem.offset 8,0; st8.spill [r17] = r10,16;	/* save ar.fpsr */\
-	mov r13 = r21;   /* establish `current' */		\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r15,16;			\
-.mem.offset 8,0; st8.spill [r17] = r14,16;			\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r2,16;			\
-.mem.offset 8,0; st8.spill [r17] = r3,16;			\
-	adds r2 = VMM_PT_REGS_R16_OFFSET,r1;			\
-	 ;;							\
-	adds r16 = VMM_VCPU_IIPA_OFFSET,r13;			\
-	adds r17 = VMM_VCPU_ISR_OFFSET,r13;			\
-	mov r26 = cr.iipa;					\
-	mov r27 = cr.isr;					\
-	;;							\
-	st8 [r16] = r26;					\
-	st8 [r17] = r27;					\
-	;;							\
-	EXTRA;							\
-	mov r8 = ar.ccv;					\
-	mov r9 = ar.csd;					\
-	mov r10 = ar.ssd;					\
-	movl r11 = FPSR_DEFAULT;   /* L-unit */			\
-	adds r17 = VMM_VCPU_GP_OFFSET,r13;			\
-	;;							\
-	ld8 r1 = [r17];/* establish kernel global pointer */	\
-	;;							\
-	PAL_VSA_SYNC_READ					\
-	KVM_MINSTATE_END_SAVE_MIN
-
-/*
- * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
- *
- * Assumed state upon entry:
- *  psr.ic: on
- *  r2: points to &pt_regs.f6
- *  r3: points to &pt_regs.f7
- *  r8: contents of ar.ccv
- *  r9: contents of ar.csd
- *  r10:	contents of ar.ssd
- *  r11:	FPSR_DEFAULT
- *
- * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
- */
-#define KVM_SAVE_REST				\
-.mem.offset 0,0; st8.spill [r2] = r16,16;	\
-.mem.offset 8,0; st8.spill [r3] = r17,16;	\
-	;;				\
-.mem.offset 0,0; st8.spill [r2] = r18,16;	\
-.mem.offset 8,0; st8.spill [r3] = r19,16;	\
-	;;				\
-.mem.offset 0,0; st8.spill [r2] = r20,16;	\
-.mem.offset 8,0; st8.spill [r3] = r21,16;	\
-	mov r18=b6;			\
-	;;				\
-.mem.offset 0,0; st8.spill [r2] = r22,16;	\
-.mem.offset 8,0; st8.spill [r3] = r23,16;	\
-	mov r19 = b7;				\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r24,16;	\
-.mem.offset 8,0; st8.spill [r3] = r25,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r26,16;	\
-.mem.offset 8,0; st8.spill [r3] = r27,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r28,16;	\
-.mem.offset 8,0; st8.spill [r3] = r29,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r30,16;	\
-.mem.offset 8,0; st8.spill [r3] = r31,32;	\
-	;;					\
-	mov ar.fpsr = r11;			\
-	st8 [r2] = r8,8;			\
-	adds r24 = PT(B6)-PT(F7),r3;		\
-	adds r25 = PT(B7)-PT(F7),r3;		\
-	;;					\
-	st8 [r24] = r18,16;       /* b6 */	\
-	st8 [r25] = r19,16;       /* b7 */	\
-	adds r2 = PT(R4)-PT(F6),r2;		\
-	adds r3 = PT(R5)-PT(F7),r3;		\
-	;;					\
-	st8 [r24] = r9;	/* ar.csd */		\
-	st8 [r25] = r10;	/* ar.ssd */	\
-	;;					\
-	mov r18 = ar.unat;			\
-	adds r19 = PT(EML_UNAT)-PT(R4),r2;	\
-	;;					\
-	st8 [r19] = r18; /* eml_unat */ 	\
-
-
-#define KVM_SAVE_EXTRA				\
-.mem.offset 0,0; st8.spill [r2] = r4,16;	\
-.mem.offset 8,0; st8.spill [r3] = r5,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r6,16;	\
-.mem.offset 8,0; st8.spill [r3] = r7;		\
-	;;					\
-	mov r26 = ar.unat;			\
-	;;					\
-	st8 [r2] = r26;/* eml_unat */ 		\
-
-#define KVM_SAVE_MIN_WITH_COVER		KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
-#define KVM_SAVE_MIN_WITH_COVER_R19	KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
-#define KVM_SAVE_MIN			KVM_DO_SAVE_MIN(     , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
deleted file mode 100644
index c5f92a926a9a..000000000000
--- a/arch/ia64/kvm/lapic.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef __KVM_IA64_LAPIC_H
-#define __KVM_IA64_LAPIC_H
-
-#include <linux/kvm_host.h>
-
-/*
- * vlsapic
- */
-struct kvm_lapic{
-	struct kvm_vcpu *vcpu;
-	uint64_t insvc[4];
-	uint64_t vhpi;
-	uint8_t xtp;
-	uint8_t pal_init_pending;
-	uint8_t pad[2];
-};
-
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
-void kvm_free_lapic(struct kvm_vcpu *vcpu);
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-		int short_hand, int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
-#define kvm_apic_present(x) (true)
-#define kvm_lapic_enabled(x) (true)
-
-#endif
diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S
deleted file mode 100644
index c04cdbe9f80f..000000000000
--- a/arch/ia64/kvm/memcpy.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../lib/memcpy.S"
diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S
deleted file mode 100644
index 83c3066d844a..000000000000
--- a/arch/ia64/kvm/memset.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../lib/memset.S"
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
deleted file mode 100644
index dd979e00b574..000000000000
--- a/arch/ia64/kvm/misc.h
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef __KVM_IA64_MISC_H
-#define __KVM_IA64_MISC_H
-
-#include <linux/kvm_host.h>
-/*
- * misc.h
- * 	Copyright (C) 2007, Intel Corporation.
- *  	Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-/*
- *Return p2m base address at host side!
- */
-static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
-{
-	return (uint64_t *)(kvm->arch.vm_base +
-				offsetof(struct kvm_vm_data, kvm_p2m));
-}
-
-static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
-		u64 paddr, u64 mem_flags)
-{
-	uint64_t *pmt_base = kvm_host_get_pmt(kvm);
-	unsigned long pte;
-
-	pte = PAGE_ALIGN(paddr) | mem_flags;
-	pmt_base[gfn] = pte;
-}
-
-/*Function for translating host address to guest address*/
-
-static inline void *to_guest(struct kvm *kvm, void *addr)
-{
-	return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
-			KVM_VM_DATA_BASE);
-}
-
-/*Function for translating guest address to host address*/
-
-static inline void *to_host(struct kvm *kvm, void *addr)
-{
-	return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
-			+ kvm->arch.vm_base);
-}
-
-/* Get host context of the vcpu */
-static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
-{
-	union context *ctx = &vcpu->arch.host;
-	return to_guest(vcpu->kvm, ctx);
-}
-
-/* Get guest context of the vcpu */
-static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
-{
-	union context *ctx = &vcpu->arch.guest;
-	return  to_guest(vcpu->kvm, ctx);
-}
-
-/* kvm get exit data from gvmm! */
-static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
-{
-	return &vcpu->arch.exit_data;
-}
-
-/*kvm get vcpu ioreq for kvm module!*/
-static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p_ctl_data;
-
-	if (vcpu) {
-		p_ctl_data = kvm_get_exit_data(vcpu);
-		if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
-			return &p_ctl_data->u.ioreq;
-	}
-
-	return NULL;
-}
-
-#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
deleted file mode 100644
index f1e17d3d6cd9..000000000000
--- a/arch/ia64/kvm/mmio.c
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * mmio.c: MMIO emulation components.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
- *
- * Copyright (c) 2007 Intel Corporation  KVM support.
- * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- * Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-
-#include "vcpu.h"
-
-static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
-{
-	VLSAPIC_XTP(v) = val;
-}
-
-/*
- * LSAPIC OFFSET
- */
-#define PIB_LOW_HALF(ofst)     !(ofst & (1 << 20))
-#define PIB_OFST_INTA          0x1E0000
-#define PIB_OFST_XTP           0x1E0008
-
-/*
- * execute write IPI op.
- */
-static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
-					uint64_t addr, uint64_t data)
-{
-	struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
-	unsigned long psr;
-
-	local_irq_save(psr);
-
-	p->exit_reason = EXIT_REASON_IPI;
-	p->u.ipi_data.addr.val = addr;
-	p->u.ipi_data.data.val = data;
-	vmm_transition(current_vcpu);
-
-	local_irq_restore(psr);
-
-}
-
-void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
-			unsigned long length, unsigned long val)
-{
-	addr &= (PIB_SIZE - 1);
-
-	switch (addr) {
-	case PIB_OFST_INTA:
-		panic_vm(v, "Undefined write on PIB INTA\n");
-		break;
-	case PIB_OFST_XTP:
-		if (length == 1) {
-			vlsapic_write_xtp(v, val);
-		} else {
-			panic_vm(v, "Undefined write on PIB XTP\n");
-		}
-		break;
-	default:
-		if (PIB_LOW_HALF(addr)) {
-			/*Lower half */
-			if (length != 8)
-				panic_vm(v, "Can't LHF write with size %ld!\n",
-						length);
-			else
-				vlsapic_write_ipi(v, addr, val);
-		} else {   /*Upper half */
-			panic_vm(v, "IPI-UHF write %lx\n", addr);
-		}
-		break;
-	}
-}
-
-unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
-		unsigned long length)
-{
-	uint64_t result = 0;
-
-	addr &= (PIB_SIZE - 1);
-
-	switch (addr) {
-	case PIB_OFST_INTA:
-		if (length == 1) /* 1 byte load */
-			; /* There is no i8259, there is no INTA access*/
-		else
-			panic_vm(v, "Undefined read on PIB INTA\n");
-
-		break;
-	case PIB_OFST_XTP:
-		if (length == 1) {
-			result = VLSAPIC_XTP(v);
-		} else {
-			panic_vm(v, "Undefined read on PIB XTP\n");
-		}
-		break;
-	default:
-		panic_vm(v, "Undefined addr access for lsapic!\n");
-		break;
-	}
-	return result;
-}
-
-static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
-					u16 s, int ma, int dir)
-{
-	unsigned long iot;
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	unsigned long psr;
-
-	iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
-
-	local_irq_save(psr);
-
-	/*Intercept the access for PIB range*/
-	if (iot == GPFN_PIB) {
-		if (!dir)
-			lsapic_write(vcpu, src_pa, s, *dest);
-		else
-			*dest = lsapic_read(vcpu, src_pa, s);
-		goto out;
-	}
-	p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
-	p->u.ioreq.addr = src_pa;
-	p->u.ioreq.size = s;
-	p->u.ioreq.dir = dir;
-	if (dir == IOREQ_WRITE)
-		p->u.ioreq.data = *dest;
-	p->u.ioreq.state = STATE_IOREQ_READY;
-	vmm_transition(vcpu);
-
-	if (p->u.ioreq.state == STATE_IORESP_READY) {
-		if (dir == IOREQ_READ)
-			/* it's necessary to ensure zero extending */
-			*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
-	} else
-		panic_vm(vcpu, "Unhandled mmio access returned!\n");
-out:
-	local_irq_restore(psr);
-	return ;
-}
-
-/*
-   dir 1: read 0:write
-   inst_type 0:integer 1:floating point
- */
-#define SL_INTEGER	0	/* store/load interger*/
-#define SL_FLOATING	1     	/* store/load floating*/
-
-void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
-{
-	struct kvm_pt_regs *regs;
-	IA64_BUNDLE bundle;
-	int slot, dir = 0;
-	int inst_type = -1;
-	u16 size = 0;
-	u64 data, slot1a, slot1b, temp, update_reg;
-	s32 imm;
-	INST64 inst;
-
-	regs = vcpu_regs(vcpu);
-
-	if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
-		/* if fetch code fail, return and try again */
-		return;
-	}
-	slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
-	if (!slot)
-		inst.inst = bundle.slot0;
-	else if (slot == 1) {
-		slot1a = bundle.slot1a;
-		slot1b = bundle.slot1b;
-		inst.inst = slot1a + (slot1b << 18);
-	} else if (slot == 2)
-		inst.inst = bundle.slot2;
-
-	/* Integer Load/Store */
-	if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
-		inst_type = SL_INTEGER;
-		size = (inst.M1.x6 & 0x3);
-		if ((inst.M1.x6 >> 2) > 0xb) {
-			/*write*/
-			dir = IOREQ_WRITE;
-			data = vcpu_get_gr(vcpu, inst.M4.r2);
-		} else if ((inst.M1.x6 >> 2) < 0xb) {
-			/*read*/
-			dir = IOREQ_READ;
-		}
-	} else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
-		/* Integer Load + Reg update */
-		inst_type = SL_INTEGER;
-		dir = IOREQ_READ;
-		size = (inst.M2.x6 & 0x3);
-		temp = vcpu_get_gr(vcpu, inst.M2.r3);
-		update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
-		temp += update_reg;
-		vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
-	} else if (inst.M3.major == 5) {
-		/*Integer Load/Store + Imm update*/
-		inst_type = SL_INTEGER;
-		size = (inst.M3.x6&0x3);
-		if ((inst.M5.x6 >> 2) > 0xb) {
-			/*write*/
-			dir = IOREQ_WRITE;
-			data = vcpu_get_gr(vcpu, inst.M5.r2);
-			temp = vcpu_get_gr(vcpu, inst.M5.r3);
-			imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
-				(inst.M5.imm7 << 23);
-			temp += imm >> 23;
-			vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
-
-		} else if ((inst.M3.x6 >> 2) < 0xb) {
-			/*read*/
-			dir = IOREQ_READ;
-			temp = vcpu_get_gr(vcpu, inst.M3.r3);
-			imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
-				(inst.M3.imm7 << 23);
-			temp += imm >> 23;
-			vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
-
-		}
-	} else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
-				&& inst.M9.m == 0 && inst.M9.x == 0) {
-		/* Floating-point spill*/
-		struct ia64_fpreg v;
-
-		inst_type = SL_FLOATING;
-		dir = IOREQ_WRITE;
-		vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
-		/* Write high word. FIXME: this is a kludge!  */
-		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
-			    ma, IOREQ_WRITE);
-		data = v.u.bits[0];
-		size = 3;
-	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
-		/* Floating-point spill + Imm update */
-		struct ia64_fpreg v;
-
-		inst_type = SL_FLOATING;
-		dir = IOREQ_WRITE;
-		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-		temp = vcpu_get_gr(vcpu, inst.M10.r3);
-		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-			(inst.M10.imm7 << 23);
-		temp += imm >> 23;
-		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-
-		/* Write high word.FIXME: this is a kludge!  */
-		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
-			    8, ma, IOREQ_WRITE);
-		data = v.u.bits[0];
-		size = 3;
-	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
-		/* Floating-point stf8 + Imm update */
-		struct ia64_fpreg v;
-		inst_type = SL_FLOATING;
-		dir = IOREQ_WRITE;
-		size = 3;
-		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-		data = v.u.bits[0]; /* Significand.  */
-		temp = vcpu_get_gr(vcpu, inst.M10.r3);
-		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-			(inst.M10.imm7 << 23);
-		temp += imm >> 23;
-		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-	} else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
-			&& inst.M15.x6 <= 0x2f) {
-		temp = vcpu_get_gr(vcpu, inst.M15.r3);
-		imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
-			(inst.M15.imm7 << 23);
-		temp += imm >> 23;
-		vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
-
-		vcpu_increment_iip(vcpu);
-		return;
-	} else if (inst.M12.major == 6 && inst.M12.m == 1
-			&& inst.M12.x == 1 && inst.M12.x6 == 1) {
-		/* Floating-point Load Pair + Imm ldfp8 M12*/
-		struct ia64_fpreg v;
-
-		inst_type = SL_FLOATING;
-		dir = IOREQ_READ;
-		size = 8;     /*ldfd*/
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-		v.u.bits[0] = data;
-		v.u.bits[1] = 0x1003E;
-		vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
-		padr += 8;
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-		v.u.bits[0] = data;
-		v.u.bits[1] = 0x1003E;
-		vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
-		padr += 8;
-		vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
-		vcpu_increment_iip(vcpu);
-		return;
-	} else {
-		inst_type = -1;
-		panic_vm(vcpu, "Unsupported MMIO access instruction! "
-				"Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
-				bundle.i64[0], bundle.i64[1]);
-	}
-
-	size = 1 << size;
-	if (dir == IOREQ_WRITE) {
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-	} else {
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-		if (inst_type == SL_INTEGER)
-			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
-		else
-			panic_vm(vcpu, "Unsupported instruction type!\n");
-
-	}
-	vcpu_increment_iip(vcpu);
-}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
deleted file mode 100644
index f793be3effff..000000000000
--- a/arch/ia64/kvm/optvfault.S
+++ /dev/null
@@ -1,1090 +0,0 @@
-/*
- * arch/ia64/kvm/optvfault.S
- * optimize virtualization fault handler
- *
- * Copyright (C) 2006 Intel Co
- *	Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- * Copyright (C) 2008 Intel Co
- *      Add the support for Tukwila processors.
- *	Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-#include <asm/kvm_host.h>
-
-#include "vti.h"
-#include "asm-offsets.h"
-
-#define ACCE_MOV_FROM_AR
-#define ACCE_MOV_FROM_RR
-#define ACCE_MOV_TO_RR
-#define ACCE_RSM
-#define ACCE_SSM
-#define ACCE_MOV_TO_PSR
-#define ACCE_THASH
-
-#define VMX_VPS_SYNC_READ			\
-	add r16=VMM_VPD_BASE_OFFSET,r21;	\
-	mov r17 = b0;				\
-	mov r18 = r24;				\
-	mov r19 = r25;				\
-	mov r20 = r31;				\
-	;;					\
-{.mii;						\
-	ld8 r16 = [r16];			\
-	nop 0x0;				\
-	mov r24 = ip;				\
-	;;					\
-};						\
-{.mmb;						\
-	add r24=0x20, r24;			\
-	mov r25 =r16;				\
-	br.sptk.many kvm_vps_sync_read;		\
-};						\
-	mov b0 = r17;				\
-	mov r24 = r18;				\
-	mov r25 = r19;				\
-	mov r31 = r20
-
-ENTRY(kvm_vps_entry)
-	adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
-	;;
-	ld8 r29 = [r29]
-	;;
-	add r29 = r29, r30
-	;;
-	mov b0 = r29
-	br.sptk.many b0
-END(kvm_vps_entry)
-
-/*
- *	Inputs:
- *	r24 : return address
- *  	r25 : vpd
- *	r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_read)
-	movl r30 = PAL_VPS_SYNC_READ
-	;;
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_read)
-
-/*
- *	Inputs:
- *	r24 : return address
- *  	r25 : vpd
- *	r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_write)
-	movl r30 = PAL_VPS_SYNC_WRITE
-	;;
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_write)
-
-/*
- *	Inputs:
- *	r23 : pr
- *	r24 : guest b0
- *  	r25 : vpd
- *
- */
-GLOBAL_ENTRY(kvm_vps_resume_normal)
-	movl r30 = PAL_VPS_RESUME_NORMAL
-	;;
-	mov pr=r23,-2
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_normal)
-
-/*
- *	Inputs:
- *	r23 : pr
- *	r24 : guest b0
- *  	r25 : vpd
- *	r17 : isr
- */
-GLOBAL_ENTRY(kvm_vps_resume_handler)
-	movl r30 = PAL_VPS_RESUME_HANDLER
-	;;
-	ld8 r26=[r25]
-	shr r17=r17,IA64_ISR_IR_BIT
-	;;
-	dep r26=r17,r26,63,1   // bit 63 of r26 indicate whether enable CFLE
-	mov pr=r23,-2
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_handler)
-
-//mov r1=ar3
-GLOBAL_ENTRY(kvm_asm_mov_from_ar)
-#ifndef ACCE_MOV_FROM_AR
-	br.many kvm_virtualization_fault_back
-#endif
-	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-	extr.u r17=r25,6,7
-	;;
-	ld8 r18=[r18]
-	mov r19=ar.itc
-	mov r24=b0
-	;;
-	add r19=r19,r18
-	addl r20=@gprel(asm_mov_to_reg),gp
-	;;
-	st8 [r16] = r19
-	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-	shladd r17=r17,4,r20
-	;;
-	mov b0=r17
-	br.sptk.few b0
-	;;
-END(kvm_asm_mov_from_ar)
-
-/*
- * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
- * clock as it's source for emulating the ITC. This version will be
- * copied on top of the original version if the host is determined to
- * be an SN2.
- */
-GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
-	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-	movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
-
-	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-	extr.u r17=r25,6,7
-	mov r24=b0
-	;;
-	ld8 r18=[r18]
-	ld8 r19=[r19]
-	addl r20=@gprel(asm_mov_to_reg),gp
-	;;
-	add r19=r19,r18
-	shladd r17=r17,4,r20
-	;;
-	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-	st8 [r16] = r19
-	mov b0=r17
-	br.sptk.few b0
-	;;
-END(kvm_asm_mov_from_ar_sn2)
-
-
-
-// mov r1=rr[r3]
-GLOBAL_ENTRY(kvm_asm_mov_from_rr)
-#ifndef ACCE_MOV_FROM_RR
-	br.many kvm_virtualization_fault_back
-#endif
-	extr.u r16=r25,20,7
-	extr.u r17=r25,6,7
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
-	shladd r16=r16,4,r20
-	mov r24=b0
-	;;
-	add r27=VMM_VCPU_VRR0_OFFSET,r21
-	mov b0=r16
-	br.many b0
-	;;
-kvm_asm_mov_from_rr_back_1:
-	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-	adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
-	shr.u r26=r19,61
-	;;
-	shladd r17=r17,4,r22
-	shladd r27=r26,3,r27
-	;;
-	ld8 r19=[r27]
-	mov b0=r17
-	br.many b0
-END(kvm_asm_mov_from_rr)
-
-
-// mov rr[r3]=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_rr)
-#ifndef ACCE_MOV_TO_RR
-	br.many kvm_virtualization_fault_back
-#endif
-	extr.u r16=r25,20,7
-	extr.u r17=r25,13,7
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
-	shladd r16=r16,4,r20
-	mov r22=b0
-	;;
-	add r27=VMM_VCPU_VRR0_OFFSET,r21
-	mov b0=r16
-	br.many b0
-	;;
-kvm_asm_mov_to_rr_back_1:
-	adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
-	shr.u r23=r19,61
-	shladd r17=r17,4,r20
-	;;
-	//if rr6, go back
-	cmp.eq p6,p0=6,r23
-	mov b0=r22
-	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
-	;;
-	mov r28=r19
-	mov b0=r17
-	br.many b0
-kvm_asm_mov_to_rr_back_2:
-	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-	shladd r27=r23,3,r27
-	;; // vrr.rid<<4 |0xe
-	st8 [r27]=r19
-	mov b0=r30
-	;;
-	extr.u r16=r19,8,26
-	extr.u r18 =r19,2,6
-	mov r17 =0xe
-	;;
-	shladd r16 = r16, 4, r17
-	extr.u r19 =r19,0,8
-	;;
-	shl r16 = r16,8
-	;;
-	add r19 = r19, r16
-	;; //set ve 1
-	dep r19=-1,r19,0,1
-	cmp.lt p6,p0=14,r18
-	;;
-	(p6) mov r18=14
-	;;
-	(p6) dep r19=r18,r19,2,6
-	;;
-	cmp.eq p6,p0=0,r23
-	;;
-	cmp.eq.or p6,p0=4,r23
-	;;
-	adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	(p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-	;;
-	ld4 r16=[r16]
-	cmp.eq p7,p0=r0,r0
-	(p6) shladd r17=r23,1,r17
-	;;
-	(p6) st8 [r17]=r19
-	(p6) tbit.nz p6,p7=r16,0
-	;;
-	(p7) mov rr[r28]=r19
-	mov r24=r22
-	br.many b0
-END(kvm_asm_mov_to_rr)
-
-
-//rsm
-GLOBAL_ENTRY(kvm_asm_rsm)
-#ifndef ACCE_RSM
-	br.many kvm_virtualization_fault_back
-#endif
-	VMX_VPS_SYNC_READ
-	;;
-	extr.u r26=r25,6,21
-	extr.u r27=r25,31,2
-	;;
-	extr.u r28=r25,36,1
-	dep r26=r27,r26,21,2
-	;;
-	add r17=VPD_VPSR_START_OFFSET,r16
-	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	//r26 is imm24
-	dep r26=r28,r26,23,1
-	;;
-	ld8 r18=[r17]
-	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
-	ld4 r23=[r22]
-	sub r27=-1,r26
-	mov r24=b0
-	;;
-	mov r20=cr.ipsr
-	or r28=r27,r28
-	and r19=r18,r27
-	;;
-	st8 [r17]=r19
-	and r20=r20,r28
-	/* Comment it out due to short of fp lazy alorgithm support
-	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-	;;
-	ld8 r27=[r27]
-	;;
-	tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
-	;;
-	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-	*/
-	;;
-	mov cr.ipsr=r20
-	tbit.nz p6,p0=r23,0
-	;;
-	tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
-	(p6) br.dptk kvm_resume_to_guest_with_sync
-	;;
-	add r26=VMM_VCPU_META_RR0_OFFSET,r21
-	add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-	dep r23=-1,r23,0,1
-	;;
-	ld8 r26=[r26]
-	ld8 r27=[r27]
-	st4 [r22]=r23
-	dep.z r28=4,61,3
-	;;
-	mov rr[r0]=r26
-	;;
-	mov rr[r28]=r27
-	;;
-	srlz.d
-	br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_rsm)
-
-
-//ssm
-GLOBAL_ENTRY(kvm_asm_ssm)
-#ifndef ACCE_SSM
-	br.many kvm_virtualization_fault_back
-#endif
-	VMX_VPS_SYNC_READ
-	;;
-	extr.u r26=r25,6,21
-	extr.u r27=r25,31,2
-	;;
-	extr.u r28=r25,36,1
-	dep r26=r27,r26,21,2
-	;;  //r26 is imm24
-	add r27=VPD_VPSR_START_OFFSET,r16
-	dep r26=r28,r26,23,1
-	;;  //r19 vpsr
-	ld8 r29=[r27]
-	mov r24=b0
-	;;
-	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	mov r20=cr.ipsr
-	or r19=r29,r26
-	;;
-	ld4 r23=[r22]
-	st8 [r27]=r19
-	or r20=r20,r26
-	;;
-	mov cr.ipsr=r20
-	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-	;;
-	and r19=r28,r19
-	tbit.z p6,p0=r23,0
-	;;
-	cmp.ne.or p6,p0=r28,r19
-	(p6) br.dptk kvm_asm_ssm_1
-	;;
-	add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-	add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-	dep r23=0,r23,0,1
-	;;
-	ld8 r26=[r26]
-	ld8 r27=[r27]
-	st4 [r22]=r23
-	dep.z r28=4,61,3
-	;;
-	mov rr[r0]=r26
-	;;
-	mov rr[r28]=r27
-	;;
-	srlz.d
-	;;
-kvm_asm_ssm_1:
-	tbit.nz p6,p0=r29,IA64_PSR_I_BIT
-	;;
-	tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
-	(p6) br.dptk kvm_resume_to_guest_with_sync
-	;;
-	add r29=VPD_VTPR_START_OFFSET,r16
-	add r30=VPD_VHPI_START_OFFSET,r16
-	;;
-	ld8 r29=[r29]
-	ld8 r30=[r30]
-	;;
-	extr.u r17=r29,4,4
-	extr.u r18=r29,16,1
-	;;
-	dep r17=r18,r17,4,1
-	;;
-	cmp.gt p6,p0=r30,r17
-	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
-	br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_ssm)
-
-
-//mov psr.l=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_psr)
-#ifndef ACCE_MOV_TO_PSR
-	br.many kvm_virtualization_fault_back
-#endif
-	VMX_VPS_SYNC_READ
-	;;
-	extr.u r26=r25,13,7 //r2
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
-	shladd r26=r26,4,r20
-	mov r24=b0
-	;;
-	add r27=VPD_VPSR_START_OFFSET,r16
-	mov b0=r26
-	br.many b0
-	;;
-kvm_asm_mov_to_psr_back:
-	ld8 r17=[r27]
-	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	dep r19=0,r19,32,32
-	;;
-	ld4 r23=[r22]
-	dep r18=0,r17,0,32
-	;;
-	add r30=r18,r19
-	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-	;;
-	st8 [r27]=r30
-	and r27=r28,r30
-	and r29=r28,r17
-	;;
-	cmp.eq p5,p0=r29,r27
-	cmp.eq p6,p7=r28,r27
-	(p5) br.many kvm_asm_mov_to_psr_1
-	;;
-	//virtual to physical
-	(p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
-	(p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-	(p7) dep r23=-1,r23,0,1
-	;;
-	//physical to virtual
-	(p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-	(p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-	(p6) dep r23=0,r23,0,1
-	;;
-	ld8 r26=[r26]
-	ld8 r27=[r27]
-	st4 [r22]=r23
-	dep.z r28=4,61,3
-	;;
-	mov rr[r0]=r26
-	;;
-	mov rr[r28]=r27
-	;;
-	srlz.d
-	;;
-kvm_asm_mov_to_psr_1:
-	mov r20=cr.ipsr
-	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
-	;;
-	or r19=r19,r28
-	dep r20=0,r20,0,32
-	;;
-	add r20=r19,r20
-	mov b0=r24
-	;;
-	/* Comment it out due to short of fp lazy algorithm support
-	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-	;;
-	ld8 r27=[r27]
-	;;
-	tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
-	;;
-	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-	;;
-	*/
-	mov cr.ipsr=r20
-	cmp.ne p6,p0=r0,r0
-	;;
-	tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
-	tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
-	(p6) br.dpnt.few kvm_resume_to_guest_with_sync
-	;;
-	add r29=VPD_VTPR_START_OFFSET,r16
-	add r30=VPD_VHPI_START_OFFSET,r16
-	;;
-	ld8 r29=[r29]
-	ld8 r30=[r30]
-	;;
-	extr.u r17=r29,4,4
-	extr.u r18=r29,16,1
-	;;
-	dep r17=r18,r17,4,1
-	;;
-	cmp.gt p6,p0=r30,r17
-	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
-	br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_mov_to_psr)
-
-
-ENTRY(kvm_asm_dispatch_vexirq)
-//increment iip
-	mov r17 = b0
-	mov r18 = r31
-{.mii
-	add r25=VMM_VPD_BASE_OFFSET,r21
-	nop 0x0
-	mov r24 = ip
-	;;
-}
-{.mmb
-	add r24 = 0x20, r24
-	ld8 r25 = [r25]
-	br.sptk.many kvm_vps_sync_write
-}
-	mov b0 =r17
-	mov r16=cr.ipsr
-	mov r31 = r18
-	mov r19 = 37
-	;;
-	extr.u r17=r16,IA64_PSR_RI_BIT,2
-	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-	;;
-	(p6) mov r18=cr.iip
-	(p6) mov r17=r0
-	(p7) add r17=1,r17
-	;;
-	(p6) add r18=0x10,r18
-	dep r16=r17,r16,IA64_PSR_RI_BIT,2
-	;;
-	(p6) mov cr.iip=r18
-	mov cr.ipsr=r16
-	mov r30 =1
-	br.many kvm_dispatch_vexirq
-END(kvm_asm_dispatch_vexirq)
-
-// thash
-// TODO: add support when pta.vf = 1
-GLOBAL_ENTRY(kvm_asm_thash)
-#ifndef ACCE_THASH
-	br.many kvm_virtualization_fault_back
-#endif
-	extr.u r17=r25,20,7		// get r3 from opcode in r25
-	extr.u r18=r25,6,7		// get r1 from opcode in r25
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
-	shladd r17=r17,4,r20	// get addr of MOVE_FROM_REG(r17)
-	adds r16=VMM_VPD_BASE_OFFSET,r21	// get vcpu.arch.priveregs
-	;;
-	mov r24=b0
-	;;
-	ld8 r16=[r16]		// get VPD addr
-	mov b0=r17
-	br.many b0			// r19 return value
-	;;
-kvm_asm_thash_back1:
-	shr.u r23=r19,61		// get RR number
-	adds r28=VMM_VCPU_VRR0_OFFSET,r21	// get vcpu->arch.vrr[0]'s addr
-	adds r16=VMM_VPD_VPTA_OFFSET,r16	// get vpta
-	;;
-	shladd r27=r23,3,r28	// get vcpu->arch.vrr[r23]'s addr
-	ld8 r17=[r16]		// get PTA
-	mov r26=1
-	;;
-	extr.u r29=r17,2,6	// get pta.size
-	ld8 r28=[r27]		// get vcpu->arch.vrr[r23]'s value
-	;;
-	mov b0=r24
-	//Fallback to C if pta.vf is set
-	tbit.nz p6,p0=r17, 8
-	;;
-	(p6) mov r24=EVENT_THASH
-	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
-	extr.u r28=r28,2,6	// get rr.ps
-	shl r22=r26,r29		// 1UL << pta.size
-	;;
-	shr.u r23=r19,r28	// vaddr >> rr.ps
-	adds r26=3,r29		// pta.size + 3
-	shl r27=r17,3		// pta << 3
-	;;
-	shl r23=r23,3		// (vaddr >> rr.ps) << 3
-	shr.u r27=r27,r26	// (pta << 3) >> (pta.size+3)
-	movl r16=7<<61
-	;;
-	adds r22=-1,r22		// (1UL << pta.size) - 1
-	shl r27=r27,r29		// ((pta<<3)>>(pta.size+3))<<pta.size
-	and r19=r19,r16		// vaddr & VRN_MASK
-	;;
-	and r22=r22,r23		// vhpt_offset
-	or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
-	adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
-	;;
-	or r19=r19,r22		// calc pval
-	shladd r17=r18,4,r26
-	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-	;;
-	mov b0=r17
-	br.many b0
-END(kvm_asm_thash)
-
-#define MOV_TO_REG0	\
-{;			\
-	nop.b 0x0;		\
-	nop.b 0x0;		\
-	nop.b 0x0;		\
-	;;			\
-};
-
-
-#define MOV_TO_REG(n)	\
-{;			\
-	mov r##n##=r19;	\
-	mov b0=r30;	\
-	br.sptk.many b0;	\
-	;;			\
-};
-
-
-#define MOV_FROM_REG(n)	\
-{;				\
-	mov r19=r##n##;		\
-	mov b0=r30;		\
-	br.sptk.many b0;		\
-	;;				\
-};
-
-
-#define MOV_TO_BANK0_REG(n)			\
-ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);	\
-{;						\
-	mov r26=r2;				\
-	mov r2=r19;				\
-	bsw.1;					\
-	;;						\
-};						\
-{;						\
-	mov r##n##=r2;				\
-	nop.b 0x0;					\
-	bsw.0;					\
-	;;						\
-};						\
-{;						\
-	mov r2=r26;				\
-	mov b0=r30;				\
-	br.sptk.many b0;				\
-	;;						\
-};						\
-END(asm_mov_to_bank0_reg##n##)
-
-
-#define MOV_FROM_BANK0_REG(n)			\
-ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);	\
-{;						\
-	mov r26=r2;				\
-	nop.b 0x0;					\
-	bsw.1;					\
-	;;						\
-};						\
-{;						\
-	mov r2=r##n##;				\
-	nop.b 0x0;					\
-	bsw.0;					\
-	;;						\
-};						\
-{;						\
-	mov r19=r2;				\
-	mov r2=r26;				\
-	mov b0=r30;				\
-};						\
-{;						\
-	nop.b 0x0;					\
-	nop.b 0x0;					\
-	br.sptk.many b0;				\
-	;;						\
-};						\
-END(asm_mov_from_bank0_reg##n##)
-
-
-#define JMP_TO_MOV_TO_BANK0_REG(n)		\
-{;						\
-	nop.b 0x0;					\
-	nop.b 0x0;					\
-	br.sptk.many asm_mov_to_bank0_reg##n##;	\
-	;;						\
-}
-
-
-#define JMP_TO_MOV_FROM_BANK0_REG(n)		\
-{;						\
-	nop.b 0x0;					\
-	nop.b 0x0;					\
-	br.sptk.many asm_mov_from_bank0_reg##n##;	\
-	;;						\
-}
-
-
-MOV_FROM_BANK0_REG(16)
-MOV_FROM_BANK0_REG(17)
-MOV_FROM_BANK0_REG(18)
-MOV_FROM_BANK0_REG(19)
-MOV_FROM_BANK0_REG(20)
-MOV_FROM_BANK0_REG(21)
-MOV_FROM_BANK0_REG(22)
-MOV_FROM_BANK0_REG(23)
-MOV_FROM_BANK0_REG(24)
-MOV_FROM_BANK0_REG(25)
-MOV_FROM_BANK0_REG(26)
-MOV_FROM_BANK0_REG(27)
-MOV_FROM_BANK0_REG(28)
-MOV_FROM_BANK0_REG(29)
-MOV_FROM_BANK0_REG(30)
-MOV_FROM_BANK0_REG(31)
-
-
-// mov from reg table
-ENTRY(asm_mov_from_reg)
-	MOV_FROM_REG(0)
-	MOV_FROM_REG(1)
-	MOV_FROM_REG(2)
-	MOV_FROM_REG(3)
-	MOV_FROM_REG(4)
-	MOV_FROM_REG(5)
-	MOV_FROM_REG(6)
-	MOV_FROM_REG(7)
-	MOV_FROM_REG(8)
-	MOV_FROM_REG(9)
-	MOV_FROM_REG(10)
-	MOV_FROM_REG(11)
-	MOV_FROM_REG(12)
-	MOV_FROM_REG(13)
-	MOV_FROM_REG(14)
-	MOV_FROM_REG(15)
-	JMP_TO_MOV_FROM_BANK0_REG(16)
-	JMP_TO_MOV_FROM_BANK0_REG(17)
-	JMP_TO_MOV_FROM_BANK0_REG(18)
-	JMP_TO_MOV_FROM_BANK0_REG(19)
-	JMP_TO_MOV_FROM_BANK0_REG(20)
-	JMP_TO_MOV_FROM_BANK0_REG(21)
-	JMP_TO_MOV_FROM_BANK0_REG(22)
-	JMP_TO_MOV_FROM_BANK0_REG(23)
-	JMP_TO_MOV_FROM_BANK0_REG(24)
-	JMP_TO_MOV_FROM_BANK0_REG(25)
-	JMP_TO_MOV_FROM_BANK0_REG(26)
-	JMP_TO_MOV_FROM_BANK0_REG(27)
-	JMP_TO_MOV_FROM_BANK0_REG(28)
-	JMP_TO_MOV_FROM_BANK0_REG(29)
-	JMP_TO_MOV_FROM_BANK0_REG(30)
-	JMP_TO_MOV_FROM_BANK0_REG(31)
-	MOV_FROM_REG(32)
-	MOV_FROM_REG(33)
-	MOV_FROM_REG(34)
-	MOV_FROM_REG(35)
-	MOV_FROM_REG(36)
-	MOV_FROM_REG(37)
-	MOV_FROM_REG(38)
-	MOV_FROM_REG(39)
-	MOV_FROM_REG(40)
-	MOV_FROM_REG(41)
-	MOV_FROM_REG(42)
-	MOV_FROM_REG(43)
-	MOV_FROM_REG(44)
-	MOV_FROM_REG(45)
-	MOV_FROM_REG(46)
-	MOV_FROM_REG(47)
-	MOV_FROM_REG(48)
-	MOV_FROM_REG(49)
-	MOV_FROM_REG(50)
-	MOV_FROM_REG(51)
-	MOV_FROM_REG(52)
-	MOV_FROM_REG(53)
-	MOV_FROM_REG(54)
-	MOV_FROM_REG(55)
-	MOV_FROM_REG(56)
-	MOV_FROM_REG(57)
-	MOV_FROM_REG(58)
-	MOV_FROM_REG(59)
-	MOV_FROM_REG(60)
-	MOV_FROM_REG(61)
-	MOV_FROM_REG(62)
-	MOV_FROM_REG(63)
-	MOV_FROM_REG(64)
-	MOV_FROM_REG(65)
-	MOV_FROM_REG(66)
-	MOV_FROM_REG(67)
-	MOV_FROM_REG(68)
-	MOV_FROM_REG(69)
-	MOV_FROM_REG(70)
-	MOV_FROM_REG(71)
-	MOV_FROM_REG(72)
-	MOV_FROM_REG(73)
-	MOV_FROM_REG(74)
-	MOV_FROM_REG(75)
-	MOV_FROM_REG(76)
-	MOV_FROM_REG(77)
-	MOV_FROM_REG(78)
-	MOV_FROM_REG(79)
-	MOV_FROM_REG(80)
-	MOV_FROM_REG(81)
-	MOV_FROM_REG(82)
-	MOV_FROM_REG(83)
-	MOV_FROM_REG(84)
-	MOV_FROM_REG(85)
-	MOV_FROM_REG(86)
-	MOV_FROM_REG(87)
-	MOV_FROM_REG(88)
-	MOV_FROM_REG(89)
-	MOV_FROM_REG(90)
-	MOV_FROM_REG(91)
-	MOV_FROM_REG(92)
-	MOV_FROM_REG(93)
-	MOV_FROM_REG(94)
-	MOV_FROM_REG(95)
-	MOV_FROM_REG(96)
-	MOV_FROM_REG(97)
-	MOV_FROM_REG(98)
-	MOV_FROM_REG(99)
-	MOV_FROM_REG(100)
-	MOV_FROM_REG(101)
-	MOV_FROM_REG(102)
-	MOV_FROM_REG(103)
-	MOV_FROM_REG(104)
-	MOV_FROM_REG(105)
-	MOV_FROM_REG(106)
-	MOV_FROM_REG(107)
-	MOV_FROM_REG(108)
-	MOV_FROM_REG(109)
-	MOV_FROM_REG(110)
-	MOV_FROM_REG(111)
-	MOV_FROM_REG(112)
-	MOV_FROM_REG(113)
-	MOV_FROM_REG(114)
-	MOV_FROM_REG(115)
-	MOV_FROM_REG(116)
-	MOV_FROM_REG(117)
-	MOV_FROM_REG(118)
-	MOV_FROM_REG(119)
-	MOV_FROM_REG(120)
-	MOV_FROM_REG(121)
-	MOV_FROM_REG(122)
-	MOV_FROM_REG(123)
-	MOV_FROM_REG(124)
-	MOV_FROM_REG(125)
-	MOV_FROM_REG(126)
-	MOV_FROM_REG(127)
-END(asm_mov_from_reg)
-
-
-/* must be in bank 0
- * parameter:
- * r31: pr
- * r24: b0
- */
-ENTRY(kvm_resume_to_guest_with_sync)
-	adds r19=VMM_VPD_BASE_OFFSET,r21
-	mov r16 = r31
-	mov r17 = r24
-	;;
-{.mii
-	ld8 r25 =[r19]
-	nop 0x0
-	mov r24 = ip
-	;;
-}
-{.mmb
-	add r24 =0x20, r24
-	nop 0x0
-	br.sptk.many kvm_vps_sync_write
-}
-
-	mov r31 = r16
-	mov r24 =r17
-	;;
-	br.sptk.many kvm_resume_to_guest
-END(kvm_resume_to_guest_with_sync)
-
-ENTRY(kvm_resume_to_guest)
-	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-	ld8 r1 =[r16]
-	adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
-	;;
-	mov r16=cr.ipsr
-	;;
-	ld8 r20 = [r20]
-	adds r19=VMM_VPD_BASE_OFFSET,r21
-	;;
-	ld8 r25=[r19]
-	extr.u r17=r16,IA64_PSR_RI_BIT,2
-	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-	;;
-	(p6) mov r18=cr.iip
-	(p6) mov r17=r0
-	;;
-	(p6) add r18=0x10,r18
-	(p7) add r17=1,r17
-	;;
-	(p6) mov cr.iip=r18
-	dep r16=r17,r16,IA64_PSR_RI_BIT,2
-	;;
-	mov cr.ipsr=r16
-	adds r19= VPD_VPSR_START_OFFSET,r25
-	add r28=PAL_VPS_RESUME_NORMAL,r20
-	add r29=PAL_VPS_RESUME_HANDLER,r20
-	;;
-	ld8 r19=[r19]
-	mov b0=r29
-	mov r27=cr.isr
-	;;
-	tbit.z p6,p7 = r19,IA64_PSR_IC_BIT		// p7=vpsr.ic
-	shr r27=r27,IA64_ISR_IR_BIT
-	;;
-	(p6) ld8 r26=[r25]
-	(p7) mov b0=r28
-	;;
-	(p6) dep r26=r27,r26,63,1
-	mov pr=r31,-2
-	br.sptk.many b0             // call pal service
-	;;
-END(kvm_resume_to_guest)
-
-
-MOV_TO_BANK0_REG(16)
-MOV_TO_BANK0_REG(17)
-MOV_TO_BANK0_REG(18)
-MOV_TO_BANK0_REG(19)
-MOV_TO_BANK0_REG(20)
-MOV_TO_BANK0_REG(21)
-MOV_TO_BANK0_REG(22)
-MOV_TO_BANK0_REG(23)
-MOV_TO_BANK0_REG(24)
-MOV_TO_BANK0_REG(25)
-MOV_TO_BANK0_REG(26)
-MOV_TO_BANK0_REG(27)
-MOV_TO_BANK0_REG(28)
-MOV_TO_BANK0_REG(29)
-MOV_TO_BANK0_REG(30)
-MOV_TO_BANK0_REG(31)
-
-
-// mov to reg table
-ENTRY(asm_mov_to_reg)
-	MOV_TO_REG0
-	MOV_TO_REG(1)
-	MOV_TO_REG(2)
-	MOV_TO_REG(3)
-	MOV_TO_REG(4)
-	MOV_TO_REG(5)
-	MOV_TO_REG(6)
-	MOV_TO_REG(7)
-	MOV_TO_REG(8)
-	MOV_TO_REG(9)
-	MOV_TO_REG(10)
-	MOV_TO_REG(11)
-	MOV_TO_REG(12)
-	MOV_TO_REG(13)
-	MOV_TO_REG(14)
-	MOV_TO_REG(15)
-	JMP_TO_MOV_TO_BANK0_REG(16)
-	JMP_TO_MOV_TO_BANK0_REG(17)
-	JMP_TO_MOV_TO_BANK0_REG(18)
-	JMP_TO_MOV_TO_BANK0_REG(19)
-	JMP_TO_MOV_TO_BANK0_REG(20)
-	JMP_TO_MOV_TO_BANK0_REG(21)
-	JMP_TO_MOV_TO_BANK0_REG(22)
-	JMP_TO_MOV_TO_BANK0_REG(23)
-	JMP_TO_MOV_TO_BANK0_REG(24)
-	JMP_TO_MOV_TO_BANK0_REG(25)
-	JMP_TO_MOV_TO_BANK0_REG(26)
-	JMP_TO_MOV_TO_BANK0_REG(27)
-	JMP_TO_MOV_TO_BANK0_REG(28)
-	JMP_TO_MOV_TO_BANK0_REG(29)
-	JMP_TO_MOV_TO_BANK0_REG(30)
-	JMP_TO_MOV_TO_BANK0_REG(31)
-	MOV_TO_REG(32)
-	MOV_TO_REG(33)
-	MOV_TO_REG(34)
-	MOV_TO_REG(35)
-	MOV_TO_REG(36)
-	MOV_TO_REG(37)
-	MOV_TO_REG(38)
-	MOV_TO_REG(39)
-	MOV_TO_REG(40)
-	MOV_TO_REG(41)
-	MOV_TO_REG(42)
-	MOV_TO_REG(43)
-	MOV_TO_REG(44)
-	MOV_TO_REG(45)
-	MOV_TO_REG(46)
-	MOV_TO_REG(47)
-	MOV_TO_REG(48)
-	MOV_TO_REG(49)
-	MOV_TO_REG(50)
-	MOV_TO_REG(51)
-	MOV_TO_REG(52)
-	MOV_TO_REG(53)
-	MOV_TO_REG(54)
-	MOV_TO_REG(55)
-	MOV_TO_REG(56)
-	MOV_TO_REG(57)
-	MOV_TO_REG(58)
-	MOV_TO_REG(59)
-	MOV_TO_REG(60)
-	MOV_TO_REG(61)
-	MOV_TO_REG(62)
-	MOV_TO_REG(63)
-	MOV_TO_REG(64)
-	MOV_TO_REG(65)
-	MOV_TO_REG(66)
-	MOV_TO_REG(67)
-	MOV_TO_REG(68)
-	MOV_TO_REG(69)
-	MOV_TO_REG(70)
-	MOV_TO_REG(71)
-	MOV_TO_REG(72)
-	MOV_TO_REG(73)
-	MOV_TO_REG(74)
-	MOV_TO_REG(75)
-	MOV_TO_REG(76)
-	MOV_TO_REG(77)
-	MOV_TO_REG(78)
-	MOV_TO_REG(79)
-	MOV_TO_REG(80)
-	MOV_TO_REG(81)
-	MOV_TO_REG(82)
-	MOV_TO_REG(83)
-	MOV_TO_REG(84)
-	MOV_TO_REG(85)
-	MOV_TO_REG(86)
-	MOV_TO_REG(87)
-	MOV_TO_REG(88)
-	MOV_TO_REG(89)
-	MOV_TO_REG(90)
-	MOV_TO_REG(91)
-	MOV_TO_REG(92)
-	MOV_TO_REG(93)
-	MOV_TO_REG(94)
-	MOV_TO_REG(95)
-	MOV_TO_REG(96)
-	MOV_TO_REG(97)
-	MOV_TO_REG(98)
-	MOV_TO_REG(99)
-	MOV_TO_REG(100)
-	MOV_TO_REG(101)
-	MOV_TO_REG(102)
-	MOV_TO_REG(103)
-	MOV_TO_REG(104)
-	MOV_TO_REG(105)
-	MOV_TO_REG(106)
-	MOV_TO_REG(107)
-	MOV_TO_REG(108)
-	MOV_TO_REG(109)
-	MOV_TO_REG(110)
-	MOV_TO_REG(111)
-	MOV_TO_REG(112)
-	MOV_TO_REG(113)
-	MOV_TO_REG(114)
-	MOV_TO_REG(115)
-	MOV_TO_REG(116)
-	MOV_TO_REG(117)
-	MOV_TO_REG(118)
-	MOV_TO_REG(119)
-	MOV_TO_REG(120)
-	MOV_TO_REG(121)
-	MOV_TO_REG(122)
-	MOV_TO_REG(123)
-	MOV_TO_REG(124)
-	MOV_TO_REG(125)
-	MOV_TO_REG(126)
-	MOV_TO_REG(127)
-END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
deleted file mode 100644
index b0398740b48d..000000000000
--- a/arch/ia64/kvm/process.c
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * process.c: handle interruption inject for guests.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *  	Shaofan Li (Susue Li) <susie.li@intel.com>
- *  	Xiaoyan Feng (Fleming Feng)  <fleming.feng@intel.com>
- *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  	Xiantao Zhang (xiantao.zhang@intel.com)
- */
-#include "vcpu.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/fpswa.h>
-#include <asm/kregs.h>
-#include <asm/tlb.h>
-
-fpswa_interface_t *vmm_fpswa_interface;
-
-#define IA64_VHPT_TRANS_VECTOR			0x0000
-#define IA64_INST_TLB_VECTOR			0x0400
-#define IA64_DATA_TLB_VECTOR			0x0800
-#define IA64_ALT_INST_TLB_VECTOR		0x0c00
-#define IA64_ALT_DATA_TLB_VECTOR		0x1000
-#define IA64_DATA_NESTED_TLB_VECTOR		0x1400
-#define IA64_INST_KEY_MISS_VECTOR		0x1800
-#define IA64_DATA_KEY_MISS_VECTOR		0x1c00
-#define IA64_DIRTY_BIT_VECTOR			0x2000
-#define IA64_INST_ACCESS_BIT_VECTOR		0x2400
-#define IA64_DATA_ACCESS_BIT_VECTOR		0x2800
-#define IA64_BREAK_VECTOR			0x2c00
-#define IA64_EXTINT_VECTOR			0x3000
-#define IA64_PAGE_NOT_PRESENT_VECTOR		0x5000
-#define IA64_KEY_PERMISSION_VECTOR		0x5100
-#define IA64_INST_ACCESS_RIGHTS_VECTOR		0x5200
-#define IA64_DATA_ACCESS_RIGHTS_VECTOR		0x5300
-#define IA64_GENEX_VECTOR			0x5400
-#define IA64_DISABLED_FPREG_VECTOR		0x5500
-#define IA64_NAT_CONSUMPTION_VECTOR		0x5600
-#define IA64_SPECULATION_VECTOR		0x5700 /* UNUSED */
-#define IA64_DEBUG_VECTOR			0x5900
-#define IA64_UNALIGNED_REF_VECTOR		0x5a00
-#define IA64_UNSUPPORTED_DATA_REF_VECTOR	0x5b00
-#define IA64_FP_FAULT_VECTOR			0x5c00
-#define IA64_FP_TRAP_VECTOR			0x5d00
-#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 	0x5e00
-#define IA64_TAKEN_BRANCH_TRAP_VECTOR		0x5f00
-#define IA64_SINGLE_STEP_TRAP_VECTOR		0x6000
-
-/* SDM vol2 5.5 - IVA based interruption handling */
-#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
-			IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT |    	\
-			IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
-
-#define DOMN_PAL_REQUEST    0x110000
-#define DOMN_SAL_REQUEST    0x110001
-
-static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
-	0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
-	0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
-	0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
-	0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
-	0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
-	0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
-	0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
-};
-
-static void collect_interruption(struct kvm_vcpu *vcpu)
-{
-	u64 ipsr;
-	u64 vdcr;
-	u64 vifs;
-	unsigned long vpsr;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	vpsr = vcpu_get_psr(vcpu);
-	vcpu_bsw0(vcpu);
-	if (vpsr & IA64_PSR_IC) {
-
-		/* Sync mpsr id/da/dd/ss/ed bits to vipsr
-		 * since after guest do rfi, we still want these bits on in
-		 * mpsr
-		 */
-
-		ipsr = regs->cr_ipsr;
-		vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
-					| IA64_PSR_DD | IA64_PSR_SS
-					| IA64_PSR_ED));
-		vcpu_set_ipsr(vcpu, vpsr);
-
-		/* Currently, for trap, we do not advance IIP to next
-		 * instruction. That's because we assume caller already
-		 * set up IIP correctly
-		 */
-
-		vcpu_set_iip(vcpu , regs->cr_iip);
-
-		/* set vifs.v to zero */
-		vifs = VCPU(vcpu, ifs);
-		vifs &= ~IA64_IFS_V;
-		vcpu_set_ifs(vcpu, vifs);
-
-		vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
-	}
-
-	vdcr = VCPU(vcpu, dcr);
-
-	/* Set guest psr
-	 * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
-	 * be: set to the value of dcr.be
-	 * pp: set to the value of dcr.pp
-	 */
-	vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
-	vpsr |= (vdcr & IA64_DCR_BE);
-
-	/* VDCR pp bit position is different from VPSR pp bit */
-	if (vdcr & IA64_DCR_PP) {
-		vpsr |= IA64_PSR_PP;
-	} else {
-		vpsr &= ~IA64_PSR_PP;
-	}
-
-	vcpu_set_psr(vcpu, vpsr);
-
-}
-
-void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
-{
-	u64 viva;
-	struct kvm_pt_regs *regs;
-	union ia64_isr pt_isr;
-
-	regs = vcpu_regs(vcpu);
-
-	/* clear cr.isr.ir (incomplete register frame)*/
-	pt_isr.val = VMX(vcpu, cr_isr);
-	pt_isr.ir = 0;
-	VMX(vcpu, cr_isr) = pt_isr.val;
-
-	collect_interruption(vcpu);
-
-	viva = vcpu_get_iva(vcpu);
-	regs->cr_iip = viva + vec;
-}
-
-static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
-{
-	union ia64_rr rr, rr1;
-
-	rr.val = vcpu_get_rr(vcpu, ifa);
-	rr1.val = 0;
-	rr1.ps = rr.ps;
-	rr1.rid = rr.rid;
-	return (rr1.val);
-}
-
-/*
- * Set vIFA & vITIR & vIHA, when vPSR.ic =1
- * Parameter:
- *  set_ifa: if true, set vIFA
- *  set_itir: if true, set vITIR
- *  set_iha: if true, set vIHA
- */
-void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
-		int set_ifa, int set_itir, int set_iha)
-{
-	long vpsr;
-	u64 value;
-
-	vpsr = VCPU(vcpu, vpsr);
-	/* Vol2, Table 8-1 */
-	if (vpsr & IA64_PSR_IC) {
-		if (set_ifa)
-			vcpu_set_ifa(vcpu, vadr);
-		if (set_itir) {
-			value = vcpu_get_itir_on_fault(vcpu, vadr);
-			vcpu_set_itir(vcpu, value);
-		}
-
-		if (set_iha) {
-			value = vcpu_thash(vcpu, vadr);
-			vcpu_set_iha(vcpu, value);
-		}
-	}
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR, IHA */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-	inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
-}
-
-/*
- * Instruction TLB Fault
- *  @ Instruction TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR, IHA */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-	inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
-}
-
-/*
- * Data Nested TLB Fault
- *  @ Data Nested TLB Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void nested_dtlb(struct kvm_vcpu *vcpu)
-{
-	inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
-}
-
-/*
- * Alternate Data TLB Fault
- *  @ Alternate Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
-}
-
-/* Deal with:
- *  VHPT Translation Vector
- */
-static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR, IHA*/
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-	inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-}
-
-/*
- * VHPT Instruction Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_vhpt_fault(vcpu, vadr);
-}
-
-/*
- * VHPT Data Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_vhpt_fault(vcpu, vadr);
-}
-
-/*
- * Deal with:
- *  General Exception vector
- */
-void _general_exception(struct kvm_vcpu *vcpu)
-{
-	inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
-}
-
-/*
- * Illegal Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_op(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Illegal Dependency Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_dep(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Reserved Register/Field Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rsv_reg_field(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-/*
- * Privileged Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-
-void privilege_op(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Unimplement Data Address Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void unimpl_daddr(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Privileged Register Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void privilege_reg(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/* Deal with
- *  Nat consumption vector
- * Parameter:
- *  vaddr: Optional, if t == REGISTER
- */
-static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
-						enum tlb_miss_type t)
-{
-	/* If vPSR.ic && t == DATA/INST, IFA */
-	if (t == DATA || t == INSTRUCTION) {
-		/* IFA */
-		set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
-	}
-
-	inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
-}
-
-/*
- * Instruction Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_nat_consumption_fault(vcpu, vadr, INSTRUCTION);
-}
-
-/*
- * Register Nat Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rnat_consumption(struct kvm_vcpu *vcpu)
-{
-	_nat_consumption_fault(vcpu, 0, REGISTER);
-}
-
-/*
- * Data Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_nat_consumption_fault(vcpu, vadr, DATA);
-}
-
-/* Deal with
- *  Page not present vector
- */
-static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
-}
-
-void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	__page_not_present(vcpu, vadr);
-}
-
-void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	__page_not_present(vcpu, vadr);
-}
-
-/* Deal with
- *  Data access rights vector
- */
-void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
-}
-
-fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
-		unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
-		unsigned long *ifs, struct kvm_pt_regs *regs)
-{
-	fp_state_t fp_state;
-	fpswa_ret_t ret;
-	struct kvm_vcpu *vcpu = current_vcpu;
-
-	uint64_t old_rr7 = ia64_get_rr(7UL<<61);
-
-	if (!vmm_fpswa_interface)
-		return (fpswa_ret_t) {-1, 0, 0, 0};
-
-	memset(&fp_state, 0, sizeof(fp_state_t));
-
-	/*
-	 * compute fp_state.  only FP registers f6 - f11 are used by the
-	 * vmm, so set those bits in the mask and set the low volatile
-	 * pointer to point to these registers.
-	 */
-	fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
-
-	fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
-
-   /*
-	 * unsigned long (*EFI_FPSWA) (
-	 *      unsigned long    trap_type,
-	 *      void             *Bundle,
-	 *      unsigned long    *pipsr,
-	 *      unsigned long    *pfsr,
-	 *      unsigned long    *pisr,
-	 *      unsigned long    *ppreds,
-	 *      unsigned long    *pifs,
-	 *      void             *fp_state);
-	 */
-	/*Call host fpswa interface directly to virtualize
-	 *guest fpswa request!
-	 */
-	ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
-	ia64_srlz_d();
-
-	ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
-			ipsr, fpsr, isr, pr, ifs, &fp_state);
-	ia64_set_rr(7UL << 61, old_rr7);
-	ia64_srlz_d();
-	return ret;
-}
-
-/*
- * Handle floating-point assist faults and traps for domain.
- */
-unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
-					unsigned long isr)
-{
-	struct kvm_vcpu *v = current_vcpu;
-	IA64_BUNDLE bundle;
-	unsigned long fault_ip;
-	fpswa_ret_t ret;
-
-	fault_ip = regs->cr_iip;
-	/*
-	 * When the FP trap occurs, the trapping instruction is completed.
-	 * If ipsr.ri == 0, there is the trapping instruction in previous
-	 * bundle.
-	 */
-	if (!fp_fault && (ia64_psr(regs)->ri == 0))
-		fault_ip -= 16;
-
-	if (fetch_code(v, fault_ip, &bundle))
-		return -EAGAIN;
-
-	if (!bundle.i64[0] && !bundle.i64[1])
-		return -EACCES;
-
-	ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
-			&isr, &regs->pr, &regs->cr_ifs, regs);
-	return ret.status;
-}
-
-void reflect_interruption(u64 ifa, u64 isr, u64 iim,
-		u64 vec, struct kvm_pt_regs *regs)
-{
-	u64 vector;
-	int status ;
-	struct kvm_vcpu *vcpu = current_vcpu;
-	u64 vpsr = VCPU(vcpu, vpsr);
-
-	vector = vec2off[vec];
-
-	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-		panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
-						"with psr.ic = 0\n", vector);
-		return;
-	}
-
-	switch (vec) {
-	case 32: 	/*IA64_FP_FAULT_VECTOR*/
-		status = vmm_handle_fpu_swa(1, regs, isr);
-		if (!status) {
-			vcpu_increment_iip(vcpu);
-			return;
-		} else if (-EAGAIN == status)
-			return;
-		break;
-	case 33:	/*IA64_FP_TRAP_VECTOR*/
-		status = vmm_handle_fpu_swa(0, regs, isr);
-		if (!status)
-			return ;
-		break;
-	}
-
-	VCPU(vcpu, isr) = isr;
-	VCPU(vcpu, iipa) = regs->cr_iip;
-	if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
-		VCPU(vcpu, iim) = iim;
-	else
-		set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
-
-	inject_guest_interruption(vcpu, vector);
-}
-
-static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
-						unsigned long arg)
-{
-	struct thash_data *data;
-	unsigned long gpa, poff;
-
-	if (!is_physical_mode(vcpu)) {
-		/* Depends on caller to provide the DTR or DTC mapping.*/
-		data = vtlb_lookup(vcpu, arg, D_TLB);
-		if (data)
-			gpa = data->page_flags & _PAGE_PPN_MASK;
-		else {
-			data = vhpt_lookup(arg);
-			if (!data)
-				return 0;
-			gpa = data->gpaddr & _PAGE_PPN_MASK;
-		}
-
-		poff = arg & (PSIZE(data->ps) - 1);
-		arg = PAGEALIGN(gpa, data->ps) | poff;
-	}
-	arg = kvm_gpa_to_mpa(arg << 1 >> 1);
-
-	return (unsigned long)__va(arg);
-}
-
-static void set_pal_call_data(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	unsigned long gr28 = vcpu_get_gr(vcpu, 28);
-	unsigned long gr29 = vcpu_get_gr(vcpu, 29);
-	unsigned long gr30 = vcpu_get_gr(vcpu, 30);
-
-	/*FIXME:For static and stacked convention, firmware
-	 * has put the parameters in gr28-gr31 before
-	 * break to vmm  !!*/
-
-	switch (gr28) {
-	case PAL_PERF_MON_INFO:
-	case PAL_HALT_INFO:
-		p->u.pal_data.gr29 =  kvm_trans_pal_call_args(vcpu, gr29);
-		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-		break;
-	case PAL_BRAND_INFO:
-		p->u.pal_data.gr29 = gr29;
-		p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
-		break;
-	default:
-		p->u.pal_data.gr29 = gr29;
-		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-	}
-	p->u.pal_data.gr28 = gr28;
-	p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
-
-	p->exit_reason = EXIT_REASON_PAL_CALL;
-}
-
-static void get_pal_call_result(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-		vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
-		vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
-		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
-		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
-	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-static void set_sal_call_data(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
-	p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
-	p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
-	p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
-	p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
-	p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
-	p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
-	p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
-	p->exit_reason = EXIT_REASON_SAL_CALL;
-}
-
-static void get_sal_call_result(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
-		vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
-		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
-		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
-	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
-		unsigned long isr, unsigned long iim)
-{
-	struct kvm_vcpu *v = current_vcpu;
-	long psr;
-
-	if (ia64_psr(regs)->cpl == 0) {
-		/* Allow hypercalls only when cpl = 0.  */
-		if (iim == DOMN_PAL_REQUEST) {
-			local_irq_save(psr);
-			set_pal_call_data(v);
-			vmm_transition(v);
-			get_pal_call_result(v);
-			vcpu_increment_iip(v);
-			local_irq_restore(psr);
-			return;
-		} else if (iim == DOMN_SAL_REQUEST) {
-			local_irq_save(psr);
-			set_sal_call_data(v);
-			vmm_transition(v);
-			get_sal_call_result(v);
-			vcpu_increment_iip(v);
-			local_irq_restore(psr);
-			return;
-		}
-	}
-	reflect_interruption(ifa, isr, iim, 11, regs);
-}
-
-void check_pending_irq(struct kvm_vcpu *vcpu)
-{
-	int  mask, h_pending, h_inservice;
-	u64 isr;
-	unsigned long  vpsr;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	h_pending = highest_pending_irq(vcpu);
-	if (h_pending == NULL_VECTOR) {
-		update_vhpi(vcpu, NULL_VECTOR);
-		return;
-	}
-	h_inservice = highest_inservice_irq(vcpu);
-
-	vpsr = VCPU(vcpu, vpsr);
-	mask = irq_masked(vcpu, h_pending, h_inservice);
-	if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
-		isr = vpsr & IA64_PSR_RI;
-		update_vhpi(vcpu, h_pending);
-		reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-	} else if (mask == IRQ_MASKED_BY_INSVC) {
-		if (VCPU(vcpu, vhpi))
-			update_vhpi(vcpu, NULL_VECTOR);
-	} else {
-		/* masked by vpsr.i or vtpr.*/
-		update_vhpi(vcpu, h_pending);
-	}
-}
-
-static void generate_exirq(struct kvm_vcpu *vcpu)
-{
-	unsigned  vpsr;
-	uint64_t isr;
-
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	vpsr = VCPU(vcpu, vpsr);
-	isr = vpsr & IA64_PSR_RI;
-	if (!(vpsr & IA64_PSR_IC))
-		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
-	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-}
-
-void vhpi_detection(struct kvm_vcpu *vcpu)
-{
-	uint64_t    threshold, vhpi;
-	union ia64_tpr       vtpr;
-	struct ia64_psr vpsr;
-
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-	vtpr.val = VCPU(vcpu, tpr);
-
-	threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
-	vhpi = VCPU(vcpu, vhpi);
-	if (vhpi > threshold) {
-		/* interrupt actived*/
-		generate_exirq(vcpu);
-	}
-}
-
-void leave_hypervisor_tail(void)
-{
-	struct kvm_vcpu *v = current_vcpu;
-
-	if (VMX(v, timer_check)) {
-		VMX(v, timer_check) = 0;
-		if (VMX(v, itc_check)) {
-			if (vcpu_get_itc(v) > VCPU(v, itm)) {
-				if (!(VCPU(v, itv) & (1 << 16))) {
-					vcpu_pend_interrupt(v, VCPU(v, itv)
-							& 0xff);
-					VMX(v, itc_check) = 0;
-				} else {
-					v->arch.timer_pending = 1;
-				}
-				VMX(v, last_itc) = VCPU(v, itm) + 1;
-			}
-		}
-	}
-
-	rmb();
-	if (v->arch.irq_new_pending) {
-		v->arch.irq_new_pending = 0;
-		VMX(v, irq_check) = 0;
-		check_pending_irq(v);
-		return;
-	}
-	if (VMX(v, irq_check)) {
-		VMX(v, irq_check) = 0;
-		vhpi_detection(v);
-	}
-}
-
-static inline void handle_lds(struct kvm_pt_regs *regs)
-{
-	regs->cr_ipsr |= IA64_PSR_ED;
-}
-
-void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
-{
-	unsigned long pte;
-	union ia64_rr rr;
-
-	rr.val = ia64_get_rr(vadr);
-	pte =  vadr & _PAGE_PPN_MASK;
-	pte = pte | PHY_PAGE_WB;
-	thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
-	return;
-}
-
-void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
-{
-	unsigned long vpsr;
-	int type;
-
-	u64 vhpt_adr, gppa, pteval, rr, itir;
-	union ia64_isr misr;
-	union ia64_pta vpta;
-	struct thash_data *data;
-	struct kvm_vcpu *v = current_vcpu;
-
-	vpsr = VCPU(v, vpsr);
-	misr.val = VMX(v, cr_isr);
-
-	type = vec;
-
-	if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
-		if (vec == 2) {
-			if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
-				emulate_io_inst(v, ((vadr << 1) >> 1), 4);
-				return;
-			}
-		}
-		physical_tlb_miss(v, vadr, type);
-		return;
-	}
-	data = vtlb_lookup(v, vadr, type);
-	if (data != 0) {
-		if (type == D_TLB) {
-			gppa = (vadr & ((1UL << data->ps) - 1))
-				+ (data->ppn >> (data->ps - 12) << data->ps);
-			if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
-				if (data->pl >= ((regs->cr_ipsr >>
-						IA64_PSR_CPL0_BIT) & 3))
-					emulate_io_inst(v, gppa, data->ma);
-				else {
-					vcpu_set_isr(v, misr.val);
-					data_access_rights(v, vadr);
-				}
-				return ;
-			}
-		}
-		thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
-
-	} else if (type == D_TLB) {
-		if (misr.sp) {
-			handle_lds(regs);
-			return;
-		}
-
-		rr = vcpu_get_rr(v, vadr);
-		itir = rr & (RR_RID_MASK | RR_PS_MASK);
-
-		if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
-			if (vpsr & IA64_PSR_IC) {
-				vcpu_set_isr(v, misr.val);
-				alt_dtlb(v, vadr);
-			} else {
-				nested_dtlb(v);
-			}
-			return ;
-		}
-
-		vpta.val = vcpu_get_pta(v);
-		/* avoid recursively walking (short format) VHPT */
-
-		vhpt_adr = vcpu_thash(v, vadr);
-		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-			/* VHPT successfully read.  */
-			if (!(pteval & _PAGE_P)) {
-				if (vpsr & IA64_PSR_IC) {
-					vcpu_set_isr(v, misr.val);
-					dtlb_fault(v, vadr);
-				} else {
-					nested_dtlb(v);
-				}
-			} else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
-				thash_purge_and_insert(v, pteval, itir,
-								vadr, D_TLB);
-			} else if (vpsr & IA64_PSR_IC) {
-				vcpu_set_isr(v, misr.val);
-				dtlb_fault(v, vadr);
-			} else {
-				nested_dtlb(v);
-			}
-		} else {
-			/* Can't read VHPT.  */
-			if (vpsr & IA64_PSR_IC) {
-				vcpu_set_isr(v, misr.val);
-				dvhpt_fault(v, vadr);
-			} else {
-				nested_dtlb(v);
-			}
-		}
-	} else if (type == I_TLB) {
-		if (!(vpsr & IA64_PSR_IC))
-			misr.ni = 1;
-		if (!vhpt_enabled(v, vadr, INST_REF)) {
-			vcpu_set_isr(v, misr.val);
-			alt_itlb(v, vadr);
-			return;
-		}
-
-		vpta.val = vcpu_get_pta(v);
-
-		vhpt_adr = vcpu_thash(v, vadr);
-		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-			/* VHPT successfully read.  */
-			if (pteval & _PAGE_P) {
-				if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
-					vcpu_set_isr(v, misr.val);
-					itlb_fault(v, vadr);
-					return ;
-				}
-				rr = vcpu_get_rr(v, vadr);
-				itir = rr & (RR_RID_MASK | RR_PS_MASK);
-				thash_purge_and_insert(v, pteval, itir,
-							vadr, I_TLB);
-			} else {
-				vcpu_set_isr(v, misr.val);
-				inst_page_not_present(v, vadr);
-			}
-		} else {
-			vcpu_set_isr(v, misr.val);
-			ivhpt_fault(v, vadr);
-		}
-	}
-}
-
-void kvm_vexirq(struct kvm_vcpu *vcpu)
-{
-	u64 vpsr, isr;
-	struct kvm_pt_regs *regs;
-
-	regs = vcpu_regs(vcpu);
-	vpsr = VCPU(vcpu, vpsr);
-	isr = vpsr & IA64_PSR_RI;
-	reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
-}
-
-void kvm_ia64_handle_irq(struct kvm_vcpu *v)
-{
-	struct exit_ctl_data *p = &v->arch.exit_data;
-	long psr;
-
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-	vmm_transition(v);
-	local_irq_restore(psr);
-
-	VMX(v, timer_check) = 1;
-
-}
-
-static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
-{
-	u64 oldrid, moldrid, oldpsbits, vaddr;
-	struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
-	vaddr = p->vaddr;
-
-	oldrid = VMX(v, vrr[0]);
-	VMX(v, vrr[0]) = p->rr;
-	oldpsbits = VMX(v, psbits[0]);
-	VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
-	moldrid = ia64_get_rr(0x0);
-	ia64_set_rr(0x0, vrrtomrr(p->rr));
-	ia64_srlz_d();
-
-	vaddr = PAGEALIGN(vaddr, p->ps);
-	thash_purge_entries_remote(v, vaddr, p->ps);
-
-	VMX(v, vrr[0]) = oldrid;
-	VMX(v, psbits[0]) = oldpsbits;
-	ia64_set_rr(0x0, moldrid);
-	ia64_dv_serialize_data();
-}
-
-static void vcpu_do_resume(struct kvm_vcpu *vcpu)
-{
-	/*Re-init VHPT and VTLB once from resume*/
-	vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
-	thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
-	vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
-	thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
-
-	ia64_set_pta(vcpu->arch.vhpt.pta.val);
-}
-
-static void vmm_sanity_check(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
-		panic_vm(vcpu, "Failed to do vmm sanity check,"
-			"it maybe caused by crashed vmm!!\n\n");
-	}
-}
-
-static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
-{
-	vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/
-
-	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
-		vcpu_do_resume(vcpu);
-		return;
-	}
-
-	if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
-		thash_purge_all(vcpu);
-		return;
-	}
-
-	if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
-		while (vcpu->arch.ptc_g_count > 0)
-			ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
-	}
-}
-
-void vmm_transition(struct kvm_vcpu *vcpu)
-{
-	ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
-			1, 0, 0, 0, 0, 0);
-	vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
-	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
-						1, 0, 0, 0, 0, 0);
-	kvm_do_resume_op(vcpu);
-}
-
-void vmm_panic_handler(u64 vec)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	vmm_sanity = 0;
-	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
-			vec2off[vec]);
-}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
deleted file mode 100644
index 30897d44d61e..000000000000
--- a/arch/ia64/kvm/trampoline.S
+++ /dev/null
@@ -1,1038 +0,0 @@
-/* Save all processor states
- *
- * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
- * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include "asm-offsets.h"
-
-
-#define CTX(name)    VMM_CTX_##name##_OFFSET
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_BRANCH_REGS			\
-	add	r2 = CTX(B0),r32;		\
-	add	r3 = CTX(B1),r32;		\
-	mov	r16 = b0;			\
-	mov	r17 = b1;			\
-	;;					\
-	st8	[r2]=r16,16;			\
-	st8	[r3]=r17,16;			\
-	;;					\
-	mov	r16 = b2;			\
-	mov	r17 = b3;			\
-	;;					\
-	st8	[r2]=r16,16;			\
-	st8	[r3]=r17,16;			\
-	;;					\
-	mov	r16 = b4;			\
-	mov	r17 = b5;			\
-	;;					\
-	st8	[r2]=r16;   			\
-	st8	[r3]=r17;   			\
-	;;
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_BRANCH_REGS			\
-	add	r2 = CTX(B0),r33;		\
-	add	r3 = CTX(B1),r33;		\
-	;;					\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	b0 = r16;			\
-	mov	b1 = r17;			\
-	;;					\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	b2 = r16;			\
-	mov	b3 = r17;			\
-	;;					\
-	ld8	r16=[r2];   			\
-	ld8	r17=[r3];   			\
-	;;					\
-	mov	b4=r16;				\
-	mov	b5=r17;				\
-	;;
-
-
-	/*
-	 *	r32: context_t base address
-	 *	bsw == 1
-	 *	Save all bank1 general registers, r4 ~ r7
-	 */
-#define	SAVE_GENERAL_REGS			\
-	add	r2=CTX(R4),r32;			\
-	add	r3=CTX(R5),r32;			\
-	;;					\
-.mem.offset 0,0;        			\
-	st8.spill	[r2]=r4,16;		\
-.mem.offset 8,0;        			\
-	st8.spill	[r3]=r5,16;		\
-	;;					\
-.mem.offset 0,0;        			\
-	st8.spill	[r2]=r6,48;		\
-.mem.offset 8,0;        			\
-	st8.spill	[r3]=r7,48;		\
-	;;                          		\
-.mem.offset 0,0;        			\
-    st8.spill    [r2]=r12;			\
-.mem.offset 8,0;				\
-    st8.spill    [r3]=r13;			\
-    ;;
-
-	/*
-	 *	r33: context_t base address
-	 *	bsw == 1
-	 */
-#define	RESTORE_GENERAL_REGS			\
-	add	r2=CTX(R4),r33;			\
-	add	r3=CTX(R5),r33;			\
-	;;					\
-	ld8.fill	r4=[r2],16;		\
-	ld8.fill	r5=[r3],16;		\
-	;;					\
-	ld8.fill	r6=[r2],48;		\
-	ld8.fill	r7=[r3],48;		\
-	;;					\
-	ld8.fill    r12=[r2];			\
-	ld8.fill    r13 =[r3];			\
-	;;
-
-
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_KERNEL_REGS			\
-	add	r2 = CTX(KR0),r32;		\
-	add	r3 = CTX(KR1),r32;		\
-	mov	r16 = ar.k0;			\
-	mov	r17 = ar.k1;			\
-	;;		        		\
-	st8	[r2] = r16,16;			\
-	st8	[r3] = r17,16;			\
-	;;		        		\
-	mov	r16 = ar.k2;			\
-	mov	r17 = ar.k3;			\
-	;;		        		\
-	st8	[r2] = r16,16;			\
-	st8	[r3] = r17,16;			\
-	;;					\
-	mov	r16 = ar.k4;			\
-	mov	r17 = ar.k5;			\
-	;;				    	\
-	st8	[r2] = r16,16;			\
-	st8	[r3] = r17,16;			\
-	;;					\
-	mov	r16 = ar.k6;			\
-	mov	r17 = ar.k7;			\
-	;;		    			\
-	st8	[r2] = r16;     		\
-	st8	[r3] = r17;			\
-	;;
-
-
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_KERNEL_REGS			\
-	add	r2 = CTX(KR0),r33;		\
-	add	r3 = CTX(KR1),r33;		\
-	;;		    			\
-	ld8	r16=[r2],16;     		\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	ar.k0=r16;  			\
-	mov	ar.k1=r17;	    		\
-	;;		        		\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;		        		\
-	mov	ar.k2=r16;   			\
-	mov	ar.k3=r17;	    		\
-	;;		        		\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	ar.k4=r16;			\
-	mov	ar.k5=r17;	    		\
-	;;				    	\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	ar.k6=r16;  			\
-	mov	ar.k7=r17;	    		\
-	;;
-
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_APP_REGS				\
-	add  r2 = CTX(BSPSTORE),r32;		\
-	mov  r16 = ar.bspstore;			\
-	;;					\
-	st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
-	mov  r16 = ar.rnat;			\
-	;;					\
-	st8  [r2] = r16,CTX(FCR)-CTX(RNAT);	\
-	mov  r16 = ar.fcr;			\
-	;;					\
-	st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);	\
-	mov  r16 = ar.eflag;			\
-	;;					\
-	st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);	\
-	mov  r16 = ar.cflg;			\
-	;;					\
-	st8  [r2] = r16,CTX(FSR)-CTX(CFLG);	\
-	mov  r16 = ar.fsr;			\
-	;;					\
-	st8  [r2] = r16,CTX(FIR)-CTX(FSR);	\
-	mov  r16 = ar.fir;			\
-	;;					\
-	st8  [r2] = r16,CTX(FDR)-CTX(FIR);	\
-	mov  r16 = ar.fdr;			\
-	;;					\
-	st8  [r2] = r16,CTX(UNAT)-CTX(FDR);	\
-	mov  r16 = ar.unat;			\
-	;;					\
-	st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);	\
-	mov  r16 = ar.fpsr;			\
-	;;					\
-	st8  [r2] = r16,CTX(PFS)-CTX(FPSR);	\
-	mov  r16 = ar.pfs;			\
-	;;					\
-	st8  [r2] = r16,CTX(LC)-CTX(PFS);	\
-	mov  r16 = ar.lc;			\
-	;;					\
-	st8  [r2] = r16;			\
-	;;
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_APP_REGS			\
-	add  r2=CTX(BSPSTORE),r33;		\
-	;;					\
-	ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);	\
-	;;					\
-	mov  ar.bspstore=r16;			\
-	ld8  r16=[r2],CTX(FCR)-CTX(RNAT);	\
-	;;					\
-	mov  ar.rnat=r16;			\
-	ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);	\
-	;;					\
-	mov  ar.fcr=r16;			\
-	ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);	\
-	;;					\
-	mov  ar.eflag=r16;			\
-	ld8  r16=[r2],CTX(FSR)-CTX(CFLG);	\
-	;;					\
-	mov  ar.cflg=r16;			\
-	ld8  r16=[r2],CTX(FIR)-CTX(FSR);	\
-	;;					\
-	mov  ar.fsr=r16;			\
-	ld8  r16=[r2],CTX(FDR)-CTX(FIR);	\
-	;;					\
-	mov  ar.fir=r16;			\
-	ld8  r16=[r2],CTX(UNAT)-CTX(FDR);	\
-	;;					\
-	mov  ar.fdr=r16;			\
-	ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);	\
-	;;					\
-	mov  ar.unat=r16;			\
-	ld8  r16=[r2],CTX(PFS)-CTX(FPSR);	\
-	;;					\
-	mov  ar.fpsr=r16;			\
-	ld8  r16=[r2],CTX(LC)-CTX(PFS);		\
-	;;					\
-	mov  ar.pfs=r16;			\
-	ld8  r16=[r2];				\
-	;;					\
-	mov  ar.lc=r16;				\
-	;;
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_CTL_REGS				\
-	add	r2 = CTX(DCR),r32;		\
-	mov	r16 = cr.dcr;			\
-	;;					\
-	st8	[r2] = r16,CTX(IVA)-CTX(DCR);	\
-	;;                          		\
-	mov	r16 = cr.iva;			\
-	;;					\
-	st8	[r2] = r16,CTX(PTA)-CTX(IVA);	\
-	;;					\
-	mov r16 = cr.pta;			\
-	;;					\
-	st8 [r2] = r16 ;			\
-	;;
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_CTL_REGS				\
-	add	r2 = CTX(DCR),r33;	        	\
-	;;						\
-	ld8	r16 = [r2],CTX(IVA)-CTX(DCR);		\
-	;;                      			\
-	mov	cr.dcr = r16;				\
-	dv_serialize_data;				\
-	;;						\
-	ld8	r16 = [r2],CTX(PTA)-CTX(IVA);		\
-	;;						\
-	mov	cr.iva = r16;				\
-	dv_serialize_data;				\
-	;;						\
-	ld8 r16 = [r2];					\
-	;;						\
-	mov cr.pta = r16;				\
-	dv_serialize_data;				\
-	;;
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_REGION_REGS			\
-	add	r2=CTX(RR0),r32;		\
-	mov	r16=rr[r0];			\
-	dep.z	r18=1,61,3;			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	mov	r17=rr[r18];			\
-	dep.z	r18=2,61,3;			\
-	;;					\
-	st8	[r2]=r17,8;			\
-	mov	r16=rr[r18];			\
-	dep.z	r18=3,61,3;			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	mov	r17=rr[r18];			\
-	dep.z	r18=4,61,3;			\
-	;;					\
-	st8	[r2]=r17,8;			\
-	mov	r16=rr[r18];			\
-	dep.z	r18=5,61,3;			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	mov	r17=rr[r18];			\
-	dep.z	r18=7,61,3;			\
-	;;					\
-	st8	[r2]=r17,16;			\
-	mov	r16=rr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	;;
-
-	/*
-	 *	r33:context_t base address
-	 */
-#define	RESTORE_REGION_REGS	\
-	add	r2=CTX(RR0),r33;\
-	mov r18=r0;		\
-	;;			\
-	ld8	r20=[r2],8;	\
-	;;	/* rr0 */	\
-	ld8	r21=[r2],8;	\
-	;;	/* rr1 */	\
-	ld8	r22=[r2],8;	\
-	;;	/* rr2 */	\
-	ld8	r23=[r2],8;	\
-	;;	/* rr3 */	\
-	ld8	r24=[r2],8;	\
-	;;	/* rr4 */	\
-	ld8	r25=[r2],16;	\
-	;;	/* rr5 */	\
-	ld8	r27=[r2];	\
-	;;	/* rr7 */	\
-	mov rr[r18]=r20;	\
-	dep.z	r18=1,61,3;	\
-	;;  /* rr1 */		\
-	mov rr[r18]=r21;	\
-	dep.z	r18=2,61,3;	\
-	;;  /* rr2 */		\
-	mov rr[r18]=r22;	\
-	dep.z	r18=3,61,3;	\
-	;;  /* rr3 */		\
-	mov rr[r18]=r23;	\
-	dep.z	r18=4,61,3;	\
-	;;  /* rr4 */		\
-	mov rr[r18]=r24;	\
-	dep.z	r18=5,61,3;	\
-	;;  /* rr5 */		\
-	mov rr[r18]=r25;	\
-	dep.z	r18=7,61,3;	\
-	;;  /* rr7 */		\
-	mov rr[r18]=r27;	\
-	;;			\
-	srlz.i;			\
-	;;
-
-
-
-	/*
-	 *	r32:	context_t base address
-	 *	r36~r39:scratch registers
-	 */
-#define	SAVE_DEBUG_REGS				\
-	add	r2=CTX(IBR0),r32;		\
-	add	r3=CTX(DBR0),r32;		\
-	mov	r16=ibr[r0];			\
-	mov	r17=dbr[r0];			\
-	;;					\
-	st8	[r2]=r16,8; 			\
-	st8	[r3]=r17,8;	    		\
-	add	r18=1,r0;		    	\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=2,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=2,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=3,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=4,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=5,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=6,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=7,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	;;
-
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc are corrupted.
- */
-#define RESTORE_DEBUG_REGS			\
-	add	r2=CTX(IBR0),r33;		\
-	add	r3=CTX(DBR0),r33;		\
-	mov r16=7;    				\
-	mov r17=r0;				\
-	;;                    			\
-	mov ar.lc = r16;			\
-	;; 					\
-1:						\
-	ld8 r18=[r2],8;		    		\
-	ld8 r19=[r3],8;				\
-	;;					\
-	mov ibr[r17]=r18;			\
-	mov dbr[r17]=r19;			\
-	;;   					\
-	srlz.i;					\
-	;; 					\
-	add r17=1,r17;				\
-	br.cloop.sptk 1b;			\
-	;;
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_FPU_LOW				\
-	add	r2=CTX(F2),r32;			\
-	add	r3=CTX(F3),r32;			\
-	;;					\
-	stf.spill.nta	[r2]=f2,32;		\
-	stf.spill.nta	[r3]=f3,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f4,32;		\
-	stf.spill.nta	[r3]=f5,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f6,32;		\
-	stf.spill.nta	[r3]=f7,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f8,32;		\
-	stf.spill.nta	[r3]=f9,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f10,32;		\
-	stf.spill.nta	[r3]=f11,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f12,32;		\
-	stf.spill.nta	[r3]=f13,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f14,32;		\
-	stf.spill.nta	[r3]=f15,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f16,32;		\
-	stf.spill.nta	[r3]=f17,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f18,32;		\
-	stf.spill.nta	[r3]=f19,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f20,32;		\
-	stf.spill.nta	[r3]=f21,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f22,32;		\
-	stf.spill.nta	[r3]=f23,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f24,32;		\
-	stf.spill.nta	[r3]=f25,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f26,32;		\
-	stf.spill.nta	[r3]=f27,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f28,32;		\
-	stf.spill.nta	[r3]=f29,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f30;		\
-	stf.spill.nta	[r3]=f31;		\
-	;;
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_FPU_HIGH				\
-	add	r2=CTX(F32),r32;		\
-	add	r3=CTX(F33),r32;		\
-	;;					\
-	stf.spill.nta	[r2]=f32,32;		\
-	stf.spill.nta	[r3]=f33,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f34,32;		\
-	stf.spill.nta	[r3]=f35,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f36,32;		\
-	stf.spill.nta	[r3]=f37,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f38,32;		\
-	stf.spill.nta	[r3]=f39,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f40,32;		\
-	stf.spill.nta	[r3]=f41,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f42,32;		\
-	stf.spill.nta	[r3]=f43,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f44,32;		\
-	stf.spill.nta	[r3]=f45,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f46,32;		\
-	stf.spill.nta	[r3]=f47,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f48,32;		\
-	stf.spill.nta	[r3]=f49,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f50,32;		\
-	stf.spill.nta	[r3]=f51,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f52,32;		\
-	stf.spill.nta	[r3]=f53,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f54,32;		\
-	stf.spill.nta	[r3]=f55,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f56,32;		\
-	stf.spill.nta	[r3]=f57,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f58,32;		\
-	stf.spill.nta	[r3]=f59,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f60,32;		\
-	stf.spill.nta	[r3]=f61,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f62,32;		\
-	stf.spill.nta	[r3]=f63,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f64,32;		\
-	stf.spill.nta	[r3]=f65,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f66,32;		\
-	stf.spill.nta	[r3]=f67,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f68,32;		\
-	stf.spill.nta	[r3]=f69,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f70,32;		\
-	stf.spill.nta	[r3]=f71,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f72,32;		\
-	stf.spill.nta	[r3]=f73,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f74,32;		\
-	stf.spill.nta	[r3]=f75,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f76,32;		\
-	stf.spill.nta	[r3]=f77,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f78,32;		\
-	stf.spill.nta	[r3]=f79,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f80,32;		\
-	stf.spill.nta	[r3]=f81,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f82,32;		\
-	stf.spill.nta	[r3]=f83,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f84,32;		\
-	stf.spill.nta	[r3]=f85,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f86,32;		\
-	stf.spill.nta	[r3]=f87,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f88,32;		\
-	stf.spill.nta	[r3]=f89,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f90,32;		\
-	stf.spill.nta	[r3]=f91,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f92,32;		\
-	stf.spill.nta	[r3]=f93,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f94,32;		\
-	stf.spill.nta	[r3]=f95,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f96,32;		\
-	stf.spill.nta	[r3]=f97,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f98,32;		\
-	stf.spill.nta	[r3]=f99,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f100,32;		\
-	stf.spill.nta	[r3]=f101,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f102,32;		\
-	stf.spill.nta	[r3]=f103,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f104,32;		\
-	stf.spill.nta	[r3]=f105,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f106,32;		\
-	stf.spill.nta	[r3]=f107,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f108,32;		\
-	stf.spill.nta	[r3]=f109,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f110,32;		\
-	stf.spill.nta	[r3]=f111,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f112,32;		\
-	stf.spill.nta	[r3]=f113,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f114,32;		\
-	stf.spill.nta	[r3]=f115,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f116,32;		\
-	stf.spill.nta	[r3]=f117,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f118,32;		\
-	stf.spill.nta	[r3]=f119,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f120,32;		\
-	stf.spill.nta	[r3]=f121,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f122,32;		\
-	stf.spill.nta	[r3]=f123,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f124,32;		\
-	stf.spill.nta	[r3]=f125,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f126;		\
-	stf.spill.nta	[r3]=f127;		\
-	;;
-
-     /*
-      *      r33:    point to context_t structure
-      */
-#define	RESTORE_FPU_LOW				\
-    add     r2 = CTX(F2), r33;			\
-    add     r3 = CTX(F3), r33;			\
-    ;;						\
-    ldf.fill.nta f2 = [r2], 32;			\
-    ldf.fill.nta f3 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f4 = [r2], 32;			\
-    ldf.fill.nta f5 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f6 = [r2], 32;			\
-    ldf.fill.nta f7 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f8 = [r2], 32;			\
-    ldf.fill.nta f9 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f10 = [r2], 32;		\
-    ldf.fill.nta f11 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f12 = [r2], 32;		\
-    ldf.fill.nta f13 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f14 = [r2], 32;		\
-    ldf.fill.nta f15 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f16 = [r2], 32;		\
-    ldf.fill.nta f17 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f18 = [r2], 32;		\
-    ldf.fill.nta f19 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f20 = [r2], 32;		\
-    ldf.fill.nta f21 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f22 = [r2], 32;		\
-    ldf.fill.nta f23 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f24 = [r2], 32;		\
-    ldf.fill.nta f25 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f26 = [r2], 32;		\
-    ldf.fill.nta f27 = [r3], 32;		\
-	;;					\
-    ldf.fill.nta f28 = [r2], 32;		\
-    ldf.fill.nta f29 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f30 = [r2], 32;		\
-    ldf.fill.nta f31 = [r3], 32;		\
-    ;;
-
-
-
-    /*
-     *      r33:    point to context_t structure
-     */
-#define	RESTORE_FPU_HIGH			\
-    add     r2 = CTX(F32), r33;			\
-    add     r3 = CTX(F33), r33;			\
-    ;;						\
-    ldf.fill.nta f32 = [r2], 32;		\
-    ldf.fill.nta f33 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f34 = [r2], 32;		\
-    ldf.fill.nta f35 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f36 = [r2], 32;		\
-    ldf.fill.nta f37 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f38 = [r2], 32;		\
-    ldf.fill.nta f39 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f40 = [r2], 32;		\
-    ldf.fill.nta f41 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f42 = [r2], 32;		\
-    ldf.fill.nta f43 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f44 = [r2], 32;		\
-    ldf.fill.nta f45 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f46 = [r2], 32;		\
-    ldf.fill.nta f47 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f48 = [r2], 32;		\
-    ldf.fill.nta f49 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f50 = [r2], 32;		\
-    ldf.fill.nta f51 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f52 = [r2], 32;		\
-    ldf.fill.nta f53 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f54 = [r2], 32;		\
-    ldf.fill.nta f55 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f56 = [r2], 32;		\
-    ldf.fill.nta f57 = [r3], 32;   		\
-    ;;						\
-    ldf.fill.nta f58 = [r2], 32;		\
-    ldf.fill.nta f59 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f60 = [r2], 32;		\
-    ldf.fill.nta f61 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f62 = [r2], 32;		\
-    ldf.fill.nta f63 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f64 = [r2], 32;		\
-    ldf.fill.nta f65 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f66 = [r2], 32;		\
-    ldf.fill.nta f67 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f68 = [r2], 32;		\
-    ldf.fill.nta f69 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f70 = [r2], 32;		\
-    ldf.fill.nta f71 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f72 = [r2], 32;		\
-    ldf.fill.nta f73 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f74 = [r2], 32;		\
-    ldf.fill.nta f75 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f76 = [r2], 32;		\
-    ldf.fill.nta f77 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f78 = [r2], 32;		\
-    ldf.fill.nta f79 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f80 = [r2], 32;		\
-    ldf.fill.nta f81 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f82 = [r2], 32;		\
-    ldf.fill.nta f83 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f84 = [r2], 32;		\
-    ldf.fill.nta f85 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f86 = [r2], 32;		\
-    ldf.fill.nta f87 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f88 = [r2], 32;		\
-    ldf.fill.nta f89 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f90 = [r2], 32;		\
-    ldf.fill.nta f91 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f92 = [r2], 32;		\
-    ldf.fill.nta f93 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f94 = [r2], 32;		\
-    ldf.fill.nta f95 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f96 = [r2], 32;		\
-    ldf.fill.nta f97 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f98 = [r2], 32;		\
-    ldf.fill.nta f99 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f100 = [r2], 32;		\
-    ldf.fill.nta f101 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f102 = [r2], 32;		\
-    ldf.fill.nta f103 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f104 = [r2], 32;		\
-    ldf.fill.nta f105 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f106 = [r2], 32;		\
-    ldf.fill.nta f107 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f108 = [r2], 32;		\
-    ldf.fill.nta f109 = [r3], 32;   		\
-    ;;						\
-    ldf.fill.nta f110 = [r2], 32;		\
-    ldf.fill.nta f111 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f112 = [r2], 32;		\
-    ldf.fill.nta f113 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f114 = [r2], 32;		\
-    ldf.fill.nta f115 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f116 = [r2], 32;		\
-    ldf.fill.nta f117 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f118 = [r2], 32;		\
-    ldf.fill.nta f119 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f120 = [r2], 32;		\
-    ldf.fill.nta f121 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f122 = [r2], 32;		\
-    ldf.fill.nta f123 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f124 = [r2], 32;		\
-    ldf.fill.nta f125 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f126 = [r2], 32;		\
-    ldf.fill.nta f127 = [r3], 32;		\
-    ;;
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_PTK_REGS				\
-    add r2=CTX(PKR0), r32;			\
-    mov r16=7;    				\
-    ;;                         			\
-    mov ar.lc=r16;  				\
-    mov r17=r0;					\
-    ;;						\
-1:						\
-    mov r18=pkr[r17];				\
-    ;;                     			\
-    srlz.i;					\
-    ;; 						\
-    st8 [r2]=r18, 8;				\
-    ;;    					\
-    add r17 =1,r17;				\
-    ;;                     			\
-    br.cloop.sptk 1b;				\
-    ;;
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc are corrupted.
- */
-#define RESTORE_PTK_REGS	    		\
-    add r2=CTX(PKR0), r33;			\
-    mov r16=7;    				\
-    ;;                         			\
-    mov ar.lc=r16;  				\
-    mov r17=r0;					\
-    ;;						\
-1: 						\
-    ld8 r18=[r2], 8;				\
-    ;;						\
-    mov pkr[r17]=r18;				\
-    ;;    					\
-    srlz.i;					\
-    ;; 						\
-    add r17 =1,r17;				\
-    ;;                     			\
-    br.cloop.sptk 1b;				\
-    ;;
-
-
-/*
- * void vmm_trampoline( context_t * from,
- *			context_t * to)
- *
- * 	from:	r32
- *	to:	r33
- *  note: interrupt disabled before call this function.
- */
-GLOBAL_ENTRY(vmm_trampoline)
-    mov r16 = psr
-    adds r2 = CTX(PSR), r32
-    ;;
-    st8 [r2] = r16, 8       // psr
-    mov r17 = pr
-    ;;
-    st8 [r2] = r17, 8       // pr
-    mov r18 = ar.unat
-    ;;
-    st8 [r2] = r18
-    mov r17 = ar.rsc
-    ;;
-    adds r2 = CTX(RSC),r32
-    ;;
-    st8 [r2]= r17
-    mov ar.rsc =0
-    flushrs
-    ;;
-    SAVE_GENERAL_REGS
-    ;;
-    SAVE_KERNEL_REGS
-    ;;
-    SAVE_APP_REGS
-    ;;
-    SAVE_BRANCH_REGS
-    ;;
-    SAVE_CTL_REGS
-    ;;
-    SAVE_REGION_REGS
-    ;;
-    //SAVE_DEBUG_REGS
-    ;;
-    rsm  psr.dfl
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_LOW
-    ;;
-    rsm  psr.dfh
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_HIGH
-    ;;
-    SAVE_PTK_REGS
-    ;;
-    RESTORE_PTK_REGS
-    ;;
-    RESTORE_FPU_HIGH
-    ;;
-    RESTORE_FPU_LOW
-    ;;
-    //RESTORE_DEBUG_REGS
-    ;;
-    RESTORE_REGION_REGS
-    ;;
-    RESTORE_CTL_REGS
-    ;;
-    RESTORE_BRANCH_REGS
-    ;;
-    RESTORE_APP_REGS
-    ;;
-    RESTORE_KERNEL_REGS
-    ;;
-    RESTORE_GENERAL_REGS
-    ;;
-    adds r2=CTX(PSR), r33
-    ;;
-    ld8 r16=[r2], 8       // psr
-    ;;
-    mov psr.l=r16
-    ;;
-    srlz.d
-    ;;
-    ld8 r16=[r2], 8       // pr
-    ;;
-    mov pr =r16,-1
-    ld8 r16=[r2]       // unat
-    ;;
-    mov ar.unat=r16
-    ;;
-    adds r2=CTX(RSC),r33
-    ;;
-    ld8 r16 =[r2]
-    ;;
-    mov ar.rsc = r16
-    ;;
-    br.ret.sptk.few b0
-END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
deleted file mode 100644
index 958815c9787d..000000000000
--- a/arch/ia64/kvm/vcpu.c
+++ /dev/null
@@ -1,2209 +0,0 @@
-/*
- * kvm_vcpu.c: handling all virtual cpu related thing.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *  Shaofan Li (Susue Li) <susie.li@intel.com>
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <linux/kvm_host.h>
-#include <linux/types.h>
-
-#include <asm/processor.h>
-#include <asm/ia64regs.h>
-#include <asm/gcc_intrin.h>
-#include <asm/kregs.h>
-#include <asm/pgtable.h>
-#include <asm/tlb.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-
-/*
- * Special notes:
- * - Index by it/dt/rt sequence
- * - Only existing mode transitions are allowed in this table
- * - RSE is placed at lazy mode when emulating guest partial mode
- * - If gva happens to be rr0 and rr4, only allowed case is identity
- *   mapping (gva=gpa), or panic! (How?)
- */
-int mm_switch_table[8][8] = {
-	/*  2004/09/12(Kevin): Allow switch to self */
-	/*
-	 *  (it,dt,rt): (0,0,0) -> (1,1,1)
-	 *  This kind of transition usually occurs in the very early
-	 *  stage of Linux boot up procedure. Another case is in efi
-	 *  and pal calls. (see "arch/ia64/kernel/head.S")
-	 *
-	 *  (it,dt,rt): (0,0,0) -> (0,1,1)
-	 *  This kind of transition is found when OSYa exits efi boot
-	 *  service. Due to gva = gpa in this case (Same region),
-	 *  data access can be satisfied though itlb entry for physical
-	 *  emulation is hit.
-	 */
-	{SW_SELF, 0,  0,  SW_NOP, 0,  0,  0,  SW_P2V},
-	{0,  0,  0,  0,  0,  0,  0,  0},
-	{0,  0,  0,  0,  0,  0,  0,  0},
-	/*
-	 *  (it,dt,rt): (0,1,1) -> (1,1,1)
-	 *  This kind of transition is found in OSYa.
-	 *
-	 *  (it,dt,rt): (0,1,1) -> (0,0,0)
-	 *  This kind of transition is found in OSYa
-	 */
-	{SW_NOP, 0,  0,  SW_SELF, 0,  0,  0,  SW_P2V},
-	/* (1,0,0)->(1,1,1) */
-	{0,  0,  0,  0,  0,  0,  0,  SW_P2V},
-	/*
-	 *  (it,dt,rt): (1,0,1) -> (1,1,1)
-	 *  This kind of transition usually occurs when Linux returns
-	 *  from the low level TLB miss handlers.
-	 *  (see "arch/ia64/kernel/ivt.S")
-	 */
-	{0,  0,  0,  0,  0,  SW_SELF, 0,  SW_P2V},
-	{0,  0,  0,  0,  0,  0,  0,  0},
-	/*
-	 *  (it,dt,rt): (1,1,1) -> (1,0,1)
-	 *  This kind of transition usually occurs in Linux low level
-	 *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
-	 *
-	 *  (it,dt,rt): (1,1,1) -> (0,0,0)
-	 *  This kind of transition usually occurs in pal and efi calls,
-	 *  which requires running in physical mode.
-	 *  (see "arch/ia64/kernel/head.S")
-	 *  (1,1,1)->(1,0,0)
-	 */
-
-	{SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF},
-};
-
-void physical_mode_init(struct kvm_vcpu  *vcpu)
-{
-	vcpu->arch.mode_flags = GUEST_IN_PHY;
-}
-
-void switch_to_physical_rid(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-
-	/* Save original virtual mode rr[0] and rr[4] */
-	psr = ia64_clear_ic();
-	ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
-	ia64_srlz_d();
-	ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
-	ia64_srlz_d();
-
-	ia64_set_psr(psr);
-	return;
-}
-
-void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-
-	psr = ia64_clear_ic();
-	ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
-	ia64_srlz_d();
-	ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
-	ia64_srlz_d();
-	ia64_set_psr(psr);
-	return;
-}
-
-static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
-{
-	return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
-}
-
-void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
-					struct ia64_psr new_psr)
-{
-	int act;
-	act = mm_switch_action(old_psr, new_psr);
-	switch (act) {
-	case SW_V2P:
-		/*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
-		old_psr.val, new_psr.val);*/
-		switch_to_physical_rid(vcpu);
-		/*
-		 * Set rse to enforced lazy, to prevent active rse
-		 *save/restor when guest physical mode.
-		 */
-		vcpu->arch.mode_flags |= GUEST_IN_PHY;
-		break;
-	case SW_P2V:
-		switch_to_virtual_rid(vcpu);
-		/*
-		 * recover old mode which is saved when entering
-		 * guest physical mode
-		 */
-		vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
-		break;
-	case SW_SELF:
-		break;
-	case SW_NOP:
-		break;
-	default:
-		/* Sanity check */
-		break;
-	}
-	return;
-}
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void check_mm_mode_switch(struct kvm_vcpu *vcpu,  struct ia64_psr old_psr,
-					struct ia64_psr new_psr)
-{
-
-	if ((old_psr.dt != new_psr.dt)
-			|| (old_psr.it != new_psr.it)
-			|| (old_psr.rt != new_psr.rt))
-		switch_mm_mode(vcpu, old_psr, new_psr);
-
-	return;
-}
-
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-	if (is_physical_mode(vcpu)) {
-		vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
-		switch_to_virtual_rid(vcpu);
-	}
-	return;
-}
-
-/* Recover always follows prepare */
-void recover_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-	if (is_physical_mode(vcpu))
-		switch_to_physical_rid(vcpu);
-	vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
-	return;
-}
-
-#define RPT(x)	((u16) &((struct kvm_pt_regs *)0)->x)
-
-static u16 gr_info[32] = {
-	0, 	/* r0 is read-only : WE SHOULD NEVER GET THIS */
-	RPT(r1), RPT(r2), RPT(r3),
-	RPT(r4), RPT(r5), RPT(r6), RPT(r7),
-	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
-	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
-	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
-	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
-	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
-	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
-};
-
-#define IA64_FIRST_STACKED_GR   32
-#define IA64_FIRST_ROTATING_FR  32
-
-static inline unsigned long
-rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
-{
-	reg += rrb;
-	if (reg >= sor)
-		reg -= sor;
-	return reg;
-}
-
-/*
- * Return the (rotated) index for floating point register
- * be in the REGNUM (REGNUM must range from 32-127,
- * result is in the range from 0-95.
- */
-static inline unsigned long fph_index(struct kvm_pt_regs *regs,
-						long regnum)
-{
-	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
-	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
-}
-
-/*
- * The inverse of the above: given bspstore and the number of
- * registers, calculate ar.bsp.
- */
-static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
-							long num_regs)
-{
-	long delta = ia64_rse_slot_num(addr) + num_regs;
-	int i = 0;
-
-	if (num_regs < 0)
-		delta -= 0x3e;
-	if (delta < 0) {
-		while (delta <= -0x3f) {
-			i--;
-			delta += 0x3f;
-		}
-	} else {
-		while (delta >= 0x3f) {
-			i++;
-			delta -= 0x3f;
-		}
-	}
-
-	return addr + num_regs + i;
-}
-
-static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-					unsigned long *val, int *nat)
-{
-	unsigned long *bsp, *addr, *rnat_addr, *bspstore;
-	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-	unsigned long nat_mask;
-	unsigned long old_rsc, new_rsc;
-	long sof = (regs->cr_ifs) & 0x7f;
-	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-	long ridx = r1 - 32;
-
-	if (ridx < sor)
-		ridx = rotate_reg(sor, rrb_gr, ridx);
-
-	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-	new_rsc = old_rsc&(~(0x3));
-	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-
-	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-	bsp = kbs + (regs->loadrs >> 19);
-
-	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-	nat_mask = 1UL << ia64_rse_slot_num(addr);
-	rnat_addr = ia64_rse_rnat_addr(addr);
-
-	if (addr >= bspstore) {
-		ia64_flushrs();
-		ia64_mf();
-		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-	}
-	*val = *addr;
-	if (nat) {
-		if (bspstore < rnat_addr)
-			*nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
-							& nat_mask);
-		else
-			*nat = (int)!!((*rnat_addr) & nat_mask);
-		ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-	}
-}
-
-void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-				unsigned long val, unsigned long nat)
-{
-	unsigned long *bsp, *bspstore, *addr, *rnat_addr;
-	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-	unsigned long nat_mask;
-	unsigned long old_rsc, new_rsc, psr;
-	unsigned long rnat;
-	long sof = (regs->cr_ifs) & 0x7f;
-	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-	long ridx = r1 - 32;
-
-	if (ridx < sor)
-		ridx = rotate_reg(sor, rrb_gr, ridx);
-
-	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-	/* put RSC to lazy mode, and set loadrs 0 */
-	new_rsc = old_rsc & (~0x3fff0003);
-	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-	bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
-
-	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-	nat_mask = 1UL << ia64_rse_slot_num(addr);
-	rnat_addr = ia64_rse_rnat_addr(addr);
-
-	local_irq_save(psr);
-	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-	if (addr >= bspstore) {
-
-		ia64_flushrs();
-		ia64_mf();
-		*addr = val;
-		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-		if (bspstore < rnat_addr)
-			rnat = rnat & (~nat_mask);
-		else
-			*rnat_addr = (*rnat_addr)&(~nat_mask);
-
-		ia64_mf();
-		ia64_loadrs();
-		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-	} else {
-		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-		*addr = val;
-		if (bspstore < rnat_addr)
-			rnat = rnat&(~nat_mask);
-		else
-			*rnat_addr = (*rnat_addr) & (~nat_mask);
-
-		ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
-		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-	}
-	local_irq_restore(psr);
-	ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-}
-
-void getreg(unsigned long regnum, unsigned long *val,
-				int *nat, struct kvm_pt_regs *regs)
-{
-	unsigned long addr, *unat;
-	if (regnum >= IA64_FIRST_STACKED_GR) {
-		get_rse_reg(regs, regnum, val, nat);
-		return;
-	}
-
-	/*
-	 * Now look at registers in [0-31] range and init correct UNAT
-	 */
-	addr = (unsigned long)regs;
-	unat = &regs->eml_unat;
-
-	addr += gr_info[regnum];
-
-	*val  = *(unsigned long *)addr;
-	/*
-	 * do it only when requested
-	 */
-	if (nat)
-		*nat  = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
-}
-
-void setreg(unsigned long regnum, unsigned long val,
-			int nat, struct kvm_pt_regs *regs)
-{
-	unsigned long addr;
-	unsigned long bitmask;
-	unsigned long *unat;
-
-	/*
-	 * First takes care of stacked registers
-	 */
-	if (regnum >= IA64_FIRST_STACKED_GR) {
-		set_rse_reg(regs, regnum, val, nat);
-		return;
-	}
-
-	/*
-	 * Now look at registers in [0-31] range and init correct UNAT
-	 */
-	addr = (unsigned long)regs;
-	unat = &regs->eml_unat;
-	/*
-	 * add offset from base of struct
-	 * and do it !
-	 */
-	addr += gr_info[regnum];
-
-	*(unsigned long *)addr = val;
-
-	/*
-	 * We need to clear the corresponding UNAT bit to fully emulate the load
-	 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
-	 */
-	bitmask   = 1UL << ((addr >> 3) & 0x3f);
-	if (nat)
-		*unat |= bitmask;
-	 else
-		*unat &= ~bitmask;
-
-}
-
-u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	unsigned long val;
-
-	if (!reg)
-		return 0;
-	getreg(reg, &val, 0, regs);
-	return val;
-}
-
-void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	long sof = (regs->cr_ifs) & 0x7f;
-
-	if (!reg)
-		return;
-	if (reg >= sof + 32)
-		return;
-	setreg(reg, value, nat, regs);	/* FIXME: handle NATs later*/
-}
-
-void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-				struct kvm_pt_regs *regs)
-{
-	/* Take floating register rotation into consideration*/
-	if (regnum >= IA64_FIRST_ROTATING_FR)
-		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-#define CASE_FIXED_FP(reg)			\
-	case  (reg) :				\
-		ia64_stf_spill(fpval, reg);	\
-	break
-
-	switch (regnum) {
-		CASE_FIXED_FP(0);
-		CASE_FIXED_FP(1);
-		CASE_FIXED_FP(2);
-		CASE_FIXED_FP(3);
-		CASE_FIXED_FP(4);
-		CASE_FIXED_FP(5);
-
-		CASE_FIXED_FP(6);
-		CASE_FIXED_FP(7);
-		CASE_FIXED_FP(8);
-		CASE_FIXED_FP(9);
-		CASE_FIXED_FP(10);
-		CASE_FIXED_FP(11);
-
-		CASE_FIXED_FP(12);
-		CASE_FIXED_FP(13);
-		CASE_FIXED_FP(14);
-		CASE_FIXED_FP(15);
-		CASE_FIXED_FP(16);
-		CASE_FIXED_FP(17);
-		CASE_FIXED_FP(18);
-		CASE_FIXED_FP(19);
-		CASE_FIXED_FP(20);
-		CASE_FIXED_FP(21);
-		CASE_FIXED_FP(22);
-		CASE_FIXED_FP(23);
-		CASE_FIXED_FP(24);
-		CASE_FIXED_FP(25);
-		CASE_FIXED_FP(26);
-		CASE_FIXED_FP(27);
-		CASE_FIXED_FP(28);
-		CASE_FIXED_FP(29);
-		CASE_FIXED_FP(30);
-		CASE_FIXED_FP(31);
-		CASE_FIXED_FP(32);
-		CASE_FIXED_FP(33);
-		CASE_FIXED_FP(34);
-		CASE_FIXED_FP(35);
-		CASE_FIXED_FP(36);
-		CASE_FIXED_FP(37);
-		CASE_FIXED_FP(38);
-		CASE_FIXED_FP(39);
-		CASE_FIXED_FP(40);
-		CASE_FIXED_FP(41);
-		CASE_FIXED_FP(42);
-		CASE_FIXED_FP(43);
-		CASE_FIXED_FP(44);
-		CASE_FIXED_FP(45);
-		CASE_FIXED_FP(46);
-		CASE_FIXED_FP(47);
-		CASE_FIXED_FP(48);
-		CASE_FIXED_FP(49);
-		CASE_FIXED_FP(50);
-		CASE_FIXED_FP(51);
-		CASE_FIXED_FP(52);
-		CASE_FIXED_FP(53);
-		CASE_FIXED_FP(54);
-		CASE_FIXED_FP(55);
-		CASE_FIXED_FP(56);
-		CASE_FIXED_FP(57);
-		CASE_FIXED_FP(58);
-		CASE_FIXED_FP(59);
-		CASE_FIXED_FP(60);
-		CASE_FIXED_FP(61);
-		CASE_FIXED_FP(62);
-		CASE_FIXED_FP(63);
-		CASE_FIXED_FP(64);
-		CASE_FIXED_FP(65);
-		CASE_FIXED_FP(66);
-		CASE_FIXED_FP(67);
-		CASE_FIXED_FP(68);
-		CASE_FIXED_FP(69);
-		CASE_FIXED_FP(70);
-		CASE_FIXED_FP(71);
-		CASE_FIXED_FP(72);
-		CASE_FIXED_FP(73);
-		CASE_FIXED_FP(74);
-		CASE_FIXED_FP(75);
-		CASE_FIXED_FP(76);
-		CASE_FIXED_FP(77);
-		CASE_FIXED_FP(78);
-		CASE_FIXED_FP(79);
-		CASE_FIXED_FP(80);
-		CASE_FIXED_FP(81);
-		CASE_FIXED_FP(82);
-		CASE_FIXED_FP(83);
-		CASE_FIXED_FP(84);
-		CASE_FIXED_FP(85);
-		CASE_FIXED_FP(86);
-		CASE_FIXED_FP(87);
-		CASE_FIXED_FP(88);
-		CASE_FIXED_FP(89);
-		CASE_FIXED_FP(90);
-		CASE_FIXED_FP(91);
-		CASE_FIXED_FP(92);
-		CASE_FIXED_FP(93);
-		CASE_FIXED_FP(94);
-		CASE_FIXED_FP(95);
-		CASE_FIXED_FP(96);
-		CASE_FIXED_FP(97);
-		CASE_FIXED_FP(98);
-		CASE_FIXED_FP(99);
-		CASE_FIXED_FP(100);
-		CASE_FIXED_FP(101);
-		CASE_FIXED_FP(102);
-		CASE_FIXED_FP(103);
-		CASE_FIXED_FP(104);
-		CASE_FIXED_FP(105);
-		CASE_FIXED_FP(106);
-		CASE_FIXED_FP(107);
-		CASE_FIXED_FP(108);
-		CASE_FIXED_FP(109);
-		CASE_FIXED_FP(110);
-		CASE_FIXED_FP(111);
-		CASE_FIXED_FP(112);
-		CASE_FIXED_FP(113);
-		CASE_FIXED_FP(114);
-		CASE_FIXED_FP(115);
-		CASE_FIXED_FP(116);
-		CASE_FIXED_FP(117);
-		CASE_FIXED_FP(118);
-		CASE_FIXED_FP(119);
-		CASE_FIXED_FP(120);
-		CASE_FIXED_FP(121);
-		CASE_FIXED_FP(122);
-		CASE_FIXED_FP(123);
-		CASE_FIXED_FP(124);
-		CASE_FIXED_FP(125);
-		CASE_FIXED_FP(126);
-		CASE_FIXED_FP(127);
-	}
-#undef CASE_FIXED_FP
-}
-
-void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-					struct kvm_pt_regs *regs)
-{
-	/* Take floating register rotation into consideration*/
-	if (regnum >= IA64_FIRST_ROTATING_FR)
-		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-
-#define CASE_FIXED_FP(reg)			\
-	case (reg) :				\
-		ia64_ldf_fill(reg, fpval);	\
-	break
-
-	switch (regnum) {
-		CASE_FIXED_FP(2);
-		CASE_FIXED_FP(3);
-		CASE_FIXED_FP(4);
-		CASE_FIXED_FP(5);
-
-		CASE_FIXED_FP(6);
-		CASE_FIXED_FP(7);
-		CASE_FIXED_FP(8);
-		CASE_FIXED_FP(9);
-		CASE_FIXED_FP(10);
-		CASE_FIXED_FP(11);
-
-		CASE_FIXED_FP(12);
-		CASE_FIXED_FP(13);
-		CASE_FIXED_FP(14);
-		CASE_FIXED_FP(15);
-		CASE_FIXED_FP(16);
-		CASE_FIXED_FP(17);
-		CASE_FIXED_FP(18);
-		CASE_FIXED_FP(19);
-		CASE_FIXED_FP(20);
-		CASE_FIXED_FP(21);
-		CASE_FIXED_FP(22);
-		CASE_FIXED_FP(23);
-		CASE_FIXED_FP(24);
-		CASE_FIXED_FP(25);
-		CASE_FIXED_FP(26);
-		CASE_FIXED_FP(27);
-		CASE_FIXED_FP(28);
-		CASE_FIXED_FP(29);
-		CASE_FIXED_FP(30);
-		CASE_FIXED_FP(31);
-		CASE_FIXED_FP(32);
-		CASE_FIXED_FP(33);
-		CASE_FIXED_FP(34);
-		CASE_FIXED_FP(35);
-		CASE_FIXED_FP(36);
-		CASE_FIXED_FP(37);
-		CASE_FIXED_FP(38);
-		CASE_FIXED_FP(39);
-		CASE_FIXED_FP(40);
-		CASE_FIXED_FP(41);
-		CASE_FIXED_FP(42);
-		CASE_FIXED_FP(43);
-		CASE_FIXED_FP(44);
-		CASE_FIXED_FP(45);
-		CASE_FIXED_FP(46);
-		CASE_FIXED_FP(47);
-		CASE_FIXED_FP(48);
-		CASE_FIXED_FP(49);
-		CASE_FIXED_FP(50);
-		CASE_FIXED_FP(51);
-		CASE_FIXED_FP(52);
-		CASE_FIXED_FP(53);
-		CASE_FIXED_FP(54);
-		CASE_FIXED_FP(55);
-		CASE_FIXED_FP(56);
-		CASE_FIXED_FP(57);
-		CASE_FIXED_FP(58);
-		CASE_FIXED_FP(59);
-		CASE_FIXED_FP(60);
-		CASE_FIXED_FP(61);
-		CASE_FIXED_FP(62);
-		CASE_FIXED_FP(63);
-		CASE_FIXED_FP(64);
-		CASE_FIXED_FP(65);
-		CASE_FIXED_FP(66);
-		CASE_FIXED_FP(67);
-		CASE_FIXED_FP(68);
-		CASE_FIXED_FP(69);
-		CASE_FIXED_FP(70);
-		CASE_FIXED_FP(71);
-		CASE_FIXED_FP(72);
-		CASE_FIXED_FP(73);
-		CASE_FIXED_FP(74);
-		CASE_FIXED_FP(75);
-		CASE_FIXED_FP(76);
-		CASE_FIXED_FP(77);
-		CASE_FIXED_FP(78);
-		CASE_FIXED_FP(79);
-		CASE_FIXED_FP(80);
-		CASE_FIXED_FP(81);
-		CASE_FIXED_FP(82);
-		CASE_FIXED_FP(83);
-		CASE_FIXED_FP(84);
-		CASE_FIXED_FP(85);
-		CASE_FIXED_FP(86);
-		CASE_FIXED_FP(87);
-		CASE_FIXED_FP(88);
-		CASE_FIXED_FP(89);
-		CASE_FIXED_FP(90);
-		CASE_FIXED_FP(91);
-		CASE_FIXED_FP(92);
-		CASE_FIXED_FP(93);
-		CASE_FIXED_FP(94);
-		CASE_FIXED_FP(95);
-		CASE_FIXED_FP(96);
-		CASE_FIXED_FP(97);
-		CASE_FIXED_FP(98);
-		CASE_FIXED_FP(99);
-		CASE_FIXED_FP(100);
-		CASE_FIXED_FP(101);
-		CASE_FIXED_FP(102);
-		CASE_FIXED_FP(103);
-		CASE_FIXED_FP(104);
-		CASE_FIXED_FP(105);
-		CASE_FIXED_FP(106);
-		CASE_FIXED_FP(107);
-		CASE_FIXED_FP(108);
-		CASE_FIXED_FP(109);
-		CASE_FIXED_FP(110);
-		CASE_FIXED_FP(111);
-		CASE_FIXED_FP(112);
-		CASE_FIXED_FP(113);
-		CASE_FIXED_FP(114);
-		CASE_FIXED_FP(115);
-		CASE_FIXED_FP(116);
-		CASE_FIXED_FP(117);
-		CASE_FIXED_FP(118);
-		CASE_FIXED_FP(119);
-		CASE_FIXED_FP(120);
-		CASE_FIXED_FP(121);
-		CASE_FIXED_FP(122);
-		CASE_FIXED_FP(123);
-		CASE_FIXED_FP(124);
-		CASE_FIXED_FP(125);
-		CASE_FIXED_FP(126);
-		CASE_FIXED_FP(127);
-	}
-}
-
-void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-						struct ia64_fpreg *val)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	getfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-						struct ia64_fpreg *val)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	if (reg > 1)
-		setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-/*
- * The Altix RTC is mapped specially here for the vmm module
- */
-#define SN_RTC_BASE	(u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
-static long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
-
-	if (kvm->arch.is_sn2)
-		return (*SN_RTC_BASE);
-	else
-#endif
-		return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-/************************************************************************
- * lsapic timer
- ***********************************************************************/
-u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
-{
-	unsigned long guest_itc;
-	guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
-
-	if (guest_itc >= VMX(vcpu, last_itc)) {
-		VMX(vcpu, last_itc) = guest_itc;
-		return  guest_itc;
-	} else
-		return VMX(vcpu, last_itc);
-}
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
-static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
-{
-	struct kvm_vcpu *v;
-	struct kvm *kvm;
-	int i;
-	long itc_offset = val - kvm_get_itc(vcpu);
-	unsigned long vitv = VCPU(vcpu, itv);
-
-	kvm = (struct kvm *)KVM_VM_BASE;
-
-	if (kvm_vcpu_is_bsp(vcpu)) {
-		for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu +
-					sizeof(struct kvm_vcpu_data) * i);
-			VMX(v, itc_offset) = itc_offset;
-			VMX(v, last_itc) = 0;
-		}
-	}
-	VMX(vcpu, last_itc) = 0;
-	if (VCPU(vcpu, itm) <= val) {
-		VMX(vcpu, itc_check) = 0;
-		vcpu_unpend_interrupt(vcpu, vitv);
-	} else {
-		VMX(vcpu, itc_check) = 1;
-		vcpu_set_itm(vcpu, VCPU(vcpu, itm));
-	}
-
-}
-
-static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, itm));
-}
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
-{
-	unsigned long vitv = VCPU(vcpu, itv);
-	VCPU(vcpu, itm) = val;
-
-	if (val > vcpu_get_itc(vcpu)) {
-		VMX(vcpu, itc_check) = 1;
-		vcpu_unpend_interrupt(vcpu, vitv);
-		VMX(vcpu, timer_pending) = 0;
-	} else
-		VMX(vcpu, itc_check) = 0;
-}
-
-#define  ITV_VECTOR(itv)    (itv&0xff)
-#define  ITV_IRQ_MASK(itv)  (itv&(1<<16))
-
-static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, itv) = val;
-	if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
-		vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
-		vcpu->arch.timer_pending = 0;
-	}
-}
-
-static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
-{
-	int vec;
-
-	vec = highest_inservice_irq(vcpu);
-	if (vec == NULL_VECTOR)
-		return;
-	VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
-	VCPU(vcpu, eoi) = 0;
-	vcpu->arch.irq_new_pending = 1;
-
-}
-
-/* See Table 5-8 in SDM vol2 for the definition */
-int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
-{
-	union ia64_tpr vtpr;
-
-	vtpr.val = VCPU(vcpu, tpr);
-
-	if (h_inservice == NMI_VECTOR)
-		return IRQ_MASKED_BY_INSVC;
-
-	if (h_pending == NMI_VECTOR) {
-		/* Non Maskable Interrupt */
-		return IRQ_NO_MASKED;
-	}
-
-	if (h_inservice == ExtINT_VECTOR)
-		return IRQ_MASKED_BY_INSVC;
-
-	if (h_pending == ExtINT_VECTOR) {
-		if (vtpr.mmi) {
-			/* mask all external IRQ */
-			return IRQ_MASKED_BY_VTPR;
-		} else
-			return IRQ_NO_MASKED;
-	}
-
-	if (is_higher_irq(h_pending, h_inservice)) {
-		if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
-			return IRQ_NO_MASKED;
-		else
-			return IRQ_MASKED_BY_VTPR;
-	} else {
-		return IRQ_MASKED_BY_INSVC;
-	}
-}
-
-void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-	long spsr;
-	int ret;
-
-	local_irq_save(spsr);
-	ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
-	local_irq_restore(spsr);
-
-	vcpu->arch.irq_new_pending = 1;
-}
-
-void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-	long spsr;
-	int ret;
-
-	local_irq_save(spsr);
-	ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
-	local_irq_restore(spsr);
-	if (ret) {
-		vcpu->arch.irq_new_pending = 1;
-		wmb();
-	}
-}
-
-void update_vhpi(struct kvm_vcpu *vcpu, int vec)
-{
-	u64 vhpi;
-
-	if (vec == NULL_VECTOR)
-		vhpi = 0;
-	else if (vec == NMI_VECTOR)
-		vhpi = 32;
-	else if (vec == ExtINT_VECTOR)
-		vhpi = 16;
-	else
-		vhpi = vec >> 4;
-
-	VCPU(vcpu, vhpi) = vhpi;
-	if (VCPU(vcpu, vac).a_int)
-		ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
-				(u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
-}
-
-u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
-{
-	int vec, h_inservice, mask;
-
-	vec = highest_pending_irq(vcpu);
-	h_inservice = highest_inservice_irq(vcpu);
-	mask = irq_masked(vcpu, vec, h_inservice);
-	if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
-		if (VCPU(vcpu, vhpi))
-			update_vhpi(vcpu, NULL_VECTOR);
-		return IA64_SPURIOUS_INT_VECTOR;
-	}
-	if (mask == IRQ_MASKED_BY_VTPR) {
-		update_vhpi(vcpu, vec);
-		return IA64_SPURIOUS_INT_VECTOR;
-	}
-	VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
-	vcpu_unpend_interrupt(vcpu, vec);
-	return  (u64)vec;
-}
-
-/**************************************************************************
-  Privileged operation emulation routines
- **************************************************************************/
-u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	union ia64_pta vpta;
-	union ia64_rr vrr;
-	u64 pval;
-	u64 vhpt_offset;
-
-	vpta.val = vcpu_get_pta(vcpu);
-	vrr.val = vcpu_get_rr(vcpu, vadr);
-	vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
-	if (vpta.vf) {
-		pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
-				vpta.val, 0, 0, 0, 0);
-	} else {
-		pval = (vadr & VRN_MASK) | vhpt_offset |
-			(vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
-	}
-	return  pval;
-}
-
-u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	union ia64_rr vrr;
-	union ia64_pta vpta;
-	u64 pval;
-
-	vpta.val = vcpu_get_pta(vcpu);
-	vrr.val = vcpu_get_rr(vcpu, vadr);
-	if (vpta.vf) {
-		pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
-						0, 0, 0, 0, 0);
-	} else
-		pval = 1;
-
-	return  pval;
-}
-
-u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	struct thash_data *data;
-	union ia64_pta vpta;
-	u64 key;
-
-	vpta.val = vcpu_get_pta(vcpu);
-	if (vpta.vf == 0) {
-		key = 1;
-		return key;
-	}
-	data = vtlb_lookup(vcpu, vadr, D_TLB);
-	if (!data || !data->p)
-		key = 1;
-	else
-		key = data->key;
-
-	return key;
-}
-
-void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long thash, vadr;
-
-	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-	thash = vcpu_thash(vcpu, vadr);
-	vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
-}
-
-void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long tag, vadr;
-
-	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-	tag = vcpu_ttag(vcpu, vadr);
-	vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
-}
-
-int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
-{
-	struct thash_data *data;
-	union ia64_isr visr, pt_isr;
-	struct kvm_pt_regs *regs;
-	struct ia64_psr vpsr;
-
-	regs = vcpu_regs(vcpu);
-	pt_isr.val = VMX(vcpu, cr_isr);
-	visr.val = 0;
-	visr.ei = pt_isr.ei;
-	visr.ir = pt_isr.ir;
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-	visr.na = 1;
-
-	data = vhpt_lookup(vadr);
-	if (data) {
-		if (data->p == 0) {
-			vcpu_set_isr(vcpu, visr.val);
-			data_page_not_present(vcpu, vadr);
-			return IA64_FAULT;
-		} else if (data->ma == VA_MATTR_NATPAGE) {
-			vcpu_set_isr(vcpu, visr.val);
-			dnat_page_consumption(vcpu, vadr);
-			return IA64_FAULT;
-		} else {
-			*padr = (data->gpaddr >> data->ps << data->ps) |
-				(vadr & (PSIZE(data->ps) - 1));
-			return IA64_NO_FAULT;
-		}
-	}
-
-	data = vtlb_lookup(vcpu, vadr, D_TLB);
-	if (data) {
-		if (data->p == 0) {
-			vcpu_set_isr(vcpu, visr.val);
-			data_page_not_present(vcpu, vadr);
-			return IA64_FAULT;
-		} else if (data->ma == VA_MATTR_NATPAGE) {
-			vcpu_set_isr(vcpu, visr.val);
-			dnat_page_consumption(vcpu, vadr);
-			return IA64_FAULT;
-		} else{
-			*padr = ((data->ppn >> (data->ps - 12)) << data->ps)
-				| (vadr & (PSIZE(data->ps) - 1));
-			return IA64_NO_FAULT;
-		}
-	}
-	if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
-		if (vpsr.ic) {
-			vcpu_set_isr(vcpu, visr.val);
-			alt_dtlb(vcpu, vadr);
-			return IA64_FAULT;
-		} else {
-			nested_dtlb(vcpu);
-			return IA64_FAULT;
-		}
-	} else {
-		if (vpsr.ic) {
-			vcpu_set_isr(vcpu, visr.val);
-			dvhpt_fault(vcpu, vadr);
-			return IA64_FAULT;
-		} else{
-			nested_dtlb(vcpu);
-			return IA64_FAULT;
-		}
-	}
-
-	return IA64_NO_FAULT;
-}
-
-int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r1, r3;
-
-	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-
-	if (vcpu_tpa(vcpu, r3, &r1))
-		return IA64_FAULT;
-
-	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-	return(IA64_NO_FAULT);
-}
-
-void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r1, r3;
-
-	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-	r1 = vcpu_tak(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-}
-
-/************************************
- * Insert/Purge translation register/cache
- ************************************/
-void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-	thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
-}
-
-void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-	thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
-}
-
-void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-	u64 ps, va, rid;
-	struct thash_data *p_itr;
-
-	ps = itir_ps(itir);
-	va = PAGEALIGN(ifa, ps);
-	pte &= ~PAGE_FLAGS_RV_MASK;
-	rid = vcpu_get_rr(vcpu, ifa);
-	rid = rid & RR_RID_MASK;
-	p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
-	vcpu_set_tr(p_itr, pte, itir, va, rid);
-	vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
-}
-
-
-void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-	u64 gpfn;
-	u64 ps, va, rid;
-	struct thash_data *p_dtr;
-
-	ps = itir_ps(itir);
-	va = PAGEALIGN(ifa, ps);
-	pte &= ~PAGE_FLAGS_RV_MASK;
-
-	if (ps != _PAGE_SIZE_16M)
-		thash_purge_entries(vcpu, va, ps);
-	gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-	if (__gpfn_is_io(gpfn))
-		pte |= VTLB_PTE_IO;
-	rid = vcpu_get_rr(vcpu, va);
-	rid = rid & RR_RID_MASK;
-	p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
-	vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
-							pte, itir, va, rid);
-	vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
-}
-
-void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-	int index;
-	u64 va;
-
-	va = PAGEALIGN(ifa, ps);
-	while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
-		vcpu->arch.dtrs[index].page_flags = 0;
-
-	thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-	int index;
-	u64 va;
-
-	va = PAGEALIGN(ifa, ps);
-	while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
-		vcpu->arch.itrs[index].page_flags = 0;
-
-	thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-	va = PAGEALIGN(va, ps);
-	thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
-{
-	thash_purge_all(vcpu);
-}
-
-void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	long psr;
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_PTC_G;
-
-	p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
-	p->u.ptc_g_data.vaddr = va;
-	p->u.ptc_g_data.ps = ps;
-	vmm_transition(vcpu);
-	/* Do Local Purge Here*/
-	vcpu_ptc_l(vcpu, va, ps);
-	local_irq_restore(psr);
-}
-
-
-void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-	vcpu_ptc_ga(vcpu, va, ps);
-}
-
-void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	vcpu_ptc_e(vcpu, ifa);
-}
-
-void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte, slot;
-
-	slot = vcpu_get_gr(vcpu, inst.M45.r3);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	vcpu_itr_d(vcpu, slot, pte, itir, ifa);
-}
-
-
-
-void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte, slot;
-
-	slot = vcpu_get_gr(vcpu, inst.M45.r3);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	vcpu_itr_i(vcpu, slot, pte, itir, ifa);
-}
-
-void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte;
-
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_itc_d(vcpu, pte, itir, ifa);
-}
-
-void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte;
-
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_itc_i(vcpu, pte, itir, ifa);
-}
-
-/*************************************
- * Moves to semi-privileged registers
- *************************************/
-
-void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long imm;
-
-	if (inst.M30.s)
-		imm = -inst.M30.imm;
-	else
-		imm = inst.M30.imm;
-
-	vcpu_set_itc(vcpu, imm);
-}
-
-void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r2;
-
-	r2 = vcpu_get_gr(vcpu, inst.M29.r2);
-	vcpu_set_itc(vcpu, r2);
-}
-
-void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r1;
-
-	r1 = vcpu_get_itc(vcpu);
-	vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
-}
-
-/**************************************************************************
-  struct kvm_vcpu protection key register access routines
- **************************************************************************/
-
-unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-	return ((unsigned long)ia64_get_pkr(reg));
-}
-
-void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
-{
-	ia64_set_pkr(reg, val);
-}
-
-/********************************
- * Moves to privileged registers
- ********************************/
-unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
-					unsigned long val)
-{
-	union ia64_rr oldrr, newrr;
-	unsigned long rrval;
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	unsigned long psr;
-
-	oldrr.val = vcpu_get_rr(vcpu, reg);
-	newrr.val = val;
-	vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
-
-	switch ((unsigned long)(reg >> VRN_SHIFT)) {
-	case VRN6:
-		vcpu->arch.vmm_rr = vrrtomrr(val);
-		local_irq_save(psr);
-		p->exit_reason = EXIT_REASON_SWITCH_RR6;
-		vmm_transition(vcpu);
-		local_irq_restore(psr);
-		break;
-	case VRN4:
-		rrval = vrrtomrr(val);
-		vcpu->arch.metaphysical_saved_rr4 = rrval;
-		if (!is_physical_mode(vcpu))
-			ia64_set_rr(reg, rrval);
-		break;
-	case VRN0:
-		rrval = vrrtomrr(val);
-		vcpu->arch.metaphysical_saved_rr0 = rrval;
-		if (!is_physical_mode(vcpu))
-			ia64_set_rr(reg, rrval);
-		break;
-	default:
-		ia64_set_rr(reg, vrrtomrr(val));
-		break;
-	}
-
-	return (IA64_NO_FAULT);
-}
-
-void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_rr(vcpu, r3, r2);
-}
-
-void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_pmc(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_pmd(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	u64 r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_pkr(vcpu, r3, r2);
-}
-
-void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_rr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_pkr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_dbr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_ibr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_pmc(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-	/* FIXME: This could get called as a result of a rsvd-reg fault */
-	if (reg > (ia64_get_cpuid(3) & 0xff))
-		return 0;
-	else
-		return ia64_get_cpuid(reg);
-}
-
-void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_cpuid(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-	VCPU(vcpu, tpr) = val;
-	vcpu->arch.irq_check = 1;
-}
-
-unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r2;
-
-	r2 = vcpu_get_gr(vcpu, inst.M32.r2);
-	VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
-
-	switch (inst.M32.cr3) {
-	case 0:
-		vcpu_set_dcr(vcpu, r2);
-		break;
-	case 1:
-		vcpu_set_itm(vcpu, r2);
-		break;
-	case 66:
-		vcpu_set_tpr(vcpu, r2);
-		break;
-	case 67:
-		vcpu_set_eoi(vcpu, r2);
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long tgt = inst.M33.r1;
-	unsigned long val;
-
-	switch (inst.M33.cr3) {
-	case 65:
-		val = vcpu_get_ivr(vcpu);
-		vcpu_set_gr(vcpu, tgt, val, 0);
-		break;
-
-	case 67:
-		vcpu_set_gr(vcpu, tgt, 0L, 0);
-		break;
-	default:
-		val = VCPU(vcpu, vcr[inst.M33.cr3]);
-		vcpu_set_gr(vcpu, tgt, val, 0);
-		break;
-	}
-
-	return 0;
-}
-
-void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-
-	unsigned long mask;
-	struct kvm_pt_regs *regs;
-	struct ia64_psr old_psr, new_psr;
-
-	old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-	regs = vcpu_regs(vcpu);
-	/* We only support guest as:
-	 *  vpsr.pk = 0
-	 *  vpsr.is = 0
-	 * Otherwise panic
-	 */
-	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-		panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
-				"& vpsr.is=0\n");
-
-	/*
-	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
-	 * Since these bits will become 0, after success execution of each
-	 * instruction, we will change set them to mIA64_PSR
-	 */
-	VCPU(vcpu, vpsr) = val
-		& (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
-			IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
-
-	if (!old_psr.i && (val & IA64_PSR_I)) {
-		/* vpsr.i 0->1 */
-		vcpu->arch.irq_check = 1;
-	}
-	new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-	/*
-	 * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
-	 * , except for the following bits:
-	 *  ic/i/dt/si/rt/mc/it/bn/vm
-	 */
-	mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
-		IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
-		IA64_PSR_VM;
-
-	regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
-
-	check_mm_mode_switch(vcpu, old_psr, new_psr);
-
-	return ;
-}
-
-unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
-{
-	struct ia64_psr vpsr;
-
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-	if (!vpsr.ic)
-		VCPU(vcpu, ifs) = regs->cr_ifs;
-	regs->cr_ifs = IA64_IFS_V;
-	return (IA64_NO_FAULT);
-}
-
-
-
-/**************************************************************************
-  VCPU banked general register access routines
- **************************************************************************/
-#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
-	do {     							\
-		__asm__ __volatile__ (					\
-				";;extr.u %0 = %3,%6,16;;\n"		\
-				"dep %1 = %0, %1, 0, 16;;\n"		\
-				"st8 [%4] = %1\n"			\
-				"extr.u %0 = %2, 16, 16;;\n"		\
-				"dep %3 = %0, %3, %6, 16;;\n"		\
-				"st8 [%5] = %3\n"			\
-				::"r"(i), "r"(*b1unat), "r"(*b0unat),	\
-				"r"(*runat), "r"(b1unat), "r"(runat),	\
-				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
-	} while (0)
-
-void vcpu_bsw0(struct kvm_vcpu *vcpu)
-{
-	unsigned long i;
-
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	unsigned long *r = &regs->r16;
-	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-	unsigned long *runat = &regs->eml_unat;
-	unsigned long *b0unat = &VCPU(vcpu, vbnat);
-	unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-
-	if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
-		for (i = 0; i < 16; i++) {
-			*b1++ = *r;
-			*r++ = *b0++;
-		}
-		vcpu_bsw0_unat(i, b0unat, b1unat, runat,
-				VMM_PT_REGS_R16_SLOT);
-		VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
-	}
-}
-
-#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
-	do {             						\
-		__asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n"	\
-				"dep %1 = %0, %1, 16, 16;;\n"		\
-				"st8 [%4] = %1\n"			\
-				"extr.u %0 = %2, 0, 16;;\n"		\
-				"dep %3 = %0, %3, %6, 16;;\n"		\
-				"st8 [%5] = %3\n"			\
-				::"r"(i), "r"(*b0unat), "r"(*b1unat),	\
-				"r"(*runat), "r"(b0unat), "r"(runat),	\
-				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
-	} while (0)
-
-void vcpu_bsw1(struct kvm_vcpu *vcpu)
-{
-	unsigned long i;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	unsigned long *r = &regs->r16;
-	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-	unsigned long *runat = &regs->eml_unat;
-	unsigned long *b0unat = &VCPU(vcpu, vbnat);
-	unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-	if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
-		for (i = 0; i < 16; i++) {
-			*b0++ = *r;
-			*r++ = *b1++;
-		}
-		vcpu_bsw1_unat(i, b0unat, b1unat, runat,
-				VMM_PT_REGS_R16_SLOT);
-		VCPU(vcpu, vpsr) |= IA64_PSR_BN;
-	}
-}
-
-void vcpu_rfi(struct kvm_vcpu *vcpu)
-{
-	unsigned long ifs, psr;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	psr = VCPU(vcpu, ipsr);
-	if (psr & IA64_PSR_BN)
-		vcpu_bsw1(vcpu);
-	else
-		vcpu_bsw0(vcpu);
-	vcpu_set_psr(vcpu, psr);
-	ifs = VCPU(vcpu, ifs);
-	if (ifs >> 63)
-		regs->cr_ifs = ifs;
-	regs->cr_iip = VCPU(vcpu, iip);
-}
-
-/*
-   VPSR can't keep track of below bits of guest PSR
-   This function gets guest PSR
- */
-
-unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
-{
-	unsigned long mask;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
-		IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
-	return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
-}
-
-void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long vpsr;
-	unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
-					| inst.M44.imm;
-
-	vpsr = vcpu_get_psr(vcpu);
-	vpsr &= (~imm24);
-	vcpu_set_psr(vcpu, vpsr);
-}
-
-void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long vpsr;
-	unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
-				| inst.M44.imm;
-
-	vpsr = vcpu_get_psr(vcpu);
-	vpsr |= imm24;
-	vcpu_set_psr(vcpu, vpsr);
-}
-
-/* Generate Mask
- * Parameter:
- *  bit -- starting bit
- *  len -- how many bits
- */
-#define MASK(bit,len)				   	\
-({							\
-		__u64	ret;				\
-							\
-		__asm __volatile("dep %0=-1, r0, %1, %2"\
-				: "=r" (ret):		\
-		  "M" (bit),				\
-		  "M" (len));				\
-		ret;					\
-})
-
-void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
-{
-	val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
-	vcpu_set_psr(vcpu, val);
-}
-
-void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long val;
-
-	val = vcpu_get_gr(vcpu, inst.M35.r2);
-	vcpu_set_psr_l(vcpu, val);
-}
-
-void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long val;
-
-	val = vcpu_get_psr(vcpu);
-	val = (val & MASK(0, 32)) | (val & MASK(35, 2));
-	vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
-}
-
-void vcpu_increment_iip(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-	if (ipsr->ri == 2) {
-		ipsr->ri = 0;
-		regs->cr_iip += 16;
-	} else
-		ipsr->ri++;
-}
-
-void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-
-	if (ipsr->ri == 0) {
-		ipsr->ri = 2;
-		regs->cr_iip -= 16;
-	} else
-		ipsr->ri--;
-}
-
-/** Emulate a privileged operation.
- *
- *
- * @param vcpu virtual cpu
- * @cause the reason cause virtualization fault
- * @opcode the instruction code which cause virtualization fault
- */
-
-void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
-{
-	unsigned long status, cause, opcode ;
-	INST64 inst;
-
-	status = IA64_NO_FAULT;
-	cause = VMX(vcpu, cause);
-	opcode = VMX(vcpu, opcode);
-	inst.inst = opcode;
-	/*
-	 * Switch to actual virtual rid in rr0 and rr4,
-	 * which is required by some tlb related instructions.
-	 */
-	prepare_if_physical_mode(vcpu);
-
-	switch (cause) {
-	case EVENT_RSM:
-		kvm_rsm(vcpu, inst);
-		break;
-	case EVENT_SSM:
-		kvm_ssm(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PSR:
-		kvm_mov_to_psr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_PSR:
-		kvm_mov_from_psr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_CR:
-		kvm_mov_from_cr(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_CR:
-		kvm_mov_to_cr(vcpu, inst);
-		break;
-	case EVENT_BSW_0:
-		vcpu_bsw0(vcpu);
-		break;
-	case EVENT_BSW_1:
-		vcpu_bsw1(vcpu);
-		break;
-	case EVENT_COVER:
-		vcpu_cover(vcpu);
-		break;
-	case EVENT_RFI:
-		vcpu_rfi(vcpu);
-		break;
-	case EVENT_ITR_D:
-		kvm_itr_d(vcpu, inst);
-		break;
-	case EVENT_ITR_I:
-		kvm_itr_i(vcpu, inst);
-		break;
-	case EVENT_PTR_D:
-		kvm_ptr_d(vcpu, inst);
-		break;
-	case EVENT_PTR_I:
-		kvm_ptr_i(vcpu, inst);
-		break;
-	case EVENT_ITC_D:
-		kvm_itc_d(vcpu, inst);
-		break;
-	case EVENT_ITC_I:
-		kvm_itc_i(vcpu, inst);
-		break;
-	case EVENT_PTC_L:
-		kvm_ptc_l(vcpu, inst);
-		break;
-	case EVENT_PTC_G:
-		kvm_ptc_g(vcpu, inst);
-		break;
-	case EVENT_PTC_GA:
-		kvm_ptc_ga(vcpu, inst);
-		break;
-	case EVENT_PTC_E:
-		kvm_ptc_e(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_RR:
-		kvm_mov_to_rr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_RR:
-		kvm_mov_from_rr(vcpu, inst);
-		break;
-	case EVENT_THASH:
-		kvm_thash(vcpu, inst);
-		break;
-	case EVENT_TTAG:
-		kvm_ttag(vcpu, inst);
-		break;
-	case EVENT_TPA:
-		status = kvm_tpa(vcpu, inst);
-		break;
-	case EVENT_TAK:
-		kvm_tak(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_AR_IMM:
-		kvm_mov_to_ar_imm(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_AR:
-		kvm_mov_to_ar_reg(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_AR:
-		kvm_mov_from_ar_reg(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_DBR:
-		kvm_mov_to_dbr(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_IBR:
-		kvm_mov_to_ibr(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PMC:
-		kvm_mov_to_pmc(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PMD:
-		kvm_mov_to_pmd(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PKR:
-		kvm_mov_to_pkr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_DBR:
-		kvm_mov_from_dbr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_IBR:
-		kvm_mov_from_ibr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_PMC:
-		kvm_mov_from_pmc(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_PKR:
-		kvm_mov_from_pkr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_CPUID:
-		kvm_mov_from_cpuid(vcpu, inst);
-		break;
-	case EVENT_VMSW:
-		status = IA64_FAULT;
-		break;
-	default:
-		break;
-	};
-	/*Assume all status is NO_FAULT ?*/
-	if (status == IA64_NO_FAULT && cause != EVENT_RFI)
-		vcpu_increment_iip(vcpu);
-
-	recover_if_physical_mode(vcpu);
-}
-
-void init_vcpu(struct kvm_vcpu *vcpu)
-{
-	int i;
-
-	vcpu->arch.mode_flags = GUEST_IN_PHY;
-	VMX(vcpu, vrr[0]) = 0x38;
-	VMX(vcpu, vrr[1]) = 0x38;
-	VMX(vcpu, vrr[2]) = 0x38;
-	VMX(vcpu, vrr[3]) = 0x38;
-	VMX(vcpu, vrr[4]) = 0x38;
-	VMX(vcpu, vrr[5]) = 0x38;
-	VMX(vcpu, vrr[6]) = 0x38;
-	VMX(vcpu, vrr[7]) = 0x38;
-	VCPU(vcpu, vpsr) = IA64_PSR_BN;
-	VCPU(vcpu, dcr) = 0;
-	/* pta.size must not be 0.  The minimum is 15 (32k) */
-	VCPU(vcpu, pta) = 15 << 2;
-	VCPU(vcpu, itv) = 0x10000;
-	VCPU(vcpu, itm) = 0;
-	VMX(vcpu, last_itc) = 0;
-
-	VCPU(vcpu, lid) = VCPU_LID(vcpu);
-	VCPU(vcpu, ivr) = 0;
-	VCPU(vcpu, tpr) = 0x10000;
-	VCPU(vcpu, eoi) = 0;
-	VCPU(vcpu, irr[0]) = 0;
-	VCPU(vcpu, irr[1]) = 0;
-	VCPU(vcpu, irr[2]) = 0;
-	VCPU(vcpu, irr[3]) = 0;
-	VCPU(vcpu, pmv) = 0x10000;
-	VCPU(vcpu, cmcv) = 0x10000;
-	VCPU(vcpu, lrr0) = 0x10000;   /* default reset value? */
-	VCPU(vcpu, lrr1) = 0x10000;   /* default reset value? */
-	update_vhpi(vcpu, NULL_VECTOR);
-	VLSAPIC_XTP(vcpu) = 0x80;	/* disabled */
-
-	for (i = 0; i < 4; i++)
-		VLSAPIC_INSVC(vcpu, i) = 0;
-}
-
-void kvm_init_all_rr(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-
-	local_irq_save(psr);
-
-	/* WARNING: not allow co-exist of both virtual mode and physical
-	 * mode in same region
-	 */
-
-	vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
-	vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
-
-	if (is_physical_mode(vcpu)) {
-		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-			panic_vm(vcpu, "Machine Status conflicts!\n");
-
-		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
-		ia64_dv_serialize_data();
-		ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
-		ia64_dv_serialize_data();
-	} else {
-		ia64_set_rr((VRN0 << VRN_SHIFT),
-				vcpu->arch.metaphysical_saved_rr0);
-		ia64_dv_serialize_data();
-		ia64_set_rr((VRN4 << VRN_SHIFT),
-				vcpu->arch.metaphysical_saved_rr4);
-		ia64_dv_serialize_data();
-	}
-	ia64_set_rr((VRN1 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN1])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN2 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN2])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN3 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN3])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN5 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN5])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN7 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN7])));
-	ia64_dv_serialize_data();
-	ia64_srlz_d();
-	ia64_set_psr(psr);
-}
-
-int vmm_entry(void)
-{
-	struct kvm_vcpu *v;
-	v = current_vcpu;
-
-	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
-						0, 0, 0, 0, 0, 0);
-	kvm_init_vtlb(v);
-	kvm_init_vhpt(v);
-	init_vcpu(v);
-	kvm_init_all_rr(v);
-	vmm_reset_entry();
-
-	return 0;
-}
-
-static void kvm_show_registers(struct kvm_pt_regs *regs)
-{
-	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
-
-	struct kvm_vcpu *vcpu = current_vcpu;
-	if (vcpu != NULL)
-		printk("vcpu 0x%p vcpu %d\n",
-		       vcpu, vcpu->vcpu_id);
-
-	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
-	       regs->cr_ipsr, regs->cr_ifs, ip);
-
-	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
-	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
-	printk("rnat: %016lx bspstore: %016lx pr  : %016lx\n",
-	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
-	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
-	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
-	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
-	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0,
-							regs->b6, regs->b7);
-	printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
-	       regs->f6.u.bits[1], regs->f6.u.bits[0],
-	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
-	printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
-	       regs->f8.u.bits[1], regs->f8.u.bits[0],
-	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
-	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
-	       regs->f10.u.bits[1], regs->f10.u.bits[0],
-	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
-
-	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1,
-							regs->r2, regs->r3);
-	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8,
-							regs->r9, regs->r10);
-	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
-							regs->r12, regs->r13);
-	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
-							regs->r15, regs->r16);
-	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
-							regs->r18, regs->r19);
-	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
-							regs->r21, regs->r22);
-	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
-							regs->r24, regs->r25);
-	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
-							regs->r27, regs->r28);
-	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
-							regs->r30, regs->r31);
-
-}
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
-{
-	va_list args;
-	char buf[256];
-
-	struct kvm_pt_regs *regs = vcpu_regs(v);
-	struct exit_ctl_data *p = &v->arch.exit_data;
-	va_start(args, fmt);
-	vsnprintf(buf, sizeof(buf), fmt, args);
-	va_end(args);
-	printk(buf);
-	kvm_show_registers(regs);
-	p->exit_reason = EXIT_REASON_VM_PANIC;
-	vmm_transition(v);
-	/*Never to return*/
-	while (1);
-}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
deleted file mode 100644
index 988911b4cc7a..000000000000
--- a/arch/ia64/kvm/vcpu.h
+++ /dev/null
@@ -1,752 +0,0 @@
-/*
- *  vcpu.h: vcpu routines
- *  	Copyright (c) 2005, Intel Corporation.
- *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  	Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *
- * 	Copyright (c) 2007, Intel Corporation.
- *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *	Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#ifndef __KVM_VCPU_H__
-#define __KVM_VCPU_H__
-
-#include <asm/types.h>
-#include <asm/fpu.h>
-#include <asm/processor.h>
-
-#ifndef __ASSEMBLY__
-#include "vti.h"
-
-#include <linux/kvm_host.h>
-#include <linux/spinlock.h>
-
-typedef unsigned long IA64_INST;
-
-typedef union U_IA64_BUNDLE {
-	unsigned long i64[2];
-	struct { unsigned long template:5, slot0:41, slot1a:18,
-		slot1b:23, slot2:41; };
-	/* NOTE: following doesn't work because bitfields can't cross natural
-	   size boundaries
-	   struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
-} IA64_BUNDLE;
-
-typedef union U_INST64_A5 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
-		imm9d:9, s:1, major:4; };
-} INST64_A5;
-
-typedef union U_INST64_B4 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
-		wh:2, d:1, un1:1, major:4; };
-} INST64_B4;
-
-typedef union U_INST64_B8 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
-} INST64_B8;
-
-typedef union U_INST64_B9 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
-} INST64_B9;
-
-typedef union U_INST64_I19 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
-} INST64_I19;
-
-typedef union U_INST64_I26 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I26;
-
-typedef union U_INST64_I27 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
-} INST64_I27;
-
-typedef union U_INST64_I28 { /* not privileged (mov from AR) */
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I28;
-
-typedef union U_INST64_M28 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M28;
-
-typedef union U_INST64_M29 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M29;
-
-typedef union U_INST64_M30 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
-		x3:3, s:1, major:4; };
-} INST64_M30;
-
-typedef union U_INST64_M31 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M31;
-
-typedef union U_INST64_M32 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M32;
-
-typedef union U_INST64_M33 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M33;
-
-typedef union U_INST64_M35 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-
-} INST64_M35;
-
-typedef union U_INST64_M36 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
-} INST64_M36;
-
-typedef union U_INST64_M37 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
-		i:1, major:4; };
-} INST64_M37;
-
-typedef union U_INST64_M41 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-} INST64_M41;
-
-typedef union U_INST64_M42 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M42;
-
-typedef union U_INST64_M43 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M43;
-
-typedef union U_INST64_M44 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
-} INST64_M44;
-
-typedef union U_INST64_M45 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M45;
-
-typedef union U_INST64_M46 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
-		x3:3, un1:1, major:4; };
-} INST64_M46;
-
-typedef union U_INST64_M47 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
-} INST64_M47;
-
-typedef union U_INST64_M1{
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M1;
-
-typedef union U_INST64_M2{
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M2;
-
-typedef union U_INST64_M3{
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M3;
-
-typedef union U_INST64_M4 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M4;
-
-typedef union U_INST64_M5 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M5;
-
-typedef union U_INST64_M6 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M6;
-
-typedef union U_INST64_M9 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M9;
-
-typedef union U_INST64_M10 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M10;
-
-typedef union U_INST64_M12 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M12;
-
-typedef union U_INST64_M15 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M15;
-
-typedef union U_INST64 {
-	IA64_INST inst;
-	struct { unsigned long :37, major:4; } generic;
-	INST64_A5 A5;	/* used in build_hypercall_bundle only */
-	INST64_B4 B4;	/* used in build_hypercall_bundle only */
-	INST64_B8 B8;	/* rfi, bsw.[01] */
-	INST64_B9 B9;	/* break.b */
-	INST64_I19 I19;	/* used in build_hypercall_bundle only */
-	INST64_I26 I26;	/* mov register to ar (I unit) */
-	INST64_I27 I27;	/* mov immediate to ar (I unit) */
-	INST64_I28 I28;	/* mov from ar (I unit) */
-	INST64_M1  M1;	/* ld integer */
-	INST64_M2  M2;
-	INST64_M3  M3;
-	INST64_M4  M4;	/* st integer */
-	INST64_M5  M5;
-	INST64_M6  M6;	/* ldfd floating pointer 		*/
-	INST64_M9  M9;	/* stfd floating pointer		*/
-	INST64_M10 M10;	/* stfd floating pointer		*/
-	INST64_M12 M12;     /* ldfd pair floating pointer		*/
-	INST64_M15 M15;	/* lfetch + imm update			*/
-	INST64_M28 M28;	/* purge translation cache entry	*/
-	INST64_M29 M29;	/* mov register to ar (M unit)		*/
-	INST64_M30 M30;	/* mov immediate to ar (M unit)		*/
-	INST64_M31 M31;	/* mov from ar (M unit)			*/
-	INST64_M32 M32;	/* mov reg to cr			*/
-	INST64_M33 M33;	/* mov from cr				*/
-	INST64_M35 M35;	/* mov to psr				*/
-	INST64_M36 M36;	/* mov from psr				*/
-	INST64_M37 M37;	/* break.m				*/
-	INST64_M41 M41;	/* translation cache insert		*/
-	INST64_M42 M42;	/* mov to indirect reg/translation reg insert*/
-	INST64_M43 M43;	/* mov from indirect reg		*/
-	INST64_M44 M44;	/* set/reset system mask		*/
-	INST64_M45 M45;	/* translation purge			*/
-	INST64_M46 M46;	/* translation access (tpa,tak)		*/
-	INST64_M47 M47;	/* purge translation entry		*/
-} INST64;
-
-#define MASK_41 ((unsigned long)0x1ffffffffff)
-
-/* Virtual address memory attributes encoding */
-#define VA_MATTR_WB         0x0
-#define VA_MATTR_UC         0x4
-#define VA_MATTR_UCE        0x5
-#define VA_MATTR_WC         0x6
-#define VA_MATTR_NATPAGE    0x7
-
-#define PMASK(size)         (~((size) - 1))
-#define PSIZE(size)         (1UL<<(size))
-#define CLEARLSB(ppn, nbits)    (((ppn) >> (nbits)) << (nbits))
-#define PAGEALIGN(va, ps)	CLEARLSB(va, ps)
-#define PAGE_FLAGS_RV_MASK   (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
-#define _PAGE_MA_ST     (0x1 <<  2) /* is reserved for software use */
-
-#define ARCH_PAGE_SHIFT   12
-
-#define INVALID_TI_TAG (1UL << 63)
-
-#define VTLB_PTE_P_BIT      0
-#define VTLB_PTE_IO_BIT     60
-#define VTLB_PTE_IO         (1UL<<VTLB_PTE_IO_BIT)
-#define VTLB_PTE_P          (1UL<<VTLB_PTE_P_BIT)
-
-#define vcpu_quick_region_check(_tr_regions,_ifa)		\
-	(_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
-
-#define vcpu_quick_region_set(_tr_regions,_ifa)             \
-	do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
-
-static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
-		u64 va, u64 rid)
-{
-	trp->page_flags = pte;
-	trp->itir = itir;
-	trp->vadr = va;
-	trp->rid = rid;
-}
-
-extern u64 kvm_get_mpt_entry(u64 gpfn);
-
-/* Return I/ */
-static inline u64 __gpfn_is_io(u64 gpfn)
-{
-	u64  pte;
-	pte = kvm_get_mpt_entry(gpfn);
-	if (!(pte & GPFN_INV_MASK)) {
-		pte = pte & GPFN_IO_MASK;
-		if (pte != GPFN_PHYS_MMIO)
-			return pte;
-	}
-	return 0;
-}
-#endif
-#define IA64_NO_FAULT	0
-#define IA64_FAULT	1
-
-#define VMM_RBS_OFFSET  ((VMM_TASK_SIZE + 15) & ~15)
-
-#define SW_BAD  0   /* Bad mode transitition */
-#define SW_V2P  1   /* Physical emulatino is activated */
-#define SW_P2V  2   /* Exit physical mode emulation */
-#define SW_SELF 3   /* No mode transition */
-#define SW_NOP  4   /* Mode transition, but without action required */
-
-#define GUEST_IN_PHY    0x1
-#define GUEST_PHY_EMUL  0x2
-
-#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
-
-#define VRN_SHIFT	61
-#define VRN_MASK	0xe000000000000000
-#define VRN0		0x0UL
-#define VRN1		0x1UL
-#define VRN2		0x2UL
-#define VRN3		0x3UL
-#define VRN4		0x4UL
-#define VRN5		0x5UL
-#define VRN6		0x6UL
-#define VRN7		0x7UL
-
-#define IRQ_NO_MASKED         0
-#define IRQ_MASKED_BY_VTPR    1
-#define IRQ_MASKED_BY_INSVC   2   /* masked by inservice IRQ */
-
-#define PTA_BASE_SHIFT      15
-
-#define IA64_PSR_VM_BIT     46
-#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
-
-/* Interruption Function State */
-#define IA64_IFS_V_BIT      63
-#define IA64_IFS_V  (__IA64_UL(1) << IA64_IFS_V_BIT)
-
-#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
-#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
-
-#ifndef __ASSEMBLY__
-
-#include <asm/gcc_intrin.h>
-
-#define is_physical_mode(v)		\
-	((v->arch.mode_flags) & GUEST_IN_PHY)
-
-#define is_virtual_mode(v)	\
-	(!is_physical_mode(v))
-
-#define MODE_IND(psr)	\
-	(((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
-
-#ifndef CONFIG_SMP
-#define _vmm_raw_spin_lock(x)	 do {}while(0)
-#define _vmm_raw_spin_unlock(x) do {}while(0)
-#else
-typedef struct {
-	volatile unsigned int lock;
-} vmm_spinlock_t;
-#define _vmm_raw_spin_lock(x)						\
-	do {								\
-		__u32 *ia64_spinlock_ptr = (__u32 *) (x);		\
-		__u64 ia64_spinlock_val;				\
-		ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-		if (unlikely(ia64_spinlock_val)) {			\
-			do {						\
-				while (*ia64_spinlock_ptr)		\
-				ia64_barrier();				\
-				ia64_spinlock_val =			\
-				ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-			} while (ia64_spinlock_val);			\
-		}							\
-	} while (0)
-
-#define _vmm_raw_spin_unlock(x)				\
-	do { barrier();				\
-		((vmm_spinlock_t *)x)->lock = 0; } \
-while (0)
-#endif
-
-void vmm_spin_lock(vmm_spinlock_t *lock);
-void vmm_spin_unlock(vmm_spinlock_t *lock);
-enum {
-	I_TLB = 1,
-	D_TLB = 2
-};
-
-union kvm_va {
-	struct {
-		unsigned long off : 60;		/* intra-region offset */
-		unsigned long reg :  4;		/* region number */
-	} f;
-	unsigned long l;
-	void *p;
-};
-
-#define __kvm_pa(x)     ({union kvm_va _v; _v.l = (long) (x);		\
-						_v.f.reg = 0; _v.l; })
-#define __kvm_va(x)     ({union kvm_va _v; _v.l = (long) (x);		\
-				_v.f.reg = -1; _v.p; })
-
-#define _REGION_ID(x)           ({union ia64_rr _v; _v.val = (long)(x); \
-						_v.rid; })
-#define _REGION_PAGE_SIZE(x)    ({union ia64_rr _v; _v.val = (long)(x); \
-						_v.ps; })
-#define _REGION_HW_WALKER(x)    ({union ia64_rr _v; _v.val = (long)(x);	\
-						_v.ve; })
-
-enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
-enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
-
-#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
-#define VMX(_v, _x)  ((_v)->arch._x)
-
-#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
-#define VLSAPIC_XTP(_v)        VMX(_v, xtp)
-
-static inline unsigned long itir_ps(unsigned long itir)
-{
-	return ((itir >> 2) & 0x3f);
-}
-
-
-/**************************************************************************
-  VCPU control register access routines
- **************************************************************************/
-
-static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, itir));
-}
-
-static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, itir) = val;
-}
-
-static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, ifa));
-}
-
-static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, ifa) = val;
-}
-
-static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, iva));
-}
-
-static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, pta));
-}
-
-static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, lid));
-}
-
-static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, tpr));
-}
-
-static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
-{
-	return (0UL);		/*reads of eoi always return 0 */
-}
-
-static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[0]));
-}
-
-static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[1]));
-}
-
-static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[2]));
-}
-
-static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[3]));
-}
-
-static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
-{
-	ia64_setreg(_IA64_REG_CR_DCR, val);
-}
-
-static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, isr) = val;
-}
-
-static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, lid) = val;
-}
-
-static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, ipsr) = val;
-}
-
-static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, iip) = val;
-}
-
-static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, ifs) = val;
-}
-
-static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, iipa) = val;
-}
-
-static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, iha) = val;
-}
-
-
-static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
-{
-	return vcpu->arch.vrr[reg>>61];
-}
-
-/**************************************************************************
-  VCPU debug breakpoint register access routines
- **************************************************************************/
-
-static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	__ia64_set_dbr(reg, val);
-}
-
-static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	ia64_set_ibr(reg, val);
-}
-
-static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
-{
-	return ((u64)__ia64_get_dbr(reg));
-}
-
-static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
-{
-	return ((u64)ia64_get_ibr(reg));
-}
-
-/**************************************************************************
-  VCPU performance monitor register access routines
- **************************************************************************/
-static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	/* NOTE: Writes to unimplemented PMC registers are discarded */
-	ia64_set_pmc(reg, val);
-}
-
-static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	/* NOTE: Writes to unimplemented PMD registers are discarded */
-	ia64_set_pmd(reg, val);
-}
-
-static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
-{
-	/* NOTE: Reads from unimplemented PMC registers return zero */
-	return ((u64)ia64_get_pmc(reg));
-}
-
-static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
-{
-	/* NOTE: Reads from unimplemented PMD registers return zero */
-	return ((u64)ia64_get_pmd(reg));
-}
-
-static inline unsigned long vrrtomrr(unsigned long val)
-{
-	union ia64_rr rr;
-	rr.val = val;
-	rr.rid = (rr.rid << 4) | 0xe;
-	if (rr.ps > PAGE_SHIFT)
-		rr.ps = PAGE_SHIFT;
-	rr.ve = 1;
-	return rr.val;
-}
-
-
-static inline int highest_bits(int *dat)
-{
-	u32  bits, bitnum;
-	int i;
-
-	/* loop for all 256 bits */
-	for (i = 7; i >= 0 ; i--) {
-		bits = dat[i];
-		if (bits) {
-			bitnum = fls(bits);
-			return i * 32 + bitnum - 1;
-		}
-	}
-	return NULL_VECTOR;
-}
-
-/*
- * The pending irq is higher than the inservice one.
- *
- */
-static inline int is_higher_irq(int pending, int inservice)
-{
-	return ((pending > inservice)
-			|| ((pending != NULL_VECTOR)
-				&& (inservice == NULL_VECTOR)));
-}
-
-static inline int is_higher_class(int pending, int mic)
-{
-	return ((pending >> 4) > mic);
-}
-
-/*
- * Return 0-255 for pending irq.
- *        NULL_VECTOR: when no pending.
- */
-static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-	if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
-		return NMI_VECTOR;
-	if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
-		return ExtINT_VECTOR;
-
-	return highest_bits((int *)&VCPU(vcpu, irr[0]));
-}
-
-static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
-{
-	if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
-		return NMI_VECTOR;
-	if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
-		return ExtINT_VECTOR;
-
-	return highest_bits((int *)&(VMX(vcpu, insvc[0])));
-}
-
-extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-					struct ia64_fpreg *val);
-extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-					struct ia64_fpreg *val);
-extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
-extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
-			u64 val, int nat);
-extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
-extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
-extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
-extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
-extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
-					u64 itir, u64 va, int type);
-extern struct thash_data *vhpt_lookup(u64 va);
-extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
-extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
-extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
-extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
-extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
-		u64 itir, u64 ifa, int type);
-extern void thash_purge_all(struct kvm_vcpu *v);
-extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
-						u64 va, int is_data);
-extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
-						u64 ps, int is_data);
-
-extern void vcpu_increment_iip(struct kvm_vcpu *v);
-extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
-extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
-extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
-extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
-extern void nested_dtlb(struct kvm_vcpu *vcpu);
-extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
-extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
-
-extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
-extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
-
-extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
-extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
-extern void vmm_transition(struct kvm_vcpu *vcpu);
-extern void vmm_trampoline(union context *from, union context *to);
-extern int vmm_entry(void);
-extern  u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
-
-extern void vmm_reset_entry(void);
-void kvm_init_vtlb(struct kvm_vcpu *v);
-void kvm_init_vhpt(struct kvm_vcpu *v);
-void thash_init(struct thash_cb *hcb, u64 sz);
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
-u64 kvm_gpa_to_mpa(u64 gpa);
-extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
-		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
-
-extern long vmm_sanity;
-
-#endif
-#endif	/* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
deleted file mode 100644
index 176a12cd56de..000000000000
--- a/arch/ia64/kvm/vmm.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * vmm.c: vmm module interface with kvm module
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpswa.h>
-
-#include "vcpu.h"
-
-MODULE_AUTHOR("Intel");
-MODULE_LICENSE("GPL");
-
-extern char kvm_ia64_ivt;
-extern char kvm_asm_mov_from_ar;
-extern char kvm_asm_mov_from_ar_sn2;
-extern fpswa_interface_t *vmm_fpswa_interface;
-
-long vmm_sanity = 1;
-
-struct kvm_vmm_info vmm_info = {
-	.module			= THIS_MODULE,
-	.vmm_entry		= vmm_entry,
-	.tramp_entry		= vmm_trampoline,
-	.vmm_ivt		= (unsigned long)&kvm_ia64_ivt,
-	.patch_mov_ar		= (unsigned long)&kvm_asm_mov_from_ar,
-	.patch_mov_ar_sn2	= (unsigned long)&kvm_asm_mov_from_ar_sn2,
-};
-
-static int __init  kvm_vmm_init(void)
-{
-
-	vmm_fpswa_interface = fpswa_interface;
-
-	/*Register vmm data to kvm side*/
-	return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
-}
-
-static void __exit kvm_vmm_exit(void)
-{
-	kvm_exit();
-	return ;
-}
-
-void vmm_spin_lock(vmm_spinlock_t *lock)
-{
-	_vmm_raw_spin_lock(lock);
-}
-
-void vmm_spin_unlock(vmm_spinlock_t *lock)
-{
-	_vmm_raw_spin_unlock(lock);
-}
-
-static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	long psr;
-
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_DEBUG;
-	vmm_transition(vcpu);
-	local_irq_restore(psr);
-}
-
-asmlinkage int printk(const char *fmt, ...)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	va_list args;
-	int r;
-
-	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
-	va_start(args, fmt);
-	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
-	va_end(args);
-	vcpu_debug_exit(vcpu);
-	return r;
-}
-
-module_init(kvm_vmm_init)
-module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
deleted file mode 100644
index 397e34a63e18..000000000000
--- a/arch/ia64/kvm/vmm_ivt.S
+++ /dev/null
@@ -1,1392 +0,0 @@
-/*
- * arch/ia64/kvm/vmm_ivt.S
- *
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
- *      Stephane Eranian <eranian@hpl.hp.com>
- *      David Mosberger <davidm@hpl.hp.com>
- * Copyright (C) 2000, 2002-2003 Intel Co
- *      Asit Mallick <asit.k.mallick@intel.com>
- *      Suresh Siddha <suresh.b.siddha@intel.com>
- *      Kenneth Chen <kenneth.w.chen@intel.com>
- *      Fenghua Yu <fenghua.yu@intel.com>
- *
- *
- * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
- * for SMP
- * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
- * handler now uses virtual PT.
- *
- * 07/6/20 Xuefei Xu  (Anthony Xu) (anthony.xu@intel.com)
- *              Supporting Intel virtualization architecture
- *
- */
-
-/*
- * This file defines the interruption vector table used by the CPU.
- * It does not include one entry per possible cause of interruption.
- *
- * The first 20 entries of the table contain 64 bundles each while the
- * remaining 48 entries contain only 16 bundles each.
- *
- * The 64 bundles are used to allow inlining the whole handler for
- * critical
- * interruptions like TLB misses.
- *
- *  For each entry, the comment is as follows:
- *
- *              // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
- *              (12,51)
- *  entry offset ----/     /         /                  /
- *  /
- *  entry number ---------/         /                  /
- *  /
- *  size of the entry -------------/                  /
- *  /
- *  vector name -------------------------------------/
- *  /
- *  interruptions triggering this vector
- *  ----------------------/
- *
- * The table is 32KB in size and must be aligned on 32KB
- * boundary.
- * (The CPU ignores the 15 lower bits of the address)
- *
- * Table is based upon EAS2.6 (Oct 1999)
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/pgtable.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-#include "kvm_minstate.h"
-#include "vti.h"
-
-#if 0
-# define PSR_DEFAULT_BITS   psr.ac
-#else
-# define PSR_DEFAULT_BITS   0
-#endif
-
-#define KVM_FAULT(n)    \
-	kvm_fault_##n:;          \
-	mov r19=n;;          \
-	br.sptk.many kvm_vmm_panic;         \
-	;;                  \
-
-#define KVM_REFLECT(n)    \
-	mov r31=pr;           \
-	mov r19=n;       /* prepare to save predicates */ \
-	mov r29=cr.ipsr;      \
-	;;      \
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
-(p7)	br.sptk.many kvm_dispatch_reflection;        \
-	br.sptk.many kvm_vmm_panic;      \
-
-GLOBAL_ENTRY(kvm_vmm_panic)
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,1,0
-	mov out0=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i    // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	br.call.sptk.many b6=vmm_panic_handler;
-END(kvm_vmm_panic)
-
-    .section .text..ivt,"ax"
-
-    .align 32768    // align on 32KB boundary
-    .global kvm_ia64_ivt
-kvm_ia64_ivt:
-///////////////////////////////////////////////////////////////
-// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
-ENTRY(kvm_vhpt_miss)
-	KVM_FAULT(0)
-END(kvm_vhpt_miss)
-
-    .org kvm_ia64_ivt+0x400
-////////////////////////////////////////////////////////////////
-// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
-ENTRY(kvm_itlb_miss)
-	mov r31 = pr
-	mov r29=cr.ipsr;
-	;;
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)	br.sptk kvm_alt_itlb_miss
-	mov r19 = 1
-	br.sptk kvm_itlb_miss_dispatch
-	KVM_FAULT(1);
-END(kvm_itlb_miss)
-
-    .org kvm_ia64_ivt+0x0800
-//////////////////////////////////////////////////////////////////
-// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
-ENTRY(kvm_dtlb_miss)
-	mov r31 = pr
-	mov r29=cr.ipsr;
-	;;
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)	br.sptk kvm_alt_dtlb_miss
-	br.sptk kvm_dtlb_miss_dispatch
-END(kvm_dtlb_miss)
-
-     .org kvm_ia64_ivt+0x0c00
-////////////////////////////////////////////////////////////////////
-// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
-ENTRY(kvm_alt_itlb_miss)
-	mov r16=cr.ifa    // get address that caused the TLB miss
-	;;
-	movl r17=PAGE_KERNEL
-	mov r24=cr.ipsr
-	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	;;
-	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-	;;
-	or r19=r17,r19      // insert PTE control bits into r19
-	;;
-	movl r20=IA64_GRANULE_SHIFT<<2
-	;;
-	mov cr.itir=r20
-	;;
-	itc.i r19		// insert the TLB entry
-	mov pr=r31,-1
-	rfi
-END(kvm_alt_itlb_miss)
-
-    .org kvm_ia64_ivt+0x1000
-/////////////////////////////////////////////////////////////////////
-// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
-ENTRY(kvm_alt_dtlb_miss)
-	mov r16=cr.ifa		// get address that caused the TLB miss
-	;;
-	movl r17=PAGE_KERNEL
-	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	mov r24=cr.ipsr
-	;;
-	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-	;;
-	or r19=r19,r17	// insert PTE control bits into r19
-	;;
-	movl r20=IA64_GRANULE_SHIFT<<2
-	;;
-	mov cr.itir=r20
-	;;
-	itc.d r19		// insert the TLB entry
-	mov pr=r31,-1
-	rfi
-END(kvm_alt_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1400
-//////////////////////////////////////////////////////////////////////
-// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
-ENTRY(kvm_nested_dtlb_miss)
-	KVM_FAULT(5)
-END(kvm_nested_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1800
-/////////////////////////////////////////////////////////////////////
-// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
-ENTRY(kvm_ikey_miss)
-	KVM_REFLECT(6)
-END(kvm_ikey_miss)
-
-    .org kvm_ia64_ivt+0x1c00
-/////////////////////////////////////////////////////////////////////
-// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
-ENTRY(kvm_dkey_miss)
-	KVM_REFLECT(7)
-END(kvm_dkey_miss)
-
-    .org kvm_ia64_ivt+0x2000
-////////////////////////////////////////////////////////////////////
-// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
-ENTRY(kvm_dirty_bit)
-	KVM_REFLECT(8)
-END(kvm_dirty_bit)
-
-    .org kvm_ia64_ivt+0x2400
-////////////////////////////////////////////////////////////////////
-// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
-ENTRY(kvm_iaccess_bit)
-	KVM_REFLECT(9)
-END(kvm_iaccess_bit)
-
-    .org kvm_ia64_ivt+0x2800
-///////////////////////////////////////////////////////////////////
-// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
-ENTRY(kvm_daccess_bit)
-	KVM_REFLECT(10)
-END(kvm_daccess_bit)
-
-    .org kvm_ia64_ivt+0x2c00
-/////////////////////////////////////////////////////////////////
-// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
-ENTRY(kvm_break_fault)
-	mov r31=pr
-	mov r19=11
-	mov r29=cr.ipsr
-	;;
-	KVM_SAVE_MIN_WITH_COVER_R19
-	;;
-	alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
-	mov out0=cr.ifa
-	mov out2=cr.isr     // FIXME: pity to make this slow access twice
-	mov out3=cr.iim     // FIXME: pity to make this slow access twice
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i         // guarantee that interruption collection is on
-	;;
-	(p15)ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	adds out1=16,sp
-	br.call.sptk.many b6=kvm_ia64_handle_break
-	;;
-END(kvm_break_fault)
-
-    .org kvm_ia64_ivt+0x3000
-/////////////////////////////////////////////////////////////////
-// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
-ENTRY(kvm_interrupt)
-	mov r31=pr		// prepare to save predicates
-	mov r19=12
-	mov r29=cr.ipsr
-	;;
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT
-	tbit.z p0,p15=r29,IA64_PSR_I_BIT
-	;;
-(p7)	br.sptk kvm_dispatch_interrupt
-	;;
-	mov r27=ar.rsc		/* M */
-	mov r20=r1			/* A */
-	mov r25=ar.unat		/* M */
-	mov r26=ar.pfs		/* I */
-	mov r28=cr.iip		/* M */
-	cover			/* B (or nothing) */
-	;;
-	mov r1=sp
-	;;
-	invala			/* M */
-	mov r30=cr.ifs
-	;;
-	addl r1=-VMM_PT_REGS_SIZE,r1
-	;;
-	adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
-	adds r16=PT(CR_IPSR),r1
-	;;
-	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
-	st8 [r16]=r29			/* save cr.ipsr */
-	;;
-	lfetch.fault.excl.nt1 [r17]
-	mov r29=b0
-	;;
-	adds r16=PT(R8),r1  	/* initialize first base pointer */
-	adds r17=PT(R9),r1  	/* initialize second base pointer */
-	mov r18=r0      		/* make sure r18 isn't NaT */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r8,16
-.mem.offset 8,0; st8.spill [r17]=r9,16
-        ;;
-.mem.offset 0,0; st8.spill [r16]=r10,24
-.mem.offset 8,0; st8.spill [r17]=r11,24
-        ;;
-	st8 [r16]=r28,16		/* save cr.iip */
-	st8 [r17]=r30,16		/* save cr.ifs */
-	mov r8=ar.fpsr		/* M */
-	mov r9=ar.csd
-	mov r10=ar.ssd
-	movl r11=FPSR_DEFAULT	/* L-unit */
-	;;
-	st8 [r16]=r25,16		/* save ar.unat */
-	st8 [r17]=r26,16		/* save ar.pfs */
-	shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
-	;;
-	st8 [r16]=r27,16		/* save ar.rsc */
-	adds r17=16,r17		/* skip over ar_rnat field */
-	;;
-	st8 [r17]=r31,16		/* save predicates */
-	adds r16=16,r16		/* skip over ar_bspstore field */
-	;;
-	st8 [r16]=r29,16		/* save b0 */
-	st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
-.mem.offset 8,0; st8.spill [r17]=r12,16
-	adds r12=-16,r1
-	/* switch to kernel memory stack (with 16 bytes of scratch) */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r13,16
-.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r15,16
-.mem.offset 8,0; st8.spill [r17]=r14,16
-	dep r14=-1,r0,60,4
-	;;
-.mem.offset 0,0; st8.spill [r16]=r2,16
-.mem.offset 8,0; st8.spill [r17]=r3,16
-	adds r2=VMM_PT_REGS_R16_OFFSET,r1
-	adds r14 = VMM_VCPU_GP_OFFSET,r13
-	;;
-	mov r8=ar.ccv
-	ld8 r14 = [r14]
-	;;
-	mov r1=r14       /* establish kernel global pointer */
-	;;                                          \
-	bsw.1
-	;;
-	alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
-	mov out0=r13
-	;;
-	ssm psr.ic
-	;;
-	srlz.i
-	;;
-	//(p15) ssm psr.i
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	srlz.i			// ensure everybody knows psr.ic is back on
-	;;
-.mem.offset 0,0; st8.spill [r2]=r16,16
-.mem.offset 8,0; st8.spill [r3]=r17,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r18,16
-.mem.offset 8,0; st8.spill [r3]=r19,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r20,16
-.mem.offset 8,0; st8.spill [r3]=r21,16
-	mov r18=b6
-	;;
-.mem.offset 0,0; st8.spill [r2]=r22,16
-.mem.offset 8,0; st8.spill [r3]=r23,16
-	mov r19=b7
-	;;
-.mem.offset 0,0; st8.spill [r2]=r24,16
-.mem.offset 8,0; st8.spill [r3]=r25,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r26,16
-.mem.offset 8,0; st8.spill [r3]=r27,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r28,16
-.mem.offset 8,0; st8.spill [r3]=r29,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r30,16
-.mem.offset 8,0; st8.spill [r3]=r31,32
-	;;
-	mov ar.fpsr=r11       /* M-unit */
-	st8 [r2]=r8,8         /* ar.ccv */
-	adds r24=PT(B6)-PT(F7),r3
-	;;
-	stf.spill [r2]=f6,32
-	stf.spill [r3]=f7,32
-	;;
-	stf.spill [r2]=f8,32
-	stf.spill [r3]=f9,32
-	;;
-	stf.spill [r2]=f10
-	stf.spill [r3]=f11
-	adds r25=PT(B7)-PT(F11),r3
-	;;
-	st8 [r24]=r18,16       /* b6 */
-	st8 [r25]=r19,16       /* b7 */
-	;;
-	st8 [r24]=r9           /* ar.csd */
-	st8 [r25]=r10          /* ar.ssd */
-	;;
-	srlz.d		// make sure we see the effect of cr.ivr
-	addl r14=@gprel(ia64_leave_nested),gp
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=kvm_ia64_handle_irq
-	;;
-END(kvm_interrupt)
-
-    .global kvm_dispatch_vexirq
-    .org kvm_ia64_ivt+0x3400
-//////////////////////////////////////////////////////////////////////
-// 0x3400 Entry 13 (size 64 bundles) Reserved
-ENTRY(kvm_virtual_exirq)
-	mov r31=pr
-	mov r19=13
-	mov r30 =r0
-	;;
-kvm_dispatch_vexirq:
-	cmp.eq p6,p0 = 1,r30
-	;;
-(p6)	add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-(p6)	ld8 r1 = [r29]
-	;;
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,1,0
-	mov out0=r13
-
-	ssm psr.ic
-	;;
-	srlz.i // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	adds r3=8,r2                // set up second base pointer
-	;;
-	KVM_SAVE_REST
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=kvm_vexirq
-END(kvm_virtual_exirq)
-
-    .org kvm_ia64_ivt+0x3800
-/////////////////////////////////////////////////////////////////////
-// 0x3800 Entry 14 (size 64 bundles) Reserved
-	KVM_FAULT(14)
-	// this code segment is from 2.6.16.13
-
-    .org kvm_ia64_ivt+0x3c00
-///////////////////////////////////////////////////////////////////////
-// 0x3c00 Entry 15 (size 64 bundles) Reserved
-	KVM_FAULT(15)
-
-    .org kvm_ia64_ivt+0x4000
-///////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
-	KVM_FAULT(16)
-
-    .org kvm_ia64_ivt+0x4400
-//////////////////////////////////////////////////////////////////////
-// 0x4400 Entry 17 (size 64 bundles) Reserved
-	KVM_FAULT(17)
-
-    .org kvm_ia64_ivt+0x4800
-//////////////////////////////////////////////////////////////////////
-// 0x4800 Entry 18 (size 64 bundles) Reserved
-	KVM_FAULT(18)
-
-    .org kvm_ia64_ivt+0x4c00
-//////////////////////////////////////////////////////////////////////
-// 0x4c00 Entry 19 (size 64 bundles) Reserved
-	KVM_FAULT(19)
-
-    .org kvm_ia64_ivt+0x5000
-//////////////////////////////////////////////////////////////////////
-// 0x5000 Entry 20 (size 16 bundles) Page Not Present
-ENTRY(kvm_page_not_present)
-	KVM_REFLECT(20)
-END(kvm_page_not_present)
-
-    .org kvm_ia64_ivt+0x5100
-///////////////////////////////////////////////////////////////////////
-// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
-ENTRY(kvm_key_permission)
-	KVM_REFLECT(21)
-END(kvm_key_permission)
-
-    .org kvm_ia64_ivt+0x5200
-//////////////////////////////////////////////////////////////////////
-// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
-ENTRY(kvm_iaccess_rights)
-	KVM_REFLECT(22)
-END(kvm_iaccess_rights)
-
-    .org kvm_ia64_ivt+0x5300
-//////////////////////////////////////////////////////////////////////
-// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
-ENTRY(kvm_daccess_rights)
-	KVM_REFLECT(23)
-END(kvm_daccess_rights)
-
-    .org kvm_ia64_ivt+0x5400
-/////////////////////////////////////////////////////////////////////
-// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
-ENTRY(kvm_general_exception)
-	KVM_REFLECT(24)
-	KVM_FAULT(24)
-END(kvm_general_exception)
-
-    .org kvm_ia64_ivt+0x5500
-//////////////////////////////////////////////////////////////////////
-// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
-ENTRY(kvm_disabled_fp_reg)
-	KVM_REFLECT(25)
-END(kvm_disabled_fp_reg)
-
-    .org kvm_ia64_ivt+0x5600
-////////////////////////////////////////////////////////////////////
-// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
-ENTRY(kvm_nat_consumption)
-	KVM_REFLECT(26)
-END(kvm_nat_consumption)
-
-    .org kvm_ia64_ivt+0x5700
-/////////////////////////////////////////////////////////////////////
-// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
-ENTRY(kvm_speculation_vector)
-	KVM_REFLECT(27)
-END(kvm_speculation_vector)
-
-    .org kvm_ia64_ivt+0x5800
-/////////////////////////////////////////////////////////////////////
-// 0x5800 Entry 28 (size 16 bundles) Reserved
-	KVM_FAULT(28)
-
-    .org kvm_ia64_ivt+0x5900
-///////////////////////////////////////////////////////////////////
-// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
-ENTRY(kvm_debug_vector)
-	KVM_FAULT(29)
-END(kvm_debug_vector)
-
-    .org kvm_ia64_ivt+0x5a00
-///////////////////////////////////////////////////////////////
-// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
-ENTRY(kvm_unaligned_access)
-	KVM_REFLECT(30)
-END(kvm_unaligned_access)
-
-    .org kvm_ia64_ivt+0x5b00
-//////////////////////////////////////////////////////////////////////
-// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
-ENTRY(kvm_unsupported_data_reference)
-	KVM_REFLECT(31)
-END(kvm_unsupported_data_reference)
-
-    .org kvm_ia64_ivt+0x5c00
-////////////////////////////////////////////////////////////////////
-// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
-ENTRY(kvm_floating_point_fault)
-	KVM_REFLECT(32)
-END(kvm_floating_point_fault)
-
-    .org kvm_ia64_ivt+0x5d00
-/////////////////////////////////////////////////////////////////////
-// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
-ENTRY(kvm_floating_point_trap)
-	KVM_REFLECT(33)
-END(kvm_floating_point_trap)
-
-    .org kvm_ia64_ivt+0x5e00
-//////////////////////////////////////////////////////////////////////
-// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
-ENTRY(kvm_lower_privilege_trap)
-	KVM_REFLECT(34)
-END(kvm_lower_privilege_trap)
-
-    .org kvm_ia64_ivt+0x5f00
-//////////////////////////////////////////////////////////////////////
-// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
-ENTRY(kvm_taken_branch_trap)
-	KVM_REFLECT(35)
-END(kvm_taken_branch_trap)
-
-    .org kvm_ia64_ivt+0x6000
-////////////////////////////////////////////////////////////////////
-// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
-ENTRY(kvm_single_step_trap)
-	KVM_REFLECT(36)
-END(kvm_single_step_trap)
-    .global kvm_virtualization_fault_back
-    .org kvm_ia64_ivt+0x6100
-/////////////////////////////////////////////////////////////////////
-// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
-ENTRY(kvm_virtualization_fault)
-	mov r31=pr
-	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-	st8 [r16] = r1
-	adds r17 = VMM_VCPU_GP_OFFSET, r21
-	;;
-	ld8 r1 = [r17]
-	cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
-	cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
-	cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
-	cmp.eq p9,p0=EVENT_RSM,r24
-	cmp.eq p10,p0=EVENT_SSM,r24
-	cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
-	cmp.eq p12,p0=EVENT_THASH,r24
-(p6)	br.dptk.many kvm_asm_mov_from_ar
-(p7)	br.dptk.many kvm_asm_mov_from_rr
-(p8)	br.dptk.many kvm_asm_mov_to_rr
-(p9)	br.dptk.many kvm_asm_rsm
-(p10)	br.dptk.many kvm_asm_ssm
-(p11)	br.dptk.many kvm_asm_mov_to_psr
-(p12)	br.dptk.many kvm_asm_thash
-	;;
-kvm_virtualization_fault_back:
-	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-	ld8 r1 = [r16]
-	;;
-	mov r19=37
-	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-	;;
-	st8 [r16] = r24
-	st8 [r17] = r25
-	;;
-	cmp.ne p6,p0=EVENT_RFI, r24
-(p6)	br.sptk kvm_dispatch_virtualization_fault
-	;;
-	adds r18=VMM_VPD_BASE_OFFSET,r21
-	;;
-	ld8 r18=[r18]
-	;;
-	adds r18=VMM_VPD_VIFS_OFFSET,r18
-	;;
-	ld8 r18=[r18]
-	;;
-	tbit.z p6,p0=r18,63
-(p6)	br.sptk kvm_dispatch_virtualization_fault
-	;;
-//if vifs.v=1 desert current register frame
-	alloc r18=ar.pfs,0,0,0,0
-	br.sptk kvm_dispatch_virtualization_fault
-END(kvm_virtualization_fault)
-
-    .org kvm_ia64_ivt+0x6200
-//////////////////////////////////////////////////////////////
-// 0x6200 Entry 38 (size 16 bundles) Reserved
-	KVM_FAULT(38)
-
-    .org kvm_ia64_ivt+0x6300
-/////////////////////////////////////////////////////////////////
-// 0x6300 Entry 39 (size 16 bundles) Reserved
-	KVM_FAULT(39)
-
-    .org kvm_ia64_ivt+0x6400
-/////////////////////////////////////////////////////////////////
-// 0x6400 Entry 40 (size 16 bundles) Reserved
-	KVM_FAULT(40)
-
-    .org kvm_ia64_ivt+0x6500
-//////////////////////////////////////////////////////////////////
-// 0x6500 Entry 41 (size 16 bundles) Reserved
-	KVM_FAULT(41)
-
-    .org kvm_ia64_ivt+0x6600
-//////////////////////////////////////////////////////////////////
-// 0x6600 Entry 42 (size 16 bundles) Reserved
-	KVM_FAULT(42)
-
-    .org kvm_ia64_ivt+0x6700
-//////////////////////////////////////////////////////////////////
-// 0x6700 Entry 43 (size 16 bundles) Reserved
-	KVM_FAULT(43)
-
-    .org kvm_ia64_ivt+0x6800
-//////////////////////////////////////////////////////////////////
-// 0x6800 Entry 44 (size 16 bundles) Reserved
-	KVM_FAULT(44)
-
-    .org kvm_ia64_ivt+0x6900
-///////////////////////////////////////////////////////////////////
-// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
-//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
-ENTRY(kvm_ia32_exception)
-	KVM_FAULT(45)
-END(kvm_ia32_exception)
-
-    .org kvm_ia64_ivt+0x6a00
-////////////////////////////////////////////////////////////////////
-// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
-ENTRY(kvm_ia32_intercept)
-	KVM_FAULT(47)
-END(kvm_ia32_intercept)
-
-    .org kvm_ia64_ivt+0x6c00
-/////////////////////////////////////////////////////////////////////
-// 0x6c00 Entry 48 (size 16 bundles) Reserved
-	KVM_FAULT(48)
-
-    .org kvm_ia64_ivt+0x6d00
-//////////////////////////////////////////////////////////////////////
-// 0x6d00 Entry 49 (size 16 bundles) Reserved
-	KVM_FAULT(49)
-
-    .org kvm_ia64_ivt+0x6e00
-//////////////////////////////////////////////////////////////////////
-// 0x6e00 Entry 50 (size 16 bundles) Reserved
-	KVM_FAULT(50)
-
-    .org kvm_ia64_ivt+0x6f00
-/////////////////////////////////////////////////////////////////////
-// 0x6f00 Entry 51 (size 16 bundles) Reserved
-	KVM_FAULT(52)
-
-    .org kvm_ia64_ivt+0x7100
-////////////////////////////////////////////////////////////////////
-// 0x7100 Entry 53 (size 16 bundles) Reserved
-	KVM_FAULT(53)
-
-    .org kvm_ia64_ivt+0x7200
-/////////////////////////////////////////////////////////////////////
-// 0x7200 Entry 54 (size 16 bundles) Reserved
-	KVM_FAULT(54)
-
-    .org kvm_ia64_ivt+0x7300
-////////////////////////////////////////////////////////////////////
-// 0x7300 Entry 55 (size 16 bundles) Reserved
-	KVM_FAULT(55)
-
-    .org kvm_ia64_ivt+0x7400
-////////////////////////////////////////////////////////////////////
-// 0x7400 Entry 56 (size 16 bundles) Reserved
-	KVM_FAULT(56)
-
-    .org kvm_ia64_ivt+0x7500
-/////////////////////////////////////////////////////////////////////
-// 0x7500 Entry 57 (size 16 bundles) Reserved
-	KVM_FAULT(57)
-
-    .org kvm_ia64_ivt+0x7600
-/////////////////////////////////////////////////////////////////////
-// 0x7600 Entry 58 (size 16 bundles) Reserved
-	KVM_FAULT(58)
-
-    .org kvm_ia64_ivt+0x7700
-////////////////////////////////////////////////////////////////////
-// 0x7700 Entry 59 (size 16 bundles) Reserved
-	KVM_FAULT(59)
-
-    .org kvm_ia64_ivt+0x7800
-////////////////////////////////////////////////////////////////////
-// 0x7800 Entry 60 (size 16 bundles) Reserved
-	KVM_FAULT(60)
-
-    .org kvm_ia64_ivt+0x7900
-/////////////////////////////////////////////////////////////////////
-// 0x7900 Entry 61 (size 16 bundles) Reserved
-	KVM_FAULT(61)
-
-    .org kvm_ia64_ivt+0x7a00
-/////////////////////////////////////////////////////////////////////
-// 0x7a00 Entry 62 (size 16 bundles) Reserved
-	KVM_FAULT(62)
-
-    .org kvm_ia64_ivt+0x7b00
-/////////////////////////////////////////////////////////////////////
-// 0x7b00 Entry 63 (size 16 bundles) Reserved
-	KVM_FAULT(63)
-
-    .org kvm_ia64_ivt+0x7c00
-////////////////////////////////////////////////////////////////////
-// 0x7c00 Entry 64 (size 16 bundles) Reserved
-	KVM_FAULT(64)
-
-    .org kvm_ia64_ivt+0x7d00
-/////////////////////////////////////////////////////////////////////
-// 0x7d00 Entry 65 (size 16 bundles) Reserved
-	KVM_FAULT(65)
-
-    .org kvm_ia64_ivt+0x7e00
-/////////////////////////////////////////////////////////////////////
-// 0x7e00 Entry 66 (size 16 bundles) Reserved
-	KVM_FAULT(66)
-
-    .org kvm_ia64_ivt+0x7f00
-////////////////////////////////////////////////////////////////////
-// 0x7f00 Entry 67 (size 16 bundles) Reserved
-	KVM_FAULT(67)
-
-    .org kvm_ia64_ivt+0x8000
-// There is no particular reason for this code to be here, other than that
-// there happens to be space here that would go unused otherwise.  If this
-// fault ever gets "unreserved", simply moved the following code to a more
-// suitable spot...
-
-
-ENTRY(kvm_dtlb_miss_dispatch)
-	mov r19 = 2
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,3,0
-	mov out0=cr.ifa
-	mov out1=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i     // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-	;;
-	KVM_SAVE_REST
-	KVM_SAVE_EXTRA
-	mov rp=r14
-	;;
-	adds out2=16,r12
-	br.call.sptk.many b6=kvm_page_fault
-END(kvm_dtlb_miss_dispatch)
-
-ENTRY(kvm_itlb_miss_dispatch)
-
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,3,0
-	mov out0=cr.ifa
-	mov out1=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i   // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	adds out2=16,r12
-	br.call.sptk.many b6=kvm_page_fault
-END(kvm_itlb_miss_dispatch)
-
-ENTRY(kvm_dispatch_reflection)
-/*
- * Input:
- *  psr.ic: off
- *  r19:    intr type (offset into ivt, see ia64_int.h)
- *  r31:    contains saved predicates (pr)
- */
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,5,0
-	mov out0=cr.ifa
-	mov out1=cr.isr
-	mov out2=cr.iim
-	mov out3=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i   // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	adds out4=16,r12
-	br.call.sptk.many b6=reflect_interruption
-END(kvm_dispatch_reflection)
-
-ENTRY(kvm_dispatch_virtualization_fault)
-	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-	;;
-	st8 [r16] = r24
-	st8 [r17] = r25
-	;;
-	KVM_SAVE_MIN_WITH_COVER_R19
-	;;
-	alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
-	mov out0=r13        //vcpu
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i    // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-	;;
-	KVM_SAVE_REST
-	KVM_SAVE_EXTRA
-	mov rp=r14
-	;;
-	adds out1=16,sp         //regs
-	br.call.sptk.many b6=kvm_emulate
-END(kvm_dispatch_virtualization_fault)
-
-
-ENTRY(kvm_dispatch_interrupt)
-	KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
-	;;
-	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	;;
-	ssm psr.ic
-	;;
-	srlz.i
-	;;
-	(p15) ssm psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	mov out0=r13		// pass pointer to pt_regs as second arg
-	br.call.sptk.many b6=kvm_ia64_handle_irq
-END(kvm_dispatch_interrupt)
-
-GLOBAL_ENTRY(ia64_leave_nested)
-	rsm psr.i
-	;;
-	adds r21=PT(PR)+16,r12
-	;;
-	lfetch [r21],PT(CR_IPSR)-PT(PR)
-	adds r2=PT(B6)+16,r12
-	adds r3=PT(R16)+16,r12
-	;;
-	lfetch [r21]
-	ld8 r28=[r2],8		// load b6
-	adds r29=PT(R24)+16,r12
-
-	ld8.fill r16=[r3]
-	adds r3=PT(AR_CSD)-PT(R16),r3
-	adds r30=PT(AR_CCV)+16,r12
-	;;
-	ld8.fill r24=[r29]
-	ld8 r15=[r30]		// load ar.ccv
-	;;
-	ld8 r29=[r2],16		// load b7
-	ld8 r30=[r3],16		// load ar.csd
-	;;
-	ld8 r31=[r2],16		// load ar.ssd
-	ld8.fill r8=[r3],16
-	;;
-	ld8.fill r9=[r2],16
-	ld8.fill r10=[r3],PT(R17)-PT(R10)
-	;;
-	ld8.fill r11=[r2],PT(R18)-PT(R11)
-	ld8.fill r17=[r3],16
-	;;
-	ld8.fill r18=[r2],16
-	ld8.fill r19=[r3],16
-	;;
-	ld8.fill r20=[r2],16
-	ld8.fill r21=[r3],16
-	mov ar.csd=r30
-	mov ar.ssd=r31
-	;;
-	rsm psr.i | psr.ic
-	// initiate turning off of interrupt and interruption collection
-	invala			// invalidate ALAT
-	;;
-	srlz.i
-	;;
-	ld8.fill r22=[r2],24
-	ld8.fill r23=[r3],24
-	mov b6=r28
-	;;
-	ld8.fill r25=[r2],16
-	ld8.fill r26=[r3],16
-	mov b7=r29
-	;;
-	ld8.fill r27=[r2],16
-	ld8.fill r28=[r3],16
-	;;
-	ld8.fill r29=[r2],16
-	ld8.fill r30=[r3],24
-	;;
-	ld8.fill r31=[r2],PT(F9)-PT(R31)
-	adds r3=PT(F10)-PT(F6),r3
-	;;
-	ldf.fill f9=[r2],PT(F6)-PT(F9)
-	ldf.fill f10=[r3],PT(F8)-PT(F10)
-	;;
-	ldf.fill f6=[r2],PT(F7)-PT(F6)
-	;;
-	ldf.fill f7=[r2],PT(F11)-PT(F7)
-	ldf.fill f8=[r3],32
-	;;
-	srlz.i			// ensure interruption collection is off
-	mov ar.ccv=r15
-	;;
-	bsw.0	// switch back to bank 0 (no stop bit required beforehand...)
-	;;
-	ldf.fill f11=[r2]
-//	mov r18=r13
-//	mov r21=r13
-	adds r16=PT(CR_IPSR)+16,r12
-	adds r17=PT(CR_IIP)+16,r12
-	;;
-	ld8 r29=[r16],16	// load cr.ipsr
-	ld8 r28=[r17],16	// load cr.iip
-	;;
-	ld8 r30=[r16],16	// load cr.ifs
-	ld8 r25=[r17],16	// load ar.unat
-	;;
-	ld8 r26=[r16],16	// load ar.pfs
-	ld8 r27=[r17],16	// load ar.rsc
-	cmp.eq p9,p0=r0,r0
-	// set p9 to indicate that we should restore cr.ifs
-	;;
-	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
-	ld8 r23=[r17],16// load ar.bspstore (may be garbage)
-	;;
-	ld8 r31=[r16],16	// load predicates
-	ld8 r22=[r17],16	// load b0
-	;;
-	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
-	ld8.fill r1=[r17],16	// load r1
-	;;
-	ld8.fill r12=[r16],16
-	ld8.fill r13=[r17],16
-	;;
-	ld8 r20=[r16],16	// ar.fpsr
-	ld8.fill r15=[r17],16
-	;;
-	ld8.fill r14=[r16],16
-	ld8.fill r2=[r17]
-	;;
-	ld8.fill r3=[r16]
-	;;
-	mov r16=ar.bsp		// get existing backing store pointer
-	;;
-	mov b0=r22
-	mov ar.pfs=r26
-	mov cr.ifs=r30
-	mov cr.ipsr=r29
-	mov ar.fpsr=r20
-	mov cr.iip=r28
-	;;
-	mov ar.rsc=r27
-	mov ar.unat=r25
-	mov pr=r31,-1
-	rfi
-END(ia64_leave_nested)
-
-GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
-/*
- * work.need_resched etc. mustn't get changed
- *by this CPU before it returns to
- * user- or fsys-mode, hence we disable interrupts early on:
- */
-	adds r2 = PT(R4)+16,r12
-	adds r3 = PT(R5)+16,r12
-	adds r8 = PT(EML_UNAT)+16,r12
-	;;
-	ld8 r8 = [r8]
-	;;
-	mov ar.unat=r8
-	;;
-	ld8.fill r4=[r2],16    //load r4
-	ld8.fill r5=[r3],16    //load r5
-	;;
-	ld8.fill r6=[r2]    //load r6
-	ld8.fill r7=[r3]    //load r7
-	;;
-END(ia64_leave_hypervisor_prepare)
-//fall through
-GLOBAL_ENTRY(ia64_leave_hypervisor)
-	rsm psr.i
-	;;
-	br.call.sptk.many b0=leave_hypervisor_tail
-	;;
-	adds r20=PT(PR)+16,r12
-	adds r8=PT(EML_UNAT)+16,r12
-	;;
-	ld8 r8=[r8]
-	;;
-	mov ar.unat=r8
-	;;
-	lfetch [r20],PT(CR_IPSR)-PT(PR)
-	adds r2 = PT(B6)+16,r12
-	adds r3 = PT(B7)+16,r12
-	;;
-	lfetch [r20]
-	;;
-	ld8 r24=[r2],16        /* B6 */
-	ld8 r25=[r3],16        /* B7 */
-	;;
-	ld8 r26=[r2],16        /* ar_csd */
-	ld8 r27=[r3],16        /* ar_ssd */
-	mov b6 = r24
-	;;
-	ld8.fill r8=[r2],16
-	ld8.fill r9=[r3],16
-	mov b7 = r25
-	;;
-	mov ar.csd = r26
-	mov ar.ssd = r27
-	;;
-	ld8.fill r10=[r2],PT(R15)-PT(R10)
-	ld8.fill r11=[r3],PT(R14)-PT(R11)
-	;;
-	ld8.fill r15=[r2],PT(R16)-PT(R15)
-	ld8.fill r14=[r3],PT(R17)-PT(R14)
-	;;
-	ld8.fill r16=[r2],16
-	ld8.fill r17=[r3],16
-	;;
-	ld8.fill r18=[r2],16
-	ld8.fill r19=[r3],16
-	;;
-	ld8.fill r20=[r2],16
-	ld8.fill r21=[r3],16
-	;;
-	ld8.fill r22=[r2],16
-	ld8.fill r23=[r3],16
-	;;
-	ld8.fill r24=[r2],16
-	ld8.fill r25=[r3],16
-	;;
-	ld8.fill r26=[r2],16
-	ld8.fill r27=[r3],16
-	;;
-	ld8.fill r28=[r2],16
-	ld8.fill r29=[r3],16
-	;;
-	ld8.fill r30=[r2],PT(F6)-PT(R30)
-	ld8.fill r31=[r3],PT(F7)-PT(R31)
-	;;
-	rsm psr.i | psr.ic
-	// initiate turning off of interrupt and interruption collection
-	invala          // invalidate ALAT
-	;;
-	srlz.i          // ensure interruption collection is off
-	;;
-	bsw.0
-	;;
-	adds r16 = PT(CR_IPSR)+16,r12
-	adds r17 = PT(CR_IIP)+16,r12
-	mov r21=r13		// get current
-	;;
-	ld8 r31=[r16],16    // load cr.ipsr
-	ld8 r30=[r17],16    // load cr.iip
-	;;
-	ld8 r29=[r16],16    // load cr.ifs
-	ld8 r28=[r17],16    // load ar.unat
-	;;
-	ld8 r27=[r16],16    // load ar.pfs
-	ld8 r26=[r17],16    // load ar.rsc
-	;;
-	ld8 r25=[r16],16    // load ar.rnat
-	ld8 r24=[r17],16    // load ar.bspstore
-	;;
-	ld8 r23=[r16],16    // load predicates
-	ld8 r22=[r17],16    // load b0
-	;;
-	ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
-	ld8.fill r1=[r17],16    //load r1
-	;;
-	ld8.fill r12=[r16],16    //load r12
-	ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
-	;;
-	ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
-	ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
-	;;
-	ld8.fill r3=[r16]	//load r3
-	ld8 r18=[r17]	//load ar_ccv
-	;;
-	mov ar.fpsr=r19
-	mov ar.ccv=r18
-	shr.u r18=r20,16
-	;;
-kvm_rbs_switch:
-	mov r19=96
-
-kvm_dont_preserve_current_frame:
-/*
-    * To prevent leaking bits between the hypervisor and guest domain,
-    * we must clear the stacked registers in the "invalid" partition here.
-    * 5 registers/cycle on McKinley).
-    */
-#   define pRecurse	p6
-#   define pReturn	p7
-#   define Nregs	14
-
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	shr.u loc1=r18,9	// RNaTslots <= floor(dirtySize / (64*8))
-	sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
-	;;
-	mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
-	shladd in0=loc1,3,r19
-	mov in1=0
-	;;
-	TEXT_ALIGN(32)
-kvm_rse_clear_invalid:
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	cmp.lt pRecurse,p0=Nregs*8,in0
-	// if more than Nregs regs left to clear, (re)curse
-	add out0=-Nregs*8,in0
-	add out1=1,in1		// increment recursion count
-	mov loc1=0
-	mov loc2=0
-	;;
-	mov loc3=0
-	mov loc4=0
-	mov loc5=0
-	mov loc6=0
-	mov loc7=0
-(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
-	;;
-	mov loc8=0
-	mov loc9=0
-	cmp.ne pReturn,p0=r0,in1
-	// if recursion count != 0, we need to do a br.ret
-	mov loc10=0
-	mov loc11=0
-(pReturn) br.ret.dptk.many b0
-
-#	undef pRecurse
-#	undef pReturn
-
-// loadrs has already been shifted
-	alloc r16=ar.pfs,0,0,0,0    // drop current register frame
-	;;
-	loadrs
-	;;
-	mov ar.bspstore=r24
-	;;
-	mov ar.unat=r28
-	mov ar.rnat=r25
-	mov ar.rsc=r26
-	;;
-	mov cr.ipsr=r31
-	mov cr.iip=r30
-	mov cr.ifs=r29
-	mov ar.pfs=r27
-	adds r18=VMM_VPD_BASE_OFFSET,r21
-	;;
-	ld8 r18=[r18]   //vpd
-	adds r17=VMM_VCPU_ISR_OFFSET,r21
-	;;
-	ld8 r17=[r17]
-	adds r19=VMM_VPD_VPSR_OFFSET,r18
-	;;
-	ld8 r19=[r19]        //vpsr
-	mov r25=r18
-	adds r16= VMM_VCPU_GP_OFFSET,r21
-	;;
-	ld8 r16= [r16] // Put gp in r24
-	movl r24=@gprel(ia64_vmm_entry)  // calculate return address
-	;;
-	add  r24=r24,r16
-	;;
-	br.sptk.many  kvm_vps_sync_write       // call the service
-	;;
-END(ia64_leave_hypervisor)
-// fall through
-GLOBAL_ENTRY(ia64_vmm_entry)
-/*
- *  must be at bank 0
- *  parameter:
- *  r17:cr.isr
- *  r18:vpd
- *  r19:vpsr
- *  r22:b0
- *  r23:predicate
- */
-	mov r24=r22
-	mov r25=r18
-	tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
-(p1) 	br.cond.sptk.few kvm_vps_resume_normal
-(p2)	br.cond.sptk.many kvm_vps_resume_handler
-	;;
-END(ia64_vmm_entry)
-
-/*
- * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
- *                  u64 arg3, u64 arg4, u64 arg5,
- *                  u64 arg6, u64 arg7);
- *
- * XXX: The currently defined services use only 4 args at the max. The
- *  rest are not consumed.
- */
-GLOBAL_ENTRY(ia64_call_vsa)
-    .regstk 4,4,0,0
-
-rpsave  =   loc0
-pfssave =   loc1
-psrsave =   loc2
-entry   =   loc3
-hostret =   r24
-
-	alloc   pfssave=ar.pfs,4,4,0,0
-	mov rpsave=rp
-	adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
-	;;
-	ld8 entry=[entry]
-1:	mov hostret=ip
-	mov r25=in1         // copy arguments
-	mov r26=in2
-	mov r27=in3
-	mov psrsave=psr
-	;;
-	tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
-	tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
-	;;
-	add hostret=2f-1b,hostret   // calculate return address
-	add entry=entry,in0
-	;;
-	rsm psr.i | psr.ic
-	;;
-	srlz.i
-	mov b6=entry
-	br.cond.sptk b6         // call the service
-2:
-// Architectural sequence for enabling interrupts if necessary
-(p7)    ssm psr.ic
-	;;
-(p7)    srlz.i
-	;;
-(p6)    ssm psr.i
-	;;
-	mov rp=rpsave
-	mov ar.pfs=pfssave
-	mov r8=r31
-	;;
-	srlz.d
-	br.ret.sptk rp
-
-END(ia64_call_vsa)
-
-#define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
-
-GLOBAL_ENTRY(vmm_reset_entry)
-	//set up ipsr, iip, vpd.vpsr, dcr
-	// For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
-	// For DCR: all bits 0
-	bsw.0
-	;;
-	mov r21 =r13
-	adds r14=-VMM_PT_REGS_SIZE, r12
-	;;
-	movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
-	movl r10=0x8000000000000000
-	adds r16=PT(CR_IIP), r14
-	adds r20=PT(R1), r14
-	;;
-	rsm psr.ic | psr.i
-	;;
-	srlz.i
-	;;
-	mov ar.rsc = 0
-	;;
-	flushrs
-	;;
-	mov ar.bspstore = 0
-	// clear BSPSTORE
-	;;
-	mov cr.ipsr=r6
-	mov cr.ifs=r10
-	ld8 r4 = [r16] // Set init iip for first run.
-	ld8 r1 = [r20]
-	;;
-	mov cr.iip=r4
-	adds r16=VMM_VPD_BASE_OFFSET,r13
-	;;
-	ld8 r18=[r16]
-	;;
-	adds r19=VMM_VPD_VPSR_OFFSET,r18
-	;;
-	ld8 r19=[r19]
-	mov r17=r0
-	mov r22=r0
-	mov r23=r0
-	br.cond.sptk ia64_vmm_entry
-	br.ret.sptk  b0
-END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
deleted file mode 100644
index b214b5b0432d..000000000000
--- a/arch/ia64/kvm/vti.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * vti.h: prototype for generial vt related interface
- *   	Copyright (c) 2004, Intel Corporation.
- *
- *	Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- *	Fred Yang (fred.yang@intel.com)
- * 	Kun Tian (Kevin Tian) (kevin.tian@intel.com)
- *
- *  	Copyright (c) 2007, Intel Corporation.
- *  	Zhang xiantao <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#ifndef _KVM_VT_I_H
-#define _KVM_VT_I_H
-
-#ifndef __ASSEMBLY__
-#include <asm/page.h>
-
-#include <linux/kvm_host.h>
-
-/* define itr.i and itr.d  in ia64_itr function */
-#define	ITR	0x01
-#define	DTR	0x02
-#define	IaDTR	0x03
-
-#define IA64_TR_VMM       6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
-#define IA64_TR_VM_DATA   7 /*dtr7       : maps current vm data*/
-
-#define RR6 (6UL<<61)
-#define RR7 (7UL<<61)
-
-
-/* config_options in pal_vp_init_env */
-#define	VP_INITIALIZE	1UL
-#define	VP_FR_PMC	1UL<<1
-#define	VP_OPCODE	1UL<<8
-#define	VP_CAUSE	1UL<<9
-#define VP_FW_ACC   	1UL<<63
-
-/* init vp env with initializing vm_buffer */
-#define	VP_INIT_ENV_INITALIZE  (VP_INITIALIZE | VP_FR_PMC |\
-	VP_OPCODE | VP_CAUSE | VP_FW_ACC)
-/* init vp env without initializing vm_buffer */
-#define	VP_INIT_ENV  VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
-
-#define		PAL_VP_CREATE   265
-/* Stacked Virt. Initializes a new VPD for the operation of
- * a new virtual processor in the virtual environment.
- */
-#define		PAL_VP_ENV_INFO 266
-/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
-#define		PAL_VP_EXIT_ENV 267
-/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
-#define		PAL_VP_INIT_ENV 268
-/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
-#define		PAL_VP_REGISTER 269
-/*Stacked Virt. Register a different host IVT for the virtual processor.*/
-#define		PAL_VP_RESUME   270
-/* Renamed from PAL_VP_RESUME */
-#define		PAL_VP_RESTORE  270
-/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
-#define		PAL_VP_SUSPEND  271
-/* Renamed from PAL_VP_SUSPEND */
-#define		PAL_VP_SAVE	271
-/* Stacked Virt. Suspends operation for the specified virtual processor on
- * the logical processor.
- */
-#define		PAL_VP_TERMINATE 272
-/* Stacked Virt. Terminates operation for the specified virtual processor.*/
-
-union vac {
-	unsigned long value;
-	struct {
-		unsigned int a_int:1;
-		unsigned int a_from_int_cr:1;
-		unsigned int a_to_int_cr:1;
-		unsigned int a_from_psr:1;
-		unsigned int a_from_cpuid:1;
-		unsigned int a_cover:1;
-		unsigned int a_bsw:1;
-		long reserved:57;
-	};
-};
-
-union vdc {
-	unsigned long value;
-	struct {
-		unsigned int d_vmsw:1;
-		unsigned int d_extint:1;
-		unsigned int d_ibr_dbr:1;
-		unsigned int d_pmc:1;
-		unsigned int d_to_pmd:1;
-		unsigned int d_itm:1;
-		long reserved:58;
-	};
-};
-
-struct vpd {
-	union vac   vac;
-	union vdc   vdc;
-	unsigned long  virt_env_vaddr;
-	unsigned long  reserved1[29];
-	unsigned long  vhpi;
-	unsigned long  reserved2[95];
-	unsigned long  vgr[16];
-	unsigned long  vbgr[16];
-	unsigned long  vnat;
-	unsigned long  vbnat;
-	unsigned long  vcpuid[5];
-	unsigned long  reserved3[11];
-	unsigned long  vpsr;
-	unsigned long  vpr;
-	unsigned long  reserved4[76];
-	union {
-		unsigned long  vcr[128];
-		struct {
-			unsigned long dcr;
-			unsigned long itm;
-			unsigned long iva;
-			unsigned long rsv1[5];
-			unsigned long pta;
-			unsigned long rsv2[7];
-			unsigned long ipsr;
-			unsigned long isr;
-			unsigned long rsv3;
-			unsigned long iip;
-			unsigned long ifa;
-			unsigned long itir;
-			unsigned long iipa;
-			unsigned long ifs;
-			unsigned long iim;
-			unsigned long iha;
-			unsigned long rsv4[38];
-			unsigned long lid;
-			unsigned long ivr;
-			unsigned long tpr;
-			unsigned long eoi;
-			unsigned long irr[4];
-			unsigned long itv;
-			unsigned long pmv;
-			unsigned long cmcv;
-			unsigned long rsv5[5];
-			unsigned long lrr0;
-			unsigned long lrr1;
-			unsigned long rsv6[46];
-		};
-	};
-	unsigned long  reserved5[128];
-	unsigned long  reserved6[3456];
-	unsigned long  vmm_avail[128];
-	unsigned long  reserved7[4096];
-};
-
-#define PAL_PROC_VM_BIT		(1UL << 40)
-#define PAL_PROC_VMSW_BIT	(1UL << 54)
-
-static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
-		u64 *vp_env_info)
-{
-	struct ia64_pal_retval iprv;
-	PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
-	*buffer_size = iprv.v0;
-	*vp_env_info = iprv.v1;
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_exit_env(u64 iva)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
-			u64 vbase_addr, u64 *vsa_base)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
-			vbase_addr);
-	*vsa_base = iprv.v0;
-
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
-
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
-
-	return iprv.status;
-}
-
-#endif
-
-/*VPD field offset*/
-#define VPD_VAC_START_OFFSET		0
-#define VPD_VDC_START_OFFSET		8
-#define VPD_VHPI_START_OFFSET		256
-#define VPD_VGR_START_OFFSET		1024
-#define VPD_VBGR_START_OFFSET		1152
-#define VPD_VNAT_START_OFFSET		1280
-#define VPD_VBNAT_START_OFFSET		1288
-#define VPD_VCPUID_START_OFFSET		1296
-#define VPD_VPSR_START_OFFSET		1424
-#define VPD_VPR_START_OFFSET		1432
-#define VPD_VRSE_CFLE_START_OFFSET	1440
-#define VPD_VCR_START_OFFSET		2048
-#define VPD_VTPR_START_OFFSET		2576
-#define VPD_VRR_START_OFFSET		3072
-#define VPD_VMM_VAIL_START_OFFSET	31744
-
-/*Virtualization faults*/
-
-#define EVENT_MOV_TO_AR			 1
-#define EVENT_MOV_TO_AR_IMM		 2
-#define EVENT_MOV_FROM_AR		 3
-#define EVENT_MOV_TO_CR			 4
-#define EVENT_MOV_FROM_CR		 5
-#define EVENT_MOV_TO_PSR		 6
-#define EVENT_MOV_FROM_PSR		 7
-#define EVENT_ITC_D			 8
-#define EVENT_ITC_I			 9
-#define EVENT_MOV_TO_RR			 10
-#define EVENT_MOV_TO_DBR		 11
-#define EVENT_MOV_TO_IBR		 12
-#define EVENT_MOV_TO_PKR		 13
-#define EVENT_MOV_TO_PMC		 14
-#define EVENT_MOV_TO_PMD		 15
-#define EVENT_ITR_D			 16
-#define EVENT_ITR_I			 17
-#define EVENT_MOV_FROM_RR		 18
-#define EVENT_MOV_FROM_DBR		 19
-#define EVENT_MOV_FROM_IBR		 20
-#define EVENT_MOV_FROM_PKR		 21
-#define EVENT_MOV_FROM_PMC		 22
-#define EVENT_MOV_FROM_CPUID		 23
-#define EVENT_SSM			 24
-#define EVENT_RSM			 25
-#define EVENT_PTC_L			 26
-#define EVENT_PTC_G			 27
-#define EVENT_PTC_GA			 28
-#define EVENT_PTR_D			 29
-#define EVENT_PTR_I			 30
-#define EVENT_THASH			 31
-#define EVENT_TTAG			 32
-#define EVENT_TPA			 33
-#define EVENT_TAK			 34
-#define EVENT_PTC_E			 35
-#define EVENT_COVER			 36
-#define EVENT_RFI			 37
-#define EVENT_BSW_0			 38
-#define EVENT_BSW_1			 39
-#define EVENT_VMSW			 40
-
-/**PAL virtual services offsets */
-#define PAL_VPS_RESUME_NORMAL           0x0000
-#define PAL_VPS_RESUME_HANDLER          0x0400
-#define PAL_VPS_SYNC_READ               0x0800
-#define PAL_VPS_SYNC_WRITE              0x0c00
-#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
-#define PAL_VPS_THASH                   0x1400
-#define PAL_VPS_TTAG                    0x1800
-#define PAL_VPS_RESTORE                 0x1c00
-#define PAL_VPS_SAVE                    0x2000
-
-#endif/* _VT_I_H*/
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
deleted file mode 100644
index a7869f8f49a6..000000000000
--- a/arch/ia64/kvm/vtlb.c
+++ /dev/null
@@ -1,640 +0,0 @@
-/*
- * vtlb.c: guest virtual tlb handling module.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *
- * Copyright (c) 2007, Intel Corporation.
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include "vcpu.h"
-
-#include <linux/rwsem.h>
-
-#include <asm/tlb.h>
-
-/*
- * Check to see if the address rid:va is translated by the TLB
- */
-
-static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
-{
-	return ((trp->p) && (trp->rid == rid)
-				&& ((va-trp->vadr) < PSIZE(trp->ps)));
-}
-
-/*
- * Only for GUEST TR format.
- */
-static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
-{
-	u64 sa1, ea1;
-
-	if (!trp->p || trp->rid != rid)
-		return 0;
-
-	sa1 = trp->vadr;
-	ea1 = sa1 + PSIZE(trp->ps) - 1;
-	eva -= 1;
-	if ((sva > ea1) || (sa1 > eva))
-		return 0;
-	else
-		return 1;
-
-}
-
-void machine_tlb_purge(u64 va, u64 ps)
-{
-	ia64_ptcl(va, ps << 2);
-}
-
-void local_flush_tlb_all(void)
-{
-	int i, j;
-	unsigned long flags, count0, count1;
-	unsigned long stride0, stride1, addr;
-
-	addr    = current_vcpu->arch.ptce_base;
-	count0  = current_vcpu->arch.ptce_count[0];
-	count1  = current_vcpu->arch.ptce_count[1];
-	stride0 = current_vcpu->arch.ptce_stride[0];
-	stride1 = current_vcpu->arch.ptce_stride[1];
-
-	local_irq_save(flags);
-	for (i = 0; i < count0; ++i) {
-		for (j = 0; j < count1; ++j) {
-			ia64_ptce(addr);
-			addr += stride1;
-		}
-		addr += stride0;
-	}
-	local_irq_restore(flags);
-	ia64_srlz_i();          /* srlz.i implies srlz.d */
-}
-
-int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
-{
-	union ia64_rr    vrr;
-	union ia64_pta   vpta;
-	struct  ia64_psr   vpsr;
-
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-	vrr.val = vcpu_get_rr(vcpu, vadr);
-	vpta.val = vcpu_get_pta(vcpu);
-
-	if (vrr.ve & vpta.ve) {
-		switch (ref) {
-		case DATA_REF:
-		case NA_REF:
-			return vpsr.dt;
-		case INST_REF:
-			return vpsr.dt && vpsr.it && vpsr.ic;
-		case RSE_REF:
-			return vpsr.dt && vpsr.rt;
-
-		}
-	}
-	return 0;
-}
-
-struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
-{
-	u64 index, pfn, rid, pfn_bits;
-
-	pfn_bits = vpta.size - 5 - 8;
-	pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
-	rid = _REGION_ID(vrr);
-	index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
-	*tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
-
-	return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
-				(index << 5));
-}
-
-struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
-{
-
-	struct thash_data *trp;
-	int  i;
-	u64 rid;
-
-	rid = vcpu_get_rr(vcpu, va);
-	rid = rid & RR_RID_MASK;
-	if (type == D_TLB) {
-		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-						i < NDTRS; i++, trp++) {
-				if (__is_tr_translated(trp, rid, va))
-					return trp;
-			}
-		}
-	} else {
-		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-					i < NITRS; i++, trp++) {
-				if (__is_tr_translated(trp, rid, va))
-					return trp;
-			}
-		}
-	}
-
-	return NULL;
-}
-
-static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
-{
-	union ia64_rr rr;
-	struct thash_data *head;
-	unsigned long ps, gpaddr;
-
-	ps = itir_ps(itir);
-	rr.val = ia64_get_rr(ifa);
-
-	 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
-					(ifa & ((1UL << ps) - 1));
-
-	head = (struct thash_data *)ia64_thash(ifa);
-	head->etag = INVALID_TI_TAG;
-	ia64_mf();
-	head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
-	head->itir = rr.ps << 2;
-	head->etag = ia64_ttag(ifa);
-	head->gpaddr = gpaddr;
-}
-
-void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
-{
-	u64 i, dirty_pages = 1;
-	u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
-	vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
-	void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
-
-	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
-
-	vmm_spin_lock(lock);
-	for (i = 0; i < dirty_pages; i++) {
-		/* avoid RMW */
-		if (!test_bit(base_gfn + i, dirty_bitmap))
-			set_bit(base_gfn + i , dirty_bitmap);
-	}
-	vmm_spin_unlock(lock);
-}
-
-void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
-{
-	u64 phy_pte, psr;
-	union ia64_rr mrr;
-
-	mrr.val = ia64_get_rr(va);
-	phy_pte = translate_phy_pte(&pte, itir, va);
-
-	if (itir_ps(itir) >= mrr.ps) {
-		vhpt_insert(phy_pte, itir, va, pte);
-	} else {
-		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-		psr = ia64_clear_ic();
-		ia64_itc(type, va, phy_pte, itir_ps(itir));
-		paravirt_dv_serialize_data();
-		ia64_set_psr(psr);
-	}
-
-	if (!(pte&VTLB_PTE_IO))
-		mark_pages_dirty(v, pte, itir_ps(itir));
-}
-
-/*
- *   vhpt lookup
- */
-struct thash_data *vhpt_lookup(u64 va)
-{
-	struct thash_data *head;
-	u64 tag;
-
-	head = (struct thash_data *)ia64_thash(va);
-	tag = ia64_ttag(va);
-	if (head->etag == tag)
-		return head;
-	return NULL;
-}
-
-u64 guest_vhpt_lookup(u64 iha, u64 *pte)
-{
-	u64 ret;
-	struct thash_data *data;
-
-	data = __vtr_lookup(current_vcpu, iha, D_TLB);
-	if (data != NULL)
-		thash_vhpt_insert(current_vcpu, data->page_flags,
-			data->itir, iha, D_TLB);
-
-	asm volatile ("rsm psr.ic|psr.i;;"
-			"srlz.d;;"
-			"ld8.s r9=[%1];;"
-			"tnat.nz p6,p7=r9;;"
-			"(p6) mov %0=1;"
-			"(p6) mov r9=r0;"
-			"(p7) extr.u r9=r9,0,53;;"
-			"(p7) mov %0=r0;"
-			"(p7) st8 [%2]=r9;;"
-			"ssm psr.ic;;"
-			"srlz.d;;"
-			"ssm psr.i;;"
-			"srlz.d;;"
-			: "=&r"(ret) : "r"(iha), "r"(pte) : "memory");
-
-	return ret;
-}
-
-/*
- *  purge software guest tlb
- */
-
-static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	struct thash_data *cur;
-	u64 start, curadr, size, psbits, tag, rr_ps, num;
-	union ia64_rr vrr;
-	struct thash_cb *hcb = &v->arch.vtlb;
-
-	vrr.val = vcpu_get_rr(v, va);
-	psbits = VMX(v, psbits[(va >> 61)]);
-	start = va & ~((1UL << ps) - 1);
-	while (psbits) {
-		curadr = start;
-		rr_ps = __ffs(psbits);
-		psbits &= ~(1UL << rr_ps);
-		num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
-		size = PSIZE(rr_ps);
-		vrr.ps = rr_ps;
-		while (num) {
-			cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
-			if (cur->etag == tag && cur->ps == rr_ps)
-				cur->etag = INVALID_TI_TAG;
-			curadr += size;
-			num--;
-		}
-	}
-}
-
-
-/*
- *  purge VHPT and machine TLB
- */
-static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	struct thash_data *cur;
-	u64 start, size, tag, num;
-	union ia64_rr rr;
-
-	start = va & ~((1UL << ps) - 1);
-	rr.val = ia64_get_rr(va);
-	size = PSIZE(rr.ps);
-	num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
-	while (num) {
-		cur = (struct thash_data *)ia64_thash(start);
-		tag = ia64_ttag(start);
-		if (cur->etag == tag)
-			cur->etag = INVALID_TI_TAG;
-		start += size;
-		num--;
-	}
-	machine_tlb_purge(va, ps);
-}
-
-/*
- * Insert an entry into hash TLB or VHPT.
- * NOTES:
- *  1: When inserting VHPT to thash, "va" is a must covered
- *  address by the inserted machine VHPT entry.
- *  2: The format of entry is always in TLB.
- *  3: The caller need to make sure the new entry will not overlap
- *     with any existed entry.
- */
-void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
-{
-	struct thash_data *head;
-	union ia64_rr vrr;
-	u64 tag;
-	struct thash_cb *hcb = &v->arch.vtlb;
-
-	vrr.val = vcpu_get_rr(v, va);
-	vrr.ps = itir_ps(itir);
-	VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
-	head = vsa_thash(hcb->pta, va, vrr.val, &tag);
-	head->page_flags = pte;
-	head->itir = itir;
-	head->etag = tag;
-}
-
-int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
-{
-	struct thash_data  *trp;
-	int  i;
-	u64 end, rid;
-
-	rid = vcpu_get_rr(vcpu, va);
-	rid = rid & RR_RID_MASK;
-	end = va + PSIZE(ps);
-	if (type == D_TLB) {
-		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-					i < NDTRS; i++, trp++) {
-				if (__is_tr_overlap(trp, rid, va, end))
-					return i;
-			}
-		}
-	} else {
-		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-					i < NITRS; i++, trp++) {
-				if (__is_tr_overlap(trp, rid, va, end))
-					return i;
-			}
-		}
-	}
-	return -1;
-}
-
-/*
- * Purge entries in VTLB and VHPT
- */
-void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	if (vcpu_quick_region_check(v->arch.tc_regions, va))
-		vtlb_purge(v, va, ps);
-	vhpt_purge(v, va, ps);
-}
-
-void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	u64 old_va = va;
-	va = REGION_OFFSET(va);
-	if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
-		vtlb_purge(v, va, ps);
-	vhpt_purge(v, va, ps);
-}
-
-u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
-{
-	u64 ps, ps_mask, paddr, maddr, io_mask;
-	union pte_flags phy_pte;
-
-	ps = itir_ps(itir);
-	ps_mask = ~((1UL << ps) - 1);
-	phy_pte.val = *pte;
-	paddr = *pte;
-	paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
-	maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
-	io_mask = maddr & GPFN_IO_MASK;
-	if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
-		*pte |= VTLB_PTE_IO;
-		return -1;
-	}
-	maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
-					(paddr & ~PAGE_MASK);
-	phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
-	return phy_pte.val;
-}
-
-/*
- * Purge overlap TCs and then insert the new entry to emulate itc ops.
- * Notes: Only TC entry can purge and insert.
- */
-void  thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
-						u64 ifa, int type)
-{
-	u64 ps;
-	u64 phy_pte, io_mask, index;
-	union ia64_rr vrr, mrr;
-
-	ps = itir_ps(itir);
-	vrr.val = vcpu_get_rr(v, ifa);
-	mrr.val = ia64_get_rr(ifa);
-
-	index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-	io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
-	phy_pte = translate_phy_pte(&pte, itir, ifa);
-
-	/* Ensure WB attribute if pte is related to a normal mem page,
-	 * which is required by vga acceleration since qemu maps shared
-	 * vram buffer with WB.
-	 */
-	if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) &&
-			io_mask != GPFN_PHYS_MMIO) {
-		pte &= ~_PAGE_MA_MASK;
-		phy_pte &= ~_PAGE_MA_MASK;
-	}
-
-	vtlb_purge(v, ifa, ps);
-	vhpt_purge(v, ifa, ps);
-
-	if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
-		vtlb_insert(v, pte, itir, ifa);
-		vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-	}
-	if (pte & VTLB_PTE_IO)
-		return;
-
-	if (ps >= mrr.ps)
-		vhpt_insert(phy_pte, itir, ifa, pte);
-	else {
-		u64 psr;
-		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-		psr = ia64_clear_ic();
-		ia64_itc(type, ifa, phy_pte, ps);
-		paravirt_dv_serialize_data();
-		ia64_set_psr(psr);
-	}
-	if (!(pte&VTLB_PTE_IO))
-		mark_pages_dirty(v, pte, ps);
-
-}
-
-/*
- * Purge all TCs or VHPT entries including those in Hash table.
- *
- */
-
-void thash_purge_all(struct kvm_vcpu *v)
-{
-	int i;
-	struct thash_data *head;
-	struct thash_cb  *vtlb, *vhpt;
-	vtlb = &v->arch.vtlb;
-	vhpt = &v->arch.vhpt;
-
-	for (i = 0; i < 8; i++)
-		VMX(v, psbits[i]) = 0;
-
-	head = vtlb->hash;
-	for (i = 0; i < vtlb->num; i++) {
-		head->page_flags = 0;
-		head->etag = INVALID_TI_TAG;
-		head->itir = 0;
-		head->next = 0;
-		head++;
-	};
-
-	head = vhpt->hash;
-	for (i = 0; i < vhpt->num; i++) {
-		head->page_flags = 0;
-		head->etag = INVALID_TI_TAG;
-		head->itir = 0;
-		head->next = 0;
-		head++;
-	};
-
-	local_flush_tlb_all();
-}
-
-/*
- * Lookup the hash table and its collision chain to find an entry
- * covering this address rid:va or the entry.
- *
- * INPUT:
- *  in: TLB format for both VHPT & TLB.
- */
-struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
-{
-	struct thash_data  *cch;
-	u64    psbits, ps, tag;
-	union ia64_rr vrr;
-
-	struct thash_cb *hcb = &v->arch.vtlb;
-
-	cch = __vtr_lookup(v, va, is_data);
-	if (cch)
-		return cch;
-
-	if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
-		return NULL;
-
-	psbits = VMX(v, psbits[(va >> 61)]);
-	vrr.val = vcpu_get_rr(v, va);
-	while (psbits) {
-		ps = __ffs(psbits);
-		psbits &= ~(1UL << ps);
-		vrr.ps = ps;
-		cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
-		if (cch->etag == tag && cch->ps == ps)
-			return cch;
-	}
-
-	return NULL;
-}
-
-/*
- * Initialize internal control data before service.
- */
-void thash_init(struct thash_cb *hcb, u64 sz)
-{
-	int i;
-	struct thash_data *head;
-
-	hcb->pta.val = (unsigned long)hcb->hash;
-	hcb->pta.vf = 1;
-	hcb->pta.ve = 1;
-	hcb->pta.size = sz;
-	head = hcb->hash;
-	for (i = 0; i < hcb->num; i++) {
-		head->page_flags = 0;
-		head->itir = 0;
-		head->etag = INVALID_TI_TAG;
-		head->next = 0;
-		head++;
-	}
-}
-
-u64 kvm_get_mpt_entry(u64 gpfn)
-{
-	u64 *base = (u64 *) KVM_P2M_BASE;
-
-	if (gpfn >= (KVM_P2M_SIZE >> 3))
-		panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
-
-	return *(base + gpfn);
-}
-
-u64 kvm_lookup_mpa(u64 gpfn)
-{
-	u64 maddr;
-	maddr = kvm_get_mpt_entry(gpfn);
-	return maddr&_PAGE_PPN_MASK;
-}
-
-u64 kvm_gpa_to_mpa(u64 gpa)
-{
-	u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
-	return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
-}
-
-/*
- * Fetch guest bundle code.
- * INPUT:
- *  gip: guest ip
- *  pbundle: used to return fetched bundle.
- */
-int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
-{
-	u64     gpip = 0;   /* guest physical IP*/
-	u64     *vpa;
-	struct thash_data    *tlb;
-	u64     maddr;
-
-	if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
-		/* I-side physical mode */
-		gpip = gip;
-	} else {
-		tlb = vtlb_lookup(vcpu, gip, I_TLB);
-		if (tlb)
-			gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
-				(gip & (PSIZE(tlb->ps) - 1));
-	}
-	if (gpip) {
-		maddr = kvm_gpa_to_mpa(gpip);
-	} else {
-		tlb = vhpt_lookup(gip);
-		if (tlb == NULL) {
-			ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
-			return IA64_FAULT;
-		}
-		maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
-					| (gip & (PSIZE(tlb->ps) - 1));
-	}
-	vpa = (u64 *)__kvm_va(maddr);
-
-	pbundle->i64[0] = *vpa++;
-	pbundle->i64[1] = *vpa;
-
-	return IA64_NO_FAULT;
-}
-
-void kvm_init_vhpt(struct kvm_vcpu *v)
-{
-	v->arch.vhpt.num = VHPT_NUM_ENTRIES;
-	thash_init(&v->arch.vhpt, VHPT_SHIFT);
-	ia64_set_pta(v->arch.vhpt.pta.val);
-	/*Enable VHPT here?*/
-}
-
-void kvm_init_vtlb(struct kvm_vcpu *v)
-{
-	v->arch.vtlb.num = VTLB_NUM_ENTRIES;
-	thash_init(&v->arch.vtlb, VTLB_SHIFT);
-}
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 95cef0b5f836..df19d0c47be8 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -565,6 +565,7 @@ void consistent_free(size_t size, void *vaddr);
 void consistent_sync(void *vaddr, size_t size, int direction);
 void consistent_sync_page(struct page *page, unsigned long offset,
 	size_t size, int direction);
+unsigned long consistent_virt_to_pfn(void *vaddr);
 
 void setup_memory(void);
 #endif /* __ASSEMBLY__ */
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 4633c36c1b32..ed7ba8a11822 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -154,9 +154,36 @@ dma_direct_sync_sg_for_device(struct device *dev,
 			__dma_sync(sg->dma_address, sg->length, direction);
 }
 
+int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+			     void *cpu_addr, dma_addr_t handle, size_t size,
+			     struct dma_attrs *attrs)
+{
+#ifdef CONFIG_MMU
+	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long off = vma->vm_pgoff;
+	unsigned long pfn;
+
+	if (off >= count || user_count > (count - off))
+		return -ENXIO;
+
+#ifdef NOT_COHERENT_CACHE
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	pfn = consistent_virt_to_pfn(cpu_addr);
+#else
+	pfn = virt_to_pfn(cpu_addr);
+#endif
+	return remap_pfn_range(vma, vma->vm_start, pfn + off,
+			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
+#else
+	return -ENXIO;
+#endif
+}
+
 struct dma_map_ops dma_direct_ops = {
 	.alloc		= dma_direct_alloc_coherent,
 	.free		= dma_direct_free_coherent,
+	.mmap		= dma_direct_mmap_coherent,
 	.map_sg		= dma_direct_map_sg,
 	.dma_supported	= dma_direct_dma_supported,
 	.map_page	= dma_direct_map_page,
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index e10ad930895e..b06c3a7faf20 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -156,6 +156,25 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
 }
 EXPORT_SYMBOL(consistent_alloc);
 
+#ifdef CONFIG_MMU
+static pte_t *consistent_virt_to_pte(void *vaddr)
+{
+	unsigned long addr = (unsigned long)vaddr;
+
+	return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr);
+}
+
+unsigned long consistent_virt_to_pfn(void *vaddr)
+{
+	pte_t *ptep = consistent_virt_to_pte(vaddr);
+
+	if (pte_none(*ptep) || !pte_present(*ptep))
+		return 0;
+
+	return pte_pfn(*ptep);
+}
+#endif
+
 /*
  * free page(s) as defined by the above mapping.
  */
@@ -181,13 +200,9 @@ void consistent_free(size_t size, void *vaddr)
 	} while (size -= PAGE_SIZE);
 #else
 	do {
-		pte_t *ptep;
+		pte_t *ptep = consistent_virt_to_pte(vaddr);
 		unsigned long pfn;
 
-		ptep = pte_offset_kernel(pmd_offset(pgd_offset_k(
-						(unsigned int)vaddr),
-					(unsigned int)vaddr),
-				(unsigned int)vaddr);
 		if (!pte_none(*ptep) && pte_present(*ptep)) {
 			pfn = pte_pfn(*ptep);
 			pte_clear(&init_mm, (unsigned int)vaddr, ptep);
diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c
index 203e4403c366..48a9dfc55b51 100644
--- a/arch/mips/alchemy/common/clock.c
+++ b/arch/mips/alchemy/common/clock.c
@@ -374,7 +374,7 @@ static long alchemy_calc_div(unsigned long rate, unsigned long prate,
 
 static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk,
+					struct clk_hw **best_parent_clk,
 					int scale, int maxdiv)
 {
 	struct clk *pc, *bpc, *free;
@@ -453,7 +453,7 @@ static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate,
 	}
 
 	*best_parent_rate = bpr;
-	*best_parent_clk = bpc;
+	*best_parent_clk = __clk_get_hw(bpc);
 	return br;
 }
 
@@ -547,7 +547,7 @@ static unsigned long alchemy_clk_fgv1_recalc(struct clk_hw *hw,
 
 static long alchemy_clk_fgv1_detr(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk)
+					struct clk_hw **best_parent_clk)
 {
 	return alchemy_clk_fgcs_detr(hw, rate, best_parent_rate,
 				     best_parent_clk, 2, 512);
@@ -679,7 +679,7 @@ static unsigned long alchemy_clk_fgv2_recalc(struct clk_hw *hw,
 
 static long alchemy_clk_fgv2_detr(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk)
+					struct clk_hw **best_parent_clk)
 {
 	struct alchemy_fgcs_clk *c = to_fgcs_clk(hw);
 	int scale, maxdiv;
@@ -898,7 +898,7 @@ static int alchemy_clk_csrc_setr(struct clk_hw *hw, unsigned long rate,
 
 static long alchemy_clk_csrc_detr(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk)
+					struct clk_hw **best_parent_clk)
 {
 	struct alchemy_fgcs_clk *c = to_fgcs_clk(hw);
 	int scale = c->dt[2] == 3 ? 1 : 2; /* au1300 check */
diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig
index 46e8f7676a15..3bdb72a70364 100644
--- a/arch/mips/configs/db1xxx_defconfig
+++ b/arch/mips/configs/db1xxx_defconfig
@@ -36,7 +36,7 @@ CONFIG_PCI=y
 CONFIG_PCI_REALLOC_ENABLE_AUTO=y
 CONFIG_PCCARD=y
 CONFIG_PCMCIA_ALCHEMY_DEVBOARD=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 227a9de32246..e51aad9a94b1 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -37,7 +37,6 @@ CONFIG_MIPS32_N32=y
 CONFIG_PM=y
 CONFIG_HIBERNATION=y
 CONFIG_PM_STD_PARTITION="/dev/hda3"
-CONFIG_PM_RUNTIME=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_DEBUG=y
 CONFIG_CPU_FREQ_STAT=m
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 1c6191ebd583..7eabcd2031ea 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -58,7 +58,7 @@ CONFIG_BINFMT_MISC=m
 CONFIG_MIPS32_COMPAT=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig
index 70509a48df82..b3d1d37f85ea 100644
--- a/arch/mips/configs/nlm_xlp_defconfig
+++ b/arch/mips/configs/nlm_xlp_defconfig
@@ -61,7 +61,7 @@ CONFIG_BINFMT_MISC=y
 CONFIG_MIPS32_COMPAT=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
index 82207e8079f3..3d8016d6cf3e 100644
--- a/arch/mips/configs/nlm_xlr_defconfig
+++ b/arch/mips/configs/nlm_xlr_defconfig
@@ -41,7 +41,7 @@ CONFIG_PCI=y
 CONFIG_PCI_MSI=y
 CONFIG_PCI_DEBUG=y
 CONFIG_BINFMT_MISC=m
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 7cba480568c8..70795a67a276 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -30,7 +30,7 @@ retry:
 
 	return pte;
 #else
-	return ACCESS_ONCE(*ptep);
+	return READ_ONCE(*ptep);
 #endif
 }
 
diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile
index e142c9ee51fa..2328f82ba2a8 100644
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -14,6 +14,8 @@
 # Nios2 port by Wind River Systems Inc trough:
 #   fredrik.markstrom@gmail.com and ivarholmqvist@gmail.com
 
+KBUILD_DEFCONFIG := 3c120_defconfig
+
 UTS_SYSNAME = Linux
 
 export MMU
diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h
index 9102bfd3fa1c..6e24d7cceb0c 100644
--- a/arch/nios2/include/asm/io.h
+++ b/arch/nios2/include/asm/io.h
@@ -45,6 +45,8 @@ static inline void iounmap(void __iomem *addr)
 	__iounmap(addr);
 }
 
+#define ioremap_wc ioremap_nocache
+
 /* Pages to physical address... */
 #define page_to_phys(page)	virt_to_phys(page_to_virt(page))
 #define page_to_bus(page)	page_to_virt(page)
diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
index acedc0a2860e..caa51ff85a3c 100644
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h
@@ -168,7 +168,7 @@ do {									\
 	const __typeof__(*(ptr)) __user *__gu_ptr = (ptr);		\
 	unsigned long __gu_val;						\
 	__get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\
-	(x) = (__typeof__(x))__gu_val;					\
+	(x) = (__force __typeof__(x))__gu_val;				\
 	__gu_err;							\
 	})
 
@@ -180,7 +180,7 @@ do {									\
 	if (access_ok(VERIFY_READ,  __gu_ptr, sizeof(*__gu_ptr)))	\
 		__get_user_common(__gu_val, sizeof(*__gu_ptr),		\
 			__gu_ptr, __gu_err);				\
-	(x) = (__typeof__(x))__gu_val;					\
+	(x) = (__force __typeof__(x))__gu_val;				\
 	__gu_err;							\
 })
 
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index d2d11b7055ba..8121aa6db2ff 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -33,11 +33,18 @@
 
 #endif /*!CONFIG_PA20*/
 
-/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.  */
+/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.
+   We don't explicitly expose that "*a" may be written as reload
+   fails to find a register in class R1_REGS when "a" needs to be
+   reloaded when generating 64-bit PIC code.  Instead, we clobber
+   memory to indicate to the compiler that the assembly code reads
+   or writes to items other than those listed in the input and output
+   operands.  This may pessimize the code somewhat but __ldcw is
+   usually used within code blocks surrounded by memory barriors.  */
 #define __ldcw(a) ({						\
 	unsigned __ret;						\
-	__asm__ __volatile__(__LDCW " 0(%2),%0"			\
-		: "=r" (__ret), "+m" (*(a)) : "r" (a));		\
+	__asm__ __volatile__(__LDCW " 0(%1),%0"			\
+		: "=r" (__ret) : "r" (a) : "memory");		\
 	__ret;							\
 })
 
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 2e637c881d2b..879de5efb073 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -36,7 +36,7 @@ CONFIG_KEXEC=y
 CONFIG_SCHED_SMT=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE=""
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 # CONFIG_SECCOMP is not set
 # CONFIG_PCI is not set
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
new file mode 100644
index 000000000000..d2f99ca1e3a6
--- /dev/null
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_POWERPC_CPUIDLE_H
+#define _ASM_POWERPC_CPUIDLE_H
+
+#ifdef CONFIG_PPC_POWERNV
+/* Used in powernv idle state management */
+#define PNV_THREAD_RUNNING              0
+#define PNV_THREAD_NAP                  1
+#define PNV_THREAD_SLEEP                2
+#define PNV_THREAD_WINKLE               3
+#define PNV_CORE_IDLE_LOCK_BIT          0x100
+#define PNV_CORE_IDLE_THREAD_BITS       0x0FF
+
+#ifndef __ASSEMBLY__
+extern u32 pnv_fastsleep_workaround_at_entry[];
+extern u32 pnv_fastsleep_workaround_at_exit[];
+#endif
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 6acf0c2a0f99..942c7b1678e3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 			unsigned long *nb_ret);
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
 			unsigned long gpa, bool dirty);
-extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			long pte_index, unsigned long pteh, unsigned long ptel);
 extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel,
 			pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 0aa817933e6a..2d81e202bdcc 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
-extern unsigned long kvm_rma_pages;
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
@@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	/* This covers 14..54 bits of va*/
 	rb = (v & ~0x7fUL) << 16;		/* AVA field */
 
-	rb |= v >> (62 - 8);			/*  B field */
+	rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;	/*  B field */
 	/*
 	 * AVA in v had cleared lower 23 bits. We need to derive
 	 * that from pteg index
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 047855619cc4..7efd666a3fa7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table {
 	struct page *pages[0];
 };
 
-struct kvm_rma_info {
-	atomic_t use_count;
-	unsigned long base_pfn;
-};
-
 /* XICS components, defined in book3s_xics.c */
 struct kvmppc_xics;
 struct kvmppc_icp;
@@ -214,16 +209,9 @@ struct revmap_entry {
 #define KVMPPC_RMAP_PRESENT	0x100000000ul
 #define KVMPPC_RMAP_INDEX	0xfffffffful
 
-/* Low-order bits in memslot->arch.slot_phys[] */
-#define KVMPPC_PAGE_ORDER_MASK	0x1f
-#define KVMPPC_PAGE_NO_CACHE	HPTE_R_I	/* 0x20 */
-#define KVMPPC_PAGE_WRITETHRU	HPTE_R_W	/* 0x40 */
-#define KVMPPC_GOT_PAGE		0x80
-
 struct kvm_arch_memory_slot {
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	unsigned long *rmap;
-	unsigned long *slot_phys;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 };
 
@@ -242,14 +230,12 @@ struct kvm_arch {
 	struct kvm_rma_info *rma;
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
-	int using_mmu_notifiers;
 	u32 hpt_order;
 	atomic_t vcpus_running;
 	u32 online_vcores;
 	unsigned long hpt_npte;
 	unsigned long hpt_mask;
 	atomic_t hpte_mod_interest;
-	spinlock_t slot_phys_lock;
 	cpumask_t need_tlb_flush;
 	int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -297,6 +283,7 @@ struct kvmppc_vcore {
 	struct list_head runnable_threads;
 	spinlock_t lock;
 	wait_queue_head_t wq;
+	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
 	struct kvm_vcpu *runner;
@@ -308,6 +295,7 @@ struct kvmppc_vcore {
 	ulong dpdes;		/* doorbell state (POWER8) */
 	void *mpp_buffer; /* Micro Partition Prefetch buffer */
 	bool mpp_buffer_is_valid;
+	ulong conferring_threads;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
@@ -664,6 +652,8 @@ struct kvm_vcpu_arch {
 	spinlock_t tbacct_lock;
 	u64 busy_stolen;
 	u64 busy_preempt;
+
+	u32 emul_inst;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a6dcdb6d13c1..46bf652c9169 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba, unsigned long tce);
 extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba);
-extern struct kvm_rma_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvm_rma_info *ri);
 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5cd8d2fddba9..eb95b675109b 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -56,6 +56,14 @@ struct opal_sg_list {
 #define OPAL_HARDWARE_FROZEN	-13
 #define OPAL_WRONG_STATE	-14
 #define OPAL_ASYNC_COMPLETION	-15
+#define OPAL_I2C_TIMEOUT	-17
+#define OPAL_I2C_INVALID_CMD	-18
+#define OPAL_I2C_LBUS_PARITY	-19
+#define OPAL_I2C_BKEND_OVERRUN	-20
+#define OPAL_I2C_BKEND_ACCESS	-21
+#define OPAL_I2C_ARBT_LOST	-22
+#define OPAL_I2C_NACK_RCVD	-23
+#define OPAL_I2C_STOP_ERR	-24
 
 /* API Tokens (in r0) */
 #define OPAL_INVALID_CALL			-1
@@ -152,12 +160,25 @@ struct opal_sg_list {
 #define OPAL_PCI_ERR_INJECT			96
 #define OPAL_PCI_EEH_FREEZE_SET			97
 #define OPAL_HANDLE_HMI				98
+#define OPAL_CONFIG_CPU_IDLE_STATE		99
+#define OPAL_SLW_SET_REG			100
 #define OPAL_REGISTER_DUMP_REGION		101
 #define OPAL_UNREGISTER_DUMP_REGION		102
 #define OPAL_WRITE_TPO				103
 #define OPAL_READ_TPO				104
 #define OPAL_IPMI_SEND				107
 #define OPAL_IPMI_RECV				108
+#define OPAL_I2C_REQUEST			109
+
+/* Device tree flags */
+
+/* Flags set in power-mgmt nodes in device tree if
+ * respective idle states are supported in the platform.
+ */
+#define OPAL_PM_NAP_ENABLED	0x00010000
+#define OPAL_PM_SLEEP_ENABLED	0x00020000
+#define OPAL_PM_WINKLE_ENABLED	0x00040000
+#define OPAL_PM_SLEEP_ENABLED_ER1	0x00080000
 
 #ifndef __ASSEMBLY__
 
@@ -712,6 +733,24 @@ typedef struct oppanel_line {
 	uint64_t 	line_len;
 } oppanel_line_t;
 
+/* OPAL I2C request */
+struct opal_i2c_request {
+	uint8_t	type;
+#define OPAL_I2C_RAW_READ	0
+#define OPAL_I2C_RAW_WRITE	1
+#define OPAL_I2C_SM_READ	2
+#define OPAL_I2C_SM_WRITE	3
+	uint8_t flags;
+#define OPAL_I2C_ADDR_10	0x01	/* Not supported yet */
+	uint8_t	subaddr_sz;		/* Max 4 */
+	uint8_t reserved;
+	__be16 addr;			/* 7 or 10 bit address */
+	__be16 reserved2;
+	__be32 subaddr;		/* Sub-address if any */
+	__be32 size;			/* Data size */
+	__be64 buffer_ra;		/* Buffer real address */
+};
+
 /* /sys/firmware/opal */
 extern struct kobject *opal_kobj;
 
@@ -876,11 +915,14 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
 int64_t opal_handle_hmi(void);
 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
 int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t *msg_len);
+int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
+			 struct opal_i2c_request *oreq);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 24a386cbb928..e5f22c6c4bf9 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,6 +152,16 @@ struct paca_struct {
 	u64 tm_scratch;                 /* TM scratch area for reclaim */
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+	/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
+	u32 *core_idle_state_ptr;
+	u8 thread_idle_state;		/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
+	/* Mask to indicate thread id in core */
+	u8 thread_mask;
+	/* Mask to denote subcore sibling threads */
+	u8 subcore_sibling_mask;
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* Exclusive emergency stack pointer for machine check exception. */
 	void *mc_emergency_sp;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1a5287759fc8..03cd858a401c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -194,6 +194,7 @@
 
 #define PPC_INST_NAP			0x4c000364
 #define PPC_INST_SLEEP			0x4c0003a4
+#define PPC_INST_WINKLE			0x4c0003e4
 
 /* A2 specific instructions */
 #define PPC_INST_ERATWE			0x7c0001a6
@@ -375,6 +376,7 @@
 
 #define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
 #define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
+#define PPC_WINKLE		stringify_in_c(.long PPC_INST_WINKLE)
 
 /* BHRB instructions */
 #define PPC_CLRBHRB		stringify_in_c(.long PPC_INST_CLRBHRB)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 29c3798cf800..bf117d8fb45f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -452,7 +452,8 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 extern unsigned long power7_nap(int check_irq);
-extern void power7_sleep(void);
+extern unsigned long power7_sleep(void);
+extern unsigned long power7_winkle(void);
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c998279bd85b..1c874fb533bb 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -118,8 +118,10 @@
 #define __MSR		(MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
 #ifdef __BIG_ENDIAN__
 #define MSR_		__MSR
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV)
 #else
 #define MSR_		(__MSR | MSR_LE)
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV | MSR_LE)
 #endif
 #define MSR_KERNEL	(MSR_ | MSR_64BIT)
 #define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
@@ -371,6 +373,7 @@
 #define SPRN_DBAT7L	0x23F	/* Data BAT 7 Lower Register */
 #define SPRN_DBAT7U	0x23E	/* Data BAT 7 Upper Register */
 #define SPRN_PPR	0x380	/* SMT Thread status Register */
+#define SPRN_TSCR	0x399	/* Thread Switch Control Register */
 
 #define SPRN_DEC	0x016		/* Decrement Register */
 #define SPRN_DER	0x095		/* Debug Enable Regsiter */
@@ -728,6 +731,7 @@
 #define SPRN_BESCR	806	/* Branch event status and control register */
 #define   BESCR_GE	0x8000000000000000ULL /* Global Enable */
 #define SPRN_WORT	895	/* Workload optimization register - thread */
+#define SPRN_WORC	863	/* Workload optimization register - core */
 
 #define SPRN_PMC1	787
 #define SPRN_PMC2	788
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6240698fee9a..ff21b7a2f0cc 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
 
 static inline int syscall_get_arch(void)
 {
-	return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+	int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+#ifdef __LITTLE_ENDIAN__
+	arch |= __AUDIT_ARCH_LE;
+#endif
+	return arch;
 }
 #endif	/* _ASM_SYSCALL_H */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9485b43a7c00..a0c071d24e0e 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -284,7 +284,7 @@ do {								\
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
 		might_fault();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 #endif /* __powerpc64__ */
@@ -297,7 +297,7 @@ do {								\
 	might_fault();							\
 	if (access_ok(VERIFY_READ, __gu_addr, (size)))			\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;				\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
 	__gu_err;							\
 })
 
@@ -308,7 +308,7 @@ do {								\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c161ef3f28a1..e624f9646350 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -489,7 +489,6 @@ int main(void)
 	DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
 	DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
 	DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
-	DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
 	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
 	DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
 	DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
@@ -499,6 +498,7 @@ int main(void)
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
 	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
+	DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -726,5 +726,16 @@ int main(void)
 					arch.timing_last_enter.tv32.tbl));
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+	DEFINE(PACA_CORE_IDLE_STATE_PTR,
+			offsetof(struct paca_struct, core_idle_state_ptr));
+	DEFINE(PACA_THREAD_IDLE_STATE,
+			offsetof(struct paca_struct, thread_idle_state));
+	DEFINE(PACA_THREAD_MASK,
+			offsetof(struct paca_struct, thread_mask));
+	DEFINE(PACA_SUBCORE_SIBLING_MASK,
+			offsetof(struct paca_struct, subcore_sibling_mask));
+#endif
+
 	return 0;
 }
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index db08382e19f1..c2df8150bd7a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -15,6 +15,7 @@
 #include <asm/hw_irq.h>
 #include <asm/exception-64s.h>
 #include <asm/ptrace.h>
+#include <asm/cpuidle.h>
 
 /*
  * We layout physical memory as follows:
@@ -101,23 +102,34 @@ system_reset_pSeries:
 #ifdef CONFIG_PPC_P7_NAP
 BEGIN_FTR_SECTION
 	/* Running native on arch 2.06 or later, check if we are
-	 * waking up from nap. We only handle no state loss and
-	 * supervisor state loss. We do -not- handle hypervisor
-	 * state loss at this time.
+	 * waking up from nap/sleep/winkle.
 	 */
 	mfspr	r13,SPRN_SRR1
 	rlwinm.	r13,r13,47-31,30,31
 	beq	9f
 
-	/* waking up from powersave (nap) state */
-	cmpwi	cr1,r13,2
-	/* Total loss of HV state is fatal, we could try to use the
-	 * PIR to locate a PACA, then use an emergency stack etc...
-	 * OPAL v3 based powernv platforms have new idle states
-	 * which fall in this catagory.
+	cmpwi	cr3,r13,2
+
+	/*
+	 * Check if last bit of HSPGR0 is set. This indicates whether we are
+	 * waking up from winkle.
 	 */
-	bgt	cr1,8f
 	GET_PACA(r13)
+	clrldi	r5,r13,63
+	clrrdi	r13,r13,1
+	cmpwi	cr4,r5,1
+	mtspr	SPRN_HSPRG0,r13
+
+	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi   cr2,r0,PNV_THREAD_NAP
+	bgt     cr2,8f				/* Either sleep or Winkle */
+
+	/* Waking up from nap should not cause hypervisor state loss */
+	bgt	cr3,.
+
+	/* Waking up from nap */
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	li	r0,KVM_HWTHREAD_IN_KERNEL
@@ -133,7 +145,7 @@ BEGIN_FTR_SECTION
 
 	/* Return SRR1 from power7_nap() */
 	mfspr	r3,SPRN_SRR1
-	beq	cr1,2f
+	beq	cr3,2f
 	b	power7_wakeup_noloss
 2:	b	power7_wakeup_loss
 
@@ -1382,6 +1394,7 @@ machine_check_handle_early:
 	MACHINE_CHECK_HANDLER_WINDUP
 	GET_PACA(r13)
 	ld	r1,PACAR1(r13)
+	li	r3,PNV_THREAD_NAP
 	b	power7_enter_nap_mode
 4:
 #endif
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 18c0687e5ab3..05adc8bbdef8 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -18,9 +18,25 @@
 #include <asm/hw_irq.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/opal.h>
+#include <asm/cpuidle.h>
+#include <asm/mmu-hash64.h>
 
 #undef DEBUG
 
+/*
+ * Use unused space in the interrupt stack to save and restore
+ * registers for winkle support.
+ */
+#define _SDR1	GPR3
+#define _RPR	GPR4
+#define _SPURR	GPR5
+#define _PURR	GPR6
+#define _TSCR	GPR7
+#define _DSCR	GPR8
+#define _AMOR	GPR9
+#define _WORT	GPR10
+#define _WORC	GPR11
+
 /* Idle state entry routines */
 
 #define	IDLE_STATE_ENTER_SEQ(IDLE_INST)				\
@@ -37,8 +53,7 @@
 
 /*
  * Pass requested state in r3:
- * 	0 - nap
- * 	1 - sleep
+ *	r3 - PNV_THREAD_NAP/SLEEP/WINKLE
  *
  * To check IRQ_HAPPENED in r4
  * 	0 - don't check
@@ -101,18 +116,105 @@ _GLOBAL(power7_powersave_common)
 	std	r9,_MSR(r1)
 	std	r1,PACAR1(r13)
 
-_GLOBAL(power7_enter_nap_mode)
+	/*
+	 * Go to real mode to do the nap, as required by the architecture.
+	 * Also, we need to be in real mode before setting hwthread_state,
+	 * because as soon as we do that, another thread can switch
+	 * the MMU context to the guest.
+	 */
+	LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
+	li	r6, MSR_RI
+	andc	r6, r9, r6
+	LOAD_REG_ADDR(r7, power7_enter_nap_mode)
+	mtmsrd	r6, 1		/* clear RI before setting SRR0/1 */
+	mtspr	SPRN_SRR0, r7
+	mtspr	SPRN_SRR1, r5
+	rfid
+
+	.globl	power7_enter_nap_mode
+power7_enter_nap_mode:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	/* Tell KVM we're napping */
 	li	r4,KVM_HWTHREAD_IN_NAP
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
 #endif
-	cmpwi	cr0,r3,1
-	beq	2f
+	stb	r3,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr3,r3,PNV_THREAD_SLEEP
+	bge	cr3,2f
 	IDLE_STATE_ENTER_SEQ(PPC_NAP)
 	/* No return */
-2:	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
-	/* No return */
+2:
+	/* Sleep or winkle */
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop1:
+	lwarx	r15,0,r14
+	andc	r15,r15,r7			/* Clear thread bit */
+
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+
+/*
+ * If cr0 = 0, then current thread is the last thread of the core entering
+ * sleep. Last thread needs to execute the hardware bug workaround code if
+ * required by the platform.
+ * Make the workaround call unconditionally here. The below branch call is
+ * patched out when the idle states are discovered if the platform does not
+ * require it.
+ */
+.global pnv_fastsleep_workaround_at_entry
+pnv_fastsleep_workaround_at_entry:
+	beq	fastsleep_workaround_at_entry
+
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+common_enter: /* common code for all the threads entering sleep or winkle */
+	bgt	cr3,enter_winkle
+	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+
+fastsleep_workaround_at_entry:
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+	/* Fast sleep workaround */
+	li	r3,1
+	li	r4,1
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+
+	/* Clear Lock bit */
+	li	r0,0
+	lwsync
+	stw	r0,0(r14)
+	b	common_enter
+
+enter_winkle:
+	/*
+	 * Note all register i.e per-core, per-subcore or per-thread is saved
+	 * here since any thread in the core might wake up first
+	 */
+	mfspr	r3,SPRN_SDR1
+	std	r3,_SDR1(r1)
+	mfspr	r3,SPRN_RPR
+	std	r3,_RPR(r1)
+	mfspr	r3,SPRN_SPURR
+	std	r3,_SPURR(r1)
+	mfspr	r3,SPRN_PURR
+	std	r3,_PURR(r1)
+	mfspr	r3,SPRN_TSCR
+	std	r3,_TSCR(r1)
+	mfspr	r3,SPRN_DSCR
+	std	r3,_DSCR(r1)
+	mfspr	r3,SPRN_AMOR
+	std	r3,_AMOR(r1)
+	mfspr	r3,SPRN_WORT
+	std	r3,_WORT(r1)
+	mfspr	r3,SPRN_WORC
+	std	r3,_WORC(r1)
+	IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
 
 _GLOBAL(power7_idle)
 	/* Now check if user or arch enabled NAP mode */
@@ -125,48 +227,21 @@ _GLOBAL(power7_idle)
 
 _GLOBAL(power7_nap)
 	mr	r4,r3
-	li	r3,0
+	li	r3,PNV_THREAD_NAP
 	b	power7_powersave_common
 	/* No return */
 
 _GLOBAL(power7_sleep)
-	li	r3,1
+	li	r3,PNV_THREAD_SLEEP
 	li	r4,1
 	b	power7_powersave_common
 	/* No return */
 
-/*
- * Make opal call in realmode. This is a generic function to be called
- * from realmode from reset vector. It handles endianess.
- *
- * r13 - paca pointer
- * r1  - stack pointer
- * r3  - opal token
- */
-opal_call_realmode:
-	mflr	r12
-	std	r12,_LINK(r1)
-	ld	r2,PACATOC(r13)
-	/* Set opal return address */
-	LOAD_REG_ADDR(r0,return_from_opal_call)
-	mtlr	r0
-	/* Handle endian-ness */
-	li	r0,MSR_LE
-	mfmsr	r12
-	andc	r12,r12,r0
-	mtspr	SPRN_HSRR1,r12
-	mr	r0,r3			/* Move opal token to r0 */
-	LOAD_REG_ADDR(r11,opal)
-	ld	r12,8(r11)
-	ld	r2,0(r11)
-	mtspr	SPRN_HSRR0,r12
-	hrfid
-
-return_from_opal_call:
-	FIXUP_ENDIAN
-	ld	r0,_LINK(r1)
-	mtlr	r0
-	blr
+_GLOBAL(power7_winkle)
+	li	r3,3
+	li	r4,1
+	b	power7_powersave_common
+	/* No return */
 
 #define CHECK_HMI_INTERRUPT						\
 	mfspr	r0,SPRN_SRR1;						\
@@ -181,7 +256,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
 	ld	r2,PACATOC(r13);					\
 	ld	r1,PACAR1(r13);						\
 	std	r3,ORIG_GPR3(r1);	/* Save original r3 */		\
-	li	r3,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
+	li	r0,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
 	bl	opal_call_realmode;					\
 	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
 20:	nop;
@@ -190,16 +265,190 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
 _GLOBAL(power7_wakeup_tb_loss)
 	ld	r2,PACATOC(r13);
 	ld	r1,PACAR1(r13)
+	/*
+	 * Before entering any idle state, the NVGPRs are saved in the stack
+	 * and they are restored before switching to the process context. Hence
+	 * until they are restored, they are free to be used.
+	 *
+	 * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
+	 * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
+	 * wakeup reason if we branch to kvm_start_guest.
+	 */
 
+	mfspr	r16,SPRN_SRR1
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop2:
+	lwarx	r15,0,r14
+	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	/*
+	 * Lock bit is set in one of the 2 cases-
+	 * a. In the sleep/winkle enter path, the last thread is executing
+	 * fastsleep workaround code.
+	 * b. In the wake up path, another thread is executing fastsleep
+	 * workaround undo code or resyncing timebase or restoring context
+	 * In either case loop until the lock bit is cleared.
+	 */
+	bne	core_idle_lock_held
+
+	cmpwi	cr2,r15,0
+	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
+	and	r4,r4,r15
+	cmpwi	cr1,r4,0	/* Check if first in subcore */
+
+	/*
+	 * At this stage
+	 * cr1 - 0b0100 if first thread to wakeup in subcore
+	 * cr2 - 0b0100 if first thread to wakeup in core
+	 * cr3-  0b0010 if waking up from sleep or winkle
+	 * cr4 - 0b0100 if waking up from winkle
+	 */
+
+	or	r15,r15,r7		/* Set thread bit */
+
+	beq	cr1,first_thread_in_subcore
+
+	/* Not first thread in subcore to wake up */
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+	b	common_exit
+
+core_idle_lock_held:
+	HMT_LOW
+core_idle_lock_loop:
+	lwz	r15,0(14)
+	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	core_idle_lock_loop
+	HMT_MEDIUM
+	b	lwarx_loop2
+
+first_thread_in_subcore:
+	/* First thread in subcore to wakeup */
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+
+	/*
+	 * If waking up from sleep, subcore state is not lost. Hence
+	 * skip subcore state restore
+	 */
+	bne	cr4,subcore_state_restored
+
+	/* Restore per-subcore state */
+	ld      r4,_SDR1(r1)
+	mtspr   SPRN_SDR1,r4
+	ld      r4,_RPR(r1)
+	mtspr   SPRN_RPR,r4
+	ld	r4,_AMOR(r1)
+	mtspr	SPRN_AMOR,r4
+
+subcore_state_restored:
+	/*
+	 * Check if the thread is also the first thread in the core. If not,
+	 * skip to clear_lock.
+	 */
+	bne	cr2,clear_lock
+
+first_thread_in_core:
+
+	/*
+	 * First thread in the core waking up from fastsleep. It needs to
+	 * call the fastsleep workaround code if the platform requires it.
+	 * Call it unconditionally here. The below branch instruction will
+	 * be patched out when the idle states are discovered if platform
+	 * does not require workaround.
+	 */
+.global pnv_fastsleep_workaround_at_exit
+pnv_fastsleep_workaround_at_exit:
+	b	fastsleep_workaround_at_exit
+
+timebase_resync:
+	/* Do timebase resync if we are waking up from sleep. Use cr3 value
+	 * set in exceptions-64s.S */
+	ble	cr3,clear_lock
 	/* Time base re-sync */
-	li	r3,OPAL_RESYNC_TIMEBASE
+	li	r0,OPAL_RESYNC_TIMEBASE
 	bl	opal_call_realmode;
-
 	/* TODO: Check r3 for failure */
 
+	/*
+	 * If waking up from sleep, per core state is not lost, skip to
+	 * clear_lock.
+	 */
+	bne	cr4,clear_lock
+
+	/* Restore per core state */
+	ld	r4,_TSCR(r1)
+	mtspr	SPRN_TSCR,r4
+	ld	r4,_WORC(r1)
+	mtspr	SPRN_WORC,r4
+
+clear_lock:
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	lwsync
+	stw	r15,0(r14)
+
+common_exit:
+	/*
+	 * Common to all threads.
+	 *
+	 * If waking up from sleep, hypervisor state is not lost. Hence
+	 * skip hypervisor state restore.
+	 */
+	bne	cr4,hypervisor_state_restored
+
+	/* Waking up from winkle */
+
+	/* Restore per thread state */
+	bl	__restore_cpu_power8
+
+	/* Restore SLB  from PACA */
+	ld	r8,PACA_SLBSHADOWPTR(r13)
+
+	.rept	SLB_NUM_BOLTED
+	li	r3, SLBSHADOW_SAVEAREA
+	LDX_BE	r5, r8, r3
+	addi	r3, r3, 8
+	LDX_BE	r6, r8, r3
+	andis.	r7,r5,SLB_ESID_V@h
+	beq	1f
+	slbmte	r6,r5
+1:	addi	r8,r8,16
+	.endr
+
+	ld	r4,_SPURR(r1)
+	mtspr	SPRN_SPURR,r4
+	ld	r4,_PURR(r1)
+	mtspr	SPRN_PURR,r4
+	ld	r4,_DSCR(r1)
+	mtspr	SPRN_DSCR,r4
+	ld	r4,_WORT(r1)
+	mtspr	SPRN_WORT,r4
+
+hypervisor_state_restored:
+
+	li	r5,PNV_THREAD_RUNNING
+	stb     r5,PACA_THREAD_IDLE_STATE(r13)
+
+	mtspr	SPRN_SRR1,r16
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li      r0,KVM_HWTHREAD_IN_KERNEL
+	stb     r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz     r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi   r0,0
+	beq     6f
+	b       kvm_start_guest
+6:
+#endif
+
 	REST_NVGPRS(r1)
 	REST_GPR(2, r1)
 	ld	r3,_CCR(r1)
@@ -212,6 +461,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	mtspr	SPRN_SRR0,r5
 	rfid
 
+fastsleep_workaround_at_exit:
+	li	r3,1
+	li	r4,0
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+	b	timebase_resync
+
 /*
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8b2d2dc8ef10..8ec017cb4446 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -700,7 +700,6 @@ void start_secondary(void *unused)
 	smp_store_cpu_info(cpu);
 	set_dec(tb_ticks_per_jiffy);
 	preempt_disable();
-	cpu_callin_map[cpu] = 1;
 
 	if (smp_ops->setup_cpu)
 		smp_ops->setup_cpu(cpu);
@@ -739,6 +738,14 @@ void start_secondary(void *unused)
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
 
+	/*
+	 * CPU must be marked active and online before we signal back to the
+	 * master, because the scheduler needs to see the cpu_online and
+	 * cpu_active bits set.
+	 */
+	smp_wmb();
+	cpu_callin_map[cpu] = 1;
+
 	local_irq_enable();
 
 	cpu_startup_entry(CPUHP_ONLINE);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 602eb51d20bc..f5769f19ae25 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -172,6 +172,7 @@ config KVM_XICS
 	depends on KVM_BOOK3S_64 && !KVM_MPIC
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQFD
+	default y
 	---help---
 	  Include support for the XICS (eXternal Interrupt Controller
 	  Specification) interrupt controller architecture used on
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b32db4b95361..888bf466d8c6 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
 void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index cd0b0730e29e..a2eb6d354a57 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw)
 	return (sr_raw & 0x20000000) ? true: false;
 }
 
-static inline bool sr_nx(u32 sr_raw)
-{
-	return (sr_raw & 0x10000000) ? true: false;
-}
-
 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
 					  struct kvmppc_pte *pte, bool data,
 					  bool iswrite);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d40770248b6a..534acb3c6c3d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
-/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
-#define MAX_LPID_970	63
+#include "trace_hv.h"
 
 /* Power architecture requires HPT is at least 256kB */
 #define PPC_MIN_HPT_ORDER	18
@@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void)
 	if (!cpu_has_feature(CPU_FTR_HVMODE))
 		return -EINVAL;
 
-	/* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
-	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
-		host_lpid = mfspr(SPRN_LPID);	/* POWER7 */
-		rsvd_lpid = LPID_RSVD;
-	} else {
-		host_lpid = 0;			/* PPC970 */
-		rsvd_lpid = MAX_LPID_970;
-	}
+	/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
+	host_lpid = mfspr(SPRN_LPID);
+	rsvd_lpid = LPID_RSVD;
 
 	kvmppc_init_lpid(rsvd_lpid + 1);
 
@@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
 	kvmppc_set_msr(vcpu, msr);
 }
 
-/*
- * This is called to get a reference to a guest page if there isn't
- * one already in the memslot->arch.slot_phys[] array.
- */
-static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
-				  struct kvm_memory_slot *memslot,
-				  unsigned long psize)
-{
-	unsigned long start;
-	long np, err;
-	struct page *page, *hpage, *pages[1];
-	unsigned long s, pgsize;
-	unsigned long *physp;
-	unsigned int is_io, got, pgorder;
-	struct vm_area_struct *vma;
-	unsigned long pfn, i, npages;
-
-	physp = memslot->arch.slot_phys;
-	if (!physp)
-		return -EINVAL;
-	if (physp[gfn - memslot->base_gfn])
-		return 0;
-
-	is_io = 0;
-	got = 0;
-	page = NULL;
-	pgsize = psize;
-	err = -EINVAL;
-	start = gfn_to_hva_memslot(memslot, gfn);
-
-	/* Instantiate and get the page we want access to */
-	np = get_user_pages_fast(start, 1, 1, pages);
-	if (np != 1) {
-		/* Look up the vma for the page */
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, start);
-		if (!vma || vma->vm_start > start ||
-		    start + psize > vma->vm_end ||
-		    !(vma->vm_flags & VM_PFNMAP))
-			goto up_err;
-		is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
-		pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
-		/* check alignment of pfn vs. requested page size */
-		if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
-			goto up_err;
-		up_read(&current->mm->mmap_sem);
-
-	} else {
-		page = pages[0];
-		got = KVMPPC_GOT_PAGE;
-
-		/* See if this is a large page */
-		s = PAGE_SIZE;
-		if (PageHuge(page)) {
-			hpage = compound_head(page);
-			s <<= compound_order(hpage);
-			/* Get the whole large page if slot alignment is ok */
-			if (s > psize && slot_is_aligned(memslot, s) &&
-			    !(memslot->userspace_addr & (s - 1))) {
-				start &= ~(s - 1);
-				pgsize = s;
-				get_page(hpage);
-				put_page(page);
-				page = hpage;
-			}
-		}
-		if (s < psize)
-			goto out;
-		pfn = page_to_pfn(page);
-	}
-
-	npages = pgsize >> PAGE_SHIFT;
-	pgorder = __ilog2(npages);
-	physp += (gfn - memslot->base_gfn) & ~(npages - 1);
-	spin_lock(&kvm->arch.slot_phys_lock);
-	for (i = 0; i < npages; ++i) {
-		if (!physp[i]) {
-			physp[i] = ((pfn + i) << PAGE_SHIFT) +
-				got + is_io + pgorder;
-			got = 0;
-		}
-	}
-	spin_unlock(&kvm->arch.slot_phys_lock);
-	err = 0;
-
- out:
-	if (got)
-		put_page(page);
-	return err;
-
- up_err:
-	up_read(&current->mm->mmap_sem);
-	return err;
-}
-
 long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 				long pte_index, unsigned long pteh,
 				unsigned long ptel, unsigned long *pte_idx_ret)
 {
-	unsigned long psize, gpa, gfn;
-	struct kvm_memory_slot *memslot;
 	long ret;
 
-	if (kvm->arch.using_mmu_notifiers)
-		goto do_insert;
-
-	psize = hpte_page_size(pteh, ptel);
-	if (!psize)
-		return H_PARAMETER;
-
-	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
-
-	/* Find the memslot (if any) for this address */
-	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
-	gfn = gpa >> PAGE_SHIFT;
-	memslot = gfn_to_memslot(kvm, gfn);
-	if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
-		if (!slot_is_aligned(memslot, psize))
-			return H_PARAMETER;
-		if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
-			return H_PARAMETER;
-	}
-
- do_insert:
 	/* Protect linux PTE lookup from page table destruction */
 	rcu_read_lock_sched();	/* this disables preemption too */
 	ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
@@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 }
 
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			     long pte_index, unsigned long pteh,
-			     unsigned long ptel)
-{
-	return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
-					  pteh, ptel, &vcpu->arch.gpr[4]);
-}
-
 static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
 							 gva_t eaddr)
 {
@@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
 
 	/* Storage key permission check for POWER7 */
-	if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+	if (data && virtmode) {
 		int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
 		if (amrfield & 1)
 			gpte->may_read = 0;
@@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	gfn = gpa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(kvm, gfn);
 
+	trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
 	/* No memslot means it's an emulated MMIO region */
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
 					      dsisr & DSISR_ISSTORE);
 
-	if (!kvm->arch.using_mmu_notifiers)
-		return -EFAULT;		/* should never get here */
-
 	/*
 	 * This should never happen, because of the slot_is_aligned()
 	 * check in kvmppc_do_h_enter().
@@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	mmu_seq = kvm->mmu_notifier_seq;
 	smp_rmb();
 
+	ret = -EFAULT;
 	is_io = 0;
 	pfn = 0;
 	page = NULL;
@@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 		up_read(&current->mm->mmap_sem);
 		if (!pfn)
-			return -EFAULT;
+			goto out_put;
 	} else {
 		page = pages[0];
 		pfn = page_to_pfn(page);
@@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
-	ret = -EFAULT;
 	if (psize > pte_size)
 		goto out_put;
 
 	/* Check WIMG vs. the actual page we're accessing */
 	if (!hpte_cache_flags_ok(r, is_io)) {
 		if (is_io)
-			return -EFAULT;
+			goto out_put;
+
 		/*
 		 * Allow guest to map emulated device memory as
 		 * uncacheable, but actually make it cacheable.
@@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		SetPageDirty(page);
 
  out_put:
+	trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
 	if (page) {
 		/*
 		 * We drop pages[0] here, not page because page might
@@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
 		if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
 		    hpte_rpn(ptel, psize) == gfn) {
-			if (kvm->arch.using_mmu_notifiers)
-				hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+			hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
 			kvmppc_invalidate_hpte(kvm, hptep, i);
 			/* Harvest R and C */
 			rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
@@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-	if (kvm->arch.using_mmu_notifiers)
-		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 	return 0;
 }
 
 int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	if (kvm->arch.using_mmu_notifiers)
-		kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+	kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
 	return 0;
 }
 
@@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	if (!kvm->arch.using_mmu_notifiers)
-		return 0;
 	return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
 }
 
@@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-	if (!kvm->arch.using_mmu_notifiers)
-		return 0;
 	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
 }
 
 void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
-	if (!kvm->arch.using_mmu_notifiers)
-		return;
 	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 }
 
@@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 		}
 
 		/* Now check and modify the HPTE */
-		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID)))
+		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
+			/* unlock and continue */
+			hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 			continue;
+		}
 
 		/* need to make it temporarily absent so C is stable */
 		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	struct page *page, *pages[1];
 	int npages;
 	unsigned long hva, offset;
-	unsigned long pa;
-	unsigned long *physp;
 	int srcu_idx;
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	memslot = gfn_to_memslot(kvm, gfn);
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		goto err;
-	if (!kvm->arch.using_mmu_notifiers) {
-		physp = memslot->arch.slot_phys;
-		if (!physp)
-			goto err;
-		physp += gfn - memslot->base_gfn;
-		pa = *physp;
-		if (!pa) {
-			if (kvmppc_get_guest_page(kvm, gfn, memslot,
-						  PAGE_SIZE) < 0)
-				goto err;
-			pa = *physp;
-		}
-		page = pfn_to_page(pa >> PAGE_SHIFT);
-		get_page(page);
-	} else {
-		hva = gfn_to_hva_memslot(memslot, gfn);
-		npages = get_user_pages_fast(hva, 1, 1, pages);
-		if (npages < 1)
-			goto err;
-		page = pages[0];
-	}
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	npages = get_user_pages_fast(hva, 1, 1, pages);
+	if (npages < 1)
+		goto err;
+	page = pages[0];
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
 	offset = gpa & (PAGE_SIZE - 1);
@@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
 
 	put_page(page);
 
-	if (!dirty || !kvm->arch.using_mmu_notifiers)
+	if (!dirty)
 		return;
 
 	/* We need to mark this page dirty in the rmap chain */
@@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 		hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
 		lbuf = (unsigned long __user *)buf;
 		for (j = 0; j < hdr.n_valid; ++j) {
+			__be64 hpte_v;
+			__be64 hpte_r;
+
 			err = -EFAULT;
-			if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+			if (__get_user(hpte_v, lbuf) ||
+			    __get_user(hpte_r, lbuf + 1))
 				goto out;
+			v = be64_to_cpu(hpte_v);
+			r = be64_to_cpu(hpte_r);
 			err = -EINVAL;
 			if (!(v & HPTE_V_VALID))
 				goto out;
@@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
-		vcpu->arch.slb_nr = 32;		/* POWER7 */
-	else
-		vcpu->arch.slb_nr = 64;
+	vcpu->arch.slb_nr = 32;		/* POWER7/POWER8 */
 
 	mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
 	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e63587d30b70..de4018a1bc4b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -58,6 +58,9 @@
 
 #include "book3s.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
+
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
@@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * stolen.
  *
  * Updates to busy_stolen are protected by arch.tbacct_lock;
- * updates to vc->stolen_tb are protected by the arch.tbacct_lock
- * of the vcpu that has taken responsibility for running the vcore
- * (i.e. vc->runner).  The stolen times are measured in units of
- * timebase ticks.  (Note that the != TB_NIL checks below are
- * purely defensive; they should never fail.)
+ * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
+ * lock.  The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
  */
 
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
-	    vc->preempt_tb != TB_NIL) {
-		vc->stolen_tb += mftb() - vc->preempt_tb;
-		vc->preempt_tb = TB_NIL;
+	/*
+	 * We can test vc->runner without taking the vcore lock,
+	 * because only this task ever sets vc->runner to this
+	 * vcpu, and once it is set to this vcpu, only this task
+	 * ever sets it to NULL.
+	 */
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
+		if (vc->preempt_tb != TB_NIL) {
+			vc->stolen_tb += mftb() - vc->preempt_tb;
+			vc->preempt_tb = TB_NIL;
+		}
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
 	    vcpu->arch.busy_preempt != TB_NIL) {
 		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
 		vc->preempt_tb = mftb();
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
 		vcpu->arch.busy_preempt = mftb();
 	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	if (arch_compat) {
-		if (!cpu_has_feature(CPU_FTR_ARCH_206))
-			return -EINVAL;	/* 970 has no compat mode support */
-
 		switch (arch_compat) {
 		case PVR_ARCH_205:
 			/*
@@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
 {
 	u64 p;
+	unsigned long flags;
 
-	/*
-	 * If we are the task running the vcore, then since we hold
-	 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
-	 * can't be updated, so we don't need the tbacct_lock.
-	 * If the vcore is inactive, it can't become active (since we
-	 * hold the vcore lock), so the vcpu load/put functions won't
-	 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
-	 */
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	p = vc->stolen_tb;
 	if (vc->vcore_state != VCORE_INACTIVE &&
-	    vc->runner->arch.run_task != current) {
-		spin_lock_irq(&vc->runner->arch.tbacct_lock);
-		p = vc->stolen_tb;
-		if (vc->preempt_tb != TB_NIL)
-			p += now - vc->preempt_tb;
-		spin_unlock_irq(&vc->runner->arch.tbacct_lock);
-	} else {
-		p = vc->stolen_tb;
-	}
+	    vc->preempt_tb != TB_NIL)
+		p += now - vc->preempt_tb;
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	return p;
 }
 
@@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 	}
 }
 
+static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
+{
+	struct kvmppc_vcore *vcore = target->arch.vcore;
+
+	/*
+	 * We expect to have been called by the real mode handler
+	 * (kvmppc_rm_h_confer()) which would have directly returned
+	 * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
+	 * have useful work to do and should not confer) so we don't
+	 * recheck that here.
+	 */
+
+	spin_lock(&vcore->lock);
+	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	    vcore->vcore_state != VCORE_INACTIVE)
+		target = vcore->runner;
+	spin_unlock(&vcore->lock);
+
+	return kvm_vcpu_yield_to(target);
+}
+
+static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
+{
+	int yield_count = 0;
+	struct lppaca *lppaca;
+
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+	if (lppaca)
+		yield_count = lppaca->yield_count;
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+	return yield_count;
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
+	int yield_count;
 	struct kvm_vcpu *tvcpu;
 	int idx, rc;
 
@@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		return RESUME_HOST;
 
 	switch (req) {
-	case H_ENTER:
-		idx = srcu_read_lock(&vcpu->kvm->srcu);
-		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
-					      kvmppc_get_gpr(vcpu, 5),
-					      kvmppc_get_gpr(vcpu, 6),
-					      kvmppc_get_gpr(vcpu, 7));
-		srcu_read_unlock(&vcpu->kvm->srcu, idx);
-		break;
 	case H_CEDE:
 		break;
 	case H_PROD:
@@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 			ret = H_PARAMETER;
 			break;
 		}
-		kvm_vcpu_yield_to(tvcpu);
+		yield_count = kvmppc_get_gpr(vcpu, 5);
+		if (kvmppc_get_yield_count(tvcpu) != yield_count)
+			break;
+		kvm_arch_vcpu_yield_to(tvcpu);
 		break;
 	case H_REGISTER_VPA:
 		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		vcpu->stat.ext_intr_exits++;
 		r = RESUME_GUEST;
 		break;
+	/* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
+	case BOOK3S_INTERRUPT_HMI:
 	case BOOK3S_INTERRUPT_PERFMON:
 		r = RESUME_GUEST;
 		break;
@@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * Accordingly return to Guest or Host.
 	 */
 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+		if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
+			vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
+				swab32(vcpu->arch.emul_inst) :
+				vcpu->arch.emul_inst;
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
 			r = kvmppc_emulate_debug_inst(run, vcpu);
 		} else {
@@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 
 	INIT_LIST_HEAD(&vcore->runnable_threads);
 	spin_lock_init(&vcore->lock);
+	spin_lock_init(&vcore->stoltb_lock);
 	init_waitqueue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
@@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->n_woken = 0;
 	vc->nap_count = 0;
 	vc->entry_exit_count = 0;
+	vc->preempt_tb = TB_NIL;
 	vc->vcore_state = VCORE_STARTING;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
+	vc->conferring_threads = 0;
 
 	/*
 	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
@@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		kvmppc_start_thread(vcpu);
 		kvmppc_create_dtl_entry(vcpu, vc);
+		trace_kvm_guest_enter(vcpu);
 	}
 
 	/* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
+
+	trace_kvmppc_run_core(vc, 0);
+
 	spin_unlock(&vc->lock);
 
 	kvm_guest_enter();
@@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 		    kvmppc_core_pending_dec(vcpu))
 			kvmppc_core_dequeue_dec(vcpu);
 
+		trace_kvm_guest_exit(vcpu);
+
 		ret = RESUME_GUEST;
 		if (vcpu->arch.trap)
 			ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
@@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
+
+	trace_kvmppc_run_core(vc, 1);
 }
 
 /*
@@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
  */
 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 {
+	struct kvm_vcpu *vcpu;
+	int do_sleep = 1;
+
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+	/*
+	 * Check one last time for pending exceptions and ceded state after
+	 * we put ourselves on the wait queue
+	 */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+		if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
+			do_sleep = 0;
+			break;
+		}
+	}
+
+	if (!do_sleep) {
+		finish_wait(&vc->wq, &wait);
+		return;
+	}
+
 	vc->vcore_state = VCORE_SLEEPING;
+	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
 	schedule();
 	finish_wait(&vc->wq, &wait);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
+	trace_kvmppc_vcore_blocked(vc, 1);
 }
 
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc;
 	struct kvm_vcpu *v, *vn;
 
+	trace_kvmppc_run_vcpu_enter(vcpu);
+
 	kvm_run->exit_reason = 0;
 	vcpu->arch.ret = RESUME_GUEST;
 	vcpu->arch.trap = 0;
@@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		    VCORE_EXIT_COUNT(vc) == 0) {
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu);
+			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
 			wake_up(&vc->wq);
 		}
@@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		wake_up(&v->arch.cpu_run);
 	}
 
+	trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
 	spin_unlock(&vc->lock);
 	return vcpu->arch.ret;
 }
@@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
 	smp_mb();
 
-	/* On the first time here, set up HTAB and VRMA or RMA */
+	/* On the first time here, set up HTAB and VRMA */
 	if (!vcpu->kvm->arch.rma_setup_done) {
 		r = kvmppc_hv_setup_htab_rma(vcpu);
 		if (r)
@@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
 		    !(vcpu->arch.shregs.msr & MSR_PR)) {
+			trace_kvm_hcall_enter(vcpu);
 			r = kvmppc_pseries_do_hcall(vcpu);
+			trace_kvm_hcall_exit(vcpu, r);
 			kvmppc_core_prepare_to_enter(vcpu);
 		} else if (r == RESUME_PAGE_FAULT) {
 			srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	return r;
 }
 
-
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-	switch (rma_size) {
-	case 32ul << 20:	/* 32 MB */
-		if (cpu_has_feature(CPU_FTR_ARCH_206))
-			return 8;	/* only supported on POWER7 */
-		return -1;
-	case 64ul << 20:	/* 64 MB */
-		return 3;
-	case 128ul << 20:	/* 128 MB */
-		return 7;
-	case 256ul << 20:	/* 256 MB */
-		return 4;
-	case 1ul << 30:		/* 1 GB */
-		return 2;
-	case 16ul << 30:	/* 16 GB */
-		return 1;
-	case 256ul << 30:	/* 256 GB */
-		return 0;
-	default:
-		return -1;
-	}
-}
-
-static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct page *page;
-	struct kvm_rma_info *ri = vma->vm_file->private_data;
-
-	if (vmf->pgoff >= kvm_rma_pages)
-		return VM_FAULT_SIGBUS;
-
-	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
-	get_page(page);
-	vmf->page = page;
-	return 0;
-}
-
-static const struct vm_operations_struct kvm_rma_vm_ops = {
-	.fault = kvm_rma_fault,
-};
-
-static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-	vma->vm_ops = &kvm_rma_vm_ops;
-	return 0;
-}
-
-static int kvm_rma_release(struct inode *inode, struct file *filp)
-{
-	struct kvm_rma_info *ri = filp->private_data;
-
-	kvm_release_rma(ri);
-	return 0;
-}
-
-static const struct file_operations kvm_rma_fops = {
-	.mmap           = kvm_rma_mmap,
-	.release	= kvm_rma_release,
-};
-
-static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
-				      struct kvm_allocate_rma *ret)
-{
-	long fd;
-	struct kvm_rma_info *ri;
-	/*
-	 * Only do this on PPC970 in HV mode
-	 */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
-		return -EINVAL;
-
-	if (!kvm_rma_pages)
-		return -EINVAL;
-
-	ri = kvm_alloc_rma();
-	if (!ri)
-		return -ENOMEM;
-
-	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
-	if (fd < 0)
-		kvm_release_rma(ri);
-
-	ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
-	return fd;
-}
-
 static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
 				     int linux_psize)
 {
@@ -2167,26 +2150,6 @@ out:
 	return r;
 }
 
-static void unpin_slot(struct kvm_memory_slot *memslot)
-{
-	unsigned long *physp;
-	unsigned long j, npages, pfn;
-	struct page *page;
-
-	physp = memslot->arch.slot_phys;
-	npages = memslot->npages;
-	if (!physp)
-		return;
-	for (j = 0; j < npages; j++) {
-		if (!(physp[j] & KVMPPC_GOT_PAGE))
-			continue;
-		pfn = physp[j] >> PAGE_SHIFT;
-		page = pfn_to_page(pfn);
-		SetPageDirty(page);
-		put_page(page);
-	}
-}
-
 static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
 					struct kvm_memory_slot *dont)
 {
@@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
 		vfree(free->arch.rmap);
 		free->arch.rmap = NULL;
 	}
-	if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
-		unpin_slot(free);
-		vfree(free->arch.slot_phys);
-		free->arch.slot_phys = NULL;
-	}
 }
 
 static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
@@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
 	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
 	if (!slot->arch.rmap)
 		return -ENOMEM;
-	slot->arch.slot_phys = NULL;
 
 	return 0;
 }
@@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
 					struct kvm_memory_slot *memslot,
 					struct kvm_userspace_memory_region *mem)
 {
-	unsigned long *phys;
-
-	/* Allocate a slot_phys array if needed */
-	phys = memslot->arch.slot_phys;
-	if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
-		phys = vzalloc(memslot->npages * sizeof(unsigned long));
-		if (!phys)
-			return -ENOMEM;
-		memslot->arch.slot_phys = phys;
-	}
-
 	return 0;
 }
 
@@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvm_rma_info *ri = NULL;
 	unsigned long hva;
 	struct kvm_memory_slot *memslot;
 	struct vm_area_struct *vma;
 	unsigned long lpcr = 0, senc;
-	unsigned long lpcr_mask = 0;
 	unsigned long psize, porder;
-	unsigned long rma_size;
-	unsigned long rmls;
-	unsigned long *physp;
-	unsigned long i, npages;
 	int srcu_idx;
 
 	mutex_lock(&kvm->lock);
@@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	psize = vma_kernel_pagesize(vma);
 	porder = __ilog2(psize);
 
-	/* Is this one of our preallocated RMAs? */
-	if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
-	    hva == vma->vm_start)
-		ri = vma->vm_file->private_data;
-
 	up_read(&current->mm->mmap_sem);
 
-	if (!ri) {
-		/* On POWER7, use VRMA; on PPC970, give up */
-		err = -EPERM;
-		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-			pr_err("KVM: CPU requires an RMO\n");
-			goto out_srcu;
-		}
+	/* We can handle 4k, 64k or 16M pages in the VRMA */
+	err = -EINVAL;
+	if (!(psize == 0x1000 || psize == 0x10000 ||
+	      psize == 0x1000000))
+		goto out_srcu;
 
-		/* We can handle 4k, 64k or 16M pages in the VRMA */
-		err = -EINVAL;
-		if (!(psize == 0x1000 || psize == 0x10000 ||
-		      psize == 0x1000000))
-			goto out_srcu;
+	/* Update VRMASD field in the LPCR */
+	senc = slb_pgsize_encoding(psize);
+	kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+		(VRMA_VSID << SLB_VSID_SHIFT_1T);
+	/* the -4 is to account for senc values starting at 0x10 */
+	lpcr = senc << (LPCR_VRMASD_SH - 4);
 
-		/* Update VRMASD field in the LPCR */
-		senc = slb_pgsize_encoding(psize);
-		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
-			(VRMA_VSID << SLB_VSID_SHIFT_1T);
-		lpcr_mask = LPCR_VRMASD;
-		/* the -4 is to account for senc values starting at 0x10 */
-		lpcr = senc << (LPCR_VRMASD_SH - 4);
+	/* Create HPTEs in the hash page table for the VRMA */
+	kvmppc_map_vrma(vcpu, memslot, porder);
 
-		/* Create HPTEs in the hash page table for the VRMA */
-		kvmppc_map_vrma(vcpu, memslot, porder);
-
-	} else {
-		/* Set up to use an RMO region */
-		rma_size = kvm_rma_pages;
-		if (rma_size > memslot->npages)
-			rma_size = memslot->npages;
-		rma_size <<= PAGE_SHIFT;
-		rmls = lpcr_rmls(rma_size);
-		err = -EINVAL;
-		if ((long)rmls < 0) {
-			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
-			goto out_srcu;
-		}
-		atomic_inc(&ri->use_count);
-		kvm->arch.rma = ri;
-
-		/* Update LPCR and RMOR */
-		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-			/* PPC970; insert RMLS value (split field) in HID4 */
-			lpcr_mask = (1ul << HID4_RMLS0_SH) |
-				(3ul << HID4_RMLS2_SH) | HID4_RMOR;
-			lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
-				((rmls & 3) << HID4_RMLS2_SH);
-			/* RMOR is also in HID4 */
-			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
-				<< HID4_RMOR_SH;
-		} else {
-			/* POWER7 */
-			lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
-			lpcr = rmls << LPCR_RMLS_SH;
-			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
-		}
-		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
-			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
-
-		/* Initialize phys addrs of pages in RMO */
-		npages = kvm_rma_pages;
-		porder = __ilog2(npages);
-		physp = memslot->arch.slot_phys;
-		if (physp) {
-			if (npages > memslot->npages)
-				npages = memslot->npages;
-			spin_lock(&kvm->arch.slot_phys_lock);
-			for (i = 0; i < npages; ++i)
-				physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
-					porder;
-			spin_unlock(&kvm->arch.slot_phys_lock);
-		}
-	}
-
-	kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+	kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
 
 	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
 	smp_wmb();
@@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
 	       sizeof(kvm->arch.enabled_hcalls));
 
-	kvm->arch.rma = NULL;
-
 	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
 
-	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-		/* PPC970; HID4 is effectively the LPCR */
-		kvm->arch.host_lpid = 0;
-		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
-		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
-		lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
-			((lpid & 0xf) << HID4_LPID5_SH);
-	} else {
-		/* POWER7; init LPCR for virtual RMA mode */
-		kvm->arch.host_lpid = mfspr(SPRN_LPID);
-		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
-		lpcr &= LPCR_PECE | LPCR_LPES;
-		lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
-			LPCR_VPM0 | LPCR_VPM1;
-		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
-			(VRMA_VSID << SLB_VSID_SHIFT_1T);
-		/* On POWER8 turn on online bit to enable PURR/SPURR */
-		if (cpu_has_feature(CPU_FTR_ARCH_207S))
-			lpcr |= LPCR_ONL;
-	}
+	/* Init LPCR for virtual RMA mode */
+	kvm->arch.host_lpid = mfspr(SPRN_LPID);
+	kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+	lpcr &= LPCR_PECE | LPCR_LPES;
+	lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+		LPCR_VPM0 | LPCR_VPM1;
+	kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+		(VRMA_VSID << SLB_VSID_SHIFT_1T);
+	/* On POWER8 turn on online bit to enable PURR/SPURR */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		lpcr |= LPCR_ONL;
 	kvm->arch.lpcr = lpcr;
 
-	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
-	spin_lock_init(&kvm->arch.slot_phys_lock);
-
 	/*
 	 * Track that we now have a HV mode VM active. This blocks secondary
 	 * CPU threads from coming online.
@@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 	kvm_hv_vm_deactivated();
 
 	kvmppc_free_vcores(kvm);
-	if (kvm->arch.rma) {
-		kvm_release_rma(kvm->arch.rma);
-		kvm->arch.rma = NULL;
-	}
 
 	kvmppc_free_hpt(kvm);
 }
@@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
 
 static int kvmppc_core_check_processor_compat_hv(void)
 {
-	if (!cpu_has_feature(CPU_FTR_HVMODE))
+	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+	    !cpu_has_feature(CPU_FTR_ARCH_206))
 		return -EIO;
 	return 0;
 }
@@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 
 	switch (ioctl) {
 
-	case KVM_ALLOCATE_RMA: {
-		struct kvm_allocate_rma rma;
-		struct kvm *kvm = filp->private_data;
-
-		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
-		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
-			r = -EFAULT;
-		break;
-	}
-
 	case KVM_PPC_ALLOCATE_HTAB: {
 		u32 htab_order;
 
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 3f1bb5a36c27..1f083ff8a61a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,6 +16,7 @@
 #include <linux/memblock.h>
 #include <linux/sizes.h>
 #include <linux/cma.h>
+#include <linux/bitops.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
@@ -32,95 +33,9 @@
  * By default we reserve 5% of memory for hash pagetable allocation.
  */
 static unsigned long kvm_cma_resv_ratio = 5;
-/*
- * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
- * Each RMA has to be physically contiguous and of a size that the
- * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
- * and other larger sizes.  Since we are unlikely to be allocate that
- * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot using CMA.
- * should be power of 2.
- */
-unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
-EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 static struct cma *kvm_cma;
 
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-	switch (rma_size) {
-	case 32ul << 20:	/* 32 MB */
-		if (cpu_has_feature(CPU_FTR_ARCH_206))
-			return 8;	/* only supported on POWER7 */
-		return -1;
-	case 64ul << 20:	/* 64 MB */
-		return 3;
-	case 128ul << 20:	/* 128 MB */
-		return 7;
-	case 256ul << 20:	/* 256 MB */
-		return 4;
-	case 1ul << 30:		/* 1 GB */
-		return 2;
-	case 16ul << 30:	/* 16 GB */
-		return 1;
-	case 256ul << 30:	/* 256 GB */
-		return 0;
-	default:
-		return -1;
-	}
-}
-
-static int __init early_parse_rma_size(char *p)
-{
-	unsigned long kvm_rma_size;
-
-	pr_debug("%s(%s)\n", __func__, p);
-	if (!p)
-		return -EINVAL;
-	kvm_rma_size = memparse(p, &p);
-	/*
-	 * Check that the requested size is one supported in hardware
-	 */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-		return -EINVAL;
-	}
-	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
-	return 0;
-}
-early_param("kvm_rma_size", early_parse_rma_size);
-
-struct kvm_rma_info *kvm_alloc_rma()
-{
-	struct page *page;
-	struct kvm_rma_info *ri;
-
-	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
-	if (!ri)
-		return NULL;
-	page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
-	if (!page)
-		goto err_out;
-	atomic_set(&ri->use_count, 1);
-	ri->base_pfn = page_to_pfn(page);
-	return ri;
-err_out:
-	kfree(ri);
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(kvm_alloc_rma);
-
-void kvm_release_rma(struct kvm_rma_info *ri)
-{
-	if (atomic_dec_and_test(&ri->use_count)) {
-		cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
-		kfree(ri);
-	}
-}
-EXPORT_SYMBOL_GPL(kvm_release_rma);
-
 static int __init early_parse_kvm_cma_resv(char *p)
 {
 	pr_debug("%s(%s)\n", __func__, p);
@@ -132,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
 
 struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
-	unsigned long align_pages = HPT_ALIGN_PAGES;
-
 	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
-	/* Old CPUs require HPT aligned on a multiple of its size */
-	if (!cpu_has_feature(CPU_FTR_ARCH_206))
-		align_pages = nr_pages;
-	return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
+	return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
@@ -180,22 +90,44 @@ void __init kvm_cma_reserve(void)
 	if (selected_size) {
 		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
 			 (unsigned long)selected_size / SZ_1M);
-		/*
-		 * Old CPUs require HPT aligned on a multiple of its size. So for them
-		 * make the alignment as max size we could request.
-		 */
-		if (!cpu_has_feature(CPU_FTR_ARCH_206))
-			align_size = __rounddown_pow_of_two(selected_size);
-		else
-			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
-
-		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+		align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
 		cma_declare_contiguous(0, selected_size, 0, align_size,
 			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
 	}
 }
 
 /*
+ * Real-mode H_CONFER implementation.
+ * We check if we are the only vcpu out of this virtual core
+ * still running in the guest and not ceded.  If so, we pop up
+ * to the virtual-mode implementation; if not, just return to
+ * the guest.
+ */
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+			    unsigned int yield_count)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	int threads_running;
+	int threads_ceded;
+	int threads_conferring;
+	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
+	int rv = H_SUCCESS; /* => don't yield */
+
+	set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
+		threads_running = VCORE_ENTRY_COUNT(vc);
+		threads_ceded = hweight32(vc->napping_threads);
+		threads_conferring = hweight32(vc->conferring_threads);
+		if (threads_ceded + threads_conferring >= threads_running) {
+			rv = H_TOO_HARD; /* => do yield */
+			break;
+		}
+	}
+	clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	return rv;
+}
+
+/*
  * When running HV mode KVM we need to block certain operations while KVM VMs
  * exist in the system. We use a counter of VMs to track this.
  *
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 731be7478b27..36540a99d178 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry)
 	std	r3, _CCR(r1)
 
 	/* Save host DSCR */
-BEGIN_FTR_SECTION
 	mfspr	r3, SPRN_DSCR
 	std	r3, HSTATE_DSCR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
 	/* Save host DABR */
@@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mfspr	r7, SPRN_MMCR0		/* save MMCR0 */
 	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable interrupts */
 	mfspr	r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-	/* On P7, clear MMCRA in order to disable SDAR updates */
+	/* Clear MMCRA in order to disable SDAR updates */
 	li	r5, 0
 	mtspr	SPRN_MMCRA, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	isync
 	ld	r3, PACALPPACAPTR(r13)	/* is the host using the PMU? */
 	lbz	r5, LPPACA_PMCINUSE(r3)
@@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mfspr	r7, SPRN_PMC4
 	mfspr	r8, SPRN_PMC5
 	mfspr	r9, SPRN_PMC6
-BEGIN_FTR_SECTION
-	mfspr	r10, SPRN_PMC7
-	mfspr	r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	stw	r3, HSTATE_PMC(r13)
 	stw	r5, HSTATE_PMC + 4(r13)
 	stw	r6, HSTATE_PMC + 8(r13)
 	stw	r7, HSTATE_PMC + 12(r13)
 	stw	r8, HSTATE_PMC + 16(r13)
 	stw	r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-	stw	r10, HSTATE_PMC + 24(r13)
-	stw	r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 31:
 
 	/*
@@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
-#ifdef CONFIG_SMP
-	/*
-	 * On PPC970, if the guest vcpu has an external interrupt pending,
-	 * send ourselves an IPI so as to interrupt the guest once it
-	 * enables interrupts.  (It must have interrupts disabled,
-	 * otherwise we would already have delivered the interrupt.)
-	 *
-	 * XXX If this is a UP build, smp_send_reschedule is not available,
-	 * so the interrupt will be delayed until the next time the vcpu
-	 * enters the guest with interrupts enabled.
-	 */
-BEGIN_FTR_SECTION
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	ld	r0, VCPU_PENDING_EXC(r4)
-	li	r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
-	oris	r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
-	and.	r0, r0, r7
-	beq	32f
-	lhz	r3, PACAPACAINDEX(r13)
-	bl	smp_send_reschedule
-	nop
-32:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-#endif /* CONFIG_SMP */
-
 	/* Jump to partition switch code */
 	bl	kvmppc_hv_entry_trampoline
 	nop
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index d562c8e2bc30..60081bd75847 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -138,8 +138,5 @@ out:
 
 long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
 {
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
-		return kvmppc_realmode_mc_power7(vcpu);
-
-	return 0;
+	return kvmppc_realmode_mc_power7(vcpu);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 084ad54c73cd..510bdfbc4073 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
 	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
 	 * we can use tlbiel as long as we mark all other physical
 	 * cores as potentially having stale TLB entries for this lpid.
-	 * If we're not using MMU notifiers, we never take pages away
-	 * from the guest, so we can use tlbiel if requested.
 	 * Otherwise, don't use tlbiel.
 	 */
 	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
 		global = 0;
-	else if (kvm->arch.using_mmu_notifiers)
-		global = 1;
 	else
-		global = !(flags & H_LOCAL);
+		global = 1;
 
 	if (!global) {
 		/* any other core might now have stale TLB entries... */
@@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	struct revmap_entry *rev;
 	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
-	unsigned long *physp, pte_size;
+	unsigned long pte_size;
 	unsigned long is_io;
 	unsigned long *rmap;
 	pte_t pte;
@@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	is_io = ~0ul;
 	rmap = NULL;
 	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
-		/* PPC970 can't do emulated MMIO */
-		if (!cpu_has_feature(CPU_FTR_ARCH_206))
-			return H_PARAMETER;
 		/* Emulated MMIO - mark this with key=31 */
 		pteh |= HPTE_V_ABSENT;
 		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
@@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	slot_fn = gfn - memslot->base_gfn;
 	rmap = &memslot->arch.rmap[slot_fn];
 
-	if (!kvm->arch.using_mmu_notifiers) {
-		physp = memslot->arch.slot_phys;
-		if (!physp)
-			return H_PARAMETER;
-		physp += slot_fn;
-		if (realmode)
-			physp = real_vmalloc_addr(physp);
-		pa = *physp;
-		if (!pa)
-			return H_TOO_HARD;
-		is_io = pa & (HPTE_R_I | HPTE_R_W);
-		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
-		pa &= PAGE_MASK;
+	/* Translate to host virtual address */
+	hva = __gfn_to_hva_memslot(memslot, gfn);
+
+	/* Look up the Linux PTE for the backing page */
+	pte_size = psize;
+	pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
+	if (pte_present(pte) && !pte_numa(pte)) {
+		if (writing && !pte_write(pte))
+			/* make the actual HPTE be read-only */
+			ptel = hpte_make_readonly(ptel);
+		is_io = hpte_cache_bits(pte_val(pte));
+		pa = pte_pfn(pte) << PAGE_SHIFT;
+		pa |= hva & (pte_size - 1);
 		pa |= gpa & ~PAGE_MASK;
-	} else {
-		/* Translate to host virtual address */
-		hva = __gfn_to_hva_memslot(memslot, gfn);
-
-		/* Look up the Linux PTE for the backing page */
-		pte_size = psize;
-		pte = lookup_linux_pte_and_update(pgdir, hva, writing,
-						  &pte_size);
-		if (pte_present(pte) && !pte_numa(pte)) {
-			if (writing && !pte_write(pte))
-				/* make the actual HPTE be read-only */
-				ptel = hpte_make_readonly(ptel);
-			is_io = hpte_cache_bits(pte_val(pte));
-			pa = pte_pfn(pte) << PAGE_SHIFT;
-			pa |= hva & (pte_size - 1);
-			pa |= gpa & ~PAGE_MASK;
-		}
 	}
 
 	if (pte_size < psize)
@@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 			rmap = real_vmalloc_addr(rmap);
 		lock_rmap(rmap);
 		/* Check for pending invalidations under the rmap chain lock */
-		if (kvm->arch.using_mmu_notifiers &&
-		    mmu_notifier_retry(kvm, mmu_seq)) {
+		if (mmu_notifier_retry(kvm, mmu_seq)) {
 			/* inval in progress, write a non-present HPTE */
 			pteh |= HPTE_V_ABSENT;
 			pteh &= ~HPTE_V_VALID;
@@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock)
 	return old == 0;
 }
 
-/*
- * tlbie/tlbiel is a bit different on the PPC970 compared to later
- * processors such as POWER7; the large page bit is in the instruction
- * not RB, and the top 16 bits and the bottom 12 bits of the VA
- * in RB must be 0.
- */
-static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
-			  long npages, int global, bool need_sync)
-{
-	long i;
-
-	if (global) {
-		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-			cpu_relax();
-		if (need_sync)
-			asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < npages; ++i) {
-			unsigned long rb = rbvalues[i];
-
-			if (rb & 1)		/* large page */
-				asm volatile("tlbie %0,1" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-			else
-				asm volatile("tlbie %0,0" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-		}
-		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-		kvm->arch.tlbie_lock = 0;
-	} else {
-		if (need_sync)
-			asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < npages; ++i) {
-			unsigned long rb = rbvalues[i];
-
-			if (rb & 1)		/* large page */
-				asm volatile("tlbiel %0,1" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-			else
-				asm volatile("tlbiel %0,0" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-		}
-		asm volatile("ptesync" : : : "memory");
-	}
-}
-
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		      long npages, int global, bool need_sync)
 {
 	long i;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-		/* PPC970 tlbie instruction is a bit different */
-		do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
-		return;
-	}
 	if (global) {
 		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
 			cpu_relax();
@@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 		rev->guest_rpte = r;
 		note_hpte_modification(kvm, rev);
 	}
-	r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
 
 	/* Update HPTE */
 	if (v & HPTE_V_VALID) {
-		rb = compute_tlbie_rb(v, r, pte_index);
-		hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
-		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		/*
-		 * If the host has this page as readonly but the guest
-		 * wants to make it read/write, reduce the permissions.
-		 * Checking the host permissions involves finding the
-		 * memslot and then the Linux PTE for the page.
+		 * If the page is valid, don't let it transition from
+		 * readonly to writable.  If it should be writable, we'll
+		 * take a trap and let the page fault code sort it out.
 		 */
-		if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
-			unsigned long psize, gfn, hva;
-			struct kvm_memory_slot *memslot;
-			pgd_t *pgdir = vcpu->arch.pgdir;
-			pte_t pte;
-
-			psize = hpte_page_size(v, r);
-			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
-			memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
-			if (memslot) {
-				hva = __gfn_to_hva_memslot(memslot, gfn);
-				pte = lookup_linux_pte_and_update(pgdir, hva,
-								  1, &psize);
-				if (pte_present(pte) && !pte_write(pte))
-					r = hpte_make_readonly(r);
-			}
+		pte = be64_to_cpu(hpte[1]);
+		r = (pte & ~mask) | bits;
+		if (hpte_is_writable(r) && !hpte_is_writable(pte))
+			r = hpte_make_readonly(r);
+		/* If the PTE is changing, invalidate it first */
+		if (r != pte) {
+			rb = compute_tlbie_rb(v, r, pte_index);
+			hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
+					      HPTE_V_ABSENT);
+			do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
+				  true);
+			hpte[1] = cpu_to_be64(r);
 		}
 	}
-	hpte[1] = cpu_to_be64(r);
-	eieio();
-	hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
+	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 	asm volatile("ptesync" : : : "memory");
 	return H_SUCCESS;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3ee38e6e884f..7b066f6b02ad 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 	 * state update in HW (ie bus transactions) so we can handle them
 	 * separately here as well.
 	 */
-	if (resend)
+	if (resend) {
 		icp->rm_action |= XICS_RM_CHECK_RESEND;
+		icp->rm_resend_icp = icp;
+	}
 }
 
 
@@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 	 * nothing needs to be done as there can be no XISR to
 	 * reject.
 	 *
+	 * ICP state: Check_IPI
+	 *
 	 * If the CPPR is less favored, then we might be replacing
-	 * an interrupt, and thus need to possibly reject it as in
+	 * an interrupt, and thus need to possibly reject it.
 	 *
-	 * ICP state: Check_IPI
+	 * ICP State: IPI
+	 *
+	 * Besides rejecting any pending interrupts, we also
+	 * update XISR and pending_pri to mark IPI as pending.
+	 *
+	 * PAPR does not describe this state, but if the MFRR is being
+	 * made less favored than its earlier value, there might be
+	 * a previously-rejected interrupt needing to be resent.
+	 * Ideally, we would want to resend only if
+	 *	prio(pending_interrupt) < mfrr &&
+	 *	prio(pending_interrupt) < cppr
+	 * where pending interrupt is the one that was rejected. But
+	 * we don't have that state, so we simply trigger a resend
+	 * whenever the MFRR is made less favored.
 	 */
 	do {
 		old_state = new_state = ACCESS_ONCE(icp->state);
@@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 		resend = false;
 		if (mfrr < new_state.cppr) {
 			/* Reject a pending interrupt if not an IPI */
-			if (mfrr <= new_state.pending_pri)
+			if (mfrr <= new_state.pending_pri) {
 				reject = new_state.xisr;
-			new_state.pending_pri = mfrr;
-			new_state.xisr = XICS_IPI;
+				new_state.pending_pri = mfrr;
+				new_state.xisr = XICS_IPI;
+			}
 		}
 
-		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+		if (mfrr > old_state.mfrr) {
 			resend = new_state.need_resend;
 			new_state.need_resend = 0;
 		}
@@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 	}
 
 	/* Pass resends to virtual mode */
-	if (resend)
+	if (resend) {
 		this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+		this_icp->rm_resend_icp = icp;
+	}
 
 	return check_too_hard(xics, this_icp);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 65c105b17a25..10554df13852 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
 	lwz	r6, HSTATE_PMC + 12(r13)
 	lwz	r8, HSTATE_PMC + 16(r13)
 	lwz	r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-	lwz	r10, HSTATE_PMC + 24(r13)
-	lwz	r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_PMC1, r3
 	mtspr	SPRN_PMC2, r4
 	mtspr	SPRN_PMC3, r5
 	mtspr	SPRN_PMC4, r6
 	mtspr	SPRN_PMC5, r8
 	mtspr	SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-	mtspr	SPRN_PMC7, r10
-	mtspr	SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	ld	r3, HSTATE_MMCR(r13)
 	ld	r4, HSTATE_MMCR + 8(r13)
 	ld	r5, HSTATE_MMCR + 16(r13)
@@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-BEGIN_FTR_SECTION
 	beq	11f
 	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
 	beq	cr2, 14f			/* HMI check */
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* RFI into the highmem handler, or branch to interrupt handler */
 	mfmsr	r6
@@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	mtmsrd	r6, 1			/* Clear RI in MSR */
 	mtsrr0	r8
 	mtsrr1	r7
-	beqa	0x500			/* external interrupt (PPC970) */
 	beq	cr1, 13f		/* machine check */
 	RFI
 
@@ -393,11 +382,8 @@ kvmppc_hv_entry:
 	slbia
 	ptesync
 
-BEGIN_FTR_SECTION
-	b	30f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	/*
-	 * POWER7 host -> guest partition switch code.
+	 * POWER7/POWER8 host -> guest partition switch code.
 	 * We don't have to lock against concurrent tlbies,
 	 * but we do have to coordinate across hardware threads.
 	 */
@@ -505,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	cmpwi	r3,512		/* 1 microsecond */
 	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	blt	hdec_soon
-	b	31f
-
-	/*
-	 * PPC970 host -> guest partition switch code.
-	 * We have to lock against concurrent tlbies,
-	 * using native_tlbie_lock to lock against host tlbies
-	 * and kvm->arch.tlbie_lock to lock against guest tlbies.
-	 * We also have to invalidate the TLB since its
-	 * entries aren't tagged with the LPID.
-	 */
-30:	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */
-
-	/* first take native_tlbie_lock */
-	.section ".toc","aw"
-toc_tlbie_lock:
-	.tc	native_tlbie_lock[TC],native_tlbie_lock
-	.previous
-	ld	r3,toc_tlbie_lock@toc(r2)
-#ifdef __BIG_ENDIAN__
-	lwz	r8,PACA_LOCK_TOKEN(r13)
-#else
-	lwz	r8,PACAPACAINDEX(r13)
-#endif
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r7,VCORE_LPCR(r5)	/* use vcore->lpcr to store HID4 */
-	li	r0,0x18f
-	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */
-	or	r0,r7,r0
-	ptesync
-	sync
-	mtspr	SPRN_HID4,r0		/* switch to reserved LPID */
-	isync
-	li	r0,0
-	stw	r0,0(r3)		/* drop native_tlbie_lock */
-
-	/* invalidate the whole TLB */
-	li	r0,256
-	mtctr	r0
-	li	r6,0
-25:	tlbiel	r6
-	addi	r6,r6,0x1000
-	bdnz	25b
-	ptesync
 
-	/* Take the guest's tlbie_lock */
-	addi	r3,r9,KVM_TLBIE_LOCK
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-	ld	r6,KVM_SDR1(r9)
-	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
-
-	/* Set up HID4 with the guest's LPID etc. */
-	sync
-	mtspr	SPRN_HID4,r7
-	isync
-
-	/* drop the guest's tlbie_lock */
-	li	r0,0
-	stw	r0,0(r3)
-
-	/* Check if HDEC expires soon */
-	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,10
-	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-	blt	hdec_soon
-
-	/* Enable HDEC interrupts */
-	mfspr	r0,SPRN_HID0
-	li	r3,1
-	rldimi	r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-	sync
-	mtspr	SPRN_HID0,r0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-31:
 	/* Do we have a guest vcpu to run? */
 	cmpdi	r4, 0
 	beq	kvmppc_primary_no_guest
@@ -625,7 +521,6 @@ kvmppc_got_guest:
 	stb	r6, VCPU_VPA_DIRTY(r4)
 25:
 
-BEGIN_FTR_SECTION
 	/* Save purr/spurr */
 	mfspr	r5,SPRN_PURR
 	mfspr	r6,SPRN_SPURR
@@ -635,7 +530,6 @@ BEGIN_FTR_SECTION
 	ld	r8,VCPU_SPURR(r4)
 	mtspr	SPRN_PURR,r7
 	mtspr	SPRN_SPURR,r8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
 	/* Set partition DABR */
@@ -644,9 +538,7 @@ BEGIN_FTR_SECTION
 	ld	r6,VCPU_DABR(r4)
 	mtspr	SPRN_DABRX,r5
 	mtspr	SPRN_DABR,r6
- BEGIN_FTR_SECTION_NESTED(89)
 	isync
- END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -777,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
 	lwz	r7, VCPU_PMC + 12(r4)
 	lwz	r8, VCPU_PMC + 16(r4)
 	lwz	r9, VCPU_PMC + 20(r4)
-BEGIN_FTR_SECTION
-	lwz	r10, VCPU_PMC + 24(r4)
-	lwz	r11, VCPU_PMC + 28(r4)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_PMC1, r3
 	mtspr	SPRN_PMC2, r5
 	mtspr	SPRN_PMC3, r6
 	mtspr	SPRN_PMC4, r7
 	mtspr	SPRN_PMC5, r8
 	mtspr	SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-	mtspr	SPRN_PMC7, r10
-	mtspr	SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	ld	r3, VCPU_MMCR(r4)
 	ld	r5, VCPU_MMCR + 8(r4)
 	ld	r6, VCPU_MMCR + 16(r4)
@@ -837,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	ld	r30, VCPU_GPR(R30)(r4)
 	ld	r31, VCPU_GPR(R31)(r4)
 
-BEGIN_FTR_SECTION
 	/* Switch DSCR to guest value */
 	ld	r5, VCPU_DSCR(r4)
 	mtspr	SPRN_DSCR, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
-	/* Skip next section on POWER7 or PPC970 */
+	/* Skip next section on POWER7 */
 	b	8f
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
@@ -920,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_DAR, r5
 	mtspr	SPRN_DSISR, r6
 
-BEGIN_FTR_SECTION
 	/* Restore AMR and UAMOR, set AMOR to all 1s */
 	ld	r5,VCPU_AMR(r4)
 	ld	r6,VCPU_UAMOR(r4)
@@ -928,7 +809,6 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_AMR,r5
 	mtspr	SPRN_UAMOR,r6
 	mtspr	SPRN_AMOR,r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Restore state of CTRL run bit; assume 1 on entry */
 	lwz	r5,VCPU_CTRL(r4)
@@ -963,13 +843,11 @@ deliver_guest_interrupt:
 	rldicl	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
 	cmpdi	cr1, r0, 0
 	andi.	r8, r11, MSR_EE
-BEGIN_FTR_SECTION
 	mfspr	r8, SPRN_LPCR
 	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
 	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
 	mtspr	SPRN_LPCR, r8
 	isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	beq	5f
 	li	r0, BOOK3S_INTERRUPT_EXTERNAL
 	bne	cr1, 12f
@@ -1124,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	stw	r12,VCPU_TRAP(r9)
 
-	/* Save HEIR (HV emulation assist reg) in last_inst
+	/* Save HEIR (HV emulation assist reg) in emul_inst
 	   if this is an HEI (HV emulation interrupt, e40) */
 	li	r3,KVM_INST_FETCH_FAILED
-BEGIN_FTR_SECTION
 	cmpwi	r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
 	bne	11f
 	mfspr	r3,SPRN_HEIR
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-11:	stw	r3,VCPU_LAST_INST(r9)
+11:	stw	r3,VCPU_HEIR(r9)
 
 	/* these are volatile across C function calls */
 	mfctr	r3
@@ -1140,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	std	r3, VCPU_CTR(r9)
 	stw	r4, VCPU_XER(r9)
 
-BEGIN_FTR_SECTION
 	/* If this is a page table miss then see if it's theirs or ours */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
 	beq	kvmppc_hdsi
 	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
 	beq	kvmppc_hisi
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* See if this is a leftover HDEC interrupt */
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
@@ -1159,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
 
-	/* Only handle external interrupts here on arch 206 and later */
-BEGIN_FTR_SECTION
-	b	ext_interrupt_to_host
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
-
 	/* External interrupt ? */
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
 	bne+	ext_interrupt_to_host
@@ -1193,11 +1062,9 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	mfdsisr	r7
 	std	r6, VCPU_DAR(r9)
 	stw	r7, VCPU_DSISR(r9)
-BEGIN_FTR_SECTION
 	/* don't overwrite fault_dar/fault_dsisr if HDSI */
 	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
 	beq	6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	std	r6, VCPU_FAULT_DAR(r9)
 	stw	r7, VCPU_FAULT_DSISR(r9)
 
@@ -1236,7 +1103,6 @@ mc_cont:
 	/*
 	 * Save the guest PURR/SPURR
 	 */
-BEGIN_FTR_SECTION
 	mfspr	r5,SPRN_PURR
 	mfspr	r6,SPRN_SPURR
 	ld	r7,VCPU_PURR(r9)
@@ -1256,7 +1122,6 @@ BEGIN_FTR_SECTION
 	add	r4,r4,r6
 	mtspr	SPRN_PURR,r3
 	mtspr	SPRN_SPURR,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
 
 	/* Save DEC */
 	mfspr	r5,SPRN_DEC
@@ -1306,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 8:
 
 	/* Save and reset AMR and UAMOR before turning on the MMU */
-BEGIN_FTR_SECTION
 	mfspr	r5,SPRN_AMR
 	mfspr	r6,SPRN_UAMOR
 	std	r5,VCPU_AMR(r9)
 	std	r6,VCPU_UAMOR(r9)
 	li	r6,0
 	mtspr	SPRN_AMR,r6
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Switch DSCR back to host value */
-BEGIN_FTR_SECTION
 	mfspr	r8, SPRN_DSCR
 	ld	r7, HSTATE_DSCR(r13)
 	std	r8, VCPU_DSCR(r9)
 	mtspr	SPRN_DSCR, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Save non-volatile GPRs */
 	std	r14, VCPU_GPR(R14)(r9)
@@ -1503,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
 	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
 	mfspr	r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-	/* On P7, clear MMCRA in order to disable SDAR updates */
+	/* Clear MMCRA in order to disable SDAR updates */
 	li	r7, 0
 	mtspr	SPRN_MMCRA, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	isync
 	beq	21f			/* if no VPA, save PMU stuff anyway */
 	lbz	r7, LPPACA_PMCINUSE(r8)
@@ -1532,10 +1391,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mfspr	r6, SPRN_PMC4
 	mfspr	r7, SPRN_PMC5
 	mfspr	r8, SPRN_PMC6
-BEGIN_FTR_SECTION
-	mfspr	r10, SPRN_PMC7
-	mfspr	r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	stw	r3, VCPU_PMC(r9)
 	stw	r4, VCPU_PMC + 4(r9)
 	stw	r5, VCPU_PMC + 8(r9)
@@ -1543,10 +1398,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	stw	r7, VCPU_PMC + 16(r9)
 	stw	r8, VCPU_PMC + 20(r9)
 BEGIN_FTR_SECTION
-	stw	r10, VCPU_PMC + 24(r9)
-	stw	r11, VCPU_PMC + 28(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_SIER
 	mfspr	r6, SPRN_SPMC1
 	mfspr	r7, SPRN_SPMC2
@@ -1566,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	ptesync
 
 hdec_soon:			/* r12 = trap, r13 = paca */
-BEGIN_FTR_SECTION
-	b	32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	/*
-	 * POWER7 guest -> host partition switch code.
+	 * POWER7/POWER8 guest -> host partition switch code.
 	 * We don't have to lock against tlbies but we do
 	 * have to coordinate the hardware threads.
 	 */
@@ -1698,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 16:	ld	r8,KVM_HOST_LPCR(r4)
 	mtspr	SPRN_LPCR,r8
 	isync
-	b	33f
-
-	/*
-	 * PPC970 guest -> host partition switch code.
-	 * We have to lock against concurrent tlbies, and
-	 * we have to flush the whole TLB.
-	 */
-32:	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
-
-	/* Take the guest's tlbie_lock */
-#ifdef __BIG_ENDIAN__
-	lwz	r8,PACA_LOCK_TOKEN(r13)
-#else
-	lwz	r8,PACAPACAINDEX(r13)
-#endif
-	addi	r3,r4,KVM_TLBIE_LOCK
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-
-	ld	r7,KVM_HOST_LPCR(r4)	/* use kvm->arch.host_lpcr for HID4 */
-	li	r0,0x18f
-	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */
-	or	r0,r7,r0
-	ptesync
-	sync
-	mtspr	SPRN_HID4,r0		/* switch to reserved LPID */
-	isync
-	li	r0,0
-	stw	r0,0(r3)		/* drop guest tlbie_lock */
-
-	/* invalidate the whole TLB */
-	li	r0,256
-	mtctr	r0
-	li	r6,0
-25:	tlbiel	r6
-	addi	r6,r6,0x1000
-	bdnz	25b
-	ptesync
-
-	/* take native_tlbie_lock */
-	ld	r3,toc_tlbie_lock@toc(2)
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-
-	ld	r6,KVM_HOST_SDR1(r4)
-	mtspr	SPRN_SDR1,r6		/* switch to host page table */
-
-	/* Set up host HID4 value */
-	sync
-	mtspr	SPRN_HID4,r7
-	isync
-	li	r0,0
-	stw	r0,0(r3)		/* drop native_tlbie_lock */
-
-	lis	r8,0x7fff		/* MAX_INT@h */
-	mtspr	SPRN_HDEC,r8
-
-	/* Disable HDEC interrupts */
-	mfspr	r0,SPRN_HID0
-	li	r3,0
-	rldimi	r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-	sync
-	mtspr	SPRN_HID0,r0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
 
 	/* load host SLB entries */
-33:	ld	r8,PACA_SLBSHADOWPTR(r13)
+	ld	r8,PACA_SLBSHADOWPTR(r13)
 
 	.rept	SLB_NUM_BOLTED
 	li	r3, SLBSHADOW_SAVEAREA
@@ -2047,7 +1817,7 @@ hcall_real_table:
 	.long	0		/* 0xd8 */
 	.long	0		/* 0xdc */
 	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table
-	.long	0		/* 0xe4 */
+	.long	DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
 	.long	0		/* 0xe8 */
 	.long	0		/* 0xec */
 	.long	0		/* 0xf0 */
@@ -2126,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede)
 	stw	r0,VCPU_TRAP(r3)
 	li	r0,H_SUCCESS
 	std	r0,VCPU_GPR(R3)(r3)
-BEGIN_FTR_SECTION
-	b	kvm_cede_exit	/* just send it up to host on 970 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 
 	/*
 	 * Set our bit in the bitmask of napping threads unless all the
@@ -2455,7 +2222,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	mtmsrd	r8
-	isync
 	addi	r3,r3,VCPU_FPRS
 	bl	store_fp_state
 #ifdef CONFIG_ALTIVEC
@@ -2491,7 +2257,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	mtmsrd	r8
-	isync
 	addi	r3,r4,VCPU_FPRS
 	bl	load_fp_state
 #ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bfb8035314e3..bd6ab1672ae6 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
 	return kvmppc_get_field(inst, msb + 32, lsb + 32);
 }
 
-/*
- * Replaces inst bits with ordering according to spec.
- */
-static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
-{
-	return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
-}
-
 bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
 {
 	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cf2eb16846d1..f57383941d03 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return r;
 }
 
-static inline int get_fpr_index(int i)
-{
-	return i * TS_FPRWIDTH;
-}
-
 /* Give up external provider (FPU, Altivec, VSX) */
 void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 {
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index eaeb78047fb8..807351f76f84 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 	 * there might be a previously-rejected interrupt needing
 	 * to be resent.
 	 *
+	 * ICP state: Check_IPI
+	 *
 	 * If the CPPR is less favored, then we might be replacing
-	 * an interrupt, and thus need to possibly reject it as in
+	 * an interrupt, and thus need to possibly reject it.
 	 *
-	 * ICP state: Check_IPI
+	 * ICP State: IPI
+	 *
+	 * Besides rejecting any pending interrupts, we also
+	 * update XISR and pending_pri to mark IPI as pending.
+	 *
+	 * PAPR does not describe this state, but if the MFRR is being
+	 * made less favored than its earlier value, there might be
+	 * a previously-rejected interrupt needing to be resent.
+	 * Ideally, we would want to resend only if
+	 *	prio(pending_interrupt) < mfrr &&
+	 *	prio(pending_interrupt) < cppr
+	 * where pending interrupt is the one that was rejected. But
+	 * we don't have that state, so we simply trigger a resend
+	 * whenever the MFRR is made less favored.
 	 */
 	do {
 		old_state = new_state = ACCESS_ONCE(icp->state);
@@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 		resend = false;
 		if (mfrr < new_state.cppr) {
 			/* Reject a pending interrupt if not an IPI */
-			if (mfrr <= new_state.pending_pri)
+			if (mfrr <= new_state.pending_pri) {
 				reject = new_state.xisr;
-			new_state.pending_pri = mfrr;
-			new_state.xisr = XICS_IPI;
+				new_state.pending_pri = mfrr;
+				new_state.xisr = XICS_IPI;
+			}
 		}
 
-		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+		if (mfrr > old_state.mfrr) {
 			resend = new_state.need_resend;
 			new_state.need_resend = 0;
 		}
@@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
 	if (icp->rm_action & XICS_RM_KICK_VCPU)
 		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
 	if (icp->rm_action & XICS_RM_CHECK_RESEND)
-		icp_check_resend(xics, icp);
+		icp_check_resend(xics, icp->rm_resend_icp);
 	if (icp->rm_action & XICS_RM_REJECT)
 		icp_deliver_irq(xics, icp, icp->rm_reject);
 	if (icp->rm_action & XICS_RM_NOTIFY_EOI)
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index e8aaa7a3f209..73f0f2723c07 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -74,6 +74,7 @@ struct kvmppc_icp {
 #define XICS_RM_NOTIFY_EOI	0x8
 	u32 rm_action;
 	struct kvm_vcpu *rm_kick_target;
+	struct kvmppc_icp *rm_resend_icp;
 	u32  rm_reject;
 	u32  rm_eoied_irq;
 
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 16095841afe1..b29ce752c7d6 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -78,7 +78,7 @@ static inline int local_sid_setup_one(struct id *entry)
 
 	sid = __this_cpu_inc_return(pcpu_last_used_sid);
 	if (sid < NUM_TIDS) {
-		__this_cpu_write(pcpu_sids)entry[sid], entry);
+		__this_cpu_write(pcpu_sids.entry[sid], entry);
 		entry->val = sid;
 		entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]);
 		ret = sid;
@@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
 	kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
 }
 
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
 static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvmppc_booke_vcpu_load(vcpu, cpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c1f8f53cd312..c45eaab752b0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 			r = 0;
 		break;
 	case KVM_CAP_PPC_RMA:
-		r = hv_enabled;
-		/* PPC970 requires an RMA */
-		if (r && cpu_has_feature(CPU_FTR_ARCH_201))
-			r = 2;
+		r = 0;
 		break;
 #endif
 	case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-		if (hv_enabled)
-			r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
-		else
-			r = 0;
+		r = hv_enabled;
 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 		r = 1;
 #else
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
new file mode 100644
index 000000000000..f647ce0f428b
--- /dev/null
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x501, "EXTERNAL_LEVEL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
index f7537cf26ce7..7ec534d1db9f 100644
--- a/arch/powerpc/kvm/trace_booke.h
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release,
 		__entry->pfn, __entry->flags)
 );
 
+#ifdef CONFIG_SPE_POSSIBLE
+#define kvm_trace_symbol_irqprio_spe \
+	{BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
+	{BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
+#else
+#define kvm_trace_symbol_irqprio_spe
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define kvm_trace_symbol_irqprio_e500mc \
+	{BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
+#else
+#define kvm_trace_symbol_irqprio_e500mc
+#endif
+
+#define kvm_trace_symbol_irqprio \
+	kvm_trace_symbol_irqprio_spe \
+	kvm_trace_symbol_irqprio_e500mc \
+	{BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
+	{BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
+	{BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
+	{BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
+	{BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
+	{BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
+	{BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
+	{BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
+	{BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
+	{BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
+	{BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
+	{BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
+	{BOOKE_IRQPRIO_FIT, "FIT"}, \
+	{BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
+	{BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
+	{BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
+	{BOOKE_IRQPRIO_DBELL, "DBELL"}, \
+	{BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \
+
 TRACE_EVENT(kvm_booke_queue_irqprio,
 	TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
 	TP_ARGS(vcpu, priority),
@@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
 		__entry->pending	= vcpu->arch.pending_exceptions;
 	),
 
-	TP_printk("vcpu=%x prio=%x pending=%lx",
-		__entry->cpu_nr, __entry->priority, __entry->pending)
+	TP_printk("vcpu=%x prio=%s pending=%lx",
+		__entry->cpu_nr,
+		__print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
+		__entry->pending)
 );
 
 #endif
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
new file mode 100644
index 000000000000..33d9daff5783
--- /dev/null
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -0,0 +1,477 @@
+#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_HV_H
+
+#include <linux/tracepoint.h>
+#include "trace_book3s.h"
+#include <asm/hvcall.h>
+#include <asm/kvm_asm.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_hv
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
+#define kvm_trace_symbol_hcall \
+	{H_REMOVE,			"H_REMOVE"}, \
+	{H_ENTER,			"H_ENTER"}, \
+	{H_READ,			"H_READ"}, \
+	{H_CLEAR_MOD,			"H_CLEAR_MOD"}, \
+	{H_CLEAR_REF,			"H_CLEAR_REF"}, \
+	{H_PROTECT,			"H_PROTECT"}, \
+	{H_GET_TCE,			"H_GET_TCE"}, \
+	{H_PUT_TCE,			"H_PUT_TCE"}, \
+	{H_SET_SPRG0,			"H_SET_SPRG0"}, \
+	{H_SET_DABR,			"H_SET_DABR"}, \
+	{H_PAGE_INIT,			"H_PAGE_INIT"}, \
+	{H_SET_ASR,			"H_SET_ASR"}, \
+	{H_ASR_ON,			"H_ASR_ON"}, \
+	{H_ASR_OFF,			"H_ASR_OFF"}, \
+	{H_LOGICAL_CI_LOAD,		"H_LOGICAL_CI_LOAD"}, \
+	{H_LOGICAL_CI_STORE,		"H_LOGICAL_CI_STORE"}, \
+	{H_LOGICAL_CACHE_LOAD,		"H_LOGICAL_CACHE_LOAD"}, \
+	{H_LOGICAL_CACHE_STORE,		"H_LOGICAL_CACHE_STORE"}, \
+	{H_LOGICAL_ICBI,		"H_LOGICAL_ICBI"}, \
+	{H_LOGICAL_DCBF,		"H_LOGICAL_DCBF"}, \
+	{H_GET_TERM_CHAR,		"H_GET_TERM_CHAR"}, \
+	{H_PUT_TERM_CHAR,		"H_PUT_TERM_CHAR"}, \
+	{H_REAL_TO_LOGICAL,		"H_REAL_TO_LOGICAL"}, \
+	{H_HYPERVISOR_DATA,		"H_HYPERVISOR_DATA"}, \
+	{H_EOI,				"H_EOI"}, \
+	{H_CPPR,			"H_CPPR"}, \
+	{H_IPI,				"H_IPI"}, \
+	{H_IPOLL,			"H_IPOLL"}, \
+	{H_XIRR,			"H_XIRR"}, \
+	{H_PERFMON,			"H_PERFMON"}, \
+	{H_MIGRATE_DMA,			"H_MIGRATE_DMA"}, \
+	{H_REGISTER_VPA,		"H_REGISTER_VPA"}, \
+	{H_CEDE,			"H_CEDE"}, \
+	{H_CONFER,			"H_CONFER"}, \
+	{H_PROD,			"H_PROD"}, \
+	{H_GET_PPP,			"H_GET_PPP"}, \
+	{H_SET_PPP,			"H_SET_PPP"}, \
+	{H_PURR,			"H_PURR"}, \
+	{H_PIC,				"H_PIC"}, \
+	{H_REG_CRQ,			"H_REG_CRQ"}, \
+	{H_FREE_CRQ,			"H_FREE_CRQ"}, \
+	{H_VIO_SIGNAL,			"H_VIO_SIGNAL"}, \
+	{H_SEND_CRQ,			"H_SEND_CRQ"}, \
+	{H_COPY_RDMA,			"H_COPY_RDMA"}, \
+	{H_REGISTER_LOGICAL_LAN,	"H_REGISTER_LOGICAL_LAN"}, \
+	{H_FREE_LOGICAL_LAN,		"H_FREE_LOGICAL_LAN"}, \
+	{H_ADD_LOGICAL_LAN_BUFFER,	"H_ADD_LOGICAL_LAN_BUFFER"}, \
+	{H_SEND_LOGICAL_LAN,		"H_SEND_LOGICAL_LAN"}, \
+	{H_BULK_REMOVE,			"H_BULK_REMOVE"}, \
+	{H_MULTICAST_CTRL,		"H_MULTICAST_CTRL"}, \
+	{H_SET_XDABR,			"H_SET_XDABR"}, \
+	{H_STUFF_TCE,			"H_STUFF_TCE"}, \
+	{H_PUT_TCE_INDIRECT,		"H_PUT_TCE_INDIRECT"}, \
+	{H_CHANGE_LOGICAL_LAN_MAC,	"H_CHANGE_LOGICAL_LAN_MAC"}, \
+	{H_VTERM_PARTNER_INFO,		"H_VTERM_PARTNER_INFO"}, \
+	{H_REGISTER_VTERM,		"H_REGISTER_VTERM"}, \
+	{H_FREE_VTERM,			"H_FREE_VTERM"}, \
+	{H_RESET_EVENTS,		"H_RESET_EVENTS"}, \
+	{H_ALLOC_RESOURCE,		"H_ALLOC_RESOURCE"}, \
+	{H_FREE_RESOURCE,		"H_FREE_RESOURCE"}, \
+	{H_MODIFY_QP,			"H_MODIFY_QP"}, \
+	{H_QUERY_QP,			"H_QUERY_QP"}, \
+	{H_REREGISTER_PMR,		"H_REREGISTER_PMR"}, \
+	{H_REGISTER_SMR,		"H_REGISTER_SMR"}, \
+	{H_QUERY_MR,			"H_QUERY_MR"}, \
+	{H_QUERY_MW,			"H_QUERY_MW"}, \
+	{H_QUERY_HCA,			"H_QUERY_HCA"}, \
+	{H_QUERY_PORT,			"H_QUERY_PORT"}, \
+	{H_MODIFY_PORT,			"H_MODIFY_PORT"}, \
+	{H_DEFINE_AQP1,			"H_DEFINE_AQP1"}, \
+	{H_GET_TRACE_BUFFER,		"H_GET_TRACE_BUFFER"}, \
+	{H_DEFINE_AQP0,			"H_DEFINE_AQP0"}, \
+	{H_RESIZE_MR,			"H_RESIZE_MR"}, \
+	{H_ATTACH_MCQP,			"H_ATTACH_MCQP"}, \
+	{H_DETACH_MCQP,			"H_DETACH_MCQP"}, \
+	{H_CREATE_RPT,			"H_CREATE_RPT"}, \
+	{H_REMOVE_RPT,			"H_REMOVE_RPT"}, \
+	{H_REGISTER_RPAGES,		"H_REGISTER_RPAGES"}, \
+	{H_DISABLE_AND_GETC,		"H_DISABLE_AND_GETC"}, \
+	{H_ERROR_DATA,			"H_ERROR_DATA"}, \
+	{H_GET_HCA_INFO,		"H_GET_HCA_INFO"}, \
+	{H_GET_PERF_COUNT,		"H_GET_PERF_COUNT"}, \
+	{H_MANAGE_TRACE,		"H_MANAGE_TRACE"}, \
+	{H_FREE_LOGICAL_LAN_BUFFER,	"H_FREE_LOGICAL_LAN_BUFFER"}, \
+	{H_QUERY_INT_STATE,		"H_QUERY_INT_STATE"}, \
+	{H_POLL_PENDING,		"H_POLL_PENDING"}, \
+	{H_ILLAN_ATTRIBUTES,		"H_ILLAN_ATTRIBUTES"}, \
+	{H_MODIFY_HEA_QP,		"H_MODIFY_HEA_QP"}, \
+	{H_QUERY_HEA_QP,		"H_QUERY_HEA_QP"}, \
+	{H_QUERY_HEA,			"H_QUERY_HEA"}, \
+	{H_QUERY_HEA_PORT,		"H_QUERY_HEA_PORT"}, \
+	{H_MODIFY_HEA_PORT,		"H_MODIFY_HEA_PORT"}, \
+	{H_REG_BCMC,			"H_REG_BCMC"}, \
+	{H_DEREG_BCMC,			"H_DEREG_BCMC"}, \
+	{H_REGISTER_HEA_RPAGES,		"H_REGISTER_HEA_RPAGES"}, \
+	{H_DISABLE_AND_GET_HEA,		"H_DISABLE_AND_GET_HEA"}, \
+	{H_GET_HEA_INFO,		"H_GET_HEA_INFO"}, \
+	{H_ALLOC_HEA_RESOURCE,		"H_ALLOC_HEA_RESOURCE"}, \
+	{H_ADD_CONN,			"H_ADD_CONN"}, \
+	{H_DEL_CONN,			"H_DEL_CONN"}, \
+	{H_JOIN,			"H_JOIN"}, \
+	{H_VASI_STATE,			"H_VASI_STATE"}, \
+	{H_ENABLE_CRQ,			"H_ENABLE_CRQ"}, \
+	{H_GET_EM_PARMS,		"H_GET_EM_PARMS"}, \
+	{H_SET_MPP,			"H_SET_MPP"}, \
+	{H_GET_MPP,			"H_GET_MPP"}, \
+	{H_HOME_NODE_ASSOCIATIVITY,	"H_HOME_NODE_ASSOCIATIVITY"}, \
+	{H_BEST_ENERGY,			"H_BEST_ENERGY"}, \
+	{H_XIRR_X,			"H_XIRR_X"}, \
+	{H_RANDOM,			"H_RANDOM"}, \
+	{H_COP,				"H_COP"}, \
+	{H_GET_MPP_X,			"H_GET_MPP_X"}, \
+	{H_SET_MODE,			"H_SET_MODE"}, \
+	{H_RTAS,			"H_RTAS"}
+
+#define kvm_trace_symbol_kvmret \
+	{RESUME_GUEST,			"RESUME_GUEST"}, \
+	{RESUME_GUEST_NV,		"RESUME_GUEST_NV"}, \
+	{RESUME_HOST,			"RESUME_HOST"}, \
+	{RESUME_HOST_NV,		"RESUME_HOST_NV"}
+
+#define kvm_trace_symbol_hcall_rc \
+	{H_SUCCESS,			"H_SUCCESS"}, \
+	{H_BUSY,			"H_BUSY"}, \
+	{H_CLOSED,			"H_CLOSED"}, \
+	{H_NOT_AVAILABLE,		"H_NOT_AVAILABLE"}, \
+	{H_CONSTRAINED,			"H_CONSTRAINED"}, \
+	{H_PARTIAL,			"H_PARTIAL"}, \
+	{H_IN_PROGRESS,			"H_IN_PROGRESS"}, \
+	{H_PAGE_REGISTERED,		"H_PAGE_REGISTERED"}, \
+	{H_PARTIAL_STORE,		"H_PARTIAL_STORE"}, \
+	{H_PENDING,			"H_PENDING"}, \
+	{H_CONTINUE,			"H_CONTINUE"}, \
+	{H_LONG_BUSY_START_RANGE,	"H_LONG_BUSY_START_RANGE"}, \
+	{H_LONG_BUSY_ORDER_1_MSEC,	"H_LONG_BUSY_ORDER_1_MSEC"}, \
+	{H_LONG_BUSY_ORDER_10_MSEC,	"H_LONG_BUSY_ORDER_10_MSEC"}, \
+	{H_LONG_BUSY_ORDER_100_MSEC,	"H_LONG_BUSY_ORDER_100_MSEC"}, \
+	{H_LONG_BUSY_ORDER_1_SEC,	"H_LONG_BUSY_ORDER_1_SEC"}, \
+	{H_LONG_BUSY_ORDER_10_SEC,	"H_LONG_BUSY_ORDER_10_SEC"}, \
+	{H_LONG_BUSY_ORDER_100_SEC,	"H_LONG_BUSY_ORDER_100_SEC"}, \
+	{H_LONG_BUSY_END_RANGE,		"H_LONG_BUSY_END_RANGE"}, \
+	{H_TOO_HARD,			"H_TOO_HARD"}, \
+	{H_HARDWARE,			"H_HARDWARE"}, \
+	{H_FUNCTION,			"H_FUNCTION"}, \
+	{H_PRIVILEGE,			"H_PRIVILEGE"}, \
+	{H_PARAMETER,			"H_PARAMETER"}, \
+	{H_BAD_MODE,			"H_BAD_MODE"}, \
+	{H_PTEG_FULL,			"H_PTEG_FULL"}, \
+	{H_NOT_FOUND,			"H_NOT_FOUND"}, \
+	{H_RESERVED_DABR,		"H_RESERVED_DABR"}, \
+	{H_NO_MEM,			"H_NO_MEM"}, \
+	{H_AUTHORITY,			"H_AUTHORITY"}, \
+	{H_PERMISSION,			"H_PERMISSION"}, \
+	{H_DROPPED,			"H_DROPPED"}, \
+	{H_SOURCE_PARM,			"H_SOURCE_PARM"}, \
+	{H_DEST_PARM,			"H_DEST_PARM"}, \
+	{H_REMOTE_PARM,			"H_REMOTE_PARM"}, \
+	{H_RESOURCE,			"H_RESOURCE"}, \
+	{H_ADAPTER_PARM,		"H_ADAPTER_PARM"}, \
+	{H_RH_PARM,			"H_RH_PARM"}, \
+	{H_RCQ_PARM,			"H_RCQ_PARM"}, \
+	{H_SCQ_PARM,			"H_SCQ_PARM"}, \
+	{H_EQ_PARM,			"H_EQ_PARM"}, \
+	{H_RT_PARM,			"H_RT_PARM"}, \
+	{H_ST_PARM,			"H_ST_PARM"}, \
+	{H_SIGT_PARM,			"H_SIGT_PARM"}, \
+	{H_TOKEN_PARM,			"H_TOKEN_PARM"}, \
+	{H_MLENGTH_PARM,		"H_MLENGTH_PARM"}, \
+	{H_MEM_PARM,			"H_MEM_PARM"}, \
+	{H_MEM_ACCESS_PARM,		"H_MEM_ACCESS_PARM"}, \
+	{H_ATTR_PARM,			"H_ATTR_PARM"}, \
+	{H_PORT_PARM,			"H_PORT_PARM"}, \
+	{H_MCG_PARM,			"H_MCG_PARM"}, \
+	{H_VL_PARM,			"H_VL_PARM"}, \
+	{H_TSIZE_PARM,			"H_TSIZE_PARM"}, \
+	{H_TRACE_PARM,			"H_TRACE_PARM"}, \
+	{H_MASK_PARM,			"H_MASK_PARM"}, \
+	{H_MCG_FULL,			"H_MCG_FULL"}, \
+	{H_ALIAS_EXIST,			"H_ALIAS_EXIST"}, \
+	{H_P_COUNTER,			"H_P_COUNTER"}, \
+	{H_TABLE_FULL,			"H_TABLE_FULL"}, \
+	{H_ALT_TABLE,			"H_ALT_TABLE"}, \
+	{H_MR_CONDITION,		"H_MR_CONDITION"}, \
+	{H_NOT_ENOUGH_RESOURCES,	"H_NOT_ENOUGH_RESOURCES"}, \
+	{H_R_STATE,			"H_R_STATE"}, \
+	{H_RESCINDED,			"H_RESCINDED"}, \
+	{H_P2,				"H_P2"}, \
+	{H_P3,				"H_P3"}, \
+	{H_P4,				"H_P4"}, \
+	{H_P5,				"H_P5"}, \
+	{H_P6,				"H_P6"}, \
+	{H_P7,				"H_P7"}, \
+	{H_P8,				"H_P8"}, \
+	{H_P9,				"H_P9"}, \
+	{H_TOO_BIG,			"H_TOO_BIG"}, \
+	{H_OVERLAP,			"H_OVERLAP"}, \
+	{H_INTERRUPT,			"H_INTERRUPT"}, \
+	{H_BAD_DATA,			"H_BAD_DATA"}, \
+	{H_NOT_ACTIVE,			"H_NOT_ACTIVE"}, \
+	{H_SG_LIST,			"H_SG_LIST"}, \
+	{H_OP_MODE,			"H_OP_MODE"}, \
+	{H_COP_HW,			"H_COP_HW"}, \
+	{H_UNSUPPORTED_FLAG_START,	"H_UNSUPPORTED_FLAG_START"}, \
+	{H_UNSUPPORTED_FLAG_END,	"H_UNSUPPORTED_FLAG_END"}, \
+	{H_MULTI_THREADS_ACTIVE,	"H_MULTI_THREADS_ACTIVE"}, \
+	{H_OUTSTANDING_COP_OPS,		"H_OUTSTANDING_COP_OPS"}
+
+TRACE_EVENT(kvm_guest_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	pc)
+		__field(unsigned long,  pending_exceptions)
+		__field(u8,		ceded)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu->vcpu_id;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+		__entry->ceded		= vcpu->arch.ceded;
+		__entry->pending_exceptions  = vcpu->arch.pending_exceptions;
+	),
+
+	TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d",
+			__entry->vcpu_id,
+			__entry->pc,
+			__entry->pending_exceptions, __entry->ceded)
+);
+
+TRACE_EVENT(kvm_guest_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(int,		trap)
+		__field(unsigned long,	pc)
+		__field(unsigned long,	msr)
+		__field(u8,		ceded)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu->vcpu_id;
+		__entry->trap	 = vcpu->arch.trap;
+		__entry->ceded	 = vcpu->arch.ceded;
+		__entry->pc	 = kvmppc_get_pc(vcpu);
+		__entry->msr	 = vcpu->arch.shregs.msr;
+	),
+
+	TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d",
+		__entry->vcpu_id,
+		__print_symbolic(__entry->trap, kvm_trace_symbol_exit),
+		__entry->pc, __entry->msr, __entry->ceded
+	)
+);
+
+TRACE_EVENT(kvm_page_fault_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep,
+		 struct kvm_memory_slot *memslot, unsigned long ea,
+		 unsigned long dsisr),
+
+	TP_ARGS(vcpu, hptep, memslot, ea, dsisr),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	hpte_v)
+		__field(unsigned long,	hpte_r)
+		__field(unsigned long,	gpte_r)
+		__field(unsigned long,	ea)
+		__field(u64,		base_gfn)
+		__field(u32,		slot_flags)
+		__field(u32,		dsisr)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->hpte_v	  = hptep[0];
+		__entry->hpte_r	  = hptep[1];
+		__entry->gpte_r	  = hptep[2];
+		__entry->ea	  = ea;
+		__entry->dsisr	  = dsisr;
+		__entry->base_gfn = memslot ? memslot->base_gfn : -1UL;
+		__entry->slot_flags = memslot ? memslot->flags : 0;
+	),
+
+	TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x",
+		   __entry->vcpu_id,
+		   __entry->hpte_v, __entry->hpte_r, __entry->gpte_r,
+		   __entry->ea, __entry->dsisr,
+		   __entry->base_gfn, __entry->slot_flags)
+);
+
+TRACE_EVENT(kvm_page_fault_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret),
+
+	TP_ARGS(vcpu, hptep, ret),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	hpte_v)
+		__field(unsigned long,	hpte_r)
+		__field(long,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->hpte_v	= hptep[0];
+		__entry->hpte_r	= hptep[1];
+		__entry->ret = ret;
+	),
+
+	TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx",
+		   __entry->vcpu_id,
+		   __entry->hpte_v, __entry->hpte_r, __entry->ret)
+);
+
+TRACE_EVENT(kvm_hcall_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	req)
+		__field(unsigned long,	gpr4)
+		__field(unsigned long,	gpr5)
+		__field(unsigned long,	gpr6)
+		__field(unsigned long,	gpr7)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->req   = kvmppc_get_gpr(vcpu, 3);
+		__entry->gpr4  = kvmppc_get_gpr(vcpu, 4);
+		__entry->gpr5  = kvmppc_get_gpr(vcpu, 5);
+		__entry->gpr6  = kvmppc_get_gpr(vcpu, 6);
+		__entry->gpr7  = kvmppc_get_gpr(vcpu, 7);
+	),
+
+	TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx",
+		   __entry->vcpu_id,
+		   __print_symbolic(__entry->req, kvm_trace_symbol_hcall),
+		   __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7)
+);
+
+TRACE_EVENT(kvm_hcall_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, int ret),
+
+	TP_ARGS(vcpu, ret),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	ret)
+		__field(unsigned long,	hcall_rc)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->ret	  = ret;
+		__entry->hcall_rc = kvmppc_get_gpr(vcpu, 3);
+	),
+
+	TP_printk("VCPU %d: ret=%s hcall_rc=%s",
+		   __entry->vcpu_id,
+		   __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret),
+		   __print_symbolic(__entry->ret & RESUME_FLAG_HOST ?
+					H_TOO_HARD : __entry->hcall_rc,
+					kvm_trace_symbol_hcall_rc))
+);
+
+TRACE_EVENT(kvmppc_run_core,
+	TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+	TP_ARGS(vc, where),
+
+	TP_STRUCT__entry(
+		__field(int,	n_runnable)
+		__field(int,	runner_vcpu)
+		__field(int,	where)
+		__field(pid_t,	tgid)
+	),
+
+	TP_fast_assign(
+		__entry->runner_vcpu	= vc->runner->vcpu_id;
+		__entry->n_runnable	= vc->n_runnable;
+		__entry->where		= where;
+		__entry->tgid		= current->tgid;
+	),
+
+	TP_printk("%s runner_vcpu==%d runnable=%d tgid=%d",
+		    __entry->where ? "Exit" : "Enter",
+		    __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_blocked,
+	TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+	TP_ARGS(vc, where),
+
+	TP_STRUCT__entry(
+		__field(int,	n_runnable)
+		__field(int,	runner_vcpu)
+		__field(int,	where)
+		__field(pid_t,	tgid)
+	),
+
+	TP_fast_assign(
+		__entry->runner_vcpu = vc->runner->vcpu_id;
+		__entry->n_runnable  = vc->n_runnable;
+		__entry->where       = where;
+		__entry->tgid	     = current->tgid;
+	),
+
+	TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+		   __entry->where ? "Exit" : "Enter",
+		   __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(pid_t,		tgid)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->tgid	  = current->tgid;
+	),
+
+	TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run),
+
+	TP_ARGS(vcpu, run),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(int,		exit)
+		__field(int,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->exit     = run->exit_reason;
+		__entry->ret      = vcpu->arch.ret;
+	),
+
+	TP_printk("VCPU %d: exit=%d, ret=%d",
+			__entry->vcpu_id, __entry->exit, __entry->ret)
+);
+
+#endif /* _TRACE_KVM_HV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index e1357cd8dc1f..810507cb688a 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -3,36 +3,13 @@
 #define _TRACE_KVM_PR_H
 
 #include <linux/tracepoint.h>
+#include "trace_book3s.h"
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm_pr
 #define TRACE_INCLUDE_PATH .
 #define TRACE_INCLUDE_FILE trace_pr
 
-#define kvm_trace_symbol_exit \
-	{0x100, "SYSTEM_RESET"}, \
-	{0x200, "MACHINE_CHECK"}, \
-	{0x300, "DATA_STORAGE"}, \
-	{0x380, "DATA_SEGMENT"}, \
-	{0x400, "INST_STORAGE"}, \
-	{0x480, "INST_SEGMENT"}, \
-	{0x500, "EXTERNAL"}, \
-	{0x501, "EXTERNAL_LEVEL"}, \
-	{0x502, "EXTERNAL_HV"}, \
-	{0x600, "ALIGNMENT"}, \
-	{0x700, "PROGRAM"}, \
-	{0x800, "FP_UNAVAIL"}, \
-	{0x900, "DECREMENTER"}, \
-	{0x980, "HV_DECREMENTER"}, \
-	{0xc00, "SYSCALL"}, \
-	{0xd00, "TRACE"}, \
-	{0xe00, "H_DATA_STORAGE"}, \
-	{0xe20, "H_INST_STORAGE"}, \
-	{0xe40, "H_EMUL_ASSIST"}, \
-	{0xf00, "PERFMON"}, \
-	{0xf20, "ALTIVEC"}, \
-	{0xf40, "VSX"}
-
 TRACE_EVENT(kvm_book3s_reenter,
 	TP_PROTO(int r, struct kvm_vcpu *vcpu),
 	TP_ARGS(r, vcpu),
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index dba34088da28..f162d0b8eea3 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -177,7 +177,7 @@ static ssize_t _name##_show(struct device *dev,			\
 	}							\
 	ret = sprintf(buf, _fmt, _expr);			\
 e_free:								\
-	kfree(page);						\
+	kmem_cache_free(hv_page_cache, page);			\
 	return ret;						\
 }								\
 static DEVICE_ATTR_RO(_name)
@@ -217,11 +217,14 @@ static bool is_physical_domain(int domain)
 		domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
 }
 
+DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096);
+DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096);
+
 static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 					 u16 lpar, u64 *res,
 					 bool success_expected)
 {
-	unsigned long ret = -ENOMEM;
+	unsigned long ret;
 
 	/*
 	 * request_buffer and result_buffer are not required to be 4k aligned,
@@ -243,13 +246,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
 	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
 
-	request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
-	if (!request_buffer)
-		goto out;
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
 
-	result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
-	if (!result_buffer)
-		goto out_free_request_buffer;
+	memset(request_buffer, 0, 4096);
+	memset(result_buffer, 0, 4096);
 
 	*request_buffer = (struct reqb) {
 		.buf = {
@@ -278,15 +279,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 				domain, offset, ix, lpar, ret, ret,
 				result_buffer->buf.detailed_rc,
 				result_buffer->buf.failing_request_ix);
-		goto out_free_result_buffer;
+		goto out;
 	}
 
 	*res = be64_to_cpu(result_buffer->result);
 
-out_free_result_buffer:
-	kfree(result_buffer);
-out_free_request_buffer:
-	kfree(request_buffer);
 out:
 	return ret;
 }
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0a299be588af..54eca8b3b288 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -158,6 +158,43 @@ opal_tracepoint_return:
 	blr
 #endif
 
+/*
+ * Make opal call in realmode. This is a generic function to be called
+ * from realmode. It handles endianness.
+ *
+ * r13 - paca pointer
+ * r1  - stack pointer
+ * r0  - opal token
+ */
+_GLOBAL(opal_call_realmode)
+	mflr	r12
+	std	r12,PPC_LR_STKOFF(r1)
+	ld	r2,PACATOC(r13)
+	/* Set opal return address */
+	LOAD_REG_ADDR(r12,return_from_opal_call)
+	mtlr	r12
+
+	mfmsr	r12
+#ifdef __LITTLE_ENDIAN__
+	/* Handle endian-ness */
+	li	r11,MSR_LE
+	andc	r12,r12,r11
+#endif
+	mtspr	SPRN_HSRR1,r12
+	LOAD_REG_ADDR(r11,opal)
+	ld	r12,8(r11)
+	ld	r2,0(r11)
+	mtspr	SPRN_HSRR0,r12
+	hrfid
+
+return_from_opal_call:
+#ifdef __LITTLE_ENDIAN__
+	FIXUP_ENDIAN
+#endif
+	ld	r12,PPC_LR_STKOFF(r1)
+	mtlr	r12
+	blr
+
 OPAL_CALL(opal_invalid_call,			OPAL_INVALID_CALL);
 OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
 OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
@@ -247,6 +284,7 @@ OPAL_CALL(opal_sensor_read,			OPAL_SENSOR_READ);
 OPAL_CALL(opal_get_param,			OPAL_GET_PARAM);
 OPAL_CALL(opal_set_param,			OPAL_SET_PARAM);
 OPAL_CALL(opal_handle_hmi,			OPAL_HANDLE_HMI);
+OPAL_CALL(opal_slw_set_reg,			OPAL_SLW_SET_REG);
 OPAL_CALL(opal_register_dump_region,		OPAL_REGISTER_DUMP_REGION);
 OPAL_CALL(opal_unregister_dump_region,		OPAL_UNREGISTER_DUMP_REGION);
 OPAL_CALL(opal_pci_set_phb_cxl_mode,		OPAL_PCI_SET_PHB_CXL_MODE);
@@ -254,3 +292,4 @@ OPAL_CALL(opal_tpo_write,			OPAL_WRITE_TPO);
 OPAL_CALL(opal_tpo_read,			OPAL_READ_TPO);
 OPAL_CALL(opal_ipmi_send,			OPAL_IPMI_SEND);
 OPAL_CALL(opal_ipmi_recv,			OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request,			OPAL_I2C_REQUEST);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cb0b6de79cd4..f10b9ec8c1f5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -9,8 +9,9 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
+#define pr_fmt(fmt)	"opal: " fmt
 
+#include <linux/printk.h>
 #include <linux/types.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
@@ -625,6 +626,39 @@ static int opal_sysfs_init(void)
 	return 0;
 }
 
+static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr,
+			       char *buf, loff_t off, size_t count)
+{
+	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+				       bin_attr->size);
+}
+
+static BIN_ATTR_RO(symbol_map, 0);
+
+static void opal_export_symmap(void)
+{
+	const __be64 *syms;
+	unsigned int size;
+	struct device_node *fw;
+	int rc;
+
+	fw = of_find_node_by_path("/ibm,opal/firmware");
+	if (!fw)
+		return;
+	syms = of_get_property(fw, "symbol-map", &size);
+	if (!syms || size != 2 * sizeof(__be64))
+		return;
+
+	/* Setup attributes */
+	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
+	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
+
+	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
+	if (rc)
+		pr_warn("Error %d creating OPAL symbols file\n", rc);
+}
+
 static void __init opal_dump_region_init(void)
 {
 	void *addr;
@@ -653,6 +687,14 @@ static void opal_ipmi_init(struct device_node *opal_node)
 			of_platform_device_create(np, NULL, NULL);
 }
 
+static void opal_i2c_create_devs(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
+		of_platform_device_create(np, NULL, NULL);
+}
+
 static int __init opal_init(void)
 {
 	struct device_node *np, *consoles;
@@ -679,6 +721,9 @@ static int __init opal_init(void)
 		of_node_put(consoles);
 	}
 
+	/* Create i2c platform devices */
+	opal_i2c_create_devs();
+
 	/* Find all OPAL interrupts and request them */
 	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
 	pr_debug("opal: Found %d interrupts reserved for OPAL\n",
@@ -702,6 +747,8 @@ static int __init opal_init(void)
 	/* Create "opal" kobject under /sys/firmware */
 	rc = opal_sysfs_init();
 	if (rc == 0) {
+		/* Export symbol map to userspace */
+		opal_export_symmap();
 		/* Setup dump region interface */
 		opal_dump_region_init();
 		/* Setup error log interface */
@@ -824,3 +871,4 @@ EXPORT_SYMBOL_GPL(opal_rtc_read);
 EXPORT_SYMBOL_GPL(opal_rtc_write);
 EXPORT_SYMBOL_GPL(opal_tpo_read);
 EXPORT_SYMBOL_GPL(opal_tpo_write);
+EXPORT_SYMBOL_GPL(opal_i2c_request);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6c8e2d188cd0..604c48e7879a 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
 }
 #endif
 
+extern u32 pnv_get_supported_cpuidle_states(void);
+
 extern void pnv_lpc_init(void);
 
 bool cpu_core_split_required(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 30b1c3e298a6..b700a329c31d 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,8 +36,12 @@
 #include <asm/opal.h>
 #include <asm/kexec.h>
 #include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
 
 #include "powernv.h"
+#include "subcore.h"
 
 static void __init pnv_setup_arch(void)
 {
@@ -288,6 +292,168 @@ static void __init pnv_setup_machdep_rtas(void)
 }
 #endif /* CONFIG_PPC_POWERNV_RTAS */
 
+static u32 supported_cpuidle_states;
+
+int pnv_save_sprs_for_winkle(void)
+{
+	int cpu;
+	int rc;
+
+	/*
+	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
+	 * all cpus at boot. Get these reg values of current cpu and use the
+	 * same accross all cpus.
+	 */
+	uint64_t lpcr_val = mfspr(SPRN_LPCR);
+	uint64_t hid0_val = mfspr(SPRN_HID0);
+	uint64_t hid1_val = mfspr(SPRN_HID1);
+	uint64_t hid4_val = mfspr(SPRN_HID4);
+	uint64_t hid5_val = mfspr(SPRN_HID5);
+	uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+	for_each_possible_cpu(cpu) {
+		uint64_t pir = get_hard_smp_processor_id(cpu);
+		uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+
+		/*
+		 * HSPRG0 is used to store the cpu's pointer to paca. Hence last
+		 * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
+		 * with 63rd bit set, so that when a thread wakes up at 0x100 we
+		 * can use this bit to distinguish between fastsleep and
+		 * deep winkle.
+		 */
+		hsprg0_val |= 1;
+
+		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+		if (rc != 0)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+		if (rc != 0)
+			return rc;
+
+		/* HIDs are per core registers */
+		if (cpu_thread_in_core(cpu) == 0) {
+
+			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+			if (rc != 0)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+static void pnv_alloc_idle_core_states(void)
+{
+	int i, j;
+	int nr_cores = cpu_nr_cores();
+	u32 *core_idle_state;
+
+	/*
+	 * core_idle_state - First 8 bits track the idle state of each thread
+	 * of the core. The 8th bit is the lock bit. Initially all thread bits
+	 * are set. They are cleared when the thread enters deep idle state
+	 * like sleep and winkle. Initially the lock bit is cleared.
+	 * The lock bit has 2 purposes
+	 * a. While the first thread is restoring core state, it prevents
+	 * other threads in the core from switching to process context.
+	 * b. While the last thread in the core is saving the core state, it
+	 * prevents a different thread from waking up.
+	 */
+	for (i = 0; i < nr_cores; i++) {
+		int first_cpu = i * threads_per_core;
+		int node = cpu_to_node(first_cpu);
+
+		core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
+		*core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+
+		for (j = 0; j < threads_per_core; j++) {
+			int cpu = first_cpu + j;
+
+			paca[cpu].core_idle_state_ptr = core_idle_state;
+			paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+			paca[cpu].thread_mask = 1 << j;
+		}
+	}
+
+	update_subcore_sibling_mask();
+
+	if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
+		pnv_save_sprs_for_winkle();
+}
+
+u32 pnv_get_supported_cpuidle_states(void)
+{
+	return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+static int __init pnv_init_idle_states(void)
+{
+	struct device_node *power_mgt;
+	int dt_idle_states;
+	const __be32 *idle_state_flags;
+	u32 len_flags, flags;
+	int i;
+
+	supported_cpuidle_states = 0;
+
+	if (cpuidle_disable != IDLE_NO_OVERRIDE)
+		return 0;
+
+	if (!firmware_has_feature(FW_FEATURE_OPALv3))
+		return 0;
+
+	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+	if (!power_mgt) {
+		pr_warn("opal: PowerMgmt Node not found\n");
+		return 0;
+	}
+
+	idle_state_flags = of_get_property(power_mgt,
+			"ibm,cpu-idle-state-flags", &len_flags);
+	if (!idle_state_flags) {
+		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
+		return 0;
+	}
+
+	dt_idle_states = len_flags / sizeof(u32);
+
+	for (i = 0; i < dt_idle_states; i++) {
+		flags = be32_to_cpu(idle_state_flags[i]);
+		supported_cpuidle_states |= flags;
+	}
+	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_entry,
+			PPC_INST_NOP);
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_exit,
+			PPC_INST_NOP);
+	}
+	pnv_alloc_idle_core_states();
+	return 0;
+}
+
+subsys_initcall(pnv_init_idle_states);
+
 static int __init pnv_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index b716f666e48a..fc34025ef822 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -150,6 +150,7 @@ static void pnv_smp_cpu_kill_self(void)
 {
 	unsigned int cpu;
 	unsigned long srr1;
+	u32 idle_states;
 
 	/* Standard hot unplug procedure */
 	local_irq_disable();
@@ -160,13 +161,23 @@ static void pnv_smp_cpu_kill_self(void)
 	generic_set_cpu_dead(cpu);
 	smp_wmb();
 
+	idle_states = pnv_get_supported_cpuidle_states();
 	/* We don't want to take decrementer interrupts while we are offline,
 	 * so clear LPCR:PECE1. We keep PECE2 enabled.
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 	while (!generic_check_cpu_restart(cpu)) {
+
 		ppc64_runlatch_off();
-		srr1 = power7_nap(1);
+
+		if (idle_states & OPAL_PM_WINKLE_ENABLED)
+			srr1 = power7_winkle();
+		else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
+				(idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+			srr1 = power7_sleep();
+		else
+			srr1 = power7_nap(1);
+
 		ppc64_runlatch_on();
 
 		/*
@@ -198,13 +209,27 @@ static void pnv_smp_cpu_kill_self(void)
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static int pnv_cpu_bootable(unsigned int nr)
+{
+	/*
+	 * Starting with POWER8, the subcore logic relies on all threads of a
+	 * core being booted so that they can participate in split mode
+	 * switches. So on those machines we ignore the smt_enabled_at_boot
+	 * setting (smt-enabled on the kernel command line).
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return 1;
+
+	return smp_generic_cpu_bootable(nr);
+}
+
 static struct smp_ops_t pnv_smp_ops = {
 	.message_pass	= smp_muxed_ipi_message_pass,
 	.cause_ipi	= NULL,	/* Filled at runtime by xics_smp_probe() */
 	.probe		= xics_smp_probe,
 	.kick_cpu	= pnv_smp_kick_cpu,
 	.setup_cpu	= pnv_smp_setup_cpu,
-	.cpu_bootable	= smp_generic_cpu_bootable,
+	.cpu_bootable	= pnv_cpu_bootable,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable	= pnv_smp_cpu_disable,
 	.cpu_die	= generic_cpu_die,
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index c87f96b79d1a..f60f80ada903 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -160,6 +160,18 @@ static void wait_for_sync_step(int step)
 	mb();
 }
 
+static void update_hid_in_slw(u64 hid0)
+{
+	u64 idle_states = pnv_get_supported_cpuidle_states();
+
+	if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+		/* OPAL call to patch slw with the new HID0 value */
+		u64 cpu_pir = hard_smp_processor_id();
+
+		opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
+	}
+}
+
 static void unsplit_core(void)
 {
 	u64 hid0, mask;
@@ -179,6 +191,7 @@ static void unsplit_core(void)
 	hid0 = mfspr(SPRN_HID0);
 	hid0 &= ~HID0_POWER8_DYNLPARDIS;
 	mtspr(SPRN_HID0, hid0);
+	update_hid_in_slw(hid0);
 
 	while (mfspr(SPRN_HID0) & mask)
 		cpu_relax();
@@ -215,6 +228,7 @@ static void split_core(int new_mode)
 	hid0  = mfspr(SPRN_HID0);
 	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
 	mtspr(SPRN_HID0, hid0);
+	update_hid_in_slw(hid0);
 
 	/* Wait for it to happen */
 	while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
@@ -251,6 +265,25 @@ bool cpu_core_split_required(void)
 	return true;
 }
 
+void update_subcore_sibling_mask(void)
+{
+	int cpu;
+	/*
+	 * sibling mask for the first cpu. Left shift this by required bits
+	 * to get sibling mask for the rest of the cpus.
+	 */
+	int sibling_mask_first_cpu =  (1 << threads_per_subcore) - 1;
+
+	for_each_possible_cpu(cpu) {
+		int tid = cpu_thread_in_core(cpu);
+		int offset = (tid / threads_per_subcore) * threads_per_subcore;
+		int mask = sibling_mask_first_cpu << offset;
+
+		paca[cpu].subcore_sibling_mask = mask;
+
+	}
+}
+
 static int cpu_update_split_mode(void *data)
 {
 	int cpu, new_mode = *(int *)data;
@@ -284,6 +317,7 @@ static int cpu_update_split_mode(void *data)
 		/* Make the new mode public */
 		subcores_per_core = new_mode;
 		threads_per_subcore = threads_per_core / subcores_per_core;
+		update_subcore_sibling_mask();
 
 		/* Make sure the new mode is written before we exit */
 		mb();
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
index 148abc91debf..84e02ae52895 100644
--- a/arch/powerpc/platforms/powernv/subcore.h
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -14,5 +14,12 @@
 #define SYNC_STEP_FINISHED	3	/* Set by secondary when split/unsplit is done */
 
 #ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
 void split_core_secondary_loop(u8 *state);
-#endif
+extern void update_subcore_sibling_mask(void);
+#else
+static inline void update_subcore_sibling_mask(void) { };
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2175f911a73a..9cba74d5d853 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -123,7 +123,7 @@ struct kvm_s390_sie_block {
 #define ICPT_PARTEXEC	0x38
 #define ICPT_IOINST	0x40
 	__u8	icptcode;		/* 0x0050 */
-	__u8	reserved51;		/* 0x0051 */
+	__u8	icptstatus;		/* 0x0051 */
 	__u16	ihcpu;			/* 0x0052 */
 	__u8	reserved54[2];		/* 0x0054 */
 	__u16	ipa;			/* 0x0056 */
@@ -226,10 +226,17 @@ struct kvm_vcpu_stat {
 	u32 instruction_sigp_sense_running;
 	u32 instruction_sigp_external_call;
 	u32 instruction_sigp_emergency;
+	u32 instruction_sigp_cond_emergency;
+	u32 instruction_sigp_start;
 	u32 instruction_sigp_stop;
+	u32 instruction_sigp_stop_store_status;
+	u32 instruction_sigp_store_status;
 	u32 instruction_sigp_arch;
 	u32 instruction_sigp_prefix;
 	u32 instruction_sigp_restart;
+	u32 instruction_sigp_init_cpu_reset;
+	u32 instruction_sigp_cpu_reset;
+	u32 instruction_sigp_unknown;
 	u32 diagnose_10;
 	u32 diagnose_44;
 	u32 diagnose_9c;
@@ -288,6 +295,79 @@ struct kvm_vcpu_stat {
 #define PGM_PER				0x80
 #define PGM_CRYPTO_OPERATION		0x119
 
+/* irq types in order of priority */
+enum irq_types {
+	IRQ_PEND_MCHK_EX = 0,
+	IRQ_PEND_SVC,
+	IRQ_PEND_PROG,
+	IRQ_PEND_MCHK_REP,
+	IRQ_PEND_EXT_IRQ_KEY,
+	IRQ_PEND_EXT_MALFUNC,
+	IRQ_PEND_EXT_EMERGENCY,
+	IRQ_PEND_EXT_EXTERNAL,
+	IRQ_PEND_EXT_CLOCK_COMP,
+	IRQ_PEND_EXT_CPU_TIMER,
+	IRQ_PEND_EXT_TIMING,
+	IRQ_PEND_EXT_SERVICE,
+	IRQ_PEND_EXT_HOST,
+	IRQ_PEND_PFAULT_INIT,
+	IRQ_PEND_PFAULT_DONE,
+	IRQ_PEND_VIRTIO,
+	IRQ_PEND_IO_ISC_0,
+	IRQ_PEND_IO_ISC_1,
+	IRQ_PEND_IO_ISC_2,
+	IRQ_PEND_IO_ISC_3,
+	IRQ_PEND_IO_ISC_4,
+	IRQ_PEND_IO_ISC_5,
+	IRQ_PEND_IO_ISC_6,
+	IRQ_PEND_IO_ISC_7,
+	IRQ_PEND_SIGP_STOP,
+	IRQ_PEND_RESTART,
+	IRQ_PEND_SET_PREFIX,
+	IRQ_PEND_COUNT
+};
+
+/*
+ * Repressible (non-floating) machine check interrupts
+ * subclass bits in MCIC
+ */
+#define MCHK_EXTD_BIT 58
+#define MCHK_DEGR_BIT 56
+#define MCHK_WARN_BIT 55
+#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
+		       (1UL << MCHK_EXTD_BIT) | \
+		       (1UL << MCHK_WARN_BIT))
+
+/* Exigent machine check interrupts subclass bits in MCIC */
+#define MCHK_SD_BIT 63
+#define MCHK_PD_BIT 62
+#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
+
+#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY)    | \
+			   (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+			   (1UL << IRQ_PEND_EXT_CPU_TIMER)  | \
+			   (1UL << IRQ_PEND_EXT_MALFUNC)    | \
+			   (1UL << IRQ_PEND_EXT_EMERGENCY)  | \
+			   (1UL << IRQ_PEND_EXT_EXTERNAL)   | \
+			   (1UL << IRQ_PEND_EXT_TIMING)     | \
+			   (1UL << IRQ_PEND_EXT_HOST)       | \
+			   (1UL << IRQ_PEND_EXT_SERVICE)    | \
+			   (1UL << IRQ_PEND_VIRTIO)         | \
+			   (1UL << IRQ_PEND_PFAULT_INIT)    | \
+			   (1UL << IRQ_PEND_PFAULT_DONE))
+
+#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
+			  (1UL << IRQ_PEND_IO_ISC_1) | \
+			  (1UL << IRQ_PEND_IO_ISC_2) | \
+			  (1UL << IRQ_PEND_IO_ISC_3) | \
+			  (1UL << IRQ_PEND_IO_ISC_4) | \
+			  (1UL << IRQ_PEND_IO_ISC_5) | \
+			  (1UL << IRQ_PEND_IO_ISC_6) | \
+			  (1UL << IRQ_PEND_IO_ISC_7))
+
+#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
+			    (1UL << IRQ_PEND_MCHK_EX))
+
 struct kvm_s390_interrupt_info {
 	struct list_head list;
 	u64	type;
@@ -306,14 +386,25 @@ struct kvm_s390_interrupt_info {
 #define ACTION_STORE_ON_STOP		(1<<0)
 #define ACTION_STOP_ON_STOP		(1<<1)
 
+struct kvm_s390_irq_payload {
+	struct kvm_s390_io_info io;
+	struct kvm_s390_ext_info ext;
+	struct kvm_s390_pgm_info pgm;
+	struct kvm_s390_emerg_info emerg;
+	struct kvm_s390_extcall_info extcall;
+	struct kvm_s390_prefix_info prefix;
+	struct kvm_s390_mchk_info mchk;
+};
+
 struct kvm_s390_local_interrupt {
 	spinlock_t lock;
-	struct list_head list;
-	atomic_t active;
 	struct kvm_s390_float_interrupt *float_int;
 	wait_queue_head_t *wq;
 	atomic_t *cpuflags;
 	unsigned int action_bits;
+	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+	struct kvm_s390_irq_payload irq;
+	unsigned long pending_irqs;
 };
 
 struct kvm_s390_float_interrupt {
@@ -434,6 +525,8 @@ struct kvm_arch{
 	int user_cpu_state_ctrl;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
+	int ipte_lock_count;
+	struct mutex ipte_mutex;
 	spinlock_t start_stop_lock;
 	struct kvm_s390_crypto crypto;
 };
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index e510b9460efa..3009c2ba46d2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -24,6 +24,7 @@ void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
 
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq);
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 49576115dbb7..fad4ae23ece0 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -10,6 +10,7 @@
 #define SIGP_RESTART		      6
 #define SIGP_STOP_AND_STORE_STATUS    9
 #define SIGP_INITIAL_CPU_RESET	     11
+#define SIGP_CPU_RESET		     12
 #define SIGP_SET_PREFIX		     13
 #define SIGP_STORE_STATUS_AT_ADDRESS 14
 #define SIGP_SET_ARCHITECTURE	     18
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index ca38139423ae..437e61159279 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
 	struct group_info *group_info;
 	int retval;
 
-	if (!capable(CAP_SETGID))
+	if (!may_setgroups())
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0f961a1c64b3..8a1be9017730 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -207,8 +207,6 @@ union raddress {
 	unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
-static int ipte_lock_count;
-static DEFINE_MUTEX(ipte_mutex);
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
@@ -216,47 +214,48 @@ int ipte_lock_held(struct kvm_vcpu *vcpu)
 
 	if (vcpu->arch.sie_block->eca & 1)
 		return ic->kh != 0;
-	return ipte_lock_count != 0;
+	return vcpu->kvm->arch.ipte_lock_count != 0;
 }
 
 static void ipte_lock_simple(struct kvm_vcpu *vcpu)
 {
 	union ipte_control old, new, *ic;
 
-	mutex_lock(&ipte_mutex);
-	ipte_lock_count++;
-	if (ipte_lock_count > 1)
+	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+	vcpu->kvm->arch.ipte_lock_count++;
+	if (vcpu->kvm->arch.ipte_lock_count > 1)
 		goto out;
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		old = ACCESS_ONCE(*ic);
+		old = READ_ONCE(*ic);
 		while (old.k) {
 			cond_resched();
-			old = ACCESS_ONCE(*ic);
+			old = READ_ONCE(*ic);
 		}
 		new = old;
 		new.k = 1;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 out:
-	mutex_unlock(&ipte_mutex);
+	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 }
 
 static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
 {
 	union ipte_control old, new, *ic;
 
-	mutex_lock(&ipte_mutex);
-	ipte_lock_count--;
-	if (ipte_lock_count)
+	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+	vcpu->kvm->arch.ipte_lock_count--;
+	if (vcpu->kvm->arch.ipte_lock_count)
 		goto out;
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		new = old = ACCESS_ONCE(*ic);
+		old = READ_ONCE(*ic);
+		new = old;
 		new.k = 0;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 	wake_up(&vcpu->kvm->arch.ipte_wq);
 out:
-	mutex_unlock(&ipte_mutex);
+	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 }
 
 static void ipte_lock_siif(struct kvm_vcpu *vcpu)
@@ -265,10 +264,10 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
 
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		old = ACCESS_ONCE(*ic);
+		old = READ_ONCE(*ic);
 		while (old.kg) {
 			cond_resched();
-			old = ACCESS_ONCE(*ic);
+			old = READ_ONCE(*ic);
 		}
 		new = old;
 		new.k = 1;
@@ -282,7 +281,8 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
 
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		new = old = ACCESS_ONCE(*ic);
+		old = READ_ONCE(*ic);
+		new = old;
 		new.kh--;
 		if (!new.kh)
 			new.k = 0;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index eaf46291d361..81c77ab8102e 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,6 +38,19 @@ static const intercept_handler_t instruction_handlers[256] = {
 	[0xeb] = kvm_s390_handle_eb,
 };
 
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+{
+	struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+	/* Use the length of the EXECUTE instruction if necessary */
+	if (sie_block->icptstatus & 1) {
+		ilc = (sie_block->icptstatus >> 4) & 0x6;
+		if (!ilc)
+			ilc = 4;
+	}
+	sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
+}
+
 static int handle_noop(struct kvm_vcpu *vcpu)
 {
 	switch (vcpu->arch.sie_block->icptcode) {
@@ -244,7 +257,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 {
 	u16 eic = vcpu->arch.sie_block->eic;
-	struct kvm_s390_interrupt irq;
+	struct kvm_s390_irq irq;
 	psw_t newpsw;
 	int rc;
 
@@ -269,7 +282,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 		if (kvm_s390_si_ext_call_pending(vcpu))
 			return 0;
 		irq.type = KVM_S390_INT_EXTERNAL_CALL;
-		irq.parm = vcpu->arch.sie_block->extcpuaddr;
+		irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -288,7 +301,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
  */
 static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 {
-	psw_t *psw = &vcpu->arch.sie_block->gpsw;
 	unsigned long srcaddr, dstaddr;
 	int reg1, reg2, rc;
 
@@ -310,7 +322,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 	if (rc != 0)
 		return rc;
 
-	psw->addr = __rewind_psw(*psw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 
 	return 0;
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a39838457f01..f00f31e66cd8 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -16,6 +16,7 @@
 #include <linux/mmu_context.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/bitmap.h>
 #include <asm/asm-offsets.h>
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
@@ -27,8 +28,8 @@
 #define IOINT_CSSID_MASK 0x03fc0000
 #define IOINT_AI_MASK 0x04000000
 #define PFAULT_INIT 0x0600
-
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+#define PFAULT_DONE 0x0680
+#define VIRTIO_PARAM 0x0d00
 
 static int is_ioint(u64 type)
 {
@@ -136,6 +137,31 @@ static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.local_int.pending_irqs;
+}
+
+static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+{
+	unsigned long active_mask = pending_local_irqs(vcpu);
+
+	if (psw_extint_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_EXT_MASK;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+		__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
+		__clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
+		__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+	if (psw_mchk_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_MCHK_MASK;
+
+	return active_mask;
+}
+
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
 	atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
@@ -170,26 +196,45 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
 	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
+static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+		return;
+	if (psw_extint_disabled(vcpu))
+		__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR0;
+}
+
+static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+		return;
+	if (psw_mchk_disabled(vcpu))
+		vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR14;
+}
+
+/* Set interception request for non-deliverable local interrupts */
+static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+{
+	set_intercept_indicators_ext(vcpu);
+	set_intercept_indicators_mchk(vcpu);
+}
+
 static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_interrupt_info *inti)
 {
 	switch (inti->type) {
-	case KVM_S390_INT_EXTERNAL_CALL:
-	case KVM_S390_INT_EMERGENCY:
 	case KVM_S390_INT_SERVICE:
-	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
-	case KVM_S390_INT_CLOCK_COMP:
-	case KVM_S390_INT_CPU_TIMER:
 		if (psw_extint_disabled(vcpu))
 			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
 		else
 			vcpu->arch.sie_block->lctl |= LCTL_CR0;
 		break;
-	case KVM_S390_SIGP_STOP:
-		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
-		break;
 	case KVM_S390_MCHK:
 		if (psw_mchk_disabled(vcpu))
 			vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -226,13 +271,236 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
 	}
 }
 
-static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
-			      struct kvm_s390_pgm_info *pgm_info)
+static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+					 0, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+					 0, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+			   (u16 __user *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_ext_info ext;
+	int rc;
+
+	spin_lock(&li->lock);
+	ext = li->irq.ext;
+	clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+	li->irq.ext.ext_params2 = 0;
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
+		   0, ext.ext_params2);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_PFAULT_INIT,
+					 0, ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info mchk;
+	int rc;
+
+	spin_lock(&li->lock);
+	mchk = li->irq.mchk;
+	/*
+	 * If there was an exigent machine check pending, then any repressible
+	 * machine checks that might have been pending are indicated along
+	 * with it, so always clear both bits
+	 */
+	clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+	clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+	memset(&li->irq.mchk, 0, sizeof(mchk));
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+		   mchk.mcic);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+					 mchk.cr14, mchk.mcic);
+
+	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+	rc |= put_guest_lc(vcpu, mchk.mcic,
+			   (u64 __user *) __LC_MCCK_CODE);
+	rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+			   (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+			     &mchk.fixed_logout, sizeof(mchk.fixed_logout));
+	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+	vcpu->stat.deliver_restart_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+	rc  = write_guest_lc(vcpu,
+			     offsetof(struct _lowcore, restart_old_psw),
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+	vcpu->stat.deliver_stop_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP,
+					 0, 0);
+
+	__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+	clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs);
+	return 0;
+}
+
+static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_prefix_info prefix;
+
+	spin_lock(&li->lock);
+	prefix = li->irq.prefix;
+	li->irq.prefix.address = 0;
+	clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
+	vcpu->stat.deliver_prefix_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_SIGP_SET_PREFIX,
+					 prefix.address, 0);
+
+	kvm_s390_set_prefix(vcpu, prefix.address);
+	return 0;
+}
+
+static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+	int cpu_addr;
+
+	spin_lock(&li->lock);
+	cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+	clear_bit(cpu_addr, li->sigp_emerg_pending);
+	if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS))
+		clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+	vcpu->stat.deliver_emergency_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+					 cpu_addr, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_extcall_info extcall;
+	int rc;
+
+	spin_lock(&li->lock);
+	extcall = li->irq.extcall;
+	li->irq.extcall.code = 0;
+	clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+	vcpu->stat.deliver_external_call++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_EXTERNAL_CALL,
+					 extcall.code, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw,
+			    sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_pgm_info pgm_info;
 	int rc = 0;
 	u16 ilc = get_ilc(vcpu);
 
-	switch (pgm_info->code & ~PGM_PER) {
+	spin_lock(&li->lock);
+	pgm_info = li->irq.pgm;
+	clear_bit(IRQ_PEND_PROG, &li->pending_irqs);
+	memset(&li->irq.pgm, 0, sizeof(pgm_info));
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+		   pgm_info.code, ilc);
+	vcpu->stat.deliver_program_int++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+					 pgm_info.code, 0);
+
+	switch (pgm_info.code & ~PGM_PER) {
 	case PGM_AFX_TRANSLATION:
 	case PGM_ASX_TRANSLATION:
 	case PGM_EX_TRANSLATION:
@@ -243,7 +511,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
 	case PGM_PRIMARY_AUTHORITY:
 	case PGM_SECONDARY_AUTHORITY:
 	case PGM_SPACE_SWITCH:
-		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
 		break;
 	case PGM_ALEN_TRANSLATION:
@@ -252,7 +520,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
 	case PGM_ASTE_SEQUENCE:
 	case PGM_ASTE_VALIDITY:
 	case PGM_EXTENDED_AUTHORITY:
-		rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+		rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
 				  (u8 *)__LC_EXC_ACCESS_ID);
 		break;
 	case PGM_ASCE_TYPE:
@@ -261,247 +529,208 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
 	case PGM_REGION_SECOND_TRANS:
 	case PGM_REGION_THIRD_TRANS:
 	case PGM_SEGMENT_TRANSLATION:
-		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
-		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
 				   (u8 *)__LC_EXC_ACCESS_ID);
-		rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
 				   (u8 *)__LC_OP_ACCESS_ID);
 		break;
 	case PGM_MONITOR:
-		rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
-				  (u64 *)__LC_MON_CLASS_NR);
-		rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+		rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
+				  (u16 *)__LC_MON_CLASS_NR);
+		rc |= put_guest_lc(vcpu, pgm_info.mon_code,
 				   (u64 *)__LC_MON_CODE);
 		break;
 	case PGM_DATA:
-		rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
 				  (u32 *)__LC_DATA_EXC_CODE);
 		break;
 	case PGM_PROTECTION:
-		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
-		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
 				   (u8 *)__LC_EXC_ACCESS_ID);
 		break;
 	}
 
-	if (pgm_info->code & PGM_PER) {
-		rc |= put_guest_lc(vcpu, pgm_info->per_code,
+	if (pgm_info.code & PGM_PER) {
+		rc |= put_guest_lc(vcpu, pgm_info.per_code,
 				   (u8 *) __LC_PER_CODE);
-		rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+		rc |= put_guest_lc(vcpu, pgm_info.per_atmid,
 				   (u8 *)__LC_PER_ATMID);
-		rc |= put_guest_lc(vcpu, pgm_info->per_address,
+		rc |= put_guest_lc(vcpu, pgm_info.per_address,
 				   (u64 *) __LC_PER_ADDRESS);
-		rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.per_access_id,
 				   (u8 *) __LC_PER_ACCESS_ID);
 	}
 
 	rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
-	rc |= put_guest_lc(vcpu, pgm_info->code,
+	rc |= put_guest_lc(vcpu, pgm_info.code,
 			   (u16 *)__LC_PGM_INT_CODE);
 	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
 			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
 	rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
 			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
 
-	return rc;
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
+					  struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+		   inti->ext.ext_params);
+	vcpu->stat.deliver_service_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+					 inti->ext.ext_params, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+			   (u32 *)__LC_EXT_PARAMS);
+	return rc ? -EFAULT : 0;
 }
 
-static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
-				   struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_interrupt_info *inti)
 {
-	const unsigned short table[] = { 2, 4, 4, 6 };
-	int rc = 0;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_PFAULT_DONE, 0,
+					 inti->ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+			   (u64 *)__LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
+					 struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
+		   inti->ext.ext_params, inti->ext.ext_params2);
+	vcpu->stat.deliver_virtio_interrupt++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+					 inti->ext.ext_params,
+					 inti->ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+			   (u32 *)__LC_EXT_PARAMS);
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+			   (u64 *)__LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
+				     struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+	vcpu->stat.deliver_io_int++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+					 ((__u32)inti->io.subchannel_id << 16) |
+						inti->io.subchannel_nr,
+					 ((__u64)inti->io.io_int_parm << 32) |
+						inti->io.io_int_word);
+
+	rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+			   (u16 *)__LC_SUBCHANNEL_ID);
+	rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+			   (u16 *)__LC_SUBCHANNEL_NR);
+	rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+			   (u32 *)__LC_IO_INT_PARM);
+	rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+			   (u32 *)__LC_IO_INT_WORD);
+	rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_mchk_info *mchk = &inti->mchk;
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+		   mchk->mcic);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+					 mchk->cr14, mchk->mcic);
+
+	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+	rc |= put_guest_lc(vcpu, mchk->mcic,
+			(u64 __user *) __LC_MCCK_CODE);
+	rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
+			(u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+			     &mchk->fixed_logout, sizeof(mchk->fixed_logout));
+	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
+
+static const deliver_irq_t deliver_irq_funcs[] = {
+	[IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+	[IRQ_PEND_PROG]           = __deliver_prog,
+	[IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
+	[IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
+	[IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
+	[IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer,
+	[IRQ_PEND_RESTART]        = __deliver_restart,
+	[IRQ_PEND_SIGP_STOP]      = __deliver_stop,
+	[IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
+	[IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+};
+
+static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
 
 	switch (inti->type) {
-	case KVM_S390_INT_EMERGENCY:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
-		vcpu->stat.deliver_emergency_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->emerg.code, 0);
-		rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, inti->emerg.code,
-				   (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		break;
-	case KVM_S390_INT_EXTERNAL_CALL:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
-		vcpu->stat.deliver_external_call++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->extcall.code, 0);
-		rc  = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, inti->extcall.code,
-				   (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
-		break;
-	case KVM_S390_INT_CLOCK_COMP:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params, 0);
-		rc = deliver_ckc_interrupt(vcpu);
-		break;
-	case KVM_S390_INT_CPU_TIMER:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params, 0);
-		rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
-				   (u16 *)__LC_EXT_INT_CODE);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-				   (u32 *)__LC_EXT_PARAMS);
-		break;
 	case KVM_S390_INT_SERVICE:
-		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
-			   inti->ext.ext_params);
-		vcpu->stat.deliver_service_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params, 0);
-		rc  = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-				   (u32 *)__LC_EXT_PARAMS);
-		break;
-	case KVM_S390_INT_PFAULT_INIT:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-						 inti->ext.ext_params2);
-		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
-				   (u16 *) __LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-				   (u64 *) __LC_EXT_PARAMS2);
+		rc = __deliver_service(vcpu, inti);
 		break;
 	case KVM_S390_INT_PFAULT_DONE:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-						 inti->ext.ext_params2);
-		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-				   (u64 *)__LC_EXT_PARAMS2);
+		rc = __deliver_pfault_done(vcpu, inti);
 		break;
 	case KVM_S390_INT_VIRTIO:
-		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
-			   inti->ext.ext_params, inti->ext.ext_params2);
-		vcpu->stat.deliver_virtio_interrupt++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params,
-						 inti->ext.ext_params2);
-		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-				   (u32 *)__LC_EXT_PARAMS);
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-				   (u64 *)__LC_EXT_PARAMS2);
-		break;
-	case KVM_S390_SIGP_STOP:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
-		vcpu->stat.deliver_stop_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 0, 0);
-		__set_intercept_indicator(vcpu, inti);
-		break;
-
-	case KVM_S390_SIGP_SET_PREFIX:
-		VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
-			   inti->prefix.address);
-		vcpu->stat.deliver_prefix_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->prefix.address, 0);
-		kvm_s390_set_prefix(vcpu, inti->prefix.address);
-		break;
-
-	case KVM_S390_RESTART:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
-		vcpu->stat.deliver_restart_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 0, 0);
-		rc  = write_guest_lc(vcpu,
-				     offsetof(struct _lowcore, restart_old_psw),
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
+		rc = __deliver_virtio(vcpu, inti);
 		break;
-	case KVM_S390_PROGRAM_INT:
-		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
-			   inti->pgm.code,
-			   table[vcpu->arch.sie_block->ipa >> 14]);
-		vcpu->stat.deliver_program_int++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->pgm.code, 0);
-		rc = __deliver_prog_irq(vcpu, &inti->pgm);
-		break;
-
 	case KVM_S390_MCHK:
-		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-			   inti->mchk.mcic);
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->mchk.cr14,
-						 inti->mchk.mcic);
-		rc  = kvm_s390_vcpu_store_status(vcpu,
-						 KVM_S390_STORE_STATUS_PREFIXED);
-		rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
-		rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc = __deliver_mchk_floating(vcpu, inti);
 		break;
-
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-	{
-		__u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
-			inti->io.subchannel_nr;
-		__u64 param1 = ((__u64)inti->io.io_int_parm << 32) |
-			inti->io.io_int_word;
-		VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
-		vcpu->stat.deliver_io_int++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 param0, param1);
-		rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
-				   (u16 *)__LC_SUBCHANNEL_ID);
-		rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
-				   (u16 *)__LC_SUBCHANNEL_NR);
-		rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
-				   (u32 *)__LC_IO_INT_PARM);
-		rc |= put_guest_lc(vcpu, inti->io.io_int_word,
-				   (u32 *)__LC_IO_INT_WORD);
-		rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
+		rc = __deliver_io(vcpu, inti);
 		break;
-	}
 	default:
 		BUG();
 	}
@@ -509,19 +738,6 @@ static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 	return rc;
 }
 
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
-{
-	int rc;
-
-	rc  = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw,
-			    sizeof(psw_t));
-	return rc;
-}
-
 /* Check whether SIGP interpretation facility has an external call pending */
 int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -538,20 +754,11 @@ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
 	struct kvm_s390_interrupt_info  *inti;
-	int rc = 0;
+	int rc;
 
-	if (atomic_read(&li->active)) {
-		spin_lock(&li->lock);
-		list_for_each_entry(inti, &li->list, list)
-			if (__interrupt_is_deliverable(vcpu, inti)) {
-				rc = 1;
-				break;
-			}
-		spin_unlock(&li->lock);
-	}
+	rc = !!deliverable_local_irqs(vcpu);
 
 	if ((!rc) && atomic_read(&fi->active)) {
 		spin_lock(&fi->lock);
@@ -643,18 +850,15 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
 void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info  *n, *inti = NULL;
 
 	spin_lock(&li->lock);
-	list_for_each_entry_safe(inti, n, &li->list, list) {
-		list_del(&inti->list);
-		kfree(inti);
-	}
-	atomic_set(&li->active, 0);
+	li->pending_irqs = 0;
+	bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+	memset(&li->irq, 0, sizeof(li->irq));
 	spin_unlock(&li->lock);
 
 	/* clear pending external calls set by sigp interpretation facility */
-	atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
 	atomic_clear_mask(SIGP_CTRL_C,
 			  &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
 }
@@ -664,34 +868,35 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
 	struct kvm_s390_interrupt_info  *n, *inti = NULL;
+	deliver_irq_t func;
 	int deliver;
 	int rc = 0;
+	unsigned long irq_type;
+	unsigned long deliverable_irqs;
 
 	__reset_intercept_indicators(vcpu);
-	if (atomic_read(&li->active)) {
-		do {
-			deliver = 0;
-			spin_lock(&li->lock);
-			list_for_each_entry_safe(inti, n, &li->list, list) {
-				if (__interrupt_is_deliverable(vcpu, inti)) {
-					list_del(&inti->list);
-					deliver = 1;
-					break;
-				}
-				__set_intercept_indicator(vcpu, inti);
-			}
-			if (list_empty(&li->list))
-				atomic_set(&li->active, 0);
-			spin_unlock(&li->lock);
-			if (deliver) {
-				rc = __do_deliver_interrupt(vcpu, inti);
-				kfree(inti);
-			}
-		} while (!rc && deliver);
-	}
 
-	if (!rc && kvm_cpu_has_pending_timer(vcpu))
-		rc = deliver_ckc_interrupt(vcpu);
+	/* pending ckc conditions might have been invalidated */
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	if (kvm_cpu_has_pending_timer(vcpu))
+		set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+
+	do {
+		deliverable_irqs = deliverable_local_irqs(vcpu);
+		/* bits are in the order of interrupt priority */
+		irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+		if (irq_type == IRQ_PEND_COUNT)
+			break;
+		func = deliver_irq_funcs[irq_type];
+		if (!func) {
+			WARN_ON_ONCE(func == NULL);
+			clear_bit(irq_type, &li->pending_irqs);
+			continue;
+		}
+		rc = func(vcpu);
+	} while (!rc && irq_type != IRQ_PEND_COUNT);
+
+	set_intercept_indicators_local(vcpu);
 
 	if (!rc && atomic_read(&fi->active)) {
 		do {
@@ -710,7 +915,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 				atomic_set(&fi->active, 0);
 			spin_unlock(&fi->lock);
 			if (deliver) {
-				rc = __do_deliver_interrupt(vcpu, inti);
+				rc = __deliver_floating_interrupt(vcpu, inti);
 				kfree(inti);
 			}
 		} while (!rc && deliver);
@@ -719,23 +924,26 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 	return rc;
 }
 
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info *inti;
 
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return -ENOMEM;
+	li->irq.pgm = irq->u.pgm;
+	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
+	return 0;
+}
 
-	inti->type = KVM_S390_PROGRAM_INT;
-	inti->pgm.code = code;
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_irq irq;
 
 	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
+				   0, 1);
 	spin_lock(&li->lock);
-	list_add(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	irq.u.pgm.code = code;
+	__inject_prog(vcpu, &irq);
 	BUG_ON(waitqueue_active(li->wq));
 	spin_unlock(&li->lock);
 	return 0;
@@ -745,27 +953,166 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
 			     struct kvm_s390_pgm_info *pgm_info)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info *inti;
-
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return -ENOMEM;
+	struct kvm_s390_irq irq;
+	int rc;
 
 	VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
 		   pgm_info->code);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
 				   pgm_info->code, 0, 1);
-
-	inti->type = KVM_S390_PROGRAM_INT;
-	memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
 	spin_lock(&li->lock);
-	list_add(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	irq.u.pgm = *pgm_info;
+	rc = __inject_prog(vcpu, &irq);
 	BUG_ON(waitqueue_active(li->wq));
 	spin_unlock(&li->lock);
+	return rc;
+}
+
+static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
+		   irq->u.ext.ext_params, irq->u.ext.ext_params2);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
+				   irq->u.ext.ext_params,
+				   irq->u.ext.ext_params2, 2);
+
+	li->irq.ext = irq->u.ext;
+	set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
+int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
+
+	VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+		   irq->u.extcall.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
+				   irq->u.extcall.code, 0, 2);
+
+	*extcall = irq->u.extcall;
+	set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
+
+	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+		   prefix->address);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
+				   prefix->address, 0, 2);
+
+	*prefix = irq->u.prefix;
+	set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+
+	li->action_bits |= ACTION_STOP_ON_STOP;
+	set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
+				 struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+
+	set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				   struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_emerg_info *emerg = &li->irq.emerg;
+
+	VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+		   irq->u.emerg.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+				   emerg->code, 0, 2);
+
+	set_bit(emerg->code, li->sigp_emerg_pending);
+	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
+
+	VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+		   mchk->mcic);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
+				   mchk->mcic, 2);
+
+	/*
+	 * Because repressible machine checks can be indicated along with
+	 * exigent machine checks (PoP, Chapter 11, Interruption action)
+	 * we need to combine cr14, mcic and external damage code.
+	 * Failing storage address and the logout area should not be or'ed
+	 * together, we just indicate the last occurrence of the corresponding
+	 * machine check
+	 */
+	mchk->cr14 |= irq->u.mchk.cr14;
+	mchk->mcic |= irq->u.mchk.mcic;
+	mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
+	mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
+	memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout,
+	       sizeof(mchk->fixed_logout));
+	if (mchk->mcic & MCHK_EX_MASK)
+		set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+	else if (mchk->mcic & MCHK_REP_MASK)
+		set_bit(IRQ_PEND_MCHK_REP,  &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+				   0, 0, 2);
+
+	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+				   0, 0, 2);
+
+	set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid)
 {
@@ -851,7 +1198,17 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
 	li = &dst_vcpu->arch.local_int;
 	spin_lock(&li->lock);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	switch (inti->type) {
+	case KVM_S390_MCHK:
+		atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+		break;
+	default:
+		atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+		break;
+	}
 	spin_unlock(&li->lock);
 	kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
 unlock_fi:
@@ -920,92 +1277,85 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
 	__inject_vm(kvm, inti);
 }
 
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-			 struct kvm_s390_interrupt *s390int)
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+		       struct kvm_s390_irq *irq)
 {
-	struct kvm_s390_local_interrupt *li;
-	struct kvm_s390_interrupt_info *inti;
+	irq->type = s390int->type;
+	switch (irq->type) {
+	case KVM_S390_PROGRAM_INT:
+		if (s390int->parm & 0xffff0000)
+			return -EINVAL;
+		irq->u.pgm.code = s390int->parm;
+		break;
+	case KVM_S390_SIGP_SET_PREFIX:
+		irq->u.prefix.address = s390int->parm;
+		break;
+	case KVM_S390_INT_EXTERNAL_CALL:
+		if (irq->u.extcall.code & 0xffff0000)
+			return -EINVAL;
+		irq->u.extcall.code = s390int->parm;
+		break;
+	case KVM_S390_INT_EMERGENCY:
+		if (irq->u.emerg.code & 0xffff0000)
+			return -EINVAL;
+		irq->u.emerg.code = s390int->parm;
+		break;
+	case KVM_S390_MCHK:
+		irq->u.mchk.mcic = s390int->parm64;
+		break;
+	}
+	return 0;
+}
 
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return -ENOMEM;
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
 
-	switch (s390int->type) {
+	spin_lock(&li->lock);
+	switch (irq->type) {
 	case KVM_S390_PROGRAM_INT:
-		if (s390int->parm & 0xffff0000) {
-			kfree(inti);
-			return -EINVAL;
-		}
-		inti->type = s390int->type;
-		inti->pgm.code = s390int->parm;
 		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
-			   s390int->parm);
+			   irq->u.pgm.code);
+		rc = __inject_prog(vcpu, irq);
 		break;
 	case KVM_S390_SIGP_SET_PREFIX:
-		inti->prefix.address = s390int->parm;
-		inti->type = s390int->type;
-		VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
-			   s390int->parm);
+		rc = __inject_set_prefix(vcpu, irq);
 		break;
 	case KVM_S390_SIGP_STOP:
+		rc = __inject_sigp_stop(vcpu, irq);
+		break;
 	case KVM_S390_RESTART:
+		rc = __inject_sigp_restart(vcpu, irq);
+		break;
 	case KVM_S390_INT_CLOCK_COMP:
+		rc = __inject_ckc(vcpu);
+		break;
 	case KVM_S390_INT_CPU_TIMER:
-		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
-		inti->type = s390int->type;
+		rc = __inject_cpu_timer(vcpu);
 		break;
 	case KVM_S390_INT_EXTERNAL_CALL:
-		if (s390int->parm & 0xffff0000) {
-			kfree(inti);
-			return -EINVAL;
-		}
-		VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
-			   s390int->parm);
-		inti->type = s390int->type;
-		inti->extcall.code = s390int->parm;
+		rc = __inject_extcall(vcpu, irq);
 		break;
 	case KVM_S390_INT_EMERGENCY:
-		if (s390int->parm & 0xffff0000) {
-			kfree(inti);
-			return -EINVAL;
-		}
-		VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm);
-		inti->type = s390int->type;
-		inti->emerg.code = s390int->parm;
+		rc = __inject_sigp_emergency(vcpu, irq);
 		break;
 	case KVM_S390_MCHK:
-		VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
-			   s390int->parm64);
-		inti->type = s390int->type;
-		inti->mchk.mcic = s390int->parm64;
+		rc = __inject_mchk(vcpu, irq);
 		break;
 	case KVM_S390_INT_PFAULT_INIT:
-		inti->type = s390int->type;
-		inti->ext.ext_params2 = s390int->parm64;
+		rc = __inject_pfault_init(vcpu, irq);
 		break;
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
 	default:
-		kfree(inti);
-		return -EINVAL;
+		rc = -EINVAL;
 	}
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
-				   s390int->parm64, 2);
-
-	li = &vcpu->arch.local_int;
-	spin_lock(&li->lock);
-	if (inti->type == KVM_S390_PROGRAM_INT)
-		list_add(&inti->list, &li->list);
-	else
-		list_add_tail(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
-	if (inti->type == KVM_S390_SIGP_STOP)
-		li->action_bits |= ACTION_STOP_ON_STOP;
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
 	spin_unlock(&li->lock);
-	kvm_s390_vcpu_wakeup(vcpu);
-	return 0;
+	if (!rc)
+		kvm_s390_vcpu_wakeup(vcpu);
+	return rc;
 }
 
 void kvm_s390_clear_float_irqs(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6b049ee75a56..3e09801e3104 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -81,10 +81,17 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
+	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
+	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
+	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
+	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
@@ -453,6 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	spin_lock_init(&kvm->arch.float_int.lock);
 	INIT_LIST_HEAD(&kvm->arch.float_int.list);
 	init_waitqueue_head(&kvm->arch.ipte_wq);
+	mutex_init(&kvm->arch.ipte_mutex);
 
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
 	VM_EVENT(kvm, 3, "%s", "vm created");
@@ -711,7 +719,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	}
 
 	spin_lock_init(&vcpu->arch.local_int.lock);
-	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
@@ -1114,13 +1121,15 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
 				      unsigned long token)
 {
 	struct kvm_s390_interrupt inti;
-	inti.parm64 = token;
+	struct kvm_s390_irq irq;
 
 	if (start_token) {
-		inti.type = KVM_S390_INT_PFAULT_INIT;
-		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+		irq.u.ext.ext_params2 = token;
+		irq.type = KVM_S390_INT_PFAULT_INIT;
+		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
 	} else {
 		inti.type = KVM_S390_INT_PFAULT_DONE;
+		inti.parm64 = token;
 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
 	}
 }
@@ -1614,11 +1623,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	switch (ioctl) {
 	case KVM_S390_INTERRUPT: {
 		struct kvm_s390_interrupt s390int;
+		struct kvm_s390_irq s390irq;
 
 		r = -EFAULT;
 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
 			break;
-		r = kvm_s390_inject_vcpu(vcpu, &s390int);
+		if (s390int_to_s390irq(&s390int, &s390irq))
+			return -EINVAL;
+		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
 		break;
 	}
 	case KVM_S390_STORE_STATUS:
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 244d02303182..a8f3d9b71c11 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -24,8 +24,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 /* declare vfacilities extern */
 extern unsigned long *vfacilities;
 
-int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
-
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & 0x10))
 #define TDB_FORMAT1		1
@@ -144,7 +142,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm);
 int __must_check kvm_s390_inject_vm(struct kvm *kvm,
 				    struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-				      struct kvm_s390_interrupt *s390int);
+				      struct kvm_s390_irq *irq);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
@@ -152,6 +150,10 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
 			      struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
+/* implemented in intercept.c */
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
 /* implemented in priv.c */
 int is_valid_psw(psw_t *psw);
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -222,6 +224,9 @@ static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
 	return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 }
 
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+			struct kvm_s390_irq *s390irq);
+
 /* implemented in interrupt.c */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f47cb0c6d906..1be578d64dfc 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -180,21 +180,18 @@ static int handle_skey(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	vcpu->arch.sie_block->gpsw.addr =
-		__rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 	VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
 	return 0;
 }
 
 static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
 {
-	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-
 	vcpu->stat.instruction_ipte_interlock++;
-	if (psw_bits(*psw).p)
+	if (psw_bits(vcpu->arch.sie_block->gpsw).p)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
-	psw->addr = __rewind_psw(*psw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 	VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
 	return 0;
 }
@@ -650,10 +647,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-		if (kvm_s390_check_low_addr_protection(vcpu, start))
-			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
-	}
+	start = kvm_s390_logical_to_effective(vcpu, start);
 
 	switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 	case 0x00000000:
@@ -669,6 +663,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 	default:
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	}
+
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+		if (kvm_s390_check_low_addr_protection(vcpu, start))
+			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+	}
+
 	while (start < end) {
 		unsigned long useraddr, abs_addr;
 
@@ -725,8 +725,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* Rewind PSW to repeat the ESSA instruction */
-	vcpu->arch.sie_block->gpsw.addr =
-		__rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 	vcpu->arch.sie_block->cbrlo &= PAGE_MASK;	/* reset nceo */
 	cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
 	down_read(&gmap->mm->mmap_sem);
@@ -769,8 +768,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u32 val = 0;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
 	u64 ga;
 
 	vcpu->stat.instruction_lctl++;
@@ -786,19 +785,20 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	reg = reg1;
+	nr_regs = 0;
 	do {
-		rc = read_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
 		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
-		vcpu->arch.sie_block->gcr[reg] |= val;
-		ga += 4;
+		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	return 0;
 }
 
@@ -806,9 +806,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
 	u64 ga;
-	u32 val;
-	int reg, rc;
 
 	vcpu->stat.instruction_stctl++;
 
@@ -824,26 +824,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
 
 	reg = reg1;
+	nr_regs = 0;
 	do {
-		val = vcpu->arch.sie_block->gcr[reg] &  0x00000000fffffffful;
-		rc = write_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		ga += 4;
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
-	return 0;
+	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 ga, val;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
 
 	vcpu->stat.instruction_lctlg++;
 
@@ -855,22 +853,22 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	reg = reg1;
-
 	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	reg = reg1;
+	nr_regs = 0;
 	do {
-		rc = read_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		vcpu->arch.sie_block->gcr[reg] = val;
-		ga += 8;
+		vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	return 0;
 }
 
@@ -878,8 +876,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 ga, val;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
 
 	vcpu->stat.instruction_stctg++;
 
@@ -891,23 +890,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	reg = reg1;
-
 	VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
 
+	reg = reg1;
+	nr_regs = 0;
 	do {
-		val = vcpu->arch.sie_block->gcr[reg];
-		rc = write_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		ga += 8;
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
-	return 0;
+	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static const intercept_handler_t eb_handlers[256] = {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index cf243ba3d50f..6651f9f73973 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -20,20 +20,13 @@
 #include "kvm-s390.h"
 #include "trace.h"
 
-static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 			u64 *reg)
 {
 	struct kvm_s390_local_interrupt *li;
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int cpuflags;
 	int rc;
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 
 	cpuflags = atomic_read(li->cpuflags);
@@ -48,55 +41,53 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
 		rc = SIGP_CC_STATUS_STORED;
 	}
 
-	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
+		   rc);
 	return rc;
 }
 
-static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu)
 {
-	struct kvm_s390_interrupt s390int = {
+	struct kvm_s390_irq irq = {
 		.type = KVM_S390_INT_EMERGENCY,
-		.parm = vcpu->vcpu_id,
+		.u.emerg.code = vcpu->vcpu_id,
 	};
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc = 0;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
 	if (!rc)
-		VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+		VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
+			   dst_vcpu->vcpu_id);
 
 	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
 
-static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+	return __inject_sigp_emergency(vcpu, dst_vcpu);
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu,
 					u16 asn, u64 *reg)
 {
-	struct kvm_vcpu *dst_vcpu = NULL;
 	const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
 	u16 p_asn, s_asn;
 	psw_t *psw;
 	u32 flags;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
 	psw = &dst_vcpu->arch.sie_block->gpsw;
 	p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
 	s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
 
-	/* Deliver the emergency signal? */
+	/* Inject the emergency signal? */
 	if (!(flags & CPUSTAT_STOPPED)
 	    || (psw->mask & psw_int_mask) != psw_int_mask
 	    || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
 	    || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
-		return __sigp_emergency(vcpu, cpu_addr);
+		return __inject_sigp_emergency(vcpu, dst_vcpu);
 	} else {
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -104,23 +95,19 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
 	}
 }
 
-static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu)
 {
-	struct kvm_s390_interrupt s390int = {
+	struct kvm_s390_irq irq = {
 		.type = KVM_S390_INT_EXTERNAL_CALL,
-		.parm = vcpu->vcpu_id,
+		.u.extcall.code = vcpu->vcpu_id,
 	};
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
 	if (!rc)
-		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
+		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
+			   dst_vcpu->vcpu_id);
 
 	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
@@ -128,29 +115,20 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
 static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
 {
 	struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info *inti;
 	int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
-	inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
-	if (!inti)
-		return -ENOMEM;
-	inti->type = KVM_S390_SIGP_STOP;
-
 	spin_lock(&li->lock);
 	if (li->action_bits & ACTION_STOP_ON_STOP) {
 		/* another SIGP STOP is pending */
-		kfree(inti);
 		rc = SIGP_CC_BUSY;
 		goto out;
 	}
 	if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
-		kfree(inti);
 		if ((action & ACTION_STORE_ON_STOP) != 0)
 			rc = -ESHUTDOWN;
 		goto out;
 	}
-	list_add_tail(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
 	li->action_bits |= action;
 	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
 	kvm_s390_vcpu_wakeup(dst_vcpu);
@@ -160,23 +138,27 @@ out:
 	return rc;
 }
 
-static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
 {
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc;
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
+	rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
+	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id);
 
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
+	return rc;
+}
 
-	rc = __inject_sigp_stop(dst_vcpu, action);
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+	int rc;
 
-	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+	rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
+					      ACTION_STORE_ON_STOP);
+	VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+		   dst_vcpu->vcpu_id);
 
-	if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+	if (rc == -ESHUTDOWN) {
 		/* If the CPU has already been stopped, we still have
 		 * to save the status when doing stop-and-store. This
 		 * has to be done after unlocking all spinlocks. */
@@ -212,18 +194,12 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 	return rc;
 }
 
-static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
-			     u64 *reg)
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+			     u32 address, u64 *reg)
 {
 	struct kvm_s390_local_interrupt *li;
-	struct kvm_vcpu *dst_vcpu = NULL;
-	struct kvm_s390_interrupt_info *inti;
 	int rc;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 
 	/*
@@ -238,46 +214,34 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 		return SIGP_CC_STATUS_STORED;
 	}
 
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return SIGP_CC_BUSY;
-
 	spin_lock(&li->lock);
 	/* cpu must be in stopped state */
 	if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
 		rc = SIGP_CC_STATUS_STORED;
-		kfree(inti);
 		goto out_li;
 	}
 
-	inti->type = KVM_S390_SIGP_SET_PREFIX;
-	inti->prefix.address = address;
-
-	list_add_tail(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	li->irq.prefix.address = address;
+	set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
 	kvm_s390_vcpu_wakeup(dst_vcpu);
 	rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
-	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id,
+		   address);
 out_li:
 	spin_unlock(&li->lock);
 	return rc;
 }
 
-static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
-					u32 addr, u64 *reg)
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
+				       struct kvm_vcpu *dst_vcpu,
+				       u32 addr, u64 *reg)
 {
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int flags;
 	int rc;
 
-	if (cpu_id < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-
 	spin_lock(&dst_vcpu->arch.local_int.lock);
 	flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
 	spin_unlock(&dst_vcpu->arch.local_int.lock);
@@ -297,19 +261,12 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
 	return rc;
 }
 
-static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
-				u64 *reg)
+static int __sigp_sense_running(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu, u64 *reg)
 {
 	struct kvm_s390_local_interrupt *li;
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc;
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 	if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
 		/* running */
@@ -321,26 +278,19 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
 		rc = SIGP_CC_STATUS_STORED;
 	}
 
-	VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
-		   rc);
+	VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
+		   dst_vcpu->vcpu_id, rc);
 
 	return rc;
 }
 
-/* Test whether the destination CPU is available and not busy */
-static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+				   struct kvm_vcpu *dst_vcpu, u8 order_code)
 {
-	struct kvm_s390_local_interrupt *li;
-	int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
-	struct kvm_vcpu *dst_vcpu = NULL;
-
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
+	struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
+	/* handle (RE)START in user space */
+	int rc = -EOPNOTSUPP;
 
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-	li = &dst_vcpu->arch.local_int;
 	spin_lock(&li->lock);
 	if (li->action_bits & ACTION_STOP_ON_STOP)
 		rc = SIGP_CC_BUSY;
@@ -349,90 +299,131 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
 	return rc;
 }
 
-int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu, u8 order_code)
 {
-	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
-	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u32 parameter;
-	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
-	u8 order_code;
-	int rc;
+	/* handle (INITIAL) CPU RESET in user space */
+	return -EOPNOTSUPP;
+}
 
-	/* sigp in userspace can exit */
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+				  struct kvm_vcpu *dst_vcpu)
+{
+	/* handle unknown orders in user space */
+	return -EOPNOTSUPP;
+}
 
-	order_code = kvm_s390_get_base_disp_rs(vcpu);
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+			   u16 cpu_addr, u32 parameter, u64 *status_reg)
+{
+	int rc;
+	struct kvm_vcpu *dst_vcpu;
 
-	if (r1 % 2)
-		parameter = vcpu->run->s.regs.gprs[r1];
-	else
-		parameter = vcpu->run->s.regs.gprs[r1 + 1];
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return SIGP_CC_NOT_OPERATIONAL;
+
+	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+	if (!dst_vcpu)
+		return SIGP_CC_NOT_OPERATIONAL;
 
-	trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
 	switch (order_code) {
 	case SIGP_SENSE:
 		vcpu->stat.instruction_sigp_sense++;
-		rc = __sigp_sense(vcpu, cpu_addr,
-				  &vcpu->run->s.regs.gprs[r1]);
+		rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
 		break;
 	case SIGP_EXTERNAL_CALL:
 		vcpu->stat.instruction_sigp_external_call++;
-		rc = __sigp_external_call(vcpu, cpu_addr);
+		rc = __sigp_external_call(vcpu, dst_vcpu);
 		break;
 	case SIGP_EMERGENCY_SIGNAL:
 		vcpu->stat.instruction_sigp_emergency++;
-		rc = __sigp_emergency(vcpu, cpu_addr);
+		rc = __sigp_emergency(vcpu, dst_vcpu);
 		break;
 	case SIGP_STOP:
 		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
+		rc = __sigp_stop(vcpu, dst_vcpu);
 		break;
 	case SIGP_STOP_AND_STORE_STATUS:
-		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
-						 ACTION_STOP_ON_STOP);
+		vcpu->stat.instruction_sigp_stop_store_status++;
+		rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
 		break;
 	case SIGP_STORE_STATUS_AT_ADDRESS:
-		rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
-						 &vcpu->run->s.regs.gprs[r1]);
-		break;
-	case SIGP_SET_ARCHITECTURE:
-		vcpu->stat.instruction_sigp_arch++;
-		rc = __sigp_set_arch(vcpu, parameter);
+		vcpu->stat.instruction_sigp_store_status++;
+		rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
+						 status_reg);
 		break;
 	case SIGP_SET_PREFIX:
 		vcpu->stat.instruction_sigp_prefix++;
-		rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
-				       &vcpu->run->s.regs.gprs[r1]);
+		rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
 		break;
 	case SIGP_COND_EMERGENCY_SIGNAL:
-		rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
-						  &vcpu->run->s.regs.gprs[r1]);
+		vcpu->stat.instruction_sigp_cond_emergency++;
+		rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
+						  status_reg);
 		break;
 	case SIGP_SENSE_RUNNING:
 		vcpu->stat.instruction_sigp_sense_running++;
-		rc = __sigp_sense_running(vcpu, cpu_addr,
-					  &vcpu->run->s.regs.gprs[r1]);
+		rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
 		break;
 	case SIGP_START:
-		rc = sigp_check_callable(vcpu, cpu_addr);
-		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
-			rc = -EOPNOTSUPP;    /* Handle START in user space */
+		vcpu->stat.instruction_sigp_start++;
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
 		break;
 	case SIGP_RESTART:
 		vcpu->stat.instruction_sigp_restart++;
-		rc = sigp_check_callable(vcpu, cpu_addr);
-		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
-			VCPU_EVENT(vcpu, 4,
-				   "sigp restart %x to handle userspace",
-				   cpu_addr);
-			/* user space must know about restart */
-			rc = -EOPNOTSUPP;
-		}
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_INITIAL_CPU_RESET:
+		vcpu->stat.instruction_sigp_init_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_CPU_RESET:
+		vcpu->stat.instruction_sigp_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	default:
+		vcpu->stat.instruction_sigp_unknown++;
+		rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
+	}
+
+	if (rc == -EOPNOTSUPP)
+		VCPU_EVENT(vcpu, 4,
+			   "sigp order %u -> cpu %x: handled in user space",
+			   order_code, dst_vcpu->vcpu_id);
+
+	return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u32 parameter;
+	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+	u8 order_code;
+	int rc;
+
+	/* sigp in userspace can exit */
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	order_code = kvm_s390_get_base_disp_rs(vcpu);
+
+	if (r1 % 2)
+		parameter = vcpu->run->s.regs.gprs[r1];
+	else
+		parameter = vcpu->run->s.regs.gprs[r1 + 1];
+
+	trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
+	switch (order_code) {
+	case SIGP_SET_ARCHITECTURE:
+		vcpu->stat.instruction_sigp_arch++;
+		rc = __sigp_set_arch(vcpu, parameter);
 		break;
 	default:
-		return -EOPNOTSUPP;
+		rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+				     parameter,
+				     &vcpu->run->s.regs.gprs[r1]);
 	}
 
 	if (rc < 0)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 71c7eff2c89f..be99357d238c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -844,7 +844,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 
 	down_read(&mm->mmap_sem);
 retry:
-	ptep = get_locked_pte(current->mm, addr, &ptl);
+	ptep = get_locked_pte(mm, addr, &ptl);
 	if (unlikely(!ptep)) {
 		up_read(&mm->mmap_sem);
 		return -EFAULT;
@@ -888,6 +888,45 @@ retry:
 }
 EXPORT_SYMBOL(set_guest_storage_key);
 
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+{
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pte_t *ptep;
+	uint64_t physaddr;
+	unsigned long key = 0;
+
+	down_read(&mm->mmap_sem);
+	ptep = get_locked_pte(mm, addr, &ptl);
+	if (unlikely(!ptep)) {
+		up_read(&mm->mmap_sem);
+		return -EFAULT;
+	}
+	pgste = pgste_get_lock(ptep);
+
+	if (pte_val(*ptep) & _PAGE_INVALID) {
+		key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
+		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
+		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
+	} else {
+		physaddr = pte_val(*ptep) & PAGE_MASK;
+		key = page_get_storage_key(physaddr);
+
+		/* Reflect guest's logical view, not physical */
+		if (pgste_val(pgste) & PGSTE_GR_BIT)
+			key |= _PAGE_REFERENCED;
+		if (pgste_val(pgste) & PGSTE_GC_BIT)
+			key |= _PAGE_CHANGED;
+	}
+
+	pgste_set_unlock(ptep, pgste);
+	pte_unmap_unlock(ptep, ptl);
+	up_read(&mm->mmap_sem);
+	return key;
+}
+EXPORT_SYMBOL(get_guest_storage_key);
+
 #else /* CONFIG_PGSTE */
 
 static inline int page_table_with_pgste(struct page *page)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index c6b6ee5f38b2..0f09f5285d5e 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -223,7 +223,7 @@ config CPU_SHX3
 config ARCH_SHMOBILE
 	bool
 	select ARCH_SUSPEND_POSSIBLE
-	select PM_RUNTIME
+	select PM
 
 config CPU_HAS_PMU
        depends on CPU_SH4 || CPU_SH4A
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index ec70475da890..a8d975793b6d 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -47,7 +47,7 @@ CONFIG_PREEMPT=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index 76a76a295d74..e7e56a4131b4 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -82,7 +82,7 @@ CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be65f035d18a..5cbc96d801ff 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -460,10 +460,12 @@ static void __init sparc_context_init(int numctx)
 void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm,
 	       struct task_struct *tsk)
 {
+	unsigned long flags;
+
 	if (mm->context == NO_CONTEXT) {
-		spin_lock(&srmmu_context_spinlock);
+		spin_lock_irqsave(&srmmu_context_spinlock, flags);
 		alloc_context(old_mm, mm);
-		spin_unlock(&srmmu_context_spinlock);
+		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
 		srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
 	}
 
@@ -986,14 +988,15 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 
 void destroy_context(struct mm_struct *mm)
 {
+	unsigned long flags;
 
 	if (mm->context != NO_CONTEXT) {
 		flush_cache_mm(mm);
 		srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir);
 		flush_tlb_mm(mm);
-		spin_lock(&srmmu_context_spinlock);
+		spin_lock_irqsave(&srmmu_context_spinlock, flags);
 		free_context(mm->context);
-		spin_unlock(&srmmu_context_spinlock);
+		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
 		mm->context = NO_CONTEXT;
 	}
 }
diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c
index 320ff5e6e61e..6f00e9850636 100644
--- a/arch/tile/gxio/mpipe.c
+++ b/arch/tile/gxio/mpipe.c
@@ -463,6 +463,7 @@ int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context,
 					    (uint64_t)ts->tv_nsec,
 					    (uint64_t)cycles);
 }
+EXPORT_SYMBOL_GPL(gxio_mpipe_set_timestamp);
 
 int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context,
 			     struct timespec *ts)
@@ -485,11 +486,13 @@ int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context,
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(gxio_mpipe_get_timestamp);
 
 int gxio_mpipe_adjust_timestamp(gxio_mpipe_context_t *context, int64_t delta)
 {
 	return gxio_mpipe_adjust_timestamp_aux(context, delta);
 }
+EXPORT_SYMBOL_GPL(gxio_mpipe_adjust_timestamp);
 
 /* Get our internal context used for link name access.  This context is
  *  special in that it is not associated with an mPIPE service domain.
@@ -542,6 +545,7 @@ int gxio_mpipe_link_instance(const char *link_name)
 
 	return gxio_mpipe_info_instance_aux(context, name);
 }
+EXPORT_SYMBOL_GPL(gxio_mpipe_link_instance);
 
 int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac)
 {
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index d372641054d9..6ef4ecab1df2 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -396,8 +396,7 @@ extern void ioport_unmap(void __iomem *addr);
 static inline long ioport_panic(void)
 {
 #ifdef __tilegx__
-	panic("PCI IO space support is disabled. Configure the kernel with"
-	      " CONFIG_TILE_PCI_IO to enable it");
+	panic("PCI IO space support is disabled. Configure the kernel with CONFIG_TILE_PCI_IO to enable it");
 #else
 	panic("inb/outb and friends do not exist on tile");
 #endif
@@ -406,7 +405,7 @@ static inline long ioport_panic(void)
 
 static inline void __iomem *ioport_map(unsigned long port, unsigned int len)
 {
-	pr_info("ioport_map: mapping IO resources is unsupported on tile.\n");
+	pr_info("ioport_map: mapping IO resources is unsupported on tile\n");
 	return NULL;
 }
 
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 33587f16c152..5d1950788c69 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -235,9 +235,9 @@ static inline void __pte_clear(pte_t *ptep)
 #define pte_donemigrate(x) hv_pte_set_present(hv_pte_clear_migrating(x))
 
 #define pte_ERROR(e) \
-	pr_err("%s:%d: bad pte 0x%016llx.\n", __FILE__, __LINE__, pte_val(e))
+	pr_err("%s:%d: bad pte 0x%016llx\n", __FILE__, __LINE__, pte_val(e))
 #define pgd_ERROR(e) \
-	pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e))
+	pr_err("%s:%d: bad pgd 0x%016llx\n", __FILE__, __LINE__, pgd_val(e))
 
 /* Return PA and protection info for a given kernel VA. */
 int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte);
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
index 2c8a9cd102d3..e96cec52f6d8 100644
--- a/arch/tile/include/asm/pgtable_64.h
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -86,7 +86,7 @@ static inline int pud_huge_page(pud_t pud)
 }
 
 #define pmd_ERROR(e) \
-	pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e))
+	pr_err("%s:%d: bad pmd 0x%016llx\n", __FILE__, __LINE__, pmd_val(e))
 
 static inline void pud_clear(pud_t *pudp)
 {
diff --git a/arch/tile/include/uapi/asm/ptrace.h b/arch/tile/include/uapi/asm/ptrace.h
index 7757e1985fb6..d03b829857e8 100644
--- a/arch/tile/include/uapi/asm/ptrace.h
+++ b/arch/tile/include/uapi/asm/ptrace.h
@@ -52,12 +52,16 @@ typedef uint_reg_t pt_reg_t;
  * system call or exception.  "struct sigcontext" has the same shape.
  */
 struct pt_regs {
-	/* Saved main processor registers; 56..63 are special. */
-	/* tp, sp, and lr must immediately follow regs[] for aliasing. */
-	pt_reg_t regs[53];
-	pt_reg_t tp;		/* aliases regs[TREG_TP] */
-	pt_reg_t sp;		/* aliases regs[TREG_SP] */
-	pt_reg_t lr;		/* aliases regs[TREG_LR] */
+	union {
+		/* Saved main processor registers; 56..63 are special. */
+		pt_reg_t regs[56];
+		struct {
+			pt_reg_t __regs[53];
+			pt_reg_t tp;		/* aliases regs[TREG_TP] */
+			pt_reg_t sp;		/* aliases regs[TREG_SP] */
+			pt_reg_t lr;		/* aliases regs[TREG_LR] */
+		};
+	};
 
 	/* Saved special registers. */
 	pt_reg_t pc;		/* stored in EX_CONTEXT_K_0 */
diff --git a/arch/tile/include/uapi/asm/sigcontext.h b/arch/tile/include/uapi/asm/sigcontext.h
index 6348e59d3724..39ff5d1a232d 100644
--- a/arch/tile/include/uapi/asm/sigcontext.h
+++ b/arch/tile/include/uapi/asm/sigcontext.h
@@ -24,10 +24,16 @@
  * but is simplified since we know the fault is from userspace.
  */
 struct sigcontext {
-	__uint_reg_t gregs[53];	/* General-purpose registers.  */
-	__uint_reg_t tp;	/* Aliases gregs[TREG_TP].  */
-	__uint_reg_t sp;	/* Aliases gregs[TREG_SP].  */
-	__uint_reg_t lr;	/* Aliases gregs[TREG_LR].  */
+	__extension__ union {
+		/* General-purpose registers.  */
+		__uint_reg_t gregs[56];
+		__extension__ struct {
+			__uint_reg_t __gregs[53];
+			__uint_reg_t tp;	/* Aliases gregs[TREG_TP].  */
+			__uint_reg_t sp;	/* Aliases gregs[TREG_SP].  */
+			__uint_reg_t lr;	/* Aliases gregs[TREG_LR].  */
+		};
+	};
 	__uint_reg_t pc;	/* Program counter.  */
 	__uint_reg_t ics;	/* In Interrupt Critical Section?  */
 	__uint_reg_t faultnum;	/* Fault number.  */
diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
index aca6000bca75..c4646bb99342 100644
--- a/arch/tile/kernel/hardwall.c
+++ b/arch/tile/kernel/hardwall.c
@@ -365,8 +365,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
 	 * to quiesce.
 	 */
 	if (rect->teardown_in_progress) {
-		pr_notice("cpu %d: detected %s hardwall violation %#lx"
-		       " while teardown already in progress\n",
+		pr_notice("cpu %d: detected %s hardwall violation %#lx while teardown already in progress\n",
 			  cpu, hwt->name,
 			  (long)mfspr_XDN(hwt, DIRECTION_PROTECT));
 		goto done;
@@ -630,8 +629,7 @@ static void _hardwall_deactivate(struct hardwall_type *hwt,
 	struct thread_struct *ts = &task->thread;
 
 	if (cpumask_weight(&task->cpus_allowed) != 1) {
-		pr_err("pid %d (%s) releasing %s hardwall with"
-		       " an affinity mask containing %d cpus!\n",
+		pr_err("pid %d (%s) releasing %s hardwall with an affinity mask containing %d cpus!\n",
 		       task->pid, task->comm, hwt->name,
 		       cpumask_weight(&task->cpus_allowed));
 		BUG();
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index ba85765e1436..22044fc691ef 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -107,9 +107,8 @@ void tile_dev_intr(struct pt_regs *regs, int intnum)
 	{
 		long sp = stack_pointer - (long) current_thread_info();
 		if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
-			pr_emerg("tile_dev_intr: "
-			       "stack overflow: %ld\n",
-			       sp - sizeof(struct thread_info));
+			pr_emerg("%s: stack overflow: %ld\n",
+				 __func__, sp - sizeof(struct thread_info));
 			dump_stack();
 		}
 	}
diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c
index 4cd88381a83e..ff5335ae050d 100644
--- a/arch/tile/kernel/kgdb.c
+++ b/arch/tile/kernel/kgdb.c
@@ -125,9 +125,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
 void
 sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
 {
-	int reg;
 	struct pt_regs *thread_regs;
-	unsigned long *ptr = gdb_regs;
 
 	if (task == NULL)
 		return;
@@ -136,9 +134,7 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
 	memset(gdb_regs, 0, NUMREGBYTES);
 
 	thread_regs = task_pt_regs(task);
-	for (reg = 0; reg <= TREG_LAST_GPR; reg++)
-		*(ptr++) = thread_regs->regs[reg];
-
+	memcpy(gdb_regs, thread_regs, TREG_LAST_GPR * sizeof(unsigned long));
 	gdb_regs[TILEGX_PC_REGNUM] = thread_regs->pc;
 	gdb_regs[TILEGX_FAULTNUM_REGNUM] = thread_regs->faultnum;
 }
diff --git a/arch/tile/kernel/kprobes.c b/arch/tile/kernel/kprobes.c
index 27cdcacbe81d..f8a45c51e9e4 100644
--- a/arch/tile/kernel/kprobes.c
+++ b/arch/tile/kernel/kprobes.c
@@ -90,8 +90,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 		return -EINVAL;
 
 	if (insn_has_control(*p->addr)) {
-		pr_notice("Kprobes for control instructions are not "
-			  "supported\n");
+		pr_notice("Kprobes for control instructions are not supported\n");
 		return -EINVAL;
 	}
 
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index f0b54a934712..008aa2faef55 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c
@@ -77,16 +77,13 @@ void machine_crash_shutdown(struct pt_regs *regs)
 int machine_kexec_prepare(struct kimage *image)
 {
 	if (num_online_cpus() > 1) {
-		pr_warning("%s: detected attempt to kexec "
-		       "with num_online_cpus() > 1\n",
-		       __func__);
+		pr_warn("%s: detected attempt to kexec with num_online_cpus() > 1\n",
+			__func__);
 		return -ENOSYS;
 	}
 	if (image->type != KEXEC_TYPE_DEFAULT) {
-		pr_warning("%s: detected attempt to kexec "
-		       "with unsupported type: %d\n",
-		       __func__,
-		       image->type);
+		pr_warn("%s: detected attempt to kexec with unsupported type: %d\n",
+			__func__, image->type);
 		return -ENOSYS;
 	}
 	return 0;
@@ -131,8 +128,8 @@ static unsigned char *kexec_bn2cl(void *pg)
 	 */
 	csum = ip_compute_csum(pg, bhdrp->b_size);
 	if (csum != 0) {
-		pr_warning("%s: bad checksum %#x (size %d)\n",
-			   __func__, csum, bhdrp->b_size);
+		pr_warn("%s: bad checksum %#x (size %d)\n",
+			__func__, csum, bhdrp->b_size);
 		return 0;
 	}
 
@@ -160,8 +157,7 @@ static unsigned char *kexec_bn2cl(void *pg)
 	while (*desc != '\0') {
 		desc++;
 		if (((unsigned long)desc & PAGE_MASK) != (unsigned long)pg) {
-			pr_info("%s: ran off end of page\n",
-			       __func__);
+			pr_info("%s: ran off end of page\n", __func__);
 			return 0;
 		}
 	}
@@ -195,20 +191,18 @@ static void kexec_find_and_set_command_line(struct kimage *image)
 	}
 
 	if (command_line != 0) {
-		pr_info("setting new command line to \"%s\"\n",
-		       command_line);
+		pr_info("setting new command line to \"%s\"\n", command_line);
 
 		hverr = hv_set_command_line(
 			(HV_VirtAddr) command_line, strlen(command_line));
 		kunmap_atomic(command_line);
 	} else {
-		pr_info("%s: no command line found; making empty\n",
-		       __func__);
+		pr_info("%s: no command line found; making empty\n", __func__);
 		hverr = hv_set_command_line((HV_VirtAddr) command_line, 0);
 	}
 	if (hverr)
-		pr_warning("%s: hv_set_command_line returned error: %d\n",
-			   __func__, hverr);
+		pr_warn("%s: hv_set_command_line returned error: %d\n",
+			__func__, hverr);
 }
 
 /*
diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c
index ac950be1318e..7475af3aacec 100644
--- a/arch/tile/kernel/messaging.c
+++ b/arch/tile/kernel/messaging.c
@@ -59,9 +59,8 @@ void hv_message_intr(struct pt_regs *regs, int intnum)
 	{
 		long sp = stack_pointer - (long) current_thread_info();
 		if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
-			pr_emerg("hv_message_intr: "
-			       "stack overflow: %ld\n",
-			       sp - sizeof(struct thread_info));
+			pr_emerg("%s: stack overflow: %ld\n",
+				 __func__, sp - sizeof(struct thread_info));
 			dump_stack();
 		}
 	}
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index d19b13e3a59f..96447c9160a0 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -96,8 +96,8 @@ void module_free(struct module *mod, void *module_region)
 static int validate_hw2_last(long value, struct module *me)
 {
 	if (((value << 16) >> 16) != value) {
-		pr_warning("module %s: Out of range HW2_LAST value %#lx\n",
-			   me->name, value);
+		pr_warn("module %s: Out of range HW2_LAST value %#lx\n",
+			me->name, value);
 		return 0;
 	}
 	return 1;
@@ -210,10 +210,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 			value -= (unsigned long) location;  /* pc-relative */
 			value = (long) value >> 3;     /* count by instrs */
 			if (!validate_jumpoff(value)) {
-				pr_warning("module %s: Out of range jump to"
-					   " %#llx at %#llx (%p)\n", me->name,
-					   sym->st_value + rel[i].r_addend,
-					   rel[i].r_offset, location);
+				pr_warn("module %s: Out of range jump to %#llx at %#llx (%p)\n",
+					me->name,
+					sym->st_value + rel[i].r_addend,
+					rel[i].r_offset, location);
 				return -ENOEXEC;
 			}
 			MUNGE(create_JumpOff_X1);
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 1f80a88c75a6..f70c7892fa25 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -178,8 +178,8 @@ int __init tile_pci_init(void)
 				continue;
 			hv_cfg_fd1 = tile_pcie_open(i, 1);
 			if (hv_cfg_fd1 < 0) {
-				pr_err("PCI: Couldn't open config fd to HV "
-				    "for controller %d\n", i);
+				pr_err("PCI: Couldn't open config fd to HV for controller %d\n",
+				       i);
 				goto err_cont;
 			}
 
@@ -423,8 +423,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 		for (i = 0; i < 6; i++) {
 			r = &dev->resource[i];
 			if (r->flags & IORESOURCE_UNSET) {
-				pr_err("PCI: Device %s not available "
-				       "because of resource collisions\n",
+				pr_err("PCI: Device %s not available because of resource collisions\n",
 				       pci_name(dev));
 				return -EINVAL;
 			}
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index e717af20dada..2c95f37ebbed 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -131,8 +131,7 @@ static int tile_irq_cpu(int irq)
 
 	count = cpumask_weight(&intr_cpus_map);
 	if (unlikely(count == 0)) {
-		pr_warning("intr_cpus_map empty, interrupts will be"
-			   " delievered to dataplane tiles\n");
+		pr_warn("intr_cpus_map empty, interrupts will be delievered to dataplane tiles\n");
 		return irq % (smp_height * smp_width);
 	}
 
@@ -197,16 +196,16 @@ static int tile_pcie_open(int trio_index)
 	/* Get the properties of the PCIe ports on this TRIO instance. */
 	ret = gxio_trio_get_port_property(context, &pcie_ports[trio_index]);
 	if (ret < 0) {
-		pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d,"
-		       " on TRIO %d\n", ret, trio_index);
+		pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d, on TRIO %d\n",
+		       ret, trio_index);
 		goto get_port_property_failure;
 	}
 
 	context->mmio_base_mac =
 		iorpc_ioremap(context->fd, 0, HV_TRIO_CONFIG_IOREMAP_SIZE);
 	if (context->mmio_base_mac == NULL) {
-		pr_err("PCI: TRIO config space mapping failure, error %d,"
-		       " on TRIO %d\n", ret, trio_index);
+		pr_err("PCI: TRIO config space mapping failure, error %d, on TRIO %d\n",
+		       ret, trio_index);
 		ret = -ENOMEM;
 
 		goto trio_mmio_mapping_failure;
@@ -622,9 +621,8 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller)
 				    dev_control.max_read_req_sz,
 				    mac);
 	if (err < 0) {
-		pr_err("PCI: PCIE_CONFIGURE_MAC_MPS_MRS failure, "
-			"MAC %d on TRIO %d\n",
-			mac, controller->trio_index);
+		pr_err("PCI: PCIE_CONFIGURE_MAC_MPS_MRS failure, MAC %d on TRIO %d\n",
+		       mac, controller->trio_index);
 	}
 }
 
@@ -720,27 +718,24 @@ int __init pcibios_init(void)
 					 reg_offset);
 		if (!port_status.dl_up) {
 			if (rc_delay[trio_index][mac]) {
-				pr_info("Delaying PCIe RC TRIO init %d sec"
-					" on MAC %d on TRIO %d\n",
+				pr_info("Delaying PCIe RC TRIO init %d sec on MAC %d on TRIO %d\n",
 					rc_delay[trio_index][mac], mac,
 					trio_index);
 				msleep(rc_delay[trio_index][mac] * 1000);
 			}
 			ret = gxio_trio_force_rc_link_up(trio_context, mac);
 			if (ret < 0)
-				pr_err("PCI: PCIE_FORCE_LINK_UP failure, "
-					"MAC %d on TRIO %d\n", mac, trio_index);
+				pr_err("PCI: PCIE_FORCE_LINK_UP failure, MAC %d on TRIO %d\n",
+				       mac, trio_index);
 		}
 
-		pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n", i,
-			trio_index, controller->mac);
+		pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n",
+			i, trio_index, controller->mac);
 
 		/* Delay the bus probe if needed. */
 		if (rc_delay[trio_index][mac]) {
-			pr_info("Delaying PCIe RC bus enumerating %d sec"
-				" on MAC %d on TRIO %d\n",
-				rc_delay[trio_index][mac], mac,
-				trio_index);
+			pr_info("Delaying PCIe RC bus enumerating %d sec on MAC %d on TRIO %d\n",
+				rc_delay[trio_index][mac], mac, trio_index);
 			msleep(rc_delay[trio_index][mac] * 1000);
 		} else {
 			/*
@@ -758,11 +753,10 @@ int __init pcibios_init(void)
 			if (pcie_ports[trio_index].ports[mac].removable) {
 				pr_info("PCI: link is down, MAC %d on TRIO %d\n",
 					mac, trio_index);
-				pr_info("This is expected if no PCIe card"
-					" is connected to this link\n");
+				pr_info("This is expected if no PCIe card is connected to this link\n");
 			} else
 				pr_err("PCI: link is down, MAC %d on TRIO %d\n",
-					mac, trio_index);
+				       mac, trio_index);
 			continue;
 		}
 
@@ -829,8 +823,8 @@ int __init pcibios_init(void)
 		/* Alloc a PIO region for PCI config access per MAC. */
 		ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
 		if (ret < 0) {
-			pr_err("PCI: PCI CFG PIO alloc failure for mac %d "
-				"on TRIO %d, give up\n", mac, trio_index);
+			pr_err("PCI: PCI CFG PIO alloc failure for mac %d on TRIO %d, give up\n",
+			       mac, trio_index);
 
 			continue;
 		}
@@ -842,8 +836,8 @@ int __init pcibios_init(void)
 			trio_context->pio_cfg_index[mac],
 			mac, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE);
 		if (ret < 0) {
-			pr_err("PCI: PCI CFG PIO init failure for mac %d "
-				"on TRIO %d, give up\n", mac, trio_index);
+			pr_err("PCI: PCI CFG PIO init failure for mac %d on TRIO %d, give up\n",
+			       mac, trio_index);
 
 			continue;
 		}
@@ -865,7 +859,7 @@ int __init pcibios_init(void)
 			(TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT - 1)));
 		if (trio_context->mmio_base_pio_cfg[mac] == NULL) {
 			pr_err("PCI: PIO map failure for mac %d on TRIO %d\n",
-				mac, trio_index);
+			       mac, trio_index);
 
 			continue;
 		}
@@ -925,9 +919,8 @@ int __init pcibios_init(void)
 		/* Alloc a PIO region for PCI memory access for each RC port. */
 		ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
 		if (ret < 0) {
-			pr_err("PCI: MEM PIO alloc failure on TRIO %d mac %d, "
-			       "give up\n", controller->trio_index,
-			       controller->mac);
+			pr_err("PCI: MEM PIO alloc failure on TRIO %d mac %d, give up\n",
+			       controller->trio_index, controller->mac);
 
 			continue;
 		}
@@ -944,9 +937,8 @@ int __init pcibios_init(void)
 						    0,
 						    0);
 		if (ret < 0) {
-			pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, "
-			       "give up\n", controller->trio_index,
-			       controller->mac);
+			pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, give up\n",
+			       controller->trio_index, controller->mac);
 
 			continue;
 		}
@@ -957,9 +949,8 @@ int __init pcibios_init(void)
 		 */
 		ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
 		if (ret < 0) {
-			pr_err("PCI: I/O PIO alloc failure on TRIO %d mac %d, "
-			       "give up\n", controller->trio_index,
-			       controller->mac);
+			pr_err("PCI: I/O PIO alloc failure on TRIO %d mac %d, give up\n",
+			       controller->trio_index, controller->mac);
 
 			continue;
 		}
@@ -976,9 +967,8 @@ int __init pcibios_init(void)
 						    0,
 						    HV_TRIO_PIO_FLAG_IO_SPACE);
 		if (ret < 0) {
-			pr_err("PCI: I/O PIO init failure on TRIO %d mac %d, "
-			       "give up\n", controller->trio_index,
-			       controller->mac);
+			pr_err("PCI: I/O PIO init failure on TRIO %d mac %d, give up\n",
+			       controller->trio_index, controller->mac);
 
 			continue;
 		}
@@ -997,10 +987,9 @@ int __init pcibios_init(void)
 			ret = gxio_trio_alloc_memory_maps(trio_context, 1, 0,
 							  0);
 			if (ret < 0) {
-				pr_err("PCI: Mem-Map alloc failure on TRIO %d "
-				       "mac %d for MC %d, give up\n",
-				       controller->trio_index,
-				       controller->mac, j);
+				pr_err("PCI: Mem-Map alloc failure on TRIO %d mac %d for MC %d, give up\n",
+				       controller->trio_index, controller->mac,
+				       j);
 
 				goto alloc_mem_map_failed;
 			}
@@ -1030,10 +1019,9 @@ int __init pcibios_init(void)
 				j,
 				GXIO_TRIO_ORDER_MODE_UNORDERED);
 			if (ret < 0) {
-				pr_err("PCI: Mem-Map init failure on TRIO %d "
-				       "mac %d for MC %d, give up\n",
-				       controller->trio_index,
-				       controller->mac, j);
+				pr_err("PCI: Mem-Map init failure on TRIO %d mac %d for MC %d, give up\n",
+				       controller->trio_index, controller->mac,
+				       j);
 
 				goto alloc_mem_map_failed;
 			}
@@ -1510,9 +1498,7 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
 	 * Most PCIe endpoint devices do support 64-bit message addressing.
 	 */
 	if (desc->msi_attrib.is_64 == 0) {
-		dev_printk(KERN_INFO, &pdev->dev,
-			"64-bit MSI message address not supported, "
-			"falling back to legacy interrupts.\n");
+		dev_info(&pdev->dev, "64-bit MSI message address not supported, falling back to legacy interrupts\n");
 
 		ret = -ENOMEM;
 		goto is_64_failure;
@@ -1549,11 +1535,8 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
 		/* SQ regions are out, allocate from map mem regions. */
 		mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0);
 		if (mem_map < 0) {
-			dev_printk(KERN_INFO, &pdev->dev,
-				"%s Mem-Map alloc failure. "
-				"Failed to initialize MSI interrupts. "
-				"Falling back to legacy interrupts.\n",
-				desc->msi_attrib.is_msix ? "MSI-X" : "MSI");
+			dev_info(&pdev->dev, "%s Mem-Map alloc failure - failed to initialize MSI interrupts - falling back to legacy interrupts\n",
+				 desc->msi_attrib.is_msix ? "MSI-X" : "MSI");
 			ret = -ENOMEM;
 			goto msi_mem_map_alloc_failure;
 		}
@@ -1580,7 +1563,7 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
 					mem_map, mem_map_base, mem_map_limit,
 					trio_context->asid);
 	if (ret < 0) {
-		dev_printk(KERN_INFO, &pdev->dev, "HV MSI config failed.\n");
+		dev_info(&pdev->dev, "HV MSI config failed\n");
 
 		goto hv_msi_config_failure;
 	}
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 0050cbc1d9de..48e5773dd0b7 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -52,7 +52,7 @@ static int __init idle_setup(char *str)
 		return -EINVAL;
 
 	if (!strcmp(str, "poll")) {
-		pr_info("using polling idle threads.\n");
+		pr_info("using polling idle threads\n");
 		cpu_idle_poll_ctrl(true);
 		return 0;
 	} else if (!strcmp(str, "halt")) {
@@ -547,27 +547,25 @@ void show_regs(struct pt_regs *regs)
 	struct task_struct *tsk = validate_current();
 	int i;
 
-	pr_err("\n");
 	if (tsk != &corrupt_current)
 		show_regs_print_info(KERN_ERR);
 #ifdef __tilegx__
 	for (i = 0; i < 17; i++)
-		pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
+		pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
 		       i, regs->regs[i], i+18, regs->regs[i+18],
 		       i+36, regs->regs[i+36]);
-	pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n",
+	pr_err(" r17: " REGFMT " r35: " REGFMT " tp : " REGFMT "\n",
 	       regs->regs[17], regs->regs[35], regs->tp);
-	pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
+	pr_err(" sp : " REGFMT " lr : " REGFMT "\n", regs->sp, regs->lr);
 #else
 	for (i = 0; i < 13; i++)
-		pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
-		       " r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
+		pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
 		       i, regs->regs[i], i+14, regs->regs[i+14],
 		       i+27, regs->regs[i+27], i+40, regs->regs[i+40]);
-	pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
+	pr_err(" r13: " REGFMT " tp : " REGFMT " sp : " REGFMT " lr : " REGFMT "\n",
 	       regs->regs[13], regs->tp, regs->sp, regs->lr);
 #endif
-	pr_err(" pc : "REGFMT" ex1: %ld     faultnum: %ld\n",
+	pr_err(" pc : " REGFMT " ex1: %ld     faultnum: %ld\n",
 	       regs->pc, regs->ex1, regs->faultnum);
 
 	dump_stack_regs(regs);
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 7f079bbfdf4c..864eea69556d 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -130,7 +130,7 @@ static int __init setup_maxmem(char *str)
 
 	maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT);
 	pr_info("Forcing RAM used to no more than %dMB\n",
-	       maxmem_pfn >> (20 - PAGE_SHIFT));
+		maxmem_pfn >> (20 - PAGE_SHIFT));
 	return 0;
 }
 early_param("maxmem", setup_maxmem);
@@ -149,7 +149,7 @@ static int __init setup_maxnodemem(char *str)
 	maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) <<
 		(HPAGE_SHIFT - PAGE_SHIFT);
 	pr_info("Forcing RAM used on node %ld to no more than %dMB\n",
-	       node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT));
+		node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT));
 	return 0;
 }
 early_param("maxnodemem", setup_maxnodemem);
@@ -417,8 +417,7 @@ static void __init setup_memory(void)
 			range.start = (start_pa + HPAGE_SIZE - 1) & HPAGE_MASK;
 			range.size -= (range.start - start_pa);
 			range.size &= HPAGE_MASK;
-			pr_err("Range not hugepage-aligned: %#llx..%#llx:"
-			       " now %#llx-%#llx\n",
+			pr_err("Range not hugepage-aligned: %#llx..%#llx: now %#llx-%#llx\n",
 			       start_pa, start_pa + orig_size,
 			       range.start, range.start + range.size);
 		}
@@ -437,8 +436,8 @@ static void __init setup_memory(void)
 		if (PFN_DOWN(range.size) > maxnodemem_pfn[i]) {
 			int max_size = maxnodemem_pfn[i];
 			if (max_size > 0) {
-				pr_err("Maxnodemem reduced node %d to"
-				       " %d pages\n", i, max_size);
+				pr_err("Maxnodemem reduced node %d to %d pages\n",
+				       i, max_size);
 				range.size = PFN_PHYS(max_size);
 			} else {
 				pr_err("Maxnodemem disabled node %d\n", i);
@@ -490,8 +489,8 @@ static void __init setup_memory(void)
 				NR_CPUS * (PFN_UP(per_cpu_size) >> PAGE_SHIFT);
 			if (end < pci_reserve_end_pfn + percpu_pages) {
 				end = pci_reserve_start_pfn;
-				pr_err("PCI mapping region reduced node %d to"
-				       " %ld pages\n", i, end - start);
+				pr_err("PCI mapping region reduced node %d to %ld pages\n",
+				       i, end - start);
 			}
 		}
 #endif
@@ -555,10 +554,9 @@ static void __init setup_memory(void)
 		MAXMEM_PFN : mappable_physpages;
 	highmem_pages = (long) (physpages - lowmem_pages);
 
-	pr_notice("%ldMB HIGHMEM available.\n",
-	       pages_to_mb(highmem_pages > 0 ? highmem_pages : 0));
-	pr_notice("%ldMB LOWMEM available.\n",
-			pages_to_mb(lowmem_pages));
+	pr_notice("%ldMB HIGHMEM available\n",
+		  pages_to_mb(highmem_pages > 0 ? highmem_pages : 0));
+	pr_notice("%ldMB LOWMEM available\n", pages_to_mb(lowmem_pages));
 #else
 	/* Set max_low_pfn based on what node 0 can directly address. */
 	max_low_pfn = node_end_pfn[0];
@@ -571,8 +569,8 @@ static void __init setup_memory(void)
 		max_pfn = MAXMEM_PFN;
 		node_end_pfn[0] = MAXMEM_PFN;
 	} else {
-		pr_notice("%ldMB memory available.\n",
-		       pages_to_mb(node_end_pfn[0]));
+		pr_notice("%ldMB memory available\n",
+			  pages_to_mb(node_end_pfn[0]));
 	}
 	for (i = 1; i < MAX_NUMNODES; ++i) {
 		node_start_pfn[i] = 0;
@@ -587,8 +585,7 @@ static void __init setup_memory(void)
 		if (pages)
 			high_memory = pfn_to_kaddr(node_end_pfn[i]);
 	}
-	pr_notice("%ldMB memory available.\n",
-	       pages_to_mb(lowmem_pages));
+	pr_notice("%ldMB memory available\n", pages_to_mb(lowmem_pages));
 #endif
 #endif
 }
@@ -1535,8 +1532,7 @@ static void __init pcpu_fc_populate_pte(unsigned long addr)
 
 	BUG_ON(pgd_addr_invalid(addr));
 	if (addr < VMALLOC_START || addr >= VMALLOC_END)
-		panic("PCPU addr %#lx outside vmalloc range %#lx..%#lx;"
-		      " try increasing CONFIG_VMALLOC_RESERVE\n",
+		panic("PCPU addr %#lx outside vmalloc range %#lx..%#lx; try increasing CONFIG_VMALLOC_RESERVE\n",
 		      addr, VMALLOC_START, VMALLOC_END);
 
 	pgd = swapper_pg_dir + pgd_index(addr);
@@ -1591,8 +1587,8 @@ void __init setup_per_cpu_areas(void)
 			lowmem_va = (unsigned long)pfn_to_kaddr(pfn);
 			ptep = virt_to_kpte(lowmem_va);
 			if (pte_huge(*ptep)) {
-				printk(KERN_DEBUG "early shatter of huge page"
-				       " at %#lx\n", lowmem_va);
+				printk(KERN_DEBUG "early shatter of huge page at %#lx\n",
+				       lowmem_va);
 				shatter_pmd((pmd_t *)ptep);
 				ptep = virt_to_kpte(lowmem_va);
 				BUG_ON(pte_huge(*ptep));
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 7c2fecc52177..bb0a9ce7ae23 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -45,8 +45,7 @@
 int restore_sigcontext(struct pt_regs *regs,
 		       struct sigcontext __user *sc)
 {
-	int err = 0;
-	int i;
+	int err;
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
@@ -57,9 +56,7 @@ int restore_sigcontext(struct pt_regs *regs,
 	 */
 	BUILD_BUG_ON(sizeof(struct sigcontext) != sizeof(struct pt_regs));
 	BUILD_BUG_ON(sizeof(struct sigcontext) % 8 != 0);
-
-	for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i)
-		err |= __get_user(regs->regs[i], &sc->gregs[i]);
+	err = __copy_from_user(regs, sc, sizeof(*regs));
 
 	/* Ensure that the PL is always set to USER_PL. */
 	regs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(regs->ex1));
@@ -110,12 +107,7 @@ badframe:
 
 int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
 {
-	int i, err = 0;
-
-	for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i)
-		err |= __put_user(regs->regs[i], &sc->gregs[i]);
-
-	return err;
+	return  __copy_to_user(sc, regs, sizeof(*regs));
 }
 
 /*
@@ -345,7 +337,6 @@ static void dump_mem(void __user *address)
 	int i, j, k;
 	int found_readable_mem = 0;
 
-	pr_err("\n");
 	if (!access_ok(VERIFY_READ, address, 1)) {
 		pr_err("Not dumping at address 0x%lx (kernel address)\n",
 		       (unsigned long)address);
@@ -367,7 +358,7 @@ static void dump_mem(void __user *address)
 			       (unsigned long)address);
 			found_readable_mem = 1;
 		}
-		j = sprintf(line, REGFMT":", (unsigned long)addr);
+		j = sprintf(line, REGFMT ":", (unsigned long)addr);
 		for (k = 0; k < bytes_per_line; ++k)
 			j += sprintf(&line[j], " %02x", buf[k]);
 		pr_err("%s\n", line);
@@ -411,8 +402,7 @@ void trace_unhandled_signal(const char *type, struct pt_regs *regs,
 		case SIGFPE:
 		case SIGSEGV:
 		case SIGBUS:
-			pr_err("User crash: signal %d,"
-			       " trap %ld, address 0x%lx\n",
+			pr_err("User crash: signal %d, trap %ld, address 0x%lx\n",
 			       sig, regs->faultnum, address);
 			show_regs(regs);
 			dump_mem((void __user *)address);
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 6cb2ce31b5a2..862973074bf9 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -222,11 +222,9 @@ static tilepro_bundle_bits rewrite_load_store_unaligned(
 	}
 
 	if (unaligned_printk || unaligned_fixup_count == 0) {
-		pr_info("Process %d/%s: PC %#lx: Fixup of"
-			" unaligned %s at %#lx.\n",
+		pr_info("Process %d/%s: PC %#lx: Fixup of unaligned %s at %#lx\n",
 			current->pid, current->comm, regs->pc,
-			(mem_op == MEMOP_LOAD ||
-			 mem_op == MEMOP_LOAD_POSTINCR) ?
+			mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR ?
 			"load" : "store",
 			(unsigned long)addr);
 		if (!unaligned_printk) {
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 0d59a1b60c74..20d52a98e171 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -127,8 +127,7 @@ static __init int reset_init_affinity(void)
 {
 	long rc = sched_setaffinity(current->pid, &init_affinity);
 	if (rc != 0)
-		pr_warning("couldn't reset init affinity (%ld)\n",
-		       rc);
+		pr_warn("couldn't reset init affinity (%ld)\n", rc);
 	return 0;
 }
 late_initcall(reset_init_affinity);
@@ -174,7 +173,7 @@ static void start_secondary(void)
 	/* Indicate that we're ready to come up. */
 	/* Must not do this before we're ready to receive messages */
 	if (cpumask_test_and_set_cpu(cpuid, &cpu_started)) {
-		pr_warning("CPU#%d already started!\n", cpuid);
+		pr_warn("CPU#%d already started!\n", cpuid);
 		for (;;)
 			local_irq_enable();
 	}
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index c93977a62116..7ff5afdbd3aa 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -387,9 +387,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 		 * then bust_spinlocks() spit out a space in front of us
 		 * and it will mess up our KERN_ERR.
 		 */
-		pr_err("\n");
-		pr_err("Starting stack dump of tid %d, pid %d (%s)"
-		       " on cpu %d at cycle %lld\n",
+		pr_err("Starting stack dump of tid %d, pid %d (%s) on cpu %d at cycle %lld\n",
 		       kbt->task->pid, kbt->task->tgid, kbt->task->comm,
 		       raw_smp_processor_id(), get_cycles());
 	}
@@ -411,8 +409,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 		       i++, address, namebuf, (unsigned long)(kbt->it.sp));
 
 		if (i >= 100) {
-			pr_err("Stack dump truncated"
-			       " (%d frames)\n", i);
+			pr_err("Stack dump truncated (%d frames)\n", i);
 			break;
 		}
 	}
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index b854a1cd0079..d412b0856c0a 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -98,8 +98,8 @@ void __init calibrate_delay(void)
 {
 	loops_per_jiffy = get_clock_rate() / HZ;
 	pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n",
-		loops_per_jiffy/(500000/HZ),
-		(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);
+		loops_per_jiffy / (500000 / HZ),
+		(loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy);
 }
 
 /* Called fairly late in init/main.c, but before we go smp. */
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 86900ccd4977..bf841ca517bb 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -46,9 +46,9 @@ static int __init setup_unaligned_fixup(char *str)
 		return 0;
 
 	pr_info("Fixups for unaligned data accesses are %s\n",
-	       unaligned_fixup >= 0 ?
-	       (unaligned_fixup ? "enabled" : "disabled") :
-	       "completely disabled");
+		unaligned_fixup >= 0 ?
+		(unaligned_fixup ? "enabled" : "disabled") :
+		"completely disabled");
 	return 1;
 }
 __setup("unaligned_fixup=", setup_unaligned_fixup);
@@ -305,8 +305,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 	case INT_ILL:
 		if (copy_from_user(&instr, (void __user *)regs->pc,
 				   sizeof(instr))) {
-			pr_err("Unreadable instruction for INT_ILL:"
-			       " %#lx\n", regs->pc);
+			pr_err("Unreadable instruction for INT_ILL: %#lx\n",
+			       regs->pc);
 			do_exit(SIGKILL);
 			return;
 		}
diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
index c02ea2a45f67..7d9a83be0aca 100644
--- a/arch/tile/kernel/unaligned.c
+++ b/arch/tile/kernel/unaligned.c
@@ -969,8 +969,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 		unaligned_fixup_count++;
 
 		if (unaligned_printk) {
-			pr_info("%s/%d. Unalign fixup for kernel access "
-				"to userspace %lx.",
+			pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
 				current->comm, current->pid, regs->regs[ra]);
 		}
 
@@ -985,7 +984,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 			.si_addr = (unsigned char __user *)0
 		};
 		if (unaligned_printk)
-			pr_info("Unalign bundle: unexp @%llx, %llx",
+			pr_info("Unalign bundle: unexp @%llx, %llx\n",
 				(unsigned long long)regs->pc,
 				(unsigned long long)bundle);
 
@@ -1370,8 +1369,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 		frag.bundle = bundle;
 
 		if (unaligned_printk) {
-			pr_info("%s/%d, Unalign fixup: pc=%lx "
-				"bundle=%lx %d %d %d %d %d %d %d %d.",
+			pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
 				current->comm, current->pid,
 				(unsigned long)frag.pc,
 				(unsigned long)frag.bundle,
@@ -1380,8 +1378,8 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 				(int)y1_lr, (int)y1_br, (int)x1_add);
 
 			for (k = 0; k < n; k += 2)
-				pr_info("[%d] %016llx %016llx", k,
-					(unsigned long long)frag.insn[k],
+				pr_info("[%d] %016llx %016llx\n",
+					k, (unsigned long long)frag.insn[k],
 					(unsigned long long)frag.insn[k+1]);
 		}
 
@@ -1402,7 +1400,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 				.si_addr = (void __user *)&jit_code_area[idx]
 			};
 
-			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx",
+			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
 				current->pid, current->comm,
 				(unsigned long long)&jit_code_area[idx]);
 
@@ -1485,7 +1483,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 			/* If exception came from kernel, try fix it up. */
 			if (fixup_exception(regs)) {
 				if (unaligned_printk)
-					pr_info("Unalign fixup: %d %llx @%llx",
+					pr_info("Unalign fixup: %d %llx @%llx\n",
 						(int)unaligned_fixup,
 						(unsigned long long)regs->ex1,
 						(unsigned long long)regs->pc);
@@ -1519,7 +1517,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 		};
 
 		if (unaligned_printk)
-			pr_info("Unalign fixup: %d %llx @%llx",
+			pr_info("Unalign fixup: %d %llx @%llx\n",
 				(int)unaligned_fixup,
 				(unsigned long long)regs->ex1,
 				(unsigned long long)regs->pc);
@@ -1579,14 +1577,14 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 						    0);
 
 		if (IS_ERR((void __force *)user_page)) {
-			pr_err("Out of kernel pages trying do_mmap.\n");
+			pr_err("Out of kernel pages trying do_mmap\n");
 			return;
 		}
 
 		/* Save the address in the thread_info struct */
 		info->unalign_jit_base = user_page;
 		if (unaligned_printk)
-			pr_info("Unalign bundle: %d:%d, allocate page @%llx",
+			pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
 				raw_smp_processor_id(), current->pid,
 				(unsigned long long)user_page);
 	}
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 6c0571216a9d..565e25a98334 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -169,8 +169,7 @@ static void wait_for_migration(pte_t *pte)
 		while (pte_migrating(*pte)) {
 			barrier();
 			if (++retries > bound)
-				panic("Hit migrating PTE (%#llx) and"
-				      " page PFN %#lx still migrating",
+				panic("Hit migrating PTE (%#llx) and page PFN %#lx still migrating",
 				      pte->val, pte_pfn(*pte));
 		}
 	}
@@ -292,11 +291,10 @@ static int handle_page_fault(struct pt_regs *regs,
 	 */
 	stack_offset = stack_pointer & (THREAD_SIZE-1);
 	if (stack_offset < THREAD_SIZE / 8) {
-		pr_alert("Potential stack overrun: sp %#lx\n",
-		       stack_pointer);
+		pr_alert("Potential stack overrun: sp %#lx\n", stack_pointer);
 		show_regs(regs);
 		pr_alert("Killing current process %d/%s\n",
-		       tsk->pid, tsk->comm);
+			 tsk->pid, tsk->comm);
 		do_group_exit(SIGKILL);
 	}
 
@@ -421,7 +419,7 @@ good_area:
 	} else if (write) {
 #ifdef TEST_VERIFY_AREA
 		if (!is_page_fault && regs->cs == KERNEL_CS)
-			pr_err("WP fault at "REGFMT"\n", regs->eip);
+			pr_err("WP fault at " REGFMT "\n", regs->eip);
 #endif
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
@@ -519,16 +517,15 @@ no_context:
 		pte_t *pte = lookup_address(address);
 
 		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
-			pr_crit("kernel tried to execute"
-			       " non-executable page - exploit attempt?"
-			       " (uid: %d)\n", current->uid);
+			pr_crit("kernel tried to execute non-executable page - exploit attempt? (uid: %d)\n",
+				current->uid);
 	}
 #endif
 	if (address < PAGE_SIZE)
 		pr_alert("Unable to handle kernel NULL pointer dereference\n");
 	else
 		pr_alert("Unable to handle kernel paging request\n");
-	pr_alert(" at virtual address "REGFMT", pc "REGFMT"\n",
+	pr_alert(" at virtual address " REGFMT ", pc " REGFMT "\n",
 		 address, regs->pc);
 
 	show_regs(regs);
@@ -575,9 +572,10 @@ do_sigbus:
 #ifndef __tilegx__
 
 /* We must release ICS before panicking or we won't get anywhere. */
-#define ics_panic(fmt, ...) do { \
-	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); \
-	panic(fmt, __VA_ARGS__); \
+#define ics_panic(fmt, ...)					\
+do {								\
+	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);	\
+	panic(fmt, ##__VA_ARGS__);				\
 } while (0)
 
 /*
@@ -615,8 +613,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 	     fault_num != INT_DTLB_ACCESS)) {
 		unsigned long old_pc = regs->pc;
 		regs->pc = pc;
-		ics_panic("Bad ICS page fault args:"
-			  " old PC %#lx, fault %d/%d at %#lx\n",
+		ics_panic("Bad ICS page fault args: old PC %#lx, fault %d/%d at %#lx",
 			  old_pc, fault_num, write, address);
 	}
 
@@ -669,8 +666,8 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 #endif
 		fixup = search_exception_tables(pc);
 		if (!fixup)
-			ics_panic("ICS atomic fault not in table:"
-				  " PC %#lx, fault %d", pc, fault_num);
+			ics_panic("ICS atomic fault not in table: PC %#lx, fault %d",
+				  pc, fault_num);
 		regs->pc = fixup->fixup;
 		regs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
 	}
@@ -826,8 +823,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 
 			set_thread_flag(TIF_ASYNC_TLB);
 			if (async->fault_num != 0) {
-				panic("Second async fault %d;"
-				      " old fault was %d (%#lx/%ld)",
+				panic("Second async fault %d; old fault was %d (%#lx/%ld)",
 				      fault_num, async->fault_num,
 				      address, write);
 			}
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 33294fdc402e..cd3387370ebb 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -152,12 +152,10 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
 	cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy);
 	cpumask_scnprintf(tlb_buf, sizeof(tlb_buf), &tlb_cpumask_copy);
 
-	pr_err("hv_flush_remote(%#llx, %#lx, %p [%s],"
-	       " %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n",
+	pr_err("hv_flush_remote(%#llx, %#lx, %p [%s], %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n",
 	       cache_pa, cache_control, cache_cpumask, cache_buf,
 	       (unsigned long)tlb_va, tlb_length, tlb_pgsize,
-	       tlb_cpumask, tlb_buf,
-	       asids, asidcount, rc);
+	       tlb_cpumask, tlb_buf, asids, asidcount, rc);
 	panic("Unsafe to continue.");
 }
 
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index e514899e1100..3270e0019266 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -284,22 +284,21 @@ static __init int __setup_hugepagesz(unsigned long ps)
 	int level, base_shift;
 
 	if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
-		pr_warn("Not enabling %ld byte huge pages;"
-			" must be a power of four.\n", ps);
+		pr_warn("Not enabling %ld byte huge pages; must be a power of four\n",
+			ps);
 		return -EINVAL;
 	}
 
 	if (ps > 64*1024*1024*1024UL) {
-		pr_warn("Not enabling %ld MB huge pages;"
-			" largest legal value is 64 GB .\n", ps >> 20);
+		pr_warn("Not enabling %ld MB huge pages; largest legal value is 64 GB\n",
+			ps >> 20);
 		return -EINVAL;
 	} else if (ps >= PUD_SIZE) {
 		static long hv_jpage_size;
 		if (hv_jpage_size == 0)
 			hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
 		if (hv_jpage_size != PUD_SIZE) {
-			pr_warn("Not enabling >= %ld MB huge pages:"
-				" hypervisor reports size %ld\n",
+			pr_warn("Not enabling >= %ld MB huge pages: hypervisor reports size %ld\n",
 				PUD_SIZE >> 20, hv_jpage_size);
 			return -EINVAL;
 		}
@@ -320,14 +319,13 @@ static __init int __setup_hugepagesz(unsigned long ps)
 		int shift_val = log_ps - base_shift;
 		if (huge_shift[level] != 0) {
 			int old_shift = base_shift + huge_shift[level];
-			pr_warn("Not enabling %ld MB huge pages;"
-				" already have size %ld MB.\n",
+			pr_warn("Not enabling %ld MB huge pages; already have size %ld MB\n",
 				ps >> 20, (1UL << old_shift) >> 20);
 			return -EINVAL;
 		}
 		if (hv_set_pte_super_shift(level, shift_val) != 0) {
-			pr_warn("Not enabling %ld MB huge pages;"
-				" no hypervisor support.\n", ps >> 20);
+			pr_warn("Not enabling %ld MB huge pages; no hypervisor support\n",
+				ps >> 20);
 			return -EINVAL;
 		}
 		printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index caa270165f86..be240cc4978d 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -357,11 +357,11 @@ static int __init setup_ktext(char *str)
 		cpulist_scnprintf(buf, sizeof(buf), &ktext_mask);
 		if (cpumask_weight(&ktext_mask) > 1) {
 			ktext_small = 1;
-			pr_info("ktext: using caching neighborhood %s "
-			       "with small pages\n", buf);
+			pr_info("ktext: using caching neighborhood %s with small pages\n",
+				buf);
 		} else {
 			pr_info("ktext: caching on cpu %s with one huge page\n",
-			       buf);
+				buf);
 		}
 	}
 
@@ -413,19 +413,16 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 	int rc, i;
 
 	if (ktext_arg_seen && ktext_hash) {
-		pr_warning("warning: \"ktext\" boot argument ignored"
-			   " if \"kcache_hash\" sets up text hash-for-home\n");
+		pr_warn("warning: \"ktext\" boot argument ignored if \"kcache_hash\" sets up text hash-for-home\n");
 		ktext_small = 0;
 	}
 
 	if (kdata_arg_seen && kdata_hash) {
-		pr_warning("warning: \"kdata\" boot argument ignored"
-			   " if \"kcache_hash\" sets up data hash-for-home\n");
+		pr_warn("warning: \"kdata\" boot argument ignored if \"kcache_hash\" sets up data hash-for-home\n");
 	}
 
 	if (kdata_huge && !hash_default) {
-		pr_warning("warning: disabling \"kdata=huge\"; requires"
-			  " kcache_hash=all or =allbutstack\n");
+		pr_warn("warning: disabling \"kdata=huge\"; requires kcache_hash=all or =allbutstack\n");
 		kdata_huge = 0;
 	}
 
@@ -470,8 +467,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 					pte[pte_ofs] = pfn_pte(pfn, prot);
 			} else {
 				if (kdata_huge)
-					printk(KERN_DEBUG "pre-shattered huge"
-					       " page at %#lx\n", address);
+					printk(KERN_DEBUG "pre-shattered huge page at %#lx\n",
+					       address);
 				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
 				     pfn++, pte_ofs++, address += PAGE_SIZE) {
 					pgprot_t prot = init_pgprot(address);
@@ -501,8 +498,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 			pr_info("ktext: not using unavailable cpus %s\n", buf);
 		}
 		if (cpumask_empty(&ktext_mask)) {
-			pr_warning("ktext: no valid cpus; caching on %d.\n",
-				   smp_processor_id());
+			pr_warn("ktext: no valid cpus; caching on %d\n",
+				smp_processor_id());
 			cpumask_copy(&ktext_mask,
 				     cpumask_of(smp_processor_id()));
 		}
@@ -798,11 +795,9 @@ void __init mem_init(void)
 #ifdef CONFIG_HIGHMEM
 	/* check that fixmap and pkmap do not overlap */
 	if (PKMAP_ADDR(LAST_PKMAP-1) >= FIXADDR_START) {
-		pr_err("fixmap and kmap areas overlap"
-		       " - this will crash\n");
+		pr_err("fixmap and kmap areas overlap - this will crash\n");
 		pr_err("pkstart: %lxh pkend: %lxh fixstart %lxh\n",
-		       PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1),
-		       FIXADDR_START);
+		       PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1), FIXADDR_START);
 		BUG();
 	}
 #endif
@@ -926,8 +921,7 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	unsigned long addr = (unsigned long) begin;
 
 	if (kdata_huge && !initfree) {
-		pr_warning("Warning: ignoring initfree=0:"
-			   " incompatible with kdata=huge\n");
+		pr_warn("Warning: ignoring initfree=0: incompatible with kdata=huge\n");
 		initfree = 1;
 	}
 	end = (end + PAGE_SIZE - 1) & PAGE_MASK;
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 5e86eac4bfae..7bf2491a9c1f 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -44,9 +44,7 @@ void show_mem(unsigned int filter)
 {
 	struct zone *zone;
 
-	pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu"
-	       " free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu"
-	       " pagecache:%lu swap:%lu\n",
+	pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n",
 	       (global_page_state(NR_ACTIVE_ANON) +
 		global_page_state(NR_ACTIVE_FILE)),
 	       (global_page_state(NR_INACTIVE_ANON) +
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d69f1cd87fd9..ba397bde7948 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -249,10 +249,6 @@ config HAVE_INTEL_TXT
 	def_bool y
 	depends on INTEL_IOMMU && ACPI
 
-config X86_INTEL_MPX
-	def_bool y
-	depends on CPU_SUP_INTEL
-
 config X86_32_SMP
 	def_bool y
 	depends on X86_32 && SMP
@@ -887,11 +883,11 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
 	def_bool y
 	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
+	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 
 config X86_IO_APIC
-	def_bool y
-	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
-	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+	def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
+	depends on X86_LOCAL_APIC
 	select IRQ_DOMAIN
 
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
@@ -1594,6 +1590,32 @@ config X86_SMAP
 
 	  If unsure, say Y.
 
+config X86_INTEL_MPX
+	prompt "Intel MPX (Memory Protection Extensions)"
+	def_bool n
+	depends on CPU_SUP_INTEL
+	---help---
+	  MPX provides hardware features that can be used in
+	  conjunction with compiler-instrumented code to check
+	  memory references.  It is designed to detect buffer
+	  overflow or underflow bugs.
+
+	  This option enables running applications which are
+	  instrumented or otherwise use MPX.  It does not use MPX
+	  itself inside the kernel or to protect the kernel
+	  against bad memory references.
+
+	  Enabling this option will make the kernel larger:
+	  ~8k of kernel text and 36 bytes of data on a 64-bit
+	  defconfig.  It adds a long to the 'mm_struct' which
+	  will increase the kernel memory overhead of each
+	  process and adds some branches to paths used during
+	  exec() and munmap().
+
+	  For details, see Documentation/x86/intel_mpx.txt
+
+	  If unsure, say N.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4615906d83df..9662290e0b20 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -94,30 +94,7 @@ extern void trace_call_function_single_interrupt(void);
 #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
 #endif /* CONFIG_TRACING */
 
-/* IOAPIC */
-#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
-extern unsigned long io_apic_irqs;
-
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-
-struct io_apic_irq_attr {
-	int ioapic;
-	int ioapic_pin;
-	int trigger;
-	int polarity;
-};
-
-static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
-					int ioapic, int ioapic_pin,
-					int trigger, int polarity)
-{
-	irq_attr->ioapic	= ioapic;
-	irq_attr->ioapic_pin	= ioapic_pin;
-	irq_attr->trigger	= trigger;
-	irq_attr->polarity	= polarity;
-}
-
+#ifdef CONFIG_IRQ_REMAP
 /* Intel specific interrupt remapping information */
 struct irq_2_iommu {
 	struct intel_iommu *iommu;
@@ -131,14 +108,12 @@ struct irq_2_irte {
 	u16 devid; /* Device ID for IRTE table */
 	u16 index; /* Index into IRTE table*/
 };
+#endif	/* CONFIG_IRQ_REMAP */
+
+#ifdef	CONFIG_X86_LOCAL_APIC
+struct irq_data;
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
 struct irq_cfg {
-	struct irq_pin_list	*irq_2_pin;
 	cpumask_var_t		domain;
 	cpumask_var_t		old_domain;
 	u8			vector;
@@ -150,18 +125,39 @@ struct irq_cfg {
 		struct irq_2_irte  irq_2_irte;
 	};
 #endif
+	union {
+#ifdef CONFIG_X86_IO_APIC
+		struct {
+			struct list_head	irq_2_pin;
+		};
+#endif
+	};
 };
 
+extern struct irq_cfg *irq_cfg(unsigned int irq);
+extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
+extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
+extern void lock_vector_lock(void);
+extern void unlock_vector_lock(void);
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void clear_irq_vector(int irq, struct irq_cfg *cfg);
+extern void setup_vector_irq(int cpu);
+#ifdef CONFIG_SMP
 extern void send_cleanup_vector(struct irq_cfg *);
+extern void irq_complete_move(struct irq_cfg *cfg);
+#else
+static inline void send_cleanup_vector(struct irq_cfg *c) { }
+static inline void irq_complete_move(struct irq_cfg *c) { }
+#endif
 
-struct irq_data;
-int __ioapic_set_affinity(struct irq_data *, const struct cpumask *,
-			  unsigned int *dest_id);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
-extern void setup_ioapic_dest(void);
-
-extern void enable_IO_APIC(void);
+extern int apic_retrigger_irq(struct irq_data *data);
+extern void apic_ack_edge(struct irq_data *data);
+extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+			     unsigned int *dest_id);
+#else	/*  CONFIG_X86_LOCAL_APIC */
+static inline void lock_vector_lock(void) {}
+static inline void unlock_vector_lock(void) {}
+#endif	/* CONFIG_X86_LOCAL_APIC */
 
 /* Statistics */
 extern atomic_t irq_err_count;
@@ -185,7 +181,8 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
+extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
+				    - FIRST_EXTERNAL_VECTOR])(void);
 #ifdef CONFIG_TRACING
 #define trace_interrupt interrupt
 #endif
@@ -195,17 +192,6 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
 
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-extern void setup_vector_irq(int cpu);
-
-#ifdef CONFIG_X86_IO_APIC
-extern void lock_vector_lock(void);
-extern void unlock_vector_lock(void);
-extern void __setup_vector_irq(int cpu);
-#else
-static inline void lock_vector_lock(void) {}
-static inline void unlock_vector_lock(void) {}
-static inline void __setup_vector_irq(int cpu) {}
-#endif
 
 #endif /* !ASSEMBLY_ */
 
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 1733ab49ac5e..bf006cce9418 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -132,6 +132,10 @@ extern int noioapicquirk;
 /* -1 if "noapic" boot option passed */
 extern int noioapicreroute;
 
+extern unsigned long io_apic_irqs;
+
+#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs))
+
 /*
  * If we use the IO-APIC for IRQ routing, disable automatic
  * assignment of PCI IRQ's.
@@ -139,18 +143,15 @@ extern int noioapicreroute;
 #define io_apic_assign_pci_irqs \
 	(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
 
-struct io_apic_irq_attr;
 struct irq_cfg;
 extern void ioapic_insert_resources(void);
+extern int arch_early_ioapic_init(void);
 
 extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
 				     unsigned int, int,
 				     struct io_apic_irq_attr *);
 extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
 
-extern void native_compose_msi_msg(struct pci_dev *pdev,
-				   unsigned int irq, unsigned int dest,
-				   struct msi_msg *msg, u8 hpet_id);
 extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
 
 extern int save_ioapic_entries(void);
@@ -160,6 +161,13 @@ extern int restore_ioapic_entries(void);
 extern void setup_ioapic_ids_from_mpc(void);
 extern void setup_ioapic_ids_from_mpc_nocheck(void);
 
+struct io_apic_irq_attr {
+	int ioapic;
+	int ioapic_pin;
+	int trigger;
+	int polarity;
+};
+
 enum ioapic_domain_type {
 	IOAPIC_DOMAIN_INVALID,
 	IOAPIC_DOMAIN_LEGACY,
@@ -188,8 +196,10 @@ extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
 extern u32 mp_pin_to_gsi(int ioapic, int pin);
 extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags);
 extern void mp_unmap_irq(int irq);
-extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
-				      struct ioapic_domain_cfg *cfg);
+extern int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+			      struct ioapic_domain_cfg *cfg);
+extern int mp_unregister_ioapic(u32 gsi_base);
+extern int mp_ioapic_registered(u32 gsi_base);
 extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
 			    irq_hw_number_t hwirq);
 extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq);
@@ -227,19 +237,25 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 
 extern void io_apic_eoi(unsigned int apic, unsigned int vector);
 
-extern bool mp_should_keep_irq(struct device *dev);
-
+extern void setup_IO_APIC(void);
+extern void enable_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void setup_ioapic_dest(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin);
+extern void print_IO_APICs(void);
 #else  /* !CONFIG_X86_IO_APIC */
 
+#define IO_APIC_IRQ(x)		0
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
 static inline void ioapic_insert_resources(void) { }
+static inline int arch_early_ioapic_init(void) { return 0; }
+static inline void print_IO_APICs(void) {}
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
 static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
 static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
 static inline void mp_unmap_irq(int irq) { }
-static inline bool mp_should_keep_irq(struct device *dev) { return 1; }
 
 static inline int save_ioapic_entries(void)
 {
@@ -262,7 +278,6 @@ static inline void disable_ioapic_support(void) { }
 #define native_io_apic_print_entries	NULL
 #define native_ioapic_set_affinity	NULL
 #define native_setup_ioapic_entry	NULL
-#define native_compose_msi_msg		NULL
 #define native_eoi_ioapic_pin		NULL
 #endif
 
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 5702d7e3111d..666c89ec4bd7 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -126,6 +126,12 @@
 
 #define NR_VECTORS			 256
 
+#ifdef CONFIG_X86_LOCAL_APIC
+#define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
+#else
+#define FIRST_SYSTEM_VECTOR		NR_VECTORS
+#endif
+
 #define FPU_IRQ				  13
 
 #define	FIRST_VM86_IRQ			   3
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30d6a0c..d89c6b828c96 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -33,7 +33,7 @@
 
 #define KVM_MAX_VCPUS 255
 #define KVM_SOFT_MAX_VCPUS 160
-#define KVM_USER_MEM_SLOTS 125
+#define KVM_USER_MEM_SLOTS 509
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
@@ -51,6 +51,7 @@
 			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 
 #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
+#define CR3_PCID_INVD		 (1UL << 63)
 #define CR4_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
@@ -361,6 +362,7 @@ struct kvm_vcpu_arch {
 	int mp_state;
 	u64 ia32_misc_enable_msr;
 	bool tpr_access_reporting;
+	u64 ia32_xss;
 
 	/*
 	 * Paging state of the vcpu
@@ -542,7 +544,7 @@ struct kvm_apic_map {
 	struct rcu_head rcu;
 	u8 ldr_bits;
 	/* fields bellow are used to decode ldr values in different modes */
-	u32 cid_shift, cid_mask, lid_mask;
+	u32 cid_shift, cid_mask, lid_mask, broadcast;
 	struct kvm_lapic *phys_map[256];
 	/* first index is cluster id second is cpu id in a cluster */
 	struct kvm_lapic *logical_map[16][16];
@@ -602,6 +604,9 @@ struct kvm_arch {
 
 	struct kvm_xen_hvm_config xen_hvm_config;
 
+	/* reads protected by irq_srcu, writes by irq_lock */
+	struct hlist_head mask_notifier_list;
+
 	/* fields used by HYPER-V emulation */
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
@@ -659,6 +664,16 @@ struct msr_data {
 	u64 data;
 };
 
+struct kvm_lapic_irq {
+	u32 vector;
+	u32 delivery_mode;
+	u32 dest_mode;
+	u32 level;
+	u32 trig_mode;
+	u32 shorthand;
+	u32 dest_id;
+};
+
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
@@ -767,6 +782,7 @@ struct kvm_x86_ops {
 			       enum x86_intercept_stage stage);
 	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 	bool (*mpx_supported)(void);
+	bool (*xsaves_supported)(void);
 
 	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 
@@ -818,6 +834,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  const void *val, int bytes);
 u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
+struct kvm_irq_mask_notifier {
+	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
+	int irq;
+	struct hlist_node link;
+};
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+				    struct kvm_irq_mask_notifier *kimn);
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+				      struct kvm_irq_mask_notifier *kimn);
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+			     bool mask);
+
 extern bool tdp_enabled;
 
 u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -863,7 +892,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 		    int reason, bool has_error_code, u32 error_code);
@@ -895,6 +924,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			    gfn_t gfn, void *data, int offset, int len,
 			    u32 access);
 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
 
 static inline int __kvm_irq_line_state(unsigned long *irq_state,
 				       int irq_source_id, int level)
@@ -1066,6 +1096,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 0892ea0e683f..4e370a5d8117 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -96,12 +96,15 @@ extern void pci_iommu_alloc(void);
 #ifdef CONFIG_PCI_MSI
 /* implemented in arch/x86/kernel/apic/io_apic. */
 struct msi_desc;
+void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq,
+			    unsigned int dest, struct msi_msg *msg, u8 hpet_id);
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
 void native_restore_msi_irqs(struct pci_dev *dev);
 int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 		  unsigned int irq_base, unsigned int irq_offset);
 #else
+#define native_compose_msi_msg		NULL
 #define native_setup_msi_irqs		NULL
 #define native_teardown_msi_irq		NULL
 #endif
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195dae425..164e3f8d3c3d 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
 	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index af447f95e3be..25bcd4a89517 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -452,6 +452,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
 extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
 				    unsigned int *level);
+extern pmd_t *lookup_pmd_address(unsigned long address);
 extern phys_addr_t slow_virt_to_phys(void *__address);
 extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
 				   unsigned numpages, unsigned long page_flags);
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a4efe477ceab..625660f8a2fc 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -92,7 +92,7 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
+			if (READ_ONCE(lock->tickets.head) == inc.tail)
 				goto out;
 			cpu_relax();
 		} while (--count);
@@ -105,7 +105,7 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	arch_spinlock_t old, new;
 
-	old.tickets = ACCESS_ONCE(lock->tickets);
+	old.tickets = READ_ONCE(lock->tickets);
 	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
 		return 0;
 
@@ -162,14 +162,14 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
 	return tmp.tail != tmp.head;
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
 	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade26d8d..45afaee9555c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
+#define SECONDARY_EXEC_XSAVES			0x00100000
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -159,6 +160,8 @@ enum vmcs_field {
 	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
 	VMREAD_BITMAP                   = 0x00002026,
 	VMWRITE_BITMAP                  = 0x00002028,
+	XSS_EXIT_BITMAP                 = 0x0000202C,
+	XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	VMCS_LINK_POINTER               = 0x00002800,
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f58ef6c0613b..5eea09915a15 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -41,10 +41,12 @@ typedef struct xpaddr {
 
 extern unsigned long *machine_to_phys_mapping;
 extern unsigned long  machine_to_phys_nr;
+extern unsigned long *xen_p2m_addr;
+extern unsigned long  xen_p2m_size;
+extern unsigned long  xen_max_p2m_pfn;
 
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 					     unsigned long pfn_e);
@@ -52,17 +54,52 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct gnttab_map_grant_ref *kmap_ops,
 				   struct page **pages, unsigned int count);
-extern int m2p_add_override(unsigned long mfn, struct page *page,
-			    struct gnttab_map_grant_ref *kmap_op);
 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 				     struct gnttab_map_grant_ref *kmap_ops,
 				     struct page **pages, unsigned int count);
-extern int m2p_remove_override(struct page *page,
-			       struct gnttab_map_grant_ref *kmap_op,
-			       unsigned long mfn);
-extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
+/*
+ * Helper functions to write or read unsigned long values to/from
+ * memory, when the access may fault.
+ */
+static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
+{
+	return __put_user(val, (unsigned long __user *)addr);
+}
+
+static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
+{
+	return __get_user(*val, (unsigned long __user *)addr);
+}
+
+/*
+ * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine():
+ * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator
+ *   bits (identity or foreign) are set.
+ * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set
+ *   identity or foreign indicator will be still set. __pfn_to_mfn() is
+ *   encapsulating get_phys_to_machine() which is called in special cases only.
+ * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special
+ *   cases needing an extended handling.
+ */
+static inline unsigned long __pfn_to_mfn(unsigned long pfn)
+{
+	unsigned long mfn;
+
+	if (pfn < xen_p2m_size)
+		mfn = xen_p2m_addr[pfn];
+	else if (unlikely(pfn < xen_max_p2m_pfn))
+		return get_phys_to_machine(pfn);
+	else
+		return IDENTITY_FRAME(pfn);
+
+	if (unlikely(mfn == INVALID_P2M_ENTRY))
+		return get_phys_to_machine(pfn);
+
+	return mfn;
+}
+
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
 	unsigned long mfn;
@@ -70,7 +107,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return pfn;
 
-	mfn = get_phys_to_machine(pfn);
+	mfn = __pfn_to_mfn(pfn);
 
 	if (mfn != INVALID_P2M_ENTRY)
 		mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
@@ -83,7 +120,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 1;
 
-	return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
+	return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY;
 }
 
 static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
@@ -102,7 +139,7 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
 	 * In such cases it doesn't matter what we return (we return garbage),
 	 * but we must handle the fault without crashing!
 	 */
-	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+	ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn);
 	if (ret < 0)
 		return ~0;
 
@@ -117,7 +154,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 		return mfn;
 
 	pfn = mfn_to_pfn_no_overrides(mfn);
-	if (get_phys_to_machine(pfn) != mfn) {
+	if (__pfn_to_mfn(pfn) != mfn) {
 		/*
 		 * If this appears to be a foreign mfn (because the pfn
 		 * doesn't map back to the mfn), then check the local override
@@ -133,8 +170,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 	 * entry doesn't map back to the mfn and m2p_override doesn't have a
 	 * valid entry for it.
 	 */
-	if (pfn == ~0 &&
-			get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
+	if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
 		pfn = mfn;
 
 	return pfn;
@@ -180,7 +216,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 		return mfn;
 
 	pfn = mfn_to_pfn(mfn);
-	if (get_phys_to_machine(pfn) != mfn)
+	if (__pfn_to_mfn(pfn) != mfn)
 		return -1; /* force !pfn_valid() */
 	return pfn;
 }
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 7e7a79ada658..5fa9770035dc 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,7 @@
 #define XSTATE_Hi16_ZMM		0x80
 
 #define XSTATE_FPSSE	(XSTATE_FP | XSTATE_SSE)
+#define XSTATE_AVX512	(XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
 /* Bit 63 of XCR0 is reserved for future expansion */
 #define XSTATE_EXTEND_MASK	(~(XSTATE_FPSSE | (1ULL << 63)))
 
diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index 46727eb37bfe..6e1aaf73852a 100644
--- a/arch/x86/include/uapi/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h
@@ -28,6 +28,13 @@ struct user_desc {
 	unsigned int  seg_not_present:1;
 	unsigned int  useable:1;
 #ifdef __x86_64__
+	/*
+	 * Because this bit is not present in 32-bit user code, user
+	 * programs can pass uninitialized values here.  Therefore, in
+	 * any context in which a user_desc comes from a 32-bit program,
+	 * the kernel must act as though lm == 0, regardless of the
+	 * actual value.
+	 */
 	unsigned int  lm:1;
 #endif
 };
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 990a2fe1588d..b813bf9da1e2 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -72,6 +72,8 @@
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
 #define EXIT_REASON_INVPCID             58
+#define EXIT_REASON_XSAVES              63
+#define EXIT_REASON_XRSTORS             64
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -116,6 +118,8 @@
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
-	{ EXIT_REASON_INVPCID,               "INVPCID" }
+	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
+	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a142e77693e1..4433a4be8171 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -76,6 +76,19 @@ int acpi_fix_pin2_polarity __initdata;
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 #endif
 
+/*
+ * Locks related to IOAPIC hotplug
+ * Hotplug side:
+ *	->device_hotplug_lock
+ *		->acpi_ioapic_lock
+ *			->ioapic_lock
+ * Interrupt mapping side:
+ *	->acpi_ioapic_lock
+ *		->ioapic_mutex
+ *			->ioapic_lock
+ */
+static DEFINE_MUTEX(acpi_ioapic_lock);
+
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
@@ -395,10 +408,6 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return gsi;
 
-	/* Don't set up the ACPI SCI because it's already set up */
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
-
 	trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
 	polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
 	node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
@@ -411,7 +420,8 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
 	if (irq < 0)
 		return irq;
 
-	if (enable_update_mptable)
+	/* Don't set up the ACPI SCI because it's already set up */
+	if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi)
 		mp_config_acpi_gsi(dev, gsi, trigger, polarity);
 
 	return irq;
@@ -424,9 +434,6 @@ static void mp_unregister_gsi(u32 gsi)
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return;
 
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return;
-
 	irq = mp_map_gsi_to_irq(gsi, 0);
 	if (irq > 0)
 		mp_unmap_irq(irq);
@@ -609,8 +616,10 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
 		*irqp = gsi;
 	} else {
+		mutex_lock(&acpi_ioapic_lock);
 		irq = mp_map_gsi_to_irq(gsi,
 					IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
+		mutex_unlock(&acpi_ioapic_lock);
 		if (irq < 0)
 			return -1;
 		*irqp = irq;
@@ -650,7 +659,9 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
 	int irq = gsi;
 
 #ifdef CONFIG_X86_IO_APIC
+	mutex_lock(&acpi_ioapic_lock);
 	irq = mp_register_gsi(dev, gsi, trigger, polarity);
+	mutex_unlock(&acpi_ioapic_lock);
 #endif
 
 	return irq;
@@ -659,7 +670,9 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
 static void acpi_unregister_gsi_ioapic(u32 gsi)
 {
 #ifdef CONFIG_X86_IO_APIC
+	mutex_lock(&acpi_ioapic_lock);
 	mp_unregister_gsi(gsi);
+	mutex_unlock(&acpi_ioapic_lock);
 #endif
 }
 
@@ -690,6 +703,7 @@ void acpi_unregister_gsi(u32 gsi)
 }
 EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __init acpi_set_irq_model_ioapic(void)
 {
 	acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
@@ -697,6 +711,7 @@ static void __init acpi_set_irq_model_ioapic(void)
 	__acpi_unregister_gsi = acpi_unregister_gsi_ioapic;
 	acpi_ioapic = 1;
 }
+#endif
 
 /*
  *  ACPI based hotplug support for CPU
@@ -759,20 +774,74 @@ EXPORT_SYMBOL(acpi_unmap_lsapic);
 
 int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
 {
-	/* TBD */
-	return -EINVAL;
-}
+	int ret = -ENOSYS;
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	int ioapic_id;
+	u64 addr;
+	struct ioapic_domain_cfg cfg = {
+		.type = IOAPIC_DOMAIN_DYNAMIC,
+		.ops = &acpi_irqdomain_ops,
+	};
+
+	ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr);
+	if (ioapic_id < 0) {
+		unsigned long long uid;
+		acpi_status status;
 
+		status = acpi_evaluate_integer(handle, METHOD_NAME__UID,
+					       NULL, &uid);
+		if (ACPI_FAILURE(status)) {
+			acpi_handle_warn(handle, "failed to get IOAPIC ID.\n");
+			return -EINVAL;
+		}
+		ioapic_id = (int)uid;
+	}
+
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_register_ioapic(ioapic_id, phys_addr, gsi_base, &cfg);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
 EXPORT_SYMBOL(acpi_register_ioapic);
 
 int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
 {
-	/* TBD */
-	return -EINVAL;
-}
+	int ret = -ENOSYS;
 
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_unregister_ioapic(gsi_base);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
+/**
+ * acpi_ioapic_registered - Check whether IOAPIC assoicatied with @gsi_base
+ *			    has been registered
+ * @handle:	ACPI handle of the IOAPIC deivce
+ * @gsi_base:	GSI base associated with the IOAPIC
+ *
+ * Assume caller holds some type of lock to serialize acpi_ioapic_registered()
+ * with acpi_register_ioapic()/acpi_unregister_ioapic().
+ */
+int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base)
+{
+	int ret = 0;
+
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_ioapic_registered(gsi_base);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
+
 static int __init acpi_parse_sbf(struct acpi_table_header *table)
 {
 	struct acpi_table_boot *sb;
@@ -1185,7 +1254,9 @@ static void __init acpi_process_madt(void)
 			/*
 			 * Parse MADT IO-APIC entries
 			 */
+			mutex_lock(&acpi_ioapic_lock);
 			error = acpi_parse_madt_ioapic_entries();
+			mutex_unlock(&acpi_ioapic_lock);
 			if (!error) {
 				acpi_set_irq_model_ioapic();
 
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index dcb5b15401ce..8bb12ddc5db8 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,10 +2,12 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o
 obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
+obj-$(CONFIG_PCI_MSI)		+= msi.o
+obj-$(CONFIG_HT_IRQ)		+= htirq.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ba6cc041edb1..29b5b18afa27 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -196,7 +196,7 @@ static int disable_apic_timer __initdata;
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
-int first_system_vector = 0xfe;
+int first_system_vector = FIRST_SYSTEM_VECTOR;
 
 /*
  * Debug level, exported for io_apic.c
@@ -1930,7 +1930,7 @@ int __init APIC_init_uniprocessor(void)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-static inline void __smp_spurious_interrupt(void)
+static inline void __smp_spurious_interrupt(u8 vector)
 {
 	u32 v;
 
@@ -1939,30 +1939,32 @@ static inline void __smp_spurious_interrupt(void)
 	 * if it is a vectored one.  Just in case...
 	 * Spurious interrupts should not be ACKed.
 	 */
-	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
+	if (v & (1 << (vector & 0x1f)))
 		ack_APIC_irq();
 
 	inc_irq_stat(irq_spurious_count);
 
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	pr_info("spurious APIC interrupt on CPU#%d, "
-		"should never happen.\n", smp_processor_id());
+	pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
+		"should never happen.\n", vector, smp_processor_id());
 }
 
 __visible void smp_spurious_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
-	__smp_spurious_interrupt();
+	__smp_spurious_interrupt(~regs->orig_ax);
 	exiting_irq();
 }
 
 __visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
 {
+	u8 vector = ~regs->orig_ax;
+
 	entering_irq();
-	trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
-	__smp_spurious_interrupt();
-	trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
+	trace_spurious_apic_entry(vector);
+	__smp_spurious_interrupt(vector);
+	trace_spurious_apic_exit(vector);
 	exiting_irq();
 }
 
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
new file mode 100644
index 000000000000..816f36e979ad
--- /dev/null
+++ b/arch/x86/kernel/apic/htirq.c
@@ -0,0 +1,107 @@
+/*
+ * Support Hypertransport IRQ
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/htirq.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/hypertransport.h>
+
+/*
+ * Hypertransport interrupt support
+ */
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	struct ht_irq_msg msg;
+
+	fetch_ht_irq_msg(irq, &msg);
+
+	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+	write_ht_irq_msg(irq, &msg);
+}
+
+static int
+ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	target_ht_irq(data->irq, dest, cfg->vector);
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip ht_irq_chip = {
+	.name			= "PCI-HT",
+	.irq_mask		= mask_ht_irq,
+	.irq_unmask		= unmask_ht_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= ht_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+	struct irq_cfg *cfg;
+	struct ht_irq_msg msg;
+	unsigned dest;
+	int err;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+	msg.address_lo =
+		HT_IRQ_LOW_BASE |
+		HT_IRQ_LOW_DEST_ID(dest) |
+		HT_IRQ_LOW_VECTOR(cfg->vector) |
+		((apic->irq_dest_mode == 0) ?
+			HT_IRQ_LOW_DM_PHYSICAL :
+			HT_IRQ_LOW_DM_LOGICAL) |
+		HT_IRQ_LOW_RQEOI_EDGE |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			HT_IRQ_LOW_MT_FIXED :
+			HT_IRQ_LOW_MT_ARBITRATED) |
+		HT_IRQ_LOW_IRQ_MASKED;
+
+	write_ht_irq_msg(irq, &msg);
+
+	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+				      handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for HT\n", irq);
+
+	return 0;
+}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7ffe0a2b870f..3f5f60406ab1 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -32,15 +32,11 @@
 #include <linux/module.h>
 #include <linux/syscore_ops.h>
 #include <linux/irqdomain.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>	/* time_after() */
 #include <linux/slab.h>
 #include <linux/bootmem.h>
-#include <linux/dmar.h>
-#include <linux/hpet.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -52,17 +48,12 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
-#include <asm/hpet.h>
 #include <asm/hw_irq.h>
 
 #include <asm/apic.h>
 
-#define __apicdebuginit(type) static type __init
-
 #define	for_each_ioapic(idx)		\
 	for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
 #define	for_each_ioapic_reverse(idx)	\
@@ -74,7 +65,7 @@
 		for_each_pin((idx), (pin))
 
 #define for_each_irq_pin(entry, head) \
-	for (entry = head; entry; entry = entry->next)
+	list_for_each_entry(entry, &head, list)
 
 /*
  *      Is the SiS APIC rmw bug present ?
@@ -83,7 +74,6 @@
 int sis_apic_bug = -1;
 
 static DEFINE_RAW_SPINLOCK(ioapic_lock);
-static DEFINE_RAW_SPINLOCK(vector_lock);
 static DEFINE_MUTEX(ioapic_mutex);
 static unsigned int ioapic_dynirq_base;
 static int ioapic_initialized;
@@ -112,6 +102,7 @@ static struct ioapic {
 	struct ioapic_domain_cfg irqdomain_cfg;
 	struct irq_domain *irqdomain;
 	struct mp_pin_info *pin_info;
+	struct resource *iomem_res;
 } ioapics[MAX_IO_APICS];
 
 #define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver
@@ -205,8 +196,6 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
-
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
 {
@@ -228,8 +217,8 @@ void mp_save_irq(struct mpc_intsrc *m)
 }
 
 struct irq_pin_list {
+	struct list_head list;
 	int apic, pin;
-	struct irq_pin_list *next;
 };
 
 static struct irq_pin_list *alloc_irq_pin_list(int node)
@@ -237,7 +226,26 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
 	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 }
 
-int __init arch_early_irq_init(void)
+static void alloc_ioapic_saved_registers(int idx)
+{
+	size_t size;
+
+	if (ioapics[idx].saved_registers)
+		return;
+
+	size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers;
+	ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL);
+	if (!ioapics[idx].saved_registers)
+		pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
+}
+
+static void free_ioapic_saved_registers(int idx)
+{
+	kfree(ioapics[idx].saved_registers);
+	ioapics[idx].saved_registers = NULL;
+}
+
+int __init arch_early_ioapic_init(void)
 {
 	struct irq_cfg *cfg;
 	int i, node = cpu_to_node(0);
@@ -245,13 +253,8 @@ int __init arch_early_irq_init(void)
 	if (!nr_legacy_irqs())
 		io_apic_irqs = ~0UL;
 
-	for_each_ioapic(i) {
-		ioapics[i].saved_registers =
-			kzalloc(sizeof(struct IO_APIC_route_entry) *
-				ioapics[i].nr_registers, GFP_KERNEL);
-		if (!ioapics[i].saved_registers)
-			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
-	}
+	for_each_ioapic(i)
+		alloc_ioapic_saved_registers(i);
 
 	/*
 	 * For legacy IRQ's, start with assigning irq0 to irq15 to
@@ -266,61 +269,6 @@ int __init arch_early_irq_init(void)
 	return 0;
 }
 
-static inline struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	return irq_get_chip_data(irq);
-}
-
-static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
-{
-	struct irq_cfg *cfg;
-
-	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
-	if (!cfg)
-		return NULL;
-	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
-		goto out_cfg;
-	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
-		goto out_domain;
-	return cfg;
-out_domain:
-	free_cpumask_var(cfg->domain);
-out_cfg:
-	kfree(cfg);
-	return NULL;
-}
-
-static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
-{
-	if (!cfg)
-		return;
-	irq_set_chip_data(at, NULL);
-	free_cpumask_var(cfg->domain);
-	free_cpumask_var(cfg->old_domain);
-	kfree(cfg);
-}
-
-static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
-{
-	int res = irq_alloc_desc_at(at, node);
-	struct irq_cfg *cfg;
-
-	if (res < 0) {
-		if (res != -EEXIST)
-			return NULL;
-		cfg = irq_cfg(at);
-		if (cfg)
-			return cfg;
-	}
-
-	cfg = alloc_irq_cfg(at, node);
-	if (cfg)
-		irq_set_chip_data(at, cfg);
-	else
-		irq_free_desc(at);
-	return cfg;
-}
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -445,15 +393,12 @@ static void ioapic_mask_entry(int apic, int pin)
  */
 static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
-	struct irq_pin_list **last, *entry;
+	struct irq_pin_list *entry;
 
 	/* don't allow duplicates */
-	last = &cfg->irq_2_pin;
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
+	for_each_irq_pin(entry, cfg->irq_2_pin)
 		if (entry->apic == apic && entry->pin == pin)
 			return 0;
-		last = &entry->next;
-	}
 
 	entry = alloc_irq_pin_list(node);
 	if (!entry) {
@@ -464,22 +409,19 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
 	entry->apic = apic;
 	entry->pin = pin;
 
-	*last = entry;
+	list_add_tail(&entry->list, &cfg->irq_2_pin);
 	return 0;
 }
 
 static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
 {
-	struct irq_pin_list **last, *entry;
+	struct irq_pin_list *tmp, *entry;
 
-	last = &cfg->irq_2_pin;
-	for_each_irq_pin(entry, cfg->irq_2_pin)
+	list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list)
 		if (entry->apic == apic && entry->pin == pin) {
-			*last = entry->next;
+			list_del(&entry->list);
 			kfree(entry);
 			return;
-		} else {
-			last = &entry->next;
 		}
 }
 
@@ -559,7 +501,7 @@ static void mask_ioapic(struct irq_cfg *cfg)
 
 static void mask_ioapic_irq(struct irq_data *data)
 {
-	mask_ioapic(data->chip_data);
+	mask_ioapic(irqd_cfg(data));
 }
 
 static void __unmask_ioapic(struct irq_cfg *cfg)
@@ -578,7 +520,7 @@ static void unmask_ioapic(struct irq_cfg *cfg)
 
 static void unmask_ioapic_irq(struct irq_data *data)
 {
-	unmask_ioapic(data->chip_data);
+	unmask_ioapic(irqd_cfg(data));
 }
 
 /*
@@ -1164,8 +1106,7 @@ void mp_unmap_irq(int irq)
  * Find a specific PCI IRQ entry.
  * Not an __init, possibly needed by modules
  */
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
-				struct io_apic_irq_attr *irq_attr)
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 {
 	int irq, i, best_ioapic = -1, best_idx = -1;
 
@@ -1219,195 +1160,11 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 		return -1;
 
 out:
-	irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
-			IOAPIC_MAP_ALLOC);
-	if (irq > 0)
-		set_io_apic_irq_attr(irq_attr, best_ioapic,
-				     mp_irqs[best_idx].dstirq,
-				     irq_trigger(best_idx),
-				     irq_polarity(best_idx));
-	return irq;
+	return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
+			 IOAPIC_MAP_ALLOC);
 }
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
-void lock_vector_lock(void)
-{
-	/* Used to the online set of cpus does not change
-	 * during assign_irq_vector.
-	 */
-	raw_spin_lock(&vector_lock);
-}
-
-void unlock_vector_lock(void)
-{
-	raw_spin_unlock(&vector_lock);
-}
-
-static int
-__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
-	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, err;
-	cpumask_var_t tmp_mask;
-
-	if (cfg->move_in_progress)
-		return -EBUSY;
-
-	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
-		return -ENOMEM;
-
-	/* Only try and allocate irqs on cpus that are present */
-	err = -ENOSPC;
-	cpumask_clear(cfg->old_domain);
-	cpu = cpumask_first_and(mask, cpu_online_mask);
-	while (cpu < nr_cpu_ids) {
-		int new_cpu, vector, offset;
-
-		apic->vector_allocation_domain(cpu, tmp_mask, mask);
-
-		if (cpumask_subset(tmp_mask, cfg->domain)) {
-			err = 0;
-			if (cpumask_equal(tmp_mask, cfg->domain))
-				break;
-			/*
-			 * New cpumask using the vector is a proper subset of
-			 * the current in use mask. So cleanup the vector
-			 * allocation for the members that are not used anymore.
-			 */
-			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
-			cfg->move_in_progress =
-			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
-			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
-			break;
-		}
-
-		vector = current_vector;
-		offset = current_offset;
-next:
-		vector += 16;
-		if (vector >= first_system_vector) {
-			offset = (offset + 1) % 16;
-			vector = FIRST_EXTERNAL_VECTOR + offset;
-		}
-
-		if (unlikely(current_vector == vector)) {
-			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
-			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
-			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
-			continue;
-		}
-
-		if (test_bit(vector, used_vectors))
-			goto next;
-
-		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
-			if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED)
-				goto next;
-		}
-		/* Found one! */
-		current_vector = vector;
-		current_offset = offset;
-		if (cfg->vector) {
-			cpumask_copy(cfg->old_domain, cfg->domain);
-			cfg->move_in_progress =
-			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
-		}
-		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
-			per_cpu(vector_irq, new_cpu)[vector] = irq;
-		cfg->vector = vector;
-		cpumask_copy(cfg->domain, tmp_mask);
-		err = 0;
-		break;
-	}
-	free_cpumask_var(tmp_mask);
-	return err;
-}
-
-int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-	int err;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, cfg, mask);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	return err;
-}
-
-static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
-{
-	int cpu, vector;
-
-	BUG_ON(!cfg->vector);
-
-	vector = cfg->vector;
-	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
-		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-
-	cfg->vector = 0;
-	cpumask_clear(cfg->domain);
-
-	if (likely(!cfg->move_in_progress))
-		return;
-	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
-		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-			if (per_cpu(vector_irq, cpu)[vector] != irq)
-				continue;
-			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-			break;
-		}
-	}
-	cfg->move_in_progress = 0;
-}
-
-void __setup_vector_irq(int cpu)
-{
-	/* Initialize vector_irq on a new cpu */
-	int irq, vector;
-	struct irq_cfg *cfg;
-
-	/*
-	 * vector_lock will make sure that we don't run into irq vector
-	 * assignments that might be happening on another cpu in parallel,
-	 * while we setup our initial vector to irq mappings.
-	 */
-	raw_spin_lock(&vector_lock);
-	/* Mark the inuse vectors */
-	for_each_active_irq(irq) {
-		cfg = irq_cfg(irq);
-		if (!cfg)
-			continue;
-
-		if (!cpumask_test_cpu(cpu, cfg->domain))
-			continue;
-		vector = cfg->vector;
-		per_cpu(vector_irq, cpu)[vector] = irq;
-	}
-	/* Mark the free vectors */
-	for (vector = 0; vector < NR_VECTORS; ++vector) {
-		irq = per_cpu(vector_irq, cpu)[vector];
-		if (irq <= VECTOR_UNDEFINED)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cpumask_test_cpu(cpu, cfg->domain))
-			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-	}
-	raw_spin_unlock(&vector_lock);
-}
-
 static struct irq_chip ioapic_chip;
 
 #ifdef CONFIG_X86_32
@@ -1496,7 +1253,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 					 &dest)) {
 		pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
-		__clear_irq_vector(irq, cfg);
+		clear_irq_vector(irq, cfg);
 
 		return;
 	}
@@ -1510,7 +1267,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 	if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
 		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
-		__clear_irq_vector(irq, cfg);
+		clear_irq_vector(irq, cfg);
 
 		return;
 	}
@@ -1641,7 +1398,7 @@ void ioapic_zap_locks(void)
 	raw_spin_lock_init(&ioapic_lock);
 }
 
-__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+static void __init print_IO_APIC(int ioapic_idx)
 {
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
@@ -1698,7 +1455,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 	x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
 }
 
-__apicdebuginit(void) print_IO_APICs(void)
+void __init print_IO_APICs(void)
 {
 	int ioapic_idx;
 	struct irq_cfg *cfg;
@@ -1731,8 +1488,7 @@ __apicdebuginit(void) print_IO_APICs(void)
 		cfg = irq_cfg(irq);
 		if (!cfg)
 			continue;
-		entry = cfg->irq_2_pin;
-		if (!entry)
+		if (list_empty(&cfg->irq_2_pin))
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
 		for_each_irq_pin(entry, cfg->irq_2_pin)
@@ -1743,205 +1499,6 @@ __apicdebuginit(void) print_IO_APICs(void)
 	printk(KERN_INFO ".................................... done.\n");
 }
 
-__apicdebuginit(void) print_APIC_field(int base)
-{
-	int i;
-
-	printk(KERN_DEBUG);
-
-	for (i = 0; i < 8; i++)
-		pr_cont("%08x", apic_read(base + i*0x10));
-
-	pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-	unsigned int i, v, ver, maxlvt;
-	u64 icr;
-
-	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
-	v = apic_read(APIC_LVR);
-	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-	ver = GET_APIC_VERSION(v);
-	maxlvt = lapic_get_maxlvt();
-
-	v = apic_read(APIC_TASKPRI);
-	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-		if (!APIC_XAPIC(ver)) {
-			v = apic_read(APIC_ARBPRI);
-			printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			       v & APIC_ARBPRI_MASK);
-		}
-		v = apic_read(APIC_PROCPRI);
-		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-	}
-
-	/*
-	 * Remote read supported only in the 82489DX and local APIC for
-	 * Pentium processors.
-	 */
-	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
-		v = apic_read(APIC_RRR);
-		printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_LDR);
-	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	if (!x2apic_enabled()) {
-		v = apic_read(APIC_DFR);
-		printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-	}
-	v = apic_read(APIC_SPIV);
-	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_field(APIC_ISR);
-	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_field(APIC_TMR);
-	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_field(APIC_IRR);
-
-	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-
-		v = apic_read(APIC_ESR);
-		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-	}
-
-	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
-
-	v = apic_read(APIC_LVTT);
-	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-	if (maxlvt > 3) {                       /* PC is LVT#4. */
-		v = apic_read(APIC_LVTPC);
-		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-	}
-	v = apic_read(APIC_LVT0);
-	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-	v = apic_read(APIC_LVT1);
-	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-	if (maxlvt > 2) {			/* ERR is LVT#3. */
-		v = apic_read(APIC_LVTERR);
-		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_TMICT);
-	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-	v = apic_read(APIC_TMCCT);
-	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-	v = apic_read(APIC_TDCR);
-	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-
-	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
-		v = apic_read(APIC_EFEAT);
-		maxlvt = (v >> 16) & 0xff;
-		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
-		v = apic_read(APIC_ECTRL);
-		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
-		for (i = 0; i < maxlvt; i++) {
-			v = apic_read(APIC_EILVTn(i));
-			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
-		}
-	}
-	pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APICs(int maxcpu)
-{
-	int cpu;
-
-	if (!maxcpu)
-		return;
-
-	preempt_disable();
-	for_each_online_cpu(cpu) {
-		if (cpu >= maxcpu)
-			break;
-		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
-	}
-	preempt_enable();
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-	unsigned int v;
-	unsigned long flags;
-
-	if (!nr_legacy_irqs())
-		return;
-
-	printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-	raw_spin_lock_irqsave(&i8259A_lock, flags);
-
-	v = inb(0xa1) << 8 | inb(0x21);
-	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-	v = inb(0xa0) << 8 | inb(0x20);
-	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
-	outb(0x0b,0xa0);
-	outb(0x0b,0x20);
-	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a,0xa0);
-	outb(0x0a,0x20);
-
-	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
-	v = inb(0x4d1) << 8 | inb(0x4d0);
-	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-static int __initdata show_lapic = 1;
-static __init int setup_show_lapic(char *arg)
-{
-	int num = -1;
-
-	if (strcmp(arg, "all") == 0) {
-		show_lapic = CONFIG_NR_CPUS;
-	} else {
-		get_option(&arg, &num);
-		if (num >= 0)
-			show_lapic = num;
-	}
-
-	return 1;
-}
-__setup("show_lapic=", setup_show_lapic);
-
-__apicdebuginit(int) print_ICs(void)
-{
-	if (apic_verbosity == APIC_QUIET)
-		return 0;
-
-	print_PIC();
-
-	/* don't print out if apic is not there */
-	if (!cpu_has_apic && !apic_from_smp_config())
-		return 0;
-
-	print_local_APICs(show_lapic);
-	print_IO_APICs();
-
-	return 0;
-}
-
-late_initcall(print_ICs);
-
-
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
@@ -2244,26 +1801,12 @@ static unsigned int startup_ioapic_irq(struct irq_data *data)
 		if (legacy_pic->irq_pending(irq))
 			was_pending = 1;
 	}
-	__unmask_ioapic(data->chip_data);
+	__unmask_ioapic(irqd_cfg(data));
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
 }
 
-static int ioapic_retrigger_irq(struct irq_data *data)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned long flags;
-	int cpu;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
-	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	return 1;
-}
-
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
  * so we use two separate IRQ descriptors. Edge triggered IRQs can be
@@ -2273,113 +1816,6 @@ static int ioapic_retrigger_irq(struct irq_data *data)
  * races.
  */
 
-#ifdef CONFIG_SMP
-void send_cleanup_vector(struct irq_cfg *cfg)
-{
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
-	cfg->move_in_progress = 0;
-}
-
-asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
-{
-	unsigned vector, me;
-
-	ack_APIC_irq();
-	irq_enter();
-	exit_idle();
-
-	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		int irq;
-		unsigned int irr;
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
-		irq = __this_cpu_read(vector_irq[vector]);
-
-		if (irq <= VECTOR_UNDEFINED)
-			continue;
-
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cfg)
-			continue;
-
-		raw_spin_lock(&desc->lock);
-
-		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
-		 */
-		if (cfg->move_in_progress)
-			goto unlock;
-
-		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
-			goto unlock;
-
-		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
-		/*
-		 * Check if the vector that needs to be cleanedup is
-		 * registered at the cpu's IRR. If so, then this is not
-		 * the best time to clean it up. Lets clean it up in the
-		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
-		 * to myself.
-		 */
-		if (irr  & (1 << (vector % 32))) {
-			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
-			goto unlock;
-		}
-		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
-unlock:
-		raw_spin_unlock(&desc->lock);
-	}
-
-	irq_exit();
-}
-
-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
-{
-	unsigned me;
-
-	if (likely(!cfg->move_in_progress))
-		return;
-
-	me = smp_processor_id();
-
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
-		send_cleanup_vector(cfg);
-}
-
-static void irq_complete_move(struct irq_cfg *cfg)
-{
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
-void irq_force_complete_move(int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	if (!cfg)
-		return;
-
-	__irq_complete_move(cfg, cfg->vector);
-}
-#else
-static inline void irq_complete_move(struct irq_cfg *cfg) { }
-#endif
-
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
@@ -2400,41 +1836,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 	}
 }
 
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-			  unsigned int *dest_id)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int irq = data->irq;
-	int err;
-
-	if (!config_enabled(CONFIG_SMP))
-		return -EPERM;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return -EINVAL;
-
-	err = assign_irq_vector(irq, cfg, mask);
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
-	if (err) {
-		if (assign_irq_vector(irq, cfg, data->affinity))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	cpumask_copy(data->affinity, mask);
-
-	return 0;
-}
-
-
 int native_ioapic_set_affinity(struct irq_data *data,
 			       const struct cpumask *mask,
 			       bool force)
@@ -2447,24 +1848,17 @@ int native_ioapic_set_affinity(struct irq_data *data,
 		return -EPERM;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	ret = __ioapic_set_affinity(data, mask, &dest);
+	ret = apic_set_affinity(data, mask, &dest);
 	if (!ret) {
 		/* Only the high 8 bits are valid. */
 		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, data->chip_data);
+		__target_IO_APIC_irq(irq, dest, irqd_cfg(data));
 		ret = IRQ_SET_MASK_OK_NOCOPY;
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	return ret;
 }
 
-static void ack_apic_edge(struct irq_data *data)
-{
-	irq_complete_move(data->chip_data);
-	irq_move_irq(data);
-	ack_APIC_irq();
-}
-
 atomic_t irq_mis_count;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -2547,9 +1941,9 @@ static inline void ioapic_irqd_unmask(struct irq_data *data,
 }
 #endif
 
-static void ack_apic_level(struct irq_data *data)
+static void ack_ioapic_level(struct irq_data *data)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	int i, irq = data->irq;
 	unsigned long v;
 	bool masked;
@@ -2619,10 +2013,10 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.irq_startup		= startup_ioapic_irq,
 	.irq_mask		= mask_ioapic_irq,
 	.irq_unmask		= unmask_ioapic_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_eoi		= ack_apic_level,
+	.irq_ack		= apic_ack_edge,
+	.irq_eoi		= ack_ioapic_level,
 	.irq_set_affinity	= native_ioapic_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
+	.irq_retrigger		= apic_retrigger_irq,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -2965,6 +2359,16 @@ static int mp_irqdomain_create(int ioapic)
 	return 0;
 }
 
+static void ioapic_destroy_irqdomain(int idx)
+{
+	if (ioapics[idx].irqdomain) {
+		irq_domain_remove(ioapics[idx].irqdomain);
+		ioapics[idx].irqdomain = NULL;
+	}
+	kfree(ioapics[idx].pin_info);
+	ioapics[idx].pin_info = NULL;
+}
+
 void __init setup_IO_APIC(void)
 {
 	int ioapic;
@@ -3044,399 +2448,6 @@ static int __init ioapic_init_ops(void)
 
 device_initcall(ioapic_init_ops);
 
-/*
- * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
- */
-int arch_setup_hwirq(unsigned int irq, int node)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	int ret;
-
-	cfg = alloc_irq_cfg(irq, node);
-	if (!cfg)
-		return -ENOMEM;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	if (!ret)
-		irq_set_chip_data(irq, cfg);
-	else
-		free_irq_cfg(irq, cfg);
-	return ret;
-}
-
-void arch_teardown_hwirq(unsigned int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned long flags;
-
-	free_remapped_irq(irq);
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq, cfg);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	free_irq_cfg(irq, cfg);
-}
-
-/*
- * MSI message composition
- */
-void native_compose_msi_msg(struct pci_dev *pdev,
-			    unsigned int irq, unsigned int dest,
-			    struct msi_msg *msg, u8 hpet_id)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	msg->address_hi = MSI_ADDR_BASE_HI;
-
-	if (x2apic_enabled())
-		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
-
-	msg->address_lo =
-		MSI_ADDR_BASE_LO |
-		((apic->irq_dest_mode == 0) ?
-			MSI_ADDR_DEST_MODE_PHYSICAL:
-			MSI_ADDR_DEST_MODE_LOGICAL) |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_ADDR_REDIRECTION_CPU:
-			MSI_ADDR_REDIRECTION_LOWPRI) |
-		MSI_ADDR_DEST_ID(dest);
-
-	msg->data =
-		MSI_DATA_TRIGGER_EDGE |
-		MSI_DATA_LEVEL_ASSERT |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_DATA_DELIVERY_FIXED:
-			MSI_DATA_DELIVERY_LOWPRI) |
-		MSI_DATA_VECTOR(cfg->vector);
-}
-
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
-			   struct msi_msg *msg, u8 hpet_id)
-{
-	struct irq_cfg *cfg;
-	int err;
-	unsigned dest;
-
-	if (disable_apic)
-		return -ENXIO;
-
-	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(cfg->domain,
-					   apic->target_cpus(), &dest);
-	if (err)
-		return err;
-
-	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
-
-	return 0;
-}
-
-static int
-msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	struct msi_msg msg;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	__get_cached_msi_msg(data->msi_desc, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	__pci_write_msi_msg(data->msi_desc, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-	.name			= "PCI-MSI",
-	.irq_unmask		= pci_msi_unmask_irq,
-	.irq_mask		= pci_msi_mask_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= msi_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-		  unsigned int irq_base, unsigned int irq_offset)
-{
-	struct irq_chip *chip = &msi_chip;
-	struct msi_msg msg;
-	unsigned int irq = irq_base + irq_offset;
-	int ret;
-
-	ret = msi_compose_msg(dev, irq, &msg, -1);
-	if (ret < 0)
-		return ret;
-
-	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
-
-	/*
-	 * MSI-X message is written per-IRQ, the offset is always 0.
-	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
-	 */
-	if (!irq_offset)
-		pci_write_msi_msg(irq, &msg);
-
-	setup_remapped_irq(irq, irq_cfg(irq), chip);
-
-	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
-
-	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
-
-	return 0;
-}
-
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	struct msi_desc *msidesc;
-	unsigned int irq;
-	int node, ret;
-
-	/* Multiple MSI vectors only supported with interrupt remapping */
-	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
-
-	node = dev_to_node(&dev->dev);
-
-	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = irq_alloc_hwirq(node);
-		if (!irq)
-			return -ENOSPC;
-
-		ret = setup_msi_irq(dev, msidesc, irq, 0);
-		if (ret < 0) {
-			irq_free_hwirq(irq);
-			return ret;
-		}
-
-	}
-	return 0;
-}
-
-void native_teardown_msi_irq(unsigned int irq)
-{
-	irq_free_hwirq(irq);
-}
-
-#ifdef CONFIG_DMAR_TABLE
-static int
-dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
-		      bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest, irq = data->irq;
-	struct msi_msg msg;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	dmar_msi_read(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
-
-	dmar_msi_write(irq, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip dmar_msi_type = {
-	.name			= "DMAR_MSI",
-	.irq_unmask		= dmar_msi_unmask,
-	.irq_mask		= dmar_msi_mask,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= dmar_msi_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int arch_setup_dmar_msi(unsigned int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(NULL, irq, &msg, -1);
-	if (ret < 0)
-		return ret;
-	dmar_msi_write(irq, &msg);
-	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
-				      "edge");
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_HPET_TIMER
-
-static int hpet_msi_set_affinity(struct irq_data *data,
-				 const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	struct msi_msg msg;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	hpet_msi_read(data->handler_data, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	hpet_msi_write(data->handler_data, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip hpet_msi_type = {
-	.name = "HPET_MSI",
-	.irq_unmask = hpet_msi_unmask,
-	.irq_mask = hpet_msi_mask,
-	.irq_ack = ack_apic_edge,
-	.irq_set_affinity = hpet_msi_set_affinity,
-	.irq_retrigger = ioapic_retrigger_irq,
-	.flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-int default_setup_hpet_msi(unsigned int irq, unsigned int id)
-{
-	struct irq_chip *chip = &hpet_msi_type;
-	struct msi_msg msg;
-	int ret;
-
-	ret = msi_compose_msg(NULL, irq, &msg, id);
-	if (ret < 0)
-		return ret;
-
-	hpet_msi_write(irq_get_handler_data(irq), &msg);
-	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-	setup_remapped_irq(irq, irq_cfg(irq), chip);
-
-	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
-	return 0;
-}
-#endif
-
-#endif /* CONFIG_PCI_MSI */
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	struct ht_irq_msg msg;
-	fetch_ht_irq_msg(irq, &msg);
-
-	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
-	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
-	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-	write_ht_irq_msg(irq, &msg);
-}
-
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	target_ht_irq(data->irq, dest, cfg->vector);
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip ht_irq_chip = {
-	.name			= "PCI-HT",
-	.irq_mask		= mask_ht_irq,
-	.irq_unmask		= unmask_ht_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= ht_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-	struct irq_cfg *cfg;
-	struct ht_irq_msg msg;
-	unsigned dest;
-	int err;
-
-	if (disable_apic)
-		return -ENXIO;
-
-	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(cfg->domain,
-					   apic->target_cpus(), &dest);
-	if (err)
-		return err;
-
-	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-	msg.address_lo =
-		HT_IRQ_LOW_BASE |
-		HT_IRQ_LOW_DEST_ID(dest) |
-		HT_IRQ_LOW_VECTOR(cfg->vector) |
-		((apic->irq_dest_mode == 0) ?
-			HT_IRQ_LOW_DM_PHYSICAL :
-			HT_IRQ_LOW_DM_LOGICAL) |
-		HT_IRQ_LOW_RQEOI_EDGE |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			HT_IRQ_LOW_MT_FIXED :
-			HT_IRQ_LOW_MT_ARBITRATED) |
-		HT_IRQ_LOW_IRQ_MASKED;
-
-	write_ht_irq_msg(irq, &msg);
-
-	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
-				      handle_edge_irq, "edge");
-
-	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
-
-	return 0;
-}
-#endif /* CONFIG_HT_IRQ */
-
 static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
@@ -3451,7 +2462,7 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 	return ret;
 }
 
-static int __init io_apic_get_redir_entries(int ioapic)
+static int io_apic_get_redir_entries(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3476,28 +2487,8 @@ unsigned int arch_dynirq_lower_bound(unsigned int from)
 	return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
 }
 
-int __init arch_probe_nr_irqs(void)
-{
-	int nr;
-
-	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
-		nr_irqs = NR_VECTORS * nr_cpu_ids;
-
-	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
-	/*
-	 * for MSI and HT dyn irq
-	 */
-	nr += gsi_top * 16;
-#endif
-	if (nr < nr_irqs)
-		nr_irqs = nr;
-
-	return 0;
-}
-
 #ifdef CONFIG_X86_32
-static int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int io_apic_get_unique_id(int ioapic, int apic_id)
 {
 	union IO_APIC_reg_00 reg_00;
 	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3572,30 +2563,63 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id)
 	return apic_id;
 }
 
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
 {
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
 	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return io_apic_get_unique_id(nr_ioapics, id);
+		return io_apic_get_unique_id(idx, id);
 	else
 		return id;
 }
 #else
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
 {
-	int i;
+	union IO_APIC_reg_00 reg_00;
 	DECLARE_BITMAP(used, 256);
+	unsigned long flags;
+	u8 new_id;
+	int i;
 
 	bitmap_zero(used, 256);
 	for_each_ioapic(i)
 		__set_bit(mpc_ioapic_id(i), used);
+
+	/* Hand out the requested id if available */
 	if (!test_bit(id, used))
 		return id;
-	return find_first_zero_bit(used, 256);
+
+	/*
+	 * Read the current id from the ioapic and keep it if
+	 * available.
+	 */
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	new_id = reg_00.bits.ID;
+	if (!test_bit(new_id, used)) {
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
+			 idx, new_id, id);
+		return new_id;
+	}
+
+	/*
+	 * Get the next free id and write it to the ioapic.
+	 */
+	new_id = find_first_zero_bit(used, 256);
+	reg_00.bits.ID = new_id;
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(idx, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	/* Sanity check */
+	BUG_ON(reg_00.bits.ID != new_id);
+
+	return new_id;
 }
 #endif
 
-static int __init io_apic_get_version(int ioapic)
+static int io_apic_get_version(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3702,6 +2726,7 @@ static struct resource * __init ioapic_setup_resources(void)
 		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
 		mem += IOAPIC_RESOURCE_NAME_SIZE;
 		num++;
+		ioapics[i].iomem_res = res;
 	}
 
 	ioapic_resources = res;
@@ -3799,21 +2824,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
 	return gsi - gsi_cfg->gsi_base;
 }
 
-static __init int bad_ioapic(unsigned long address)
-{
-	if (nr_ioapics >= MAX_IO_APICS) {
-		pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
-			MAX_IO_APICS, nr_ioapics);
-		return 1;
-	}
-	if (!address) {
-		pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
-		return 1;
-	}
-	return 0;
-}
-
-static __init int bad_ioapic_register(int idx)
+static int bad_ioapic_register(int idx)
 {
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
@@ -3832,32 +2843,61 @@ static __init int bad_ioapic_register(int idx)
 	return 0;
 }
 
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
-			       struct ioapic_domain_cfg *cfg)
+static int find_free_ioapic_entry(void)
 {
-	int idx = 0;
-	int entries;
+	int idx;
+
+	for (idx = 0; idx < MAX_IO_APICS; idx++)
+		if (ioapics[idx].nr_registers == 0)
+			return idx;
+
+	return MAX_IO_APICS;
+}
+
+/**
+ * mp_register_ioapic - Register an IOAPIC device
+ * @id:		hardware IOAPIC ID
+ * @address:	physical address of IOAPIC register area
+ * @gsi_base:	base of GSI associated with the IOAPIC
+ * @cfg:	configuration information for the IOAPIC
+ */
+int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+		       struct ioapic_domain_cfg *cfg)
+{
+	bool hotplug = !!ioapic_initialized;
 	struct mp_ioapic_gsi *gsi_cfg;
+	int idx, ioapic, entries;
+	u32 gsi_end;
 
-	if (bad_ioapic(address))
-		return;
+	if (!address) {
+		pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");
+		return -EINVAL;
+	}
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].mp_config.apicaddr == address) {
+			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
+				address, ioapic);
+			return -EEXIST;
+		}
 
-	idx = nr_ioapics;
+	idx = find_free_ioapic_entry();
+	if (idx >= MAX_IO_APICS) {
+		pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
+			MAX_IO_APICS, idx);
+		return -ENOSPC;
+	}
 
 	ioapics[idx].mp_config.type = MP_IOAPIC;
 	ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
 	ioapics[idx].mp_config.apicaddr = address;
-	ioapics[idx].irqdomain = NULL;
-	ioapics[idx].irqdomain_cfg = *cfg;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-
 	if (bad_ioapic_register(idx)) {
 		clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
-		return;
+		return -ENODEV;
 	}
 
-	ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
+	ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id);
 	ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
 
 	/*
@@ -3865,24 +2905,112 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
 	entries = io_apic_get_redir_entries(idx);
+	gsi_end = gsi_base + entries - 1;
+	for_each_ioapic(ioapic) {
+		gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+		if ((gsi_base >= gsi_cfg->gsi_base &&
+		     gsi_base <= gsi_cfg->gsi_end) ||
+		    (gsi_end >= gsi_cfg->gsi_base &&
+		     gsi_end <= gsi_cfg->gsi_end)) {
+			pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n",
+				gsi_base, gsi_end,
+				gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOSPC;
+		}
+	}
 	gsi_cfg = mp_ioapic_gsi_routing(idx);
 	gsi_cfg->gsi_base = gsi_base;
-	gsi_cfg->gsi_end = gsi_base + entries - 1;
+	gsi_cfg->gsi_end = gsi_end;
+
+	ioapics[idx].irqdomain = NULL;
+	ioapics[idx].irqdomain_cfg = *cfg;
 
 	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
+	 * If mp_register_ioapic() is called during early boot stage when
+	 * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
+	 * we are still using bootmem allocator. So delay it to setup_IO_APIC().
 	 */
-	ioapics[idx].nr_registers = entries;
+	if (hotplug) {
+		if (mp_irqdomain_create(idx)) {
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOMEM;
+		}
+		alloc_ioapic_saved_registers(idx);
+	}
 
 	if (gsi_cfg->gsi_end >= gsi_top)
 		gsi_top = gsi_cfg->gsi_end + 1;
+	if (nr_ioapics <= idx)
+		nr_ioapics = idx + 1;
+
+	/* Set nr_registers to mark entry present */
+	ioapics[idx].nr_registers = entries;
 
 	pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
 		idx, mpc_ioapic_id(idx),
 		mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
 		gsi_cfg->gsi_base, gsi_cfg->gsi_end);
 
-	nr_ioapics++;
+	return 0;
+}
+
+int mp_unregister_ioapic(u32 gsi_base)
+{
+	int ioapic, pin;
+	int found = 0;
+	struct mp_pin_info *pin_info;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
+			found = 1;
+			break;
+		}
+	if (!found) {
+		pr_warn("can't find IOAPIC for GSI %d\n", gsi_base);
+		return -ENODEV;
+	}
+
+	for_each_pin(ioapic, pin) {
+		pin_info = mp_pin_info(ioapic, pin);
+		if (pin_info->count) {
+			pr_warn("pin%d on IOAPIC%d is still in use.\n",
+				pin, ioapic);
+			return -EBUSY;
+		}
+	}
+
+	/* Mark entry not present */
+	ioapics[ioapic].nr_registers  = 0;
+	ioapic_destroy_irqdomain(ioapic);
+	free_ioapic_saved_registers(ioapic);
+	if (ioapics[ioapic].iomem_res)
+		release_resource(ioapics[ioapic].iomem_res);
+	clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic);
+	memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic]));
+
+	return 0;
+}
+
+int mp_ioapic_registered(u32 gsi_base)
+{
+	int ioapic;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base)
+			return 1;
+
+	return 0;
+}
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+					int ioapic, int ioapic_pin,
+					int trigger, int polarity)
+{
+	irq_attr->ioapic	= ioapic;
+	irq_attr->ioapic_pin	= ioapic_pin;
+	irq_attr->trigger	= trigger;
+	irq_attr->polarity	= polarity;
 }
 
 int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
@@ -3931,7 +3059,7 @@ void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq)
 
 	ioapic_mask_entry(ioapic, pin);
 	__remove_pin_from_irq(cfg, ioapic, pin);
-	WARN_ON(cfg->irq_2_pin != NULL);
+	WARN_ON(!list_empty(&cfg->irq_2_pin));
 	arch_teardown_hwirq(virq);
 }
 
@@ -3964,18 +3092,6 @@ int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node)
 	return ret;
 }
 
-bool mp_should_keep_irq(struct device *dev)
-{
-	if (dev->power.is_prepared)
-		return true;
-#ifdef	CONFIG_PM_RUNTIME
-	if (dev->power.runtime_status == RPM_SUSPENDING)
-		return true;
-#endif
-
-	return false;
-}
-
 /* Enable IOAPIC early just for system timer */
 void __init pre_init_apic_IRQ0(void)
 {
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
new file mode 100644
index 000000000000..d6ba2d660dc5
--- /dev/null
+++ b/arch/x86/kernel/apic/msi.c
@@ -0,0 +1,286 @@
+/*
+ * Support of MSI, HPET and DMAR interrupts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/hpet.h>
+#include <linux/msi.h>
+#include <asm/msidef.h>
+#include <asm/hpet.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/irq_remapping.h>
+
+void native_compose_msi_msg(struct pci_dev *pdev,
+			    unsigned int irq, unsigned int dest,
+			    struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	msg->address_hi = MSI_ADDR_BASE_HI;
+
+	if (x2apic_enabled())
+		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
+
+	msg->address_lo =
+		MSI_ADDR_BASE_LO |
+		((apic->irq_dest_mode == 0) ?
+			MSI_ADDR_DEST_MODE_PHYSICAL :
+			MSI_ADDR_DEST_MODE_LOGICAL) |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_ADDR_REDIRECTION_CPU :
+			MSI_ADDR_REDIRECTION_LOWPRI) |
+		MSI_ADDR_DEST_ID(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_DATA_DELIVERY_FIXED :
+			MSI_DATA_DELIVERY_LOWPRI) |
+		MSI_DATA_VECTOR(cfg->vector);
+}
+
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+			   struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg;
+	int err;
+	unsigned dest;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+
+	return 0;
+}
+
+static int
+msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	__get_cached_msi_msg(data->msi_desc, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	__pci_write_msi_msg(data->msi_desc, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_unmask		= pci_msi_unmask_irq,
+	.irq_mask		= pci_msi_mask_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+		  unsigned int irq_base, unsigned int irq_offset)
+{
+	struct irq_chip *chip = &msi_chip;
+	struct msi_msg msg;
+	unsigned int irq = irq_base + irq_offset;
+	int ret;
+
+	ret = msi_compose_msg(dev, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+
+	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
+
+	/*
+	 * MSI-X message is written per-IRQ, the offset is always 0.
+	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+	 */
+	if (!irq_offset)
+		pci_write_msi_msg(irq, &msg);
+
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
+	return 0;
+}
+
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct msi_desc *msidesc;
+	unsigned int irq;
+	int node, ret;
+
+	/* Multiple MSI vectors only supported with interrupt remapping */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+
+	node = dev_to_node(&dev->dev);
+
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = irq_alloc_hwirq(node);
+		if (!irq)
+			return -ENOSPC;
+
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (ret < 0) {
+			irq_free_hwirq(irq);
+			return ret;
+		}
+
+	}
+	return 0;
+}
+
+void native_teardown_msi_irq(unsigned int irq)
+{
+	irq_free_hwirq(irq);
+}
+
+#ifdef CONFIG_DMAR_TABLE
+static int
+dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest, irq = data->irq;
+	struct msi_msg msg;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
+
+	dmar_msi_write(irq, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip dmar_msi_type = {
+	.name			= "DMAR_MSI",
+	.irq_unmask		= dmar_msi_unmask,
+	.irq_mask		= dmar_msi_mask,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= dmar_msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+				      "edge");
+	return 0;
+}
+#endif
+
+/*
+ * MSI message composition
+ */
+#ifdef CONFIG_HPET_TIMER
+
+static int hpet_msi_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	hpet_msi_read(data->handler_data, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	hpet_msi_write(data->handler_data, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip hpet_msi_type = {
+	.name = "HPET_MSI",
+	.irq_unmask = hpet_msi_unmask,
+	.irq_mask = hpet_msi_mask,
+	.irq_ack = apic_ack_edge,
+	.irq_set_affinity = hpet_msi_set_affinity,
+	.irq_retrigger = apic_retrigger_irq,
+	.flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+int default_setup_hpet_msi(unsigned int irq, unsigned int id)
+{
+	struct irq_chip *chip = &hpet_msi_type;
+	struct msi_msg msg;
+	int ret;
+
+	ret = msi_compose_msg(NULL, irq, &msg, id);
+	if (ret < 0)
+		return ret;
+
+	hpet_msi_write(irq_get_handler_data(irq), &msg);
+	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+	return 0;
+}
+#endif
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
new file mode 100644
index 000000000000..6cedd7914581
--- /dev/null
+++ b/arch/x86/kernel/apic/vector.c
@@ -0,0 +1,719 @@
+/*
+ * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+#include <asm/desc.h>
+#include <asm/irq_remapping.h>
+
+static DEFINE_RAW_SPINLOCK(vector_lock);
+
+void lock_vector_lock(void)
+{
+	/* Used to the online set of cpus does not change
+	 * during assign_irq_vector.
+	 */
+	raw_spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+	raw_spin_unlock(&vector_lock);
+}
+
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq_get_chip_data(irq);
+}
+
+struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
+{
+	return irq_data->chip_data;
+}
+
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg;
+
+	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
+	if (!cfg)
+		return NULL;
+	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
+		goto out_cfg;
+	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
+		goto out_domain;
+#ifdef	CONFIG_X86_IO_APIC
+	INIT_LIST_HEAD(&cfg->irq_2_pin);
+#endif
+	return cfg;
+out_domain:
+	free_cpumask_var(cfg->domain);
+out_cfg:
+	kfree(cfg);
+	return NULL;
+}
+
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
+{
+	int res = irq_alloc_desc_at(at, node);
+	struct irq_cfg *cfg;
+
+	if (res < 0) {
+		if (res != -EEXIST)
+			return NULL;
+		cfg = irq_cfg(at);
+		if (cfg)
+			return cfg;
+	}
+
+	cfg = alloc_irq_cfg(at, node);
+	if (cfg)
+		irq_set_chip_data(at, cfg);
+	else
+		irq_free_desc(at);
+	return cfg;
+}
+
+static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
+{
+	if (!cfg)
+		return;
+	irq_set_chip_data(at, NULL);
+	free_cpumask_var(cfg->domain);
+	free_cpumask_var(cfg->old_domain);
+	kfree(cfg);
+}
+
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
+	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+	static int current_offset = VECTOR_OFFSET_START % 16;
+	int cpu, err;
+	cpumask_var_t tmp_mask;
+
+	if (cfg->move_in_progress)
+		return -EBUSY;
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	/* Only try and allocate irqs on cpus that are present */
+	err = -ENOSPC;
+	cpumask_clear(cfg->old_domain);
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	while (cpu < nr_cpu_ids) {
+		int new_cpu, vector, offset;
+
+		apic->vector_allocation_domain(cpu, tmp_mask, mask);
+
+		if (cpumask_subset(tmp_mask, cfg->domain)) {
+			err = 0;
+			if (cpumask_equal(tmp_mask, cfg->domain))
+				break;
+			/*
+			 * New cpumask using the vector is a proper subset of
+			 * the current in use mask. So cleanup the vector
+			 * allocation for the members that are not used anymore.
+			 */
+			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+			break;
+		}
+
+		vector = current_vector;
+		offset = current_offset;
+next:
+		vector += 16;
+		if (vector >= first_system_vector) {
+			offset = (offset + 1) % 16;
+			vector = FIRST_EXTERNAL_VECTOR + offset;
+		}
+
+		if (unlikely(current_vector == vector)) {
+			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
+			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
+			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
+			continue;
+		}
+
+		if (test_bit(vector, used_vectors))
+			goto next;
+
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
+			if (per_cpu(vector_irq, new_cpu)[vector] >
+			    VECTOR_UNDEFINED)
+				goto next;
+		}
+		/* Found one! */
+		current_vector = vector;
+		current_offset = offset;
+		if (cfg->vector) {
+			cpumask_copy(cfg->old_domain, cfg->domain);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+		}
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+		cfg->vector = vector;
+		cpumask_copy(cfg->domain, tmp_mask);
+		err = 0;
+		break;
+	}
+	free_cpumask_var(tmp_mask);
+
+	return err;
+}
+
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	int err;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	err = __assign_irq_vector(irq, cfg, mask);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+	return err;
+}
+
+void clear_irq_vector(int irq, struct irq_cfg *cfg)
+{
+	int cpu, vector;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	BUG_ON(!cfg->vector);
+
+	vector = cfg->vector;
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+
+	cfg->vector = 0;
+	cpumask_clear(cfg->domain);
+
+	if (likely(!cfg->move_in_progress)) {
+		raw_spin_unlock_irqrestore(&vector_lock, flags);
+		return;
+	}
+
+	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+		     vector++) {
+			if (per_cpu(vector_irq, cpu)[vector] != irq)
+				continue;
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+			break;
+		}
+	}
+	cfg->move_in_progress = 0;
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+int __init arch_probe_nr_irqs(void)
+{
+	int nr;
+
+	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
+		nr_irqs = NR_VECTORS * nr_cpu_ids;
+
+	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+	/*
+	 * for MSI and HT dyn irq
+	 */
+	if (gsi_top <= NR_IRQS_LEGACY)
+		nr +=  8 * nr_cpu_ids;
+	else
+		nr += gsi_top * 16;
+#endif
+	if (nr < nr_irqs)
+		nr_irqs = nr;
+
+	return nr_legacy_irqs();
+}
+
+int __init arch_early_irq_init(void)
+{
+	return arch_early_ioapic_init();
+}
+
+static void __setup_vector_irq(int cpu)
+{
+	/* Initialize vector_irq on a new cpu */
+	int irq, vector;
+	struct irq_cfg *cfg;
+
+	/*
+	 * vector_lock will make sure that we don't run into irq vector
+	 * assignments that might be happening on another cpu in parallel,
+	 * while we setup our initial vector to irq mappings.
+	 */
+	raw_spin_lock(&vector_lock);
+	/* Mark the inuse vectors */
+	for_each_active_irq(irq) {
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			continue;
+		vector = cfg->vector;
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+	}
+	raw_spin_unlock(&vector_lock);
+}
+
+/*
+ * Setup the vector to irq mappings.
+ */
+void setup_vector_irq(int cpu)
+{
+	int irq;
+
+	/*
+	 * On most of the platforms, legacy PIC delivers the interrupts on the
+	 * boot cpu. But there are certain platforms where PIC interrupts are
+	 * delivered to multiple cpu's. If the legacy IRQ is handled by the
+	 * legacy PIC, for the new cpu that is coming online, setup the static
+	 * legacy vector to irq mapping:
+	 */
+	for (irq = 0; irq < nr_legacy_irqs(); irq++)
+		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
+
+	__setup_vector_irq(cpu);
+}
+
+int apic_retrigger_irq(struct irq_data *data)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned long flags;
+	int cpu;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
+	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	return 1;
+}
+
+void apic_ack_edge(struct irq_data *data)
+{
+	irq_complete_move(irqd_cfg(data));
+	irq_move_irq(data);
+	ack_APIC_irq();
+}
+
+/*
+ * Either sets data->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
+ * leaves data->affinity untouched.
+ */
+int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      unsigned int *dest_id)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int irq = data->irq;
+	int err;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -EPERM;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return -EINVAL;
+
+	err = assign_irq_vector(irq, cfg, mask);
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+	if (err) {
+		if (assign_irq_vector(irq, cfg, data->affinity))
+			pr_err("Failed to recover vector for irq %d\n", irq);
+		return err;
+	}
+
+	cpumask_copy(data->affinity, mask);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			apic->send_IPI_mask(cpumask_of(i),
+					    IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
+asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+
+	ack_APIC_irq();
+	irq_enter();
+	exit_idle();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		int irq;
+		unsigned int irr;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+
+		irq = __this_cpu_read(vector_irq[vector]);
+
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		raw_spin_lock(&desc->lock);
+
+		/*
+		 * Check if the irq migration is in progress. If so, we
+		 * haven't received the cleanup request yet for this irq.
+		 */
+		if (cfg->move_in_progress)
+			goto unlock;
+
+		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+			goto unlock;
+
+		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+		/*
+		 * Check if the vector that needs to be cleanedup is
+		 * registered at the cpu's IRR. If so, then this is not
+		 * the best time to clean it up. Lets clean it up in the
+		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
+		 * to myself.
+		 */
+		if (irr  & (1 << (vector % 32))) {
+			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+			goto unlock;
+		}
+		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
+unlock:
+		raw_spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+{
+	unsigned me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	me = smp_processor_id();
+
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+		send_cleanup_vector(cfg);
+}
+
+void irq_complete_move(struct irq_cfg *cfg)
+{
+	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	if (!cfg)
+		return;
+
+	__irq_complete_move(cfg, cfg->vector);
+}
+#endif
+
+/*
+ * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
+ */
+int arch_setup_hwirq(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	int ret;
+
+	cfg = alloc_irq_cfg(irq, node);
+	if (!cfg)
+		return -ENOMEM;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	if (!ret)
+		irq_set_chip_data(irq, cfg);
+	else
+		free_irq_cfg(irq, cfg);
+	return ret;
+}
+
+void arch_teardown_hwirq(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	free_remapped_irq(irq);
+	clear_irq_vector(irq, cfg);
+	free_irq_cfg(irq, cfg);
+}
+
+static void __init print_APIC_field(int base)
+{
+	int i;
+
+	printk(KERN_DEBUG);
+
+	for (i = 0; i < 8; i++)
+		pr_cont("%08x", apic_read(base + i*0x10));
+
+	pr_cont("\n");
+}
+
+static void __init print_local_APIC(void *dummy)
+{
+	unsigned int i, v, ver, maxlvt;
+	u64 icr;
+
+	pr_debug("printing local APIC contents on CPU#%d/%d:\n",
+		 smp_processor_id(), hard_smp_processor_id());
+	v = apic_read(APIC_ID);
+	pr_info("... APIC ID:      %08x (%01x)\n", v, read_apic_id());
+	v = apic_read(APIC_LVR);
+	pr_info("... APIC VERSION: %08x\n", v);
+	ver = GET_APIC_VERSION(v);
+	maxlvt = lapic_get_maxlvt();
+
+	v = apic_read(APIC_TASKPRI);
+	pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		if (!APIC_XAPIC(ver)) {
+			v = apic_read(APIC_ARBPRI);
+			pr_debug("... APIC ARBPRI: %08x (%02x)\n",
+				 v, v & APIC_ARBPRI_MASK);
+		}
+		v = apic_read(APIC_PROCPRI);
+		pr_debug("... APIC PROCPRI: %08x\n", v);
+	}
+
+	/*
+	 * Remote read supported only in the 82489DX and local APIC for
+	 * Pentium processors.
+	 */
+	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+		v = apic_read(APIC_RRR);
+		pr_debug("... APIC RRR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_LDR);
+	pr_debug("... APIC LDR: %08x\n", v);
+	if (!x2apic_enabled()) {
+		v = apic_read(APIC_DFR);
+		pr_debug("... APIC DFR: %08x\n", v);
+	}
+	v = apic_read(APIC_SPIV);
+	pr_debug("... APIC SPIV: %08x\n", v);
+
+	pr_debug("... APIC ISR field:\n");
+	print_APIC_field(APIC_ISR);
+	pr_debug("... APIC TMR field:\n");
+	print_APIC_field(APIC_TMR);
+	pr_debug("... APIC IRR field:\n");
+	print_APIC_field(APIC_IRR);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		/* Due to the Pentium erratum 3AP. */
+		if (maxlvt > 3)
+			apic_write(APIC_ESR, 0);
+
+		v = apic_read(APIC_ESR);
+		pr_debug("... APIC ESR: %08x\n", v);
+	}
+
+	icr = apic_icr_read();
+	pr_debug("... APIC ICR: %08x\n", (u32)icr);
+	pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32));
+
+	v = apic_read(APIC_LVTT);
+	pr_debug("... APIC LVTT: %08x\n", v);
+
+	if (maxlvt > 3) {
+		/* PC is LVT#4. */
+		v = apic_read(APIC_LVTPC);
+		pr_debug("... APIC LVTPC: %08x\n", v);
+	}
+	v = apic_read(APIC_LVT0);
+	pr_debug("... APIC LVT0: %08x\n", v);
+	v = apic_read(APIC_LVT1);
+	pr_debug("... APIC LVT1: %08x\n", v);
+
+	if (maxlvt > 2) {
+		/* ERR is LVT#3. */
+		v = apic_read(APIC_LVTERR);
+		pr_debug("... APIC LVTERR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_TMICT);
+	pr_debug("... APIC TMICT: %08x\n", v);
+	v = apic_read(APIC_TMCCT);
+	pr_debug("... APIC TMCCT: %08x\n", v);
+	v = apic_read(APIC_TDCR);
+	pr_debug("... APIC TDCR: %08x\n", v);
+
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		v = apic_read(APIC_EFEAT);
+		maxlvt = (v >> 16) & 0xff;
+		pr_debug("... APIC EFEAT: %08x\n", v);
+		v = apic_read(APIC_ECTRL);
+		pr_debug("... APIC ECTRL: %08x\n", v);
+		for (i = 0; i < maxlvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			pr_debug("... APIC EILVT%d: %08x\n", i, v);
+		}
+	}
+	pr_cont("\n");
+}
+
+static void __init print_local_APICs(int maxcpu)
+{
+	int cpu;
+
+	if (!maxcpu)
+		return;
+
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		if (cpu >= maxcpu)
+			break;
+		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+	}
+	preempt_enable();
+}
+
+static void __init print_PIC(void)
+{
+	unsigned int v;
+	unsigned long flags;
+
+	if (!nr_legacy_irqs())
+		return;
+
+	pr_debug("\nprinting PIC contents\n");
+
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
+	v = inb(0xa1) << 8 | inb(0x21);
+	pr_debug("... PIC  IMR: %04x\n", v);
+
+	v = inb(0xa0) << 8 | inb(0x20);
+	pr_debug("... PIC  IRR: %04x\n", v);
+
+	outb(0x0b, 0xa0);
+	outb(0x0b, 0x20);
+	v = inb(0xa0) << 8 | inb(0x20);
+	outb(0x0a, 0xa0);
+	outb(0x0a, 0x20);
+
+	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	pr_debug("... PIC  ISR: %04x\n", v);
+
+	v = inb(0x4d1) << 8 | inb(0x4d0);
+	pr_debug("... PIC ELCR: %04x\n", v);
+}
+
+static int show_lapic __initdata = 1;
+static __init int setup_show_lapic(char *arg)
+{
+	int num = -1;
+
+	if (strcmp(arg, "all") == 0) {
+		show_lapic = CONFIG_NR_CPUS;
+	} else {
+		get_option(&arg, &num);
+		if (num >= 0)
+			show_lapic = num;
+	}
+
+	return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+static int __init print_ICs(void)
+{
+	if (apic_verbosity == APIC_QUIET)
+		return 0;
+
+	print_PIC();
+
+	/* don't print out if apic is not there */
+	if (!cpu_has_apic && !apic_from_smp_config())
+		return 0;
+
+	print_local_APICs(show_lapic);
+	print_IO_APICs();
+
+	return 0;
+}
+
+late_initcall(print_ICs);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 08f3fed2b0f2..10b8d3eaaf15 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -276,6 +276,17 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
 	return box;
 }
 
+/*
+ * Using uncore_pmu_event_init pmu event_init callback
+ * as a detection point for uncore events.
+ */
+static int uncore_pmu_event_init(struct perf_event *event);
+
+static bool is_uncore_event(struct perf_event *event)
+{
+	return event->pmu->event_init == uncore_pmu_event_init;
+}
+
 static int
 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
 {
@@ -290,13 +301,18 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, b
 		return -EINVAL;
 
 	n = box->n_events;
-	box->event_list[n] = leader;
-	n++;
+
+	if (is_uncore_event(leader)) {
+		box->event_list[n] = leader;
+		n++;
+	}
+
 	if (!dogrp)
 		return n;
 
 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF)
+		if (!is_uncore_event(event) ||
+		    event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 
 		if (n >= max_count)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index f5ab56d14287..aceb2f90c716 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -28,6 +28,7 @@
 #include <asm/nmi.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/hpet.h>
 #include <linux/kdebug.h>
 #include <asm/cpu.h>
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 2e1a6853e00c..fe9f0b79a18b 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -455,6 +455,23 @@ struct intel_stolen_funcs {
 	u32 (*base)(int num, int slot, int func, size_t size);
 };
 
+static size_t __init gen9_stolen_size(int num, int slot, int func)
+{
+	u16 gmch_ctrl;
+
+	gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
+	gmch_ctrl >>= BDW_GMCH_GMS_SHIFT;
+	gmch_ctrl &= BDW_GMCH_GMS_MASK;
+
+	if (gmch_ctrl < 0xf0)
+		return gmch_ctrl << 25; /* 32 MB units */
+	else
+		/* 4MB increments starting at 0xf0 for 4MB */
+		return (gmch_ctrl - 0xf0 + 1) << 22;
+}
+
+typedef size_t (*stolen_size_fn)(int num, int slot, int func);
+
 static const struct intel_stolen_funcs i830_stolen_funcs __initconst = {
 	.base = i830_stolen_base,
 	.size = i830_stolen_size,
@@ -490,6 +507,11 @@ static const struct intel_stolen_funcs gen8_stolen_funcs __initconst = {
 	.size = gen8_stolen_size,
 };
 
+static const struct intel_stolen_funcs gen9_stolen_funcs __initconst = {
+	.base = intel_stolen_base,
+	.size = gen9_stolen_size,
+};
+
 static const struct intel_stolen_funcs chv_stolen_funcs __initconst = {
 	.base = intel_stolen_base,
 	.size = chv_stolen_size,
@@ -523,6 +545,7 @@ static const struct pci_device_id intel_stolen_ids[] __initconst = {
 	INTEL_BDW_M_IDS(&gen8_stolen_funcs),
 	INTEL_BDW_D_IDS(&gen8_stolen_funcs),
 	INTEL_CHV_IDS(&chv_stolen_funcs),
+	INTEL_SKL_IDS(&gen9_stolen_funcs),
 };
 
 static void __init intel_graphics_stolen(int num, int slot, int func)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 1cf7c97ff175..000d4199b03e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -732,10 +732,10 @@ ENTRY(interrupt)
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
 vector=FIRST_EXTERNAL_VECTOR
-.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
 	.balign 32
   .rept	7
-    .if vector < NR_VECTORS
+    .if vector < FIRST_SYSTEM_VECTOR
       .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -4
       .endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 90878aa38dbd..9ebaf63ba182 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -740,10 +740,10 @@ ENTRY(interrupt)
 ENTRY(irq_entries_start)
 	INTR_FRAME
 vector=FIRST_EXTERNAL_VECTOR
-.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
 	.balign 32
   .rept	7
-    .if vector < NR_VECTORS
+    .if vector < FIRST_SYSTEM_VECTOR
       .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -8
       .endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 4de73ee78361..70e181ea1eac 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -99,32 +99,9 @@ void __init init_IRQ(void)
 	x86_init.irqs.intr_init();
 }
 
-/*
- * Setup the vector to irq mappings.
- */
-void setup_vector_irq(int cpu)
-{
-#ifndef CONFIG_X86_IO_APIC
-	int irq;
-
-	/*
-	 * On most of the platforms, legacy PIC delivers the interrupts on the
-	 * boot cpu. But there are certain platforms where PIC interrupts are
-	 * delivered to multiple cpu's. If the legacy IRQ is handled by the
-	 * legacy PIC, for the new cpu that is coming online, setup the static
-	 * legacy vector to irq mapping:
-	 */
-	for (irq = 0; irq < nr_legacy_irqs(); irq++)
-		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
-#endif
-
-	__setup_vector_irq(cpu);
-}
-
 static void __init smp_intr_init(void)
 {
 #ifdef CONFIG_SMP
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -144,7 +121,6 @@ static void __init smp_intr_init(void)
 
 	/* IPI used for rebooting/stopping */
 	alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
-#endif
 #endif /* CONFIG_SMP */
 }
 
@@ -159,7 +135,7 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 #endif
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+#ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -197,10 +173,17 @@ void __init native_init_IRQ(void)
 	 * 'special' SMP interrupts)
 	 */
 	i = FIRST_EXTERNAL_VECTOR;
-	for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
+#ifndef CONFIG_X86_LOCAL_APIC
+#define first_system_vector NR_VECTORS
+#endif
+	for_each_clear_bit_from(i, used_vectors, first_system_vector) {
 		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
 		set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
 	}
+#ifdef CONFIG_X86_LOCAL_APIC
+	for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
+		set_intr_gate(i, spurious_interrupt);
+#endif
 
 	if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
 		setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f6945bef2cd1..94f643484300 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault);
 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
-	pv_info.paravirt_enabled = 1;
+
+	/*
+	 * KVM isn't paravirt in the sense of paravirt_enabled.  A KVM
+	 * guest kernel works like a bare metal kernel with additional
+	 * features, and paravirt_enabled is about features that are
+	 * missing.
+	 */
+	pv_info.paravirt_enabled = 0;
 
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d9156ceecdff..42caaef897c8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -59,13 +59,12 @@ static void kvm_get_wallclock(struct timespec *now)
 
 	native_write_msr(msr_kvm_wall_clock, low, high);
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 
 	vcpu_time = &hv_clock[cpu].pvti;
 	pvclock_read_wallclock(&wall_clock, vcpu_time, now);
 
-	preempt_enable();
+	put_cpu();
 }
 
 static int kvm_set_wallclock(const struct timespec *now)
@@ -107,11 +106,10 @@ static unsigned long kvm_get_tsc_khz(void)
 	int cpu;
 	unsigned long tsc_khz;
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	src = &hv_clock[cpu].pvti;
 	tsc_khz = pvclock_tsc_khz(src);
-	preempt_enable();
+	put_cpu();
 	return tsc_khz;
 }
 
@@ -263,7 +261,6 @@ void __init kvmclock_init(void)
 #endif
 	kvm_get_preset_lpj();
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
-	pv_info.paravirt_enabled = 1;
 	pv_info.name = "KVM";
 
 	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
@@ -284,23 +281,22 @@ int __init kvm_setup_vsyscall_timeinfo(void)
 
 	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 
 	vcpu_time = &hv_clock[cpu].pvti;
 	flags = pvclock_read_flags(vcpu_time);
 
 	if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
-		preempt_enable();
+		put_cpu();
 		return 1;
 	}
 
 	if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
-		preempt_enable();
+		put_cpu();
 		return ret;
 	}
 
-	preempt_enable();
+	put_cpu();
 
 	kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
 #endif
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 72e8e310258d..469b23d6acc2 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -20,6 +20,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
 #include <asm/cacheflush.h>
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 485981059a40..415480d3ea84 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -22,6 +22,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/io_apic.h>
 #include <asm/debugreg.h>
 #include <asm/kexec-bzimage64.h>
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 17962e667a91..bae6c609888e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/desc.h>
 #include <asm/hpet.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7a8f5845e8eb..6d7022c683e3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1084,7 +1084,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int i;
 
-	preempt_disable();
 	smp_cpu_index_default();
 
 	/*
@@ -1102,22 +1101,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	}
 	set_cpu_sibling_map(0);
 
-
 	if (smp_sanity_check(max_cpus) < 0) {
 		pr_info("SMP disabled\n");
 		disable_smp();
-		goto out;
+		return;
 	}
 
 	default_setup_apic_routing();
 
-	preempt_disable();
 	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
 		     read_apic_id(), boot_cpu_physical_apicid);
 		/* Or can we switch back to PIC here? */
 	}
-	preempt_enable();
 
 	connect_bsp_APIC();
 
@@ -1151,8 +1147,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 		uv_system_init();
 
 	set_mtrr_aps_delayed_init();
-out:
-	preempt_enable();
 }
 
 void arch_enable_nonboot_cpus_begin(void)
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 3e551eee87b9..4e942f31b1a7 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -55,12 +55,6 @@ static bool tls_desc_okay(const struct user_desc *info)
 	if (info->seg_not_present)
 		return false;
 
-#ifdef CONFIG_X86_64
-	/* The L bit makes no sense for data. */
-	if (info->lm)
-		return false;
-#endif
-
 	return true;
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a9ae20579895..88900e288021 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -331,7 +331,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 		break; /* Success, it was handled */
 	case 1: /* Bound violation. */
 		info = mpx_generate_siginfo(regs, xsave_buf);
-		if (PTR_ERR(info)) {
+		if (IS_ERR(info)) {
 			/*
 			 * We failed to decode the MPX instruction.  Act as if
 			 * the exception was not caused by MPX.
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 4c540c4719d8..0de1fae2bdf0 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -738,3 +738,4 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
 
 	return (void *)xsave + xstate_comp_offsets[feature];
 }
+EXPORT_SYMBOL_GPL(get_xsave_addr);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 25d22b2d6509..08f790dfadc9 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,14 +7,13 @@ CFLAGS_vmx.o := -I.
 
 KVM := ../../../virt/kvm
 
-kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o cpuid.o pmu.o
+			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o
 
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
new file mode 100644
index 000000000000..6eb5c20ee373
--- /dev/null
+++ b/arch/x86/kvm/assigned-dev.c
@@ -0,0 +1,1052 @@
+/*
+ * Kernel-based Virtual Machine - device assignment support
+ *
+ * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/fs.h>
+#include "irq.h"
+#include "assigned-dev.h"
+
+struct kvm_assigned_dev_kernel {
+	struct kvm_irq_ack_notifier ack_notifier;
+	struct list_head list;
+	int assigned_dev_id;
+	int host_segnr;
+	int host_busnr;
+	int host_devfn;
+	unsigned int entries_nr;
+	int host_irq;
+	bool host_irq_disabled;
+	bool pci_2_3;
+	struct msix_entry *host_msix_entries;
+	int guest_irq;
+	struct msix_entry *guest_msix_entries;
+	unsigned long irq_requested_type;
+	int irq_source_id;
+	int flags;
+	struct pci_dev *dev;
+	struct kvm *kvm;
+	spinlock_t intx_lock;
+	spinlock_t intx_mask_lock;
+	char irq_name[32];
+	struct pci_saved_state *pci_saved_state;
+};
+
+static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
+						      int assigned_dev_id)
+{
+	struct list_head *ptr;
+	struct kvm_assigned_dev_kernel *match;
+
+	list_for_each(ptr, head) {
+		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
+		if (match->assigned_dev_id == assigned_dev_id)
+			return match;
+	}
+	return NULL;
+}
+
+static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
+				    *assigned_dev, int irq)
+{
+	int i, index;
+	struct msix_entry *host_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+
+	index = -1;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (irq == host_msix_entries[i].vector) {
+			index = i;
+			break;
+		}
+	if (index < 0)
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+
+	return index;
+}
+
+static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int ret;
+
+	spin_lock(&assigned_dev->intx_lock);
+	if (pci_check_and_mask_intx(assigned_dev->dev)) {
+		assigned_dev->host_irq_disabled = true;
+		ret = IRQ_WAKE_THREAD;
+	} else
+		ret = IRQ_NONE;
+	spin_unlock(&assigned_dev->intx_lock);
+
+	return ret;
+}
+
+static void
+kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
+				 int vector)
+{
+	if (unlikely(assigned_dev->irq_requested_type &
+		     KVM_DEV_IRQ_GUEST_INTX)) {
+		spin_lock(&assigned_dev->intx_mask_lock);
+		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
+			kvm_set_irq(assigned_dev->kvm,
+				    assigned_dev->irq_source_id, vector, 1,
+				    false);
+		spin_unlock(&assigned_dev->intx_mask_lock);
+	} else
+		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+			    vector, 1, false);
+}
+
+static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+
+	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
+		spin_lock_irq(&assigned_dev->intx_lock);
+		disable_irq_nosync(irq);
+		assigned_dev->host_irq_disabled = true;
+		spin_unlock_irq(&assigned_dev->intx_lock);
+	}
+
+	kvm_assigned_dev_raise_guest_irq(assigned_dev,
+					 assigned_dev->guest_irq);
+
+	return IRQ_HANDLED;
+}
+
+#ifdef __KVM_HAVE_MSI
+static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+				       assigned_dev->irq_source_id,
+				       assigned_dev->guest_irq, 1);
+	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
+static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+
+	kvm_assigned_dev_raise_guest_irq(assigned_dev,
+					 assigned_dev->guest_irq);
+
+	return IRQ_HANDLED;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int index = find_index_from_host_irq(assigned_dev, irq);
+	u32 vector;
+	int ret = 0;
+
+	if (index >= 0) {
+		vector = assigned_dev->guest_msix_entries[index].vector;
+		ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+					   assigned_dev->irq_source_id,
+					   vector, 1);
+	}
+
+	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
+static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int index = find_index_from_host_irq(assigned_dev, irq);
+	u32 vector;
+
+	if (index >= 0) {
+		vector = assigned_dev->guest_msix_entries[index].vector;
+		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
+	}
+
+	return IRQ_HANDLED;
+}
+#endif
+
+/* Ack the irq line for an assigned device */
+static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+	struct kvm_assigned_dev_kernel *dev =
+		container_of(kian, struct kvm_assigned_dev_kernel,
+			     ack_notifier);
+
+	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
+
+	spin_lock(&dev->intx_mask_lock);
+
+	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
+		bool reassert = false;
+
+		spin_lock_irq(&dev->intx_lock);
+		/*
+		 * The guest IRQ may be shared so this ack can come from an
+		 * IRQ for another guest device.
+		 */
+		if (dev->host_irq_disabled) {
+			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
+				enable_irq(dev->host_irq);
+			else if (!pci_check_and_unmask_intx(dev->dev))
+				reassert = true;
+			dev->host_irq_disabled = reassert;
+		}
+		spin_unlock_irq(&dev->intx_lock);
+
+		if (reassert)
+			kvm_set_irq(dev->kvm, dev->irq_source_id,
+				    dev->guest_irq, 1, false);
+	}
+
+	spin_unlock(&dev->intx_mask_lock);
+}
+
+static void deassign_guest_irq(struct kvm *kvm,
+			       struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	if (assigned_dev->ack_notifier.gsi != -1)
+		kvm_unregister_irq_ack_notifier(kvm,
+						&assigned_dev->ack_notifier);
+
+	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+		    assigned_dev->guest_irq, 0, false);
+
+	if (assigned_dev->irq_source_id != -1)
+		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+	assigned_dev->irq_source_id = -1;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+}
+
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
+static void deassign_host_irq(struct kvm *kvm,
+			      struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	/*
+	 * We disable irq here to prevent further events.
+	 *
+	 * Notice this maybe result in nested disable if the interrupt type is
+	 * INTx, but it's OK for we are going to free it.
+	 *
+	 * If this function is a part of VM destroy, please ensure that till
+	 * now, the kvm state is still legal for probably we also have to wait
+	 * on a currently running IRQ handler.
+	 */
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		int i;
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			disable_irq(assigned_dev->host_msix_entries[i].vector);
+
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			free_irq(assigned_dev->host_msix_entries[i].vector,
+				 assigned_dev);
+
+		assigned_dev->entries_nr = 0;
+		kfree(assigned_dev->host_msix_entries);
+		kfree(assigned_dev->guest_msix_entries);
+		pci_disable_msix(assigned_dev->dev);
+	} else {
+		/* Deal with MSI and INTx */
+		if ((assigned_dev->irq_requested_type &
+		     KVM_DEV_IRQ_HOST_INTX) &&
+		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
+			spin_lock_irq(&assigned_dev->intx_lock);
+			pci_intx(assigned_dev->dev, false);
+			spin_unlock_irq(&assigned_dev->intx_lock);
+			synchronize_irq(assigned_dev->host_irq);
+		} else
+			disable_irq(assigned_dev->host_irq);
+
+		free_irq(assigned_dev->host_irq, assigned_dev);
+
+		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
+			pci_disable_msi(assigned_dev->dev);
+	}
+
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
+}
+
+static int kvm_deassign_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *assigned_dev,
+			    unsigned long irq_requested_type)
+{
+	unsigned long guest_irq_type, host_irq_type;
+
+	if (!irqchip_in_kernel(kvm))
+		return -EINVAL;
+	/* no irq assignment to deassign */
+	if (!assigned_dev->irq_requested_type)
+		return -ENXIO;
+
+	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
+	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
+
+	if (host_irq_type)
+		deassign_host_irq(kvm, assigned_dev);
+	if (guest_irq_type)
+		deassign_guest_irq(kvm, assigned_dev);
+
+	return 0;
+}
+
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
+}
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+				     struct kvm_assigned_dev_kernel
+				     *assigned_dev)
+{
+	kvm_free_assigned_irq(kvm, assigned_dev);
+
+	pci_reset_function(assigned_dev->dev);
+	if (pci_load_and_free_saved_state(assigned_dev->dev,
+					  &assigned_dev->pci_saved_state))
+		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
+		       __func__, dev_name(&assigned_dev->dev->dev));
+	else
+		pci_restore_state(assigned_dev->dev);
+
+	pci_clear_dev_assigned(assigned_dev->dev);
+
+	pci_release_regions(assigned_dev->dev);
+	pci_disable_device(assigned_dev->dev);
+	pci_dev_put(assigned_dev->dev);
+
+	list_del(&assigned_dev->list);
+	kfree(assigned_dev);
+}
+
+void kvm_free_all_assigned_devices(struct kvm *kvm)
+{
+	struct list_head *ptr, *ptr2;
+	struct kvm_assigned_dev_kernel *assigned_dev;
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
+		assigned_dev = list_entry(ptr,
+					  struct kvm_assigned_dev_kernel,
+					  list);
+
+		kvm_free_assigned_device(kvm, assigned_dev);
+	}
+}
+
+static int assigned_device_enable_host_intx(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
+{
+	irq_handler_t irq_handler;
+	unsigned long flags;
+
+	dev->host_irq = dev->dev->irq;
+
+	/*
+	 * We can only share the IRQ line with other host devices if we are
+	 * able to disable the IRQ source at device-level - independently of
+	 * the guest driver. Otherwise host devices may suffer from unbounded
+	 * IRQ latencies when the guest keeps the line asserted.
+	 */
+	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
+		irq_handler = kvm_assigned_dev_intx;
+		flags = IRQF_SHARED;
+	} else {
+		irq_handler = NULL;
+		flags = IRQF_ONESHOT;
+	}
+	if (request_threaded_irq(dev->host_irq, irq_handler,
+				 kvm_assigned_dev_thread_intx, flags,
+				 dev->irq_name, dev))
+		return -EIO;
+
+	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
+		spin_lock_irq(&dev->intx_lock);
+		pci_intx(dev->dev, true);
+		spin_unlock_irq(&dev->intx_lock);
+	}
+	return 0;
+}
+
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_host_msi(struct kvm *kvm,
+					   struct kvm_assigned_dev_kernel *dev)
+{
+	int r;
+
+	if (!dev->dev->msi_enabled) {
+		r = pci_enable_msi(dev->dev);
+		if (r)
+			return r;
+	}
+
+	dev->host_irq = dev->dev->irq;
+	if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
+				 kvm_assigned_dev_thread_msi, 0,
+				 dev->irq_name, dev)) {
+		pci_disable_msi(dev->dev);
+		return -EIO;
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_host_msix(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
+{
+	int i, r = -EINVAL;
+
+	/* host_msix_entries and guest_msix_entries should have been
+	 * initialized */
+	if (dev->entries_nr == 0)
+		return r;
+
+	r = pci_enable_msix_exact(dev->dev,
+				  dev->host_msix_entries, dev->entries_nr);
+	if (r)
+		return r;
+
+	for (i = 0; i < dev->entries_nr; i++) {
+		r = request_threaded_irq(dev->host_msix_entries[i].vector,
+					 kvm_assigned_dev_msix,
+					 kvm_assigned_dev_thread_msix,
+					 0, dev->irq_name, dev);
+		if (r)
+			goto err;
+	}
+
+	return 0;
+err:
+	for (i -= 1; i >= 0; i--)
+		free_irq(dev->host_msix_entries[i].vector, dev);
+	pci_disable_msix(dev->dev);
+	return r;
+}
+
+#endif
+
+static int assigned_device_enable_guest_intx(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = irq->guest_irq;
+	return 0;
+}
+
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_guest_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	return 0;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_guest_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	return 0;
+}
+#endif
+
+static int assign_host_irq(struct kvm *kvm,
+			   struct kvm_assigned_dev_kernel *dev,
+			   __u32 host_irq_type)
+{
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
+		return r;
+
+	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
+		 pci_name(dev->dev));
+
+	switch (host_irq_type) {
+	case KVM_DEV_IRQ_HOST_INTX:
+		r = assigned_device_enable_host_intx(kvm, dev);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_HOST_MSI:
+		r = assigned_device_enable_host_msi(kvm, dev);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_HOST_MSIX:
+		r = assigned_device_enable_host_msix(kvm, dev);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
+	dev->host_irq_disabled = false;
+
+	if (!r)
+		dev->irq_requested_type |= host_irq_type;
+
+	return r;
+}
+
+static int assign_guest_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *dev,
+			    struct kvm_assigned_irq *irq,
+			    unsigned long guest_irq_type)
+{
+	int id;
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
+		return r;
+
+	id = kvm_request_irq_source_id(kvm);
+	if (id < 0)
+		return id;
+
+	dev->irq_source_id = id;
+
+	switch (guest_irq_type) {
+	case KVM_DEV_IRQ_GUEST_INTX:
+		r = assigned_device_enable_guest_intx(kvm, dev, irq);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_GUEST_MSI:
+		r = assigned_device_enable_guest_msi(kvm, dev, irq);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_GUEST_MSIX:
+		r = assigned_device_enable_guest_msix(kvm, dev, irq);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
+
+	if (!r) {
+		dev->irq_requested_type |= guest_irq_type;
+		if (dev->ack_notifier.gsi != -1)
+			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+	} else {
+		kvm_free_irq_source_id(kvm, dev->irq_source_id);
+		dev->irq_source_id = -1;
+	}
+
+	return r;
+}
+
+/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
+				   struct kvm_assigned_irq *assigned_irq)
+{
+	int r = -EINVAL;
+	struct kvm_assigned_dev_kernel *match;
+	unsigned long host_irq_type, guest_irq_type;
+
+	if (!irqchip_in_kernel(kvm))
+		return r;
+
+	mutex_lock(&kvm->lock);
+	r = -ENODEV;
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
+	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
+
+	r = -EINVAL;
+	/* can only assign one type at a time */
+	if (hweight_long(host_irq_type) > 1)
+		goto out;
+	if (hweight_long(guest_irq_type) > 1)
+		goto out;
+	if (host_irq_type == 0 && guest_irq_type == 0)
+		goto out;
+
+	r = 0;
+	if (host_irq_type)
+		r = assign_host_irq(kvm, match, host_irq_type);
+	if (r)
+		goto out;
+
+	if (guest_irq_type)
+		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
+					 struct kvm_assigned_irq
+					 *assigned_irq)
+{
+	int r = -ENODEV;
+	struct kvm_assigned_dev_kernel *match;
+	unsigned long irq_type;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
+					  KVM_DEV_IRQ_GUEST_MASK);
+	r = kvm_deassign_irq(kvm, match, irq_type);
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+/*
+ * We want to test whether the caller has been granted permissions to
+ * use this device.  To be able to configure and control the device,
+ * the user needs access to PCI configuration space and BAR resources.
+ * These are accessed through PCI sysfs.  PCI config space is often
+ * passed to the process calling this ioctl via file descriptor, so we
+ * can't rely on access to that file.  We can check for permissions
+ * on each of the BAR resource files, which is a pretty clear
+ * indicator that the user has been granted access to the device.
+ */
+static int probe_sysfs_permissions(struct pci_dev *dev)
+{
+#ifdef CONFIG_SYSFS
+	int i;
+	bool bar_found = false;
+
+	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
+		char *kpath, *syspath;
+		struct path path;
+		struct inode *inode;
+		int r;
+
+		if (!pci_resource_len(dev, i))
+			continue;
+
+		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
+		if (!kpath)
+			return -ENOMEM;
+
+		/* Per sysfs-rules, sysfs is always at /sys */
+		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
+		kfree(kpath);
+		if (!syspath)
+			return -ENOMEM;
+
+		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
+		kfree(syspath);
+		if (r)
+			return r;
+
+		inode = path.dentry->d_inode;
+
+		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
+		path_put(&path);
+		if (r)
+			return r;
+
+		bar_found = true;
+	}
+
+	/* If no resources, probably something special */
+	if (!bar_found)
+		return -EPERM;
+
+	return 0;
+#else
+	return -EINVAL; /* No way to control the device without sysfs */
+#endif
+}
+
+static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
+				      struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0, idx;
+	struct kvm_assigned_dev_kernel *match;
+	struct pci_dev *dev;
+
+	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	idx = srcu_read_lock(&kvm->srcu);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (match) {
+		/* device already assigned */
+		r = -EEXIST;
+		goto out;
+	}
+
+	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
+	if (match == NULL) {
+		printk(KERN_INFO "%s: Couldn't allocate memory\n",
+		       __func__);
+		r = -ENOMEM;
+		goto out;
+	}
+	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
+				   assigned_dev->busnr,
+				   assigned_dev->devfn);
+	if (!dev) {
+		printk(KERN_INFO "%s: host device not found\n", __func__);
+		r = -EINVAL;
+		goto out_free;
+	}
+
+	/* Don't allow bridges to be assigned */
+	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
+		r = -EPERM;
+		goto out_put;
+	}
+
+	r = probe_sysfs_permissions(dev);
+	if (r)
+		goto out_put;
+
+	if (pci_enable_device(dev)) {
+		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+		r = -EBUSY;
+		goto out_put;
+	}
+	r = pci_request_regions(dev, "kvm_assigned_device");
+	if (r) {
+		printk(KERN_INFO "%s: Could not get access to device regions\n",
+		       __func__);
+		goto out_disable;
+	}
+
+	pci_reset_function(dev);
+	pci_save_state(dev);
+	match->pci_saved_state = pci_store_saved_state(dev);
+	if (!match->pci_saved_state)
+		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
+		       __func__, dev_name(&dev->dev));
+
+	if (!pci_intx_mask_supported(dev))
+		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
+
+	match->assigned_dev_id = assigned_dev->assigned_dev_id;
+	match->host_segnr = assigned_dev->segnr;
+	match->host_busnr = assigned_dev->busnr;
+	match->host_devfn = assigned_dev->devfn;
+	match->flags = assigned_dev->flags;
+	match->dev = dev;
+	spin_lock_init(&match->intx_lock);
+	spin_lock_init(&match->intx_mask_lock);
+	match->irq_source_id = -1;
+	match->kvm = kvm;
+	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+
+	list_add(&match->list, &kvm->arch.assigned_dev_head);
+
+	if (!kvm->arch.iommu_domain) {
+		r = kvm_iommu_map_guest(kvm);
+		if (r)
+			goto out_list_del;
+	}
+	r = kvm_assign_device(kvm, match->dev);
+	if (r)
+		goto out_list_del;
+
+out:
+	srcu_read_unlock(&kvm->srcu, idx);
+	mutex_unlock(&kvm->lock);
+	return r;
+out_list_del:
+	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
+		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
+		       __func__, dev_name(&dev->dev));
+	list_del(&match->list);
+	pci_release_regions(dev);
+out_disable:
+	pci_disable_device(dev);
+out_put:
+	pci_dev_put(dev);
+out_free:
+	kfree(match);
+	srcu_read_unlock(&kvm->srcu, idx);
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+		struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (!match) {
+		printk(KERN_INFO "%s: device hasn't been assigned before, "
+		  "so cannot be deassigned\n", __func__);
+		r = -EINVAL;
+		goto out;
+	}
+
+	kvm_deassign_device(kvm, match->dev);
+
+	kvm_free_assigned_device(kvm, match);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+
+#ifdef __KVM_HAVE_MSIX
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+				    struct kvm_assigned_msix_nr *entry_nr)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry_nr->assigned_dev_id);
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_nr_out;
+	}
+
+	if (adev->entries_nr == 0) {
+		adev->entries_nr = entry_nr->entry_nr;
+		if (adev->entries_nr == 0 ||
+		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
+			r = -EINVAL;
+			goto msix_nr_out;
+		}
+
+		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+						entry_nr->entry_nr,
+						GFP_KERNEL);
+		if (!adev->host_msix_entries) {
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+		adev->guest_msix_entries =
+			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
+				GFP_KERNEL);
+		if (!adev->guest_msix_entries) {
+			kfree(adev->host_msix_entries);
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+	} else /* Not allowed set MSI-X number twice */
+		r = -EINVAL;
+msix_nr_out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+				       struct kvm_assigned_msix_entry *entry)
+{
+	int r = 0, i;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry->assigned_dev_id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_entry_out;
+	}
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].vector == 0 ||
+		    adev->guest_msix_entries[i].entry == entry->entry) {
+			adev->guest_msix_entries[i].entry = entry->entry;
+			adev->guest_msix_entries[i].vector = entry->gsi;
+			adev->host_msix_entries[i].entry = entry->entry;
+			break;
+		}
+	if (i == adev->entries_nr) {
+		r = -ENOSPC;
+		goto msix_entry_out;
+	}
+
+msix_entry_out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+#endif
+
+static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
+		struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (!match) {
+		r = -ENODEV;
+		goto out;
+	}
+
+	spin_lock(&match->intx_mask_lock);
+
+	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
+	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
+
+	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
+			kvm_set_irq(match->kvm, match->irq_source_id,
+				    match->guest_irq, 0, false);
+			/*
+			 * Masking at hardware-level is performed on demand,
+			 * i.e. when an IRQ actually arrives at the host.
+			 */
+		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
+			/*
+			 * Unmask the IRQ line if required. Unmasking at
+			 * device level will be performed by user space.
+			 */
+			spin_lock_irq(&match->intx_lock);
+			if (match->host_irq_disabled) {
+				enable_irq(match->host_irq);
+				match->host_irq_disabled = false;
+			}
+			spin_unlock_irq(&match->intx_lock);
+		}
+	}
+
+	spin_unlock(&match->intx_mask_lock);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+				  unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	int r;
+
+	switch (ioctl) {
+	case KVM_ASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_IRQ: {
+		r = -EOPNOTSUPP;
+		break;
+	}
+	case KVM_ASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_DEASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_DEASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+#ifdef __KVM_HAVE_MSIX
+	case KVM_ASSIGN_SET_MSIX_NR: {
+		struct kvm_assigned_msix_nr entry_nr;
+		r = -EFAULT;
+		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_SET_MSIX_ENTRY: {
+		struct kvm_assigned_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
+	case KVM_ASSIGN_SET_INTX_MASK: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
+		break;
+	}
+	default:
+		r = -ENOTTY;
+		break;
+	}
+out:
+	return r;
+}
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
new file mode 100644
index 000000000000..a428c1a211b2
--- /dev/null
+++ b/arch/x86/kvm/assigned-dev.h
@@ -0,0 +1,32 @@
+#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
+#define ARCH_X86_KVM_ASSIGNED_DEV_H
+
+#include <linux/kvm_host.h>
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
+
+int kvm_iommu_map_guest(struct kvm *kvm);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+				  unsigned long arg);
+
+void kvm_free_all_assigned_devices(struct kvm *kvm);
+#else
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+
+static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+						unsigned long arg)
+{
+	return -ENOTTY;
+}
+
+static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
+
+#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 976e3a57f9ea..8a80737ee6e6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,7 +23,7 @@
 #include "mmu.h"
 #include "trace.h"
 
-static u32 xstate_required_size(u64 xstate_bv)
+static u32 xstate_required_size(u64 xstate_bv, bool compacted)
 {
 	int feature_bit = 0;
 	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
@@ -31,9 +31,10 @@ static u32 xstate_required_size(u64 xstate_bv)
 	xstate_bv &= XSTATE_EXTEND_MASK;
 	while (xstate_bv) {
 		if (xstate_bv & 0x1) {
-		        u32 eax, ebx, ecx, edx;
+		        u32 eax, ebx, ecx, edx, offset;
 		        cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
-			ret = max(ret, eax + ebx);
+			offset = compacted ? ret : ebx;
+			ret = max(ret, offset + eax);
 		}
 
 		xstate_bv >>= 1;
@@ -53,6 +54,8 @@ u64 kvm_supported_xcr0(void)
 	return xcr0;
 }
 
+#define F(x) bit(X86_FEATURE_##x)
+
 int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -64,13 +67,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 
 	/* Update OSXSAVE bit */
 	if (cpu_has_xsave && best->function == 0x1) {
-		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+		best->ecx &= ~F(OSXSAVE);
 		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
-			best->ecx |= bit(X86_FEATURE_OSXSAVE);
+			best->ecx |= F(OSXSAVE);
 	}
 
 	if (apic) {
-		if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
+		if (best->ecx & F(TSC_DEADLINE_TIMER))
 			apic->lapic_timer.timer_mode_mask = 3 << 17;
 		else
 			apic->lapic_timer.timer_mode_mask = 1 << 17;
@@ -85,9 +88,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 			(best->eax | ((u64)best->edx << 32)) &
 			kvm_supported_xcr0();
 		vcpu->arch.guest_xstate_size = best->ebx =
-			xstate_required_size(vcpu->arch.xcr0);
+			xstate_required_size(vcpu->arch.xcr0, false);
 	}
 
+	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
+	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
+		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
 	 * address checks; exit if it is ever changed.
@@ -122,8 +129,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
 			break;
 		}
 	}
-	if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
-		entry->edx &= ~bit(X86_FEATURE_NX);
+	if (entry && (entry->edx & F(NX)) && !is_efer_nx()) {
+		entry->edx &= ~F(NX);
 		printk(KERN_INFO "kvm: guest NX capability removed\n");
 	}
 }
@@ -227,8 +234,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	entry->flags = 0;
 }
 
-#define F(x) bit(X86_FEATURE_##x)
-
 static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
 				   u32 func, u32 index, int *nent, int maxnent)
 {
@@ -267,6 +272,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
 	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
 	unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
+	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
 
 	/* cpuid 1.edx */
 	const u32 kvm_supported_word0_x86_features =
@@ -317,7 +323,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	const u32 kvm_supported_word9_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-		F(ADX) | F(SMAP);
+		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
+		F(AVX512CD);
+
+	/* cpuid 0xD.1.eax */
+	const u32 kvm_supported_word10_x86_features =
+		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
@@ -453,16 +464,34 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		u64 supported = kvm_supported_xcr0();
 
 		entry->eax &= supported;
+		entry->ebx = xstate_required_size(supported, false);
+		entry->ecx = entry->ebx;
 		entry->edx &= supported >> 32;
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		if (!supported)
+			break;
+
 		for (idx = 1, i = 1; idx < 64; ++idx) {
 			u64 mask = ((u64)1 << idx);
 			if (*nent >= maxnent)
 				goto out;
 
 			do_cpuid_1_ent(&entry[i], function, idx);
-			if (entry[i].eax == 0 || !(supported & mask))
-				continue;
+			if (idx == 1) {
+				entry[i].eax &= kvm_supported_word10_x86_features;
+				entry[i].ebx = 0;
+				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
+					entry[i].ebx =
+						xstate_required_size(supported,
+								     true);
+			} else {
+				if (entry[i].eax == 0 || !(supported & mask))
+					continue;
+				if (WARN_ON_ONCE(entry[i].ecx & 1))
+					continue;
+			}
+			entry[i].ecx = 0;
+			entry[i].edx = 0;
 			entry[i].flags |=
 			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 			++*nent;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 9f8a2faf5040..169b09d76ddd 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -123,6 +123,7 @@
 #define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
 #define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
 #define Escape      (5<<15)     /* Escape to coprocessor instruction */
+#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
 #define Sse         (1<<18)     /* SSE Vector instruction */
 /* Generic ModRM decode. */
 #define ModRM       (1<<19)
@@ -166,6 +167,8 @@
 #define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
 #define NoBigReal   ((u64)1 << 50)  /* No big real mode */
 #define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
+#define NearBranch  ((u64)1 << 52)  /* Near branches */
+#define No16	    ((u64)1 << 53)  /* No 16 bit operand */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -209,6 +212,7 @@ struct opcode {
 		const struct group_dual *gdual;
 		const struct gprefix *gprefix;
 		const struct escape *esc;
+		const struct instr_dual *idual;
 		void (*fastop)(struct fastop *fake);
 	} u;
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
@@ -231,6 +235,11 @@ struct escape {
 	struct opcode high[64];
 };
 
+struct instr_dual {
+	struct opcode mod012;
+	struct opcode mod3;
+};
+
 /* EFLAGS bit definitions. */
 #define EFLG_ID (1<<21)
 #define EFLG_VIP (1<<20)
@@ -379,6 +388,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 	ON64(FOP2E(op##q, rax, cl)) \
 	FOP_END
 
+/* 2 operand, src and dest are reversed */
+#define FASTOP2R(op, name) \
+	FOP_START(name) \
+	FOP2E(op##b, dl, al) \
+	FOP2E(op##w, dx, ax) \
+	FOP2E(op##l, edx, eax) \
+	ON64(FOP2E(op##q, rdx, rax)) \
+	FOP_END
+
 #define FOP3E(op,  dst, src, src2) \
 	FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
 
@@ -477,9 +495,9 @@ address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
 }
 
 static inline unsigned long
-register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
+register_address(struct x86_emulate_ctxt *ctxt, int reg)
 {
-	return address_mask(ctxt, reg);
+	return address_mask(ctxt, reg_read(ctxt, reg));
 }
 
 static void masked_increment(ulong *reg, ulong mask, int inc)
@@ -488,7 +506,7 @@ static void masked_increment(ulong *reg, ulong mask, int inc)
 }
 
 static inline void
-register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
+register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
 {
 	ulong mask;
 
@@ -496,7 +514,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in
 		mask = ~0UL;
 	else
 		mask = ad_mask(ctxt);
-	masked_increment(reg, mask, inc);
+	masked_increment(reg_rmw(ctxt, reg), mask, inc);
 }
 
 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
@@ -564,40 +582,6 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
-static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
-			       int cs_l)
-{
-	switch (ctxt->op_bytes) {
-	case 2:
-		ctxt->_eip = (u16)dst;
-		break;
-	case 4:
-		ctxt->_eip = (u32)dst;
-		break;
-#ifdef CONFIG_X86_64
-	case 8:
-		if ((cs_l && is_noncanonical_address(dst)) ||
-		    (!cs_l && (dst >> 32) != 0))
-			return emulate_gp(ctxt, 0);
-		ctxt->_eip = dst;
-		break;
-#endif
-	default:
-		WARN(1, "unsupported eip assignment size\n");
-	}
-	return X86EMUL_CONTINUE;
-}
-
-static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
-{
-	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
-}
-
-static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	return assign_eip_near(ctxt, ctxt->_eip + rel);
-}
-
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
@@ -641,25 +625,24 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
 		return true;
 }
 
-static int __linearize(struct x86_emulate_ctxt *ctxt,
-		     struct segmented_address addr,
-		     unsigned *max_size, unsigned size,
-		     bool write, bool fetch,
-		     ulong *linear)
+static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
+				       struct segmented_address addr,
+				       unsigned *max_size, unsigned size,
+				       bool write, bool fetch,
+				       enum x86emul_mode mode, ulong *linear)
 {
 	struct desc_struct desc;
 	bool usable;
 	ulong la;
 	u32 lim;
 	u16 sel;
-	unsigned cpl;
 
 	la = seg_base(ctxt, addr.seg) + addr.ea;
 	*max_size = 0;
-	switch (ctxt->mode) {
+	switch (mode) {
 	case X86EMUL_MODE_PROT64:
-		if (((signed long)la << 16) >> 16 != la)
-			return emulate_gp(ctxt, 0);
+		if (is_noncanonical_address(la))
+			goto bad;
 
 		*max_size = min_t(u64, ~0u, (1ull << 48) - la);
 		if (size > *max_size)
@@ -678,46 +661,20 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 		if (!fetch && (desc.type & 8) && !(desc.type & 2))
 			goto bad;
 		lim = desc_limit_scaled(&desc);
-		if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
-		    (ctxt->d & NoBigReal)) {
-			/* la is between zero and 0xffff */
-			if (la > 0xffff)
-				goto bad;
-			*max_size = 0x10000 - la;
-		} else if ((desc.type & 8) || !(desc.type & 4)) {
-			/* expand-up segment */
-			if (addr.ea > lim)
-				goto bad;
-			*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
-		} else {
+		if (!(desc.type & 8) && (desc.type & 4)) {
 			/* expand-down segment */
 			if (addr.ea <= lim)
 				goto bad;
 			lim = desc.d ? 0xffffffff : 0xffff;
-			if (addr.ea > lim)
-				goto bad;
-			*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
 		}
+		if (addr.ea > lim)
+			goto bad;
+		*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
 		if (size > *max_size)
 			goto bad;
-		cpl = ctxt->ops->cpl(ctxt);
-		if (!(desc.type & 8)) {
-			/* data segment */
-			if (cpl > desc.dpl)
-				goto bad;
-		} else if ((desc.type & 8) && !(desc.type & 4)) {
-			/* nonconforming code segment */
-			if (cpl != desc.dpl)
-				goto bad;
-		} else if ((desc.type & 8) && (desc.type & 4)) {
-			/* conforming code segment */
-			if (cpl < desc.dpl)
-				goto bad;
-		}
+		la &= (u32)-1;
 		break;
 	}
-	if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
-		la &= (u32)-1;
 	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
 		return emulate_gp(ctxt, 0);
 	*linear = la;
@@ -735,9 +692,55 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
 		     ulong *linear)
 {
 	unsigned max_size;
-	return __linearize(ctxt, addr, &max_size, size, write, false, linear);
+	return __linearize(ctxt, addr, &max_size, size, write, false,
+			   ctxt->mode, linear);
+}
+
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
+			     enum x86emul_mode mode)
+{
+	ulong linear;
+	int rc;
+	unsigned max_size;
+	struct segmented_address addr = { .seg = VCPU_SREG_CS,
+					   .ea = dst };
+
+	if (ctxt->op_bytes != sizeof(unsigned long))
+		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
+	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+	if (rc == X86EMUL_CONTINUE)
+		ctxt->_eip = addr.ea;
+	return rc;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip(ctxt, dst, ctxt->mode);
 }
 
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+			  const struct desc_struct *cs_desc)
+{
+	enum x86emul_mode mode = ctxt->mode;
+
+#ifdef CONFIG_X86_64
+	if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {
+		u64 efer = 0;
+
+		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+		if (efer & EFER_LMA)
+			mode = X86EMUL_MODE_PROT64;
+	}
+#endif
+	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
+		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+	return assign_eip(ctxt, dst, mode);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
 
 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 			      struct segmented_address addr,
@@ -776,7 +779,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 	 * boundary check itself.  Instead, we use max_size to check
 	 * against op_size.
 	 */
-	rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear);
+	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
+			 &linear);
 	if (unlikely(rc != X86EMUL_CONTINUE))
 		return rc;
 
@@ -911,6 +915,8 @@ FASTOP2W(btc);
 
 FASTOP2(xadd);
 
+FASTOP2R(cmp, cmp_r);
+
 static u8 test_cc(unsigned int condition, unsigned long flags)
 {
 	u8 rc;
@@ -1221,6 +1227,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			if (index_reg != 4)
 				modrm_ea += reg_read(ctxt, index_reg) << scale;
 		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
+			modrm_ea += insn_fetch(s32, ctxt);
 			if (ctxt->mode == X86EMUL_MODE_PROT64)
 				ctxt->rip_relative = 1;
 		} else {
@@ -1229,10 +1236,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			adjust_modrm_seg(ctxt, base_reg);
 		}
 		switch (ctxt->modrm_mod) {
-		case 0:
-			if (ctxt->modrm_rm == 5)
-				modrm_ea += insn_fetch(s32, ctxt);
-			break;
 		case 1:
 			modrm_ea += insn_fetch(s8, ctxt);
 			break;
@@ -1284,7 +1287,8 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
 		else
 			sv = (s64)ctxt->src.val & (s64)mask;
 
-		ctxt->dst.addr.mem.ea += (sv >> 3);
+		ctxt->dst.addr.mem.ea = address_mask(ctxt,
+					   ctxt->dst.addr.mem.ea + (sv >> 3));
 	}
 
 	/* only subword offset */
@@ -1610,6 +1614,9 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				sizeof(base3), &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
+		if (is_noncanonical_address(get_desc_base(&seg_desc) |
+					     ((u64)base3 << 32)))
+			return emulate_gp(ctxt, 0);
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
@@ -1807,6 +1814,10 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
 	int seg = ctxt->src2.val;
 
 	ctxt->src.val = get_segment_selector(ctxt, seg);
+	if (ctxt->op_bytes == 4) {
+		rsp_increment(ctxt, -2);
+		ctxt->op_bytes = 2;
+	}
 
 	return em_push(ctxt);
 }
@@ -1850,7 +1861,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
 
 static int em_pushf(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->src.val =  (unsigned long)ctxt->eflags;
+	ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
 	return em_push(ctxt);
 }
 
@@ -2035,7 +2046,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
 	if (rc != X86EMUL_CONTINUE) {
 		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
 		/* assigning eip failed; restore the old cs */
@@ -2045,31 +2056,22 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
-static int em_grp45(struct x86_emulate_ctxt *ctxt)
+static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
 {
-	int rc = X86EMUL_CONTINUE;
+	return assign_eip_near(ctxt, ctxt->src.val);
+}
 
-	switch (ctxt->modrm_reg) {
-	case 2: /* call near abs */ {
-		long int old_eip;
-		old_eip = ctxt->_eip;
-		rc = assign_eip_near(ctxt, ctxt->src.val);
-		if (rc != X86EMUL_CONTINUE)
-			break;
-		ctxt->src.val = old_eip;
-		rc = em_push(ctxt);
-		break;
-	}
-	case 4: /* jmp abs */
-		rc = assign_eip_near(ctxt, ctxt->src.val);
-		break;
-	case 5: /* jmp far */
-		rc = em_jmp_far(ctxt);
-		break;
-	case 6:	/* push */
-		rc = em_push(ctxt);
-		break;
-	}
+static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
+{
+	int rc;
+	long int old_eip;
+
+	old_eip = ctxt->_eip;
+	rc = assign_eip_near(ctxt, ctxt->src.val);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	ctxt->src.val = old_eip;
+	rc = em_push(ctxt);
 	return rc;
 }
 
@@ -2128,11 +2130,11 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
 				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	rc = assign_eip_far(ctxt, eip, &new_desc);
 	if (rc != X86EMUL_CONTINUE) {
 		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
 		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
@@ -2316,6 +2318,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 
 		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
 		ctxt->eflags &= ~msr_data;
+		ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
 #endif
 	} else {
 		/* legacy mode */
@@ -2349,11 +2352,9 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	    && !vendor_intel(ctxt))
 		return emulate_ud(ctxt);
 
-	/* XXX sysenter/sysexit have not been tested in 64bit mode.
-	* Therefore, we inject an #UD.
-	*/
+	/* sysenter/sysexit have not been tested in 64bit mode. */
 	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		return emulate_ud(ctxt);
+		return X86EMUL_UNHANDLEABLE;
 
 	setup_syscalls_segments(ctxt, &cs, &ss);
 
@@ -2425,6 +2426,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		if ((msr_data & 0xfffc) == 0x0)
 			return emulate_gp(ctxt, 0);
 		ss_sel = (u16)(msr_data + 24);
+		rcx = (u32)rcx;
+		rdx = (u32)rdx;
 		break;
 	case X86EMUL_MODE_PROT64:
 		cs_sel = (u16)(msr_data + 32);
@@ -2599,7 +2602,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	save_state_to_tss16(ctxt, &tss_seg);
@@ -2607,13 +2609,11 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			     &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	if (old_tss_sel != 0xffff) {
@@ -2624,7 +2624,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 				     sizeof tss_seg.prev_task_link,
 				     &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
-			/* FIXME: need to provide precise fault address */
 			return ret;
 	}
 
@@ -2813,7 +2812,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 	 *
 	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
 	 * 2. Exception/IRQ/iret: No check is performed
-	 * 3. jmp/call to TSS: Check against DPL of the TSS
+	 * 3. jmp/call to TSS/task-gate: No check is performed since the
+	 *    hardware checks it before exiting.
 	 */
 	if (reason == TASK_SWITCH_GATE) {
 		if (idt_index != -1) {
@@ -2830,13 +2830,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 			if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
 				return emulate_gp(ctxt, (idt_index << 3) | 0x2);
 		}
-	} else if (reason != TASK_SWITCH_IRET) {
-		int dpl = next_tss_desc.dpl;
-		if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
-			return emulate_gp(ctxt, tss_selector);
 	}
 
-
 	desc_limit = desc_limit_scaled(&next_tss_desc);
 	if (!next_tss_desc.p ||
 	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
@@ -2913,8 +2908,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
 {
 	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
 
-	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
-	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
+	register_address_increment(ctxt, reg, df * op->bytes);
+	op->addr.mem.ea = register_address(ctxt, reg);
 }
 
 static int em_das(struct x86_emulate_ctxt *ctxt)
@@ -3025,7 +3020,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
-	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		goto fail;
 
@@ -3215,6 +3210,8 @@ static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
 		return emulate_ud(ctxt);
 
 	ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
+	if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
+		ctxt->dst.bytes = 2;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3317,7 +3314,7 @@ static int em_sidt(struct x86_emulate_ctxt *ctxt)
 	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
 }
 
-static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
 {
 	struct desc_ptr desc_ptr;
 	int rc;
@@ -3329,12 +3326,23 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
 			     ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	ctxt->ops->set_gdt(ctxt, &desc_ptr);
+	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
+	    is_noncanonical_address(desc_ptr.address))
+		return emulate_gp(ctxt, 0);
+	if (lgdt)
+		ctxt->ops->set_gdt(ctxt, &desc_ptr);
+	else
+		ctxt->ops->set_idt(ctxt, &desc_ptr);
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
 	return X86EMUL_CONTINUE;
 }
 
+static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+{
+	return em_lgdt_lidt(ctxt, true);
+}
+
 static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
@@ -3348,20 +3356,7 @@ static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
 
 static int em_lidt(struct x86_emulate_ctxt *ctxt)
 {
-	struct desc_ptr desc_ptr;
-	int rc;
-
-	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		ctxt->op_bytes = 8;
-	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
-			     &desc_ptr.size, &desc_ptr.address,
-			     ctxt->op_bytes);
-	if (rc != X86EMUL_CONTINUE)
-		return rc;
-	ctxt->ops->set_idt(ctxt, &desc_ptr);
-	/* Disable writeback. */
-	ctxt->dst.type = OP_NONE;
-	return X86EMUL_CONTINUE;
+	return em_lgdt_lidt(ctxt, false);
 }
 
 static int em_smsw(struct x86_emulate_ctxt *ctxt)
@@ -3384,7 +3379,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
 	int rc = X86EMUL_CONTINUE;
 
-	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+	register_address_increment(ctxt, VCPU_REGS_RCX, -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
 		rc = jmp_rel(ctxt, ctxt->src.val);
@@ -3554,7 +3549,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 
 		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 		if (efer & EFER_LMA)
-			rsvd = CR3_L_MODE_RESERVED_BITS;
+			rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
 
 		if (new_val & rsvd)
 			return emulate_gp(ctxt, 0);
@@ -3596,8 +3591,15 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
 	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
 		return emulate_ud(ctxt);
 
-	if (check_dr7_gd(ctxt))
+	if (check_dr7_gd(ctxt)) {
+		ulong dr6;
+
+		ctxt->ops->get_dr(ctxt, 6, &dr6);
+		dr6 &= ~15;
+		dr6 |= DR6_BD | DR6_RTM;
+		ctxt->ops->set_dr(ctxt, 6, dr6);
 		return emulate_db(ctxt);
+	}
 
 	return X86EMUL_CONTINUE;
 }
@@ -3684,6 +3686,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
+#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
@@ -3780,11 +3783,11 @@ static const struct opcode group4[] = {
 static const struct opcode group5[] = {
 	F(DstMem | SrcNone | Lock,		em_inc),
 	F(DstMem | SrcNone | Lock,		em_dec),
-	I(SrcMem | Stack,			em_grp45),
+	I(SrcMem | NearBranch,			em_call_near_abs),
 	I(SrcMemFAddr | ImplicitOps | Stack,	em_call_far),
-	I(SrcMem | Stack,			em_grp45),
-	I(SrcMemFAddr | ImplicitOps,		em_grp45),
-	I(SrcMem | Stack,			em_grp45), D(Undefined),
+	I(SrcMem | NearBranch,			em_jmp_abs),
+	I(SrcMemFAddr | ImplicitOps,		em_jmp_far),
+	I(SrcMem | Stack,			em_push), D(Undefined),
 };
 
 static const struct opcode group6[] = {
@@ -3845,8 +3848,12 @@ static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
 
+static const struct instr_dual instr_dual_0f_2b = {
+	I(0, em_mov), N
+};
+
 static const struct gprefix pfx_0f_2b = {
-	I(0, em_mov), I(0, em_mov), N, N,
+	ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
 };
 
 static const struct gprefix pfx_0f_28_0f_29 = {
@@ -3920,6 +3927,10 @@ static const struct escape escape_dd = { {
 	N, N, N, N, N, N, N, N,
 } };
 
+static const struct instr_dual instr_dual_0f_c3 = {
+	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
+};
+
 static const struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	F6ALU(Lock, em_add),
@@ -3964,7 +3975,7 @@ static const struct opcode opcode_table[256] = {
 	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
 	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
-	X16(D(SrcImmByte)),
+	X16(D(SrcImmByte | NearBranch)),
 	/* 0x80 - 0x87 */
 	G(ByteOp | DstMem | SrcImm, group1),
 	G(DstMem | SrcImm, group1),
@@ -3991,20 +4002,20 @@ static const struct opcode opcode_table[256] = {
 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
 	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
 	I2bv(SrcSI | DstDI | Mov | String, em_mov),
-	F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
+	F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
 	/* 0xA8 - 0xAF */
 	F2bv(DstAcc | SrcImm | NoWrite, em_test),
 	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
 	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
-	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
+	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
 	/* 0xB0 - 0xB7 */
 	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
 	/* 0xB8 - 0xBF */
 	X8(I(DstReg | SrcImm64 | Mov, em_mov)),
 	/* 0xC0 - 0xC7 */
 	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
-	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
-	I(ImplicitOps | Stack, em_ret),
+	I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
+	I(ImplicitOps | NearBranch, em_ret),
 	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
 	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
 	G(ByteOp, group11), G(0, group11),
@@ -4024,13 +4035,14 @@ static const struct opcode opcode_table[256] = {
 	/* 0xD8 - 0xDF */
 	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
 	/* 0xE0 - 0xE7 */
-	X3(I(SrcImmByte, em_loop)),
-	I(SrcImmByte, em_jcxz),
+	X3(I(SrcImmByte | NearBranch, em_loop)),
+	I(SrcImmByte | NearBranch, em_jcxz),
 	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
 	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
 	/* 0xE8 - 0xEF */
-	I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps),
-	I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
+	I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
+	I(SrcImmFAddr | No64, em_jmp_far),
+	D(SrcImmByte | ImplicitOps | NearBranch),
 	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
 	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
 	/* 0xF0 - 0xF7 */
@@ -4090,7 +4102,7 @@ static const struct opcode twobyte_table[256] = {
 	N, N, N, N,
 	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
 	/* 0x80 - 0x8F */
-	X16(D(SrcImm)),
+	X16(D(SrcImm | NearBranch)),
 	/* 0x90 - 0x9F */
 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
 	/* 0xA0 - 0xA7 */
@@ -4121,7 +4133,7 @@ static const struct opcode twobyte_table[256] = {
 	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
 	/* 0xC0 - 0xC7 */
 	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
-	N, D(DstMem | SrcReg | ModRM | Mov),
+	N, ID(0, &instr_dual_0f_c3),
 	N, N, N, GD(0, &group9),
 	/* 0xC8 - 0xCF */
 	X8(I(DstReg, em_bswap)),
@@ -4134,12 +4146,20 @@ static const struct opcode twobyte_table[256] = {
 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
 };
 
+static const struct instr_dual instr_dual_0f_38_f0 = {
+	I(DstReg | SrcMem | Mov, em_movbe), N
+};
+
+static const struct instr_dual instr_dual_0f_38_f1 = {
+	I(DstMem | SrcReg | Mov, em_movbe), N
+};
+
 static const struct gprefix three_byte_0f_38_f0 = {
-	I(DstReg | SrcMem | Mov, em_movbe), N, N, N
+	ID(0, &instr_dual_0f_38_f0), N, N, N
 };
 
 static const struct gprefix three_byte_0f_38_f1 = {
-	I(DstMem | SrcReg | Mov, em_movbe), N, N, N
+	ID(0, &instr_dual_0f_38_f1), N, N, N
 };
 
 /*
@@ -4152,8 +4172,8 @@ static const struct opcode opcode_map_0f_38[256] = {
 	/* 0x80 - 0xef */
 	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
 	/* 0xf0 - 0xf1 */
-	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
-	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
+	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
+	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
 	/* 0xf2 - 0xff */
 	N, N, X4(N), X8(N)
 };
@@ -4275,7 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
+			register_address(ctxt, VCPU_REGS_RDI);
 		op->addr.mem.seg = VCPU_SREG_ES;
 		op->val = 0;
 		op->count = 1;
@@ -4329,7 +4349,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
+			register_address(ctxt, VCPU_REGS_RSI);
 		op->addr.mem.seg = ctxt->seg_override;
 		op->val = 0;
 		op->count = 1;
@@ -4338,7 +4358,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt,
+			address_mask(ctxt,
 				reg_read(ctxt, VCPU_REGS_RBX) +
 				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
 		op->addr.mem.seg = ctxt->seg_override;
@@ -4510,8 +4530,7 @@ done_prefixes:
 
 	/* vex-prefix instructions are not implemented */
 	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
-	    (mode == X86EMUL_MODE_PROT64 ||
-	    (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
+	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
 		ctxt->d = NotImpl;
 	}
 
@@ -4549,6 +4568,12 @@ done_prefixes:
 			else
 				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
 			break;
+		case InstrDual:
+			if ((ctxt->modrm >> 6) == 3)
+				opcode = opcode.u.idual->mod3;
+			else
+				opcode = opcode.u.idual->mod012;
+			break;
 		default:
 			return EMULATION_FAILED;
 		}
@@ -4567,7 +4592,8 @@ done_prefixes:
 		return EMULATION_FAILED;
 
 	if (unlikely(ctxt->d &
-		     (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
+	     No16))) {
 		/*
 		 * These are copied unconditionally here, and checked unconditionally
 		 * in x86_emulate_insn.
@@ -4578,8 +4604,12 @@ done_prefixes:
 		if (ctxt->d & NotImpl)
 			return EMULATION_FAILED;
 
-		if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
-			ctxt->op_bytes = 8;
+		if (mode == X86EMUL_MODE_PROT64) {
+			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
+				ctxt->op_bytes = 8;
+			else if (ctxt->d & NearBranch)
+				ctxt->op_bytes = 8;
+		}
 
 		if (ctxt->d & Op3264) {
 			if (mode == X86EMUL_MODE_PROT64)
@@ -4588,6 +4618,9 @@ done_prefixes:
 				ctxt->op_bytes = 4;
 		}
 
+		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
+			ctxt->op_bytes = 4;
+
 		if (ctxt->d & Sse)
 			ctxt->op_bytes = 16;
 		else if (ctxt->d & Mmx)
@@ -4631,7 +4664,8 @@ done_prefixes:
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
 	if (ctxt->rip_relative)
-		ctxt->memopp->addr.mem.ea += ctxt->_eip;
+		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
+					ctxt->memopp->addr.mem.ea + ctxt->_eip);
 
 done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4775,6 +4809,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 				goto done;
 		}
 
+		/* Instruction can only be executed in protected mode */
+		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+			rc = emulate_ud(ctxt);
+			goto done;
+		}
+
 		/* Privileged instruction can be executed only in CPL=0 */
 		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
 			if (ctxt->d & PrivUD)
@@ -4784,12 +4824,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 			goto done;
 		}
 
-		/* Instruction can only be executed in protected mode */
-		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
-			rc = emulate_ud(ctxt);
-			goto done;
-		}
-
 		/* Do instruction specific permission checks */
 		if (ctxt->d & CheckPerm) {
 			rc = ctxt->check_perm(ctxt);
@@ -4974,8 +5008,7 @@ writeback:
 			count = ctxt->src.count;
 		else
 			count = ctxt->dst.count;
-		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
-				-count);
+		register_address_increment(ctxt, VCPU_REGS_RCX, -count);
 
 		if (!string_insn_completed(ctxt)) {
 			/*
@@ -5053,11 +5086,6 @@ twobyte_insn:
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
 							(s16) ctxt->src.val;
 		break;
-	case 0xc3:		/* movnti */
-		ctxt->dst.bytes = ctxt->op_bytes;
-		ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
-							(u32) ctxt->src.val;
-		break;
 	default:
 		goto cannot_emulate;
 	}
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
new file mode 100644
index 000000000000..b1947e0f3e10
--- /dev/null
+++ b/arch/x86/kvm/ioapic.c
@@ -0,0 +1,675 @@
+/*
+ *  Copyright (C) 2001  MandrakeSoft S.A.
+ *  Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ *    MandrakeSoft S.A.
+ *    43, rue d'Aboukir
+ *    75002 Paris - France
+ *    http://www.linux-mandrake.com/
+ *    http://www.mandrakesoft.com/
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Yunhong Jiang <yunhong.jiang@intel.com>
+ *  Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *  Based on Xen 3.1 code.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <trace/events/kvm.h>
+
+#include "ioapic.h"
+#include "lapic.h"
+#include "irq.h"
+
+#if 0
+#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
+#else
+#define ioapic_debug(fmt, arg...)
+#endif
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
+		bool line_status);
+
+static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
+					  unsigned long addr,
+					  unsigned long length)
+{
+	unsigned long result = 0;
+
+	switch (ioapic->ioregsel) {
+	case IOAPIC_REG_VERSION:
+		result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
+			  | (IOAPIC_VERSION_ID & 0xff));
+		break;
+
+	case IOAPIC_REG_APIC_ID:
+	case IOAPIC_REG_ARB_ID:
+		result = ((ioapic->id & 0xf) << 24);
+		break;
+
+	default:
+		{
+			u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
+			u64 redir_content;
+
+			if (redir_index < IOAPIC_NUM_PINS)
+				redir_content =
+					ioapic->redirtbl[redir_index].bits;
+			else
+				redir_content = ~0ULL;
+
+			result = (ioapic->ioregsel & 0x1) ?
+			    (redir_content >> 32) & 0xffffffff :
+			    redir_content & 0xffffffff;
+			break;
+		}
+	}
+
+	return result;
+}
+
+static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
+{
+	ioapic->rtc_status.pending_eoi = 0;
+	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
+}
+
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
+
+static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
+{
+	if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
+		kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
+static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+	bool new_val, old_val;
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+	union kvm_ioapic_redirect_entry *e;
+
+	e = &ioapic->redirtbl[RTC_GSI];
+	if (!kvm_apic_match_dest(vcpu, NULL, 0,	e->fields.dest_id,
+				e->fields.dest_mode))
+		return;
+
+	new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
+	old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+
+	if (new_val == old_val)
+		return;
+
+	if (new_val) {
+		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi++;
+	} else {
+		__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi--;
+		rtc_status_pending_eoi_check_valid(ioapic);
+	}
+}
+
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+
+	spin_lock(&ioapic->lock);
+	__rtc_irq_eoi_tracking_restore_one(vcpu);
+	spin_unlock(&ioapic->lock);
+}
+
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	if (RTC_GSI >= IOAPIC_NUM_PINS)
+		return;
+
+	rtc_irq_eoi_tracking_reset(ioapic);
+	kvm_for_each_vcpu(i, vcpu, ioapic->kvm)
+	    __rtc_irq_eoi_tracking_restore_one(vcpu);
+}
+
+static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
+{
+	if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
+		--ioapic->rtc_status.pending_eoi;
+		rtc_status_pending_eoi_check_valid(ioapic);
+	}
+}
+
+static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
+{
+	if (ioapic->rtc_status.pending_eoi > 0)
+		return true; /* coalesced */
+
+	return false;
+}
+
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+		int irq_level, bool line_status)
+{
+	union kvm_ioapic_redirect_entry entry;
+	u32 mask = 1 << irq;
+	u32 old_irr;
+	int edge, ret;
+
+	entry = ioapic->redirtbl[irq];
+	edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+	if (!irq_level) {
+		ioapic->irr &= ~mask;
+		ret = 1;
+		goto out;
+	}
+
+	/*
+	 * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+	 * this only happens if a previous edge has not been delivered due
+	 * do masking.  For level interrupts, the remote_irr field tells
+	 * us if the interrupt is waiting for an EOI.
+	 *
+	 * RTC is special: it is edge-triggered, but userspace likes to know
+	 * if it has been already ack-ed via EOI because coalesced RTC
+	 * interrupts lead to time drift in Windows guests.  So we track
+	 * EOI manually for the RTC interrupt.
+	 */
+	if (irq == RTC_GSI && line_status &&
+		rtc_irq_check_coalesced(ioapic)) {
+		ret = 0;
+		goto out;
+	}
+
+	old_irr = ioapic->irr;
+	ioapic->irr |= mask;
+	if ((edge && old_irr == ioapic->irr) ||
+	    (!edge && entry.fields.remote_irr)) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+	return ret;
+}
+
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+	u32 idx;
+
+	rtc_irq_eoi_tracking_reset(ioapic);
+	for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+		ioapic_set_irq(ioapic, idx, 1, true);
+
+	kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
+
+static void update_handled_vectors(struct kvm_ioapic *ioapic)
+{
+	DECLARE_BITMAP(handled_vectors, 256);
+	int i;
+
+	memset(handled_vectors, 0, sizeof(handled_vectors));
+	for (i = 0; i < IOAPIC_NUM_PINS; ++i)
+		__set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
+	memcpy(ioapic->handled_vectors, handled_vectors,
+	       sizeof(handled_vectors));
+	smp_wmb();
+}
+
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+			u32 *tmr)
+{
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+	union kvm_ioapic_redirect_entry *e;
+	int index;
+
+	spin_lock(&ioapic->lock);
+	for (index = 0; index < IOAPIC_NUM_PINS; index++) {
+		e = &ioapic->redirtbl[index];
+		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
+		    kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
+		    index == RTC_GSI) {
+			if (kvm_apic_match_dest(vcpu, NULL, 0,
+				e->fields.dest_id, e->fields.dest_mode)) {
+				__set_bit(e->fields.vector,
+					(unsigned long *)eoi_exit_bitmap);
+				if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+					__set_bit(e->fields.vector,
+						(unsigned long *)tmr);
+			}
+		}
+	}
+	spin_unlock(&ioapic->lock);
+}
+
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+	if (!ioapic)
+		return;
+	kvm_make_scan_ioapic_request(kvm);
+}
+
+static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+{
+	unsigned index;
+	bool mask_before, mask_after;
+	union kvm_ioapic_redirect_entry *e;
+
+	switch (ioapic->ioregsel) {
+	case IOAPIC_REG_VERSION:
+		/* Writes are ignored. */
+		break;
+
+	case IOAPIC_REG_APIC_ID:
+		ioapic->id = (val >> 24) & 0xf;
+		break;
+
+	case IOAPIC_REG_ARB_ID:
+		break;
+
+	default:
+		index = (ioapic->ioregsel - 0x10) >> 1;
+
+		ioapic_debug("change redir index %x val %x\n", index, val);
+		if (index >= IOAPIC_NUM_PINS)
+			return;
+		e = &ioapic->redirtbl[index];
+		mask_before = e->fields.mask;
+		if (ioapic->ioregsel & 1) {
+			e->bits &= 0xffffffff;
+			e->bits |= (u64) val << 32;
+		} else {
+			e->bits &= ~0xffffffffULL;
+			e->bits |= (u32) val;
+			e->fields.remote_irr = 0;
+		}
+		update_handled_vectors(ioapic);
+		mask_after = e->fields.mask;
+		if (mask_before != mask_after)
+			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
+		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
+		    && ioapic->irr & (1 << index))
+			ioapic_service(ioapic, index, false);
+		kvm_vcpu_request_scan_ioapic(ioapic->kvm);
+		break;
+	}
+}
+
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
+{
+	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
+	struct kvm_lapic_irq irqe;
+	int ret;
+
+	if (entry->fields.mask)
+		return -1;
+
+	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+		     "vector=%x trig_mode=%x\n",
+		     entry->fields.dest_id, entry->fields.dest_mode,
+		     entry->fields.delivery_mode, entry->fields.vector,
+		     entry->fields.trig_mode);
+
+	irqe.dest_id = entry->fields.dest_id;
+	irqe.vector = entry->fields.vector;
+	irqe.dest_mode = entry->fields.dest_mode;
+	irqe.trig_mode = entry->fields.trig_mode;
+	irqe.delivery_mode = entry->fields.delivery_mode << 8;
+	irqe.level = 1;
+	irqe.shorthand = 0;
+
+	if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+		ioapic->irr &= ~(1 << irq);
+
+	if (irq == RTC_GSI && line_status) {
+		/*
+		 * pending_eoi cannot ever become negative (see
+		 * rtc_status_pending_eoi_check_valid) and the caller
+		 * ensures that it is only called if it is >= zero, namely
+		 * if rtc_irq_check_coalesced returns false).
+		 */
+		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
+		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
+				ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
+	} else
+		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+
+	if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+		entry->fields.remote_irr = 1;
+
+	return ret;
+}
+
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
+		       int level, bool line_status)
+{
+	int ret, irq_level;
+
+	BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
+
+	spin_lock(&ioapic->lock);
+	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
+					 irq_source_id, level);
+	ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
+
+	spin_unlock(&ioapic->lock);
+
+	return ret;
+}
+
+void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
+{
+	int i;
+
+	spin_lock(&ioapic->lock);
+	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+		__clear_bit(irq_source_id, &ioapic->irq_states[i]);
+	spin_unlock(&ioapic->lock);
+}
+
+static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
+{
+	int i;
+	struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
+						 eoi_inject.work);
+	spin_lock(&ioapic->lock);
+	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+		if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
+			continue;
+
+		if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
+			ioapic_service(ioapic, i, false);
+	}
+	spin_unlock(&ioapic->lock);
+}
+
+#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
+
+static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
+			struct kvm_ioapic *ioapic, int vector, int trigger_mode)
+{
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+		if (ent->fields.vector != vector)
+			continue;
+
+		if (i == RTC_GSI)
+			rtc_irq_eoi(ioapic, vcpu);
+		/*
+		 * We are dropping lock while calling ack notifiers because ack
+		 * notifier callbacks for assigned devices call into IOAPIC
+		 * recursively. Since remote_irr is cleared only after call
+		 * to notifiers if the same vector will be delivered while lock
+		 * is dropped it will be put into irr and will be delivered
+		 * after ack notifier returns.
+		 */
+		spin_unlock(&ioapic->lock);
+		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
+		spin_lock(&ioapic->lock);
+
+		if (trigger_mode != IOAPIC_LEVEL_TRIG)
+			continue;
+
+		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+		ent->fields.remote_irr = 0;
+		if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
+			++ioapic->irq_eoi[i];
+			if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
+				/*
+				 * Real hardware does not deliver the interrupt
+				 * immediately during eoi broadcast, and this
+				 * lets a buggy guest make slow progress
+				 * even if it does not correctly handle a
+				 * level-triggered interrupt.  Emulate this
+				 * behavior if we detect an interrupt storm.
+				 */
+				schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
+				ioapic->irq_eoi[i] = 0;
+				trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
+			} else {
+				ioapic_service(ioapic, i, false);
+			}
+		} else {
+			ioapic->irq_eoi[i] = 0;
+		}
+	}
+}
+
+bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	smp_rmb();
+	return test_bit(vector, ioapic->handled_vectors);
+}
+
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
+{
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+
+	spin_lock(&ioapic->lock);
+	__kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
+	spin_unlock(&ioapic->lock);
+}
+
+static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct kvm_ioapic, dev);
+}
+
+static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
+{
+	return ((addr >= ioapic->base_address &&
+		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
+}
+
+static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+			    void *val)
+{
+	struct kvm_ioapic *ioapic = to_ioapic(this);
+	u32 result;
+	if (!ioapic_in_range(ioapic, addr))
+		return -EOPNOTSUPP;
+
+	ioapic_debug("addr %lx\n", (unsigned long)addr);
+	ASSERT(!(addr & 0xf));	/* check alignment */
+
+	addr &= 0xff;
+	spin_lock(&ioapic->lock);
+	switch (addr) {
+	case IOAPIC_REG_SELECT:
+		result = ioapic->ioregsel;
+		break;
+
+	case IOAPIC_REG_WINDOW:
+		result = ioapic_read_indirect(ioapic, addr, len);
+		break;
+
+	default:
+		result = 0;
+		break;
+	}
+	spin_unlock(&ioapic->lock);
+
+	switch (len) {
+	case 8:
+		*(u64 *) val = result;
+		break;
+	case 1:
+	case 2:
+	case 4:
+		memcpy(val, (char *)&result, len);
+		break;
+	default:
+		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
+	}
+	return 0;
+}
+
+static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+			     const void *val)
+{
+	struct kvm_ioapic *ioapic = to_ioapic(this);
+	u32 data;
+	if (!ioapic_in_range(ioapic, addr))
+		return -EOPNOTSUPP;
+
+	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
+		     (void*)addr, len, val);
+	ASSERT(!(addr & 0xf));	/* check alignment */
+
+	switch (len) {
+	case 8:
+	case 4:
+		data = *(u32 *) val;
+		break;
+	case 2:
+		data = *(u16 *) val;
+		break;
+	case 1:
+		data = *(u8  *) val;
+		break;
+	default:
+		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
+		return 0;
+	}
+
+	addr &= 0xff;
+	spin_lock(&ioapic->lock);
+	switch (addr) {
+	case IOAPIC_REG_SELECT:
+		ioapic->ioregsel = data & 0xFF; /* 8-bit register */
+		break;
+
+	case IOAPIC_REG_WINDOW:
+		ioapic_write_indirect(ioapic, data);
+		break;
+
+	default:
+		break;
+	}
+	spin_unlock(&ioapic->lock);
+	return 0;
+}
+
+static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
+{
+	int i;
+
+	cancel_delayed_work_sync(&ioapic->eoi_inject);
+	for (i = 0; i < IOAPIC_NUM_PINS; i++)
+		ioapic->redirtbl[i].fields.mask = 1;
+	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+	ioapic->ioregsel = 0;
+	ioapic->irr = 0;
+	ioapic->id = 0;
+	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
+	rtc_irq_eoi_tracking_reset(ioapic);
+	update_handled_vectors(ioapic);
+}
+
+static const struct kvm_io_device_ops ioapic_mmio_ops = {
+	.read     = ioapic_mmio_read,
+	.write    = ioapic_mmio_write,
+};
+
+int kvm_ioapic_init(struct kvm *kvm)
+{
+	struct kvm_ioapic *ioapic;
+	int ret;
+
+	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+	if (!ioapic)
+		return -ENOMEM;
+	spin_lock_init(&ioapic->lock);
+	INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
+	kvm->arch.vioapic = ioapic;
+	kvm_ioapic_reset(ioapic);
+	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
+	ioapic->kvm = kvm;
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
+				      IOAPIC_MEM_LENGTH, &ioapic->dev);
+	mutex_unlock(&kvm->slots_lock);
+	if (ret < 0) {
+		kvm->arch.vioapic = NULL;
+		kfree(ioapic);
+	}
+
+	return ret;
+}
+
+void kvm_ioapic_destroy(struct kvm *kvm)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+	cancel_delayed_work_sync(&ioapic->eoi_inject);
+	if (ioapic) {
+		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+		kvm->arch.vioapic = NULL;
+		kfree(ioapic);
+	}
+}
+
+int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+{
+	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+	if (!ioapic)
+		return -EINVAL;
+
+	spin_lock(&ioapic->lock);
+	memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+	spin_unlock(&ioapic->lock);
+	return 0;
+}
+
+int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+{
+	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+	if (!ioapic)
+		return -EINVAL;
+
+	spin_lock(&ioapic->lock);
+	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+	ioapic->irr = 0;
+	update_handled_vectors(ioapic);
+	kvm_vcpu_request_scan_ioapic(kvm);
+	kvm_ioapic_inject_all(ioapic, state->irr);
+	spin_unlock(&ioapic->lock);
+	return 0;
+}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
new file mode 100644
index 000000000000..3c9195535ffc
--- /dev/null
+++ b/arch/x86/kvm/ioapic.h
@@ -0,0 +1,119 @@
+#ifndef __KVM_IO_APIC_H
+#define __KVM_IO_APIC_H
+
+#include <linux/kvm_host.h>
+
+#include "iodev.h"
+
+struct kvm;
+struct kvm_vcpu;
+
+#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
+#define IOAPIC_EDGE_TRIG  0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
+#define IOAPIC_MEM_LENGTH            0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT  0x00
+#define IOAPIC_REG_WINDOW  0x10
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID  0x02	/* x86 IOAPIC only */
+
+/*ioapic delivery mode*/
+#define	IOAPIC_FIXED			0x0
+#define	IOAPIC_LOWEST_PRIORITY		0x1
+#define	IOAPIC_PMI			0x2
+#define	IOAPIC_NMI			0x4
+#define	IOAPIC_INIT			0x5
+#define	IOAPIC_EXTINT			0x7
+
+#ifdef CONFIG_X86
+#define RTC_GSI 8
+#else
+#define RTC_GSI -1U
+#endif
+
+struct rtc_status {
+	int pending_eoi;
+	DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
+};
+
+union kvm_ioapic_redirect_entry {
+	u64 bits;
+	struct {
+		u8 vector;
+		u8 delivery_mode:3;
+		u8 dest_mode:1;
+		u8 delivery_status:1;
+		u8 polarity:1;
+		u8 remote_irr:1;
+		u8 trig_mode:1;
+		u8 mask:1;
+		u8 reserve:7;
+		u8 reserved[4];
+		u8 dest_id;
+	} fields;
+};
+
+struct kvm_ioapic {
+	u64 base_address;
+	u32 ioregsel;
+	u32 id;
+	u32 irr;
+	u32 pad;
+	union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
+	unsigned long irq_states[IOAPIC_NUM_PINS];
+	struct kvm_io_device dev;
+	struct kvm *kvm;
+	void (*ack_notifier)(void *opaque, int irq);
+	spinlock_t lock;
+	DECLARE_BITMAP(handled_vectors, 256);
+	struct rtc_status rtc_status;
+	struct delayed_work eoi_inject;
+	u32 irq_eoi[IOAPIC_NUM_PINS];
+};
+
+#ifdef DEBUG
+#define ASSERT(x)  							\
+do {									\
+	if (!(x)) {							\
+		printk(KERN_EMERG "assertion failed %s: %d: %s\n",	\
+		       __FILE__, __LINE__, #x);				\
+		BUG();							\
+	}								\
+} while (0)
+#else
+#define ASSERT(x) do { } while (0)
+#endif
+
+static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
+{
+	return kvm->arch.vioapic;
+}
+
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, unsigned int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
+			int trigger_mode);
+bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_destroy(struct kvm *kvm);
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
+		       int level, bool line_status);
+void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, unsigned long *dest_map);
+int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+			u32 *tmr);
+
+#endif
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
new file mode 100644
index 000000000000..17b73eeac8a4
--- /dev/null
+++ b/arch/x86/kvm/iommu.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/stat.h>
+#include <linux/dmar.h>
+#include <linux/iommu.h>
+#include <linux/intel-iommu.h>
+#include "assigned-dev.h"
+
+static bool allow_unsafe_assigned_interrupts;
+module_param_named(allow_unsafe_assigned_interrupts,
+		   allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
+ "Enable device assignment on platforms without interrupt remapping support.");
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+			   unsigned long npages)
+{
+	gfn_t end_gfn;
+	pfn_t pfn;
+
+	pfn     = gfn_to_pfn_memslot(slot, gfn);
+	end_gfn = gfn + npages;
+	gfn    += 1;
+
+	if (is_error_noslot_pfn(pfn))
+		return pfn;
+
+	while (gfn < end_gfn)
+		gfn_to_pfn_memslot(slot, gfn++);
+
+	return pfn;
+}
+
+static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
+{
+	unsigned long i;
+
+	for (i = 0; i < npages; ++i)
+		kvm_release_pfn_clean(pfn + i);
+}
+
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	gfn_t gfn, end_gfn;
+	pfn_t pfn;
+	int r = 0;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
+	int flags;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	gfn     = slot->base_gfn;
+	end_gfn = gfn + slot->npages;
+
+	flags = IOMMU_READ;
+	if (!(slot->flags & KVM_MEM_READONLY))
+		flags |= IOMMU_WRITE;
+	if (!kvm->arch.iommu_noncoherent)
+		flags |= IOMMU_CACHE;
+
+
+	while (gfn < end_gfn) {
+		unsigned long page_size;
+
+		/* Check if already mapped */
+		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
+			gfn += 1;
+			continue;
+		}
+
+		/* Get the page size we could use to map */
+		page_size = kvm_host_page_size(kvm, gfn);
+
+		/* Make sure the page_size does not exceed the memslot */
+		while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
+			page_size >>= 1;
+
+		/* Make sure gfn is aligned to the page size we want to map */
+		while ((gfn << PAGE_SHIFT) & (page_size - 1))
+			page_size >>= 1;
+
+		/* Make sure hva is aligned to the page size we want to map */
+		while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
+			page_size >>= 1;
+
+		/*
+		 * Pin all pages we are about to map in memory. This is
+		 * important because we unmap and unpin in 4kb steps later.
+		 */
+		pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
+		if (is_error_noslot_pfn(pfn)) {
+			gfn += 1;
+			continue;
+		}
+
+		/* Map into IO address space */
+		r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
+			      page_size, flags);
+		if (r) {
+			printk(KERN_ERR "kvm_iommu_map_address:"
+			       "iommu failed to map pfn=%llx\n", pfn);
+			kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
+			goto unmap_pages;
+		}
+
+		gfn += page_size >> PAGE_SHIFT;
+
+
+	}
+
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
+	return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int idx, r = 0;
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+
+	if (kvm->arch.iommu_noncoherent)
+		kvm_arch_register_noncoherent_dma(kvm);
+
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm_memslots(kvm);
+
+	kvm_for_each_memslot(memslot, slots) {
+		r = kvm_iommu_map_pages(kvm, memslot);
+		if (r)
+			break;
+	}
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	return r;
+}
+
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
+{
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
+	int r;
+	bool noncoherent;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	if (pdev == NULL)
+		return -ENODEV;
+
+	r = iommu_attach_device(domain, &pdev->dev);
+	if (r) {
+		dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
+		return r;
+	}
+
+	noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
+
+	/* Check if need to update IOMMU page table for guest memory */
+	if (noncoherent != kvm->arch.iommu_noncoherent) {
+		kvm_iommu_unmap_memslots(kvm);
+		kvm->arch.iommu_noncoherent = noncoherent;
+		r = kvm_iommu_map_memslots(kvm);
+		if (r)
+			goto out_unmap;
+	}
+
+	pci_set_dev_assigned(pdev);
+
+	dev_info(&pdev->dev, "kvm assign device\n");
+
+	return 0;
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
+}
+
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
+{
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	if (pdev == NULL)
+		return -ENODEV;
+
+	iommu_detach_device(domain, &pdev->dev);
+
+	pci_clear_dev_assigned(pdev);
+
+	dev_info(&pdev->dev, "kvm deassign device\n");
+
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm)
+{
+	int r;
+
+	if (!iommu_present(&pci_bus_type)) {
+		printk(KERN_ERR "%s: iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	mutex_lock(&kvm->slots_lock);
+
+	kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
+	if (!kvm->arch.iommu_domain) {
+		r = -ENOMEM;
+		goto out_unlock;
+	}
+
+	if (!allow_unsafe_assigned_interrupts &&
+	    !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
+		printk(KERN_WARNING "%s: No interrupt remapping support,"
+		       " disallowing device assignment."
+		       " Re-enble with \"allow_unsafe_assigned_interrupts=1\""
+		       " module option.\n", __func__);
+		iommu_domain_free(kvm->arch.iommu_domain);
+		kvm->arch.iommu_domain = NULL;
+		r = -EPERM;
+		goto out_unlock;
+	}
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		kvm_iommu_unmap_memslots(kvm);
+
+out_unlock:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages)
+{
+	struct iommu_domain *domain;
+	gfn_t end_gfn, gfn;
+	pfn_t pfn;
+	u64 phys;
+
+	domain  = kvm->arch.iommu_domain;
+	end_gfn = base_gfn + npages;
+	gfn     = base_gfn;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return;
+
+	while (gfn < end_gfn) {
+		unsigned long unmap_pages;
+		size_t size;
+
+		/* Get physical address */
+		phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
+
+		if (!phys) {
+			gfn++;
+			continue;
+		}
+
+		pfn  = phys >> PAGE_SHIFT;
+
+		/* Unmap address from IO address space */
+		size       = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+		unmap_pages = 1ULL << get_order(size);
+
+		/* Unpin all pages we just unmapped to not leak any memory */
+		kvm_unpin_pages(kvm, pfn, unmap_pages);
+
+		gfn += unmap_pages;
+	}
+}
+
+void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int idx;
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm_memslots(kvm);
+
+	kvm_for_each_memslot(memslot, slots)
+		kvm_iommu_unmap_pages(kvm, memslot);
+
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	if (kvm->arch.iommu_noncoherent)
+		kvm_arch_unregister_noncoherent_dma(kvm);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	mutex_lock(&kvm->slots_lock);
+	kvm_iommu_unmap_memslots(kvm);
+	kvm->arch.iommu_domain = NULL;
+	kvm->arch.iommu_noncoherent = false;
+	mutex_unlock(&kvm->slots_lock);
+
+	iommu_domain_free(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
new file mode 100644
index 000000000000..72298b3ac025
--- /dev/null
+++ b/arch/x86/kvm/irq_comm.c
@@ -0,0 +1,332 @@
+/*
+ * irq_comm.c: Common API for in kernel interrupt controller
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <trace/events/kvm.h>
+
+#include <asm/msidef.h>
+
+#include "irq.h"
+
+#include "ioapic.h"
+
+static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
+			   struct kvm *kvm, int irq_source_id, int level,
+			   bool line_status)
+{
+	struct kvm_pic *pic = pic_irqchip(kvm);
+	return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
+}
+
+static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
+			      struct kvm *kvm, int irq_source_id, int level,
+			      bool line_status)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
+				line_status);
+}
+
+inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
+{
+	return irq->delivery_mode == APIC_DM_LOWEST;
+}
+
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, unsigned long *dest_map)
+{
+	int i, r = -1;
+	struct kvm_vcpu *vcpu, *lowest = NULL;
+
+	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
+			kvm_is_dm_lowest_prio(irq)) {
+		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+		irq->delivery_mode = APIC_DM_FIXED;
+	}
+
+	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
+		return r;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
+					irq->dest_id, irq->dest_mode))
+			continue;
+
+		if (!kvm_is_dm_lowest_prio(irq)) {
+			if (r < 0)
+				r = 0;
+			r += kvm_apic_set_irq(vcpu, irq, dest_map);
+		} else if (kvm_lapic_enabled(vcpu)) {
+			if (!lowest)
+				lowest = vcpu;
+			else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+				lowest = vcpu;
+		}
+	}
+
+	if (lowest)
+		r = kvm_apic_set_irq(lowest, irq, dest_map);
+
+	return r;
+}
+
+static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+				   struct kvm_lapic_irq *irq)
+{
+	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+
+	irq->dest_id = (e->msi.address_lo &
+			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+	irq->vector = (e->msi.data &
+			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+	irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+	irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
+	irq->delivery_mode = e->msi.data & 0x700;
+	irq->level = 1;
+	irq->shorthand = 0;
+	/* TODO Deal with RH bit of MSI message address */
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+	struct kvm_lapic_irq irq;
+
+	if (!level)
+		return -1;
+
+	kvm_set_msi_irq(e, &irq);
+
+	return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
+}
+
+
+static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
+			 struct kvm *kvm)
+{
+	struct kvm_lapic_irq irq;
+	int r;
+
+	kvm_set_msi_irq(e, &irq);
+
+	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
+		return r;
+	else
+		return -EWOULDBLOCK;
+}
+
+/*
+ * Deliver an IRQ in an atomic context if we can, or return a failure,
+ * user can retry in a process context.
+ * Return value:
+ *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
+ *  Other values - No need to retry.
+ */
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
+{
+	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+	struct kvm_kernel_irq_routing_entry *e;
+	int ret = -EINVAL;
+	int idx;
+
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	/*
+	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
+	 * which would need to be retried from thread context;  when same GSI
+	 * is connected to both PIC and IOAPIC, we'd have to report a
+	 * partial failure here.
+	 * Since there's no easy way to do this, we only support injecting MSI
+	 * which is limited to 1:1 GSI mapping.
+	 */
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
+		e = &entries[0];
+		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+			ret = kvm_set_msi_inatomic(e, kvm);
+		else
+			ret = -EWOULDBLOCK;
+	}
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+	return ret;
+}
+
+int kvm_request_irq_source_id(struct kvm *kvm)
+{
+	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
+	int irq_source_id;
+
+	mutex_lock(&kvm->irq_lock);
+	irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);
+
+	if (irq_source_id >= BITS_PER_LONG) {
+		printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
+		irq_source_id = -EFAULT;
+		goto unlock;
+	}
+
+	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+	set_bit(irq_source_id, bitmap);
+unlock:
+	mutex_unlock(&kvm->irq_lock);
+
+	return irq_source_id;
+}
+
+void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
+{
+	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+
+	mutex_lock(&kvm->irq_lock);
+	if (irq_source_id < 0 ||
+	    irq_source_id >= BITS_PER_LONG) {
+		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
+		goto unlock;
+	}
+	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	if (!irqchip_in_kernel(kvm))
+		goto unlock;
+
+	kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
+	kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
+unlock:
+	mutex_unlock(&kvm->irq_lock);
+}
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+				    struct kvm_irq_mask_notifier *kimn)
+{
+	mutex_lock(&kvm->irq_lock);
+	kimn->irq = irq;
+	hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
+}
+
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+				      struct kvm_irq_mask_notifier *kimn)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_del_rcu(&kimn->link);
+	mutex_unlock(&kvm->irq_lock);
+	synchronize_srcu(&kvm->irq_srcu);
+}
+
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+			     bool mask)
+{
+	struct kvm_irq_mask_notifier *kimn;
+	int idx, gsi;
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link)
+			if (kimn->irq == gsi)
+				kimn->func(kimn, mask);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+	int delta;
+	unsigned max_pin;
+
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_IRQCHIP:
+		delta = 0;
+		switch (ue->u.irqchip.irqchip) {
+		case KVM_IRQCHIP_PIC_MASTER:
+			e->set = kvm_set_pic_irq;
+			max_pin = PIC_NUM_PINS;
+			break;
+		case KVM_IRQCHIP_PIC_SLAVE:
+			e->set = kvm_set_pic_irq;
+			max_pin = PIC_NUM_PINS;
+			delta = 8;
+			break;
+		case KVM_IRQCHIP_IOAPIC:
+			max_pin = KVM_IOAPIC_NUM_PINS;
+			e->set = kvm_set_ioapic_irq;
+			break;
+		default:
+			goto out;
+		}
+		e->irqchip.irqchip = ue->u.irqchip.irqchip;
+		e->irqchip.pin = ue->u.irqchip.pin + delta;
+		if (e->irqchip.pin >= max_pin)
+			goto out;
+		break;
+	case KVM_IRQ_ROUTING_MSI:
+		e->set = kvm_set_msi;
+		e->msi.address_lo = ue->u.msi.address_lo;
+		e->msi.address_hi = ue->u.msi.address_hi;
+		e->msi.data = ue->u.msi.data;
+		break;
+	default:
+		goto out;
+	}
+
+	r = 0;
+out:
+	return r;
+}
+
+#define IOAPIC_ROUTING_ENTRY(irq) \
+	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
+	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
+#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
+
+#define PIC_ROUTING_ENTRY(irq) \
+	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
+	  .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
+#define ROUTING_ENTRY2(irq) \
+	IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
+
+static const struct kvm_irq_routing_entry default_routing[] = {
+	ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
+	ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
+	ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
+	ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
+	ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
+	ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
+	ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
+	ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
+	ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
+	ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
+	ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
+	ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
+};
+
+int kvm_setup_default_irq_routing(struct kvm *kvm)
+{
+	return kvm_set_irq_routing(kvm, default_routing,
+				   ARRAY_SIZE(default_routing), 0);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b8345dd41b25..4f0c0b954686 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -68,6 +68,9 @@
 #define MAX_APIC_VECTOR			256
 #define APIC_VECTORS_PER_REG		32
 
+#define APIC_BROADCAST			0xFF
+#define X2APIC_BROADCAST		0xFFFFFFFFul
+
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
 
@@ -129,8 +132,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
-#define KVM_X2APIC_CID_BITS 0
-
 static void recalculate_apic_map(struct kvm *kvm)
 {
 	struct kvm_apic_map *new, *old = NULL;
@@ -149,42 +150,56 @@ static void recalculate_apic_map(struct kvm *kvm)
 	new->cid_shift = 8;
 	new->cid_mask = 0;
 	new->lid_mask = 0xff;
+	new->broadcast = APIC_BROADCAST;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvm_lapic *apic = vcpu->arch.apic;
-		u16 cid, lid;
-		u32 ldr;
 
 		if (!kvm_apic_present(vcpu))
 			continue;
 
+		if (apic_x2apic_mode(apic)) {
+			new->ldr_bits = 32;
+			new->cid_shift = 16;
+			new->cid_mask = new->lid_mask = 0xffff;
+			new->broadcast = X2APIC_BROADCAST;
+		} else if (kvm_apic_get_reg(apic, APIC_LDR)) {
+			if (kvm_apic_get_reg(apic, APIC_DFR) ==
+							APIC_DFR_CLUSTER) {
+				new->cid_shift = 4;
+				new->cid_mask = 0xf;
+				new->lid_mask = 0xf;
+			} else {
+				new->cid_shift = 8;
+				new->cid_mask = 0;
+				new->lid_mask = 0xff;
+			}
+		}
+
 		/*
 		 * All APICs have to be configured in the same mode by an OS.
 		 * We take advatage of this while building logical id loockup
-		 * table. After reset APICs are in xapic/flat mode, so if we
-		 * find apic with different setting we assume this is the mode
+		 * table. After reset APICs are in software disabled mode, so if
+		 * we find apic with different setting we assume this is the mode
 		 * OS wants all apics to be in; build lookup table accordingly.
 		 */
-		if (apic_x2apic_mode(apic)) {
-			new->ldr_bits = 32;
-			new->cid_shift = 16;
-			new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
-			new->lid_mask = 0xffff;
-		} else if (kvm_apic_sw_enabled(apic) &&
-				!new->cid_mask /* flat mode */ &&
-				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
-			new->cid_shift = 4;
-			new->cid_mask = 0xf;
-			new->lid_mask = 0xf;
-		}
+		if (kvm_apic_sw_enabled(apic))
+			break;
+	}
 
-		new->phys_map[kvm_apic_id(apic)] = apic;
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_lapic *apic = vcpu->arch.apic;
+		u16 cid, lid;
+		u32 ldr, aid;
 
+		aid = kvm_apic_id(apic);
 		ldr = kvm_apic_get_reg(apic, APIC_LDR);
 		cid = apic_cluster_id(new, ldr);
 		lid = apic_logical_id(new, ldr);
 
-		if (lid)
+		if (aid < ARRAY_SIZE(new->phys_map))
+			new->phys_map[aid] = apic;
+		if (lid && cid < ARRAY_SIZE(new->logical_map))
 			new->logical_map[cid][ffs(lid) - 1] = apic;
 	}
 out:
@@ -201,11 +216,13 @@ out:
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 {
-	u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+	bool enabled = val & APIC_SPIV_APIC_ENABLED;
 
 	apic_set_reg(apic, APIC_SPIV, val);
-	if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
-		if (val & APIC_SPIV_APIC_ENABLED) {
+
+	if (enabled != apic->sw_enabled) {
+		apic->sw_enabled = enabled;
+		if (enabled) {
 			static_key_slow_dec_deferred(&apic_sw_disabled);
 			recalculate_apic_map(apic->vcpu->kvm);
 		} else
@@ -237,21 +254,17 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 
 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 {
-	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 }
 
 static inline int apic_lvtt_period(struct kvm_lapic *apic)
 {
-	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 }
 
 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 {
-	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		apic->lapic_timer.timer_mode_mask) ==
-			APIC_LVT_TIMER_TSCDEADLINE);
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 }
 
 static inline int apic_lvt_nmi_mode(u32 lvt_val)
@@ -326,8 +339,12 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
 static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
 {
-	apic->irr_pending = true;
 	apic_set_vector(vec, apic->regs + APIC_IRR);
+	/*
+	 * irr_pending must be true if any interrupt is pending; set it after
+	 * APIC_IRR to avoid race with apic_clear_irr
+	 */
+	apic->irr_pending = true;
 }
 
 static inline int apic_search_irr(struct kvm_lapic *apic)
@@ -359,13 +376,15 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 
 	vcpu = apic->vcpu;
 
-	apic_clear_vector(vec, apic->regs + APIC_IRR);
-	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
 		/* try to update RVI */
+		apic_clear_vector(vec, apic->regs + APIC_IRR);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
-	else {
-		vec = apic_search_irr(apic);
-		apic->irr_pending = (vec != -1);
+	} else {
+		apic->irr_pending = false;
+		apic_clear_vector(vec, apic->regs + APIC_IRR);
+		if (apic_search_irr(apic) != -1)
+			apic->irr_pending = true;
 	}
 }
 
@@ -558,16 +577,25 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 	apic_update_ppr(apic);
 }
 
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+{
+	return dest == (apic_x2apic_mode(apic) ?
+			X2APIC_BROADCAST : APIC_BROADCAST);
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
 {
-	return dest == 0xff || kvm_apic_id(apic) == dest;
+	return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
 }
 
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 {
 	int result = 0;
 	u32 logical_id;
 
+	if (kvm_apic_broadcast(apic, mda))
+		return 1;
+
 	if (apic_x2apic_mode(apic)) {
 		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
 		return logical_id & mda;
@@ -595,7 +623,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 }
 
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-			   int short_hand, int dest, int dest_mode)
+			   int short_hand, unsigned int dest, int dest_mode)
 {
 	int result = 0;
 	struct kvm_lapic *target = vcpu->arch.apic;
@@ -657,15 +685,24 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	if (!map)
 		goto out;
 
+	if (irq->dest_id == map->broadcast)
+		goto out;
+
+	ret = true;
+
 	if (irq->dest_mode == 0) { /* physical mode */
-		if (irq->delivery_mode == APIC_DM_LOWEST ||
-				irq->dest_id == 0xff)
+		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
 			goto out;
-		dst = &map->phys_map[irq->dest_id & 0xff];
+
+		dst = &map->phys_map[irq->dest_id];
 	} else {
 		u32 mda = irq->dest_id << (32 - map->ldr_bits);
+		u16 cid = apic_cluster_id(map, mda);
+
+		if (cid >= ARRAY_SIZE(map->logical_map))
+			goto out;
 
-		dst = map->logical_map[apic_cluster_id(map, mda)];
+		dst = map->logical_map[cid];
 
 		bitmap = apic_logical_id(map, mda);
 
@@ -691,8 +728,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 			*r = 0;
 		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 	}
-
-	ret = true;
 out:
 	rcu_read_unlock();
 	return ret;
@@ -1034,6 +1069,26 @@ static void update_divide_count(struct kvm_lapic *apic)
 				   apic->divide_count);
 }
 
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	wait_queue_head_t *q = &vcpu->wq;
+
+	/*
+	 * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
+	 * vcpu_enter_guest.
+	 */
+	if (atomic_read(&apic->lapic_timer.pending))
+		return;
+
+	atomic_inc(&apic->lapic_timer.pending);
+	/* FIXME: this code should not know anything about vcpus */
+	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+
+	if (waitqueue_active(q))
+		wake_up_interruptible(q);
+}
+
 static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now;
@@ -1096,9 +1151,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
 		if (likely(tscdeadline > guest_tsc)) {
 			ns = (tscdeadline - guest_tsc) * 1000000ULL;
 			do_div(ns, this_tsc_khz);
-		}
-		hrtimer_start(&apic->lapic_timer.timer,
-			ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+			hrtimer_start(&apic->lapic_timer.timer,
+				ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+		} else
+			apic_timer_expired(apic);
 
 		local_irq_restore(flags);
 	}
@@ -1203,17 +1259,20 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 		break;
 
-	case APIC_LVTT:
-		if ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		    apic->lapic_timer.timer_mode_mask) !=
-		   (val & apic->lapic_timer.timer_mode_mask))
+	case APIC_LVTT: {
+		u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
+
+		if (apic->lapic_timer.timer_mode != timer_mode) {
+			apic->lapic_timer.timer_mode = timer_mode;
 			hrtimer_cancel(&apic->lapic_timer.timer);
+		}
 
 		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
 		apic_set_reg(apic, APIC_LVTT, val);
 		break;
+	}
 
 	case APIC_TMICT:
 		if (apic_lvtt_tscdeadline(apic))
@@ -1320,7 +1379,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
 		static_key_slow_dec_deferred(&apic_hw_disabled);
 
-	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+	if (!apic->sw_enabled)
 		static_key_slow_dec_deferred(&apic_sw_disabled);
 
 	if (apic->regs)
@@ -1355,9 +1414,6 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 		return;
 
 	hrtimer_cancel(&apic->lapic_timer.timer);
-	/* Inject here so clearing tscdeadline won't override new value */
-	if (apic_has_pending_timer(vcpu))
-		kvm_inject_apic_timer_irqs(vcpu);
 	apic->lapic_timer.tscdeadline = data;
 	start_apic_timer(apic);
 }
@@ -1422,6 +1478,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 
+	if ((value & MSR_IA32_APICBASE_ENABLE) &&
+	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
+		pr_warn_once("APIC base relocation is unsupported by KVM");
+
 	/* with FSB delivery interrupt, we can restart APIC functionality */
 	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
 		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
@@ -1447,6 +1507,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
 		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+	apic->lapic_timer.timer_mode = 0;
 	apic_set_reg(apic, APIC_LVT0,
 		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 
@@ -1538,23 +1599,8 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 {
 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
 	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
-	struct kvm_vcpu *vcpu = apic->vcpu;
-	wait_queue_head_t *q = &vcpu->wq;
-
-	/*
-	 * There is a race window between reading and incrementing, but we do
-	 * not care about potentially losing timer events in the !reinject
-	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
-	 * in vcpu_enter_guest.
-	 */
-	if (!atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
-		/* FIXME: this code should not know anything about vcpus */
-		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-	}
 
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
+	apic_timer_expired(apic);
 
 	if (lapic_is_periodic(apic)) {
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
@@ -1693,6 +1739,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
 				1 : count_vectors(apic->regs + APIC_ISR);
 	apic->highest_isr_cache = -1;
+	if (kvm_x86_ops->hwapic_irr_update)
+		kvm_x86_ops->hwapic_irr_update(vcpu,
+				apic_find_highest_irr(apic));
 	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	kvm_rtc_eoi_tracking_restore_one(vcpu);
@@ -1837,8 +1886,11 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
 		return 1;
 
+	if (reg == APIC_ICR2)
+		return 1;
+
 	/* if this is ICR write vector before command */
-	if (msr == 0x830)
+	if (reg == APIC_ICR)
 		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
 	return apic_reg_write(apic, reg, (u32)data);
 }
@@ -1851,9 +1903,15 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
 		return 1;
 
+	if (reg == APIC_DFR || reg == APIC_ICR2) {
+		apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
+			   reg);
+		return 1;
+	}
+
 	if (apic_reg_read(apic, reg, 4, &low))
 		return 1;
-	if (msr == 0x830)
+	if (reg == APIC_ICR)
 		apic_reg_read(apic, APIC_ICR2, 4, &high);
 
 	*data = (((u64)high) << 32) | low;
@@ -1908,7 +1966,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
 void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	unsigned int sipi_vector;
+	u8 sipi_vector;
 	unsigned long pe;
 
 	if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6a11845fd8b9..c674fce53cf9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -11,6 +11,7 @@
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
+	u32 timer_mode;
 	u32 timer_mode_mask;
 	u64 tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
@@ -22,6 +23,7 @@ struct kvm_lapic {
 	struct kvm_timer lapic_timer;
 	u32 divide_count;
 	struct kvm_vcpu *vcpu;
+	bool sw_enabled;
 	bool irr_pending;
 	/* Number of bits set in ISR. */
 	s16 isr_count;
@@ -55,8 +57,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 		unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@ -119,11 +121,11 @@ static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
 
 extern struct static_key_deferred apic_sw_disabled;
 
-static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
 {
 	if (static_key_false(&apic_sw_disabled.key))
-		return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
-	return APIC_SPIV_APIC_ENABLED;
+		return apic->sw_enabled;
+	return true;
 }
 
 static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
@@ -152,8 +154,6 @@ static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
 	ldr >>= 32 - map->ldr_bits;
 	cid = (ldr >> map->cid_shift) & map->cid_mask;
 
-	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
-
 	return cid;
 }
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 978f402006ee..f83fc6c5e0ba 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -214,13 +214,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 #define MMIO_GEN_LOW_SHIFT		10
 #define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 2)
 #define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
-#define MMIO_MAX_GEN			((1 << MMIO_GEN_SHIFT) - 1)
 
 static u64 generation_mmio_spte_mask(unsigned int gen)
 {
 	u64 mask;
 
-	WARN_ON(gen > MMIO_MAX_GEN);
+	WARN_ON(gen & ~MMIO_GEN_MASK);
 
 	mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
 	mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
@@ -263,13 +262,13 @@ static bool is_mmio_spte(u64 spte)
 
 static gfn_t get_mmio_spte_gfn(u64 spte)
 {
-	u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
 	return (spte & ~mask) >> PAGE_SHIFT;
 }
 
 static unsigned get_mmio_spte_access(u64 spte)
 {
-	u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
 	return (spte & ~mask) & ~PAGE_MASK;
 }
 
@@ -4449,7 +4448,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
 	 * zap all shadow pages.
 	 */
 	if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
-		printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
+		printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n");
 		kvm_mmu_invalidate_zap_all_pages(kvm);
 	}
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7527cefc5a43..41dd0387cccb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1056,9 +1056,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	WARN_ON(adjustment < 0);
-	if (host)
-		adjustment = svm_scale_tsc(vcpu, adjustment);
+	if (host) {
+		if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
+			WARN_ON(adjustment < 0);
+		adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
+	}
 
 	svm->vmcb->control.tsc_offset += adjustment;
 	if (is_guest_mode(vcpu))
@@ -2999,7 +3001,6 @@ static int dr_interception(struct vcpu_svm *svm)
 {
 	int reg, dr;
 	unsigned long val;
-	int err;
 
 	if (svm->vcpu.guest_debug == 0) {
 		/*
@@ -3019,12 +3020,15 @@ static int dr_interception(struct vcpu_svm *svm)
 	dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
 
 	if (dr >= 16) { /* mov to DRn */
+		if (!kvm_require_dr(&svm->vcpu, dr - 16))
+			return 1;
 		val = kvm_register_read(&svm->vcpu, reg);
 		kvm_set_dr(&svm->vcpu, dr - 16, val);
 	} else {
-		err = kvm_get_dr(&svm->vcpu, dr, &val);
-		if (!err)
-			kvm_register_write(&svm->vcpu, reg, val);
+		if (!kvm_require_dr(&svm->vcpu, dr))
+			return 1;
+		kvm_get_dr(&svm->vcpu, dr, &val);
+		kvm_register_write(&svm->vcpu, reg, val);
 	}
 
 	skip_emulated_instruction(&svm->vcpu);
@@ -4123,6 +4127,11 @@ static bool svm_mpx_supported(void)
 	return false;
 }
 
+static bool svm_xsaves_supported(void)
+{
+	return false;
+}
+
 static bool svm_has_wbinvd_exit(void)
 {
 	return true;
@@ -4410,6 +4419,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.rdtscp_supported = svm_rdtscp_supported,
 	.invpcid_supported = svm_invpcid_supported,
 	.mpx_supported = svm_mpx_supported,
+	.xsaves_supported = svm_xsaves_supported,
 
 	.set_supported_cpuid = svm_set_supported_cpuid,
 
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6b06ab8748dd..c2a34bb5ad93 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -5,6 +5,7 @@
 #include <asm/vmx.h>
 #include <asm/svm.h>
 #include <asm/clocksource.h>
+#include <asm/pvclock-abi.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
@@ -877,6 +878,42 @@ TRACE_EVENT(kvm_ple_window,
 #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
 	trace_kvm_ple_window(false, vcpu_id, new, old)
 
+TRACE_EVENT(kvm_pvclock_update,
+	TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock),
+	TP_ARGS(vcpu_id, pvclock),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id			)
+		__field(	__u32,		version			)
+		__field(	__u64,		tsc_timestamp		)
+		__field(	__u64,		system_time		)
+		__field(	__u32,		tsc_to_system_mul	)
+		__field(	__s8,		tsc_shift		)
+		__field(	__u8,		flags			)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	   = vcpu_id;
+		__entry->version	   = pvclock->version;
+		__entry->tsc_timestamp	   = pvclock->tsc_timestamp;
+		__entry->system_time	   = pvclock->system_time;
+		__entry->tsc_to_system_mul = pvclock->tsc_to_system_mul;
+		__entry->tsc_shift	   = pvclock->tsc_shift;
+		__entry->flags		   = pvclock->flags;
+	),
+
+	TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, "
+		  "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, "
+		  "flags 0x%x }",
+		  __entry->vcpu_id,
+		  __entry->version,
+		  __entry->tsc_timestamp,
+		  __entry->system_time,
+		  __entry->tsc_to_system_mul,
+		  __entry->tsc_shift,
+		  __entry->flags)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c68351b..d4c58d884838 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 static bool __read_mostly nested = 0;
 module_param(nested, bool, S_IRUGO);
 
+static u64 __read_mostly host_xss;
+
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON						\
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS				      \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
-	 | X86_CR4_OSXMMEXCPT)
+	 | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -214,6 +216,7 @@ struct __packed vmcs12 {
 	u64 virtual_apic_page_addr;
 	u64 apic_access_addr;
 	u64 ept_pointer;
+	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
 	u64 guest_ia32_debugctl;
@@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
 	FIELD64(EPT_POINTER, ept_pointer),
+	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
 	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
@@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(HOST_RSP, host_rsp),
 	FIELD(HOST_RIP, host_rip),
 };
-static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
 
 static inline short vmcs_field_to_offset(unsigned long field)
 {
-	if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0)
-		return -1;
+	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+
+	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+	    vmcs_field_to_offset_table[field] == 0)
+		return -ENOENT;
+
 	return vmcs_field_to_offset_table[field];
 }
 
@@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
+static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
@@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
 }
 
+static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) &&
+		vmx_xsaves_supported();
+}
+
 static inline bool is_exception(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
 	clear_atomic_switch_msr(vmx, MSR_EFER);
-	/* On ept, can't emulate nx, and must switch nx atomically */
-	if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
+
+	/*
+	 * On EPT, we can't emulate NX, so we must switch EFER atomically.
+	 * On CPUs that support "load IA32_EFER", always switch EFER
+	 * atomically, since it's faster than switching it manually.
+	 */
+	if (cpu_has_load_ia32_efer ||
+	    (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
 		guest_efer = vmx->vcpu.arch.efer;
 		if (!(guest_efer & EFER_LMA))
 			guest_efer &= ~EFER_LME;
-		add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
+		if (guest_efer != host_efer)
+			add_atomic_switch_msr(vmx, MSR_EFER,
+					      guest_efer, host_efer);
 		return false;
 	}
 
@@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	nested_vmx_secondary_ctls_low = 0;
 	nested_vmx_secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-		SECONDARY_EXEC_UNRESTRICTED_GUEST |
-		SECONDARY_EXEC_WBINVD_EXITING;
+		SECONDARY_EXEC_WBINVD_EXITING |
+		SECONDARY_EXEC_XSAVES;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
-		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPT_INVEPT_BIT;
@@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
 		return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		data = vcpu->arch.ia32_xss;
+		break;
 	case MSR_TSC_AUX:
 		if (!to_vmx(vcpu)->rdtscp_enabled)
 			return 1;
@@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		return 1; /* they are read-only */
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		/*
+		 * The only supported bit as of Skylake is bit 8, but
+		 * it is not supported on KVM.
+		 */
+		if (data != 0)
+			return 1;
+		vcpu->arch.ia32_xss = data;
+		if (vcpu->arch.ia32_xss != host_xss)
+			add_atomic_switch_msr(vmx, MSR_IA32_XSS,
+				vcpu->arch.ia32_xss, host_xss);
+		else
+			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
+		break;
 	case MSR_TSC_AUX:
 		if (!vmx->rdtscp_enabled)
 			return 1;
@@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_ENABLE_INVPCID |
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-			SECONDARY_EXEC_SHADOW_VMCS;
+			SECONDARY_EXEC_SHADOW_VMCS |
+			SECONDARY_EXEC_XSAVES;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		}
 	}
 
+	if (cpu_has_xsaves)
+		rdmsrl(MSR_IA32_XSS, host_xss);
+
 	return 0;
 }
 
@@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void)
 	return 0;
 }
 
-static __init int hardware_setup(void)
-{
-	if (setup_vmcs_config(&vmcs_config) < 0)
-		return -EIO;
-
-	if (boot_cpu_has(X86_FEATURE_NX))
-		kvm_enable_efer_bits(EFER_NX);
-
-	if (!cpu_has_vmx_vpid())
-		enable_vpid = 0;
-	if (!cpu_has_vmx_shadow_vmcs())
-		enable_shadow_vmcs = 0;
-	if (enable_shadow_vmcs)
-		init_vmcs_shadow_fields();
-
-	if (!cpu_has_vmx_ept() ||
-	    !cpu_has_vmx_ept_4levels()) {
-		enable_ept = 0;
-		enable_unrestricted_guest = 0;
-		enable_ept_ad_bits = 0;
-	}
-
-	if (!cpu_has_vmx_ept_ad_bits())
-		enable_ept_ad_bits = 0;
-
-	if (!cpu_has_vmx_unrestricted_guest())
-		enable_unrestricted_guest = 0;
-
-	if (!cpu_has_vmx_flexpriority()) {
-		flexpriority_enabled = 0;
-
-		/*
-		 * set_apic_access_page_addr() is used to reload apic access
-		 * page upon invalidation.  No need to do anything if the
-		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
-		 */
-		kvm_x86_ops->set_apic_access_page_addr = NULL;
-	}
-
-	if (!cpu_has_vmx_tpr_shadow())
-		kvm_x86_ops->update_cr8_intercept = NULL;
-
-	if (enable_ept && !cpu_has_vmx_ept_2m_page())
-		kvm_disable_largepages();
-
-	if (!cpu_has_vmx_ple())
-		ple_gap = 0;
-
-	if (!cpu_has_vmx_apicv())
-		enable_apicv = 0;
-
-	if (enable_apicv)
-		kvm_x86_ops->update_cr8_intercept = NULL;
-	else {
-		kvm_x86_ops->hwapic_irr_update = NULL;
-		kvm_x86_ops->deliver_posted_interrupt = NULL;
-		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-	}
-
-	if (nested)
-		nested_vmx_setup_ctls_msrs();
-
-	return alloc_kvm_area();
-}
-
-static __exit void hardware_unsetup(void)
-{
-	free_kvm_area();
-}
-
 static bool emulation_required(struct kvm_vcpu *vcpu)
 {
 	return emulate_invalid_guest_state && !guest_state_valid(vcpu);
@@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
+#define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	set_cr4_guest_host_mask(vmx);
 
+	if (vmx_xsaves_supported())
+		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+
 	return 0;
 }
 
@@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
-	int dr, reg;
+	int dr, dr7, reg;
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+	/* First, if DR does not exist, trigger UD */
+	if (!kvm_require_dr(vcpu, dr))
+		return 1;
 
 	/* Do not handle if the CPL > 0, will trigger GP on re-entry */
 	if (!kvm_require_cpl(vcpu, 0))
 		return 1;
-	dr = vmcs_readl(GUEST_DR7);
-	if (dr & DR7_GD) {
+	dr7 = vmcs_readl(GUEST_DR7);
+	if (dr7 & DR7_GD) {
 		/*
 		 * As the vm-exit takes precedence over the debug trap, we
 		 * need to emulate the latter, either for the host or the
@@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		 */
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
 			vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
-			vcpu->run->debug.arch.dr7 = dr;
-			vcpu->run->debug.arch.pc =
-				vmcs_readl(GUEST_CS_BASE) +
-				vmcs_readl(GUEST_RIP);
+			vcpu->run->debug.arch.dr7 = dr7;
+			vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
 			vcpu->run->debug.arch.exception = DB_VECTOR;
 			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
 			return 0;
 		} else {
-			vcpu->arch.dr7 &= ~DR7_GD;
+			vcpu->arch.dr6 &= ~15;
 			vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
-			vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
 			kvm_queue_exception(vcpu, DB_VECTOR);
 			return 1;
 		}
@@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
 	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
 	if (exit_qualification & TYPE_MOV_FROM_DR) {
 		unsigned long val;
@@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_xsaves(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
+static int handle_xrstors(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	if (likely(fasteoi)) {
@@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
 	}
 
 	/* clear all local breakpoint enable flags */
-	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
 
 	/*
 	 * TODO: What about debug traps on tss switch?
@@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	trace_kvm_page_fault(gpa, exit_qualification);
 
 	/* It is a write fault? */
-	error_code = exit_qualification & (1U << 1);
+	error_code = exit_qualification & PFERR_WRITE_MASK;
 	/* It is a fetch fault? */
-	error_code |= (exit_qualification & (1U << 2)) << 2;
+	error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
 	/* ept page table is present? */
-	error_code |= (exit_qualification >> 3) & 0x1;
+	error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
 
 	vcpu->arch.exit_qualification = exit_qualification;
 
@@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void)
 			                    ple_window_grow, INT_MIN);
 }
 
+static __init int hardware_setup(void)
+{
+	int r = -ENOMEM, i, msr;
+
+	rdmsrl_safe(MSR_EFER, &host_efer);
+
+	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
+		kvm_define_shared_msr(i, vmx_msr_index[i]);
+
+	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_a)
+		return r;
+
+	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_b)
+		goto out;
+
+	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy)
+		goto out1;
+
+	vmx_msr_bitmap_legacy_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy_x2apic)
+		goto out2;
+
+	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode)
+		goto out3;
+
+	vmx_msr_bitmap_longmode_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode_x2apic)
+		goto out4;
+	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmread_bitmap)
+		goto out5;
+
+	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmwrite_bitmap)
+		goto out6;
+
+	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+
+	/*
+	 * Allow direct access to the PC debug port (it is often used for I/O
+	 * delays, but the vmexits simply slow things down).
+	 */
+	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+	clear_bit(0x80, vmx_io_bitmap_a);
+
+	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+
+	if (setup_vmcs_config(&vmcs_config) < 0) {
+		r = -EIO;
+		goto out7;
+	}
+
+	if (boot_cpu_has(X86_FEATURE_NX))
+		kvm_enable_efer_bits(EFER_NX);
+
+	if (!cpu_has_vmx_vpid())
+		enable_vpid = 0;
+	if (!cpu_has_vmx_shadow_vmcs())
+		enable_shadow_vmcs = 0;
+	if (enable_shadow_vmcs)
+		init_vmcs_shadow_fields();
+
+	if (!cpu_has_vmx_ept() ||
+	    !cpu_has_vmx_ept_4levels()) {
+		enable_ept = 0;
+		enable_unrestricted_guest = 0;
+		enable_ept_ad_bits = 0;
+	}
+
+	if (!cpu_has_vmx_ept_ad_bits())
+		enable_ept_ad_bits = 0;
+
+	if (!cpu_has_vmx_unrestricted_guest())
+		enable_unrestricted_guest = 0;
+
+	if (!cpu_has_vmx_flexpriority()) {
+		flexpriority_enabled = 0;
+
+		/*
+		 * set_apic_access_page_addr() is used to reload apic access
+		 * page upon invalidation.  No need to do anything if the
+		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
+		 */
+		kvm_x86_ops->set_apic_access_page_addr = NULL;
+	}
+
+	if (!cpu_has_vmx_tpr_shadow())
+		kvm_x86_ops->update_cr8_intercept = NULL;
+
+	if (enable_ept && !cpu_has_vmx_ept_2m_page())
+		kvm_disable_largepages();
+
+	if (!cpu_has_vmx_ple())
+		ple_gap = 0;
+
+	if (!cpu_has_vmx_apicv())
+		enable_apicv = 0;
+
+	if (enable_apicv)
+		kvm_x86_ops->update_cr8_intercept = NULL;
+	else {
+		kvm_x86_ops->hwapic_irr_update = NULL;
+		kvm_x86_ops->deliver_posted_interrupt = NULL;
+		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+	}
+
+	if (nested)
+		nested_vmx_setup_ctls_msrs();
+
+	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+	memcpy(vmx_msr_bitmap_legacy_x2apic,
+			vmx_msr_bitmap_legacy, PAGE_SIZE);
+	memcpy(vmx_msr_bitmap_longmode_x2apic,
+			vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+	if (enable_apicv) {
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_msr_read_x2apic(msr);
+
+		/* According SDM, in x2apic mode, the whole id reg is used.
+		 * But in KVM, it only use the highest eight bits. Need to
+		 * intercept it */
+		vmx_enable_intercept_msr_read_x2apic(0x802);
+		/* TMCCT */
+		vmx_enable_intercept_msr_read_x2apic(0x839);
+		/* TPR */
+		vmx_disable_intercept_msr_write_x2apic(0x808);
+		/* EOI */
+		vmx_disable_intercept_msr_write_x2apic(0x80b);
+		/* SELF-IPI */
+		vmx_disable_intercept_msr_write_x2apic(0x83f);
+	}
+
+	if (enable_ept) {
+		kvm_mmu_set_mask_ptes(0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+			0ull, VMX_EPT_EXECUTABLE_MASK);
+		ept_set_mmio_spte_mask();
+		kvm_enable_tdp();
+	} else
+		kvm_disable_tdp();
+
+	update_ple_window_actual_max();
+
+	return alloc_kvm_area();
+
+out7:
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+	free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+out4:
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+out2:
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+out1:
+	free_page((unsigned long)vmx_io_bitmap_b);
+out:
+	free_page((unsigned long)vmx_io_bitmap_a);
+
+    return r;
+}
+
+static __exit void hardware_unsetup(void)
+{
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+	free_page((unsigned long)vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+	free_page((unsigned long)vmx_vmread_bitmap);
+
+	free_kvm_area();
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field)
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-					unsigned long field, u64 *ret)
+static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
+				  unsigned long field, u64 *ret)
 {
 	short offset = vmcs_field_to_offset(field);
 	char *p;
 
 	if (offset < 0)
-		return 0;
+		return offset;
 
 	p = ((char *)(get_vmcs12(vcpu))) + offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*ret = *((natural_width *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U16:
 		*ret = *((u16 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*ret = *((u32 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*ret = *((u64 *)p);
-		return 1;
+		return 0;
 	default:
-		return 0; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 }
 
 
-static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
-				    unsigned long field, u64 field_value){
+static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
+				   unsigned long field, u64 field_value){
 	short offset = vmcs_field_to_offset(field);
 	char *p = ((char *) get_vmcs12(vcpu)) + offset;
 	if (offset < 0)
-		return false;
+		return offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_U16:
 		*(u16 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*(u32 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*(u64 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*(natural_width *)p = field_value;
-		return true;
+		return 0;
 	default:
-		return false; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 
 }
@@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 			field_value = vmcs_readl(field);
 			break;
+		default:
+			WARN_ON(1);
+			continue;
 		}
 		vmcs12_write_any(&vmx->vcpu, field, field_value);
 	}
@@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 			case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 				vmcs_writel(field, (long)field_value);
 				break;
+			default:
+				WARN_ON(1);
+				break;
 			}
 		}
 	}
@@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 	/* Read the field, zero-extended to a u64 field_value */
-	if (!vmcs12_read_any(vcpu, field, &field_value)) {
+	if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	if (!vmcs12_write_any(vcpu, field, field_value)) {
+	if (vmcs12_write_any(vcpu, field, field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
+	[EXIT_REASON_XSAVES]                  = handle_xsaves,
+	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
 		return 1;
+	case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+		/*
+		 * This should never happen, since it is not possible to
+		 * set XSS to a non-zero value---neither in L1 nor in L2.
+		 * If if it were, XSS would have to be checked against
+		 * the XSS exit bitmap in vmcs12.
+		 */
+		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 	default:
 		return 1;
 	}
@@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector)
 	u16 status;
 	u8 old;
 
+	if (vector == -1)
+		vector = 0;
+
 	status = vmcs_read16(GUEST_INTR_STATUS);
 	old = (u8)status & 0xff;
 	if ((u8)vector != old) {
@@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector)
 
 static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 {
+	if (!is_guest_mode(vcpu)) {
+		vmx_set_rvi(max_irr);
+		return;
+	}
+
 	if (max_irr == -1)
 		return;
 
 	/*
-	 * If a vmexit is needed, vmx_check_nested_events handles it.
+	 * In guest mode.  If a vmexit is needed, vmx_check_nested_events
+	 * handles it.
 	 */
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+	if (nested_exit_on_intr(vcpu))
 		return;
 
-	if (!is_guest_mode(vcpu)) {
-		vmx_set_rvi(max_irr);
-		return;
-	}
-
 	/*
-	 * Fall back to pre-APICv interrupt injection since L2
+	 * Else, fall back to pre-APICv interrupt injection since L2
 	 * is run without virtual interrupt delivery.
 	 */
 	if (!kvm_event_needs_reinjection(vcpu) &&
@@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void)
 		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
 }
 
+static bool vmx_xsaves_supported(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_XSAVES;
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
 	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
 
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
 	exec_control = vmcs12->pin_based_vm_exec_control;
@@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
 	if (vmx_mpx_supported())
 		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
 
 	/* update exit information fields: */
 
@@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.check_intercept = vmx_check_intercept,
 	.handle_external_intr = vmx_handle_external_intr,
 	.mpx_supported = vmx_mpx_supported,
+	.xsaves_supported = vmx_xsaves_supported,
 
 	.check_nested_events = vmx_check_nested_events,
 
@@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	int r, i, msr;
-
-	rdmsrl_safe(MSR_EFER, &host_efer);
-
-	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
-		kvm_define_shared_msr(i, vmx_msr_index[i]);
-
-	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_a)
-		return -ENOMEM;
-
-	r = -ENOMEM;
-
-	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_b)
-		goto out;
-
-	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy)
-		goto out1;
-
-	vmx_msr_bitmap_legacy_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy_x2apic)
-		goto out2;
-
-	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode)
-		goto out3;
-
-	vmx_msr_bitmap_longmode_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode_x2apic)
-		goto out4;
-	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmread_bitmap)
-		goto out5;
-
-	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmwrite_bitmap)
-		goto out6;
-
-	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
-	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
-
-	/*
-	 * Allow direct access to the PC debug port (it is often used for I/O
-	 * delays, but the vmexits simply slow things down).
-	 */
-	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-	clear_bit(0x80, vmx_io_bitmap_a);
-
-	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
-	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
-	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
-
-	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-		     __alignof__(struct vcpu_vmx), THIS_MODULE);
+	int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+                     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out7;
+		return r;
 
 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 			   crash_vmclear_local_loaded_vmcss);
 #endif
 
-	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
-	memcpy(vmx_msr_bitmap_legacy_x2apic,
-			vmx_msr_bitmap_legacy, PAGE_SIZE);
-	memcpy(vmx_msr_bitmap_longmode_x2apic,
-			vmx_msr_bitmap_longmode, PAGE_SIZE);
-
-	if (enable_apicv) {
-		for (msr = 0x800; msr <= 0x8ff; msr++)
-			vmx_disable_intercept_msr_read_x2apic(msr);
-
-		/* According SDM, in x2apic mode, the whole id reg is used.
-		 * But in KVM, it only use the highest eight bits. Need to
-		 * intercept it */
-		vmx_enable_intercept_msr_read_x2apic(0x802);
-		/* TMCCT */
-		vmx_enable_intercept_msr_read_x2apic(0x839);
-		/* TPR */
-		vmx_disable_intercept_msr_write_x2apic(0x808);
-		/* EOI */
-		vmx_disable_intercept_msr_write_x2apic(0x80b);
-		/* SELF-IPI */
-		vmx_disable_intercept_msr_write_x2apic(0x83f);
-	}
-
-	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
-		ept_set_mmio_spte_mask();
-		kvm_enable_tdp();
-	} else
-		kvm_disable_tdp();
-
-	update_ple_window_actual_max();
-
 	return 0;
-
-out7:
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
-	free_page((unsigned long)vmx_vmread_bitmap);
-out5:
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-out3:
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
-	free_page((unsigned long)vmx_io_bitmap_b);
-out:
-	free_page((unsigned long)vmx_io_bitmap_a);
-	return r;
 }
 
 static void __exit vmx_exit(void)
 {
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-	free_page((unsigned long)vmx_io_bitmap_b);
-	free_page((unsigned long)vmx_io_bitmap_a);
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-	free_page((unsigned long)vmx_vmread_bitmap);
-
 #ifdef CONFIG_KEXEC
 	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
 	synchronize_rcu();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df32a745..c259814200bd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -27,6 +27,7 @@
 #include "kvm_cache_regs.h"
 #include "x86.h"
 #include "cpuid.h"
+#include "assigned-dev.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -353,6 +354,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 
 	if (!vcpu->arch.exception.pending) {
 	queue:
+		if (has_error && !is_protmode(vcpu))
+			has_error = false;
 		vcpu->arch.exception.pending = true;
 		vcpu->arch.exception.has_error_code = has_error;
 		vcpu->arch.exception.nr = nr;
@@ -455,6 +458,16 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
 }
 EXPORT_SYMBOL_GPL(kvm_require_cpl);
 
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
+{
+	if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+		return true;
+
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_require_dr);
+
 /*
  * This function will be used to read from the physical memory of the currently
  * running guest. The difference to kvm_read_guest_page is that this function
@@ -656,6 +669,12 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 	if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
 		return 1;
 
+	if (xcr0 & XSTATE_AVX512) {
+		if (!(xcr0 & XSTATE_YMM))
+			return 1;
+		if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
+			return 1;
+	}
 	kvm_put_guest_xcr0(vcpu);
 	vcpu->arch.xcr0 = xcr0;
 
@@ -732,6 +751,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+#ifdef CONFIG_X86_64
+	cr3 &= ~CR3_PCID_INVD;
+#endif
+
 	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
 		kvm_mmu_sync_roots(vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -811,8 +834,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 			vcpu->arch.eff_db[dr] = val;
 		break;
 	case 4:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1; /* #UD */
 		/* fall through */
 	case 6:
 		if (val & 0xffffffff00000000ULL)
@@ -821,8 +842,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 		kvm_update_dr6(vcpu);
 		break;
 	case 5:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1; /* #UD */
 		/* fall through */
 	default: /* 7 */
 		if (val & 0xffffffff00000000ULL)
@@ -837,27 +856,21 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 {
-	int res;
-
-	res = __kvm_set_dr(vcpu, dr, val);
-	if (res > 0)
-		kvm_queue_exception(vcpu, UD_VECTOR);
-	else if (res < 0)
+	if (__kvm_set_dr(vcpu, dr, val)) {
 		kvm_inject_gp(vcpu, 0);
-
-	return res;
+		return 1;
+	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_dr);
 
-static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 {
 	switch (dr) {
 	case 0 ... 3:
 		*val = vcpu->arch.db[dr];
 		break;
 	case 4:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1;
 		/* fall through */
 	case 6:
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
@@ -866,23 +879,11 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 			*val = kvm_x86_ops->get_dr6(vcpu);
 		break;
 	case 5:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1;
 		/* fall through */
 	default: /* 7 */
 		*val = vcpu->arch.dr7;
 		break;
 	}
-
-	return 0;
-}
-
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
-{
-	if (_kvm_get_dr(vcpu, dr, val)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
-	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -1237,21 +1238,22 @@ void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
 	bool vcpus_matched;
-	bool do_request = false;
 	struct kvm_arch *ka = &vcpu->kvm->arch;
 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 
 	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
 			 atomic_read(&vcpu->kvm->online_vcpus));
 
-	if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
-		if (!ka->use_master_clock)
-			do_request = 1;
-
-	if (!vcpus_matched && ka->use_master_clock)
-			do_request = 1;
-
-	if (do_request)
+	/*
+	 * Once the masterclock is enabled, always perform request in
+	 * order to update it.
+	 *
+	 * In order to enable masterclock, the host clocksource must be TSC
+	 * and the vcpus need to have matched TSCs.  When that happens,
+	 * perform request to enable masterclock.
+	 */
+	if (ka->use_master_clock ||
+	    (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
 		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 
 	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1637,16 +1639,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
 	vcpu->last_guest_tsc = tsc_timestamp;
 
+	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+		&guest_hv_clock, sizeof(guest_hv_clock))))
+		return 0;
+
 	/*
 	 * The interface expects us to write an even number signaling that the
 	 * update is finished. Since the guest won't see the intermediate
 	 * state, we just increase by 2 at the end.
 	 */
-	vcpu->hv_clock.version += 2;
-
-	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-		&guest_hv_clock, sizeof(guest_hv_clock))))
-		return 0;
+	vcpu->hv_clock.version = guest_hv_clock.version + 2;
 
 	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
 	pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1662,6 +1664,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 	vcpu->hv_clock.flags = pvclock_flags;
 
+	trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
 	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
 				&vcpu->hv_clock,
 				sizeof(vcpu->hv_clock));
@@ -2140,7 +2144,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC_ADJUST:
 		if (guest_cpuid_has_tsc_adjust(vcpu)) {
 			if (!msr_info->host_initiated) {
-				u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+				s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
 				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
 			}
 			vcpu->arch.ia32_tsc_adjust_msr = data;
@@ -3106,7 +3110,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
 	unsigned long val;
 
 	memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-	_kvm_get_dr(vcpu, 6, &val);
+	kvm_get_dr(vcpu, 6, &val);
 	dbgregs->dr6 = val;
 	dbgregs->dr7 = vcpu->arch.dr7;
 	dbgregs->flags = 0;
@@ -3128,15 +3132,89 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
+
+static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
+{
+	struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+	u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
+	u64 valid;
+
+	/*
+	 * Copy legacy XSAVE area, to avoid complications with CPUID
+	 * leaves 0 and 1 in the loop below.
+	 */
+	memcpy(dest, xsave, XSAVE_HDR_OFFSET);
+
+	/* Set XSTATE_BV */
+	*(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
+
+	/*
+	 * Copy each region from the possibly compacted offset to the
+	 * non-compacted offset.
+	 */
+	valid = xstate_bv & ~XSTATE_FPSSE;
+	while (valid) {
+		u64 feature = valid & -valid;
+		int index = fls64(feature) - 1;
+		void *src = get_xsave_addr(xsave, feature);
+
+		if (src) {
+			u32 size, offset, ecx, edx;
+			cpuid_count(XSTATE_CPUID, index,
+				    &size, &offset, &ecx, &edx);
+			memcpy(dest + offset, src, size);
+		}
+
+		valid -= feature;
+	}
+}
+
+static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
+{
+	struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+	u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
+	u64 valid;
+
+	/*
+	 * Copy legacy XSAVE area, to avoid complications with CPUID
+	 * leaves 0 and 1 in the loop below.
+	 */
+	memcpy(xsave, src, XSAVE_HDR_OFFSET);
+
+	/* Set XSTATE_BV and possibly XCOMP_BV.  */
+	xsave->xsave_hdr.xstate_bv = xstate_bv;
+	if (cpu_has_xsaves)
+		xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
+
+	/*
+	 * Copy each region from the non-compacted offset to the
+	 * possibly compacted offset.
+	 */
+	valid = xstate_bv & ~XSTATE_FPSSE;
+	while (valid) {
+		u64 feature = valid & -valid;
+		int index = fls64(feature) - 1;
+		void *dest = get_xsave_addr(xsave, feature);
+
+		if (dest) {
+			u32 size, offset, ecx, edx;
+			cpuid_count(XSTATE_CPUID, index,
+				    &size, &offset, &ecx, &edx);
+			memcpy(dest, src + offset, size);
+		} else
+			WARN_ON_ONCE(1);
+
+		valid -= feature;
+	}
+}
+
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 					 struct kvm_xsave *guest_xsave)
 {
 	if (cpu_has_xsave) {
-		memcpy(guest_xsave->region,
-			&vcpu->arch.guest_fpu.state->xsave,
-			vcpu->arch.guest_xstate_size);
-		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
-			vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+		memset(guest_xsave, 0, sizeof(struct kvm_xsave));
+		fill_xsave((u8 *) guest_xsave->region, vcpu);
 	} else {
 		memcpy(guest_xsave->region,
 			&vcpu->arch.guest_fpu.state->fxsave,
@@ -3160,8 +3238,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 		 */
 		if (xstate_bv & ~kvm_supported_xcr0())
 			return -EINVAL;
-		memcpy(&vcpu->arch.guest_fpu.state->xsave,
-			guest_xsave->region, vcpu->arch.guest_xstate_size);
+		load_xsave(vcpu, (u8 *)guest_xsave->region);
 	} else {
 		if (xstate_bv & ~XSTATE_FPSSE)
 			return -EINVAL;
@@ -4004,7 +4081,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 
 	default:
-		;
+		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
 	}
 out:
 	return r;
@@ -4667,7 +4744,7 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 
 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 {
-	return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+	return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
@@ -5211,21 +5288,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
 
 static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 {
-	struct kvm_run *kvm_run = vcpu->run;
-	unsigned long eip = vcpu->arch.emulate_ctxt.eip;
-	u32 dr6 = 0;
-
 	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
 	    (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
-		dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+		struct kvm_run *kvm_run = vcpu->run;
+		unsigned long eip = kvm_get_linear_rip(vcpu);
+		u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
 					   vcpu->arch.guest_debug_dr7,
 					   vcpu->arch.eff_db);
 
 		if (dr6 != 0) {
 			kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
-			kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
-				get_segment_base(vcpu, VCPU_SREG_CS);
-
+			kvm_run->debug.arch.pc = eip;
 			kvm_run->debug.arch.exception = DB_VECTOR;
 			kvm_run->exit_reason = KVM_EXIT_DEBUG;
 			*r = EMULATE_USER_EXIT;
@@ -5235,7 +5308,8 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 
 	if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
 	    !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
-		dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+		unsigned long eip = kvm_get_linear_rip(vcpu);
+		u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
 					   vcpu->arch.dr7,
 					   vcpu->arch.db);
 
@@ -5365,7 +5439,9 @@ restart:
 		kvm_rip_write(vcpu, ctxt->eip);
 		if (r == EMULATE_DONE)
 			kvm_vcpu_check_singlestep(vcpu, rflags, &r);
-		__kvm_set_rflags(vcpu, ctxt->eflags);
+		if (!ctxt->have_exception ||
+		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
+			__kvm_set_rflags(vcpu, ctxt->eflags);
 
 		/*
 		 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
@@ -5965,6 +6041,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
 					     X86_EFLAGS_RF);
 
+		if (vcpu->arch.exception.nr == DB_VECTOR &&
+		    (vcpu->arch.dr7 & DR7_GD)) {
+			vcpu->arch.dr7 &= ~DR7_GD;
+			kvm_update_dr7(vcpu);
+		}
+
 		kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
 					  vcpu->arch.exception.has_error_code,
 					  vcpu->arch.exception.error_code,
@@ -6873,6 +6955,9 @@ int fx_init(struct kvm_vcpu *vcpu)
 		return err;
 
 	fpu_finit(&vcpu->arch.guest_fpu);
+	if (cpu_has_xsaves)
+		vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
+			host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
 	/*
 	 * Ensure guest xcr0 is valid for loading
@@ -7024,7 +7109,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 {
 	struct kvm_segment cs;
 
@@ -7256,6 +7341,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (type)
 		return -EINVAL;
 
+	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
@@ -7536,12 +7622,18 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 	return kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
-bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
 {
-	unsigned long current_rip = kvm_rip_read(vcpu) +
-		get_segment_base(vcpu, VCPU_SREG_CS);
+	if (is_64_bit_mode(vcpu))
+		return kvm_rip_read(vcpu);
+	return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
+		     kvm_rip_read(vcpu));
+}
+EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
 
-	return current_rip == linear_rip;
+bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+{
+	return kvm_get_linear_rip(vcpu) == linear_rip;
 }
 EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7cb9c45a5fe0..cc1d61af6140 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -162,7 +162,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 
 #define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
-				| XSTATE_BNDREGS | XSTATE_BNDCSR)
+				| XSTATE_BNDREGS | XSTATE_BNDCSR \
+				| XSTATE_AVX512)
 extern u64 host_xcr0;
 
 extern u64 kvm_supported_xcr0(void);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index aae94132bc24..c1c1544b8485 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -841,7 +841,7 @@ static void __init lguest_init_IRQ(void)
 {
 	unsigned int i;
 
-	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+	for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
 		/* Some systems map "vectors" to interrupts weirdly.  Not us! */
 		__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
 		if (i != SYSCALL_VECTOR)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d973e61e450d..38dcec403b46 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -844,11 +844,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 	  unsigned int fault)
 {
 	struct task_struct *tsk = current;
-	struct mm_struct *mm = tsk->mm;
 	int code = BUS_ADRERR;
 
-	up_read(&mm->mmap_sem);
-
 	/* Kernel mode? Handle exceptions or die: */
 	if (!(error_code & PF_USER)) {
 		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
@@ -879,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	       unsigned long address, unsigned int fault)
 {
 	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
-		up_read(&current->mm->mmap_sem);
 		no_context(regs, error_code, address, 0, 0);
 		return;
 	}
@@ -887,14 +883,11 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	if (fault & VM_FAULT_OOM) {
 		/* Kernel mode? Handle exceptions or die: */
 		if (!(error_code & PF_USER)) {
-			up_read(&current->mm->mmap_sem);
 			no_context(regs, error_code, address,
 				   SIGSEGV, SEGV_MAPERR);
 			return;
 		}
 
-		up_read(&current->mm->mmap_sem);
-
 		/*
 		 * We ran out of memory, call the OOM killer, and return the
 		 * userspace (which will retry the fault, or kill us if we got
@@ -1062,7 +1055,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
 	struct mm_struct *mm;
-	int fault;
+	int fault, major = 0;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	tsk = current;
@@ -1237,47 +1230,50 @@ good_area:
 	 * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
 	 */
 	fault = handle_mm_fault(mm, vma, address, flags);
+	major |= fault & VM_FAULT_MAJOR;
 
 	/*
-	 * If we need to retry but a fatal signal is pending, handle the
-	 * signal first. We do not need to release the mmap_sem because it
-	 * would already be released in __lock_page_or_retry in mm/filemap.c.
+	 * If we need to retry the mmap_sem has already been released,
+	 * and if there is a fatal signal pending there is no guarantee
+	 * that we made any progress. Handle this case first.
 	 */
-	if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)))
+	if (unlikely(fault & VM_FAULT_RETRY)) {
+		/* Retry at most once */
+		if (flags & FAULT_FLAG_ALLOW_RETRY) {
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
+			if (!fatal_signal_pending(tsk))
+				goto retry;
+		}
+
+		/* User mode? Just return to handle the fatal exception */
+		if (flags & FAULT_FLAG_USER)
+			return;
+
+		/* Not returning to user mode? Handle exceptions or die: */
+		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
 		return;
+	}
 
+	up_read(&mm->mmap_sem);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mm_fault_error(regs, error_code, address, fault);
 		return;
 	}
 
 	/*
-	 * Major/minor page fault accounting is only done on the
-	 * initial attempt. If we go through a retry, it is extremely
-	 * likely that the page will be found in page cache at that point.
+	 * Major/minor page fault accounting. If any of the events
+	 * returned VM_FAULT_MAJOR, we account it as a major fault.
 	 */
-	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-				      regs, address);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-				      regs, address);
-		}
-		if (fault & VM_FAULT_RETRY) {
-			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
-			 * of starvation. */
-			flags &= ~FAULT_FLAG_ALLOW_RETRY;
-			flags |= FAULT_FLAG_TRIED;
-			goto retry;
-		}
+	if (major) {
+		tsk->maj_flt++;
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+	} else {
+		tsk->min_flt++;
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 	}
 
 	check_v8086_mode(regs, address, tsk);
-
-	up_read(&mm->mmap_sem);
 }
 NOKPROBE_SYMBOL(__do_page_fault);
 
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 207d9aef662d..d7547824e763 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -15,7 +15,7 @@
 static inline pte_t gup_get_pte(pte_t *ptep)
 {
 #ifndef CONFIG_X86_PAE
-	return ACCESS_ONCE(*ptep);
+	return READ_ONCE(*ptep);
 #else
 	/*
 	 * With get_user_pages_fast, we walk down the pagetables without taking
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dfaf2e0f5f8f..536ea2fb6e33 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -384,6 +384,26 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
 }
 
 /*
+ * Lookup the PMD entry for a virtual address. Return a pointer to the entry
+ * or NULL if not present.
+ */
+pmd_t *lookup_pmd_address(unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+
+	pgd = pgd_offset_k(address);
+	if (pgd_none(*pgd))
+		return NULL;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
+		return NULL;
+
+	return pmd_offset(pud, address);
+}
+
+/*
  * This is necessary because __pa() does not work on some
  * kinds of memory, like vmalloc() or the alloc_remap()
  * areas on 32-bit NUMA systems.  The percpu areas can
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index b9958c364075..44b9271580b5 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -210,6 +210,9 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 {
 	int polarity;
 
+	if (dev->irq_managed && dev->irq > 0)
+		return 0;
+
 	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
 		polarity = 0; /* active high */
 	else
@@ -224,13 +227,18 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 	if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0)
 		return -EBUSY;
 
+	dev->irq_managed = 1;
+
 	return 0;
 }
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (!mp_should_keep_irq(&dev->dev) && dev->irq > 0)
+	if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+	    dev->irq > 0) {
 		mp_unmap_irq(dev->irq);
+		dev->irq_managed = 0;
+	}
 }
 
 struct pci_ops intel_mid_pci_ops = {
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index eb500c2592ad..5dc6ca5e1741 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1200,11 +1200,12 @@ static int pirq_enable_irq(struct pci_dev *dev)
 #ifdef CONFIG_X86_IO_APIC
 			struct pci_dev *temp_dev;
 			int irq;
-			struct io_apic_irq_attr irq_attr;
+
+			if (dev->irq_managed && dev->irq > 0)
+				return 0;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-						PCI_SLOT(dev->devfn),
-						pin - 1, &irq_attr);
+						PCI_SLOT(dev->devfn), pin - 1);
 			/*
 			 * Busses behind bridges are typically not listed in the MP-table.
 			 * In this case we have to look up the IRQ based on the parent bus,
@@ -1218,7 +1219,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 				pin = pci_swizzle_interrupt_pin(dev, pin);
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn),
-						pin - 1, &irq_attr);
+						pin - 1);
 				if (irq >= 0)
 					dev_warn(&dev->dev, "using bridge %s "
 						 "INT %c to get IRQ %d\n",
@@ -1228,6 +1229,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
+				dev->irq_managed = 1;
 				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
@@ -1254,11 +1256,24 @@ static int pirq_enable_irq(struct pci_dev *dev)
 	return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+	if (dev->power.is_prepared)
+		return true;
+#ifdef CONFIG_PM
+	if (dev->power.runtime_status == RPM_SUSPENDING)
+		return true;
+#endif
+
+	return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
 	if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
-	    dev->irq) {
+	    dev->irq_managed && dev->irq) {
 		mp_unmap_irq(dev->irq);
 		dev->irq = 0;
+		dev->irq_managed = 0;
 	}
 }
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index b233681af4de..0ce673645432 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 		       unsigned long mmr_offset, int limit)
 {
 	const struct cpumask *eligible_cpu = cpumask_of(cpu);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode, err;
@@ -198,13 +198,13 @@ static int
 uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
 		    bool force)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	unsigned int dest;
 	unsigned long mmr_value, mmr_offset;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
+	if (apic_set_affinity(data, mask, &dest))
 		return -1;
 
 	mmr_value = 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 8c8298d78185..5c1f9ace7ae7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -387,7 +387,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 		unsigned long mfn;
 
 		if (!xen_feature(XENFEAT_auto_translated_physmap))
-			mfn = get_phys_to_machine(pfn);
+			mfn = __pfn_to_mfn(pfn);
 		else
 			mfn = pfn;
 		/*
@@ -1113,20 +1113,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
 	 * instead of somewhere later and be confusing. */
 	xen_mc_flush();
 }
-static void __init xen_pagetable_p2m_copy(void)
+
+static void __init xen_pagetable_p2m_free(void)
 {
 	unsigned long size;
 	unsigned long addr;
-	unsigned long new_mfn_list;
-
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
 
 	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
 
-	new_mfn_list = xen_revector_p2m_tree();
 	/* No memory or already called. */
-	if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
+	if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
 		return;
 
 	/* using __ka address and sticking INVALID_P2M_ENTRY! */
@@ -1144,8 +1140,6 @@ static void __init xen_pagetable_p2m_copy(void)
 
 	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
 	memblock_free(__pa(xen_start_info->mfn_list), size);
-	/* And revector! Bye bye old array */
-	xen_start_info->mfn_list = new_mfn_list;
 
 	/* At this stage, cleanup_highmap has already cleaned __ka space
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
@@ -1169,17 +1163,35 @@ static void __init xen_pagetable_p2m_copy(void)
 }
 #endif
 
-static void __init xen_pagetable_init(void)
+static void __init xen_pagetable_p2m_setup(void)
 {
-	paging_init();
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
+	xen_vmalloc_p2m_tree();
+
 #ifdef CONFIG_X86_64
-	xen_pagetable_p2m_copy();
+	xen_pagetable_p2m_free();
 #endif
+	/* And revector! Bye bye old array */
+	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
+}
+
+static void __init xen_pagetable_init(void)
+{
+	paging_init();
+	xen_post_allocator_init();
+
+	xen_pagetable_p2m_setup();
+
 	/* Allocate and initialize top and mid mfn levels for p2m structure */
 	xen_build_mfn_list_list();
 
+	/* Remap memory freed due to conflicts with E820 map */
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		xen_remap_memory();
+
 	xen_setup_shared_info();
-	xen_post_allocator_init();
 }
 static void xen_write_cr2(unsigned long cr2)
 {
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index b456b048eca9..edbc7a63fd73 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -3,21 +3,22 @@
  * guests themselves, but it must also access and update the p2m array
  * during suspend/resume when all the pages are reallocated.
  *
- * The p2m table is logically a flat array, but we implement it as a
- * three-level tree to allow the address space to be sparse.
+ * The logical flat p2m table is mapped to a linear kernel memory area.
+ * For accesses by Xen a three-level tree linked via mfns only is set up to
+ * allow the address space to be sparse.
  *
- *                               Xen
- *                                |
- *     p2m_top              p2m_top_mfn
- *       /  \                   /   \
- * p2m_mid p2m_mid	p2m_mid_mfn p2m_mid_mfn
- *    / \      / \         /           /
- *  p2m p2m p2m p2m p2m p2m p2m ...
+ *               Xen
+ *                |
+ *          p2m_top_mfn
+ *              /   \
+ * p2m_mid_mfn p2m_mid_mfn
+ *         /           /
+ *  p2m p2m p2m ...
  *
  * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
  *
- * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
- * maximum representable pseudo-physical address space is:
+ * The p2m_top_mfn level is limited to 1 page, so the maximum representable
+ * pseudo-physical address space is:
  *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
  *
  * P2M_PER_PAGE depends on the architecture, as a mfn is always
@@ -30,6 +31,9 @@
  * leaf entries, or for the top  root, or middle one, for which there is a void
  * entry, we assume it is  "missing". So (for example)
  *  pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
+ * We have a dedicated page p2m_missing with all entries being
+ * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
+ * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
  *
  * We also have the possibility of setting 1-1 mappings on certain regions, so
  * that:
@@ -39,122 +43,20 @@
  * PCI BARs, or ACPI spaces), we can create mappings easily because we
  * get the PFN value to match the MFN.
  *
- * For this to work efficiently we have one new page p2m_identity and
- * allocate (via reserved_brk) any other pages we need to cover the sides
- * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
- * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
- * no other fancy value).
+ * For this to work efficiently we have one new page p2m_identity. All entries
+ * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only
+ * recognizes that and MFNs, no other fancy value).
  *
  * On lookup we spot that the entry points to p2m_identity and return the
  * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
  * If the entry points to an allocated page, we just proceed as before and
- * return the PFN.  If the PFN has IDENTITY_FRAME_BIT set we unmask that in
+ * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
  * appropriate functions (pfn_to_mfn).
  *
  * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
  * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
  * non-identity pfn. To protect ourselves against we elect to set (and get) the
  * IDENTITY_FRAME_BIT on all identity mapped PFNs.
- *
- * This simplistic diagram is used to explain the more subtle piece of code.
- * There is also a digram of the P2M at the end that can help.
- * Imagine your E820 looking as so:
- *
- *                    1GB                                           2GB    4GB
- * /-------------------+---------\/----\         /----------\    /---+-----\
- * | System RAM        | Sys RAM ||ACPI|         | reserved |    | Sys RAM |
- * \-------------------+---------/\----/         \----------/    \---+-----/
- *                               ^- 1029MB                       ^- 2001MB
- *
- * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
- *  2048MB = 524288 (0x80000)]
- *
- * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
- * is actually not present (would have to kick the balloon driver to put it in).
- *
- * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
- * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
- * of the PFN and the end PFN (263424 and 512256 respectively). The first step
- * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
- * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
- * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as
- * required to split any existing p2m_mid_missing middle pages.
- *
- * With the E820 example above, 263424 is not 1GB aligned so we allocate a
- * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
- * Each entry in the allocate page is "missing" (points to p2m_missing).
- *
- * Next stage is to determine if we need to do a more granular boundary check
- * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
- * We check if the start pfn and end pfn violate that boundary check, and if
- * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer
- * granularity of setting which PFNs are missing and which ones are identity.
- * In our example 263424 and 512256 both fail the check so we reserve_brk two
- * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
- * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
- *
- * At this point we would at minimum reserve_brk one page, but could be up to
- * three. Each call to set_phys_range_identity has at maximum a three page
- * cost. If we were to query the P2M at this stage, all those entries from
- * start PFN through end PFN (so 1029MB -> 2001MB) would return
- * INVALID_P2M_ENTRY ("missing").
- *
- * The next step is to walk from the start pfn to the end pfn setting
- * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
- * If we find that the middle entry is pointing to p2m_missing we can swap it
- * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and
- * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions).
- * At this point we do not need to worry about boundary aligment (so no need to
- * reserve_brk a middle page, figure out which PFNs are "missing" and which
- * ones are identity), as that has been done earlier.  If we find that the
- * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
- * that page (which covers 512 PFNs) and set the appropriate PFN with
- * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
- * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
- * IDENTITY_FRAME_BIT set.
- *
- * All other regions that are void (or not filled) either point to p2m_missing
- * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
- * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
- * contain the INVALID_P2M_ENTRY value and are considered "missing."
- *
- * Finally, the region beyond the end of of the E820 (4 GB in this example)
- * is set to be identity (in case there are MMIO regions placed here).
- *
- * This is what the p2m ends up looking (for the E820 above) with this
- * fabulous drawing:
- *
- *    p2m         /--------------\
- *  /-----\       | &mfn_list[0],|                           /-----------------\
- *  |  0  |------>| &mfn_list[1],|    /---------------\      | ~0, ~0, ..      |
- *  |-----|       |  ..., ~0, ~0 |    | ~0, ~0, [x]---+----->| IDENTITY [@256] |
- *  |  1  |---\   \--------------/    | [p2m_identity]+\     | IDENTITY [@257] |
- *  |-----|    \                      | [p2m_identity]+\\    | ....            |
- *  |  2  |--\  \-------------------->|  ...          | \\   \----------------/
- *  |-----|   \                       \---------------/  \\
- *  |  3  |-\  \                                          \\  p2m_identity [1]
- *  |-----|  \  \-------------------->/---------------\   /-----------------\
- *  | ..  |\  |                       | [p2m_identity]+-->| ~0, ~0, ~0, ... |
- *  \-----/ | |                       | [p2m_identity]+-->| ..., ~0         |
- *          | |                       | ....          |   \-----------------/
- *          | |                       +-[x], ~0, ~0.. +\
- *          | |                       \---------------/ \
- *          | |                                          \-> /---------------\
- *          | V  p2m_mid_missing       p2m_missing           | IDENTITY[@0]  |
- *          | /-----------------\     /------------\         | IDENTITY[@256]|
- *          | | [p2m_missing]   +---->| ~0, ~0, ...|         | ~0, ~0, ....  |
- *          | | [p2m_missing]   +---->| ..., ~0    |         \---------------/
- *          | | ...             |     \------------/
- *          | \-----------------/
- *          |
- *          |     p2m_mid_identity
- *          |   /-----------------\
- *          \-->| [p2m_identity]  +---->[1]
- *              | [p2m_identity]  +---->[1]
- *              | ...             |
- *              \-----------------/
- *
- * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
  */
 
 #include <linux/init.h>
@@ -164,9 +66,11 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/bootmem.h>
+#include <linux/slab.h>
 
 #include <asm/cache.h>
 #include <asm/setup.h>
+#include <asm/uaccess.h>
 
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
@@ -178,31 +82,26 @@
 #include "multicalls.h"
 #include "xen-ops.h"
 
+#define PMDS_PER_MID_PAGE	(P2M_MID_PER_PAGE / PTRS_PER_PTE)
+
 static void __init m2p_override_init(void);
 
+unsigned long *xen_p2m_addr __read_mostly;
+EXPORT_SYMBOL_GPL(xen_p2m_addr);
+unsigned long xen_p2m_size __read_mostly;
+EXPORT_SYMBOL_GPL(xen_p2m_size);
 unsigned long xen_max_p2m_pfn __read_mostly;
+EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
+
+static DEFINE_SPINLOCK(p2m_update_lock);
 
 static unsigned long *p2m_mid_missing_mfn;
 static unsigned long *p2m_top_mfn;
 static unsigned long **p2m_top_mfn_p;
-
-/* Placeholders for holes in the address space */
-static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
-
-static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
-
-static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
-
-RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-
-/* For each I/O range remapped we may lose up to two leaf pages for the boundary
- * violations and three mid pages to cover up to 3GB. With
- * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
- * remapped region.
- */
-RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
+static unsigned long *p2m_missing;
+static unsigned long *p2m_identity;
+static pte_t *p2m_missing_pte;
+static pte_t *p2m_identity_pte;
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
@@ -220,14 +119,6 @@ static inline unsigned p2m_index(unsigned long pfn)
 	return pfn % P2M_PER_PAGE;
 }
 
-static void p2m_top_init(unsigned long ***top)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-		top[i] = p2m_mid_missing;
-}
-
 static void p2m_top_mfn_init(unsigned long *top)
 {
 	unsigned i;
@@ -244,28 +135,43 @@ static void p2m_top_mfn_p_init(unsigned long **top)
 		top[i] = p2m_mid_missing_mfn;
 }
 
-static void p2m_mid_init(unsigned long **mid, unsigned long *leaf)
+static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
 {
 	unsigned i;
 
 	for (i = 0; i < P2M_MID_PER_PAGE; i++)
-		mid[i] = leaf;
+		mid[i] = virt_to_mfn(leaf);
 }
 
-static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
+static void p2m_init(unsigned long *p2m)
 {
 	unsigned i;
 
-	for (i = 0; i < P2M_MID_PER_PAGE; i++)
-		mid[i] = virt_to_mfn(leaf);
+	for (i = 0; i < P2M_PER_PAGE; i++)
+		p2m[i] = INVALID_P2M_ENTRY;
 }
 
-static void p2m_init(unsigned long *p2m)
+static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
 {
 	unsigned i;
 
-	for (i = 0; i < P2M_MID_PER_PAGE; i++)
-		p2m[i] = INVALID_P2M_ENTRY;
+	for (i = 0; i < P2M_PER_PAGE; i++)
+		p2m[i] = IDENTITY_FRAME(pfn + i);
+}
+
+static void * __ref alloc_p2m_page(void)
+{
+	if (unlikely(!slab_is_available()))
+		return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+
+	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
+}
+
+/* Only to be called in case of a race for a page just allocated! */
+static void free_p2m_page(void *p)
+{
+	BUG_ON(!slab_is_available());
+	free_page((unsigned long)p);
 }
 
 /*
@@ -280,40 +186,46 @@ static void p2m_init(unsigned long *p2m)
  */
 void __ref xen_build_mfn_list_list(void)
 {
-	unsigned long pfn;
+	unsigned long pfn, mfn;
+	pte_t *ptep;
+	unsigned int level, topidx, mididx;
+	unsigned long *mid_mfn_p;
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return;
 
 	/* Pre-initialize p2m_top_mfn to be completely missing */
 	if (p2m_top_mfn == NULL) {
-		p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+		p2m_mid_missing_mfn = alloc_p2m_page();
 		p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
 
-		p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+		p2m_top_mfn_p = alloc_p2m_page();
 		p2m_top_mfn_p_init(p2m_top_mfn_p);
 
-		p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+		p2m_top_mfn = alloc_p2m_page();
 		p2m_top_mfn_init(p2m_top_mfn);
 	} else {
 		/* Reinitialise, mfn's all change after migration */
 		p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
 	}
 
-	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
-		unsigned topidx = p2m_top_index(pfn);
-		unsigned mididx = p2m_mid_index(pfn);
-		unsigned long **mid;
-		unsigned long *mid_mfn_p;
+	for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
+	     pfn += P2M_PER_PAGE) {
+		topidx = p2m_top_index(pfn);
+		mididx = p2m_mid_index(pfn);
 
-		mid = p2m_top[topidx];
 		mid_mfn_p = p2m_top_mfn_p[topidx];
+		ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
+				      &level);
+		BUG_ON(!ptep || level != PG_LEVEL_4K);
+		mfn = pte_mfn(*ptep);
+		ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
 
 		/* Don't bother allocating any mfn mid levels if
 		 * they're just missing, just update the stored mfn,
 		 * since all could have changed over a migrate.
 		 */
-		if (mid == p2m_mid_missing) {
+		if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
 			BUG_ON(mididx);
 			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
 			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
@@ -322,19 +234,14 @@ void __ref xen_build_mfn_list_list(void)
 		}
 
 		if (mid_mfn_p == p2m_mid_missing_mfn) {
-			/*
-			 * XXX boot-time only!  We should never find
-			 * missing parts of the mfn tree after
-			 * runtime.
-			 */
-			mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+			mid_mfn_p = alloc_p2m_page();
 			p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
 
 			p2m_top_mfn_p[topidx] = mid_mfn_p;
 		}
 
 		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
-		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
+		mid_mfn_p[mididx] = mfn;
 	}
 }
 
@@ -353,171 +260,235 @@ void xen_setup_mfn_list_list(void)
 /* Set up p2m_top to point to the domain-builder provided p2m pages */
 void __init xen_build_dynamic_phys_to_machine(void)
 {
-	unsigned long *mfn_list;
-	unsigned long max_pfn;
 	unsigned long pfn;
 
 	 if (xen_feature(XENFEAT_auto_translated_physmap))
 		return;
 
-	mfn_list = (unsigned long *)xen_start_info->mfn_list;
-	max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
-	xen_max_p2m_pfn = max_pfn;
+	xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
+	xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);
 
-	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_init(p2m_missing);
-	p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_init(p2m_identity);
+	for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++)
+		xen_p2m_addr[pfn] = INVALID_P2M_ENTRY;
 
-	p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_mid_init(p2m_mid_missing, p2m_missing);
-	p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_mid_init(p2m_mid_identity, p2m_identity);
+	xen_max_p2m_pfn = xen_p2m_size;
+}
 
-	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_top_init(p2m_top);
+#define P2M_TYPE_IDENTITY	0
+#define P2M_TYPE_MISSING	1
+#define P2M_TYPE_PFN		2
+#define P2M_TYPE_UNKNOWN	3
 
-	/*
-	 * The domain builder gives us a pre-constructed p2m array in
-	 * mfn_list for all the pages initially given to us, so we just
-	 * need to graft that into our tree structure.
-	 */
-	for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
-		unsigned topidx = p2m_top_index(pfn);
-		unsigned mididx = p2m_mid_index(pfn);
+static int xen_p2m_elem_type(unsigned long pfn)
+{
+	unsigned long mfn;
 
-		if (p2m_top[topidx] == p2m_mid_missing) {
-			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-			p2m_mid_init(mid, p2m_missing);
+	if (pfn >= xen_p2m_size)
+		return P2M_TYPE_IDENTITY;
 
-			p2m_top[topidx] = mid;
-		}
+	mfn = xen_p2m_addr[pfn];
 
-		/*
-		 * As long as the mfn_list has enough entries to completely
-		 * fill a p2m page, pointing into the array is ok. But if
-		 * not the entries beyond the last pfn will be undefined.
-		 */
-		if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
-			unsigned long p2midx;
+	if (mfn == INVALID_P2M_ENTRY)
+		return P2M_TYPE_MISSING;
 
-			p2midx = max_pfn % P2M_PER_PAGE;
-			for ( ; p2midx < P2M_PER_PAGE; p2midx++)
-				mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY;
-		}
-		p2m_top[topidx][mididx] = &mfn_list[pfn];
-	}
+	if (mfn & IDENTITY_FRAME_BIT)
+		return P2M_TYPE_IDENTITY;
 
-	m2p_override_init();
+	return P2M_TYPE_PFN;
 }
-#ifdef CONFIG_X86_64
-unsigned long __init xen_revector_p2m_tree(void)
+
+static void __init xen_rebuild_p2m_list(unsigned long *p2m)
 {
-	unsigned long va_start;
-	unsigned long va_end;
+	unsigned int i, chunk;
 	unsigned long pfn;
-	unsigned long pfn_free = 0;
-	unsigned long *mfn_list = NULL;
-	unsigned long size;
-
-	va_start = xen_start_info->mfn_list;
-	/*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
-	 * so make sure it is rounded up to that */
-	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
-	va_end = va_start + size;
-
-	/* If we were revectored already, don't do it again. */
-	if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
-		return 0;
+	unsigned long *mfns;
+	pte_t *ptep;
+	pmd_t *pmdp;
+	int type;
 
-	mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
-	if (!mfn_list) {
-		pr_warn("Could not allocate space for a new P2M tree!\n");
-		return xen_start_info->mfn_list;
-	}
-	/* Fill it out with INVALID_P2M_ENTRY value */
-	memset(mfn_list, 0xFF, size);
+	p2m_missing = alloc_p2m_page();
+	p2m_init(p2m_missing);
+	p2m_identity = alloc_p2m_page();
+	p2m_init(p2m_identity);
 
-	for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
-		unsigned topidx = p2m_top_index(pfn);
-		unsigned mididx;
-		unsigned long *mid_p;
+	p2m_missing_pte = alloc_p2m_page();
+	paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
+	p2m_identity_pte = alloc_p2m_page();
+	paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		set_pte(p2m_missing_pte + i,
+			pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
+		set_pte(p2m_identity_pte + i,
+			pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
+	}
 
-		if (!p2m_top[topidx])
+	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
+		/*
+		 * Try to map missing/identity PMDs or p2m-pages if possible.
+		 * We have to respect the structure of the mfn_list_list
+		 * which will be built just afterwards.
+		 * Chunk size to test is one p2m page if we are in the middle
+		 * of a mfn_list_list mid page and the complete mid page area
+		 * if we are at index 0 of the mid page. Please note that a
+		 * mid page might cover more than one PMD, e.g. on 32 bit PAE
+		 * kernels.
+		 */
+		chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
+			P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;
+
+		type = xen_p2m_elem_type(pfn);
+		i = 0;
+		if (type != P2M_TYPE_PFN)
+			for (i = 1; i < chunk; i++)
+				if (xen_p2m_elem_type(pfn + i) != type)
+					break;
+		if (i < chunk)
+			/* Reset to minimal chunk size. */
+			chunk = P2M_PER_PAGE;
+
+		if (type == P2M_TYPE_PFN || i < chunk) {
+			/* Use initial p2m page contents. */
+#ifdef CONFIG_X86_64
+			mfns = alloc_p2m_page();
+			copy_page(mfns, xen_p2m_addr + pfn);
+#else
+			mfns = xen_p2m_addr + pfn;
+#endif
+			ptep = populate_extra_pte((unsigned long)(p2m + pfn));
+			set_pte(ptep,
+				pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
 			continue;
+		}
 
-		if (p2m_top[topidx] == p2m_mid_missing)
+		if (chunk == P2M_PER_PAGE) {
+			/* Map complete missing or identity p2m-page. */
+			mfns = (type == P2M_TYPE_MISSING) ?
+				p2m_missing : p2m_identity;
+			ptep = populate_extra_pte((unsigned long)(p2m + pfn));
+			set_pte(ptep,
+				pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO));
 			continue;
+		}
 
-		mididx = p2m_mid_index(pfn);
-		mid_p = p2m_top[topidx][mididx];
-		if (!mid_p)
-			continue;
-		if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
-			continue;
+		/* Complete missing or identity PMD(s) can be mapped. */
+		ptep = (type == P2M_TYPE_MISSING) ?
+			p2m_missing_pte : p2m_identity_pte;
+		for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
+			pmdp = populate_extra_pmd(
+				(unsigned long)(p2m + pfn + i * PTRS_PER_PTE));
+			set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
+		}
+	}
+}
 
-		if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
-			continue;
+void __init xen_vmalloc_p2m_tree(void)
+{
+	static struct vm_struct vm;
 
-		/* The old va. Rebase it on mfn_list */
-		if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
-			unsigned long *new;
+	vm.flags = VM_ALLOC;
+	vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
+			PMD_SIZE * PMDS_PER_MID_PAGE);
+	vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
+	pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
 
-			if (pfn_free  > (size / sizeof(unsigned long))) {
-				WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
-				     size / sizeof(unsigned long), pfn_free);
-				return 0;
-			}
-			new = &mfn_list[pfn_free];
+	xen_max_p2m_pfn = vm.size / sizeof(unsigned long);
 
-			copy_page(new, mid_p);
-			p2m_top[topidx][mididx] = &mfn_list[pfn_free];
+	xen_rebuild_p2m_list(vm.addr);
 
-			pfn_free += P2M_PER_PAGE;
+	xen_p2m_addr = vm.addr;
+	xen_p2m_size = xen_max_p2m_pfn;
 
-		}
-		/* This should be the leafs allocated for identity from _brk. */
-	}
-	return (unsigned long)mfn_list;
+	xen_inv_extra_mem();
 
+	m2p_override_init();
 }
-#else
-unsigned long __init xen_revector_p2m_tree(void)
-{
-	return 0;
-}
-#endif
+
 unsigned long get_phys_to_machine(unsigned long pfn)
 {
-	unsigned topidx, mididx, idx;
+	pte_t *ptep;
+	unsigned int level;
+
+	if (unlikely(pfn >= xen_p2m_size)) {
+		if (pfn < xen_max_p2m_pfn)
+			return xen_chk_extra_mem(pfn);
 
-	if (unlikely(pfn >= MAX_P2M_PFN))
 		return IDENTITY_FRAME(pfn);
+	}
 
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-	idx = p2m_index(pfn);
+	ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
+	BUG_ON(!ptep || level != PG_LEVEL_4K);
 
 	/*
 	 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
 	 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
 	 * would be wrong.
 	 */
-	if (p2m_top[topidx][mididx] == p2m_identity)
+	if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
 		return IDENTITY_FRAME(pfn);
 
-	return p2m_top[topidx][mididx][idx];
+	return xen_p2m_addr[pfn];
 }
 EXPORT_SYMBOL_GPL(get_phys_to_machine);
 
-static void *alloc_p2m_page(void)
+/*
+ * Allocate new pmd(s). It is checked whether the old pmd is still in place.
+ * If not, nothing is changed. This is okay as the only reason for allocating
+ * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
+ * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
+ */
+static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
 {
-	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
-}
+	pte_t *ptechk;
+	pte_t *pteret = ptep;
+	pte_t *pte_newpg[PMDS_PER_MID_PAGE];
+	pmd_t *pmdp;
+	unsigned int level;
+	unsigned long flags;
+	unsigned long vaddr;
+	int i;
 
-static void free_p2m_page(void *p)
-{
-	free_page((unsigned long)p);
+	/* Do all allocations first to bail out in error case. */
+	for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
+		pte_newpg[i] = alloc_p2m_page();
+		if (!pte_newpg[i]) {
+			for (i--; i >= 0; i--)
+				free_p2m_page(pte_newpg[i]);
+
+			return NULL;
+		}
+	}
+
+	vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);
+
+	for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
+		copy_page(pte_newpg[i], pte_pg);
+		paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);
+
+		pmdp = lookup_pmd_address(vaddr);
+		BUG_ON(!pmdp);
+
+		spin_lock_irqsave(&p2m_update_lock, flags);
+
+		ptechk = lookup_address(vaddr, &level);
+		if (ptechk == pte_pg) {
+			set_pmd(pmdp,
+				__pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
+			if (vaddr == (addr & ~(PMD_SIZE - 1)))
+				pteret = pte_offset_kernel(pmdp, addr);
+			pte_newpg[i] = NULL;
+		}
+
+		spin_unlock_irqrestore(&p2m_update_lock, flags);
+
+		if (pte_newpg[i]) {
+			paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
+			free_p2m_page(pte_newpg[i]);
+		}
+
+		vaddr += PMD_SIZE;
+	}
+
+	return pteret;
 }
 
 /*
@@ -530,58 +501,62 @@ static void free_p2m_page(void *p)
 static bool alloc_p2m(unsigned long pfn)
 {
 	unsigned topidx, mididx;
-	unsigned long ***top_p, **mid;
 	unsigned long *top_mfn_p, *mid_mfn;
-	unsigned long *p2m_orig;
+	pte_t *ptep, *pte_pg;
+	unsigned int level;
+	unsigned long flags;
+	unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
+	unsigned long p2m_pfn;
 
 	topidx = p2m_top_index(pfn);
 	mididx = p2m_mid_index(pfn);
 
-	top_p = &p2m_top[topidx];
-	mid = ACCESS_ONCE(*top_p);
+	ptep = lookup_address(addr, &level);
+	BUG_ON(!ptep || level != PG_LEVEL_4K);
+	pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
 
-	if (mid == p2m_mid_missing) {
-		/* Mid level is missing, allocate a new one */
-		mid = alloc_p2m_page();
-		if (!mid)
+	if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
+		/* PMD level is missing, allocate a new one */
+		ptep = alloc_p2m_pmd(addr, ptep, pte_pg);
+		if (!ptep)
 			return false;
-
-		p2m_mid_init(mid, p2m_missing);
-
-		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
-			free_p2m_page(mid);
 	}
 
-	top_mfn_p = &p2m_top_mfn[topidx];
-	mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
+	if (p2m_top_mfn) {
+		top_mfn_p = &p2m_top_mfn[topidx];
+		mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
 
-	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
+		BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
 
-	if (mid_mfn == p2m_mid_missing_mfn) {
-		/* Separately check the mid mfn level */
-		unsigned long missing_mfn;
-		unsigned long mid_mfn_mfn;
-		unsigned long old_mfn;
+		if (mid_mfn == p2m_mid_missing_mfn) {
+			/* Separately check the mid mfn level */
+			unsigned long missing_mfn;
+			unsigned long mid_mfn_mfn;
+			unsigned long old_mfn;
 
-		mid_mfn = alloc_p2m_page();
-		if (!mid_mfn)
-			return false;
+			mid_mfn = alloc_p2m_page();
+			if (!mid_mfn)
+				return false;
 
-		p2m_mid_mfn_init(mid_mfn, p2m_missing);
+			p2m_mid_mfn_init(mid_mfn, p2m_missing);
 
-		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
-		mid_mfn_mfn = virt_to_mfn(mid_mfn);
-		old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
-		if (old_mfn != missing_mfn) {
-			free_p2m_page(mid_mfn);
-			mid_mfn = mfn_to_virt(old_mfn);
-		} else {
-			p2m_top_mfn_p[topidx] = mid_mfn;
+			missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
+			mid_mfn_mfn = virt_to_mfn(mid_mfn);
+			old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
+			if (old_mfn != missing_mfn) {
+				free_p2m_page(mid_mfn);
+				mid_mfn = mfn_to_virt(old_mfn);
+			} else {
+				p2m_top_mfn_p[topidx] = mid_mfn;
+			}
 		}
+	} else {
+		mid_mfn = NULL;
 	}
 
-	p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]);
-	if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) {
+	p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep));
+	if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
+	    p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
 		/* p2m leaf page is missing */
 		unsigned long *p2m;
 
@@ -589,183 +564,36 @@ static bool alloc_p2m(unsigned long pfn)
 		if (!p2m)
 			return false;
 
-		p2m_init(p2m);
-
-		if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
-			free_p2m_page(p2m);
+		if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
+			p2m_init(p2m);
 		else
-			mid_mfn[mididx] = virt_to_mfn(p2m);
-	}
-
-	return true;
-}
-
-static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
-{
-	unsigned topidx, mididx, idx;
-	unsigned long *p2m;
-
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-	idx = p2m_index(pfn);
-
-	/* Pfff.. No boundary cross-over, lets get out. */
-	if (!idx && check_boundary)
-		return false;
-
-	WARN(p2m_top[topidx][mididx] == p2m_identity,
-		"P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
-		topidx, mididx);
-
-	/*
-	 * Could be done by xen_build_dynamic_phys_to_machine..
-	 */
-	if (p2m_top[topidx][mididx] != p2m_missing)
-		return false;
-
-	/* Boundary cross-over for the edges: */
-	p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
-
-	p2m_init(p2m);
+			p2m_init_identity(p2m, pfn);
 
-	p2m_top[topidx][mididx] = p2m;
+		spin_lock_irqsave(&p2m_update_lock, flags);
 
-	return true;
-}
-
-static bool __init early_alloc_p2m_middle(unsigned long pfn)
-{
-	unsigned topidx = p2m_top_index(pfn);
-	unsigned long **mid;
-
-	mid = p2m_top[topidx];
-	if (mid == p2m_mid_missing) {
-		mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-
-		p2m_mid_init(mid, p2m_missing);
-
-		p2m_top[topidx] = mid;
-	}
-	return true;
-}
-
-/*
- * Skim over the P2M tree looking at pages that are either filled with
- * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
- * replace the P2M leaf with a p2m_missing or p2m_identity.
- * Stick the old page in the new P2M tree location.
- */
-static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn)
-{
-	unsigned topidx;
-	unsigned mididx;
-	unsigned ident_pfns;
-	unsigned inv_pfns;
-	unsigned long *p2m;
-	unsigned idx;
-	unsigned long pfn;
-
-	/* We only look when this entails a P2M middle layer */
-	if (p2m_index(set_pfn))
-		return false;
-
-	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
-		topidx = p2m_top_index(pfn);
-
-		if (!p2m_top[topidx])
-			continue;
-
-		if (p2m_top[topidx] == p2m_mid_missing)
-			continue;
-
-		mididx = p2m_mid_index(pfn);
-		p2m = p2m_top[topidx][mididx];
-		if (!p2m)
-			continue;
-
-		if ((p2m == p2m_missing) || (p2m == p2m_identity))
-			continue;
-
-		if ((unsigned long)p2m == INVALID_P2M_ENTRY)
-			continue;
-
-		ident_pfns = 0;
-		inv_pfns = 0;
-		for (idx = 0; idx < P2M_PER_PAGE; idx++) {
-			/* IDENTITY_PFNs are 1:1 */
-			if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
-				ident_pfns++;
-			else if (p2m[idx] == INVALID_P2M_ENTRY)
-				inv_pfns++;
-			else
-				break;
+		if (pte_pfn(*ptep) == p2m_pfn) {
+			set_pte(ptep,
+				pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
+			if (mid_mfn)
+				mid_mfn[mididx] = virt_to_mfn(p2m);
+			p2m = NULL;
 		}
-		if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
-			goto found;
-	}
-	return false;
-found:
-	/* Found one, replace old with p2m_identity or p2m_missing */
-	p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
-
-	/* Reset where we want to stick the old page in. */
-	topidx = p2m_top_index(set_pfn);
-	mididx = p2m_mid_index(set_pfn);
-
-	/* This shouldn't happen */
-	if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
-		early_alloc_p2m_middle(set_pfn);
-
-	if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
-		return false;
-
-	p2m_init(p2m);
-	p2m_top[topidx][mididx] = p2m;
 
-	return true;
-}
-bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
-		if (!early_alloc_p2m_middle(pfn))
-			return false;
-
-		if (early_can_reuse_p2m_middle(pfn))
-			return __set_phys_to_machine(pfn, mfn);
-
-		if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
-			return false;
+		spin_unlock_irqrestore(&p2m_update_lock, flags);
 
-		if (!__set_phys_to_machine(pfn, mfn))
-			return false;
+		if (p2m)
+			free_p2m_page(p2m);
 	}
 
 	return true;
 }
 
-static void __init early_split_p2m(unsigned long pfn)
-{
-	unsigned long mididx, idx;
-
-	mididx = p2m_mid_index(pfn);
-	idx = p2m_index(pfn);
-
-	/*
-	 * Allocate new middle and leaf pages if this pfn lies in the
-	 * middle of one.
-	 */
-	if (mididx || idx)
-		early_alloc_p2m_middle(pfn);
-	if (idx)
-		early_alloc_p2m(pfn, false);
-}
-
 unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 				      unsigned long pfn_e)
 {
 	unsigned long pfn;
 
-	if (unlikely(pfn_s >= MAX_P2M_PFN))
+	if (unlikely(pfn_s >= xen_p2m_size))
 		return 0;
 
 	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
@@ -774,101 +602,51 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 	if (pfn_s > pfn_e)
 		return 0;
 
-	if (pfn_e > MAX_P2M_PFN)
-		pfn_e = MAX_P2M_PFN;
-
-	early_split_p2m(pfn_s);
-	early_split_p2m(pfn_e);
-
-	for (pfn = pfn_s; pfn < pfn_e;) {
-		unsigned topidx = p2m_top_index(pfn);
-		unsigned mididx = p2m_mid_index(pfn);
-
-		if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
-			break;
-		pfn++;
-
-		/*
-		 * If the PFN was set to a middle or leaf identity
-		 * page the remainder must also be identity, so skip
-		 * ahead to the next middle or leaf entry.
-		 */
-		if (p2m_top[topidx] == p2m_mid_identity)
-			pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE);
-		else if (p2m_top[topidx][mididx] == p2m_identity)
-			pfn = ALIGN(pfn, P2M_PER_PAGE);
-	}
+	if (pfn_e > xen_p2m_size)
+		pfn_e = xen_p2m_size;
 
-	WARN((pfn - pfn_s) != (pfn_e - pfn_s),
-		"Identity mapping failed. We are %ld short of 1-1 mappings!\n",
-		(pfn_e - pfn_s) - (pfn - pfn_s));
+	for (pfn = pfn_s; pfn < pfn_e; pfn++)
+		xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn);
 
 	return pfn - pfn_s;
 }
 
-/* Try to install p2m mapping; fail if intermediate bits missing */
 bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
-	unsigned topidx, mididx, idx;
+	pte_t *ptep;
+	unsigned int level;
 
 	/* don't track P2M changes in autotranslate guests */
 	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
 		return true;
 
-	if (unlikely(pfn >= MAX_P2M_PFN)) {
+	if (unlikely(pfn >= xen_p2m_size)) {
 		BUG_ON(mfn != INVALID_P2M_ENTRY);
 		return true;
 	}
 
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-	idx = p2m_index(pfn);
-
-	/* For sparse holes were the p2m leaf has real PFN along with
-	 * PCI holes, stick in the PFN as the MFN value.
-	 *
-	 * set_phys_range_identity() will have allocated new middle
-	 * and leaf pages as required so an existing p2m_mid_missing
-	 * or p2m_missing mean that whole range will be identity so
-	 * these can be switched to p2m_mid_identity or p2m_identity.
-	 */
-	if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
-		if (p2m_top[topidx] == p2m_mid_identity)
-			return true;
-
-		if (p2m_top[topidx] == p2m_mid_missing) {
-			WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing,
-					p2m_mid_identity) != p2m_mid_missing);
-			return true;
-		}
-
-		if (p2m_top[topidx][mididx] == p2m_identity)
-			return true;
+	if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
+		return true;
 
-		/* Swap over from MISSING to IDENTITY if needed. */
-		if (p2m_top[topidx][mididx] == p2m_missing) {
-			WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
-				p2m_identity) != p2m_missing);
-			return true;
-		}
-	}
+	ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
+	BUG_ON(!ptep || level != PG_LEVEL_4K);
 
-	if (p2m_top[topidx][mididx] == p2m_missing)
+	if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing)))
 		return mfn == INVALID_P2M_ENTRY;
 
-	p2m_top[topidx][mididx][idx] = mfn;
+	if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
+		return mfn == IDENTITY_FRAME(pfn);
 
-	return true;
+	return false;
 }
 
 bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
-	if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
+	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
 		if (!alloc_p2m(pfn))
 			return false;
 
-		if (!__set_phys_to_machine(pfn, mfn))
-			return false;
+		return __set_phys_to_machine(pfn, mfn);
 	}
 
 	return true;
@@ -877,15 +655,16 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 #define M2P_OVERRIDE_HASH_SHIFT	10
 #define M2P_OVERRIDE_HASH	(1 << M2P_OVERRIDE_HASH_SHIFT)
 
-static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH);
+static struct list_head *m2p_overrides;
 static DEFINE_SPINLOCK(m2p_override_lock);
 
 static void __init m2p_override_init(void)
 {
 	unsigned i;
 
-	m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
-				   sizeof(unsigned long));
+	m2p_overrides = alloc_bootmem_align(
+				sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
+				sizeof(unsigned long));
 
 	for (i = 0; i < M2P_OVERRIDE_HASH; i++)
 		INIT_LIST_HEAD(&m2p_overrides[i]);
@@ -896,68 +675,9 @@ static unsigned long mfn_hash(unsigned long mfn)
 	return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
 }
 
-int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
-			    struct gnttab_map_grant_ref *kmap_ops,
-			    struct page **pages, unsigned int count)
-{
-	int i, ret = 0;
-	bool lazy = false;
-	pte_t *pte;
-
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return 0;
-
-	if (kmap_ops &&
-	    !in_interrupt() &&
-	    paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
-		arch_enter_lazy_mmu_mode();
-		lazy = true;
-	}
-
-	for (i = 0; i < count; i++) {
-		unsigned long mfn, pfn;
-
-		/* Do not add to override if the map failed. */
-		if (map_ops[i].status)
-			continue;
-
-		if (map_ops[i].flags & GNTMAP_contains_pte) {
-			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
-				(map_ops[i].host_addr & ~PAGE_MASK));
-			mfn = pte_mfn(*pte);
-		} else {
-			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
-		}
-		pfn = page_to_pfn(pages[i]);
-
-		WARN_ON(PagePrivate(pages[i]));
-		SetPagePrivate(pages[i]);
-		set_page_private(pages[i], mfn);
-		pages[i]->index = pfn_to_mfn(pfn);
-
-		if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		if (kmap_ops) {
-			ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
-			if (ret)
-				goto out;
-		}
-	}
-
-out:
-	if (lazy)
-		arch_leave_lazy_mmu_mode();
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
-
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page,
-		struct gnttab_map_grant_ref *kmap_op)
+static int m2p_add_override(unsigned long mfn, struct page *page,
+			    struct gnttab_map_grant_ref *kmap_op)
 {
 	unsigned long flags;
 	unsigned long pfn;
@@ -970,7 +690,7 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 		address = (unsigned long)__va(pfn << PAGE_SHIFT);
 		ptep = lookup_address(address, &level);
 		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
-					"m2p_add_override: pfn %lx not mapped", pfn))
+			 "m2p_add_override: pfn %lx not mapped", pfn))
 			return -EINVAL;
 	}
 
@@ -1004,19 +724,19 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 	 * because mfn_to_pfn (that ends up being called by GUPF) will
 	 * return the backend pfn rather than the frontend pfn. */
 	pfn = mfn_to_pfn_no_overrides(mfn);
-	if (get_phys_to_machine(pfn) == mfn)
+	if (__pfn_to_mfn(pfn) == mfn)
 		set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(m2p_add_override);
 
-int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
-			      struct gnttab_map_grant_ref *kmap_ops,
-			      struct page **pages, unsigned int count)
+int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
+			    struct gnttab_map_grant_ref *kmap_ops,
+			    struct page **pages, unsigned int count)
 {
 	int i, ret = 0;
 	bool lazy = false;
+	pte_t *pte;
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 0;
@@ -1029,35 +749,75 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 	}
 
 	for (i = 0; i < count; i++) {
-		unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i]));
-		unsigned long pfn = page_to_pfn(pages[i]);
+		unsigned long mfn, pfn;
 
-		if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
-			ret = -EINVAL;
-			goto out;
+		/* Do not add to override if the map failed. */
+		if (map_ops[i].status)
+			continue;
+
+		if (map_ops[i].flags & GNTMAP_contains_pte) {
+			pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
+				(map_ops[i].host_addr & ~PAGE_MASK));
+			mfn = pte_mfn(*pte);
+		} else {
+			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
 		}
+		pfn = page_to_pfn(pages[i]);
 
-		set_page_private(pages[i], INVALID_P2M_ENTRY);
-		WARN_ON(!PagePrivate(pages[i]));
-		ClearPagePrivate(pages[i]);
-		set_phys_to_machine(pfn, pages[i]->index);
+		WARN_ON(PagePrivate(pages[i]));
+		SetPagePrivate(pages[i]);
+		set_page_private(pages[i], mfn);
+		pages[i]->index = pfn_to_mfn(pfn);
 
-		if (kmap_ops)
-			ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
-		if (ret)
+		if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
+			ret = -ENOMEM;
 			goto out;
+		}
+
+		if (kmap_ops) {
+			ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
+			if (ret)
+				goto out;
+		}
 	}
 
 out:
 	if (lazy)
 		arch_leave_lazy_mmu_mode();
+
 	return ret;
 }
-EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
+EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
 
-int m2p_remove_override(struct page *page,
-			struct gnttab_map_grant_ref *kmap_op,
-			unsigned long mfn)
+static struct page *m2p_find_override(unsigned long mfn)
+{
+	unsigned long flags;
+	struct list_head *bucket;
+	struct page *p, *ret;
+
+	if (unlikely(!m2p_overrides))
+		return NULL;
+
+	ret = NULL;
+	bucket = &m2p_overrides[mfn_hash(mfn)];
+
+	spin_lock_irqsave(&m2p_override_lock, flags);
+
+	list_for_each_entry(p, bucket, lru) {
+		if (page_private(p) == mfn) {
+			ret = p;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&m2p_override_lock, flags);
+
+	return ret;
+}
+
+static int m2p_remove_override(struct page *page,
+			       struct gnttab_map_grant_ref *kmap_op,
+			       unsigned long mfn)
 {
 	unsigned long flags;
 	unsigned long pfn;
@@ -1072,7 +832,7 @@ int m2p_remove_override(struct page *page,
 		ptep = lookup_address(address, &level);
 
 		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
-					"m2p_remove_override: pfn %lx not mapped", pfn))
+			 "m2p_remove_override: pfn %lx not mapped", pfn))
 			return -EINVAL;
 	}
 
@@ -1102,9 +862,8 @@ int m2p_remove_override(struct page *page,
 			 * hypercall actually returned an error.
 			 */
 			if (kmap_op->handle == GNTST_general_error) {
-				printk(KERN_WARNING "m2p_remove_override: "
-						"pfn %lx mfn %lx, failed to modify kernel mappings",
-						pfn, mfn);
+				pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
+					pfn, mfn);
 				put_balloon_scratch_page();
 				return -1;
 			}
@@ -1112,14 +871,14 @@ int m2p_remove_override(struct page *page,
 			xen_mc_batch();
 
 			mcs = __xen_mc_entry(
-					sizeof(struct gnttab_unmap_and_replace));
+				sizeof(struct gnttab_unmap_and_replace));
 			unmap_op = mcs.args;
 			unmap_op->host_addr = kmap_op->host_addr;
 			unmap_op->new_addr = scratch_page_address;
 			unmap_op->handle = kmap_op->handle;
 
 			MULTI_grant_table_op(mcs.mc,
-					GNTTABOP_unmap_and_replace, unmap_op, 1);
+				GNTTABOP_unmap_and_replace, unmap_op, 1);
 
 			mcs = __xen_mc_entry(0);
 			MULTI_update_va_mapping(mcs.mc, scratch_page_address,
@@ -1145,35 +904,56 @@ int m2p_remove_override(struct page *page,
 	 * pfn again. */
 	mfn &= ~FOREIGN_FRAME_BIT;
 	pfn = mfn_to_pfn_no_overrides(mfn);
-	if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+	if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
 			m2p_find_override(mfn) == NULL)
 		set_phys_to_machine(pfn, mfn);
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(m2p_remove_override);
 
-struct page *m2p_find_override(unsigned long mfn)
+int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
+			      struct gnttab_map_grant_ref *kmap_ops,
+			      struct page **pages, unsigned int count)
 {
-	unsigned long flags;
-	struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)];
-	struct page *p, *ret;
+	int i, ret = 0;
+	bool lazy = false;
 
-	ret = NULL;
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 0;
 
-	spin_lock_irqsave(&m2p_override_lock, flags);
+	if (kmap_ops &&
+	    !in_interrupt() &&
+	    paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+		arch_enter_lazy_mmu_mode();
+		lazy = true;
+	}
 
-	list_for_each_entry(p, bucket, lru) {
-		if (page_private(p) == mfn) {
-			ret = p;
-			break;
+	for (i = 0; i < count; i++) {
+		unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
+		unsigned long pfn = page_to_pfn(pages[i]);
+
+		if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
+			ret = -EINVAL;
+			goto out;
 		}
-	}
 
-	spin_unlock_irqrestore(&m2p_override_lock, flags);
+		set_page_private(pages[i], INVALID_P2M_ENTRY);
+		WARN_ON(!PagePrivate(pages[i]));
+		ClearPagePrivate(pages[i]);
+		set_phys_to_machine(pfn, pages[i]->index);
+
+		if (kmap_ops)
+			ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
+		if (ret)
+			goto out;
+	}
 
+out:
+	if (lazy)
+		arch_leave_lazy_mmu_mode();
 	return ret;
 }
+EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
 
 unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
 {
@@ -1192,79 +972,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 #include "debugfs.h"
 static int p2m_dump_show(struct seq_file *m, void *v)
 {
-	static const char * const level_name[] = { "top", "middle",
-						"entry", "abnormal", "error"};
-#define TYPE_IDENTITY 0
-#define TYPE_MISSING 1
-#define TYPE_PFN 2
-#define TYPE_UNKNOWN 3
 	static const char * const type_name[] = {
-				[TYPE_IDENTITY] = "identity",
-				[TYPE_MISSING] = "missing",
-				[TYPE_PFN] = "pfn",
-				[TYPE_UNKNOWN] = "abnormal"};
-	unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
-	unsigned int uninitialized_var(prev_level);
-	unsigned int uninitialized_var(prev_type);
-
-	if (!p2m_top)
-		return 0;
-
-	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
-		unsigned topidx = p2m_top_index(pfn);
-		unsigned mididx = p2m_mid_index(pfn);
-		unsigned idx = p2m_index(pfn);
-		unsigned lvl, type;
-
-		lvl = 4;
-		type = TYPE_UNKNOWN;
-		if (p2m_top[topidx] == p2m_mid_missing) {
-			lvl = 0; type = TYPE_MISSING;
-		} else if (p2m_top[topidx] == NULL) {
-			lvl = 0; type = TYPE_UNKNOWN;
-		} else if (p2m_top[topidx][mididx] == NULL) {
-			lvl = 1; type = TYPE_UNKNOWN;
-		} else if (p2m_top[topidx][mididx] == p2m_identity) {
-			lvl = 1; type = TYPE_IDENTITY;
-		} else if (p2m_top[topidx][mididx] == p2m_missing) {
-			lvl = 1; type = TYPE_MISSING;
-		} else if (p2m_top[topidx][mididx][idx] == 0) {
-			lvl = 2; type = TYPE_UNKNOWN;
-		} else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
-			lvl = 2; type = TYPE_IDENTITY;
-		} else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
-			lvl = 2; type = TYPE_MISSING;
-		} else if (p2m_top[topidx][mididx][idx] == pfn) {
-			lvl = 2; type = TYPE_PFN;
-		} else if (p2m_top[topidx][mididx][idx] != pfn) {
-			lvl = 2; type = TYPE_PFN;
-		}
-		if (pfn == 0) {
-			prev_level = lvl;
-			prev_type = type;
-		}
-		if (pfn == MAX_DOMAIN_PAGES-1) {
-			lvl = 3;
-			type = TYPE_UNKNOWN;
-		}
-		if (prev_type != type) {
-			seq_printf(m, " [0x%lx->0x%lx] %s\n",
-				prev_pfn_type, pfn, type_name[prev_type]);
-			prev_pfn_type = pfn;
+				[P2M_TYPE_IDENTITY] = "identity",
+				[P2M_TYPE_MISSING] = "missing",
+				[P2M_TYPE_PFN] = "pfn",
+				[P2M_TYPE_UNKNOWN] = "abnormal"};
+	unsigned long pfn, first_pfn;
+	int type, prev_type;
+
+	prev_type = xen_p2m_elem_type(0);
+	first_pfn = 0;
+
+	for (pfn = 0; pfn < xen_p2m_size; pfn++) {
+		type = xen_p2m_elem_type(pfn);
+		if (type != prev_type) {
+			seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
+				   type_name[prev_type]);
 			prev_type = type;
-		}
-		if (prev_level != lvl) {
-			seq_printf(m, " [0x%lx->0x%lx] level %s\n",
-				prev_pfn_level, pfn, level_name[prev_level]);
-			prev_pfn_level = pfn;
-			prev_level = lvl;
+			first_pfn = pfn;
 		}
 	}
+	seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
+		   type_name[prev_type]);
 	return 0;
-#undef TYPE_IDENTITY
-#undef TYPE_MISSING
-#undef TYPE_PFN
-#undef TYPE_UNKNOWN
 }
 
 static int p2m_dump_open(struct inode *inode, struct file *filp)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 29834b3fd87f..dfd77dec8e2b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -30,6 +30,7 @@
 #include "xen-ops.h"
 #include "vdso.h"
 #include "p2m.h"
+#include "mmu.h"
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -47,8 +48,19 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 /* Number of pages released from the initial allocation. */
 unsigned long xen_released_pages;
 
-/* Buffer used to remap identity mapped pages */
-unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
+/*
+ * Buffer used to remap identity mapped pages. We only need the virtual space.
+ * The physical page behind this address is remapped as needed to different
+ * buffer pages.
+ */
+#define REMAP_SIZE	(P2M_PER_PAGE - 3)
+static struct {
+	unsigned long	next_area_mfn;
+	unsigned long	target_pfn;
+	unsigned long	size;
+	unsigned long	mfns[REMAP_SIZE];
+} xen_remap_buf __initdata __aligned(PAGE_SIZE);
+static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
 
 /* 
  * The maximum amount of extra memory compared to the base size.  The
@@ -64,7 +76,6 @@ unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
 
 static void __init xen_add_extra_mem(u64 start, u64 size)
 {
-	unsigned long pfn;
 	int i;
 
 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
@@ -84,75 +95,76 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
 		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
 
 	memblock_reserve(start, size);
+}
 
-	xen_max_p2m_pfn = PFN_DOWN(start + size);
-	for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
-		unsigned long mfn = pfn_to_mfn(pfn);
-
-		if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
-			continue;
-		WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
-			  pfn, mfn);
+static void __init xen_del_extra_mem(u64 start, u64 size)
+{
+	int i;
+	u64 start_r, size_r;
 
-		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		start_r = xen_extra_mem[i].start;
+		size_r = xen_extra_mem[i].size;
+
+		/* Start of region. */
+		if (start_r == start) {
+			BUG_ON(size > size_r);
+			xen_extra_mem[i].start += size;
+			xen_extra_mem[i].size -= size;
+			break;
+		}
+		/* End of region. */
+		if (start_r + size_r == start + size) {
+			BUG_ON(size > size_r);
+			xen_extra_mem[i].size -= size;
+			break;
+		}
+		/* Mid of region. */
+		if (start > start_r && start < start_r + size_r) {
+			BUG_ON(start + size > start_r + size_r);
+			xen_extra_mem[i].size = start - start_r;
+			/* Calling memblock_reserve() again is okay. */
+			xen_add_extra_mem(start + size, start_r + size_r -
+					  (start + size));
+			break;
+		}
 	}
+	memblock_free(start, size);
 }
 
-static unsigned long __init xen_do_chunk(unsigned long start,
-					 unsigned long end, bool release)
+/*
+ * Called during boot before the p2m list can take entries beyond the
+ * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
+ * invalid.
+ */
+unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
 {
-	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
-		.extent_order = 0,
-		.domid        = DOMID_SELF
-	};
-	unsigned long len = 0;
-	unsigned long pfn;
-	int ret;
+	int i;
+	unsigned long addr = PFN_PHYS(pfn);
 
-	for (pfn = start; pfn < end; pfn++) {
-		unsigned long frame;
-		unsigned long mfn = pfn_to_mfn(pfn);
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		if (addr >= xen_extra_mem[i].start &&
+		    addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
+			return INVALID_P2M_ENTRY;
+	}
 
-		if (release) {
-			/* Make sure pfn exists to start with */
-			if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
-				continue;
-			frame = mfn;
-		} else {
-			if (mfn != INVALID_P2M_ENTRY)
-				continue;
-			frame = pfn;
-		}
-		set_xen_guest_handle(reservation.extent_start, &frame);
-		reservation.nr_extents = 1;
+	return IDENTITY_FRAME(pfn);
+}
 
-		ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap,
-					   &reservation);
-		WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
-		     release ? "release" : "populate", pfn, ret);
+/*
+ * Mark all pfns of extra mem as invalid in p2m list.
+ */
+void __init xen_inv_extra_mem(void)
+{
+	unsigned long pfn, pfn_s, pfn_e;
+	int i;
 
-		if (ret == 1) {
-			if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
-				if (release)
-					break;
-				set_xen_guest_handle(reservation.extent_start, &frame);
-				reservation.nr_extents = 1;
-				ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-							   &reservation);
-				break;
-			}
-			len++;
-		} else
-			break;
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		pfn_s = PFN_DOWN(xen_extra_mem[i].start);
+		pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
+		for (pfn = pfn_s; pfn < pfn_e; pfn++)
+			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 	}
-	if (len)
-		printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
-		       release ? "Freeing" : "Populating",
-		       start, end, len,
-		       release ? "freed" : "added");
-
-	return len;
 }
 
 /*
@@ -198,26 +210,62 @@ static unsigned long __init xen_find_pfn_range(
 	return done;
 }
 
+static int __init xen_free_mfn(unsigned long mfn)
+{
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+
+	set_xen_guest_handle(reservation.extent_start, &mfn);
+	reservation.nr_extents = 1;
+
+	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+}
+
 /*
- * This releases a chunk of memory and then does the identity map. It's used as
+ * This releases a chunk of memory and then does the identity map. It's used
  * as a fallback if the remapping fails.
  */
 static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
 	unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
 	unsigned long *released)
 {
+	unsigned long len = 0;
+	unsigned long pfn, end;
+	int ret;
+
 	WARN_ON(start_pfn > end_pfn);
 
+	end = min(end_pfn, nr_pages);
+	for (pfn = start_pfn; pfn < end; pfn++) {
+		unsigned long mfn = pfn_to_mfn(pfn);
+
+		/* Make sure pfn exists to start with */
+		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
+			continue;
+
+		ret = xen_free_mfn(mfn);
+		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
+
+		if (ret == 1) {
+			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
+				break;
+			len++;
+		} else
+			break;
+	}
+
 	/* Need to release pages first */
-	*released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
+	*released += len;
 	*identity += set_phys_range_identity(start_pfn, end_pfn);
 }
 
 /*
- * Helper function to update both the p2m and m2p tables.
+ * Helper function to update the p2m and m2p tables and kernel mapping.
  */
-static unsigned long __init xen_update_mem_tables(unsigned long pfn,
-						  unsigned long mfn)
+static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
 {
 	struct mmu_update update = {
 		.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
@@ -225,161 +273,88 @@ static unsigned long __init xen_update_mem_tables(unsigned long pfn,
 	};
 
 	/* Update p2m */
-	if (!early_set_phys_to_machine(pfn, mfn)) {
+	if (!set_phys_to_machine(pfn, mfn)) {
 		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
 		     pfn, mfn);
-		return false;
+		BUG();
 	}
 
 	/* Update m2p */
 	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
 		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
 		     mfn, pfn);
-		return false;
+		BUG();
 	}
 
-	return true;
+	/* Update kernel mapping, but not for highmem. */
+	if ((pfn << PAGE_SHIFT) >= __pa(high_memory))
+		return;
+
+	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
+					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
+		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
+		      mfn, pfn);
+		BUG();
+	}
 }
 
 /*
  * This function updates the p2m and m2p tables with an identity map from
- * start_pfn to start_pfn+size and remaps the underlying RAM of the original
- * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
- * to not exhaust the reserved brk space. Doing it in properly aligned blocks
- * ensures we only allocate the minimum required leaf pages in the p2m table. It
- * copies the existing mfns from the p2m table under the 1:1 map, overwrites
- * them with the identity map and then updates the p2m and m2p tables with the
- * remapped memory.
+ * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
+ * original allocation at remap_pfn. The information needed for remapping is
+ * saved in the memory itself to avoid the need for allocating buffers. The
+ * complete remap information is contained in a list of MFNs each containing
+ * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
+ * This enables us to preserve the original mfn sequence while doing the
+ * remapping at a time when the memory management is capable of allocating
+ * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
+ * its callers.
  */
-static unsigned long __init xen_do_set_identity_and_remap_chunk(
+static void __init xen_do_set_identity_and_remap_chunk(
         unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
 {
+	unsigned long buf = (unsigned long)&xen_remap_buf;
+	unsigned long mfn_save, mfn;
 	unsigned long ident_pfn_iter, remap_pfn_iter;
-	unsigned long ident_start_pfn_align, remap_start_pfn_align;
-	unsigned long ident_end_pfn_align, remap_end_pfn_align;
-	unsigned long ident_boundary_pfn, remap_boundary_pfn;
-	unsigned long ident_cnt = 0;
-	unsigned long remap_cnt = 0;
+	unsigned long ident_end_pfn = start_pfn + size;
 	unsigned long left = size;
-	unsigned long mod;
-	int i;
+	unsigned long ident_cnt = 0;
+	unsigned int i, chunk;
 
 	WARN_ON(size == 0);
 
 	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
 
-	/*
-	 * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
-	 * blocks. We need to keep track of both the existing pfn mapping and
-	 * the new pfn remapping.
-	 */
-	mod = start_pfn % P2M_PER_PAGE;
-	ident_start_pfn_align =
-		mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
-	mod = remap_pfn % P2M_PER_PAGE;
-	remap_start_pfn_align =
-		mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
-	mod = (start_pfn + size) % P2M_PER_PAGE;
-	ident_end_pfn_align = start_pfn + size - mod;
-	mod = (remap_pfn + size) % P2M_PER_PAGE;
-	remap_end_pfn_align = remap_pfn + size - mod;
-
-	/* Iterate over each p2m leaf node in each range */
-	for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
-	     ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
-	     ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
-		/* Check we aren't past the end */
-		BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
-		BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
-
-		/* Save p2m mappings */
-		for (i = 0; i < P2M_PER_PAGE; i++)
-			xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
-
-		/* Set identity map which will free a p2m leaf */
-		ident_cnt += set_phys_range_identity(ident_pfn_iter,
-			ident_pfn_iter + P2M_PER_PAGE);
+	mfn_save = virt_to_mfn(buf);
 
-#ifdef DEBUG
-		/* Helps verify a p2m leaf has been freed */
-		for (i = 0; i < P2M_PER_PAGE; i++) {
-			unsigned int pfn = ident_pfn_iter + i;
-			BUG_ON(pfn_to_mfn(pfn) != pfn);
-		}
-#endif
-		/* Now remap memory */
-		for (i = 0; i < P2M_PER_PAGE; i++) {
-			unsigned long mfn = xen_remap_buf[i];
-
-			/* This will use the p2m leaf freed above */
-			if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
-				WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
-					remap_pfn_iter + i, mfn);
-				return 0;
-			}
-
-			remap_cnt++;
-		}
-
-		left -= P2M_PER_PAGE;
-	}
-
-	/* Max boundary space possible */
-	BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
+	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
+	     ident_pfn_iter < ident_end_pfn;
+	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
+		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;
 
-	/* Now handle the boundary conditions */
-	ident_boundary_pfn = start_pfn;
-	remap_boundary_pfn = remap_pfn;
-	for (i = 0; i < left; i++) {
-		unsigned long mfn;
+		/* Map first pfn to xen_remap_buf */
+		mfn = pfn_to_mfn(ident_pfn_iter);
+		set_pte_mfn(buf, mfn, PAGE_KERNEL);
 
-		/* These two checks move from the start to end boundaries */
-		if (ident_boundary_pfn == ident_start_pfn_align)
-			ident_boundary_pfn = ident_pfn_iter;
-		if (remap_boundary_pfn == remap_start_pfn_align)
-			remap_boundary_pfn = remap_pfn_iter;
+		/* Save mapping information in page */
+		xen_remap_buf.next_area_mfn = xen_remap_mfn;
+		xen_remap_buf.target_pfn = remap_pfn_iter;
+		xen_remap_buf.size = chunk;
+		for (i = 0; i < chunk; i++)
+			xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);
 
-		/* Check we aren't past the end */
-		BUG_ON(ident_boundary_pfn >= start_pfn + size);
-		BUG_ON(remap_boundary_pfn >= remap_pfn + size);
-
-		mfn = pfn_to_mfn(ident_boundary_pfn);
-
-		if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
-			WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
-				remap_pfn_iter + i, mfn);
-			return 0;
-		}
-		remap_cnt++;
+		/* Put remap buf into list. */
+		xen_remap_mfn = mfn;
 
-		ident_boundary_pfn++;
-		remap_boundary_pfn++;
-	}
+		/* Set identity map */
+		ident_cnt += set_phys_range_identity(ident_pfn_iter,
+			ident_pfn_iter + chunk);
 
-	/* Finish up the identity map */
-	if (ident_start_pfn_align >= ident_end_pfn_align) {
-		/*
-                 * In this case we have an identity range which does not span an
-                 * aligned block so everything needs to be identity mapped here.
-                 * If we didn't check this we might remap too many pages since
-                 * the align boundaries are not meaningful in this case.
-	         */
-		ident_cnt += set_phys_range_identity(start_pfn,
-			start_pfn + size);
-	} else {
-		/* Remapped above so check each end of the chunk */
-		if (start_pfn < ident_start_pfn_align)
-			ident_cnt += set_phys_range_identity(start_pfn,
-				ident_start_pfn_align);
-		if (start_pfn + size > ident_pfn_iter)
-			ident_cnt += set_phys_range_identity(ident_pfn_iter,
-				start_pfn + size);
+		left -= chunk;
 	}
 
-	BUG_ON(ident_cnt != size);
-	BUG_ON(remap_cnt != size);
-
-	return size;
+	/* Restore old xen_remap_buf mapping */
+	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
 }
 
 /*
@@ -396,8 +371,7 @@ static unsigned long __init xen_do_set_identity_and_remap_chunk(
 static unsigned long __init xen_set_identity_and_remap_chunk(
         const struct e820entry *list, size_t map_size, unsigned long start_pfn,
 	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
-	unsigned long *identity, unsigned long *remapped,
-	unsigned long *released)
+	unsigned long *identity, unsigned long *released)
 {
 	unsigned long pfn;
 	unsigned long i = 0;
@@ -431,19 +405,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
 		if (size > remap_range_size)
 			size = remap_range_size;
 
-		if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
-			WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
-				cur_pfn, size, remap_pfn);
-			xen_set_identity_and_release_chunk(cur_pfn,
-				cur_pfn + left, nr_pages, identity, released);
-			break;
-		}
+		xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);
 
 		/* Update variables to reflect new mappings. */
 		i += size;
 		remap_pfn += size;
 		*identity += size;
-		*remapped += size;
 	}
 
 	/*
@@ -458,13 +425,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
 	return remap_pfn;
 }
 
-static unsigned long __init xen_set_identity_and_remap(
+static void __init xen_set_identity_and_remap(
 	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
 	unsigned long *released)
 {
 	phys_addr_t start = 0;
 	unsigned long identity = 0;
-	unsigned long remapped = 0;
 	unsigned long last_pfn = nr_pages;
 	const struct e820entry *entry;
 	unsigned long num_released = 0;
@@ -494,8 +460,7 @@ static unsigned long __init xen_set_identity_and_remap(
 				last_pfn = xen_set_identity_and_remap_chunk(
 						list, map_size, start_pfn,
 						end_pfn, nr_pages, last_pfn,
-						&identity, &remapped,
-						&num_released);
+						&identity, &num_released);
 			start = end;
 		}
 	}
@@ -503,12 +468,63 @@ static unsigned long __init xen_set_identity_and_remap(
 	*released = num_released;
 
 	pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
-	pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
-		last_pfn);
 	pr_info("Released %ld page(s)\n", num_released);
+}
+
+/*
+ * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
+ * The remap information (which mfn remap to which pfn) is contained in the
+ * to be remapped memory itself in a linked list anchored at xen_remap_mfn.
+ * This scheme allows to remap the different chunks in arbitrary order while
+ * the resulting mapping will be independant from the order.
+ */
+void __init xen_remap_memory(void)
+{
+	unsigned long buf = (unsigned long)&xen_remap_buf;
+	unsigned long mfn_save, mfn, pfn;
+	unsigned long remapped = 0;
+	unsigned int i;
+	unsigned long pfn_s = ~0UL;
+	unsigned long len = 0;
+
+	mfn_save = virt_to_mfn(buf);
+
+	while (xen_remap_mfn != INVALID_P2M_ENTRY) {
+		/* Map the remap information */
+		set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);
 
-	return last_pfn;
+		BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);
+
+		pfn = xen_remap_buf.target_pfn;
+		for (i = 0; i < xen_remap_buf.size; i++) {
+			mfn = xen_remap_buf.mfns[i];
+			xen_update_mem_tables(pfn, mfn);
+			remapped++;
+			pfn++;
+		}
+		if (pfn_s == ~0UL || pfn == pfn_s) {
+			pfn_s = xen_remap_buf.target_pfn;
+			len += xen_remap_buf.size;
+		} else if (pfn_s + len == xen_remap_buf.target_pfn) {
+			len += xen_remap_buf.size;
+		} else {
+			xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
+			pfn_s = xen_remap_buf.target_pfn;
+			len = xen_remap_buf.size;
+		}
+
+		mfn = xen_remap_mfn;
+		xen_remap_mfn = xen_remap_buf.next_area_mfn;
+	}
+
+	if (pfn_s != ~0UL && len)
+		xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
+
+	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
+
+	pr_info("Remapped %ld page(s)\n", remapped);
 }
+
 static unsigned long __init xen_get_max_pages(void)
 {
 	unsigned long max_pages = MAX_DOMAIN_PAGES;
@@ -569,7 +585,6 @@ char * __init xen_memory_setup(void)
 	int rc;
 	struct xen_memory_map memmap;
 	unsigned long max_pages;
-	unsigned long last_pfn = 0;
 	unsigned long extra_pages = 0;
 	int i;
 	int op;
@@ -616,17 +631,14 @@ char * __init xen_memory_setup(void)
 		extra_pages += max_pages - max_pfn;
 
 	/*
-	 * Set identity map on non-RAM pages and remap the underlying RAM.
+	 * Set identity map on non-RAM pages and prepare remapping the
+	 * underlying RAM.
 	 */
-	last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
-					      &xen_released_pages);
+	xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
+				   &xen_released_pages);
 
 	extra_pages += xen_released_pages;
 
-	if (last_pfn > max_pfn) {
-		max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
-		mem_end = PFN_PHYS(max_pfn);
-	}
 	/*
 	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
 	 * factor the base size.  On non-highmem systems, the base
@@ -653,6 +665,7 @@ char * __init xen_memory_setup(void)
 				size = min(size, (u64)extra_pages * PAGE_SIZE);
 				extra_pages -= size / PAGE_SIZE;
 				xen_add_extra_mem(addr, size);
+				xen_max_p2m_pfn = PFN_DOWN(addr + size);
 			} else
 				type = E820_UNUSABLE;
 		}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 4ab9298c5e17..5686bd9d58cc 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -29,11 +29,13 @@ void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
 void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_reserve_top(void);
-extern unsigned long xen_max_p2m_pfn;
 
 void xen_mm_pin_all(void);
 void xen_mm_unpin_all(void);
 
+unsigned long __ref xen_chk_extra_mem(unsigned long pfn);
+void __init xen_inv_extra_mem(void);
+void __init xen_remap_memory(void);
 char * __init xen_memory_setup(void);
 char * xen_auto_xlated_memory_setup(void);
 void __init xen_arch_setup(void);
@@ -46,7 +48,7 @@ void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
-unsigned long __init xen_revector_p2m_tree(void);
+void __init xen_vmalloc_p2m_tree(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 81f57e8c8f1b..e31d4949124a 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -98,12 +98,6 @@ config XTENSA_VARIANT_DC233C
 	help
 	  This variant refers to Tensilica's Diamond 233L Standard core Rev.C (LE).
 
-config XTENSA_VARIANT_S6000
-	bool "s6000 - Stretch software configurable processor"
-	select VARIANT_IRQ_SWITCH
-	select ARCH_REQUIRE_GPIOLIB
-	select XTENSA_CALIBRATE_CCOUNT
-
 config XTENSA_VARIANT_CUSTOM
 	bool "Custom Xtensa processor configuration"
 	select MAY_HAVE_SMP
@@ -126,7 +120,6 @@ config XTENSA_VARIANT_NAME
 	default "dc232b"			if XTENSA_VARIANT_DC232B
 	default "dc233c"			if XTENSA_VARIANT_DC233C
 	default "fsf"				if XTENSA_VARIANT_FSF
-	default "s6000"				if XTENSA_VARIANT_S6000
 	default XTENSA_VARIANT_CUSTOM_NAME	if XTENSA_VARIANT_CUSTOM
 
 config XTENSA_VARIANT_MMU
@@ -191,7 +184,6 @@ config HOTPLUG_CPU
 
 config INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
 	bool "Initialize Xtensa MMU inside the Linux kernel code"
-	depends on MMU
 	default y
 	help
 	  Earlier version initialized the MMU in the exception vector
@@ -311,15 +303,10 @@ config XTENSA_PLATFORM_XT2000
 	  XT2000 is the name of Tensilica's feature-rich emulation platform.
 	  This hardware is capable of running a full Linux distribution.
 
-config XTENSA_PLATFORM_S6105
-	bool "S6105"
-	select HAVE_IDE
-	select SERIAL_CONSOLE
-	select NO_IOPORT_MAP
-
 config XTENSA_PLATFORM_XTFPGA
 	bool "XTFPGA"
 	select ETHOC if ETHERNET
+	select PLATFORM_WANT_DEFAULT_MEM
 	select SERIAL_CONSOLE
 	select XTENSA_CALIBRATE_CCOUNT
 	help
@@ -406,6 +393,41 @@ source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
 
+config PLATFORM_WANT_DEFAULT_MEM
+	def_bool n
+
+config DEFAULT_MEM_START
+	hex "Physical address of the default memory area start"
+	depends on PLATFORM_WANT_DEFAULT_MEM
+	default 0x00000000 if MMU
+	default 0x40000000 if !MMU
+	help
+	  This is a fallback start address of the default memory area, it is
+	  used when no physical memory size is passed through DTB or through
+	  boot parameter from bootloader.
+
+	  In noMMU configuration the following parameters are derived from it:
+	  - kernel load address;
+	  - kernel entry point address;
+	  - relocatable vectors base address;
+	  - uBoot load address;
+	  - TASK_SIZE.
+
+	  If unsure, leave the default value here.
+
+config DEFAULT_MEM_SIZE
+	hex "Maximal size of the default memory area"
+	depends on PLATFORM_WANT_DEFAULT_MEM
+	default 0x04000000
+	help
+	  This is a fallback size of the default memory area, it is used when
+	  no physical memory size is passed through DTB or through boot
+	  parameter from bootloader.
+
+	  It's also used for TASK_SIZE calculation in noMMU configuration.
+
+	  If unsure, leave the default value here.
+
 endmenu
 
 menu "Executable file formats"
@@ -414,6 +436,12 @@ source "fs/Kconfig.binfmt"
 
 endmenu
 
+menu "Power management options"
+
+source "kernel/power/Kconfig"
+
+endmenu
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/arch/xtensa/Kconfig.debug b/arch/xtensa/Kconfig.debug
index af7da74d535f..8430af27de0a 100644
--- a/arch/xtensa/Kconfig.debug
+++ b/arch/xtensa/Kconfig.debug
@@ -4,7 +4,7 @@ source "lib/Kconfig.debug"
 
 config DEBUG_TLB_SANITY
 	bool "Debug TLB sanity"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && MMU
 	help
 	  Enable this to turn on TLB sanity check on each entry to userspace.
 	  This check can spot missing TLB invalidation/wrong PTE permissions/
@@ -14,7 +14,7 @@ config DEBUG_TLB_SANITY
 
 config LD_NO_RELAX
 	bool "Disable linker relaxation"
-	default n
+	default y
 	help
 	  Enable this function to disable link-time optimizations.
 	  The default linker behavior is to combine identical literal
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index 472533064b46..f9e6a068aafd 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -35,7 +35,6 @@ endif
 
 platform-$(CONFIG_XTENSA_PLATFORM_XT2000)	:= xt2000
 platform-$(CONFIG_XTENSA_PLATFORM_ISS)		:= iss
-platform-$(CONFIG_XTENSA_PLATFORM_S6105)	:= s6105
 platform-$(CONFIG_XTENSA_PLATFORM_XTFPGA)	:= xtfpga
 
 PLATFORM = $(platform-y)
diff --git a/arch/xtensa/boot/boot-elf/boot.lds.S b/arch/xtensa/boot/boot-elf/boot.lds.S
index 932b58ef33d4..958b33af96b7 100644
--- a/arch/xtensa/boot/boot-elf/boot.lds.S
+++ b/arch/xtensa/boot/boot-elf/boot.lds.S
@@ -41,6 +41,7 @@ SECTIONS
 		__bss_end = .;
 	}
 
+#ifdef CONFIG_MMU
 	/*
 	 * This is a remapped copy of the Reset Vector Code.
 	 * It keeps gdb in sync with the PC after switching
@@ -51,4 +52,5 @@ SECTIONS
 	{
 		*(.ResetVector.remapped_text)
 	}
+#endif
 }
diff --git a/arch/xtensa/boot/boot-elf/bootstrap.S b/arch/xtensa/boot/boot-elf/bootstrap.S
index 1388a499753b..9341a5750694 100644
--- a/arch/xtensa/boot/boot-elf/bootstrap.S
+++ b/arch/xtensa/boot/boot-elf/bootstrap.S
@@ -20,6 +20,7 @@
 #include <asm/page.h>
 #include <asm/cacheasm.h>
 #include <asm/initialize_mmu.h>
+#include <asm/vectors.h>
 #include <linux/linkage.h>
 
 	.section	.ResetVector.text, "ax"
@@ -34,12 +35,7 @@ _ResetVector:
 
 	.align 4
 RomInitAddr:
-#if defined(CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX) && \
-	XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
-	.word 0x00003000
-#else
-	.word 0xd0003000
-#endif
+	.word	LOAD_MEMORY_ADDRESS
 RomBootParam:
 	.word _bootparam
 _bootparam:
@@ -79,6 +75,7 @@ reset:
 	movi	a4, 0
 	jx      a0
 
+#ifdef CONFIG_MMU
 	.align 4
 
 	.section	.ResetVector.remapped_text, "x"
@@ -102,3 +99,4 @@ _RemappedSetupMMU:
 #endif
 
 	.end    no-absolute-literals
+#endif
diff --git a/arch/xtensa/boot/boot-uboot/Makefile b/arch/xtensa/boot/boot-uboot/Makefile
index 545759819ef9..403fcf23405c 100644
--- a/arch/xtensa/boot/boot-uboot/Makefile
+++ b/arch/xtensa/boot/boot-uboot/Makefile
@@ -4,11 +4,15 @@
 # for more details.
 #
 
+ifdef CONFIG_MMU
 ifdef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
 UIMAGE_LOADADDR = 0x00003000
 else
 UIMAGE_LOADADDR = 0xd0003000
 endif
+else
+UIMAGE_LOADADDR = $(shell printf "0x%x" $$(( ${CONFIG_DEFAULT_MEM_START} + 0x3000 )) )
+endif
 UIMAGE_COMPRESSION = gzip
 
 $(obj)/../uImage: vmlinux.bin.gz FORCE
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index b966baf82cae..e4d193e7a300 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -143,7 +143,6 @@ CONFIG_MMU=y
 #
 CONFIG_XTENSA_VARIANT_FSF=y
 # CONFIG_XTENSA_VARIANT_DC232B is not set
-# CONFIG_XTENSA_VARIANT_S6000 is not set
 # CONFIG_XTENSA_UNALIGNED_USER is not set
 # CONFIG_PREEMPT is not set
 CONFIG_XTENSA_CALIBRATE_CCOUNT=y
@@ -161,7 +160,6 @@ CONFIG_XTENSA_ISS_NETWORK=y
 #
 CONFIG_XTENSA_PLATFORM_ISS=y
 # CONFIG_XTENSA_PLATFORM_XT2000 is not set
-# CONFIG_XTENSA_PLATFORM_S6105 is not set
 # CONFIG_GENERIC_CALIBRATE_DELAY is not set
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="console=ttyS0,38400 eth0=tuntap,,tap0 ip=192.168.168.5:192.168.168.1 root=nfs nfsroot=192.168.168.1:/opt/montavista/pro/devkit/xtensa/linux_be/target"
@@ -759,3 +757,4 @@ CONFIG_GENERIC_FIND_LAST_BIT=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_DMA=y
 CONFIG_NLATTR=y
+CONFIG_LD_NO_RELAX=y
diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig
deleted file mode 100644
index 9471265b8ca6..000000000000
--- a/arch/xtensa/configs/s6105_defconfig
+++ /dev/null
@@ -1,615 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc7-s6
-# Tue Mar 10 11:09:26 2009
-#
-# CONFIG_FRAME_POINTER is not set
-CONFIG_ZONE_DMA=y
-CONFIG_XTENSA=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_GENERIC_HWEIGHT=y
-# CONFIG_ARCH_HAS_ILOG2_U32 is not set
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
-CONFIG_NO_IOPORT_MAP=y
-CONFIG_HZ=100
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-
-#
-# General setup
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SYSVIPC=y
-CONFIG_SYSVIPC_SYSCTL=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_TASKSTATS is not set
-# CONFIG_AUDIT is not set
-
-#
-# RCU Subsystem
-#
-# CONFIG_CLASSIC_RCU is not set
-# CONFIG_TREE_RCU is not set
-CONFIG_PREEMPT_RCU=y
-# CONFIG_RCU_TRACE is not set
-# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=16
-# CONFIG_GROUP_SCHED is not set
-# CONFIG_CGROUPS is not set
-# CONFIG_SYSFS_DEPRECATED_V2 is not set
-# CONFIG_RELAY is not set
-# CONFIG_NAMESPACES is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL=y
-CONFIG_EXPERT=y
-CONFIG_SYSCTL_SYSCALL=y
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
-CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
-CONFIG_AIO=y
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_SLAB=y
-# CONFIG_SLUB is not set
-# CONFIG_SLOB is not set
-# CONFIG_PROFILING is not set
-# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
-CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
-CONFIG_BASE_SMALL=0
-# CONFIG_MODULES is not set
-CONFIG_BLOCK=y
-# CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_BLK_DEV_INTEGRITY is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
-# CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
-# CONFIG_FREEZER is not set
-# CONFIG_MMU is not set
-CONFIG_VARIANT_IRQ_SWITCH=y
-
-#
-# Processor type and features
-#
-# CONFIG_XTENSA_VARIANT_FSF is not set
-# CONFIG_XTENSA_VARIANT_DC232B is not set
-CONFIG_XTENSA_VARIANT_S6000=y
-# CONFIG_XTENSA_UNALIGNED_USER is not set
-CONFIG_PREEMPT=y
-# CONFIG_HIGHMEM is not set
-CONFIG_XTENSA_CALIBRATE_CCOUNT=y
-CONFIG_SERIAL_CONSOLE=y
-# CONFIG_XTENSA_ISS_NETWORK is not set
-
-#
-# Bus options
-#
-# CONFIG_PCI is not set
-# CONFIG_ARCH_SUPPORTS_MSI is not set
-
-#
-# Platform options
-#
-# CONFIG_XTENSA_PLATFORM_ISS is not set
-# CONFIG_XTENSA_PLATFORM_XT2000 is not set
-CONFIG_XTENSA_PLATFORM_S6105=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS1,38400 debug bootmem_debug loglevel=7"
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_PAGEFLAGS_EXTENDED=y
-CONFIG_SPLIT_PTLOCK_CPUS=4
-# CONFIG_PHYS_ADDR_T_64BIT is not set
-CONFIG_ZONE_DMA_FLAG=1
-CONFIG_VIRT_TO_BUS=y
-
-#
-# Executable file formats
-#
-CONFIG_KCORE_ELF=y
-CONFIG_BINFMT_FLAT=y
-# CONFIG_BINFMT_ZFLAT is not set
-# CONFIG_BINFMT_SHARED_FLAT is not set
-# CONFIG_HAVE_AOUT is not set
-# CONFIG_BINFMT_MISC is not set
-CONFIG_NET=y
-
-#
-# Networking options
-#
-CONFIG_COMPAT_NET_DEV_OPS=y
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-# CONFIG_INET_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_CUBIC=y
-CONFIG_DEFAULT_TCP_CONG="cubic"
-# CONFIG_TCP_MD5SIG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_NETWORK_SECMARK is not set
-# CONFIG_NETFILTER is not set
-# CONFIG_IP_DCCP is not set
-# CONFIG_IP_SCTP is not set
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_NET_DSA is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-# CONFIG_NET_SCHED is not set
-# CONFIG_DCB is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_CAN is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
-# CONFIG_WIRELESS is not set
-# CONFIG_WIMAX is not set
-# CONFIG_RFKILL is not set
-# CONFIG_NET_9P is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_DEBUG_DRIVER is not set
-# CONFIG_DEBUG_DEVRES is not set
-# CONFIG_SYS_HYPERVISOR is not set
-# CONFIG_CONNECTOR is not set
-# CONFIG_MTD is not set
-# CONFIG_PARPORT is not set
-CONFIG_BLK_DEV=y
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-# CONFIG_BLK_DEV_XIP is not set
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-# CONFIG_BLK_DEV_HD is not set
-# CONFIG_MISC_DEVICES is not set
-CONFIG_HAVE_IDE=y
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_DMA is not set
-# CONFIG_SCSI_NETLINK is not set
-# CONFIG_ATA is not set
-# CONFIG_MD is not set
-CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_MACVLAN is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-# CONFIG_VETH is not set
-CONFIG_PHYLIB=y
-
-#
-# MII PHY device drivers
-#
-# CONFIG_MARVELL_PHY is not set
-# CONFIG_DAVICOM_PHY is not set
-# CONFIG_QSEMI_PHY is not set
-# CONFIG_LXT_PHY is not set
-# CONFIG_CICADA_PHY is not set
-# CONFIG_VITESSE_PHY is not set
-CONFIG_SMSC_PHY=y
-# CONFIG_BROADCOM_PHY is not set
-# CONFIG_ICPLUS_PHY is not set
-# CONFIG_REALTEK_PHY is not set
-# CONFIG_NATIONAL_PHY is not set
-# CONFIG_STE10XP is not set
-# CONFIG_LSI_ET1011C_PHY is not set
-# CONFIG_FIXED_PHY is not set
-# CONFIG_MDIO_BITBANG is not set
-# CONFIG_NET_ETHERNET is not set
-CONFIG_NETDEV_1000=y
-CONFIG_S6GMAC=y
-# CONFIG_NETDEV_10000 is not set
-
-#
-# Wireless LAN
-#
-# CONFIG_WLAN_PRE80211 is not set
-# CONFIG_WLAN_80211 is not set
-# CONFIG_IWLWIFI_LEDS is not set
-
-#
-# Enable WiMAX (Networking options) to see the WiMAX drivers
-#
-# CONFIG_WAN is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_ISDN is not set
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-# CONFIG_INPUT is not set
-
-#
-# Hardware I/O ports
-#
-# CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_DEVKMEM is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_SERIAL_8250_EXTENDED is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_UNIX98_PTYS=y
-# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_IPMI_HANDLER is not set
-# CONFIG_HW_RANDOM is not set
-# CONFIG_R3964 is not set
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_TCG_TPM is not set
-# CONFIG_I2C is not set
-# CONFIG_SPI is not set
-CONFIG_ARCH_REQUIRE_GPIOLIB=y
-CONFIG_GPIOLIB=y
-# CONFIG_DEBUG_GPIO is not set
-# CONFIG_GPIO_SYSFS is not set
-
-#
-# Memory mapped GPIO expanders:
-#
-
-#
-# I2C GPIO expanders:
-#
-
-#
-# PCI GPIO expanders:
-#
-
-#
-# SPI GPIO expanders:
-#
-# CONFIG_W1 is not set
-# CONFIG_POWER_SUPPLY is not set
-# CONFIG_HWMON is not set
-# CONFIG_THERMAL is not set
-# CONFIG_THERMAL_HWMON is not set
-# CONFIG_WATCHDOG is not set
-CONFIG_SSB_POSSIBLE=y
-
-#
-# Sonics Silicon Backplane
-#
-# CONFIG_SSB is not set
-
-#
-# Multifunction device drivers
-#
-# CONFIG_MFD_CORE is not set
-# CONFIG_MFD_SM501 is not set
-# CONFIG_HTC_PASIC3 is not set
-# CONFIG_MFD_TMIO is not set
-# CONFIG_REGULATOR is not set
-
-#
-# Multimedia devices
-#
-
-#
-# Multimedia core support
-#
-# CONFIG_VIDEO_DEV is not set
-# CONFIG_DVB_CORE is not set
-# CONFIG_VIDEO_MEDIA is not set
-
-#
-# Multimedia drivers
-#
-# CONFIG_DAB is not set
-
-#
-# Graphics support
-#
-# CONFIG_VGASTATE is not set
-# CONFIG_VIDEO_OUTPUT_CONTROL is not set
-# CONFIG_FB is not set
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
-
-#
-# Display device support
-#
-# CONFIG_DISPLAY_SUPPORT is not set
-# CONFIG_SOUND is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_MMC is not set
-# CONFIG_MEMSTICK is not set
-# CONFIG_NEW_LEDS is not set
-# CONFIG_ACCESSIBILITY is not set
-CONFIG_RTC_LIB=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_HCTOSYS=y
-CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
-# CONFIG_RTC_DEBUG is not set
-
-#
-# RTC interfaces
-#
-# CONFIG_RTC_INTF_SYSFS is not set
-# CONFIG_RTC_INTF_PROC is not set
-# CONFIG_RTC_INTF_DEV is not set
-# CONFIG_RTC_DRV_TEST is not set
-
-#
-# I2C RTC drivers
-#
-# CONFIG_RTC_DRV_DS1307 is not set
-# CONFIG_RTC_DRV_DS1374 is not set
-# CONFIG_RTC_DRV_DS1672 is not set
-# CONFIG_RTC_DRV_MAX6900 is not set
-# CONFIG_RTC_DRV_RS5C372 is not set
-# CONFIG_RTC_DRV_ISL1208 is not set
-# CONFIG_RTC_DRV_X1205 is not set
-# CONFIG_RTC_DRV_PCF8563 is not set
-# CONFIG_RTC_DRV_PCF8583 is not set
-CONFIG_RTC_DRV_M41T80=y
-# CONFIG_RTC_DRV_M41T80_WDT is not set
-# CONFIG_RTC_DRV_S35390A is not set
-# CONFIG_RTC_DRV_FM3130 is not set
-# CONFIG_RTC_DRV_RX8581 is not set
-
-#
-# SPI RTC drivers
-#
-
-#
-# Platform RTC drivers
-#
-# CONFIG_RTC_DRV_DS1286 is not set
-# CONFIG_RTC_DRV_DS1511 is not set
-# CONFIG_RTC_DRV_DS1553 is not set
-# CONFIG_RTC_DRV_DS1742 is not set
-# CONFIG_RTC_DRV_STK17TA8 is not set
-# CONFIG_RTC_DRV_M48T86 is not set
-# CONFIG_RTC_DRV_M48T35 is not set
-# CONFIG_RTC_DRV_M48T59 is not set
-# CONFIG_RTC_DRV_BQ4802 is not set
-# CONFIG_RTC_DRV_V3020 is not set
-
-#
-# on-CPU RTC drivers
-#
-# CONFIG_DMADEVICES is not set
-# CONFIG_UIO is not set
-# CONFIG_STAGING is not set
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_EXT4_FS is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-CONFIG_FILE_LOCKING=y
-# CONFIG_XFS_FS is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_BTRFS_FS is not set
-# CONFIG_DNOTIFY is not set
-# CONFIG_INOTIFY is not set
-# CONFIG_QUOTA is not set
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-# CONFIG_CONFIGFS_FS is not set
-# CONFIG_MISC_FILESYSTEMS is not set
-# CONFIG_NETWORK_FILESYSTEMS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_NLS is not set
-# CONFIG_DLM is not set
-
-#
-# Kernel hacking
-#
-CONFIG_PRINTK_TIME=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_FRAME_WARN=1024
-# CONFIG_MAGIC_SYSRQ is not set
-# CONFIG_UNUSED_SYMBOLS is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_HEADERS_CHECK is not set
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_SHIRQ=y
-CONFIG_DETECT_SOFTLOCKUP=y
-# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
-# CONFIG_SCHED_DEBUG is not set
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
-# CONFIG_DEBUG_OBJECTS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_VM is not set
-CONFIG_DEBUG_NOMMU_REGIONS=y
-# CONFIG_DEBUG_MEMORY_INIT is not set
-# CONFIG_DEBUG_LIST is not set
-# CONFIG_DEBUG_SG is not set
-# CONFIG_DEBUG_NOTIFIERS is not set
-# CONFIG_BOOT_PRINTK_DELAY is not set
-# CONFIG_RCU_TORTURE_TEST is not set
-# CONFIG_BACKTRACE_SELF_TEST is not set
-# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
-# CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
-
-#
-# Tracers
-#
-# CONFIG_PREEMPT_TRACER is not set
-# CONFIG_SCHED_TRACER is not set
-# CONFIG_CONTEXT_SWITCH_TRACER is not set
-# CONFIG_BOOT_TRACER is not set
-# CONFIG_TRACE_BRANCH_PROFILING is not set
-# CONFIG_DYNAMIC_DEBUG is not set
-# CONFIG_SAMPLES is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-# CONFIG_SECURITYFS is not set
-# CONFIG_SECURITY_FILE_CAPABILITIES is not set
-# CONFIG_CRYPTO is not set
-
-#
-# Library routines
-#
-CONFIG_GENERIC_FIND_LAST_BIT=y
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-# CONFIG_CRC_T10DIF is not set
-# CONFIG_CRC_ITU_T is not set
-# CONFIG_CRC32 is not set
-# CONFIG_CRC7 is not set
-# CONFIG_LIBCRC32C is not set
-CONFIG_PLIST=y
-CONFIG_HAS_IOMEM=y
-CONFIG_HAS_DMA=y
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index e72aaca7a77f..5f67ace97b32 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -67,6 +67,8 @@ extern void __invalidate_dcache_page_alias(unsigned long, unsigned long);
 #else
 static inline void __flush_invalidate_dcache_page_alias(unsigned long virt,
 							unsigned long phys) { }
+static inline void __invalidate_dcache_page_alias(unsigned long virt,
+						  unsigned long phys) { }
 #endif
 #if defined(CONFIG_MMU) && (ICACHE_WAY_SIZE > PAGE_SIZE)
 extern void __invalidate_icache_page_alias(unsigned long, unsigned long);
@@ -84,7 +86,8 @@ static inline void __invalidate_icache_page_alias(unsigned long virt,
  * (see also Documentation/cachetlb.txt)
  */
 
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) || defined(CONFIG_SMP)
+#if defined(CONFIG_MMU) && \
+	((DCACHE_WAY_SIZE > PAGE_SIZE) || defined(CONFIG_SMP))
 
 #ifdef CONFIG_SMP
 void flush_cache_all(void);
@@ -150,7 +153,7 @@ void local_flush_cache_page(struct vm_area_struct *vma,
 #define flush_dcache_mmap_lock(mapping)			do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)		do { } while (0)
 
-#if (DCACHE_WAY_SIZE > PAGE_SIZE)
+#if defined(CONFIG_MMU) && (DCACHE_WAY_SIZE > PAGE_SIZE)
 
 extern void copy_to_user_page(struct vm_area_struct*, struct page*,
 		unsigned long, void*, const void*, unsigned long);
diff --git a/arch/xtensa/include/asm/highmem.h b/arch/xtensa/include/asm/highmem.h
index 2c7901edffaf..01cef6b40829 100644
--- a/arch/xtensa/include/asm/highmem.h
+++ b/arch/xtensa/include/asm/highmem.h
@@ -25,7 +25,7 @@
 #define PKMAP_NR(virt)		(((virt) - PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)		(PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
-#define kmap_prot		PAGE_KERNEL
+#define kmap_prot		PAGE_KERNEL_EXEC
 
 #if DCACHE_WAY_SIZE > PAGE_SIZE
 #define get_pkmap_color get_pkmap_color
diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h
index 600781edc8a3..e256f2270ec9 100644
--- a/arch/xtensa/include/asm/initialize_mmu.h
+++ b/arch/xtensa/include/asm/initialize_mmu.h
@@ -26,8 +26,16 @@
 #include <asm/pgtable.h>
 #include <asm/vectors.h>
 
+#if XCHAL_HAVE_PTP_MMU
 #define CA_BYPASS	(_PAGE_CA_BYPASS | _PAGE_HW_WRITE | _PAGE_HW_EXEC)
 #define CA_WRITEBACK	(_PAGE_CA_WB     | _PAGE_HW_WRITE | _PAGE_HW_EXEC)
+#else
+#define CA_WRITEBACK	(0x4)
+#endif
+
+#ifndef XCHAL_SPANNING_WAY
+#define XCHAL_SPANNING_WAY 0
+#endif
 
 #ifdef __ASSEMBLY__
 
@@ -75,7 +83,7 @@
 
 	/* Step 1: invalidate mapping at 0x40000000..0x5FFFFFFF. */
 
-	movi	a2, 0x40000006
+	movi	a2, 0x40000000 | XCHAL_SPANNING_WAY
 	idtlb	a2
 	iitlb	a2
 	isync
@@ -141,9 +149,6 @@
 	jx	a4
 
 1:
-	movi    a2, VECBASE_RESET_VADDR
-	wsr	a2, vecbase
-
 	/* Step 5: remove temporary mapping. */
 	idtlb	a7
 	iitlb	a7
@@ -156,6 +161,33 @@
 #endif /* defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU &&
 	  XCHAL_HAVE_SPANNING_WAY */
 
+#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS
+	/* Enable data and instruction cache in the DEFAULT_MEMORY region
+	 * if the processor has DTLB and ITLB.
+	 */
+
+	movi	a5, PLATFORM_DEFAULT_MEM_START | XCHAL_SPANNING_WAY
+	movi	a6, ~_PAGE_ATTRIB_MASK
+	movi	a7, CA_WRITEBACK
+	movi	a8, 0x20000000
+	movi	a9, PLATFORM_DEFAULT_MEM_SIZE
+	j	2f
+1:
+	sub	a9, a9, a8
+2:
+	rdtlb1	a3, a5
+	ritlb1	a4, a5
+	and	a3, a3, a6
+	and	a4, a4, a6
+	or	a3, a3, a7
+	or	a4, a4, a7
+	wdtlb	a3, a5
+	witlb	a4, a5
+	add	a5, a5, a8
+	bltu	a8, a9, 1b
+
+#endif
+
 	.endm
 
 #endif /*__ASSEMBLY__*/
diff --git a/arch/xtensa/include/asm/mmu_context.h b/arch/xtensa/include/asm/mmu_context.h
index d33c71a8c9ec..04c8ebdc4517 100644
--- a/arch/xtensa/include/asm/mmu_context.h
+++ b/arch/xtensa/include/asm/mmu_context.h
@@ -50,11 +50,7 @@ DECLARE_PER_CPU(unsigned long, asid_cache);
 #define ASID_MASK	((1 << XCHAL_MMU_ASID_BITS) - 1)
 #define ASID_INSERT(x)	(0x03020001 | (((x) & ASID_MASK) << 8))
 
-#ifdef CONFIG_MMU
 void init_mmu(void);
-#else
-static inline void init_mmu(void) { }
-#endif
 
 static inline void set_rasid_register (unsigned long val)
 {
diff --git a/arch/xtensa/include/asm/nommu_context.h b/arch/xtensa/include/asm/nommu_context.h
index 3407cf7989b7..22984fd1d846 100644
--- a/arch/xtensa/include/asm/nommu_context.h
+++ b/arch/xtensa/include/asm/nommu_context.h
@@ -1,3 +1,7 @@
+static inline void init_mmu(void)
+{
+}
+
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 }
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index abe24c6f8b2f..ad38500471fa 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -20,10 +20,10 @@
  * Fixed TLB translations in the processor.
  */
 
-#define XCHAL_KSEG_CACHED_VADDR 0xd0000000
-#define XCHAL_KSEG_BYPASS_VADDR 0xd8000000
-#define XCHAL_KSEG_PADDR        0x00000000
-#define XCHAL_KSEG_SIZE         0x08000000
+#define XCHAL_KSEG_CACHED_VADDR __XTENSA_UL_CONST(0xd0000000)
+#define XCHAL_KSEG_BYPASS_VADDR __XTENSA_UL_CONST(0xd8000000)
+#define XCHAL_KSEG_PADDR        __XTENSA_UL_CONST(0x00000000)
+#define XCHAL_KSEG_SIZE         __XTENSA_UL_CONST(0x08000000)
 
 /*
  * PAGE_SHIFT determines the page size
@@ -37,7 +37,7 @@
 #define PAGE_OFFSET	XCHAL_KSEG_CACHED_VADDR
 #define MAX_MEM_PFN	XCHAL_KSEG_SIZE
 #else
-#define PAGE_OFFSET	0
+#define PAGE_OFFSET	__XTENSA_UL_CONST(0)
 #define MAX_MEM_PFN	(PLATFORM_DEFAULT_MEM_START + PLATFORM_DEFAULT_MEM_SIZE)
 #endif
 
@@ -145,7 +145,7 @@ extern void copy_page(void *to, void *from);
  * some extra work
  */
 
-#if DCACHE_WAY_SIZE > PAGE_SIZE
+#if defined(CONFIG_MMU) && DCACHE_WAY_SIZE > PAGE_SIZE
 extern void clear_page_alias(void *vaddr, unsigned long paddr);
 extern void copy_page_alias(void *to, void *from,
 			    unsigned long to_paddr, unsigned long from_paddr);
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 0383aed59121..872bf0194e6d 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -178,6 +178,7 @@
 
 #else /* no mmu */
 
+# define _PAGE_CHG_MASK  (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 # define PAGE_NONE       __pgprot(0)
 # define PAGE_SHARED     __pgprot(0)
 # define PAGE_COPY       __pgprot(0)
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index c7211e7e182d..876eb380aa26 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -320,7 +320,7 @@ __asm__ __volatile__(					\
 ({								\
 	long __gu_err, __gu_val;				\
 	__get_user_size(__gu_val,(ptr),(size),__gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 
@@ -330,7 +330,7 @@ __asm__ __volatile__(					\
 	const __typeof__(*(ptr)) *__gu_addr = (ptr);			\
 	if (access_ok(VERIFY_READ,__gu_addr,size))			\
 		__get_user_size(__gu_val,__gu_addr,(size),__gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;				\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
 	__gu_err;							\
 })
 
diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h
index f74ddfbb92ef..a46c53f36113 100644
--- a/arch/xtensa/include/asm/vectors.h
+++ b/arch/xtensa/include/asm/vectors.h
@@ -19,6 +19,7 @@
 #define _XTENSA_VECTORS_H
 
 #include <variant/core.h>
+#include <platform/hardware.h>
 
 #define XCHAL_KIO_CACHED_VADDR		0xe0000000
 #define XCHAL_KIO_BYPASS_VADDR		0xf0000000
@@ -51,13 +52,13 @@
   /* MMU Not being used - Virtual == Physical */
 
   /* VECBASE */
-  #define VIRTUAL_MEMORY_ADDRESS	0x00002000
+  #define VIRTUAL_MEMORY_ADDRESS	(PLATFORM_DEFAULT_MEM_START + 0x2000)
 
   /* Location of the start of the kernel text, _start */
-  #define KERNELOFFSET			0x00003000
+  #define KERNELOFFSET			(PLATFORM_DEFAULT_MEM_START + 0x3000)
 
   /* Loaded just above possibly live vectors */
-  #define LOAD_MEMORY_ADDRESS		0x00003000
+  #define LOAD_MEMORY_ADDRESS		(PLATFORM_DEFAULT_MEM_START + 0x3000)
 
 #endif /* CONFIG_MMU */
 
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 00eed6786d7e..201aec0e0446 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -55,6 +55,12 @@
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
 #define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x80000		/* create a huge page mapping */
+#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
+# define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be
+					 * uninitialized */
+#else
+# define MAP_UNINITIALIZED 0x0		/* Don't support this flag */
+#endif
 
 /*
  * Flags for msync
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index aeeb3cc8a410..15a461e2a0ed 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -112,6 +112,11 @@ ENTRY(_startup)
 
 	movi	a0, 0
 
+#if XCHAL_HAVE_VECBASE
+	movi    a2, VECBASE_RESET_VADDR
+	wsr	a2, vecbase
+#endif
+
 	/* Clear debugging registers. */
 
 #if XCHAL_HAVE_DEBUG
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index 5d3f7a119ed1..83cf49685373 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -57,6 +57,7 @@ asmlinkage long xtensa_fadvise64_64(int fd, int advice,
 	return sys_fadvise64_64(fd, offset, len, advice);
 }
 
+#ifdef CONFIG_MMU
 unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
@@ -93,3 +94,4 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 			addr = COLOUR_ALIGN(addr, pgoff);
 	}
 }
+#endif
diff --git a/arch/xtensa/mm/Makefile b/arch/xtensa/mm/Makefile
index f54f78e24d7b..e601e2fbe8e6 100644
--- a/arch/xtensa/mm/Makefile
+++ b/arch/xtensa/mm/Makefile
@@ -2,6 +2,6 @@
 # Makefile for the Linux/Xtensa-specific parts of the memory manager.
 #
 
-obj-y			:= init.o cache.o misc.o
-obj-$(CONFIG_MMU)	+= fault.o mmu.o tlb.o
+obj-y			:= init.o misc.o
+obj-$(CONFIG_MMU)	+= cache.o fault.o mmu.o tlb.o
 obj-$(CONFIG_HIGHMEM)	+= highmem.o
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 77ed20209ca5..9a9a5935bd36 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -239,6 +239,17 @@ void __init bootmem_init(void)
 	unsigned long bootmap_start, bootmap_size;
 	int i;
 
+	/* Reserve all memory below PLATFORM_DEFAULT_MEM_START, as memory
+	 * accounting doesn't work for pages below that address.
+	 *
+	 * If PLATFORM_DEFAULT_MEM_START is zero reserve page at address 0:
+	 * successfull allocations should never return NULL.
+	 */
+	if (PLATFORM_DEFAULT_MEM_START)
+		mem_reserve(0, PLATFORM_DEFAULT_MEM_START, 0);
+	else
+		mem_reserve(0, 1, 0);
+
 	sysmem_dump();
 	max_low_pfn = max_pfn = 0;
 	min_low_pfn = ~0;
@@ -332,18 +343,24 @@ void __init mem_init(void)
 		"    pkmap   : 0x%08lx - 0x%08lx  (%5lu kB)\n"
 		"    fixmap  : 0x%08lx - 0x%08lx  (%5lu kB)\n"
 #endif
+#ifdef CONFIG_MMU
 		"    vmalloc : 0x%08x - 0x%08x  (%5u MB)\n"
-		"    lowmem  : 0x%08x - 0x%08lx  (%5lu MB)\n",
+#endif
+		"    lowmem  : 0x%08lx - 0x%08lx  (%5lu MB)\n",
 #ifdef CONFIG_HIGHMEM
 		PKMAP_BASE, PKMAP_BASE + LAST_PKMAP * PAGE_SIZE,
 		(LAST_PKMAP*PAGE_SIZE) >> 10,
 		FIXADDR_START, FIXADDR_TOP,
 		(FIXADDR_TOP - FIXADDR_START) >> 10,
 #endif
+#ifdef CONFIG_MMU
 		VMALLOC_START, VMALLOC_END,
 		(VMALLOC_END - VMALLOC_START) >> 20,
 		PAGE_OFFSET, PAGE_OFFSET +
 		(max_low_pfn - min_low_pfn) * PAGE_SIZE,
+#else
+		min_low_pfn * PAGE_SIZE, max_low_pfn * PAGE_SIZE,
+#endif
 		((max_low_pfn - min_low_pfn) * PAGE_SIZE) >> 20);
 }
 
diff --git a/arch/xtensa/platforms/s6105/Makefile b/arch/xtensa/platforms/s6105/Makefile
deleted file mode 100644
index 0be6194bcb72..000000000000
--- a/arch/xtensa/platforms/s6105/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Makefile for the Stretch S6105 eval board
-
-obj-y		:= setup.o device.o
diff --git a/arch/xtensa/platforms/s6105/device.c b/arch/xtensa/platforms/s6105/device.c
deleted file mode 100644
index 4f4fc971042f..000000000000
--- a/arch/xtensa/platforms/s6105/device.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * s6105 platform devices
- *
- * Copyright (c) 2009 emlix GmbH
- */
-
-#include <linux/kernel.h>
-#include <linux/gpio.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/phy.h>
-#include <linux/platform_device.h>
-#include <linux/serial.h>
-#include <linux/serial_8250.h>
-
-#include <variant/hardware.h>
-#include <variant/dmac.h>
-
-#include <platform/gpio.h>
-
-#define GPIO3_INTNUM		3
-#define UART_INTNUM		4
-#define GMAC_INTNUM		5
-
-static const signed char gpio3_irq_mappings[] = {
-	S6_INTC_GPIO(3),
-	-1
-};
-
-static const signed char uart_irq_mappings[] = {
-	S6_INTC_UART(0),
-	S6_INTC_UART(1),
-	-1,
-};
-
-static const signed char gmac_irq_mappings[] = {
-	S6_INTC_GMAC_STAT,
-	S6_INTC_GMAC_ERR,
-	S6_INTC_DMA_HOSTTERMCNT(0),
-	S6_INTC_DMA_HOSTTERMCNT(1),
-	-1
-};
-
-const signed char *platform_irq_mappings[NR_IRQS] = {
-	[GPIO3_INTNUM] = gpio3_irq_mappings,
-	[UART_INTNUM] = uart_irq_mappings,
-	[GMAC_INTNUM] = gmac_irq_mappings,
-};
-
-static struct plat_serial8250_port serial_platform_data[] = {
-	{
-		.membase = (void *)S6_REG_UART + 0x0000,
-		.mapbase = S6_REG_UART + 0x0000,
-		.irq = UART_INTNUM,
-		.uartclk = S6_SCLK,
-		.regshift = 2,
-		.iotype = SERIAL_IO_MEM,
-		.flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST,
-	},
-	{
-		.membase = (void *)S6_REG_UART + 0x1000,
-		.mapbase = S6_REG_UART + 0x1000,
-		.irq = UART_INTNUM,
-		.uartclk = S6_SCLK,
-		.regshift = 2,
-		.iotype = SERIAL_IO_MEM,
-		.flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST,
-	},
-	{ },
-};
-
-static struct resource s6_gmac_resource[] = {
-	{
-		.name   = "mem",
-		.start  = (resource_size_t)S6_REG_GMAC,
-		.end    = (resource_size_t)S6_REG_GMAC + 0x10000 - 1,
-		.flags  = IORESOURCE_MEM,
-	},
-	{
-		.name   = "dma",
-		.start  = (resource_size_t)
-			DMA_CHNL(S6_REG_HIFDMA, S6_HIFDMA_GMACTX),
-		.end    = (resource_size_t)
-			DMA_CHNL(S6_REG_HIFDMA, S6_HIFDMA_GMACTX) + 0x100 - 1,
-		.flags  = IORESOURCE_DMA,
-	},
-	{
-		.name   = "dma",
-		.start  = (resource_size_t)
-			DMA_CHNL(S6_REG_HIFDMA, S6_HIFDMA_GMACRX),
-		.end    = (resource_size_t)
-			DMA_CHNL(S6_REG_HIFDMA, S6_HIFDMA_GMACRX) + 0x100 - 1,
-		.flags  = IORESOURCE_DMA,
-	},
-	{
-		.name   = "io",
-		.start  = (resource_size_t)S6_MEM_GMAC,
-		.end    = (resource_size_t)S6_MEM_GMAC + 0x2000000 - 1,
-		.flags  = IORESOURCE_IO,
-	},
-	{
-		.name   = "irq",
-		.start  = (resource_size_t)GMAC_INTNUM,
-		.flags  = IORESOURCE_IRQ,
-	},
-	{
-		.name   = "irq",
-		.start  = (resource_size_t)PHY_POLL,
-		.flags  = IORESOURCE_IRQ,
-	},
-};
-
-static int __init prepare_phy_irq(int pin)
-{
-	int irq;
-	if (gpio_request(pin, "s6gmac_phy") < 0)
-		goto fail;
-	if (gpio_direction_input(pin) < 0)
-		goto free;
-	irq = gpio_to_irq(pin);
-	if (irq < 0)
-		goto free;
-	if (irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW) < 0)
-		goto free;
-	return irq;
-free:
-	gpio_free(pin);
-fail:
-	return PHY_POLL;
-}
-
-static struct platform_device platform_devices[] = {
-	{
-		.name = "serial8250",
-		.id = PLAT8250_DEV_PLATFORM,
-		.dev = {
-			.platform_data = serial_platform_data,
-		},
-	},
-	{
-		.name = "s6gmac",
-		.id = 0,
-		.resource = s6_gmac_resource,
-		.num_resources = ARRAY_SIZE(s6_gmac_resource),
-	},
-	{
-		I2C_BOARD_INFO("m41t62", S6I2C_ADDR_M41T62),
-	},
-};
-
-static int __init device_init(void)
-{
-	int i;
-
-	s6_gmac_resource[5].start = prepare_phy_irq(GPIO_PHY_IRQ);
-
-	for (i = 0; i < ARRAY_SIZE(platform_devices); i++)
-		platform_device_register(&platform_devices[i]);
-	return 0;
-}
-arch_initcall_sync(device_init);
diff --git a/arch/xtensa/platforms/s6105/include/platform/gpio.h b/arch/xtensa/platforms/s6105/include/platform/gpio.h
deleted file mode 100644
index fa11aa4b61e9..000000000000
--- a/arch/xtensa/platforms/s6105/include/platform/gpio.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef __ASM_XTENSA_S6105_GPIO_H
-#define __ASM_XTENSA_S6105_GPIO_H
-
-#define GPIO_BP_TEMP_ALARM	0
-#define GPIO_PB_RESET_IN	1
-#define GPIO_EXP_IRQ		2
-#define GPIO_TRIGGER_IRQ	3
-#define GPIO_RTC_IRQ		4
-#define GPIO_PHY_IRQ		5
-#define GPIO_IMAGER_RESET	6
-#define GPIO_SD_IRQ		7
-#define GPIO_MINI_BOOT_INH	8
-#define GPIO_BOARD_RESET	9
-#define GPIO_EXP_PRESENT	10
-#define GPIO_LED1_NGREEN	12
-#define GPIO_LED1_RED		13
-#define GPIO_LED0_NGREEN	14
-#define GPIO_LED0_NRED		15
-#define GPIO_SPI_CS0		16
-#define GPIO_SPI_CS1		17
-#define GPIO_SPI_CS3		19
-#define GPIO_SPI_CS4		20
-#define GPIO_SD_WP		21
-#define GPIO_BP_RESET		22
-#define GPIO_ALARM_OUT		23
-
-#endif /* __ASM_XTENSA_S6105_GPIO_H */
diff --git a/arch/xtensa/platforms/s6105/include/platform/hardware.h b/arch/xtensa/platforms/s6105/include/platform/hardware.h
deleted file mode 100644
index d628efac7089..000000000000
--- a/arch/xtensa/platforms/s6105/include/platform/hardware.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __XTENSA_S6105_HARDWARE_H
-#define __XTENSA_S6105_HARDWARE_H
-
-#define PLATFORM_DEFAULT_MEM_START	0x40000000
-#define PLATFORM_DEFAULT_MEM_SIZE	0x08000000
-
-#define MAX_DMA_ADDRESS			0
-
-#define KERNELOFFSET			(PLATFORM_DEFAULT_MEM_START + 0x1000)
-
-#endif /* __XTENSA_S6105_HARDWARE_H */
diff --git a/arch/xtensa/platforms/s6105/include/platform/serial.h b/arch/xtensa/platforms/s6105/include/platform/serial.h
deleted file mode 100644
index c8a771e5981b..000000000000
--- a/arch/xtensa/platforms/s6105/include/platform/serial.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ASM_XTENSA_S6105_SERIAL_H
-#define __ASM_XTENSA_S6105_SERIAL_H
-
-#include <variant/hardware.h>
-
-#define BASE_BAUD (S6_SCLK / 16)
-
-#endif /* __ASM_XTENSA_S6105_SERIAL_H */
diff --git a/arch/xtensa/platforms/s6105/setup.c b/arch/xtensa/platforms/s6105/setup.c
deleted file mode 100644
index 86ce730f7913..000000000000
--- a/arch/xtensa/platforms/s6105/setup.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * s6105 control routines
- *
- * Copyright (c) 2009 emlix GmbH
- */
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/gpio.h>
-
-#include <asm/bootparam.h>
-
-#include <variant/hardware.h>
-#include <variant/gpio.h>
-
-#include <platform/gpio.h>
-
-void platform_halt(void)
-{
-	local_irq_disable();
-	while (1)
-		;
-}
-
-void platform_power_off(void)
-{
-	platform_halt();
-}
-
-void platform_restart(void)
-{
-	platform_halt();
-}
-
-void __init platform_setup(char **cmdline)
-{
-	unsigned long reg;
-
-	reg = readl(S6_REG_GREG1 + S6_GREG1_PLLSEL);
-	reg &= ~(S6_GREG1_PLLSEL_GMAC_MASK << S6_GREG1_PLLSEL_GMAC |
-		S6_GREG1_PLLSEL_GMII_MASK << S6_GREG1_PLLSEL_GMII);
-	reg |= S6_GREG1_PLLSEL_GMAC_125MHZ << S6_GREG1_PLLSEL_GMAC |
-		S6_GREG1_PLLSEL_GMII_125MHZ << S6_GREG1_PLLSEL_GMII;
-	writel(reg, S6_REG_GREG1 + S6_GREG1_PLLSEL);
-
-	reg = readl(S6_REG_GREG1 + S6_GREG1_CLKGATE);
-	reg &= ~(1 << S6_GREG1_BLOCK_SB);
-	reg &= ~(1 << S6_GREG1_BLOCK_GMAC);
-	writel(reg, S6_REG_GREG1 + S6_GREG1_CLKGATE);
-
-	reg = readl(S6_REG_GREG1 + S6_GREG1_BLOCKENA);
-	reg |= 1 << S6_GREG1_BLOCK_SB;
-	reg |= 1 << S6_GREG1_BLOCK_GMAC;
-	writel(reg, S6_REG_GREG1 + S6_GREG1_BLOCKENA);
-
-	printk(KERN_NOTICE "S6105 on Stretch S6000 - "
-		"Copyright (C) 2009 emlix GmbH <info@emlix.com>\n");
-}
-
-void __init platform_init(bp_tag_t *first)
-{
-	s6_gpio_init(0);
-	gpio_request(GPIO_LED1_NGREEN, "led1_green");
-	gpio_request(GPIO_LED1_RED, "led1_red");
-	gpio_direction_output(GPIO_LED1_NGREEN, 1);
-}
-
-void platform_heartbeat(void)
-{
-	static unsigned int c;
-
-	if (!(++c & 0x4F))
-		gpio_direction_output(GPIO_LED1_RED, !(c & 0x10));
-}
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
index aeb316b7ff88..6edd20bb4565 100644
--- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
@@ -17,8 +17,8 @@
 
 /* Memory configuration. */
 
-#define PLATFORM_DEFAULT_MEM_START 0x00000000
-#define PLATFORM_DEFAULT_MEM_SIZE  0x04000000
+#define PLATFORM_DEFAULT_MEM_START CONFIG_DEFAULT_MEM_START
+#define PLATFORM_DEFAULT_MEM_SIZE  CONFIG_DEFAULT_MEM_SIZE
 
 /* Interrupt configuration. */
 
diff --git a/arch/xtensa/variants/s6000/Makefile b/arch/xtensa/variants/s6000/Makefile
deleted file mode 100644
index 3e7ef0a0c498..000000000000
--- a/arch/xtensa/variants/s6000/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# s6000 Makefile
-
-obj-y		+= irq.o gpio.o dmac.o
-obj-$(CONFIG_XTENSA_CALIBRATE_CCOUNT)	+= delay.o
diff --git a/arch/xtensa/variants/s6000/delay.c b/arch/xtensa/variants/s6000/delay.c
deleted file mode 100644
index 39154563ee17..000000000000
--- a/arch/xtensa/variants/s6000/delay.c
+++ /dev/null
@@ -1,25 +0,0 @@
-#include <asm/timex.h>
-#include <asm/io.h>
-#include <variant/hardware.h>
-
-#define LOOPS 10
-void platform_calibrate_ccount(void)
-{
-	u32 uninitialized_var(a);
-	u32 uninitialized_var(u);
-	u32 b;
-	u32 tstamp = S6_REG_GREG1 + S6_GREG1_GLOBAL_TIMER;
-	int i = LOOPS+1;
-	do {
-		u32 t = u;
-		asm volatile(
-		"1:	l32i %0, %2, 0 ;"
-		"	beq %0, %1, 1b ;"
-		: "=&a"(u) : "a"(t), "a"(tstamp));
-		b = get_ccount();
-		if (i == LOOPS)
-			a = b;
-	} while (--i >= 0);
-	b -= a;
-	ccount_freq = b * (100000UL / LOOPS);
-}
diff --git a/arch/xtensa/variants/s6000/dmac.c b/arch/xtensa/variants/s6000/dmac.c
deleted file mode 100644
index 340f5bb0b5ef..000000000000
--- a/arch/xtensa/variants/s6000/dmac.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Authors:	Oskar Schirmer <oskar@scara.com>
- *		Daniel Gloeckner <dg@emlix.com>
- * (c) 2008 emlix GmbH http://www.emlix.com
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <asm/cacheflush.h>
-#include <variant/dmac.h>
-
-/* DMA engine lookup */
-
-struct s6dmac_ctrl s6dmac_ctrl[S6_DMAC_NB];
-
-
-/* DMA control, per engine */
-
-void s6dmac_put_fifo_cache(u32 dmac, int chan, u32 src, u32 dst, u32 size)
-{
-	if (xtensa_need_flush_dma_source(src)) {
-		u32 base = src;
-		u32 span = size;
-		u32 chunk = readl(DMA_CHNL(dmac, chan) + S6_DMA_CMONCHUNK);
-		if (chunk && (size > chunk)) {
-			s32 skip =
-				readl(DMA_CHNL(dmac, chan) + S6_DMA_SRCSKIP);
-			u32 gaps = (size+chunk-1)/chunk - 1;
-			if (skip >= 0) {
-				span += gaps * skip;
-			} else if (-skip > chunk) {
-				s32 decr = gaps * (chunk + skip);
-				base += decr;
-				span = chunk - decr;
-			} else {
-				span = max(span + gaps * skip,
-					(chunk + skip) * gaps - skip);
-			}
-		}
-		flush_dcache_unaligned(base, span);
-	}
-	if (xtensa_need_invalidate_dma_destination(dst)) {
-		u32 base = dst;
-		u32 span = size;
-		u32 chunk = readl(DMA_CHNL(dmac, chan) + S6_DMA_CMONCHUNK);
-		if (chunk && (size > chunk)) {
-			s32 skip =
-				readl(DMA_CHNL(dmac, chan) + S6_DMA_DSTSKIP);
-			u32 gaps = (size+chunk-1)/chunk - 1;
-			if (skip >= 0) {
-				span += gaps * skip;
-			} else if (-skip > chunk) {
-				s32 decr = gaps * (chunk + skip);
-				base += decr;
-				span = chunk - decr;
-			} else {
-				span = max(span + gaps * skip,
-					(chunk + skip) * gaps - skip);
-			}
-		}
-		invalidate_dcache_unaligned(base, span);
-	}
-	s6dmac_put_fifo(dmac, chan, src, dst, size);
-}
-
-void s6dmac_disable_error_irqs(u32 dmac, u32 mask)
-{
-	unsigned long flags;
-	spinlock_t *spinl = &s6dmac_ctrl[_dmac_addr_index(dmac)].lock;
-	spin_lock_irqsave(spinl, flags);
-	_s6dmac_disable_error_irqs(dmac, mask);
-	spin_unlock_irqrestore(spinl, flags);
-}
-
-u32 s6dmac_int_sources(u32 dmac, u32 channel)
-{
-	u32 mask, ret, tmp;
-	mask = 1 << channel;
-
-	tmp = readl(dmac + S6_DMA_TERMCNTIRQSTAT);
-	tmp &= mask;
-	writel(tmp, dmac + S6_DMA_TERMCNTIRQCLR);
-	ret = tmp >> channel;
-
-	tmp = readl(dmac + S6_DMA_PENDCNTIRQSTAT);
-	tmp &= mask;
-	writel(tmp, dmac + S6_DMA_PENDCNTIRQCLR);
-	ret |= (tmp >> channel) << 1;
-
-	tmp = readl(dmac + S6_DMA_LOWWMRKIRQSTAT);
-	tmp &= mask;
-	writel(tmp, dmac + S6_DMA_LOWWMRKIRQCLR);
-	ret |= (tmp >> channel) << 2;
-
-	tmp = readl(dmac + S6_DMA_INTRAW0);
-	tmp &= (mask << S6_DMA_INT0_OVER) | (mask << S6_DMA_INT0_UNDER);
-	writel(tmp, dmac + S6_DMA_INTCLEAR0);
-
-	if (tmp & (mask << S6_DMA_INT0_UNDER))
-		ret |= 1 << 3;
-	if (tmp & (mask << S6_DMA_INT0_OVER))
-		ret |= 1 << 4;
-
-	tmp = readl(dmac + S6_DMA_MASTERERRINFO);
-	mask <<= S6_DMA_INT1_CHANNEL;
-	if (((tmp >> S6_DMA_MASTERERR_CHAN(0)) & S6_DMA_MASTERERR_CHAN_MASK)
-			== channel)
-		mask |= 1 << S6_DMA_INT1_MASTER;
-	if (((tmp >> S6_DMA_MASTERERR_CHAN(1)) & S6_DMA_MASTERERR_CHAN_MASK)
-			== channel)
-		mask |= 1 << (S6_DMA_INT1_MASTER + 1);
-	if (((tmp >> S6_DMA_MASTERERR_CHAN(2)) & S6_DMA_MASTERERR_CHAN_MASK)
-			== channel)
-		mask |= 1 << (S6_DMA_INT1_MASTER + 2);
-
-	tmp = readl(dmac + S6_DMA_INTRAW1) & mask;
-	writel(tmp, dmac + S6_DMA_INTCLEAR1);
-	ret |= ((tmp >> channel) & 1) << 5;
-	ret |= ((tmp >> S6_DMA_INT1_MASTER) & S6_DMA_INT1_MASTER_MASK) << 6;
-
-	return ret;
-}
-
-void s6dmac_release_chan(u32 dmac, int chan)
-{
-	if (chan >= 0)
-		s6dmac_disable_chan(dmac, chan);
-}
-
-
-/* global init */
-
-static inline void __init dmac_init(u32 dmac, u8 chan_nb)
-{
-	s6dmac_ctrl[S6_DMAC_INDEX(dmac)].dmac = dmac;
-	spin_lock_init(&s6dmac_ctrl[S6_DMAC_INDEX(dmac)].lock);
-	s6dmac_ctrl[S6_DMAC_INDEX(dmac)].chan_nb = chan_nb;
-	writel(S6_DMA_INT1_MASTER_MASK << S6_DMA_INT1_MASTER,
-		dmac + S6_DMA_INTCLEAR1);
-}
-
-static inline void __init dmac_master(u32 dmac,
-	u32 m0start, u32 m0end, u32 m1start, u32 m1end)
-{
-	writel(m0start, dmac + S6_DMA_MASTER0START);
-	writel(m0end - 1, dmac + S6_DMA_MASTER0END);
-	writel(m1start, dmac + S6_DMA_MASTER1START);
-	writel(m1end - 1, dmac + S6_DMA_MASTER1END);
-}
-
-static void __init s6_dmac_init(void)
-{
-	dmac_init(S6_REG_LMSDMA, S6_LMSDMA_NB);
-	dmac_master(S6_REG_LMSDMA,
-		S6_MEM_DDR, S6_MEM_PCIE_APER, S6_MEM_EFI, S6_MEM_GMAC);
-	dmac_init(S6_REG_NIDMA, S6_NIDMA_NB);
-	dmac_init(S6_REG_DPDMA, S6_DPDMA_NB);
-	dmac_master(S6_REG_DPDMA,
-		S6_MEM_DDR, S6_MEM_PCIE_APER, S6_REG_DP, S6_REG_DPDMA);
-	dmac_init(S6_REG_HIFDMA, S6_HIFDMA_NB);
-	dmac_master(S6_REG_HIFDMA,
-		S6_MEM_GMAC, S6_MEM_PCIE_CFG, S6_MEM_PCIE_APER, S6_MEM_AUX);
-}
-
-arch_initcall(s6_dmac_init);
diff --git a/arch/xtensa/variants/s6000/gpio.c b/arch/xtensa/variants/s6000/gpio.c
deleted file mode 100644
index da9e85c13b08..000000000000
--- a/arch/xtensa/variants/s6000/gpio.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * s6000 gpio driver
- *
- * Copyright (c) 2009 emlix GmbH
- * Authors:	Oskar Schirmer <oskar@scara.com>
- *		Johannes Weiner <hannes@cmpxchg.org>
- *		Daniel Gloeckner <dg@emlix.com>
- */
-#include <linux/bitops.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/gpio.h>
-
-#include <variant/hardware.h>
-
-#define IRQ_BASE XTENSA_NR_IRQS
-
-#define S6_GPIO_DATA		0x000
-#define S6_GPIO_IS		0x404
-#define S6_GPIO_IBE		0x408
-#define S6_GPIO_IEV		0x40C
-#define S6_GPIO_IE		0x410
-#define S6_GPIO_RIS		0x414
-#define S6_GPIO_MIS		0x418
-#define S6_GPIO_IC		0x41C
-#define S6_GPIO_AFSEL		0x420
-#define S6_GPIO_DIR		0x800
-#define S6_GPIO_BANK(nr)	((nr) * 0x1000)
-#define S6_GPIO_MASK(nr)	(4 << (nr))
-#define S6_GPIO_OFFSET(nr) \
-		(S6_GPIO_BANK((nr) >> 3) + S6_GPIO_MASK((nr) & 7))
-
-static int direction_input(struct gpio_chip *chip, unsigned int off)
-{
-	writeb(0, S6_REG_GPIO + S6_GPIO_DIR + S6_GPIO_OFFSET(off));
-	return 0;
-}
-
-static int get(struct gpio_chip *chip, unsigned int off)
-{
-	return readb(S6_REG_GPIO + S6_GPIO_DATA + S6_GPIO_OFFSET(off));
-}
-
-static int direction_output(struct gpio_chip *chip, unsigned int off, int val)
-{
-	unsigned rel = S6_GPIO_OFFSET(off);
-	writeb(~0, S6_REG_GPIO + S6_GPIO_DIR + rel);
-	writeb(val ? ~0 : 0, S6_REG_GPIO + S6_GPIO_DATA + rel);
-	return 0;
-}
-
-static void set(struct gpio_chip *chip, unsigned int off, int val)
-{
-	writeb(val ? ~0 : 0, S6_REG_GPIO + S6_GPIO_DATA + S6_GPIO_OFFSET(off));
-}
-
-static int to_irq(struct gpio_chip *chip, unsigned offset)
-{
-	if (offset < 8)
-		return offset + IRQ_BASE;
-	return -EINVAL;
-}
-
-static struct gpio_chip gpiochip = {
-	.owner = THIS_MODULE,
-	.direction_input = direction_input,
-	.get = get,
-	.direction_output = direction_output,
-	.set = set,
-	.to_irq = to_irq,
-	.base = 0,
-	.ngpio = 24,
-	.can_sleep = 0, /* no blocking io needed */
-	.exported = 0, /* no exporting to userspace */
-};
-
-int s6_gpio_init(u32 afsel)
-{
-	writeb(afsel, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_AFSEL);
-	writeb(afsel >> 8, S6_REG_GPIO + S6_GPIO_BANK(1) + S6_GPIO_AFSEL);
-	writeb(afsel >> 16, S6_REG_GPIO + S6_GPIO_BANK(2) + S6_GPIO_AFSEL);
-	return gpiochip_add(&gpiochip);
-}
-
-static void ack(struct irq_data *d)
-{
-	writeb(1 << (d->irq - IRQ_BASE), S6_REG_GPIO + S6_GPIO_IC);
-}
-
-static void mask(struct irq_data *d)
-{
-	u8 r = readb(S6_REG_GPIO + S6_GPIO_IE);
-	r &= ~(1 << (d->irq - IRQ_BASE));
-	writeb(r, S6_REG_GPIO + S6_GPIO_IE);
-}
-
-static void unmask(struct irq_data *d)
-{
-	u8 m = readb(S6_REG_GPIO + S6_GPIO_IE);
-	m |= 1 << (d->irq - IRQ_BASE);
-	writeb(m, S6_REG_GPIO + S6_GPIO_IE);
-}
-
-static int set_type(struct irq_data *d, unsigned int type)
-{
-	const u8 m = 1 << (d->irq - IRQ_BASE);
-	irq_flow_handler_t handler;
-	u8 reg;
-
-	if (type == IRQ_TYPE_PROBE) {
-		if ((readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_AFSEL) & m)
-		    || (readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IE) & m)
-		    || readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_DIR
-			      + S6_GPIO_MASK(irq - IRQ_BASE)))
-			return 0;
-		type = IRQ_TYPE_EDGE_BOTH;
-	}
-
-	reg = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IS);
-	if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) {
-		reg |= m;
-		handler = handle_level_irq;
-	} else {
-		reg &= ~m;
-		handler = handle_edge_irq;
-	}
-	writeb(reg, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IS);
-	__irq_set_handler_locked(irq, handler);
-
-	reg = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IEV);
-	if (type & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_EDGE_RISING))
-		reg |= m;
-	else
-		reg &= ~m;
-	writeb(reg, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IEV);
-
-	reg = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IBE);
-	if ((type & IRQ_TYPE_EDGE_BOTH) == IRQ_TYPE_EDGE_BOTH)
-		reg |= m;
-	else
-		reg &= ~m;
-	writeb(reg, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IBE);
-	return 0;
-}
-
-static struct irq_chip gpioirqs = {
-	.name = "GPIO",
-	.irq_ack = ack,
-	.irq_mask = mask,
-	.irq_unmask = unmask,
-	.irq_set_type = set_type,
-};
-
-static u8 demux_masks[4];
-
-static void demux_irqs(unsigned int irq, struct irq_desc *desc)
-{
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-	u8 *mask = irq_desc_get_handler_data(desc);
-	u8 pending;
-	int cirq;
-
-	chip->irq_mask(&desc->irq_data);
-	chip->irq_ack(&desc->irq_data);
-	pending = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_MIS) & *mask;
-	cirq = IRQ_BASE - 1;
-	while (pending) {
-		int n = ffs(pending);
-		cirq += n;
-		pending >>= n;
-		generic_handle_irq(cirq);
-	}
-	chip->irq_unmask(&desc->irq_data);
-}
-
-extern const signed char *platform_irq_mappings[XTENSA_NR_IRQS];
-
-void __init variant_init_irq(void)
-{
-	int irq, n;
-	writeb(0, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IE);
-	for (irq = n = 0; irq < XTENSA_NR_IRQS; irq++) {
-		const signed char *mapping = platform_irq_mappings[irq];
-		int alone = 1;
-		u8 mask;
-		if (!mapping)
-			continue;
-		for(mask = 0; *mapping != -1; mapping++)
-			switch (*mapping) {
-			case S6_INTC_GPIO(0):
-				mask |= 1 << 0;
-				break;
-			case S6_INTC_GPIO(1):
-				mask |= 1 << 1;
-				break;
-			case S6_INTC_GPIO(2):
-				mask |= 1 << 2;
-				break;
-			case S6_INTC_GPIO(3):
-				mask |= 0x1f << 3;
-				break;
-			default:
-				alone = 0;
-			}
-		if (mask) {
-			int cirq, i;
-			if (!alone) {
-				printk(KERN_ERR "chained irq chips can't share"
-					" parent irq %i\n", irq);
-				continue;
-			}
-			demux_masks[n] = mask;
-			cirq = IRQ_BASE - 1;
-			do {
-				i = ffs(mask);
-				cirq += i;
-				mask >>= i;
-				irq_set_chip(cirq, &gpioirqs);
-				irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
-			} while (mask);
-			irq_set_handler_data(irq, demux_masks + n);
-			irq_set_chained_handler(irq, demux_irqs);
-			if (++n == ARRAY_SIZE(demux_masks))
-				break;
-		}
-	}
-}
diff --git a/arch/xtensa/variants/s6000/include/variant/core.h b/arch/xtensa/variants/s6000/include/variant/core.h
deleted file mode 100644
index af007953027e..000000000000
--- a/arch/xtensa/variants/s6000/include/variant/core.h
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Xtensa processor core configuration information.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 1999-2008 Tensilica Inc.
- */
-
-#ifndef _XTENSA_CORE_CONFIGURATION_H
-#define _XTENSA_CORE_CONFIGURATION_H
-
-
-/****************************************************************************
-	    Parameters Useful for Any Code, USER or PRIVILEGED
- ****************************************************************************/
-
-/*
- *  Note:  Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is
- *  configured, and a value of 0 otherwise.  These macros are always defined.
- */
-
-
-/*----------------------------------------------------------------------
-				ISA
-  ----------------------------------------------------------------------*/
-
-#define XCHAL_HAVE_BE			0	/* big-endian byte ordering */
-#define XCHAL_HAVE_WINDOWED		1	/* windowed registers option */
-#define XCHAL_NUM_AREGS			64	/* num of physical addr regs */
-#define XCHAL_NUM_AREGS_LOG2		6	/* log2(XCHAL_NUM_AREGS) */
-#define XCHAL_MAX_INSTRUCTION_SIZE	8	/* max instr bytes (3..8) */
-#define XCHAL_HAVE_DEBUG		1	/* debug option */
-#define XCHAL_HAVE_DENSITY		1	/* 16-bit instructions */
-#define XCHAL_HAVE_LOOPS		1	/* zero-overhead loops */
-#define XCHAL_HAVE_NSA			1	/* NSA/NSAU instructions */
-#define XCHAL_HAVE_MINMAX		1	/* MIN/MAX instructions */
-#define XCHAL_HAVE_SEXT			1	/* SEXT instruction */
-#define XCHAL_HAVE_CLAMPS		1	/* CLAMPS instruction */
-#define XCHAL_HAVE_MUL16		1	/* MUL16S/MUL16U instructions */
-#define XCHAL_HAVE_MUL32		1	/* MULL instruction */
-#define XCHAL_HAVE_MUL32_HIGH		1	/* MULUH/MULSH instructions */
-#define XCHAL_HAVE_DIV32		0	/* QUOS/QUOU/REMS/REMU instructions */
-#define XCHAL_HAVE_L32R			1	/* L32R instruction */
-#define XCHAL_HAVE_ABSOLUTE_LITERALS	1	/* non-PC-rel (extended) L32R */
-#define XCHAL_HAVE_CONST16		0	/* CONST16 instruction */
-#define XCHAL_HAVE_ADDX			1	/* ADDX#/SUBX# instructions */
-#define XCHAL_HAVE_WIDE_BRANCHES	0	/* B*.W18 or B*.W15 instr's */
-#define XCHAL_HAVE_PREDICTED_BRANCHES	0	/* B[EQ/EQZ/NE/NEZ]T instr's */
-#define XCHAL_HAVE_CALL4AND12		1	/* (obsolete option) */
-#define XCHAL_HAVE_ABS			1	/* ABS instruction */
-/*#define XCHAL_HAVE_POPC		0*/	/* POPC instruction */
-/*#define XCHAL_HAVE_CRC		0*/	/* CRC instruction */
-#define XCHAL_HAVE_RELEASE_SYNC		0	/* L32AI/S32RI instructions */
-#define XCHAL_HAVE_S32C1I		0	/* S32C1I instruction */
-#define XCHAL_HAVE_SPECULATION		0	/* speculation */
-#define XCHAL_HAVE_FULL_RESET		0	/* all regs/state reset */
-#define XCHAL_NUM_CONTEXTS		1	/* */
-#define XCHAL_NUM_MISC_REGS		4	/* num of scratch regs (0..4) */
-#define XCHAL_HAVE_TAP_MASTER		0	/* JTAG TAP control instr's */
-#define XCHAL_HAVE_PRID			0	/* processor ID register */
-#define XCHAL_HAVE_THREADPTR		0	/* THREADPTR register */
-#define XCHAL_HAVE_BOOLEANS		1	/* boolean registers */
-#define XCHAL_HAVE_CP			1	/* CPENABLE reg (coprocessor) */
-#define XCHAL_CP_MAXCFG			8	/* max allowed cp id plus one */
-#define XCHAL_HAVE_MAC16		0	/* MAC16 package */
-#define XCHAL_HAVE_VECTORFPU2005	0	/* vector floating-point pkg */
-#define XCHAL_HAVE_FP			1	/* floating point pkg */
-#define XCHAL_HAVE_VECTRA1		0	/* Vectra I  pkg */
-#define XCHAL_HAVE_VECTRALX		0	/* Vectra LX pkg */
-#define XCHAL_HAVE_HIFI2		0	/* HiFi2 Audio Engine pkg */
-
-
-/*----------------------------------------------------------------------
-				MISC
-  ----------------------------------------------------------------------*/
-
-#define XCHAL_NUM_WRITEBUFFER_ENTRIES	8	/* size of write buffer */
-#define XCHAL_INST_FETCH_WIDTH		8	/* instr-fetch width in bytes */
-#define XCHAL_DATA_WIDTH		16	/* data width in bytes */
-/*  In T1050, applies to selected core load and store instructions (see ISA): */
-#define XCHAL_UNALIGNED_LOAD_EXCEPTION	1	/* unaligned loads cause exc. */
-#define XCHAL_UNALIGNED_STORE_EXCEPTION	1	/* unaligned stores cause exc.*/
-
-#define XCHAL_SW_VERSION		701001	/* sw version of this header */
-
-#define XCHAL_CORE_ID			"stretch_bali"	/* alphanum core name
-						   (CoreID) set in the Xtensa
-						   Processor Generator */
-
-#define XCHAL_BUILD_UNIQUE_ID		0x000104B9	/* 22-bit sw build ID */
-
-/*
- *  These definitions describe the hardware targeted by this software.
- */
-#define XCHAL_HW_CONFIGID0		0xC2F3F9FE	/* ConfigID hi 32 bits*/
-#define XCHAL_HW_CONFIGID1		0x054104B9	/* ConfigID lo 32 bits*/
-#define XCHAL_HW_VERSION_NAME		"LX1.0.2"	/* full version name */
-#define XCHAL_HW_VERSION_MAJOR		2100	/* major ver# of targeted hw */
-#define XCHAL_HW_VERSION_MINOR		2	/* minor ver# of targeted hw */
-#define XCHAL_HW_VERSION		210002	/* major*100+minor */
-#define XCHAL_HW_REL_LX1		1
-#define XCHAL_HW_REL_LX1_0		1
-#define XCHAL_HW_REL_LX1_0_2		1
-#define XCHAL_HW_CONFIGID_RELIABLE	1
-/*  If software targets a *range* of hardware versions, these are the bounds: */
-#define XCHAL_HW_MIN_VERSION_MAJOR	2100	/* major v of earliest tgt hw */
-#define XCHAL_HW_MIN_VERSION_MINOR	2	/* minor v of earliest tgt hw */
-#define XCHAL_HW_MIN_VERSION		210002	/* earliest targeted hw */
-#define XCHAL_HW_MAX_VERSION_MAJOR	2100	/* major v of latest tgt hw */
-#define XCHAL_HW_MAX_VERSION_MINOR	2	/* minor v of latest tgt hw */
-#define XCHAL_HW_MAX_VERSION		210002	/* latest targeted hw */
-
-
-/*----------------------------------------------------------------------
-				CACHE
-  ----------------------------------------------------------------------*/
-
-#define XCHAL_ICACHE_LINESIZE		16	/* I-cache line size in bytes */
-#define XCHAL_DCACHE_LINESIZE		16	/* D-cache line size in bytes */
-#define XCHAL_ICACHE_LINEWIDTH		4	/* log2(I line size in bytes) */
-#define XCHAL_DCACHE_LINEWIDTH		4	/* log2(D line size in bytes) */
-
-#define XCHAL_ICACHE_SIZE		32768	/* I-cache size in bytes or 0 */
-#define XCHAL_DCACHE_SIZE		32768	/* D-cache size in bytes or 0 */
-
-#define XCHAL_DCACHE_IS_WRITEBACK	1	/* writeback feature */
-
-
-
-
-/****************************************************************************
-    Parameters Useful for PRIVILEGED (Supervisory or Non-Virtualized) Code
- ****************************************************************************/
-
-
-#ifndef XTENSA_HAL_NON_PRIVILEGED_ONLY
-
-/*----------------------------------------------------------------------
-				CACHE
-  ----------------------------------------------------------------------*/
-
-#define XCHAL_HAVE_PIF			1	/* any outbound PIF present */
-
-/*  If present, cache size in bytes == (ways * 2^(linewidth + setwidth)).  */
-
-/*  Number of cache sets in log2(lines per way):  */
-#define XCHAL_ICACHE_SETWIDTH		9
-#define XCHAL_DCACHE_SETWIDTH		10
-
-/*  Cache set associativity (number of ways):  */
-#define XCHAL_ICACHE_WAYS		4
-#define XCHAL_DCACHE_WAYS		2
-
-/*  Cache features:  */
-#define XCHAL_ICACHE_LINE_LOCKABLE	1
-#define XCHAL_DCACHE_LINE_LOCKABLE	0
-#define XCHAL_ICACHE_ECC_PARITY		0
-#define XCHAL_DCACHE_ECC_PARITY		0
-
-/*  Number of encoded cache attr bits (see <xtensa/hal.h> for decoded bits):  */
-#define XCHAL_CA_BITS			4
-
-
-/*----------------------------------------------------------------------
-			INTERNAL I/D RAM/ROMs and XLMI
-  ----------------------------------------------------------------------*/
-
-#define XCHAL_NUM_INSTROM		0	/* number of core instr. ROMs */
-#define XCHAL_NUM_INSTRAM		0	/* number of core instr. RAMs */
-#define XCHAL_NUM_DATAROM		0	/* number of core data ROMs */
-#define XCHAL_NUM_DATARAM		1	/* number of core data RAMs */
-#define XCHAL_NUM_URAM			0	/* number of core unified RAMs*/
-#define XCHAL_NUM_XLMI			1	/* number of core XLMI ports */
-
-/*  Data RAM 0:  */
-#define XCHAL_DATARAM0_VADDR		0x3FFF0000
-#define XCHAL_DATARAM0_PADDR		0x3FFF0000
-#define XCHAL_DATARAM0_SIZE		65536
-#define XCHAL_DATARAM0_ECC_PARITY	0
-
-/*  XLMI Port 0:  */
-#define XCHAL_XLMI0_VADDR		0x37F80000
-#define XCHAL_XLMI0_PADDR		0x37F80000
-#define XCHAL_XLMI0_SIZE		262144
-#define XCHAL_XLMI0_ECC_PARITY	0
-
-
-/*----------------------------------------------------------------------
-			INTERRUPTS and TIMERS
-  ----------------------------------------------------------------------*/
-
-#define XCHAL_HAVE_INTERRUPTS		1	/* interrupt option */
-#define XCHAL_HAVE_HIGHPRI_INTERRUPTS	1	/* med/high-pri. interrupts */
-#define XCHAL_HAVE_NMI			1	/* non-maskable interrupt */
-#define XCHAL_HAVE_CCOUNT		1	/* CCOUNT reg. (timer option) */
-#define XCHAL_NUM_TIMERS		3	/* number of CCOMPAREn regs */
-#define XCHAL_NUM_INTERRUPTS		27	/* number of interrupts */
-#define XCHAL_NUM_INTERRUPTS_LOG2	5	/* ceil(log2(NUM_INTERRUPTS)) */
-#define XCHAL_NUM_EXTINTERRUPTS		20	/* num of external interrupts */
-#define XCHAL_NUM_INTLEVELS		4	/* number of interrupt levels
-						   (not including level zero) */
-#define XCHAL_EXCM_LEVEL		1	/* level masked by PS.EXCM */
-	/* (always 1 in XEA1; levels 2 .. EXCM_LEVEL are "medium priority") */
-
-/*  Masks of interrupts at each interrupt level:  */
-#define XCHAL_INTLEVEL1_MASK		0x01F07FFF
-#define XCHAL_INTLEVEL2_MASK		0x02018000
-#define XCHAL_INTLEVEL3_MASK		0x04060000
-#define XCHAL_INTLEVEL4_MASK		0x00000000
-#define XCHAL_INTLEVEL5_MASK		0x00080000
-#define XCHAL_INTLEVEL6_MASK		0x00000000
-#define XCHAL_INTLEVEL7_MASK		0x00000000
-
-/*  Masks of interrupts at each range 1..n of interrupt levels:  */
-#define XCHAL_INTLEVEL1_ANDBELOW_MASK	0x01F07FFF
-#define XCHAL_INTLEVEL2_ANDBELOW_MASK	0x03F1FFFF
-#define XCHAL_INTLEVEL3_ANDBELOW_MASK	0x07F7FFFF
-#define XCHAL_INTLEVEL4_ANDBELOW_MASK	0x07F7FFFF
-#define XCHAL_INTLEVEL5_ANDBELOW_MASK	0x07FFFFFF
-#define XCHAL_INTLEVEL6_ANDBELOW_MASK	0x07FFFFFF
-#define XCHAL_INTLEVEL7_ANDBELOW_MASK	0x07FFFFFF
-
-/*  Level of each interrupt:  */
-#define XCHAL_INT0_LEVEL		1
-#define XCHAL_INT1_LEVEL		1
-#define XCHAL_INT2_LEVEL		1
-#define XCHAL_INT3_LEVEL		1
-#define XCHAL_INT4_LEVEL		1
-#define XCHAL_INT5_LEVEL		1
-#define XCHAL_INT6_LEVEL		1
-#define XCHAL_INT7_LEVEL		1
-#define XCHAL_INT8_LEVEL		1
-#define XCHAL_INT9_LEVEL		1
-#define XCHAL_INT10_LEVEL		1
-#define XCHAL_INT11_LEVEL		1
-#define XCHAL_INT12_LEVEL		1
-#define XCHAL_INT13_LEVEL		1
-#define XCHAL_INT14_LEVEL		1
-#define XCHAL_INT15_LEVEL		2
-#define XCHAL_INT16_LEVEL		2
-#define XCHAL_INT17_LEVEL		3
-#define XCHAL_INT18_LEVEL		3
-#define XCHAL_INT19_LEVEL		5
-#define XCHAL_INT20_LEVEL		1
-#define XCHAL_INT21_LEVEL		1
-#define XCHAL_INT22_LEVEL		1
-#define XCHAL_INT23_LEVEL		1
-#define XCHAL_INT24_LEVEL		1
-#define XCHAL_INT25_LEVEL		2
-#define XCHAL_INT26_LEVEL		3
-#define XCHAL_DEBUGLEVEL		4	/* debug interrupt level */
-#define XCHAL_HAVE_DEBUG_EXTERN_INT	1	/* OCD external db interrupt */
-#define XCHAL_NMILEVEL			5	/* NMI "level" (for use with
-						   EXCSAVE/EPS/EPC_n, RFI n) */
-
-/*  Type of each interrupt:  */
-#define XCHAL_INT0_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT1_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT2_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT3_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT4_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT5_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT6_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT7_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT8_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT9_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT10_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT11_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT12_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT13_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT14_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT15_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT16_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT17_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT18_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
-#define XCHAL_INT19_TYPE 	XTHAL_INTTYPE_NMI
-#define XCHAL_INT20_TYPE 	XTHAL_INTTYPE_SOFTWARE
-#define XCHAL_INT21_TYPE 	XTHAL_INTTYPE_SOFTWARE
-#define XCHAL_INT22_TYPE 	XTHAL_INTTYPE_SOFTWARE
-#define XCHAL_INT23_TYPE 	XTHAL_INTTYPE_SOFTWARE
-#define XCHAL_INT24_TYPE 	XTHAL_INTTYPE_TIMER
-#define XCHAL_INT25_TYPE 	XTHAL_INTTYPE_TIMER
-#define XCHAL_INT26_TYPE 	XTHAL_INTTYPE_TIMER
-
-/*  Masks of interrupts for each type of interrupt:  */
-#define XCHAL_INTTYPE_MASK_UNCONFIGURED	0xF8000000
-#define XCHAL_INTTYPE_MASK_SOFTWARE	0x00F00000
-#define XCHAL_INTTYPE_MASK_EXTERN_EDGE	0x00000000
-#define XCHAL_INTTYPE_MASK_EXTERN_LEVEL	0x0007FFFF
-#define XCHAL_INTTYPE_MASK_TIMER	0x07000000
-#define XCHAL_INTTYPE_MASK_NMI		0x00080000
-#define XCHAL_INTTYPE_MASK_WRITE_ERROR	0x00000000
-
-/*  Interrupt numbers assigned to specific interrupt sources:  */
-#define XCHAL_TIMER0_INTERRUPT		24	/* CCOMPARE0 */
-#define XCHAL_TIMER1_INTERRUPT		25	/* CCOMPARE1 */
-#define XCHAL_TIMER2_INTERRUPT		26	/* CCOMPARE2 */
-#define XCHAL_TIMER3_INTERRUPT		XTHAL_TIMER_UNCONFIGURED
-#define XCHAL_NMI_INTERRUPT		19	/* non-maskable interrupt */
-
-/*  Interrupt numbers for levels at which only one interrupt is configured:  */
-#define XCHAL_INTLEVEL5_NUM		19
-/*  (There are many interrupts each at level(s) 1, 2, 3.)  */
-
-
-/*
- *  External interrupt vectors/levels.
- *  These macros describe how Xtensa processor interrupt numbers
- *  (as numbered internally, eg. in INTERRUPT and INTENABLE registers)
- *  map to external BInterrupt<n> pins, for those interrupts
- *  configured as external (level-triggered, edge-triggered, or NMI).
- *  See the Xtensa processor databook for more details.
- */
-
-/*  Core interrupt numbers mapped to each EXTERNAL interrupt number:  */
-#define XCHAL_EXTINT0_NUM		0	/* (intlevel 1) */
-#define XCHAL_EXTINT1_NUM		1	/* (intlevel 1) */
-#define XCHAL_EXTINT2_NUM		2	/* (intlevel 1) */
-#define XCHAL_EXTINT3_NUM		3	/* (intlevel 1) */
-#define XCHAL_EXTINT4_NUM		4	/* (intlevel 1) */
-#define XCHAL_EXTINT5_NUM		5	/* (intlevel 1) */
-#define XCHAL_EXTINT6_NUM		6	/* (intlevel 1) */
-#define XCHAL_EXTINT7_NUM		7	/* (intlevel 1) */
-#define XCHAL_EXTINT8_NUM		8	/* (intlevel 1) */
-#define XCHAL_EXTINT9_NUM		9	/* (intlevel 1) */
-#define XCHAL_EXTINT10_NUM		10	/* (intlevel 1) */
-#define XCHAL_EXTINT11_NUM		11	/* (intlevel 1) */
-#define XCHAL_EXTINT12_NUM		12	/* (intlevel 1) */
-#define XCHAL_EXTINT13_NUM		13	/* (intlevel 1) */
-#define XCHAL_EXTINT14_NUM		14	/* (intlevel 1) */
-#define XCHAL_EXTINT15_NUM		15	/* (intlevel 2) */
-#define XCHAL_EXTINT16_NUM		16	/* (intlevel 2) */
-#define XCHAL_EXTINT17_NUM		17	/* (intlevel 3) */
-#define XCHAL_EXTINT18_NUM		18	/* (intlevel 3) */
-#define XCHAL_EXTINT19_NUM		19	/* (intlevel 5) */
-
-
-/*----------------------------------------------------------------------
-			EXCEPTIONS and VECTORS
-  ----------------------------------------------------------------------*/
-
-#define XCHAL_XEA_VERSION		2	/* Xtensa Exception Architecture
-						   number: 1 == XEA1 (old)
-							   2 == XEA2 (new)
-							   0 == XEAX (extern) */
-#define XCHAL_HAVE_XEA1			0	/* Exception Architecture 1 */
-#define XCHAL_HAVE_XEA2			1	/* Exception Architecture 2 */
-#define XCHAL_HAVE_XEAX			0	/* External Exception Arch. */
-#define XCHAL_HAVE_EXCEPTIONS		1	/* exception option */
-#define XCHAL_HAVE_MEM_ECC_PARITY	0	/* local memory ECC/parity */
-#define XCHAL_HAVE_VECTOR_SELECT	0	/* relocatable vectors */
-#define XCHAL_HAVE_VECBASE		0	/* relocatable vectors */
-
-#define XCHAL_RESET_VECOFS		0x00000000
-#define XCHAL_RESET_VECTOR_VADDR	0x3FFE03D0
-#define XCHAL_RESET_VECTOR_PADDR	0x3FFE03D0
-#define XCHAL_USER_VECOFS		0x00000000
-#define XCHAL_USER_VECTOR_VADDR		0x40000220
-#define XCHAL_USER_VECTOR_PADDR		0x40000220
-#define XCHAL_KERNEL_VECOFS		0x00000000
-#define XCHAL_KERNEL_VECTOR_VADDR	0x40000200
-#define XCHAL_KERNEL_VECTOR_PADDR	0x40000200
-#define XCHAL_DOUBLEEXC_VECOFS		0x00000000
-#define XCHAL_DOUBLEEXC_VECTOR_VADDR	0x400002A0
-#define XCHAL_DOUBLEEXC_VECTOR_PADDR	0x400002A0
-#define XCHAL_WINDOW_OF4_VECOFS		0x00000000
-#define XCHAL_WINDOW_UF4_VECOFS		0x00000040
-#define XCHAL_WINDOW_OF8_VECOFS		0x00000080
-#define XCHAL_WINDOW_UF8_VECOFS		0x000000C0
-#define XCHAL_WINDOW_OF12_VECOFS	0x00000100
-#define XCHAL_WINDOW_UF12_VECOFS	0x00000140
-#define XCHAL_WINDOW_VECTORS_VADDR	0x40000000
-#define XCHAL_WINDOW_VECTORS_PADDR	0x40000000
-#define XCHAL_INTLEVEL2_VECOFS		0x00000000
-#define XCHAL_INTLEVEL2_VECTOR_VADDR	0x40000240
-#define XCHAL_INTLEVEL2_VECTOR_PADDR	0x40000240
-#define XCHAL_INTLEVEL3_VECOFS		0x00000000
-#define XCHAL_INTLEVEL3_VECTOR_VADDR	0x40000260
-#define XCHAL_INTLEVEL3_VECTOR_PADDR	0x40000260
-#define XCHAL_INTLEVEL4_VECOFS		0x00000000
-#define XCHAL_INTLEVEL4_VECTOR_VADDR	0x40000390
-#define XCHAL_INTLEVEL4_VECTOR_PADDR	0x40000390
-#define XCHAL_DEBUG_VECOFS		XCHAL_INTLEVEL4_VECOFS
-#define XCHAL_DEBUG_VECTOR_VADDR	XCHAL_INTLEVEL4_VECTOR_VADDR
-#define XCHAL_DEBUG_VECTOR_PADDR	XCHAL_INTLEVEL4_VECTOR_PADDR
-#define XCHAL_NMI_VECOFS		0x00000000
-#define XCHAL_NMI_VECTOR_VADDR		0x400003B0
-#define XCHAL_NMI_VECTOR_PADDR		0x400003B0
-#define XCHAL_INTLEVEL5_VECOFS		XCHAL_NMI_VECOFS
-#define XCHAL_INTLEVEL5_VECTOR_VADDR	XCHAL_NMI_VECTOR_VADDR
-#define XCHAL_INTLEVEL5_VECTOR_PADDR	XCHAL_NMI_VECTOR_PADDR
-
-
-/*----------------------------------------------------------------------
-				DEBUG
-  ----------------------------------------------------------------------*/
-
-#define XCHAL_HAVE_OCD			1	/* OnChipDebug option */
-#define XCHAL_NUM_IBREAK		2	/* number of IBREAKn regs */
-#define XCHAL_NUM_DBREAK		2	/* number of DBREAKn regs */
-#define XCHAL_HAVE_OCD_DIR_ARRAY	1	/* faster OCD option */
-
-
-/*----------------------------------------------------------------------
-				MMU
-  ----------------------------------------------------------------------*/
-
-/*  See core-matmap.h header file for more details.  */
-
-#define XCHAL_HAVE_TLBS			1	/* inverse of HAVE_CACHEATTR */
-#define XCHAL_HAVE_SPANNING_WAY		1	/* one way maps I+D 4GB vaddr */
-#define XCHAL_HAVE_IDENTITY_MAP		1	/* vaddr == paddr always */
-#define XCHAL_HAVE_CACHEATTR		0	/* CACHEATTR register present */
-#define XCHAL_HAVE_MIMIC_CACHEATTR	1	/* region protection */
-#define XCHAL_HAVE_XLT_CACHEATTR	0	/* region prot. w/translation */
-#define XCHAL_HAVE_PTP_MMU		0	/* full MMU (with page table
-						   [autorefill] and protection)
-						   usable for an MMU-based OS */
-/*  If none of the above last 4 are set, it's a custom TLB configuration.  */
-
-#define XCHAL_MMU_ASID_BITS		0	/* number of bits in ASIDs */
-#define XCHAL_MMU_RINGS			1	/* number of rings (1..4) */
-#define XCHAL_MMU_RING_BITS		0	/* num of bits in RING field */
-
-#endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */
-
-
-#endif /* _XTENSA_CORE_CONFIGURATION_H */
-
diff --git a/arch/xtensa/variants/s6000/include/variant/dmac.h b/arch/xtensa/variants/s6000/include/variant/dmac.h
deleted file mode 100644
index 3f88d9fc6897..000000000000
--- a/arch/xtensa/variants/s6000/include/variant/dmac.h
+++ /dev/null
@@ -1,387 +0,0 @@
-/*
- * include/asm-xtensa/variant-s6000/dmac.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2006 Tensilica Inc.
- * Copyright (C) 2008 Emlix GmbH <info@emlix.com>
- * Authors:	Fabian Godehardt <fg@emlix.com>
- *		Oskar Schirmer <oskar@scara.com>
- *		Daniel Gloeckner <dg@emlix.com>
- */
-
-#ifndef __ASM_XTENSA_S6000_DMAC_H
-#define __ASM_XTENSA_S6000_DMAC_H
-#include <linux/io.h>
-#include <variant/hardware.h>
-
-/* DMA global */
-
-#define S6_DMA_INTSTAT0		0x000
-#define S6_DMA_INTSTAT1		0x004
-#define S6_DMA_INTENABLE0	0x008
-#define S6_DMA_INTENABLE1	0x00C
-#define S6_DMA_INTRAW0		0x010
-#define S6_DMA_INTRAW1		0x014
-#define S6_DMA_INTCLEAR0	0x018
-#define S6_DMA_INTCLEAR1	0x01C
-#define S6_DMA_INTSET0		0x020
-#define S6_DMA_INTSET1		0x024
-#define S6_DMA_INT0_UNDER		0
-#define S6_DMA_INT0_OVER		16
-#define S6_DMA_INT1_CHANNEL		0
-#define S6_DMA_INT1_MASTER		16
-#define S6_DMA_INT1_MASTER_MASK			7
-#define S6_DMA_TERMCNTIRQSTAT	0x028
-#define S6_DMA_TERMCNTIRQCLR	0x02C
-#define S6_DMA_TERMCNTIRQSET	0x030
-#define S6_DMA_PENDCNTIRQSTAT	0x034
-#define S6_DMA_PENDCNTIRQCLR	0x038
-#define S6_DMA_PENDCNTIRQSET	0x03C
-#define S6_DMA_LOWWMRKIRQSTAT	0x040
-#define S6_DMA_LOWWMRKIRQCLR	0x044
-#define S6_DMA_LOWWMRKIRQSET	0x048
-#define S6_DMA_MASTERERRINFO	0x04C
-#define S6_DMA_MASTERERR_CHAN(n)	(4*(n))
-#define S6_DMA_MASTERERR_CHAN_MASK		0xF
-#define S6_DMA_DESCRFIFO0	0x050
-#define S6_DMA_DESCRFIFO1	0x054
-#define S6_DMA_DESCRFIFO2	0x058
-#define S6_DMA_DESCRFIFO2_AUTODISABLE	24
-#define S6_DMA_DESCRFIFO3	0x05C
-#define S6_DMA_MASTER0START	0x060
-#define S6_DMA_MASTER0END	0x064
-#define S6_DMA_MASTER1START	0x068
-#define S6_DMA_MASTER1END	0x06C
-#define S6_DMA_NEXTFREE		0x070
-#define S6_DMA_NEXTFREE_CHAN		0
-#define S6_DMA_NEXTFREE_CHAN_MASK	0x1F
-#define S6_DMA_NEXTFREE_ENA		16
-#define S6_DMA_NEXTFREE_ENA_MASK	((1 << 16) - 1)
-#define S6_DMA_DPORTCTRLGRP(p)	((p) * 4 + 0x074)
-#define S6_DMA_DPORTCTRLGRP_FRAMEREP	0
-#define S6_DMA_DPORTCTRLGRP_NRCHANS	1
-#define S6_DMA_DPORTCTRLGRP_NRCHANS_1		0
-#define S6_DMA_DPORTCTRLGRP_NRCHANS_3		1
-#define S6_DMA_DPORTCTRLGRP_NRCHANS_4		2
-#define S6_DMA_DPORTCTRLGRP_NRCHANS_2		3
-#define S6_DMA_DPORTCTRLGRP_ENA		31
-
-
-/* DMA per channel */
-
-#define DMA_CHNL(dmac, n)	((dmac) + 0x1000 + (n) * 0x100)
-#define DMA_INDEX_CHNL(addr)	(((addr) >> 8) & 0xF)
-#define DMA_MASK_DMAC(addr)	((addr) & 0xFFFF0000)
-#define S6_DMA_CHNCTRL		0x000
-#define S6_DMA_CHNCTRL_ENABLE		0
-#define S6_DMA_CHNCTRL_PAUSE		1
-#define S6_DMA_CHNCTRL_PRIO		2
-#define S6_DMA_CHNCTRL_PRIO_MASK		3
-#define S6_DMA_CHNCTRL_PERIPHXFER	4
-#define S6_DMA_CHNCTRL_PERIPHENA	5
-#define S6_DMA_CHNCTRL_SRCINC		6
-#define S6_DMA_CHNCTRL_DSTINC		7
-#define S6_DMA_CHNCTRL_BURSTLOG		8
-#define S6_DMA_CHNCTRL_BURSTLOG_MASK		7
-#define S6_DMA_CHNCTRL_DESCFIFODEPTH	12
-#define S6_DMA_CHNCTRL_DESCFIFODEPTH_MASK	0x1F
-#define S6_DMA_CHNCTRL_DESCFIFOFULL	17
-#define S6_DMA_CHNCTRL_BWCONSEL		18
-#define S6_DMA_CHNCTRL_BWCONENA		19
-#define S6_DMA_CHNCTRL_PENDGCNTSTAT	20
-#define S6_DMA_CHNCTRL_PENDGCNTSTAT_MASK	0x3F
-#define S6_DMA_CHNCTRL_LOWWMARK		26
-#define S6_DMA_CHNCTRL_LOWWMARK_MASK		0xF
-#define S6_DMA_CHNCTRL_TSTAMP		30
-#define S6_DMA_TERMCNTNB	0x004
-#define S6_DMA_TERMCNTNB_MASK			0xFFFF
-#define S6_DMA_TERMCNTTMO	0x008
-#define S6_DMA_TERMCNTSTAT	0x00C
-#define S6_DMA_TERMCNTSTAT_MASK		0xFF
-#define S6_DMA_CMONCHUNK	0x010
-#define S6_DMA_SRCSKIP		0x014
-#define S6_DMA_DSTSKIP		0x018
-#define S6_DMA_CUR_SRC		0x024
-#define S6_DMA_CUR_DST		0x028
-#define S6_DMA_TIMESTAMP	0x030
-
-/* DMA channel lists */
-
-#define S6_DPDMA_CHAN(stream, channel)	(4 * (stream) + (channel))
-#define S6_DPDMA_NB	16
-
-#define S6_HIFDMA_GMACTX	0
-#define S6_HIFDMA_GMACRX	1
-#define S6_HIFDMA_I2S0		2
-#define S6_HIFDMA_I2S1		3
-#define S6_HIFDMA_EGIB		4
-#define S6_HIFDMA_PCITX		5
-#define S6_HIFDMA_PCIRX		6
-#define S6_HIFDMA_NB	7
-
-#define S6_NIDMA_NB	4
-
-#define S6_LMSDMA_NB	12
-
-/* controller access */
-
-#define S6_DMAC_NB	4
-#define S6_DMAC_INDEX(dmac)	(((unsigned)(dmac) >> 18) % S6_DMAC_NB)
-
-struct s6dmac_ctrl {
-	u32 dmac;
-	spinlock_t lock;
-	u8 chan_nb;
-};
-
-extern struct s6dmac_ctrl s6dmac_ctrl[S6_DMAC_NB];
-
-
-/* DMA control, per channel */
-
-static inline int s6dmac_fifo_full(u32 dmac, int chan)
-{
-	return (readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL)
-		& (1 << S6_DMA_CHNCTRL_DESCFIFOFULL)) && 1;
-}
-
-static inline int s6dmac_termcnt_irq(u32 dmac, int chan)
-{
-	u32 m = 1 << chan;
-	int r = (readl(dmac + S6_DMA_TERMCNTIRQSTAT) & m) && 1;
-	if (r)
-		writel(m, dmac + S6_DMA_TERMCNTIRQCLR);
-	return r;
-}
-
-static inline int s6dmac_pendcnt_irq(u32 dmac, int chan)
-{
-	u32 m = 1 << chan;
-	int r = (readl(dmac + S6_DMA_PENDCNTIRQSTAT) & m) && 1;
-	if (r)
-		writel(m, dmac + S6_DMA_PENDCNTIRQCLR);
-	return r;
-}
-
-static inline int s6dmac_lowwmark_irq(u32 dmac, int chan)
-{
-	int r = (readl(dmac + S6_DMA_LOWWMRKIRQSTAT) & (1 << chan)) ? 1 : 0;
-	if (r)
-		writel(1 << chan, dmac + S6_DMA_LOWWMRKIRQCLR);
-	return r;
-}
-
-static inline u32 s6dmac_pending_count(u32 dmac, int chan)
-{
-	return (readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL)
-			>> S6_DMA_CHNCTRL_PENDGCNTSTAT)
-		& S6_DMA_CHNCTRL_PENDGCNTSTAT_MASK;
-}
-
-static inline void s6dmac_set_terminal_count(u32 dmac, int chan, u32 n)
-{
-	n &= S6_DMA_TERMCNTNB_MASK;
-	n |= readl(DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTNB)
-	      & ~S6_DMA_TERMCNTNB_MASK;
-	writel(n, DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTNB);
-}
-
-static inline u32 s6dmac_get_terminal_count(u32 dmac, int chan)
-{
-	return (readl(DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTNB))
-		& S6_DMA_TERMCNTNB_MASK;
-}
-
-static inline u32 s6dmac_timestamp(u32 dmac, int chan)
-{
-	return readl(DMA_CHNL(dmac, chan) + S6_DMA_TIMESTAMP);
-}
-
-static inline u32 s6dmac_cur_src(u32 dmac, int chan)
-{
-	return readl(DMA_CHNL(dmac, chan) + S6_DMA_CUR_SRC);
-}
-
-static inline u32 s6dmac_cur_dst(u32 dmac, int chan)
-{
-	return readl(DMA_CHNL(dmac, chan) + S6_DMA_CUR_DST);
-}
-
-static inline void s6dmac_disable_chan(u32 dmac, int chan)
-{
-	u32 ctrl;
-	writel(readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL)
-		& ~(1 << S6_DMA_CHNCTRL_ENABLE),
-		DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL);
-	do
-		ctrl = readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL);
-	while (ctrl & (1 << S6_DMA_CHNCTRL_ENABLE));
-}
-
-static inline void s6dmac_set_stride_skip(u32 dmac, int chan,
-	int comchunk,		/* 0: disable scatter/gather */
-	int srcskip, int dstskip)
-{
-	writel(comchunk, DMA_CHNL(dmac, chan) + S6_DMA_CMONCHUNK);
-	writel(srcskip, DMA_CHNL(dmac, chan) + S6_DMA_SRCSKIP);
-	writel(dstskip, DMA_CHNL(dmac, chan) + S6_DMA_DSTSKIP);
-}
-
-static inline void s6dmac_enable_chan(u32 dmac, int chan,
-	int prio,               /* 0 (highest) .. 3 (lowest) */
-	int periphxfer,         /* <0: disable p.req.line, 0..1: mode */
-	int srcinc, int dstinc, /* 0: dont increment src/dst address */
-	int comchunk,		/* 0: disable scatter/gather */
-	int srcskip, int dstskip,
-	int burstsize,		/* 4 for I2S, 7 for everything else */
-	int bandwidthconserve,  /* <0: disable, 0..1: select */
-	int lowwmark,           /* 0..15 */
-	int timestamp,		/* 0: disable timestamp */
-	int enable)		/* 0: disable for now */
-{
-	writel(1, DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTNB);
-	writel(0, DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTTMO);
-	writel(lowwmark << S6_DMA_CHNCTRL_LOWWMARK,
-		DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL);
-	s6dmac_set_stride_skip(dmac, chan, comchunk, srcskip, dstskip);
-	writel(((enable ? 1 : 0) << S6_DMA_CHNCTRL_ENABLE) |
-		(prio << S6_DMA_CHNCTRL_PRIO) |
-		(((periphxfer > 0) ? 1 : 0) << S6_DMA_CHNCTRL_PERIPHXFER) |
-		(((periphxfer < 0) ? 0 : 1) << S6_DMA_CHNCTRL_PERIPHENA) |
-		((srcinc ? 1 : 0) << S6_DMA_CHNCTRL_SRCINC) |
-		((dstinc ? 1 : 0) << S6_DMA_CHNCTRL_DSTINC) |
-		(burstsize << S6_DMA_CHNCTRL_BURSTLOG) |
-		(((bandwidthconserve > 0) ? 1 : 0) << S6_DMA_CHNCTRL_BWCONSEL) |
-		(((bandwidthconserve < 0) ? 0 : 1) << S6_DMA_CHNCTRL_BWCONENA) |
-		(lowwmark << S6_DMA_CHNCTRL_LOWWMARK) |
-		((timestamp ? 1 : 0) << S6_DMA_CHNCTRL_TSTAMP),
-		DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL);
-}
-
-
-/* DMA control, per engine */
-
-static inline unsigned _dmac_addr_index(u32 dmac)
-{
-	unsigned i = S6_DMAC_INDEX(dmac);
-	if (s6dmac_ctrl[i].dmac != dmac)
-		BUG();
-	return i;
-}
-
-static inline void _s6dmac_disable_error_irqs(u32 dmac, u32 mask)
-{
-	writel(mask, dmac + S6_DMA_TERMCNTIRQCLR);
-	writel(mask, dmac + S6_DMA_PENDCNTIRQCLR);
-	writel(mask, dmac + S6_DMA_LOWWMRKIRQCLR);
-	writel(readl(dmac + S6_DMA_INTENABLE0)
-		& ~((mask << S6_DMA_INT0_UNDER) | (mask << S6_DMA_INT0_OVER)),
-		dmac + S6_DMA_INTENABLE0);
-	writel(readl(dmac + S6_DMA_INTENABLE1) & ~(mask << S6_DMA_INT1_CHANNEL),
-		dmac + S6_DMA_INTENABLE1);
-	writel((mask << S6_DMA_INT0_UNDER) | (mask << S6_DMA_INT0_OVER),
-		dmac + S6_DMA_INTCLEAR0);
-	writel(mask << S6_DMA_INT1_CHANNEL, dmac + S6_DMA_INTCLEAR1);
-}
-
-/*
- * request channel from specified engine
- * with chan<0, accept any channel
- * further parameters see s6dmac_enable_chan
- * returns < 0 upon error, channel nb otherwise
- */
-static inline int s6dmac_request_chan(u32 dmac, int chan,
-	int prio,
-	int periphxfer,
-	int srcinc, int dstinc,
-	int comchunk,
-	int srcskip, int dstskip,
-	int burstsize,
-	int bandwidthconserve,
-	int lowwmark,
-	int timestamp,
-	int enable)
-{
-	int r = chan;
-	unsigned long flags;
-	spinlock_t *spinl = &s6dmac_ctrl[_dmac_addr_index(dmac)].lock;
-	spin_lock_irqsave(spinl, flags);
-	if (r < 0) {
-		r = (readl(dmac + S6_DMA_NEXTFREE) >> S6_DMA_NEXTFREE_CHAN)
-			& S6_DMA_NEXTFREE_CHAN_MASK;
-	}
-	if (r >= s6dmac_ctrl[_dmac_addr_index(dmac)].chan_nb) {
-		if (chan < 0)
-			r = -EBUSY;
-		else
-			r = -ENXIO;
-	} else if (((readl(dmac + S6_DMA_NEXTFREE) >> S6_DMA_NEXTFREE_ENA)
-			>> r) & 1) {
-		r = -EBUSY;
-	} else {
-		s6dmac_enable_chan(dmac, r, prio, periphxfer,
-			srcinc, dstinc, comchunk, srcskip, dstskip, burstsize,
-			bandwidthconserve, lowwmark, timestamp, enable);
-	}
-	spin_unlock_irqrestore(spinl, flags);
-	return r;
-}
-
-static inline void s6dmac_put_fifo(u32 dmac, int chan,
-	u32 src, u32 dst, u32 size)
-{
-	unsigned long flags;
-	spinlock_t *spinl = &s6dmac_ctrl[_dmac_addr_index(dmac)].lock;
-	spin_lock_irqsave(spinl, flags);
-	writel(src, dmac + S6_DMA_DESCRFIFO0);
-	writel(dst, dmac + S6_DMA_DESCRFIFO1);
-	writel(size, dmac + S6_DMA_DESCRFIFO2);
-	writel(chan, dmac + S6_DMA_DESCRFIFO3);
-	spin_unlock_irqrestore(spinl, flags);
-}
-
-static inline u32 s6dmac_channel_enabled(u32 dmac, int chan)
-{
-	return readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL) &
-			(1 << S6_DMA_CHNCTRL_ENABLE);
-}
-
-/*
- * group 1-4 data port channels
- * with port=0..3, nrch=1-4 channels,
- * frrep=0/1 (dis- or enable frame repeat)
- */
-static inline void s6dmac_dp_setup_group(u32 dmac, int port,
-			int nrch, int frrep)
-{
-	static const u8 mask[4] = {0, 3, 1, 2};
-	BUG_ON(dmac != S6_REG_DPDMA);
-	if ((port < 0) || (port > 3) || (nrch < 1) || (nrch > 4))
-		return;
-	writel((mask[nrch - 1] << S6_DMA_DPORTCTRLGRP_NRCHANS)
-		| ((frrep ? 1 : 0) << S6_DMA_DPORTCTRLGRP_FRAMEREP),
-		dmac + S6_DMA_DPORTCTRLGRP(port));
-}
-
-static inline void s6dmac_dp_switch_group(u32 dmac, int port, int enable)
-{
-	u32 tmp;
-	BUG_ON(dmac != S6_REG_DPDMA);
-	tmp = readl(dmac + S6_DMA_DPORTCTRLGRP(port));
-	if (enable)
-		tmp |= (1 << S6_DMA_DPORTCTRLGRP_ENA);
-	else
-		tmp &= ~(1 << S6_DMA_DPORTCTRLGRP_ENA);
-	writel(tmp, dmac + S6_DMA_DPORTCTRLGRP(port));
-}
-
-extern void s6dmac_put_fifo_cache(u32 dmac, int chan,
-	u32 src, u32 dst, u32 size);
-extern void s6dmac_disable_error_irqs(u32 dmac, u32 mask);
-extern u32 s6dmac_int_sources(u32 dmac, u32 channel);
-extern void s6dmac_release_chan(u32 dmac, int chan);
-
-#endif /* __ASM_XTENSA_S6000_DMAC_H */
diff --git a/arch/xtensa/variants/s6000/include/variant/gpio.h b/arch/xtensa/variants/s6000/include/variant/gpio.h
deleted file mode 100644
index 8484ab0df461..000000000000
--- a/arch/xtensa/variants/s6000/include/variant/gpio.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _XTENSA_VARIANT_S6000_GPIO_H
-#define _XTENSA_VARIANT_S6000_GPIO_H
-
-extern int s6_gpio_init(u32 afsel);
-
-#endif /* _XTENSA_VARIANT_S6000_GPIO_H */
diff --git a/arch/xtensa/variants/s6000/include/variant/hardware.h b/arch/xtensa/variants/s6000/include/variant/hardware.h
deleted file mode 100644
index 5d9ba098d84a..000000000000
--- a/arch/xtensa/variants/s6000/include/variant/hardware.h
+++ /dev/null
@@ -1,259 +0,0 @@
-#ifndef __XTENSA_S6000_HARDWARE_H
-#define __XTENSA_S6000_HARDWARE_H
-
-#define S6_SCLK			1843200
-
-#define S6_MEM_REG              0x20000000
-#define S6_MEM_EFI              0x33F00000
-#define S6_MEM_PCIE_DATARAM1    0x34000000
-#define S6_MEM_XLMI             0x37F80000
-#define S6_MEM_PIF_DATARAM1     0x37FFC000
-#define S6_MEM_GMAC             0x38000000
-#define S6_MEM_I2S              0x3A000000
-#define S6_MEM_EGIB             0x3C000000
-#define S6_MEM_PCIE_CFG         0x3E000000
-#define S6_MEM_PIF_DATARAM      0x3FFE0000
-#define S6_MEM_XLMI_DATARAM     0x3FFF0000
-#define S6_MEM_DDR              0x40000000
-#define S6_MEM_PCIE_APER        0xC0000000
-#define S6_MEM_AUX              0xF0000000
-
-/* Device addresses */
-
-#define S6_REG_SCB              S6_MEM_REG
-#define S6_REG_NB               (S6_REG_SCB + 0x10000)
-#define S6_REG_LMSDMA           (S6_REG_SCB + 0x20000)
-#define S6_REG_NI               (S6_REG_SCB + 0x30000)
-#define S6_REG_NIDMA            (S6_REG_SCB + 0x40000)
-#define S6_REG_NS               (S6_REG_SCB + 0x50000)
-#define S6_REG_DDR              (S6_REG_SCB + 0x60000)
-#define S6_REG_GREG1            (S6_REG_SCB + 0x70000)
-#define S6_REG_DP               (S6_REG_SCB + 0x80000)
-#define S6_REG_DPDMA            (S6_REG_SCB + 0x90000)
-#define S6_REG_EGIB             (S6_REG_SCB + 0xA0000)
-#define S6_REG_PCIE             (S6_REG_SCB + 0xB0000)
-#define S6_REG_I2S              (S6_REG_SCB + 0xC0000)
-#define S6_REG_GMAC             (S6_REG_SCB + 0xD0000)
-#define S6_REG_HIFDMA           (S6_REG_SCB + 0xE0000)
-#define S6_REG_GREG2            (S6_REG_SCB + 0xF0000)
-
-#define S6_REG_APB              S6_REG_SCB
-#define S6_REG_UART             (S6_REG_APB + 0x0000)
-#define S6_REG_INTC             (S6_REG_APB + 0x2000)
-#define S6_REG_SPI              (S6_REG_APB + 0x3000)
-#define S6_REG_I2C              (S6_REG_APB + 0x4000)
-#define S6_REG_GPIO             (S6_REG_APB + 0x8000)
-
-/* Global register block */
-
-#define S6_GREG1_PLL_LOCKCLEAR	0x000
-#define S6_GREG1_PLL_LOCK_SYS		0
-#define S6_GREG1_PLL_LOCK_IO		1
-#define S6_GREG1_PLL_LOCK_AIM		2
-#define S6_GREG1_PLL_LOCK_DP0		3
-#define S6_GREG1_PLL_LOCK_DP2		4
-#define S6_GREG1_PLL_LOCK_DDR		5
-#define S6_GREG1_PLL_LOCKSTAT	0x004
-#define S6_GREG1_PLL_LOCKSTAT_CURLOCK	0
-#define S6_GREG1_PLL_LOCKSTAT_EVERUNLCK	8
-#define S6_GREG1_PLLSEL		0x010
-#define S6_GREG1_PLLSEL_AIM		0
-#define S6_GREG1_PLLSEL_AIM_DDR2		0
-#define S6_GREG1_PLLSEL_AIM_300MHZ		1
-#define S6_GREG1_PLLSEL_AIM_240MHZ		2
-#define S6_GREG1_PLLSEL_AIM_200MHZ		3
-#define S6_GREG1_PLLSEL_AIM_150MHZ		4
-#define S6_GREG1_PLLSEL_AIM_120MHZ		5
-#define S6_GREG1_PLLSEL_AIM_40MHZ		6
-#define S6_GREG1_PLLSEL_AIM_PLLAIMREF		7
-#define S6_GREG1_PLLSEL_AIM_MASK		7
-#define S6_GREG1_PLLSEL_DDR		8
-#define S6_GREG1_PLLSEL_DDR_HS			0
-#define S6_GREG1_PLLSEL_DDR_333MHZ		1
-#define S6_GREG1_PLLSEL_DDR_250MHZ		2
-#define S6_GREG1_PLLSEL_DDR_200MHZ		3
-#define S6_GREG1_PLLSEL_DDR_167MHZ		4
-#define S6_GREG1_PLLSEL_DDR_100MHZ		5
-#define S6_GREG1_PLLSEL_DDR_33MHZ		6
-#define S6_GREG1_PLLSEL_DDR_PLLIOREF		7
-#define S6_GREG1_PLLSEL_DDR_MASK		7
-#define S6_GREG1_PLLSEL_GMAC		16
-#define S6_GREG1_PLLSEL_GMAC_125MHZ		0
-#define S6_GREG1_PLLSEL_GMAC_25MHZ		1
-#define S6_GREG1_PLLSEL_GMAC_2500KHZ		2
-#define S6_GREG1_PLLSEL_GMAC_EXTERN		3
-#define S6_GREG1_PLLSEL_GMAC_MASK		3
-#define S6_GREG1_PLLSEL_GMII		18
-#define S6_GREG1_PLLSEL_GMII_111MHZ		0
-#define S6_GREG1_PLLSEL_GMII_IOREF		1
-#define S6_GREG1_PLLSEL_GMII_NONE		2
-#define S6_GREG1_PLLSEL_GMII_125MHZ		3
-#define S6_GREG1_PLLSEL_GMII_MASK		3
-#define S6_GREG1_SYSUNLOCKCNT	0x020
-#define S6_GREG1_IOUNLOCKCNT	0x024
-#define S6_GREG1_AIMUNLOCKCNT	0x028
-#define S6_GREG1_DP0UNLOCKCNT	0x02C
-#define S6_GREG1_DP2UNLOCKCNT	0x030
-#define S6_GREG1_DDRUNLOCKCNT	0x034
-#define S6_GREG1_CLKBAL0	0x040
-#define S6_GREG1_CLKBAL0_LSGB		0
-#define S6_GREG1_CLKBAL0_LSPX		8
-#define S6_GREG1_CLKBAL0_MEMDO		16
-#define S6_GREG1_CLKBAL0_HSXT1		24
-#define S6_GREG1_CLKBAL1	0x044
-#define S6_GREG1_CLKBAL1_HSISEF		0
-#define S6_GREG1_CLKBAL1_HSNI		8
-#define S6_GREG1_CLKBAL1_HSNS		16
-#define S6_GREG1_CLKBAL1_HSISEFCFG	24
-#define S6_GREG1_CLKBAL2	0x048
-#define S6_GREG1_CLKBAL2_LSNB		0
-#define S6_GREG1_CLKBAL2_LSSB		8
-#define S6_GREG1_CLKBAL2_LSREST		24
-#define S6_GREG1_CLKBAL3	0x04C
-#define S6_GREG1_CLKBAL3_ISEFXAD	0
-#define S6_GREG1_CLKBAL3_ISEFLMS	8
-#define S6_GREG1_CLKBAL3_ISEFISEF	16
-#define S6_GREG1_CLKBAL3_DDRDD		24
-#define S6_GREG1_CLKBAL4	0x050
-#define S6_GREG1_CLKBAL4_DDRDP		0
-#define S6_GREG1_CLKBAL4_DDRDO		8
-#define S6_GREG1_CLKBAL4_DDRNB		16
-#define S6_GREG1_CLKBAL4_DDRLMS		24
-#define S6_GREG1_BLOCKENA	0x100
-#define S6_GREG1_BLOCK_DDR		0
-#define S6_GREG1_BLOCK_DP		1
-#define S6_GREG1_BLOCK_NSNI		2
-#define S6_GREG1_BLOCK_PCIE		3
-#define S6_GREG1_BLOCK_GMAC		4
-#define S6_GREG1_BLOCK_I2S		5
-#define S6_GREG1_BLOCK_EGIB		6
-#define S6_GREG1_BLOCK_SB		7
-#define S6_GREG1_BLOCK_XT1		8
-#define S6_GREG1_CLKGATE	0x104
-#define S6_GREG1_BGATE_AIMNORTH		9
-#define S6_GREG1_BGATE_AIMEAST		10
-#define S6_GREG1_BGATE_AIMWEST		11
-#define S6_GREG1_BGATE_AIMSOUTH		12
-#define S6_GREG1_CHIPRES	0x108
-#define S6_GREG1_CHIPRES_SOFTRES	0
-#define S6_GREG1_CHIPRES_LOSTLOCK	1
-#define S6_GREG1_RESETCAUSE	0x10C
-#define S6_GREG1_RESETCAUSE_RESETN	0
-#define S6_GREG1_RESETCAUSE_GLOBAL	1
-#define S6_GREG1_RESETCAUSE_WDOGTIMER	2
-#define S6_GREG1_RESETCAUSE_SWCHIP	3
-#define S6_GREG1_RESETCAUSE_PLLSYSLOSS	4
-#define S6_GREG1_RESETCAUSE_PCIE	5
-#define S6_GREG1_RESETCAUSE_CREATEDGLOB	6
-#define S6_GREG1_REFCLOCKCNT	0x110
-#define S6_GREG1_RESETTIMER	0x114
-#define S6_GREG1_NMITIMER	0x118
-#define S6_GREG1_GLOBAL_TIMER	0x11C
-#define S6_GREG1_TIMER0		0x180
-#define S6_GREG1_TIMER1		0x184
-#define S6_GREG1_UARTCLOCKSEL	0x204
-#define S6_GREG1_CHIPVERSPACKG	0x208
-#define S6_GREG1_CHIPVERSPACKG_CHIPVID	0
-#define S6_GREG1_CHIPVERSPACKG_PACKSEL	8
-#define S6_GREG1_ONDIETERMCTRL	0x20C
-#define S6_GREG1_ONDIETERMCTRL_WEST	0
-#define S6_GREG1_ONDIETERMCTRL_NORTH	2
-#define S6_GREG1_ONDIETERMCTRL_EAST	4
-#define S6_GREG1_ONDIETERMCTRL_SOUTH	6
-#define S6_GREG1_ONDIETERMCTRL_NONE		0
-#define S6_GREG1_ONDIETERMCTRL_75OHM		2
-#define S6_GREG1_ONDIETERMCTRL_MASK		3
-#define S6_GREG1_BOOT_CFG0	0x210
-#define S6_GREG1_BOOT_CFG0_AIMSTRONG	1
-#define S6_GREG1_BOOT_CFG0_MINIBOOTDL	2
-#define S6_GREG1_BOOT_CFG0_OCDGPIO8SET	5
-#define S6_GREG1_BOOT_CFG0_OCDGPIOENA	6
-#define S6_GREG1_BOOT_CFG0_DOWNSTREAM	7
-#define S6_GREG1_BOOT_CFG0_PLLSYSDIV	8
-#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_300MHZ	1
-#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_240MHZ	2
-#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_200MHZ	3
-#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_150MHZ	4
-#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_120MHZ	5
-#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_40MHZ	6
-#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_MASK	7
-#define S6_GREG1_BOOT_CFG0_BALHSLMS	12
-#define S6_GREG1_BOOT_CFG0_BALHSNB	18
-#define S6_GREG1_BOOT_CFG0_BALHSXAD	24
-#define S6_GREG1_BOOT_CFG1	0x214
-#define S6_GREG1_BOOT_CFG1_PCIE1LANE	1
-#define S6_GREG1_BOOT_CFG1_MPLLPRESCALE	2
-#define S6_GREG1_BOOT_CFG1_MPLLNCY	4
-#define S6_GREG1_BOOT_CFG1_MPLLNCY5	9
-#define S6_GREG1_BOOT_CFG1_BALHSREST	14
-#define S6_GREG1_BOOT_CFG1_BALHSPSMEMS	20
-#define S6_GREG1_BOOT_CFG1_BALLSGI	26
-#define S6_GREG1_BOOT_CFG2	0x218
-#define S6_GREG1_BOOT_CFG2_PEID		0
-#define S6_GREG1_BOOT_CFG3	0x21C
-#define S6_GREG1_DRAMBUSYHOLDOF	0x220
-#define S6_GREG1_DRAMBUSYHOLDOF_XT0	0
-#define S6_GREG1_DRAMBUSYHOLDOF_XT1	4
-#define S6_GREG1_DRAMBUSYHOLDOF_XT_MASK		7
-#define S6_GREG1_PCIEBAR1SIZE	0x224
-#define S6_GREG1_PCIEBAR2SIZE	0x228
-#define S6_GREG1_PCIEVENDOR	0x22C
-#define S6_GREG1_PCIEDEVICE	0x230
-#define S6_GREG1_PCIEREV	0x234
-#define S6_GREG1_PCIECLASS	0x238
-#define S6_GREG1_XT1DCACHEMISS	0x240
-#define S6_GREG1_XT1ICACHEMISS	0x244
-#define S6_GREG1_HWSEMAPHORE(n)	(0x400 + 4 * (n))
-#define S6_GREG1_HWSEMAPHORE_NB		16
-
-/* peripheral interrupt numbers */
-
-#define S6_INTC_GPIO(n)			(n)		/* 0..3 */
-#define S6_INTC_I2C			4
-#define S6_INTC_SPI			5
-#define S6_INTC_NB_ERR			6
-#define S6_INTC_DMA_LMSERR		7
-#define S6_INTC_DMA_LMSLOWWMRK(n)	(8 + (n))	/* 0..11 */
-#define S6_INTC_DMA_LMSPENDCNT(n)	(20 + (n))	/* 0..11 */
-#define S6_INTC_DMA HOSTLOWWMRK(n)	(32 + (n))	/* 0..6 */
-#define S6_INTC_DMA_HOSTPENDCNT(n)	(39 + (n))	/* 0..6 */
-#define S6_INTC_DMA_HOSTERR		46
-#define S6_INTC_UART(n)			(47 + (n))	/* 0..1 */
-#define S6_INTC_XAD			49
-#define S6_INTC_NI_ERR			50
-#define S6_INTC_NI_INFIFOFULL		51
-#define S6_INTC_DMA_NIERR		52
-#define S6_INTC_DMA_NILOWWMRK(n)	(53 + (n))	/* 0..3 */
-#define S6_INTC_DMA_NIPENDCNT(n)	(57 + (n))	/* 0..3 */
-#define S6_INTC_DDR			61
-#define S6_INTC_NS_ERR			62
-#define S6_INTC_EFI_CFGERR		63
-#define S6_INTC_EFI_ISEFTEST		64
-#define S6_INTC_EFI_WRITEERR		65
-#define S6_INTC_NMI_TIMER		66
-#define S6_INTC_PLLLOCK_SYS		67
-#define S6_INTC_PLLLOCK_IO		68
-#define S6_INTC_PLLLOCK_AIM		69
-#define S6_INTC_PLLLOCK_DP0		70
-#define S6_INTC_PLLLOCK_DP2		71
-#define S6_INTC_I2S_ERR			72
-#define S6_INTC_GMAC_STAT		73
-#define S6_INTC_GMAC_ERR		74
-#define S6_INTC_GIB_ERR			75
-#define S6_INTC_PCIE_ERR		76
-#define S6_INTC_PCIE_MSI(n)		(77 + (n))	/* 0..3 */
-#define S6_INTC_PCIE_INTA		81
-#define S6_INTC_PCIE_INTB		82
-#define S6_INTC_PCIE_INTC		83
-#define S6_INTC_PCIE_INTD		84
-#define S6_INTC_SW(n)			(85 + (n))	/* 0..9 */
-#define S6_INTC_SW_ENABLE(n)		(85 + 256 + (n))
-#define S6_INTC_DMA_DP_ERR		95
-#define S6_INTC_DMA_DPLOWWMRK(n)	(96 + (n))	/* 0..3 */
-#define S6_INTC_DMA_DPPENDCNT(n)	(100 + (n))	/* 0..3 */
-#define S6_INTC_DMA_DPTERMCNT(n)	(104 + (n))	/* 0..3 */
-#define S6_INTC_TIMER0			108
-#define S6_INTC_TIMER1			109
-#define S6_INTC_DMA_HOSTTERMCNT(n)	(110 + (n))	/* 0..6 */
-
-#endif /* __XTENSA_S6000_HARDWARE_H */
diff --git a/arch/xtensa/variants/s6000/include/variant/irq.h b/arch/xtensa/variants/s6000/include/variant/irq.h
deleted file mode 100644
index 39ca751a6255..000000000000
--- a/arch/xtensa/variants/s6000/include/variant/irq.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _XTENSA_S6000_IRQ_H
-#define _XTENSA_S6000_IRQ_H
-
-#define VARIANT_NR_IRQS 8 /* GPIO interrupts */
-
-extern void variant_irq_enable(unsigned int irq);
-
-#endif /* __XTENSA_S6000_IRQ_H */
diff --git a/arch/xtensa/variants/s6000/include/variant/tie-asm.h b/arch/xtensa/variants/s6000/include/variant/tie-asm.h
deleted file mode 100644
index f02d0a3a2e20..000000000000
--- a/arch/xtensa/variants/s6000/include/variant/tie-asm.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * This header file contains assembly-language definitions (assembly
- * macros, etc.) for this specific Xtensa processor's TIE extensions
- * and options.  It is customized to this Xtensa processor configuration.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1999-2008 Tensilica Inc.
- */
-
-#ifndef _XTENSA_CORE_TIE_ASM_H
-#define _XTENSA_CORE_TIE_ASM_H
-
-/*  Selection parameter values for save-area save/restore macros:  */
-/*  Option vs. TIE:  */
-#define XTHAL_SAS_TIE	0x0001	/* custom extension or coprocessor */
-#define XTHAL_SAS_OPT	0x0002	/* optional (and not a coprocessor) */
-/*  Whether used automatically by compiler:  */
-#define XTHAL_SAS_NOCC	0x0004	/* not used by compiler w/o special opts/code */
-#define XTHAL_SAS_CC	0x0008	/* used by compiler without special opts/code */
-/*  ABI handling across function calls:  */
-#define XTHAL_SAS_CALR	0x0010	/* caller-saved */
-#define XTHAL_SAS_CALE	0x0020	/* callee-saved */
-#define XTHAL_SAS_GLOB	0x0040	/* global across function calls (in thread) */
-/*  Misc  */
-#define XTHAL_SAS_ALL	0xFFFF	/* include all default NCP contents */
-
-
-
-/* Macro to save all non-coprocessor (extra) custom TIE and optional state
- * (not including zero-overhead loop registers).
- * Save area ptr (clobbered):  ptr  (16 byte aligned)
- * Scratch regs  (clobbered):  at1..at4  (only first XCHAL_NCP_NUM_ATMPS needed)
- */
-	.macro xchal_ncp_store  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL
-	xchal_sa_start	\continue, \ofs
-	.ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select
-	xchal_sa_align	\ptr, 0, 1024-4, 4, 4
-	rsr	\at1, BR		// boolean option
-	s32i	\at1, \ptr, .Lxchal_ofs_ + 0
-	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 4
-	.endif
-	.endm	// xchal_ncp_store
-
-/* Macro to save all non-coprocessor (extra) custom TIE and optional state
- * (not including zero-overhead loop registers).
- * Save area ptr (clobbered):  ptr  (16 byte aligned)
- * Scratch regs  (clobbered):  at1..at4  (only first XCHAL_NCP_NUM_ATMPS needed)
- */
-	.macro xchal_ncp_load  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL
-	xchal_sa_start	\continue, \ofs
-	.ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select
-	xchal_sa_align	\ptr, 0, 1024-4, 4, 4
-	l32i	\at1, \ptr, .Lxchal_ofs_ + 0
-	wsr	\at1, BR		// boolean option
-	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 4
-	.endif
-	.endm	// xchal_ncp_load
-
-
-
-#define XCHAL_NCP_NUM_ATMPS	1
-
-
-
-/* Macro to save the state of TIE coprocessor FPU.
- * Save area ptr (clobbered):  ptr  (16 byte aligned)
- * Scratch regs  (clobbered):  at1..at4  (only first XCHAL_CP0_NUM_ATMPS needed)
- */
-#define xchal_cp_FPU_store	xchal_cp0_store
-/* #define xchal_cp_FPU_store_a2	xchal_cp0_store a2 a3 a4 a5 a6 */
-	.macro	xchal_cp0_store  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL
-	xchal_sa_start \continue, \ofs
-	.ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select
-	xchal_sa_align	\ptr, 0, 0, 1, 16
-	rur232	\at1		// FCR
-	s32i	\at1, \ptr, 0
-	rur233	\at1		// FSR
-	s32i	\at1, \ptr, 4
-	SSI f0, \ptr,  8
-	SSI f1, \ptr,  12
-	SSI f2, \ptr,  16
-	SSI f3, \ptr,  20
-	SSI f4, \ptr,  24
-	SSI f5, \ptr,  28
-	SSI f6, \ptr,  32
-	SSI f7, \ptr,  36
-	SSI f8, \ptr,  40
-	SSI f9, \ptr,  44
-	SSI f10, \ptr,  48
-	SSI f11, \ptr,  52
-	SSI f12, \ptr,  56
-	SSI f13, \ptr,  60
-	SSI f14, \ptr,  64
-	SSI f15, \ptr,  68
-	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 72
-	.endif
-	.endm	// xchal_cp0_store
-
-/* Macro to restore the state of TIE coprocessor FPU.
- * Save area ptr (clobbered):  ptr  (16 byte aligned)
- * Scratch regs  (clobbered):  at1..at4  (only first XCHAL_CP0_NUM_ATMPS needed)
- */
-#define xchal_cp_FPU_load	xchal_cp0_load
-/* #define xchal_cp_FPU_load_a2	xchal_cp0_load a2 a3 a4 a5 a6 */
-	.macro	xchal_cp0_load  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL
-	xchal_sa_start \continue, \ofs
-	.ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select
-	xchal_sa_align	\ptr, 0, 0, 1, 16
-	l32i	\at1, \ptr, 0
-	wur232	\at1		// FCR
-	l32i	\at1, \ptr, 4
-	wur233	\at1		// FSR
-	LSI f0, \ptr,  8
-	LSI f1, \ptr,  12
-	LSI f2, \ptr,  16
-	LSI f3, \ptr,  20
-	LSI f4, \ptr,  24
-	LSI f5, \ptr,  28
-	LSI f6, \ptr,  32
-	LSI f7, \ptr,  36
-	LSI f8, \ptr,  40
-	LSI f9, \ptr,  44
-	LSI f10, \ptr,  48
-	LSI f11, \ptr,  52
-	LSI f12, \ptr,  56
-	LSI f13, \ptr,  60
-	LSI f14, \ptr,  64
-	LSI f15, \ptr,  68
-	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 72
-	.endif
-	.endm	// xchal_cp0_load
-
-#define XCHAL_CP0_NUM_ATMPS	1
-
-/* Macro to save the state of TIE coprocessor XAD.
- * Save area ptr (clobbered):  ptr  (16 byte aligned)
- * Scratch regs  (clobbered):  at1..at4  (only first XCHAL_CP6_NUM_ATMPS needed)
- */
-#define xchal_cp_XAD_store	xchal_cp6_store
-/* #define xchal_cp_XAD_store_a2	xchal_cp6_store a2 a3 a4 a5 a6 */
-	.macro	xchal_cp6_store  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL
-	xchal_sa_start \continue, \ofs
-	.ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select
-	xchal_sa_align	\ptr, 0, 0, 1, 16
-	rur0	\at1		// LDCBHI
-	s32i	\at1, \ptr, 0
-	rur1	\at1		// LDCBLO
-	s32i	\at1, \ptr, 4
-	rur2	\at1		// STCBHI
-	s32i	\at1, \ptr, 8
-	rur3	\at1		// STCBLO
-	s32i	\at1, \ptr, 12
-	rur8	\at1		// LDBRBASE
-	s32i	\at1, \ptr, 16
-	rur9	\at1		// LDBROFF
-	s32i	\at1, \ptr, 20
-	rur10	\at1		// LDBRINC
-	s32i	\at1, \ptr, 24
-	rur11	\at1		// STBRBASE
-	s32i	\at1, \ptr, 28
-	rur12	\at1		// STBROFF
-	s32i	\at1, \ptr, 32
-	rur13	\at1		// STBRINC
-	s32i	\at1, \ptr, 36
-	rur24	\at1		// SCRATCH0
-	s32i	\at1, \ptr, 40
-	rur25	\at1		// SCRATCH1
-	s32i	\at1, \ptr, 44
-	rur26	\at1		// SCRATCH2
-	s32i	\at1, \ptr, 48
-	rur27	\at1		// SCRATCH3
-	s32i	\at1, \ptr, 52
-	WRAS128I wra0, \ptr,  64
-	WRAS128I wra1, \ptr,  80
-	WRAS128I wra2, \ptr,  96
-	WRAS128I wra3, \ptr,  112
-	WRAS128I wra4, \ptr,  128
-	WRAS128I wra5, \ptr,  144
-	WRAS128I wra6, \ptr,  160
-	WRAS128I wra7, \ptr,  176
-	WRAS128I wra8, \ptr,  192
-	WRAS128I wra9, \ptr,  208
-	WRAS128I wra10, \ptr,  224
-	WRAS128I wra11, \ptr,  240
-	WRAS128I wra12, \ptr,  256
-	WRAS128I wra13, \ptr,  272
-	WRAS128I wra14, \ptr,  288
-	WRAS128I wra15, \ptr,  304
-	WRBS128I wrb0, \ptr,  320
-	WRBS128I wrb1, \ptr,  336
-	WRBS128I wrb2, \ptr,  352
-	WRBS128I wrb3, \ptr,  368
-	WRBS128I wrb4, \ptr,  384
-	WRBS128I wrb5, \ptr,  400
-	WRBS128I wrb6, \ptr,  416
-	WRBS128I wrb7, \ptr,  432
-	WRBS128I wrb8, \ptr,  448
-	WRBS128I wrb9, \ptr,  464
-	WRBS128I wrb10, \ptr,  480
-	WRBS128I wrb11, \ptr,  496
-	WRBS128I wrb12, \ptr,  512
-	WRBS128I wrb13, \ptr,  528
-	WRBS128I wrb14, \ptr,  544
-	WRBS128I wrb15, \ptr,  560
-	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 576
-	.endif
-	.endm	// xchal_cp6_store
-
-/* Macro to restore the state of TIE coprocessor XAD.
- * Save area ptr (clobbered):  ptr  (16 byte aligned)
- * Scratch regs  (clobbered):  at1..at4  (only first XCHAL_CP6_NUM_ATMPS needed)
- */
-#define xchal_cp_XAD_load	xchal_cp6_load
-/* #define xchal_cp_XAD_load_a2	xchal_cp6_load a2 a3 a4 a5 a6 */
-	.macro	xchal_cp6_load  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL
-	xchal_sa_start \continue, \ofs
-	.ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select
-	xchal_sa_align	\ptr, 0, 0, 1, 16
-	l32i	\at1, \ptr, 0
-	wur0	\at1		// LDCBHI
-	l32i	\at1, \ptr, 4
-	wur1	\at1		// LDCBLO
-	l32i	\at1, \ptr, 8
-	wur2	\at1		// STCBHI
-	l32i	\at1, \ptr, 12
-	wur3	\at1		// STCBLO
-	l32i	\at1, \ptr, 16
-	wur8	\at1		// LDBRBASE
-	l32i	\at1, \ptr, 20
-	wur9	\at1		// LDBROFF
-	l32i	\at1, \ptr, 24
-	wur10	\at1		// LDBRINC
-	l32i	\at1, \ptr, 28
-	wur11	\at1		// STBRBASE
-	l32i	\at1, \ptr, 32
-	wur12	\at1		// STBROFF
-	l32i	\at1, \ptr, 36
-	wur13	\at1		// STBRINC
-	l32i	\at1, \ptr, 40
-	wur24	\at1		// SCRATCH0
-	l32i	\at1, \ptr, 44
-	wur25	\at1		// SCRATCH1
-	l32i	\at1, \ptr, 48
-	wur26	\at1		// SCRATCH2
-	l32i	\at1, \ptr, 52
-	wur27	\at1		// SCRATCH3
-	WRBL128I wrb0, \ptr,  320
-	WRBL128I wrb1, \ptr,  336
-	WRBL128I wrb2, \ptr,  352
-	WRBL128I wrb3, \ptr,  368
-	WRBL128I wrb4, \ptr,  384
-	WRBL128I wrb5, \ptr,  400
-	WRBL128I wrb6, \ptr,  416
-	WRBL128I wrb7, \ptr,  432
-	WRBL128I wrb8, \ptr,  448
-	WRBL128I wrb9, \ptr,  464
-	WRBL128I wrb10, \ptr,  480
-	WRBL128I wrb11, \ptr,  496
-	WRBL128I wrb12, \ptr,  512
-	WRBL128I wrb13, \ptr,  528
-	WRBL128I wrb14, \ptr,  544
-	WRBL128I wrb15, \ptr,  560
-	WRAL128I wra0, \ptr,  64
-	WRAL128I wra1, \ptr,  80
-	WRAL128I wra2, \ptr,  96
-	WRAL128I wra3, \ptr,  112
-	WRAL128I wra4, \ptr,  128
-	WRAL128I wra5, \ptr,  144
-	WRAL128I wra6, \ptr,  160
-	WRAL128I wra7, \ptr,  176
-	WRAL128I wra8, \ptr,  192
-	WRAL128I wra9, \ptr,  208
-	WRAL128I wra10, \ptr,  224
-	WRAL128I wra11, \ptr,  240
-	WRAL128I wra12, \ptr,  256
-	WRAL128I wra13, \ptr,  272
-	WRAL128I wra14, \ptr,  288
-	WRAL128I wra15, \ptr,  304
-	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 576
-	.endif
-	.endm	// xchal_cp6_load
-
-#define XCHAL_CP6_NUM_ATMPS	1
-#define XCHAL_SA_NUM_ATMPS	1
-
-	/*  Empty macros for unconfigured coprocessors:  */
-	.macro xchal_cp1_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp1_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp2_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp2_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp3_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp3_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp4_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp4_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp5_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp5_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp7_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-	.macro xchal_cp7_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
-
-#endif /*_XTENSA_CORE_TIE_ASM_H*/
-
diff --git a/arch/xtensa/variants/s6000/include/variant/tie.h b/arch/xtensa/variants/s6000/include/variant/tie.h
deleted file mode 100644
index be7ea843d5df..000000000000
--- a/arch/xtensa/variants/s6000/include/variant/tie.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * This header file describes this specific Xtensa processor's TIE extensions
- * that extend basic Xtensa core functionality.  It is customized to this
- * Xtensa processor configuration.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1999-2008 Tensilica Inc.
- */
-
-#ifndef _XTENSA_CORE_TIE_H
-#define _XTENSA_CORE_TIE_H
-
-#define XCHAL_CP_NUM			2	/* number of coprocessors */
-#define XCHAL_CP_MAX			7	/* max CP ID + 1 (0 if none) */
-#define XCHAL_CP_MASK			0x41	/* bitmask of all CPs by ID */
-#define XCHAL_CP_PORT_MASK		0x00	/* bitmask of only port CPs */
-
-/*  Basic parameters of each coprocessor:  */
-#define XCHAL_CP0_NAME			"FPU"
-#define XCHAL_CP0_IDENT			FPU
-#define XCHAL_CP0_SA_SIZE		72	/* size of state save area */
-#define XCHAL_CP0_SA_ALIGN		4	/* min alignment of save area */
-#define XCHAL_CP_ID_FPU			0	/* coprocessor ID (0..7) */
-#define XCHAL_CP6_NAME			"XAD"
-#define XCHAL_CP6_IDENT			XAD
-#define XCHAL_CP6_SA_SIZE		576	/* size of state save area */
-#define XCHAL_CP6_SA_ALIGN		16	/* min alignment of save area */
-#define XCHAL_CP_ID_XAD			6	/* coprocessor ID (0..7) */
-
-/*  Filler info for unassigned coprocessors, to simplify arrays etc:  */
-#define XCHAL_CP1_SA_SIZE		0
-#define XCHAL_CP1_SA_ALIGN		1
-#define XCHAL_CP2_SA_SIZE		0
-#define XCHAL_CP2_SA_ALIGN		1
-#define XCHAL_CP3_SA_SIZE		0
-#define XCHAL_CP3_SA_ALIGN		1
-#define XCHAL_CP4_SA_SIZE		0
-#define XCHAL_CP4_SA_ALIGN		1
-#define XCHAL_CP5_SA_SIZE		0
-#define XCHAL_CP5_SA_ALIGN		1
-#define XCHAL_CP7_SA_SIZE		0
-#define XCHAL_CP7_SA_ALIGN		1
-
-/*  Save area for non-coprocessor optional and custom (TIE) state:  */
-#define XCHAL_NCP_SA_SIZE		4
-#define XCHAL_NCP_SA_ALIGN		4
-
-/*  Total save area for optional and custom state (NCP + CPn):  */
-#define XCHAL_TOTAL_SA_SIZE		672	/* with 16-byte align padding */
-#define XCHAL_TOTAL_SA_ALIGN		16	/* actual minimum alignment */
-
-/*
- * Detailed contents of save areas.
- * NOTE:  caller must define the XCHAL_SA_REG macro (not defined here)
- * before expanding the XCHAL_xxx_SA_LIST() macros.
- *
- * XCHAL_SA_REG(s,ccused,abikind,kind,opt,name,galign,align,asize,
- *		dbnum,base,regnum,bitsz,gapsz,reset,x...)
- *
- *	s = passed from XCHAL_*_LIST(s), eg. to select how to expand
- *	ccused = set if used by compiler without special options or code
- *	abikind = 0 (caller-saved), 1 (callee-saved), or 2 (thread-global)
- *	kind = 0 (special reg), 1 (TIE user reg), or 2 (TIE regfile reg)
- *	opt = 0 (custom TIE extension or coprocessor), or 1 (optional reg)
- *	name = lowercase reg name (no quotes)
- *	galign = group byte alignment (power of 2) (galign >= align)
- *	align = register byte alignment (power of 2)
- *	asize = allocated size in bytes (asize*8 == bitsz + gapsz + padsz)
- *	  (not including any pad bytes required to galign this or next reg)
- *	dbnum = unique target number f/debug (see <xtensa-libdb-macros.h>)
- *	base = reg shortname w/o index (or sr=special, ur=TIE user reg)
- *	regnum = reg index in regfile, or special/TIE-user reg number
- *	bitsz = number of significant bits (regfile width, or ur/sr mask bits)
- *	gapsz = intervening bits, if bitsz bits not stored contiguously
- *	(padsz = pad bits at end [TIE regfile] or at msbits [ur,sr] of asize)
- *	reset = register reset value (or 0 if undefined at reset)
- *	x = reserved for future use (0 until then)
- *
- *  To filter out certain registers, e.g. to expand only the non-global
- *  registers used by the compiler, you can do something like this:
- *
- *  #define XCHAL_SA_REG(s,ccused,p...)	SELCC##ccused(p)
- *  #define SELCC0(p...)
- *  #define SELCC1(abikind,p...)	SELAK##abikind(p)
- *  #define SELAK0(p...)		REG(p)
- *  #define SELAK1(p...)		REG(p)
- *  #define SELAK2(p...)
- *  #define REG(kind,tie,name,galn,aln,asz,csz,dbnum,base,rnum,bsz,rst,x...) \
- *		...what you want to expand...
- */
-
-#define XCHAL_NCP_SA_NUM	1
-#define XCHAL_NCP_SA_LIST(s)	\
- XCHAL_SA_REG(s,0,0,0,1,             br, 4, 4, 4,0x0204,  sr,4  , 16,0,0,0)
-
-#define XCHAL_CP0_SA_NUM	18
-#define XCHAL_CP0_SA_LIST(s)	\
- XCHAL_SA_REG(s,0,0,1,0,            fcr, 4, 4, 4,0x03E8,  ur,232, 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,            fsr, 4, 4, 4,0x03E9,  ur,233, 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f0, 4, 4, 4,0x0030,   f,0  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f1, 4, 4, 4,0x0031,   f,1  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f2, 4, 4, 4,0x0032,   f,2  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f3, 4, 4, 4,0x0033,   f,3  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f4, 4, 4, 4,0x0034,   f,4  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f5, 4, 4, 4,0x0035,   f,5  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f6, 4, 4, 4,0x0036,   f,6  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f7, 4, 4, 4,0x0037,   f,7  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f8, 4, 4, 4,0x0038,   f,8  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,             f9, 4, 4, 4,0x0039,   f,9  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,            f10, 4, 4, 4,0x003A,   f,10 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,            f11, 4, 4, 4,0x003B,   f,11 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,            f12, 4, 4, 4,0x003C,   f,12 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,            f13, 4, 4, 4,0x003D,   f,13 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,            f14, 4, 4, 4,0x003E,   f,14 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,            f15, 4, 4, 4,0x003F,   f,15 , 32,0,0,0)
-
-#define XCHAL_CP1_SA_NUM	0
-#define XCHAL_CP1_SA_LIST(s)	/* empty */
-
-#define XCHAL_CP2_SA_NUM	0
-#define XCHAL_CP2_SA_LIST(s)	/* empty */
-
-#define XCHAL_CP3_SA_NUM	0
-#define XCHAL_CP3_SA_LIST(s)	/* empty */
-
-#define XCHAL_CP4_SA_NUM	0
-#define XCHAL_CP4_SA_LIST(s)	/* empty */
-
-#define XCHAL_CP5_SA_NUM	0
-#define XCHAL_CP5_SA_LIST(s)	/* empty */
-
-#define XCHAL_CP6_SA_NUM	46
-#define XCHAL_CP6_SA_LIST(s)	\
- XCHAL_SA_REG(s,0,0,1,0,         ldcbhi,16, 4, 4,0x0300,  ur,0  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,         ldcblo, 4, 4, 4,0x0301,  ur,1  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,         stcbhi, 4, 4, 4,0x0302,  ur,2  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,         stcblo, 4, 4, 4,0x0303,  ur,3  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,       ldbrbase, 4, 4, 4,0x0308,  ur,8  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,        ldbroff, 4, 4, 4,0x0309,  ur,9  , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,        ldbrinc, 4, 4, 4,0x030A,  ur,10 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,       stbrbase, 4, 4, 4,0x030B,  ur,11 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,        stbroff, 4, 4, 4,0x030C,  ur,12 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,        stbrinc, 4, 4, 4,0x030D,  ur,13 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,       scratch0, 4, 4, 4,0x0318,  ur,24 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,       scratch1, 4, 4, 4,0x0319,  ur,25 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,       scratch2, 4, 4, 4,0x031A,  ur,26 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,1,0,       scratch3, 4, 4, 4,0x031B,  ur,27 , 32,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra0,16,16,16,0x1010, wra,0  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra1,16,16,16,0x1011, wra,1  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra2,16,16,16,0x1012, wra,2  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra3,16,16,16,0x1013, wra,3  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra4,16,16,16,0x1014, wra,4  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra5,16,16,16,0x1015, wra,5  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra6,16,16,16,0x1016, wra,6  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra7,16,16,16,0x1017, wra,7  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra8,16,16,16,0x1018, wra,8  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wra9,16,16,16,0x1019, wra,9  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wra10,16,16,16,0x101A, wra,10 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wra11,16,16,16,0x101B, wra,11 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wra12,16,16,16,0x101C, wra,12 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wra13,16,16,16,0x101D, wra,13 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wra14,16,16,16,0x101E, wra,14 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wra15,16,16,16,0x101F, wra,15 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb0,16,16,16,0x1020, wrb,0  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb1,16,16,16,0x1021, wrb,1  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb2,16,16,16,0x1022, wrb,2  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb3,16,16,16,0x1023, wrb,3  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb4,16,16,16,0x1024, wrb,4  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb5,16,16,16,0x1025, wrb,5  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb6,16,16,16,0x1026, wrb,6  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb7,16,16,16,0x1027, wrb,7  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb8,16,16,16,0x1028, wrb,8  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,           wrb9,16,16,16,0x1029, wrb,9  ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wrb10,16,16,16,0x102A, wrb,10 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wrb11,16,16,16,0x102B, wrb,11 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wrb12,16,16,16,0x102C, wrb,12 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wrb13,16,16,16,0x102D, wrb,13 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wrb14,16,16,16,0x102E, wrb,14 ,128,0,0,0) \
- XCHAL_SA_REG(s,0,0,2,0,          wrb15,16,16,16,0x102F, wrb,15 ,128,0,0,0)
-
-#define XCHAL_CP7_SA_NUM	0
-#define XCHAL_CP7_SA_LIST(s)	/* empty */
-
-/* Byte length of instruction from its first nibble (op0 field), per FLIX.  */
-#define XCHAL_OP0_FORMAT_LENGTHS	3,3,3,3,3,3,3,3,2,2,2,2,2,2,8,8
-
-#endif /*_XTENSA_CORE_TIE_H*/
-
diff --git a/arch/xtensa/variants/s6000/irq.c b/arch/xtensa/variants/s6000/irq.c
deleted file mode 100644
index 81a241e79075..000000000000
--- a/arch/xtensa/variants/s6000/irq.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * s6000 irq crossbar
- *
- * Copyright (c) 2009 emlix GmbH
- * Authors:	Johannes Weiner <hannes@cmpxchg.org>
- *		Oskar Schirmer <oskar@scara.com>
- */
-#include <linux/io.h>
-#include <asm/irq.h>
-#include <variant/hardware.h>
-
-/* S6_REG_INTC */
-#define INTC_STATUS	0x000
-#define INTC_RAW	0x010
-#define INTC_STATUS_AG	0x100
-#define INTC_CFG(n)	(0x200 + 4 * (n))
-
-/*
- * The s6000 has a crossbar that multiplexes interrupt output lines
- * from the peripherals to input lines on the xtensa core.
- *
- * We leave the mapping decisions to the platform as it depends on the
- * actually connected peripherals which distribution makes sense.
- */
-extern const signed char *platform_irq_mappings[NR_IRQS];
-
-static unsigned long scp_to_intc_enable[] = {
-#define	TO_INTC_ENABLE(n)	(((n) << 1) + 1)
-	TO_INTC_ENABLE(0),
-	TO_INTC_ENABLE(1),
-	TO_INTC_ENABLE(2),
-	TO_INTC_ENABLE(3),
-	TO_INTC_ENABLE(4),
-	TO_INTC_ENABLE(5),
-	TO_INTC_ENABLE(6),
-	TO_INTC_ENABLE(7),
-	TO_INTC_ENABLE(8),
-	TO_INTC_ENABLE(9),
-	TO_INTC_ENABLE(10),
-	TO_INTC_ENABLE(11),
-	TO_INTC_ENABLE(12),
-	-1,
-	-1,
-	TO_INTC_ENABLE(13),
-	-1,
-	TO_INTC_ENABLE(14),
-	-1,
-	TO_INTC_ENABLE(15),
-#undef	TO_INTC_ENABLE
-};
-
-static void irq_set(unsigned int irq, int enable)
-{
-	unsigned long en;
-	const signed char *m = platform_irq_mappings[irq];
-
-	if (!m)
-		return;
-	en = enable ? scp_to_intc_enable[irq] : 0;
-	while (*m >= 0) {
-		writel(en, S6_REG_INTC + INTC_CFG(*m));
-		m++;
-	}
-}
-
-void variant_irq_enable(unsigned int irq)
-{
-	irq_set(irq, 1);
-}
-
-void variant_irq_disable(unsigned int irq)
-{
-	irq_set(irq, 0);
-}