diff options
Diffstat (limited to 'arch')
388 files changed, 11416 insertions, 22653 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index fe44b2494609..df94ac1f75b6 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -428,3 +428,4 @@ source "arch/arc/Kconfig.debug" source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" +source "kernel/power/Kconfig" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 10bc3d4e8a44..db72fec0e160 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -12,7 +12,7 @@ ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := arc-linux-uclibc- endif -KBUILD_DEFCONFIG := fpga_defconfig +KBUILD_DEFCONFIG := nsim_700_defconfig cflags-y += -mA7 -fno-common -pipe -fno-builtin -D__linux__ diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index cfaedd9c61c9..1c169dc74ad1 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -20,7 +20,7 @@ /* this is for console on PGU */ /* bootargs = "console=tty0 consoleblank=0"; */ /* this is for console on serial */ - bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug"; }; aliases { @@ -41,9 +41,9 @@ #interrupt-cells = <1>; }; - uart0: serial@c0000000 { + uart0: serial@f0000000 { compatible = "ns8250"; - reg = <0xc0000000 0x2000>; + reg = <0xf0000000 0x2000>; interrupts = <11>; clock-frequency = <3686400>; baud = <115200>; @@ -52,21 +52,21 @@ no-loopback-test = <1>; }; - pgu0: pgu@c9000000 { + pgu0: pgu@f9000000 { compatible = "snps,arcpgufb"; - reg = <0xc9000000 0x400>; + reg = <0xf9000000 0x400>; }; - ps2: ps2@c9001000 { + ps2: ps2@f9001000 { compatible = "snps,arc_ps2"; - reg = <0xc9000400 0x14>; + reg = <0xf9000400 0x14>; interrupts = <13>; interrupt-names = "arc_ps2_irq"; }; - eth0: ethernet@c0003000 { + eth0: ethernet@f0003000 { compatible = "snps,oscilan"; - reg = <0xc0003000 0x44>; + reg = <0xf0003000 0x44>; interrupts = <7>, <8>; 
interrupt-names = "rx", "tx"; }; diff --git a/arch/arc/configs/fpga_noramfs_defconfig b/arch/arc/configs/fpga_noramfs_defconfig deleted file mode 100644 index 49c93011ab96..000000000000 --- a/arch/arc/configs/fpga_noramfs_defconfig +++ /dev/null @@ -1,63 +0,0 @@ -CONFIG_CROSS_COMPILE="arc-linux-uclibc-" -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" -# CONFIG_SWAP is not set -CONFIG_HIGH_RES_TIMERS=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_NAMESPACES=y -# CONFIG_UTS_NS is not set -# CONFIG_PID_NS is not set -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_ALL=y -CONFIG_EMBEDDED=y -# CONFIG_SLUB_DEBUG is not set -# CONFIG_COMPAT_BRK is not set -CONFIG_KPROBES=y -CONFIG_MODULES=y -# CONFIG_LBDAF is not set -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_FPGA_LEGACY=y -# CONFIG_ARC_HAS_RTSC is not set -CONFIG_ARC_BUILTIN_DTB_NAME="angel4" -CONFIG_PREEMPT=y -# CONFIG_COMPACTION is not set -# CONFIG_CROSS_MEMORY_ATTACH is not set -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_UNIX_DIAG=y -CONFIG_NET_KEY=y -CONFIG_INET=y -# CONFIG_IPV6 is not set -# CONFIG_STANDALONE is not set -# CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set -# CONFIG_BLK_DEV is not set -CONFIG_NETDEVICES=y -CONFIG_ARC_EMAC=y -CONFIG_LXT_PHY=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set -CONFIG_SERIAL_ARC=y -CONFIG_SERIAL_ARC_CONSOLE=y -# CONFIG_HW_RANDOM is not set -# CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set -# CONFIG_HID is not set -# CONFIG_USB_SUPPORT is not set -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_TMPFS=y -# CONFIG_MISC_FILESYSTEMS is not set -CONFIG_NFS_FS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set -# CONFIG_ENABLE_MUST_CHECK is not 
set -CONFIG_XZ_DEC=y diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/nsim_700_defconfig index ef4d3bc7b6c0..ef4d3bc7b6c0 100644 --- a/arch/arc/configs/fpga_defconfig +++ b/arch/arc/configs/nsim_700_defconfig diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index 742816f1b210..27ecc6975a58 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -41,6 +41,15 @@ /****************************************************************** * IRQ Control Macros + * + * All of them have "memory" clobber (compiler barrier) which is needed to + * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available) + * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register) + * + * Noted at the time of Abilis Timer List corruption + * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67 + * Reasoning : https://lkml.org/lkml/2013/4/8/15 + * ******************************************************************/ /* diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index d01df0c517a2..20ebb602ea2f 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -26,8 +26,10 @@ #include <asm/setup.h> #include <asm/mach_desc.h> +#ifndef CONFIG_ARC_HAS_LLSC arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED; +#endif struct plat_smp_ops plat_smp_ops; diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 6a3d9a6c4497..91bd5bd62857 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -177,6 +177,9 @@ dtb-$(CONFIG_MACH_KIRKWOOD) += kirkwood-b3.dtb \ dtb-$(CONFIG_ARCH_LPC32XX) += ea3250.dtb phy3250.dtb dtb-$(CONFIG_ARCH_MARCO) += marco-evb.dtb dtb-$(CONFIG_MACH_MESON6) += meson6-atv1200.dtb +dtb-$(CONFIG_ARCH_MMP) += pxa168-aspenite.dtb \ + pxa910-dkb.dtb \ + mmp2-brownstone.dtb dtb-$(CONFIG_ARCH_MOXART) += moxart-uc7112lx.dtb dtb-$(CONFIG_ARCH_MXC) += \ 
imx1-ads.dtb \ diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 87aa4f3b8b3d..53bbfc90b26a 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -100,7 +100,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "newhaven,nhd-4.3-480272ef-atxl", "panel-dpi"; label = "lcd"; pinctrl-names = "default"; @@ -112,11 +112,11 @@ clock-frequency = <9000000>; hactive = <480>; vactive = <272>; - hfront-porch = <8>; - hback-porch = <43>; - hsync-len = <4>; - vback-porch = <12>; - vfront-porch = <4>; + hfront-porch = <2>; + hback-porch = <2>; + hsync-len = <41>; + vfront-porch = <2>; + vback-porch = <2>; vsync-len = <10>; hsync-active = <0>; vsync-active = <0>; @@ -320,8 +320,7 @@ lcd_pins: lcd_pins { pinctrl-single,pins = < - /* GPIO 5_8 to select LCD / HDMI */ - 0x238 (PIN_OUTPUT_PULLUP | MUX_MODE7) + 0x1c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpcm_ad7.gpio1_7 */ >; }; }; diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi index 9721e55384ce..50096d3427eb 100644 --- a/arch/arm/boot/dts/armada-375.dtsi +++ b/arch/arm/boot/dts/armada-375.dtsi @@ -14,6 +14,7 @@ #include "skeleton.dtsi" #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/interrupt-controller/irq.h> +#include <dt-bindings/phy/phy.h> #define MBUS_ID(target,attributes) (((target) << 24) | ((attributes) << 16)) @@ -348,6 +349,12 @@ #clock-cells = <1>; }; + usbcluster: usb-cluster@18400 { + compatible = "marvell,armada-375-usb-cluster"; + reg = <0x18400 0x4>; + #phy-cells = <1>; + }; + mbusc: mbus-controller@20000 { compatible = "marvell,mbus-controller"; reg = <0x20000 0x100>, <0x20180 0x20>; @@ -398,6 +405,8 @@ reg = <0x50000 0x500>; interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gateclk 18>; + phys = <&usbcluster PHY_TYPE_USB2>; + phy-names = "usb"; status = "disabled"; }; @@ -414,6 +423,8 @@ reg = <0x58000 0x20000>,<0x5b880 
0x80>; interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gateclk 16>; + phys = <&usbcluster PHY_TYPE_USB3>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts index b5b84006469e..9198b719d0ef 100644 --- a/arch/arm/boot/dts/at91-sama5d4ek.dts +++ b/arch/arm/boot/dts/at91-sama5d4ek.dts @@ -9,12 +9,12 @@ * licensing only applies to this file, and not this project as a * whole. * - * a) This library is free software; you can redistribute it and/or + * a) This file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * This file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index cb100b03a362..dd1313cbc314 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -956,6 +956,14 @@ }; }; + rtc@fffffd20 { + compatible = "atmel,at91sam9260-rtt"; + reg = <0xfffffd20 0x10>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + clocks = <&clk32k>; + status = "disabled"; + }; + watchdog@fffffd40 { compatible = "atmel,at91sam9260-wdt"; reg = <0xfffffd40 0x10>; @@ -966,6 +974,12 @@ atmel,idle-halt; status = "disabled"; }; + + gpbr: syscon@fffffd50 { + compatible = "atmel,at91sam9260-gpbr", "syscon"; + reg = <0xfffffd50 0x10>; + status = "disabled"; + }; }; nand0: nand@40000000 { diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi index a81aab4281a7..cdb9ed612109 100644 --- a/arch/arm/boot/dts/at91sam9261.dtsi +++ b/arch/arm/boot/dts/at91sam9261.dtsi @@ -828,12 +828,26 @@ clocks = <&mck>; }; + rtc@fffffd20 { + compatible = "atmel,at91sam9260-rtt"; + reg = <0xfffffd20 0x10>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + clocks = <&slow_xtal>; + status = "disabled"; + }; + watchdog@fffffd40 { compatible = "atmel,at91sam9260-wdt"; reg = <0xfffffd40 0x10>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; status = "disabled"; }; + + gpbr: syscon@fffffd50 { + compatible = "atmel,at91sam9260-gpbr", "syscon"; + reg = <0xfffffd50 0x10>; + status = "disabled"; + }; }; }; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index 653e4395b7cb..1467750e3377 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -922,6 +922,27 @@ pinctrl-0 = <&pinctrl_can_rx_tx>; clocks = <&can_clk>; clock-names = "can_clk"; + }; + + rtc@fffffd20 { + compatible = "atmel,at91sam9260-rtt"; + reg = <0xfffffd20 0x10>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + clocks = <&slow_xtal>; + status = "disabled"; + }; + + rtc@fffffd50 { + compatible = "atmel,at91sam9260-rtt"; + reg = 
<0xfffffd50 0x10>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + clocks = <&slow_xtal>; + status = "disabled"; + }; + + gpbr: syscon@fffffd60 { + compatible = "atmel,at91sam9260-gpbr", "syscon"; + reg = <0xfffffd60 0x50>; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index d2919108e92d..dfaacb113f2e 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -112,9 +112,23 @@ }; }; + shdwc@fffffd10 { + atmel,wakeup-counter = <10>; + atmel,wakeup-rtt-timer; + }; + + rtc@fffffd20 { + atmel,rtt-rtc-time-reg = <&gpbr 0x0>; + status = "okay"; + }; + watchdog@fffffd40 { status = "okay"; }; + + gpbr: syscon@fffffd50 { + status = "okay"; + }; }; nand0: nand@40000000 { diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 6c0637a4bda5..2a8da8a884b4 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -492,6 +492,27 @@ }; }; + isi { + pinctrl_isi: isi-0 { + atmel,pins = <AT91_PIOB 8 AT91_PERIPH_B AT91_PINCTRL_NONE /* D8 */ + AT91_PIOB 9 AT91_PERIPH_B AT91_PINCTRL_NONE /* D9 */ + AT91_PIOB 10 AT91_PERIPH_B AT91_PINCTRL_NONE /* D10 */ + AT91_PIOB 11 AT91_PERIPH_B AT91_PINCTRL_NONE /* D11 */ + AT91_PIOB 20 AT91_PERIPH_A AT91_PINCTRL_NONE /* D0 */ + AT91_PIOB 21 AT91_PERIPH_A AT91_PINCTRL_NONE /* D1 */ + AT91_PIOB 22 AT91_PERIPH_A AT91_PINCTRL_NONE /* D2 */ + AT91_PIOB 23 AT91_PERIPH_A AT91_PINCTRL_NONE /* D3 */ + AT91_PIOB 24 AT91_PERIPH_A AT91_PINCTRL_NONE /* D4 */ + AT91_PIOB 25 AT91_PERIPH_A AT91_PINCTRL_NONE /* D5 */ + AT91_PIOB 26 AT91_PERIPH_A AT91_PINCTRL_NONE /* D6 */ + AT91_PIOB 27 AT91_PERIPH_A AT91_PINCTRL_NONE /* D7 */ + AT91_PIOB 28 AT91_PERIPH_A AT91_PINCTRL_NONE /* PCK */ + AT91_PIOB 29 AT91_PERIPH_A AT91_PINCTRL_NONE /* VSYNC */ + AT91_PIOB 30 AT91_PERIPH_A AT91_PINCTRL_NONE /* HSYNC */ + AT91_PIOB 31 AT91_PERIPH_A AT91_PINCTRL_NONE /* MCK */>; + }; + }; + 
usart0 { pinctrl_usart0: usart0-0 { atmel,pins = @@ -1035,6 +1056,17 @@ }; }; + isi@fffb4000 { + compatible = "atmel,at91sam9g45-isi"; + reg = <0xfffb4000 0x4000>; + interrupts = <26 IRQ_TYPE_LEVEL_HIGH 5>; + clocks = <&isi_clk>; + clock-names = "isi_clk"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_isi>; + status = "disabled"; + }; + pwm0: pwm@fffb8000 { compatible = "atmel,at91sam9rl-pwm"; reg = <0xfffb8000 0x300>; @@ -1199,12 +1231,26 @@ }; }; + rtc@fffffd20 { + compatible = "atmel,at91sam9260-rtt"; + reg = <0xfffffd20 0x10>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + clocks = <&clk32k>; + status = "disabled"; + }; + rtc@fffffdb0 { compatible = "atmel,at91rm9200-rtc"; reg = <0xfffffdb0 0x30>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; status = "disabled"; }; + + gpbr: syscon@fffffd60 { + compatible = "atmel,at91sam9260-gpbr", "syscon"; + reg = <0xfffffd60 0x10>; + status = "disabled"; + }; }; fb0: fb@0x00500000 { diff --git a/arch/arm/boot/dts/at91sam9m10g45ek.dts b/arch/arm/boot/dts/at91sam9m10g45ek.dts index d8dd22651090..33ce7ca2c404 100644 --- a/arch/arm/boot/dts/at91sam9m10g45ek.dts +++ b/arch/arm/boot/dts/at91sam9m10g45ek.dts @@ -161,6 +161,15 @@ pinctrl-0 = <&pinctrl_pwm_leds>; }; + rtc@fffffd20 { + atmel,rtt-rtc-time-reg = <&gpbr 0x0>; + status = "okay"; + }; + + gpbr: syscon@fffffd60 { + status = "okay"; + }; + rtc@fffffdb0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi index f0b4352650ed..72424371413e 100644 --- a/arch/arm/boot/dts/at91sam9rl.dtsi +++ b/arch/arm/boot/dts/at91sam9rl.dtsi @@ -1059,6 +1059,27 @@ clocks = <&slow_rc_osc &slow_osc>; }; }; + + rtc@fffffeb0 { + compatible = "atmel,at91rm9200-rtc"; + reg = <0xfffffeb0 0x40>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + status = "disabled"; + }; + + rtc@fffffd20 { + compatible = "atmel,at91sam9260-rtt"; + reg = <0xfffffd20 0x10>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + clocks = <&clk32k>; + status = "disabled"; + }; + + gpbr: 
syscon@fffffd60 { + compatible = "atmel,at91sam9260-gpbr", "syscon"; + reg = <0xfffffd60 0x10>; + status = "disabled"; + }; }; }; diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index 736092b1a535..10b725c7bfc0 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -304,7 +304,7 @@ /* VDD_GPU - over VDD_SMPS6 */ regulator-name = "smps6"; regulator-min-microvolt = <850000>; - regulator-max-microvolt = <12500000>; + regulator-max-microvolt = <1250000>; regulator-always-on; regulator-boot-on; }; @@ -313,7 +313,7 @@ /* CORE_VDD */ regulator-name = "smps7"; regulator-min-microvolt = <850000>; - regulator-max-microvolt = <1030000>; + regulator-max-microvolt = <1060000>; regulator-always-on; regulator-boot-on; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 63bf99be1762..22771bc1643a 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -742,7 +742,7 @@ }; wdt2: wdt@4ae14000 { - compatible = "ti,omap4-wdt"; + compatible = "ti,omap3-wdt"; reg = <0x4ae14000 0x80>; interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "wd_timer2"; diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index afc74fd4bb5e..89085d066c65 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -160,7 +160,7 @@ /* VDD_CORE */ regulator-name = "smps2"; regulator-min-microvolt = <850000>; - regulator-max-microvolt = <1030000>; + regulator-max-microvolt = <1060000>; regulator-boot-on; regulator-always-on; }; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index 2c05b3f017fa..4bdcbd61ce47 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -1042,7 +1042,7 @@ #clock-cells = <0>; compatible = "ti,mux-clock"; clocks = <&sys_clkin1>, <&sys_clkin2>; - reg = <0x01a4>; + reg = <0x0164>; }; mlb_clk: mlb_clk { @@ -1084,14 +1084,14 @@ #clock-cells = <0>; 
compatible = "ti,mux-clock"; clocks = <&sys_clkin1>, <&sys_clkin2>; - reg = <0x01d0>; + reg = <0x0168>; }; video2_dpll_clk_mux: video2_dpll_clk_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; clocks = <&sys_clkin1>, <&sys_clkin2>; - reg = <0x01d4>; + reg = <0x016c>; }; wkupaon_iclk_mux: wkupaon_iclk_mux { diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index 242ddda0a8cd..22465494b796 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -311,12 +311,13 @@ adc: adc@126C0000 { compatible = "samsung,exynos3250-adc", "samsung,exynos-adc-v2"; - reg = <0x126C0000 0x100>, <0x10020718 0x4>; + reg = <0x126C0000 0x100>; interrupts = <0 137 0>; clock-names = "adc", "sclk"; clocks = <&cmu CLK_TSADC>, <&cmu CLK_SCLK_TSADC>; #io-channel-cells = <1>; io-channel-ranges; + samsung,syscon-phandle = <&pmu_system_controller>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/exynos4x12.dtsi b/arch/arm/boot/dts/exynos4x12.dtsi index 2e9f1f7be77b..93b70402e943 100644 --- a/arch/arm/boot/dts/exynos4x12.dtsi +++ b/arch/arm/boot/dts/exynos4x12.dtsi @@ -108,13 +108,14 @@ adc: adc@126C0000 { compatible = "samsung,exynos-adc-v1"; - reg = <0x126C0000 0x100>, <0x10020718 0x4>; + reg = <0x126C0000 0x100>; interrupt-parent = <&combiner>; interrupts = <10 3>; clocks = <&clock CLK_TSADC>; clock-names = "adc"; #io-channel-cells = <1>; io-channel-ranges; + samsung,syscon-phandle = <&pmu_system_controller>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index d45a07ea3402..0a229fcd7acf 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -754,12 +754,13 @@ adc: adc@12D10000 { compatible = "samsung,exynos-adc-v1"; - reg = <0x12D10000 0x100>, <0x10040718 0x4>; + reg = <0x12D10000 0x100>; interrupts = <0 106 0>; clocks = <&clock CLK_ADC>; clock-names = "adc"; #io-channel-cells = <1>; io-channel-ranges; + samsung,syscon-phandle 
= <&pmu_system_controller>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi index 90bf4011e319..517e50f6760b 100644 --- a/arch/arm/boot/dts/exynos5420.dtsi +++ b/arch/arm/boot/dts/exynos5420.dtsi @@ -541,12 +541,13 @@ adc: adc@12D10000 { compatible = "samsung,exynos-adc-v2"; - reg = <0x12D10000 0x100>, <0x10040720 0x4>; + reg = <0x12D10000 0x100>; interrupts = <0 106 0>; clocks = <&clock CLK_TSADC>; clock-names = "adc"; #io-channel-cells = <1>; io-channel-ranges; + samsung,syscon-phandle = <&pmu_system_controller>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index 009abd69385d..327d362fe275 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -67,6 +67,7 @@ phy-mode = "rgmii"; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; + fsl,magic-packet; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi index f1cd2147421d..6bfd0bc6e658 100644 --- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi @@ -160,6 +160,7 @@ pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; phy-reset-gpios = <&gpio1 25 0>; + fsl,magic-packet; status = "okay"; }; diff --git a/arch/arm/boot/dts/mmp2-brownstone.dts b/arch/arm/boot/dts/mmp2-brownstone.dts index 7f70a39459f6..350208c5e1ed 100644 --- a/arch/arm/boot/dts/mmp2-brownstone.dts +++ b/arch/arm/boot/dts/mmp2-brownstone.dts @@ -8,7 +8,7 @@ */ /dts-v1/; -/include/ "mmp2.dtsi" +#include "mmp2.dtsi" / { model = "Marvell MMP2 Brownstone Development Board"; diff --git a/arch/arm/boot/dts/mmp2.dtsi b/arch/arm/boot/dts/mmp2.dtsi index 4e8b08c628c7..766bbb8495b6 100644 --- a/arch/arm/boot/dts/mmp2.dtsi +++ b/arch/arm/boot/dts/mmp2.dtsi @@ -7,7 +7,8 @@ * publishhed by the Free Software Foundation. 
*/ -/include/ "skeleton.dtsi" +#include "skeleton.dtsi" +#include <dt-bindings/clock/marvell,mmp2.h> / { aliases { @@ -135,6 +136,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4030000 0x1000>; interrupts = <27>; + clocks = <&soc_clocks MMP2_CLK_UART0>; + resets = <&soc_clocks MMP2_CLK_UART0>; status = "disabled"; }; @@ -142,6 +145,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4017000 0x1000>; interrupts = <28>; + clocks = <&soc_clocks MMP2_CLK_UART1>; + resets = <&soc_clocks MMP2_CLK_UART1>; status = "disabled"; }; @@ -149,6 +154,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4018000 0x1000>; interrupts = <24>; + clocks = <&soc_clocks MMP2_CLK_UART2>; + resets = <&soc_clocks MMP2_CLK_UART2>; status = "disabled"; }; @@ -156,6 +163,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4016000 0x1000>; interrupts = <46>; + clocks = <&soc_clocks MMP2_CLK_UART3>; + resets = <&soc_clocks MMP2_CLK_UART3>; status = "disabled"; }; @@ -168,6 +177,8 @@ #gpio-cells = <2>; interrupts = <49>; interrupt-names = "gpio_mux"; + clocks = <&soc_clocks MMP2_CLK_GPIO>; + resets = <&soc_clocks MMP2_CLK_GPIO>; interrupt-controller; #interrupt-cells = <1>; ranges; @@ -201,6 +212,8 @@ compatible = "mrvl,mmp-twsi"; reg = <0xd4011000 0x1000>; interrupts = <7>; + clocks = <&soc_clocks MMP2_CLK_TWSI0>; + resets = <&soc_clocks MMP2_CLK_TWSI0>; #address-cells = <1>; #size-cells = <0>; mrvl,i2c-fast-mode; @@ -211,6 +224,8 @@ compatible = "mrvl,mmp-twsi"; reg = <0xd4025000 0x1000>; interrupts = <58>; + clocks = <&soc_clocks MMP2_CLK_TWSI1>; + resets = <&soc_clocks MMP2_CLK_TWSI1>; status = "disabled"; }; @@ -220,8 +235,20 @@ interrupts = <1 0>; interrupt-names = "rtc 1Hz", "rtc alarm"; interrupt-parent = <&intcmux5>; + clocks = <&soc_clocks MMP2_CLK_RTC>; + resets = <&soc_clocks MMP2_CLK_RTC>; status = "disabled"; }; }; + + soc_clocks: clocks{ + compatible = "marvell,mmp2-clock"; + reg = <0xd4050000 0x1000>, + <0xd4282800 0x400>, + <0xd4015000 0x1000>; + reg-names = "mpmu", "apmu", "apbc"; + #clock-cells = <1>; 
+ #reset-cells = <1>; + }; }; }; diff --git a/arch/arm/boot/dts/omap2430-sdp.dts b/arch/arm/boot/dts/omap2430-sdp.dts index 05eca2e4430f..6b36ede58488 100644 --- a/arch/arm/boot/dts/omap2430-sdp.dts +++ b/arch/arm/boot/dts/omap2430-sdp.dts @@ -48,22 +48,22 @@ gpmc,device-width = <1>; gpmc,cycle2cycle-samecsen = <1>; gpmc,cycle2cycle-diffcsen = <1>; - gpmc,cs-on-ns = <7>; - gpmc,cs-rd-off-ns = <233>; - gpmc,cs-wr-off-ns = <233>; - gpmc,adv-on-ns = <22>; - gpmc,adv-rd-off-ns = <60>; - gpmc,adv-wr-off-ns = <60>; - gpmc,oe-on-ns = <67>; - gpmc,oe-off-ns = <210>; - gpmc,we-on-ns = <67>; - gpmc,we-off-ns = <210>; - gpmc,rd-cycle-ns = <233>; - gpmc,wr-cycle-ns = <233>; - gpmc,access-ns = <233>; - gpmc,page-burst-access-ns = <30>; - gpmc,bus-turnaround-ns = <30>; - gpmc,cycle2cycle-delay-ns = <30>; + gpmc,cs-on-ns = <6>; + gpmc,cs-rd-off-ns = <187>; + gpmc,cs-wr-off-ns = <187>; + gpmc,adv-on-ns = <18>; + gpmc,adv-rd-off-ns = <48>; + gpmc,adv-wr-off-ns = <48>; + gpmc,oe-on-ns = <60>; + gpmc,oe-off-ns = <169>; + gpmc,we-on-ns = <66>; + gpmc,we-off-ns = <169>; + gpmc,rd-cycle-ns = <187>; + gpmc,wr-cycle-ns = <187>; + gpmc,access-ns = <187>; + gpmc,page-burst-access-ns = <24>; + gpmc,bus-turnaround-ns = <24>; + gpmc,cycle2cycle-delay-ns = <24>; gpmc,wait-monitoring-ns = <0>; gpmc,clk-activation-ns = <0>; gpmc,wr-data-mux-bus-ns = <0>; diff --git a/arch/arm/boot/dts/pxa168-aspenite.dts b/arch/arm/boot/dts/pxa168-aspenite.dts index e762facb3fa4..0a988b3fb248 100644 --- a/arch/arm/boot/dts/pxa168-aspenite.dts +++ b/arch/arm/boot/dts/pxa168-aspenite.dts @@ -8,7 +8,7 @@ */ /dts-v1/; -/include/ "pxa168.dtsi" +#include "pxa168.dtsi" / { model = "Marvell PXA168 Aspenite Development Board"; diff --git a/arch/arm/boot/dts/pxa168.dtsi b/arch/arm/boot/dts/pxa168.dtsi index 975dad21ac38..b899e25cbb1b 100644 --- a/arch/arm/boot/dts/pxa168.dtsi +++ b/arch/arm/boot/dts/pxa168.dtsi @@ -7,7 +7,8 @@ * publishhed by the Free Software Foundation. 
*/ -/include/ "skeleton.dtsi" +#include "skeleton.dtsi" +#include <dt-bindings/clock/marvell,pxa168.h> / { aliases { @@ -59,6 +60,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4017000 0x1000>; interrupts = <27>; + clocks = <&soc_clocks PXA168_CLK_UART0>; + resets = <&soc_clocks PXA168_CLK_UART0>; status = "disabled"; }; @@ -66,6 +69,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4018000 0x1000>; interrupts = <28>; + clocks = <&soc_clocks PXA168_CLK_UART1>; + resets = <&soc_clocks PXA168_CLK_UART1>; status = "disabled"; }; @@ -73,6 +78,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4026000 0x1000>; interrupts = <29>; + clocks = <&soc_clocks PXA168_CLK_UART2>; + resets = <&soc_clocks PXA168_CLK_UART2>; status = "disabled"; }; @@ -84,6 +91,8 @@ gpio-controller; #gpio-cells = <2>; interrupts = <49>; + clocks = <&soc_clocks PXA168_CLK_GPIO>; + resets = <&soc_clocks PXA168_CLK_GPIO>; interrupt-names = "gpio_mux"; interrupt-controller; #interrupt-cells = <1>; @@ -110,6 +119,8 @@ compatible = "mrvl,mmp-twsi"; reg = <0xd4011000 0x1000>; interrupts = <7>; + clocks = <&soc_clocks PXA168_CLK_TWSI0>; + resets = <&soc_clocks PXA168_CLK_TWSI0>; mrvl,i2c-fast-mode; status = "disabled"; }; @@ -118,6 +129,8 @@ compatible = "mrvl,mmp-twsi"; reg = <0xd4025000 0x1000>; interrupts = <58>; + clocks = <&soc_clocks PXA168_CLK_TWSI1>; + resets = <&soc_clocks PXA168_CLK_TWSI1>; status = "disabled"; }; @@ -126,8 +139,20 @@ reg = <0xd4010000 0x1000>; interrupts = <5 6>; interrupt-names = "rtc 1Hz", "rtc alarm"; + clocks = <&soc_clocks PXA168_CLK_RTC>; + resets = <&soc_clocks PXA168_CLK_RTC>; status = "disabled"; }; }; + + soc_clocks: clocks{ + compatible = "marvell,pxa168-clock"; + reg = <0xd4050000 0x1000>, + <0xd4282800 0x400>, + <0xd4015000 0x1000>; + reg-names = "mpmu", "apmu", "apbc"; + #clock-cells = <1>; + #reset-cells = <1>; + }; }; }; diff --git a/arch/arm/boot/dts/pxa910-dkb.dts b/arch/arm/boot/dts/pxa910-dkb.dts index 595492aa5053..c82f2810ec73 100644 --- 
a/arch/arm/boot/dts/pxa910-dkb.dts +++ b/arch/arm/boot/dts/pxa910-dkb.dts @@ -8,7 +8,7 @@ */ /dts-v1/; -/include/ "pxa910.dtsi" +#include "pxa910.dtsi" / { model = "Marvell PXA910 DKB Development Board"; diff --git a/arch/arm/boot/dts/pxa910.dtsi b/arch/arm/boot/dts/pxa910.dtsi index 0247c622f580..0868f6729be1 100644 --- a/arch/arm/boot/dts/pxa910.dtsi +++ b/arch/arm/boot/dts/pxa910.dtsi @@ -7,7 +7,8 @@ * publishhed by the Free Software Foundation. */ -/include/ "skeleton.dtsi" +#include "skeleton.dtsi" +#include <dt-bindings/clock/marvell,pxa910.h> / { aliases { @@ -71,6 +72,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4017000 0x1000>; interrupts = <27>; + clocks = <&soc_clocks PXA910_CLK_UART0>; + resets = <&soc_clocks PXA910_CLK_UART0>; status = "disabled"; }; @@ -78,6 +81,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4018000 0x1000>; interrupts = <28>; + clocks = <&soc_clocks PXA910_CLK_UART1>; + resets = <&soc_clocks PXA910_CLK_UART1>; status = "disabled"; }; @@ -85,6 +90,8 @@ compatible = "mrvl,mmp-uart"; reg = <0xd4036000 0x1000>; interrupts = <59>; + clocks = <&soc_clocks PXA910_CLK_UART2>; + resets = <&soc_clocks PXA910_CLK_UART2>; status = "disabled"; }; @@ -97,6 +104,8 @@ #gpio-cells = <2>; interrupts = <49>; interrupt-names = "gpio_mux"; + clocks = <&soc_clocks PXA910_CLK_GPIO>; + resets = <&soc_clocks PXA910_CLK_GPIO>; interrupt-controller; #interrupt-cells = <1>; ranges; @@ -124,6 +133,8 @@ #size-cells = <0>; reg = <0xd4011000 0x1000>; interrupts = <7>; + clocks = <&soc_clocks PXA910_CLK_TWSI0>; + resets = <&soc_clocks PXA910_CLK_TWSI0>; mrvl,i2c-fast-mode; status = "disabled"; }; @@ -134,6 +145,8 @@ #size-cells = <0>; reg = <0xd4037000 0x1000>; interrupts = <54>; + clocks = <&soc_clocks PXA910_CLK_TWSI1>; + resets = <&soc_clocks PXA910_CLK_TWSI1>; status = "disabled"; }; @@ -142,8 +155,21 @@ reg = <0xd4010000 0x1000>; interrupts = <5 6>; interrupt-names = "rtc 1Hz", "rtc alarm"; + clocks = <&soc_clocks PXA910_CLK_RTC>; + resets = <&soc_clocks 
PXA910_CLK_RTC>; status = "disabled"; }; }; + + soc_clocks: clocks{ + compatible = "marvell,pxa910-clock"; + reg = <0xd4050000 0x1000>, + <0xd4282800 0x400>, + <0xd4015000 0x1000>, + <0xd403b000 0x1000>; + reg-names = "mpmu", "apmu", "apbc", "apbcp"; + #clock-cells = <1>; + #reset-cells = <1>; + }; }; }; diff --git a/arch/arm/boot/dts/rk3288-evb-rk808.dts b/arch/arm/boot/dts/rk3288-evb-rk808.dts index d8c775e6d5fe..831a7aa85136 100644 --- a/arch/arm/boot/dts/rk3288-evb-rk808.dts +++ b/arch/arm/boot/dts/rk3288-evb-rk808.dts @@ -15,6 +15,13 @@ / { compatible = "rockchip,rk3288-evb-rk808", "rockchip,rk3288"; + + ext_gmac: external-gmac-clock { + compatible = "fixed-clock"; + clock-frequency = <125000000>; + clock-output-names = "ext_gmac"; + #clock-cells = <0>; + }; }; &cpu0 { @@ -152,3 +159,19 @@ }; }; }; + +&gmac { + phy_regulator = "vcc_phy"; + phy-mode = "rgmii"; + clock_in_out = "input"; + snps,reset-gpio = <&gpio4 7 0>; + snps,reset-active-low; + snps,reset-delays-us = <0 10000 1000000>; + assigned-clocks = <&cru SCLK_MAC>; + assigned-clock-parents = <&ext_gmac>; + pinctrl-names = "default"; + pinctrl-0 = <&rgmii_pins>; + tx_delay = <0x30>; + rx_delay = <0x10>; + status = "ok"; +}; diff --git a/arch/arm/boot/dts/rk3288-evb.dtsi b/arch/arm/boot/dts/rk3288-evb.dtsi index 3e067dd65d0c..048cb170c884 100644 --- a/arch/arm/boot/dts/rk3288-evb.dtsi +++ b/arch/arm/boot/dts/rk3288-evb.dtsi @@ -90,6 +90,17 @@ regulator-always-on; regulator-boot-on; }; + + vcc_phy: vcc-phy-regulator { + compatible = "regulator-fixed"; + enable-active-high; + gpio = <&gpio0 6 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <ð_phy_pwr>; + regulator-name = "vcc_phy"; + regulator-always-on; + regulator-boot-on; + }; }; &emmc { @@ -178,6 +189,12 @@ rockchip,pins = <0 14 RK_FUNC_GPIO &pcfg_pull_none>; }; }; + + eth_phy { + eth_phy_pwr: eth-phy-pwr { + rockchip,pins = <0 6 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; }; &usb_host0_ehci { diff --git a/arch/arm/boot/dts/rk3288.dtsi 
b/arch/arm/boot/dts/rk3288.dtsi index fd19f00784bd..910dcad2088a 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -380,6 +380,22 @@ status = "disabled"; }; + gmac: ethernet@ff290000 { + compatible = "rockchip,rk3288-gmac"; + reg = <0xff290000 0x10000>; + interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + rockchip,grf = <&grf>; + clocks = <&cru SCLK_MAC>, + <&cru SCLK_MAC_RX>, <&cru SCLK_MAC_TX>, + <&cru SCLK_MACREF>, <&cru SCLK_MACREF_OUT>, + <&cru ACLK_GMAC>, <&cru PCLK_GMAC>; + clock-names = "stmmaceth", + "mac_clk_rx", "mac_clk_tx", + "clk_mac_ref", "clk_mac_refout", + "aclk_mac", "pclk_mac"; + }; + usb_host0_ehci: usb@ff500000 { compatible = "generic-ehci"; reg = <0xff500000 0x100>; @@ -725,6 +741,11 @@ bias-disable; }; + pcfg_pull_none_12ma: pcfg-pull-none-12ma { + bias-disable; + drive-strength = <12>; + }; + i2c0 { i2c0_xfer: i2c0-xfer { rockchip,pins = <0 15 RK_FUNC_1 &pcfg_pull_none>, @@ -1068,5 +1089,38 @@ rockchip,pins = <7 23 3 &pcfg_pull_none>; }; }; + + gmac { + rgmii_pins: rgmii-pins { + rockchip,pins = <3 30 3 &pcfg_pull_none>, + <3 31 3 &pcfg_pull_none>, + <3 26 3 &pcfg_pull_none>, + <3 27 3 &pcfg_pull_none>, + <3 28 3 &pcfg_pull_none_12ma>, + <3 29 3 &pcfg_pull_none_12ma>, + <3 24 3 &pcfg_pull_none_12ma>, + <3 25 3 &pcfg_pull_none_12ma>, + <4 0 3 &pcfg_pull_none>, + <4 5 3 &pcfg_pull_none>, + <4 6 3 &pcfg_pull_none>, + <4 9 3 &pcfg_pull_none_12ma>, + <4 4 3 &pcfg_pull_none_12ma>, + <4 1 3 &pcfg_pull_none>, + <4 3 3 &pcfg_pull_none>; + }; + + rmii_pins: rmii-pins { + rockchip,pins = <3 30 3 &pcfg_pull_none>, + <3 31 3 &pcfg_pull_none>, + <3 28 3 &pcfg_pull_none>, + <3 29 3 &pcfg_pull_none>, + <4 0 3 &pcfg_pull_none>, + <4 5 3 &pcfg_pull_none>, + <4 4 3 &pcfg_pull_none>, + <4 1 3 &pcfg_pull_none>, + <4 2 3 &pcfg_pull_none>, + <4 3 3 &pcfg_pull_none>; + }; + }; }; }; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index e0157b0f075c..1b0f30c2c4a5 100644 --- 
a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -9,12 +9,12 @@ * licensing only applies to this file, and not this project as a * whole. * - * a) This library is free software; you can redistribute it and/or + * a) This file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * This file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. @@ -45,6 +45,7 @@ #include "skeleton.dtsi" #include <dt-bindings/clock/at91.h> +#include <dt-bindings/dma/at91.h> #include <dt-bindings/pinctrl/at91.h> #include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/gpio/gpio.h> @@ -302,6 +303,15 @@ #size-cells = <1>; ranges; + dma1: dma-controller@f0004000 { + compatible = "atmel,sama5d4-dma"; + reg = <0xf0004000 0x200>; + interrupts = <50 IRQ_TYPE_LEVEL_HIGH 0>; + #dma-cells = <1>; + clocks = <&dma1_clk>; + clock-names = "dma_clk"; + }; + ramc0: ramc@f0010000 { compatible = "atmel,sama5d3-ddramc"; reg = <0xf0010000 0x200>; @@ -309,6 +319,15 @@ clock-names = "ddrck", "mpddr"; }; + dma0: dma-controller@f0014000 { + compatible = "atmel,sama5d4-dma"; + reg = <0xf0014000 0x200>; + interrupts = <8 IRQ_TYPE_LEVEL_HIGH 0>; + #dma-cells = <1>; + clocks = <&dma0_clk>; + clock-names = "dma_clk"; + }; + pmc: pmc@f0018000 { compatible = "atmel,sama5d3-pmc"; reg = <0xf0018000 0x120>; @@ -761,6 +780,10 @@ compatible = "atmel,hsmci"; reg = <0xf8000000 0x600>; interrupts = <35 IRQ_TYPE_LEVEL_HIGH 0>; + dmas = <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(0))>; + dma-names = "rxtx"; pinctrl-names = 
"default"; pinctrl-0 = <&pinctrl_mmc0_clk_cmd_dat0 &pinctrl_mmc0_dat1_3>; status = "disabled"; @@ -776,6 +799,13 @@ compatible = "atmel,at91rm9200-spi"; reg = <0xf8010000 0x100>; interrupts = <37 IRQ_TYPE_LEVEL_HIGH 3>; + dmas = <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(10))>, + <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(11))>; + dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_spi0>; clocks = <&spi0_clk>; @@ -787,6 +817,13 @@ compatible = "atmel,at91sam9x5-i2c"; reg = <0xf8014000 0x4000>; interrupts = <32 IRQ_TYPE_LEVEL_HIGH 6>; + dmas = <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(2))>, + <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(3))>; + dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c0>; #address-cells = <1>; @@ -817,7 +854,14 @@ i2c2: i2c@f8024000 { compatible = "atmel,at91sam9x5-i2c"; reg = <0xf8024000 0x4000>; - interrupts = <34 4 6>; + interrupts = <34 IRQ_TYPE_LEVEL_HIGH 6>; + dmas = <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(6))>, + <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(7))>; + dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c2>; #address-cells = <1>; @@ -830,6 +874,10 @@ compatible = "atmel,hsmci"; reg = <0xfc000000 0x600>; interrupts = <36 IRQ_TYPE_LEVEL_HIGH 0>; + dmas = <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(1))>; + dma-names = "rxtx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mmc1_clk_cmd_dat0 &pinctrl_mmc1_dat1_3>; status = "disabled"; @@ -843,6 +891,13 @@ compatible = "atmel,at91sam9260-usart"; reg = <0xfc008000 0x100>; interrupts = <29 IRQ_TYPE_LEVEL_HIGH 5>; + dmas = <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(16))>, + <&dma1 + 
(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(17))>; + dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usart2 &pinctrl_usart2_rts &pinctrl_usart2_cts>; clocks = <&usart2_clk>; @@ -854,6 +909,13 @@ compatible = "atmel,at91sam9260-usart"; reg = <0xfc00c000 0x100>; interrupts = <30 IRQ_TYPE_LEVEL_HIGH 5>; + dmas = <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(18))>, + <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(19))>; + dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usart3>; clocks = <&usart3_clk>; @@ -865,6 +927,13 @@ compatible = "atmel,at91sam9260-usart"; reg = <0xfc010000 0x100>; interrupts = <31 IRQ_TYPE_LEVEL_HIGH 5>; + dmas = <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(20))>, + <&dma1 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(21))>; + dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usart4>; clocks = <&usart4_clk>; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index e3ab942fd148..7b4099fcf817 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -188,19 +188,11 @@ "apb0_ir1", "apb0_keypad"; }; - apb1_mux: apb1_mux@01c20058 { - #clock-cells = <0>; - compatible = "allwinner,sun4i-a10-apb1-mux-clk"; - reg = <0x01c20058 0x4>; - clocks = <&osc24M>, <&pll6 1>, <&osc32k>; - clock-output-names = "apb1_mux"; - }; - - apb1: apb1@01c20058 { + apb1: clk@01c20058 { #clock-cells = <0>; compatible = "allwinner,sun4i-a10-apb1-clk"; reg = <0x01c20058 0x4>; - clocks = <&apb1_mux>; + clocks = <&osc24M>, <&pll6 1>, <&osc32k>; clock-output-names = "apb1"; }; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 81ad4b94e812..1b76667f3182 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ 
-176,19 +176,11 @@ "apb0_ir", "apb0_keypad"; }; - apb1_mux: apb1_mux@01c20058 { - #clock-cells = <0>; - compatible = "allwinner,sun4i-a10-apb1-mux-clk"; - reg = <0x01c20058 0x4>; - clocks = <&osc24M>, <&pll6 1>, <&osc32k>; - clock-output-names = "apb1_mux"; - }; - - apb1: apb1@01c20058 { + apb1: clk@01c20058 { #clock-cells = <0>; compatible = "allwinner,sun4i-a10-apb1-clk"; reg = <0x01c20058 0x4>; - clocks = <&apb1_mux>; + clocks = <&osc24M>, <&pll6 1>, <&osc32k>; clock-output-names = "apb1"; }; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index b131068f4f35..c35217ea1f64 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -161,19 +161,11 @@ clock-output-names = "apb0_codec", "apb0_pio", "apb0_ir"; }; - apb1_mux: apb1_mux@01c20058 { - #clock-cells = <0>; - compatible = "allwinner,sun4i-a10-apb1-mux-clk"; - reg = <0x01c20058 0x4>; - clocks = <&osc24M>, <&pll6 1>, <&osc32k>; - clock-output-names = "apb1_mux"; - }; - - apb1: apb1@01c20058 { + apb1: clk@01c20058 { #clock-cells = <0>; compatible = "allwinner,sun4i-a10-apb1-clk"; reg = <0x01c20058 0x4>; - clocks = <&apb1_mux>; + clocks = <&osc24M>, <&pll6 1>, <&osc32k>; clock-output-names = "apb1"; }; diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index a400172a8a52..f47156b6572b 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -229,19 +229,11 @@ "apb1_daudio1"; }; - apb2_mux: apb2_mux@01c20058 { + apb2: clk@01c20058 { #clock-cells = <0>; - compatible = "allwinner,sun4i-a10-apb1-mux-clk"; + compatible = "allwinner,sun4i-a10-apb1-clk"; reg = <0x01c20058 0x4>; clocks = <&osc32k>, <&osc24M>, <&pll6 0>, <&pll6 0>; - clock-output-names = "apb2_mux"; - }; - - apb2: apb2@01c20058 { - #clock-cells = <0>; - compatible = "allwinner,sun6i-a31-apb2-div-clk"; - reg = <0x01c20058 0x4>; - clocks = <&apb2_mux>; clock-output-names = "apb2"; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi 
b/arch/arm/boot/dts/sun7i-a20.dtsi index 82a524ce28ad..e21ce5992d56 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -236,19 +236,11 @@ "apb0_iis2", "apb0_keypad"; }; - apb1_mux: apb1_mux@01c20058 { - #clock-cells = <0>; - compatible = "allwinner,sun4i-a10-apb1-mux-clk"; - reg = <0x01c20058 0x4>; - clocks = <&osc24M>, <&pll6 1>, <&osc32k>; - clock-output-names = "apb1_mux"; - }; - - apb1: apb1@01c20058 { + apb1: clk@01c20058 { #clock-cells = <0>; compatible = "allwinner,sun4i-a10-apb1-clk"; reg = <0x01c20058 0x4>; - clocks = <&apb1_mux>; + clocks = <&osc24M>, <&pll6 1>, <&osc32k>; clock-output-names = "apb1"; }; diff --git a/arch/arm/boot/dts/sun8i-a23.dtsi b/arch/arm/boot/dts/sun8i-a23.dtsi index 6086adbf9d74..0746cd1024d7 100644 --- a/arch/arm/boot/dts/sun8i-a23.dtsi +++ b/arch/arm/boot/dts/sun8i-a23.dtsi @@ -189,19 +189,11 @@ "apb1_daudio0", "apb1_daudio1"; }; - apb2_mux: apb2_mux_clk@01c20058 { + apb2: clk@01c20058 { #clock-cells = <0>; - compatible = "allwinner,sun4i-a10-apb1-mux-clk"; + compatible = "allwinner,sun4i-a10-apb1-clk"; reg = <0x01c20058 0x4>; clocks = <&osc32k>, <&osc24M>, <&pll6>, <&pll6>; - clock-output-names = "apb2_mux"; - }; - - apb2: apb2_clk@01c20058 { - #clock-cells = <0>; - compatible = "allwinner,sun6i-a31-apb2-div-clk"; - reg = <0x01c20058 0x4>; - clocks = <&apb2_mux>; clock-output-names = "apb2"; }; diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi index 222f3b3f4dd5..4296b5398bf5 100644 --- a/arch/arm/boot/dts/tegra114.dtsi +++ b/arch/arm/boot/dts/tegra114.dtsi @@ -1,5 +1,6 @@ #include <dt-bindings/clock/tegra114-car.h> #include <dt-bindings/gpio/tegra-gpio.h> +#include <dt-bindings/memory/tegra114-mc.h> #include <dt-bindings/pinctrl/pinctrl-tegra.h> #include <dt-bindings/interrupt-controller/arm-gic.h> @@ -50,6 +51,8 @@ resets = <&tegra_car 27>; reset-names = "dc"; + iommus = <&mc TEGRA_SWGROUP_DC>; + nvidia,head = <0>; rgb { @@ -67,6 +70,8 @@ resets = <&tegra_car 
26>; reset-names = "dc"; + iommus = <&mc TEGRA_SWGROUP_DCB>; + nvidia,head = <1>; rgb { @@ -498,15 +503,15 @@ reset-names = "fuse"; }; - iommu@70019010 { - compatible = "nvidia,tegra114-smmu", "nvidia,tegra30-smmu"; - reg = <0x70019010 0x02c - 0x700191f0 0x010 - 0x70019228 0x074>; - nvidia,#asids = <4>; - dma-window = <0 0x40000000>; - nvidia,swgroups = <0x18659fe>; - nvidia,ahb = <&ahb>; + mc: memory-controller@70019000 { + compatible = "nvidia,tegra114-mc"; + reg = <0x70019000 0x1000>; + clocks = <&tegra_car TEGRA114_CLK_MC>; + clock-names = "mc"; + + interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>; + + #iommu-cells = <1>; }; ahub@70080000 { diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index 51b373ff1065..4eb540be368f 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -1942,4 +1942,48 @@ <&tegra_car TEGRA124_CLK_EXTERN1>; clock-names = "pll_a", "pll_a_out0", "mclk"; }; + + thermal-zones { + cpu { + trips { + trip@0 { + temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* There are currently no cooling maps because there are no cooling devices */ + }; + }; + + mem { + trips { + trip@0 { + temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* There are currently no cooling maps because there are no cooling devices */ + }; + }; + + gpu { + trips { + trip@0 { + temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* There are currently no cooling maps because there are no cooling devices */ + }; + }; + }; }; diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index df2b06b29985..4be06c6ea0c8 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -1,8 +1,10 @@ #include <dt-bindings/clock/tegra124-car.h> #include <dt-bindings/gpio/tegra-gpio.h> +#include 
<dt-bindings/memory/tegra124-mc.h> #include <dt-bindings/pinctrl/pinctrl-tegra.h> #include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h> #include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/thermal/tegra124-soctherm.h> #include "skeleton.dtsi" @@ -102,6 +104,8 @@ resets = <&tegra_car 27>; reset-names = "dc"; + iommus = <&mc TEGRA_SWGROUP_DC>; + nvidia,head = <0>; }; @@ -115,6 +119,8 @@ resets = <&tegra_car 26>; reset-names = "dc"; + iommus = <&mc TEGRA_SWGROUP_DCB>; + nvidia,head = <1>; }; @@ -275,7 +281,8 @@ pinmux: pinmux@0,70000868 { compatible = "nvidia,tegra124-pinmux"; reg = <0x0 0x70000868 0x0 0x164>, /* Pad control registers */ - <0x0 0x70003000 0x0 0x434>; /* Mux registers */ + <0x0 0x70003000 0x0 0x434>, /* Mux registers */ + <0x0 0x70000820 0x0 0x008>; /* MIPI pad control */ }; /* @@ -551,6 +558,17 @@ reset-names = "fuse"; }; + mc: memory-controller@0,70019000 { + compatible = "nvidia,tegra124-mc"; + reg = <0x0 0x70019000 0x0 0x1000>; + clocks = <&tegra_car TEGRA124_CLK_MC>; + clock-names = "mc"; + + interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>; + + #iommu-cells = <1>; + }; + sata@0,70020000 { compatible = "nvidia,tegra124-ahci"; @@ -640,6 +658,18 @@ status = "disabled"; }; + soctherm: thermal-sensor@0,700e2000 { + compatible = "nvidia,tegra124-soctherm"; + reg = <0x0 0x700e2000 0x0 0x1000>; + interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car TEGRA124_CLK_TSENSOR>, + <&tegra_car TEGRA124_CLK_SOC_THERM>; + clock-names = "tsensor", "soctherm"; + resets = <&tegra_car 78>; + reset-names = "soctherm"; + #thermal-sensor-cells = <1>; + }; + ahub@0,70300000 { compatible = "nvidia,tegra124-ahub"; reg = <0x0 0x70300000 0x0 0x200>, @@ -881,6 +911,40 @@ }; }; + thermal-zones { + cpu { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; + }; + + mem { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm 
TEGRA124_SOCTHERM_SENSOR_MEM>; + }; + + gpu { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_GPU>; + }; + + pllx { + polling-delay-passive = <1000>; + polling-delay = <1000>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_PLLX>; + }; + }; + timer { compatible = "arm,armv7-timer"; interrupts = <GIC_PPI 13 diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index b270b9e3d455..99475f6e76a3 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -1,5 +1,6 @@ #include <dt-bindings/clock/tegra30-car.h> #include <dt-bindings/gpio/tegra-gpio.h> +#include <dt-bindings/memory/tegra30-mc.h> #include <dt-bindings/pinctrl/pinctrl-tegra.h> #include <dt-bindings/interrupt-controller/arm-gic.h> @@ -166,6 +167,8 @@ resets = <&tegra_car 27>; reset-names = "dc"; + iommus = <&mc TEGRA_SWGROUP_DC>; + nvidia,head = <0>; rgb { @@ -183,6 +186,8 @@ resets = <&tegra_car 26>; reset-names = "dc"; + iommus = <&mc TEGRA_SWGROUP_DCB>; + nvidia,head = <1>; rgb { @@ -615,23 +620,15 @@ clock-names = "pclk", "clk32k_in"; }; - memory-controller@7000f000 { + mc: memory-controller@7000f000 { compatible = "nvidia,tegra30-mc"; - reg = <0x7000f000 0x010 - 0x7000f03c 0x1b4 - 0x7000f200 0x028 - 0x7000f284 0x17c>; + reg = <0x7000f000 0x400>; + clocks = <&tegra_car TEGRA30_CLK_MC>; + clock-names = "mc"; + interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>; - }; - iommu@7000f010 { - compatible = "nvidia,tegra30-smmu"; - reg = <0x7000f010 0x02c - 0x7000f1f0 0x010 - 0x7000f228 0x05c>; - nvidia,#asids = <4>; /* # of ASIDs */ - dma-window = <0 0x40000000>; /* IOVA start & length */ - nvidia,ahb = <&ahb>; + #iommu-cells = <1>; }; fuse@7000f800 { diff --git a/arch/arm/configs/ape6evm_defconfig b/arch/arm/configs/ape6evm_defconfig index db81d8ce4c03..9e9a72e3d30f 100644 --- a/arch/arm/configs/ape6evm_defconfig +++ b/arch/arm/configs/ape6evm_defconfig @@ -33,7 +33,7 @@ 
CONFIG_ARM_APPENDED_DTB=y CONFIG_VFP=y CONFIG_NEON=y CONFIG_BINFMT_MISC=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/armadillo800eva_defconfig b/arch/arm/configs/armadillo800eva_defconfig index d9675c68a399..5666e3700a82 100644 --- a/arch/arm/configs/armadillo800eva_defconfig +++ b/arch/arm/configs/armadillo800eva_defconfig @@ -43,7 +43,7 @@ CONFIG_KEXEC=y CONFIG_VFP=y CONFIG_NEON=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/bcm_defconfig b/arch/arm/configs/bcm_defconfig index 83a87e48901c..7117662bab2e 100644 --- a/arch/arm/configs/bcm_defconfig +++ b/arch/arm/configs/bcm_defconfig @@ -39,7 +39,7 @@ CONFIG_CPU_IDLE=y CONFIG_VFP=y CONFIG_NEON=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=y diff --git a/arch/arm/configs/bockw_defconfig b/arch/arm/configs/bockw_defconfig index 1dde5daa84f9..3125e00f05ab 100644 --- a/arch/arm/configs/bockw_defconfig +++ b/arch/arm/configs/bockw_defconfig @@ -29,7 +29,7 @@ CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_ARM_APPENDED_DTB=y CONFIG_VFP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index 759f9b0053e2..235842c9ba96 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -49,7 +49,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=m CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_ONDEMAND=m CONFIG_CPU_IDLE=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index c41990729024..5ef14de00a29 100644 --- a/arch/arm/configs/exynos_defconfig +++ 
b/arch/arm/configs/exynos_defconfig @@ -27,7 +27,7 @@ CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x41000000,8M console=ttySAC1,115200 init=/linuxrc mem=256M" CONFIG_VFP=y CONFIG_NEON=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig index eb440aae4283..ea316c4b890e 100644 --- a/arch/arm/configs/ezx_defconfig +++ b/arch/arm/configs/ezx_defconfig @@ -39,7 +39,6 @@ CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m CONFIG_PM=y CONFIG_APM_EMULATION=y -CONFIG_PM_RUNTIME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/hisi_defconfig b/arch/arm/configs/hisi_defconfig index 1fe3621faf65..112543665dd7 100644 --- a/arch/arm/configs/hisi_defconfig +++ b/arch/arm/configs/hisi_defconfig @@ -18,7 +18,7 @@ CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_NEON=y CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig index 182e54692664..18e59feaa307 100644 --- a/arch/arm/configs/imote2_defconfig +++ b/arch/arm/configs/imote2_defconfig @@ -31,7 +31,6 @@ CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m CONFIG_PM=y CONFIG_APM_EMULATION=y -CONFIG_PM_RUNTIME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index f707cd2691cf..7c2075a07eba 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -54,7 +54,7 @@ CONFIG_ARM_IMX6Q_CPUFREQ=y CONFIG_VFP=y CONFIG_NEON=y CONFIG_BINFMT_MISC=m -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_PM_DEBUG=y CONFIG_PM_TEST_SUSPEND=y CONFIG_NET=y diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 20a3ff99fae2..a2067cbfe173 100644 --- a/arch/arm/configs/keystone_defconfig +++ 
b/arch/arm/configs/keystone_defconfig @@ -30,7 +30,7 @@ CONFIG_HIGHMEM=y CONFIG_VFP=y CONFIG_NEON=y # CONFIG_SUSPEND is not set -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/kzm9g_defconfig b/arch/arm/configs/kzm9g_defconfig index 8cb115d74fdf..5d63fc5d2d48 100644 --- a/arch/arm/configs/kzm9g_defconfig +++ b/arch/arm/configs/kzm9g_defconfig @@ -43,7 +43,7 @@ CONFIG_KEXEC=y CONFIG_VFP=y CONFIG_NEON=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/lager_defconfig b/arch/arm/configs/lager_defconfig index 929c571ea29b..a82afc916a89 100644 --- a/arch/arm/configs/lager_defconfig +++ b/arch/arm/configs/lager_defconfig @@ -37,7 +37,7 @@ CONFIG_AUTO_ZRELADDR=y CONFIG_VFP=y CONFIG_NEON=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/mackerel_defconfig b/arch/arm/configs/mackerel_defconfig index 57ececba2ae6..05a529311b4d 100644 --- a/arch/arm/configs/mackerel_defconfig +++ b/arch/arm/configs/mackerel_defconfig @@ -28,7 +28,6 @@ CONFIG_KEXEC=y CONFIG_VFP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_PM=y -CONFIG_PM_RUNTIME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/marzen_defconfig b/arch/arm/configs/marzen_defconfig index ff91630d34e1..3c8b6d823189 100644 --- a/arch/arm/configs/marzen_defconfig +++ b/arch/arm/configs/marzen_defconfig @@ -33,7 +33,7 @@ CONFIG_ARM_APPENDED_DTB=y CONFIG_VFP=y CONFIG_KEXEC=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index d7896580f3bb..2328fe752e9c 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -479,4 +479,4 
@@ CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOCKUP_DETECTOR=y CONFIG_CRYPTO_DEV_TEGRA_AES=y -CONFIG_GENERIC_CPUFREQ_CPU0=y +CONFIG_CPUFREQ_DT=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 115cda9f3260..a7dce674f1be 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -63,7 +63,6 @@ CONFIG_FPE_NWFPE=y CONFIG_BINFMT_MISC=y CONFIG_PM=y # CONFIG_SUSPEND is not set -CONFIG_PM_RUNTIME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 3e09286f7ff1..c2c3a852af9f 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -127,6 +127,8 @@ CONFIG_SRAM=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_SCSI_SCAN_ASYNC=y +CONFIG_ATA=y +CONFIG_SATA_AHCI_PLATFORM=y CONFIG_MD=y CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_ARC is not set diff --git a/arch/arm/configs/prima2_defconfig b/arch/arm/configs/prima2_defconfig index 23591dba47a0..f610230b9c1f 100644 --- a/arch/arm/configs/prima2_defconfig +++ b/arch/arm/configs/prima2_defconfig @@ -18,7 +18,7 @@ CONFIG_PREEMPT=y CONFIG_AEABI=y CONFIG_KEXEC=y CONFIG_BINFMT_MISC=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig index b58fb32770a0..afa24799477a 100644 --- a/arch/arm/configs/sama5_defconfig +++ b/arch/arm/configs/sama5_defconfig @@ -32,7 +32,7 @@ CONFIG_VFP=y CONFIG_NEON=y CONFIG_KERNEL_MODE_NEON=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_PM_DEBUG=y CONFIG_PM_ADVANCED_DEBUG=y CONFIG_NET=y diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig index 63fb5316ff02..3df6ca0c1d1f 100644 --- a/arch/arm/configs/shmobile_defconfig +++ b/arch/arm/configs/shmobile_defconfig @@ -39,7 +39,7 @@ CONFIG_KEXEC=y 
CONFIG_VFP=y CONFIG_NEON=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -146,7 +146,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_S35390A=y CONFIG_DMADEVICES=y CONFIG_SH_DMAE=y -CONFIG_RCAR_AUDMAC_PP=y CONFIG_RCAR_DMAC=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_PWM=y @@ -178,5 +177,5 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_CPU_THERMAL=y -CONFIG_GENERIC_CPUFREQ_CPU0=y +CONFIG_CPUFREQ_DT=y CONFIG_REGULATOR_DA9210=y diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index f7ac0379850f..7a342d2780a8 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -11,7 +11,7 @@ CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_VFP=y CONFIG_NEON=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 40750f93aa83..3ea9c3377ccb 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -46,7 +46,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_IDLE=y CONFIG_VFP=y CONFIG_NEON=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index d219d6a43238..6a1c9898fd03 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -25,7 +25,7 @@ CONFIG_CPU_IDLE=y CONFIG_ARM_U8500_CPUIDLE=y CONFIG_VFP=y CONFIG_NEON=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/vt8500_v6_v7_defconfig b/arch/arm/configs/vt8500_v6_v7_defconfig index 9e7a25639690..1bfaa7bfc392 100644 --- a/arch/arm/configs/vt8500_v6_v7_defconfig +++ b/arch/arm/configs/vt8500_v6_v7_defconfig @@ -16,7 +16,7 @@ CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_VFP=y 
CONFIG_NEON=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_UNIX=y CONFIG_INET=y diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index e6e3446abdf6..b52101d37ec7 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -121,13 +121,12 @@ static inline unsigned long dma_max_pfn(struct device *dev) } #define dma_max_pfn(dev) dma_max_pfn(dev) -static inline int set_arch_dma_coherent_ops(struct device *dev) -{ - dev->archdata.dma_coherent = true; - set_dma_ops(dev, &arm_coherent_dma_ops); - return 0; -} -#define set_arch_dma_coherent_ops(dev) set_arch_dma_coherent_ops(dev) +#define arch_setup_dma_ops arch_setup_dma_ops +extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent); + +#define arch_teardown_dma_ops arch_teardown_dma_ops +extern void arch_teardown_dma_ops(struct device *dev); /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index b9db269c6e61..66ce17655bb9 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr = HCR_GUEST_MASK; +} + static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 53036e21756b..254e0650e48b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -150,8 +150,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct 
kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index acb0d5712716..63e0ecc04901 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, @@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) } static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); /* diff --git a/arch/arm/include/asm/mach/irda.h b/arch/arm/include/asm/mach/irda.h deleted file mode 100644 index 38f77b5e56cf..000000000000 --- a/arch/arm/include/asm/mach/irda.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * arch/arm/include/asm/mach/irda.h - * - * Copyright (C) 2004 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#ifndef __ASM_ARM_MACH_IRDA_H -#define __ASM_ARM_MACH_IRDA_H - -struct irda_platform_data { - int (*startup)(struct device *); - void (*shutdown)(struct device *); - int (*set_power)(struct device *, unsigned int state); - void (*set_speed)(struct device *, unsigned int speed); -}; - -#endif diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index ac4bfae26702..0fa418463f49 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -120,12 +120,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); + return !arch_spin_value_unlocked(READ_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tickets = READ_ONCE(lock->tickets); return (tickets.next - tickets.owner) > 1; } #define arch_spin_is_contended arch_spin_is_contended diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index e34934f63a49..f7c65adaa428 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -484,7 +484,7 @@ static void armpmu_disable(struct pmu *pmu) armpmu->stop(armpmu); } -#ifdef CONFIG_PM_RUNTIME +#ifdef CONFIG_PM static int armpmu_runtime_resume(struct device *dev) { struct arm_pmu_platdata *plat = dev_get_platdata(dev); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8361652b6dab..f9c863911038 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -18,6 +18,7 @@ #include <linux/bootmem.h> #include <linux/seq_file.h> #include <linux/screen_info.h> +#include <linux/of_iommu.h> #include <linux/of_platform.h> #include <linux/init.h> #include <linux/kexec.h> @@ -806,6 +807,7 @@ static int __init customize_machine(void) * machine from the device tree, if no callback is provided, * otherwise we would always need an 
init_machine callback. */ + of_iommu_init(); if (machine_desc->init_machine) machine_desc->init_machine(); #ifdef CONFIG_OF diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9e193c8a959e..2d6d91001062 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) int err; struct kvm_vcpu *vcpu; + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) { + err = -EBUSY; + goto out; + } + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) { err = -ENOMEM; @@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); /* Set up the timer */ kvm_timer_vcpu_init(vcpu); @@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm) static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; int ret; if (likely(vcpu->arch.has_run_once)) @@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) vcpu->arch.has_run_once = true; /* - * Initialize the VGIC before running a vcpu the first time on - * this VM. + * Map the VGIC hardware resources before running a vcpu the first + * time on this VM. */ - if (unlikely(!vgic_initialized(vcpu->kvm))) { - ret = kvm_vgic_init(vcpu->kvm); + if (unlikely(!vgic_ready(kvm))) { + ret = kvm_vgic_map_resources(kvm); if (ret) return ret; } + /* + * Enable the arch timers only if we have an in-kernel VGIC + * and it has been properly initialized, since we cannot handle + * interrupts from the virtual timer with a userspace gic. 
+ */ + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) + kvm_timer_enable(kvm); + return 0; } @@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same target. + */ + if (vcpu->arch.target != -1 && vcpu->arch.target != init->target) + return -EINVAL; + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + bool set = (init->features[i / 32] & (1 << (i % 32))); + + if (set && i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same feature set. + */ + if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES && + test_bit(i, vcpu->arch.features) != set) + return -EINVAL; + + if (set) + set_bit(i, vcpu->arch.features); + } + + vcpu->arch.target = phys_target; + + /* Now we know what it is, we can reset it. */ + return kvm_reset_vcpu(vcpu); +} + + static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { @@ -659,10 +716,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, return ret; /* + * Ensure a rebooted VM will fault in RAM pages and detect if the + * guest MMU is turned off and flush the caches as needed. + */ + if (vcpu->arch.has_run_once) + stage2_unmap_vm(vcpu->kvm); + + vcpu_reset_hcr(vcpu); + + /* * Handle the "start in power-off" case by marking the VCPU as paused. 
*/ - if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) vcpu->arch.pause = true; + else + vcpu->arch.pause = false; return 0; } diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index cc0b78769bd8..384bab67c462 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr = HCR_GUEST_MASK; return 0; } @@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void) } } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - - /* We can only cope with guest==host and only on A15/A7 (for now). */ - if (init->target != kvm_target_cpu()) - return -EINVAL; - - vcpu->arch.target = init->target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (test_bit(i, (void *)init->features)) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. */ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 4cb5a93182e9..5d3bfc0eb3f0 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, } rt = vcpu->arch.mmio_decode.rt; - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); - trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : - KVM_TRACE_MMIO_READ_UNSATISFIED, - mmio.len, fault_ipa, - (mmio.is_write) ? 
data : 0); + if (mmio.is_write) { + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), + mmio.len); - if (mmio.is_write) + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len, + fault_ipa, data); mmio_write_buf(mmio.data, mmio.len, data); + } else { + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, + fault_ipa, 0); + } if (vgic_handle_mmio(vcpu, run, &mmio)) return 1; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 8664ff17cbbe..1dc9778a00af 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -612,6 +612,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) unmap_range(kvm, kvm->arch.pgd, start, size); } +static void stage2_unmap_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + hva_t hva = memslot->userspace_addr; + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t size = PAGE_SIZE * memslot->npages; + hva_t reg_end = hva + size; + + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we should + * unmap any of them. 
+ * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (!(vma->vm_flags & VM_PFNMAP)) { + gpa_t gpa = addr + (vm_start - memslot->userspace_addr); + unmap_stage2_range(kvm, gpa, vm_end - vm_start); + } + hva = vm_end; + } while (hva < reg_end); +} + +/** + * stage2_unmap_vm - Unmap Stage-2 RAM mappings + * @kvm: The struct kvm pointer + * + * Go through the memregions and unmap any regular RAM + * backing memory already mapped to the VM. + */ +void stage2_unmap_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_unmap_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + /** * kvm_free_stage2_pgd - free all stage-2 tables * @kvm: The KVM struct pointer for the VM. 
@@ -853,6 +918,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct vm_area_struct *vma; pfn_t pfn; pgprot_t mem_type = PAGE_S2; + bool fault_ipa_uncached; write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { @@ -919,6 +985,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!hugetlb && !force_pte) hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT; + if (hugetlb) { pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); @@ -926,7 +994,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pmd_writable(&new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); + coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, + fault_ipa_uncached); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); @@ -934,7 +1003,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); + coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, + fault_ipa_uncached); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); } @@ -1294,11 +1364,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = vm_end; } while (hva < reg_end); - if (ret) { - spin_lock(&kvm->mmu_lock); + spin_lock(&kvm->mmu_lock); + if (ret) unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); - spin_unlock(&kvm->mmu_lock); - } + else + stage2_flush_memslot(kvm, memslot); + spin_unlock(&kvm->mmu_lock); return ret; } @@ -1310,6 +1381,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { + /* + * 
Readonly memslots are not incoherent with the caches by definition, + * but in practice, they are used mostly to emulate ROMs or NOR flashes + * that the guest may consider devices and hence map as uncached. + * To prevent incoherency issues in these cases, tag all readonly + * regions as incoherent. + */ + if (slot->flags & KVM_MEM_READONLY) + slot->flags |= KVM_MEMSLOT_INCOHERENT; return 0; } diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 09cf37737ee2..58cb3248d277 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -15,6 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/preempt.h> #include <linux/kvm_host.h> #include <linux/wait.h> @@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) { + int i; + struct kvm_vcpu *tmp; + + /* + * The KVM ABI specifies that a system event exit may call KVM_RUN + * again and may perform shutdown/reboot at a later time than when the + * actual request is made. Since we are implementing PSCI and a + * caller of PSCI reboot and shutdown expects that the system shuts + * down or reboots immediately, let's make sure that VCPUs are not run + * after this call is handled and before the VCPUs have been + * re-initialized. 
+ */ + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + tmp->arch.pause = true; + kvm_vcpu_kick(tmp); + } + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; diff --git a/arch/arm/mach-davinci/pm_domain.c b/arch/arm/mach-davinci/pm_domain.c index 6b98413cebd6..641edc313938 100644 --- a/arch/arm/mach-davinci/pm_domain.c +++ b/arch/arm/mach-davinci/pm_domain.c @@ -14,7 +14,7 @@ #include <linux/pm_clock.h> #include <linux/platform_device.h> -#ifdef CONFIG_PM_RUNTIME +#ifdef CONFIG_PM static int davinci_pm_runtime_suspend(struct device *dev) { int ret; diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index e4a00bafffc1..603820e5aba7 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -21,7 +21,7 @@ menuconfig ARCH_EXYNOS select HAVE_S3C_RTC if RTC_CLASS select PINCTRL select PINCTRL_EXYNOS - select PM_GENERIC_DOMAINS if PM_RUNTIME + select PM_GENERIC_DOMAINS if PM select S5P_DEV_MFC select SRAM select MFD_SYSCON diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 5057d61298b7..2f7616889c3f 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -31,6 +31,8 @@ #include <linux/micrel_phy.h> #include <linux/mfd/syscon.h> #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> +#include <linux/fec.h> +#include <linux/netdevice.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> #include <asm/system_misc.h> @@ -39,6 +41,35 @@ #include "cpuidle.h" #include "hardware.h" +static struct fec_platform_data fec_pdata; + +static void imx6q_fec_sleep_enable(int enabled) +{ + struct regmap *gpr; + + gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); + if (!IS_ERR(gpr)) { + if (enabled) + regmap_update_bits(gpr, IOMUXC_GPR13, + IMX6Q_GPR13_ENET_STOP_REQ, + IMX6Q_GPR13_ENET_STOP_REQ); + + else + regmap_update_bits(gpr, IOMUXC_GPR13, + IMX6Q_GPR13_ENET_STOP_REQ, 0); + } 
else + pr_err("failed to find fsl,imx6q-iomux-gpr regmap\n"); +} + +static void __init imx6q_enet_plt_init(void) +{ + struct device_node *np; + + np = of_find_node_by_path("/soc/aips-bus@02100000/ethernet@02188000"); + if (np && of_get_property(np, "fsl,magic-packet", NULL)) + fec_pdata.sleep_mode_enable = imx6q_fec_sleep_enable; +} + /* For imx6q sabrelite board: set KSZ9021RN RGMII pad skew */ static int ksz9021rn_phy_fixup(struct phy_device *phydev) { @@ -261,6 +292,12 @@ static void __init imx6q_axi_init(void) } } +/* Add auxdata to pass platform data */ +static const struct of_dev_auxdata imx6q_auxdata_lookup[] __initconst = { + OF_DEV_AUXDATA("fsl,imx6q-fec", 0x02188000, NULL, &fec_pdata), + { /* sentinel */ } +}; + static void __init imx6q_init_machine(void) { struct device *parent; @@ -274,11 +311,13 @@ static void __init imx6q_init_machine(void) imx6q_enet_phy_init(); - of_platform_populate(NULL, of_default_bus_match_table, NULL, parent); + of_platform_populate(NULL, of_default_bus_match_table, + imx6q_auxdata_lookup, parent); imx_anatop_init(); cpu_is_imx6q() ? 
imx6q_pm_init() : imx6dl_pm_init(); imx6q_1588_init(); + imx6q_enet_plt_init(); imx6q_axi_init(); } diff --git a/arch/arm/mach-imx/mach-imx6sx.c b/arch/arm/mach-imx/mach-imx6sx.c index 7a96c6577234..747b012665f5 100644 --- a/arch/arm/mach-imx/mach-imx6sx.c +++ b/arch/arm/mach-imx/mach-imx6sx.c @@ -12,12 +12,62 @@ #include <linux/regmap.h> #include <linux/mfd/syscon.h> #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> +#include <linux/fec.h> +#include <linux/netdevice.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> #include "common.h" #include "cpuidle.h" +static struct fec_platform_data fec_pdata[2]; + +static void imx6sx_fec1_sleep_enable(int enabled) +{ + struct regmap *gpr; + + gpr = syscon_regmap_lookup_by_compatible("fsl,imx6sx-iomuxc-gpr"); + if (!IS_ERR(gpr)) { + if (enabled) + regmap_update_bits(gpr, IOMUXC_GPR4, + IMX6SX_GPR4_FEC_ENET1_STOP_REQ, + IMX6SX_GPR4_FEC_ENET1_STOP_REQ); + else + regmap_update_bits(gpr, IOMUXC_GPR4, + IMX6SX_GPR4_FEC_ENET1_STOP_REQ, 0); + } else + pr_err("failed to find fsl,imx6sx-iomux-gpr regmap\n"); +} + +static void imx6sx_fec2_sleep_enable(int enabled) +{ + struct regmap *gpr; + + gpr = syscon_regmap_lookup_by_compatible("fsl,imx6sx-iomuxc-gpr"); + if (!IS_ERR(gpr)) { + if (enabled) + regmap_update_bits(gpr, IOMUXC_GPR4, + IMX6SX_GPR4_FEC_ENET2_STOP_REQ, + IMX6SX_GPR4_FEC_ENET2_STOP_REQ); + else + regmap_update_bits(gpr, IOMUXC_GPR4, + IMX6SX_GPR4_FEC_ENET2_STOP_REQ, 0); + } else + pr_err("failed to find fsl,imx6sx-iomux-gpr regmap\n"); +} + +static void __init imx6sx_enet_plt_init(void) +{ + struct device_node *np; + + np = of_find_node_by_path("/soc/aips-bus@02100000/ethernet@02188000"); + if (np && of_get_property(np, "fsl,magic-packet", NULL)) + fec_pdata[0].sleep_mode_enable = imx6sx_fec1_sleep_enable; + np = of_find_node_by_path("/soc/aips-bus@02100000/ethernet@021b4000"); + if (np && of_get_property(np, "fsl,magic-packet", NULL)) + fec_pdata[1].sleep_mode_enable = imx6sx_fec2_sleep_enable; +} + static int 
ar8031_phy_fixup(struct phy_device *dev) { u16 val; diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c index ca79ddac38bc..ef6041e7e675 100644 --- a/arch/arm/mach-keystone/pm_domain.c +++ b/arch/arm/mach-keystone/pm_domain.c @@ -19,7 +19,7 @@ #include <linux/clk-provider.h> #include <linux/of.h> -#ifdef CONFIG_PM_RUNTIME +#ifdef CONFIG_PM static int keystone_pm_runtime_suspend(struct device *dev) { int ret; diff --git a/arch/arm/mach-mmp/Kconfig b/arch/arm/mach-mmp/Kconfig index ebdba87b9671..fdbfadf00c84 100644 --- a/arch/arm/mach-mmp/Kconfig +++ b/arch/arm/mach-mmp/Kconfig @@ -86,11 +86,12 @@ config MACH_GPLUGD config MACH_MMP_DT bool "Support MMP (ARMv5) platforms from device tree" - select CPU_PXA168 - select CPU_PXA910 select USE_OF select PINCTRL select PINCTRL_SINGLE + select COMMON_CLK + select ARCH_HAS_RESET_CONTROLLER + select CPU_MOHAWK help Include support for Marvell MMP2 based platforms using the device tree. Needn't select any other machine while @@ -99,10 +100,12 @@ config MACH_MMP_DT config MACH_MMP2_DT bool "Support MMP2 (ARMv7) platforms from device tree" depends on !CPU_MOHAWK - select CPU_MMP2 select USE_OF select PINCTRL select PINCTRL_SINGLE + select COMMON_CLK + select ARCH_HAS_RESET_CONTROLLER + select CPU_PJ4 help Include support for Marvell MMP2 based platforms using the device tree. @@ -111,21 +114,18 @@ endmenu config CPU_PXA168 bool - select COMMON_CLK select CPU_MOHAWK help Select code specific to PXA168 config CPU_PXA910 bool - select COMMON_CLK select CPU_MOHAWK help Select code specific to PXA910 config CPU_MMP2 bool - select COMMON_CLK select CPU_PJ4 help Select code specific to MMP2. MMP2 is ARMv7 compatible. 
diff --git a/arch/arm/mach-mmp/mmp-dt.c b/arch/arm/mach-mmp/mmp-dt.c index cca529ceecb7..b2296c9309b8 100644 --- a/arch/arm/mach-mmp/mmp-dt.c +++ b/arch/arm/mach-mmp/mmp-dt.c @@ -11,63 +11,42 @@ #include <linux/irqchip.h> #include <linux/of_platform.h> +#include <linux/clk-provider.h> #include <asm/mach/arch.h> #include <asm/mach/time.h> +#include <asm/hardware/cache-tauros2.h> #include "common.h" extern void __init mmp_dt_init_timer(void); -static const struct of_dev_auxdata pxa168_auxdata_lookup[] __initconst = { - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.0", NULL), - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.1", NULL), - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4026000, "pxa2xx-uart.2", NULL), - OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4011000, "pxa2xx-i2c.0", NULL), - OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4025000, "pxa2xx-i2c.1", NULL), - OF_DEV_AUXDATA("marvell,mmp-gpio", 0xd4019000, "mmp-gpio", NULL), - OF_DEV_AUXDATA("mrvl,mmp-rtc", 0xd4010000, "sa1100-rtc", NULL), - {} +static const char *pxa168_dt_board_compat[] __initdata = { + "mrvl,pxa168-aspenite", + NULL, }; -static const struct of_dev_auxdata pxa910_auxdata_lookup[] __initconst = { - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.0", NULL), - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.1", NULL), - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4036000, "pxa2xx-uart.2", NULL), - OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4011000, "pxa2xx-i2c.0", NULL), - OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4037000, "pxa2xx-i2c.1", NULL), - OF_DEV_AUXDATA("marvell,mmp-gpio", 0xd4019000, "mmp-gpio", NULL), - OF_DEV_AUXDATA("mrvl,mmp-rtc", 0xd4010000, "sa1100-rtc", NULL), - {} +static const char *pxa910_dt_board_compat[] __initdata = { + "mrvl,pxa910-dkb", + NULL, }; -static void __init pxa168_dt_init(void) -{ - of_platform_populate(NULL, of_default_bus_match_table, - pxa168_auxdata_lookup, NULL); -} - -static void __init pxa910_dt_init(void) +static void __init mmp_init_time(void) { - 
of_platform_populate(NULL, of_default_bus_match_table, - pxa910_auxdata_lookup, NULL); +#ifdef CONFIG_CACHE_TAUROS2 + tauros2_init(0); +#endif + mmp_dt_init_timer(); + of_clk_init(NULL); } -static const char *mmp_dt_board_compat[] __initdata = { - "mrvl,pxa168-aspenite", - "mrvl,pxa910-dkb", - NULL, -}; - DT_MACHINE_START(PXA168_DT, "Marvell PXA168 (Device Tree Support)") .map_io = mmp_map_io, - .init_time = mmp_dt_init_timer, - .init_machine = pxa168_dt_init, - .dt_compat = mmp_dt_board_compat, + .init_time = mmp_init_time, + .dt_compat = pxa168_dt_board_compat, MACHINE_END DT_MACHINE_START(PXA910_DT, "Marvell PXA910 (Device Tree Support)") .map_io = mmp_map_io, - .init_time = mmp_dt_init_timer, - .init_machine = pxa910_dt_init, - .dt_compat = mmp_dt_board_compat, + .init_time = mmp_init_time, + .dt_compat = pxa910_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-mmp/mmp2-dt.c b/arch/arm/mach-mmp/mmp2-dt.c index 023cb453f157..998c0f533abc 100644 --- a/arch/arm/mach-mmp/mmp2-dt.c +++ b/arch/arm/mach-mmp/mmp2-dt.c @@ -12,29 +12,22 @@ #include <linux/io.h> #include <linux/irqchip.h> #include <linux/of_platform.h> +#include <linux/clk-provider.h> #include <asm/mach/arch.h> #include <asm/mach/time.h> +#include <asm/hardware/cache-tauros2.h> #include "common.h" extern void __init mmp_dt_init_timer(void); -static const struct of_dev_auxdata mmp2_auxdata_lookup[] __initconst = { - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4030000, "pxa2xx-uart.0", NULL), - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.1", NULL), - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.2", NULL), - OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4016000, "pxa2xx-uart.3", NULL), - OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4011000, "pxa2xx-i2c.0", NULL), - OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4025000, "pxa2xx-i2c.1", NULL), - OF_DEV_AUXDATA("marvell,mmp-gpio", 0xd4019000, "mmp2-gpio", NULL), - OF_DEV_AUXDATA("mrvl,mmp-rtc", 0xd4010000, "sa1100-rtc", NULL), - {} -}; - -static void __init 
mmp2_dt_init(void) +static void __init mmp_init_time(void) { - of_platform_populate(NULL, of_default_bus_match_table, - mmp2_auxdata_lookup, NULL); +#ifdef CONFIG_CACHE_TAUROS2 + tauros2_init(0); +#endif + mmp_dt_init_timer(); + of_clk_init(NULL); } static const char *mmp2_dt_board_compat[] __initdata = { @@ -44,7 +37,6 @@ static const char *mmp2_dt_board_compat[] __initdata = { DT_MACHINE_START(MMP2_DT, "Marvell MMP2 (Device Tree Support)") .map_io = mmp_map_io, - .init_time = mmp_dt_init_timer, - .init_machine = mmp2_dt_init, + .init_time = mmp_init_time, .dt_compat = mmp2_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c index 3f2d39672393..c40e209de65c 100644 --- a/arch/arm/mach-omap1/pm_bus.c +++ b/arch/arm/mach-omap1/pm_bus.c @@ -21,7 +21,7 @@ #include "soc.h" -#ifdef CONFIG_PM_RUNTIME +#ifdef CONFIG_PM static int omap1_pm_runtime_suspend(struct device *dev) { int ret; @@ -59,7 +59,7 @@ static struct dev_pm_domain default_pm_domain = { #define OMAP1_PM_DOMAIN (&default_pm_domain) #else #define OMAP1_PM_DOMAIN NULL -#endif /* CONFIG_PM_RUNTIME */ +#endif /* CONFIG_PM */ static struct pm_clk_notifier_block platform_bus_notifier = { .pm_domain = OMAP1_PM_DOMAIN, diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index f0edec199cd4..6ab656cc4f16 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -15,7 +15,7 @@ config ARCH_OMAP3 select ARM_CPU_SUSPEND if PM select OMAP_INTERCONNECT select PM_OPP if PM - select PM_RUNTIME if CPU_IDLE + select PM if CPU_IDLE select SOC_HAS_OMAP2_SDRC config ARCH_OMAP4 @@ -32,7 +32,7 @@ config ARCH_OMAP4 select PL310_ERRATA_588369 if CACHE_L2X0 select PL310_ERRATA_727915 if CACHE_L2X0 select PM_OPP if PM - select PM_RUNTIME if CPU_IDLE + select PM if CPU_IDLE select ARM_ERRATA_754322 select ARM_ERRATA_775420 @@ -103,7 +103,7 @@ config ARCH_OMAP2PLUS_TYPICAL select I2C_OMAP select MENELAUS if ARCH_OMAP2 select NEON if CPU_V7 - select 
PM_RUNTIME + select PM select REGULATOR select TWL4030_CORE if ARCH_OMAP3 || ARCH_OMAP4 select TWL4030_POWER if ARCH_OMAP3 || ARCH_OMAP4 diff --git a/arch/arm/mach-omap2/cclock3xxx_data.c b/arch/arm/mach-omap2/cclock3xxx_data.c index 5c5ebb4db5f7..644ff3231bb8 100644 --- a/arch/arm/mach-omap2/cclock3xxx_data.c +++ b/arch/arm/mach-omap2/cclock3xxx_data.c @@ -111,6 +111,7 @@ static struct clk dpll3_ck; static const char *dpll3_ck_parent_names[] = { "sys_ck", + "sys_ck", }; static const struct clk_ops dpll3_ck_ops = { @@ -733,6 +734,10 @@ static const char *corex2_fck_parent_names[] = { DEFINE_STRUCT_CLK_HW_OMAP(corex2_fck, NULL); DEFINE_STRUCT_CLK(corex2_fck, corex2_fck_parent_names, core_ck_ops); +static const char *cpefuse_fck_parent_names[] = { + "sys_ck", +}; + static struct clk cpefuse_fck; static struct clk_hw_omap cpefuse_fck_hw = { @@ -744,7 +749,7 @@ static struct clk_hw_omap cpefuse_fck_hw = { .clkdm_name = "core_l4_clkdm", }; -DEFINE_STRUCT_CLK(cpefuse_fck, dpll3_ck_parent_names, aes2_ick_ops); +DEFINE_STRUCT_CLK(cpefuse_fck, cpefuse_fck_parent_names, aes2_ick_ops); static struct clk csi2_96m_fck; @@ -775,7 +780,7 @@ static struct clk_hw_omap d2d_26m_fck_hw = { .clkdm_name = "d2d_clkdm", }; -DEFINE_STRUCT_CLK(d2d_26m_fck, dpll3_ck_parent_names, aes2_ick_ops); +DEFINE_STRUCT_CLK(d2d_26m_fck, cpefuse_fck_parent_names, aes2_ick_ops); static struct clk des1_ick; @@ -1046,7 +1051,7 @@ static struct clk_hw_omap dss2_alwon_fck_hw = { .clkdm_name = "dss_clkdm", }; -DEFINE_STRUCT_CLK(dss2_alwon_fck, dpll3_ck_parent_names, aes2_ick_ops); +DEFINE_STRUCT_CLK(dss2_alwon_fck, cpefuse_fck_parent_names, aes2_ick_ops); static struct clk dss_96m_fck; @@ -1368,7 +1373,7 @@ DEFINE_STRUCT_CLK(gpio1_dbck, gpio1_dbck_parent_names, aes2_ick_ops); static struct clk wkup_l4_ick; DEFINE_STRUCT_CLK_HW_OMAP(wkup_l4_ick, "wkup_clkdm"); -DEFINE_STRUCT_CLK(wkup_l4_ick, dpll3_ck_parent_names, core_l4_ick_ops); +DEFINE_STRUCT_CLK(wkup_l4_ick, cpefuse_fck_parent_names, core_l4_ick_ops); 
static struct clk gpio1_ick; @@ -1862,7 +1867,7 @@ static struct clk_hw_omap hecc_ck_hw = { .clkdm_name = "core_l3_clkdm", }; -DEFINE_STRUCT_CLK(hecc_ck, dpll3_ck_parent_names, aes2_ick_ops); +DEFINE_STRUCT_CLK(hecc_ck, cpefuse_fck_parent_names, aes2_ick_ops); static struct clk hsotgusb_fck_am35xx; @@ -1875,7 +1880,7 @@ static struct clk_hw_omap hsotgusb_fck_am35xx_hw = { .clkdm_name = "core_l3_clkdm", }; -DEFINE_STRUCT_CLK(hsotgusb_fck_am35xx, dpll3_ck_parent_names, aes2_ick_ops); +DEFINE_STRUCT_CLK(hsotgusb_fck_am35xx, cpefuse_fck_parent_names, aes2_ick_ops); static struct clk hsotgusb_ick_3430es1; @@ -2411,7 +2416,7 @@ static struct clk_hw_omap modem_fck_hw = { .clkdm_name = "d2d_clkdm", }; -DEFINE_STRUCT_CLK(modem_fck, dpll3_ck_parent_names, aes2_ick_ops); +DEFINE_STRUCT_CLK(modem_fck, cpefuse_fck_parent_names, aes2_ick_ops); static struct clk mspro_fck; @@ -2710,7 +2715,7 @@ static struct clk_hw_omap sr1_fck_hw = { .clkdm_name = "wkup_clkdm", }; -DEFINE_STRUCT_CLK(sr1_fck, dpll3_ck_parent_names, aes2_ick_ops); +DEFINE_STRUCT_CLK(sr1_fck, cpefuse_fck_parent_names, aes2_ick_ops); static struct clk sr2_fck; @@ -2724,7 +2729,7 @@ static struct clk_hw_omap sr2_fck_hw = { .clkdm_name = "wkup_clkdm", }; -DEFINE_STRUCT_CLK(sr2_fck, dpll3_ck_parent_names, aes2_ick_ops); +DEFINE_STRUCT_CLK(sr2_fck, cpefuse_fck_parent_names, aes2_ick_ops); static struct clk sr_l4_ick; diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index 641337c6cde9..a4282e79143e 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -270,8 +270,6 @@ extern const struct clksel_rate div31_1to31_rates[]; extern void __iomem *clk_memmaps[]; -extern int am33xx_clk_init(void); - extern int omap2_clkops_enable_clkdm(struct clk_hw *hw); extern void omap2_clkops_disable_clkdm(struct clk_hw *hw); diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index 20e120d071dd..c2da2a0fe5ad 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ 
b/arch/arm/mach-omap2/dpll3xxx.c @@ -474,7 +474,7 @@ void omap3_noncore_dpll_disable(struct clk_hw *hw) */ long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long rate, unsigned long *best_parent_rate, - struct clk **best_parent_clk) + struct clk_hw **best_parent_clk) { struct clk_hw_omap *clk = to_clk_hw_omap(hw); struct dpll_data *dd; @@ -488,10 +488,10 @@ long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long rate, if (__clk_get_rate(dd->clk_bypass) == rate && (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { - *best_parent_clk = dd->clk_bypass; + *best_parent_clk = __clk_get_hw(dd->clk_bypass); } else { rate = omap2_dpll_round_rate(hw, rate, best_parent_rate); - *best_parent_clk = dd->clk_ref; + *best_parent_clk = __clk_get_hw(dd->clk_ref); } *best_parent_rate = rate; diff --git a/arch/arm/mach-omap2/dpll44xx.c b/arch/arm/mach-omap2/dpll44xx.c index 535822fcf4bb..0e58e5a85d53 100644 --- a/arch/arm/mach-omap2/dpll44xx.c +++ b/arch/arm/mach-omap2/dpll44xx.c @@ -223,7 +223,7 @@ out: */ long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long rate, unsigned long *best_parent_rate, - struct clk **best_parent_clk) + struct clk_hw **best_parent_clk) { struct clk_hw_omap *clk = to_clk_hw_omap(hw); struct dpll_data *dd; @@ -237,11 +237,11 @@ long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long rate, if (__clk_get_rate(dd->clk_bypass) == rate && (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { - *best_parent_clk = dd->clk_bypass; + *best_parent_clk = __clk_get_hw(dd->clk_bypass); } else { rate = omap4_dpll_regm4xen_round_rate(hw, rate, best_parent_rate); - *best_parent_clk = dd->clk_ref; + *best_parent_clk = __clk_get_hw(dd->clk_ref); } *best_parent_rate = rate; diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 53841dea80ea..c25feba05818 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -471,11 +471,15 @@ void __init omap3xxx_check_revision(void) cpu_rev = "1.0"; 
break; case 1: - /* FALLTHROUGH */ - default: omap_revision = AM437X_REV_ES1_1; cpu_rev = "1.1"; break; + case 2: + /* FALLTHROUGH */ + default: + omap_revision = AM437X_REV_ES1_2; + cpu_rev = "1.2"; + break; } break; case 0xb8f2: diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 4fc838354e31..a1bd6affb508 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -361,7 +361,7 @@ static void __init omap_hwmod_init_postsetup(void) u8 postsetup_state; /* Set the default postsetup state for all hwmods */ -#ifdef CONFIG_PM_RUNTIME +#ifdef CONFIG_PM postsetup_state = _HWMOD_STATE_IDLE; #else postsetup_state = _HWMOD_STATE_ENABLED; diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index 8c58b71c2727..be9541e18650 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -588,7 +588,7 @@ odbs_exit: return ERR_PTR(ret); } -#ifdef CONFIG_PM_RUNTIME +#ifdef CONFIG_PM static int _od_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); diff --git a/arch/arm/mach-omap2/soc.h b/arch/arm/mach-omap2/soc.h index 4376f59626d1..c1a3b4416311 100644 --- a/arch/arm/mach-omap2/soc.h +++ b/arch/arm/mach-omap2/soc.h @@ -446,6 +446,7 @@ IS_OMAP_TYPE(3430, 0x3430) #define AM437X_CLASS 0x43700000 #define AM437X_REV_ES1_0 (AM437X_CLASS | (0x10 << 8)) #define AM437X_REV_ES1_1 (AM437X_CLASS | (0x11 << 8)) +#define AM437X_REV_ES1_2 (AM437X_CLASS | (0x12 << 8)) #define OMAP443X_CLASS 0x44300044 #define OMAP4430_REV_ES1_0 (OMAP443X_CLASS | (0x10 << 8)) diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 7dd894ece9ae..d28ecb9ef172 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -37,7 +37,7 @@ #include <asm/mach/arch.h> #include <asm/mach/flash.h> -#include <asm/mach/irda.h> +#include <linux/platform_data/irda-sa11x0.h> #include <asm/mach/map.h> #include <mach/assabet.h> #include 
<linux/platform_data/mfd-mcp-sa11x0.h> diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index b90c7d828391..7fcbe3d119c7 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -43,7 +43,7 @@ #include <asm/mach/arch.h> #include <asm/mach/flash.h> #include <asm/mach/map.h> -#include <asm/mach/irda.h> +#include <linux/platform_data/irda-sa11x0.h> #include <asm/hardware/scoop.h> #include <asm/mach/sharpsl_param.h> diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c index 3c43219bc881..c6b412054a3c 100644 --- a/arch/arm/mach-sa1100/h3100.c +++ b/arch/arm/mach-sa1100/h3100.c @@ -18,7 +18,7 @@ #include <asm/mach-types.h> #include <asm/mach/arch.h> -#include <asm/mach/irda.h> +#include <linux/platform_data/irda-sa11x0.h> #include <mach/h3xxx.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 5be54c214c7c..118338efd790 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -18,7 +18,7 @@ #include <asm/mach-types.h> #include <asm/mach/arch.h> -#include <asm/mach/irda.h> +#include <linux/platform_data/irda-sa11x0.h> #include <mach/h3xxx.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index b47262afb240..f8197eb6e566 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -32,7 +32,6 @@ #include <linux/pinctrl/machine.h> #include <linux/platform_data/camera-rcar.h> #include <linux/platform_data/gpio-rcar.h> -#include <linux/platform_data/rcar-du.h> #include <linux/platform_data/usb-rcar-gen2-phy.h> #include <linux/platform_device.h> #include <linux/phy.h> @@ -83,61 +82,6 @@ * */ -/* DU */ -static struct rcar_du_encoder_data lager_du_encoders[] = { - { - .type = RCAR_DU_ENCODER_VGA, - .output = RCAR_DU_OUTPUT_DPAD0, - }, { - .type = RCAR_DU_ENCODER_NONE, - .output = RCAR_DU_OUTPUT_LVDS1, - .connector.lvds.panel = { - 
.width_mm = 210, - .height_mm = 158, - .mode = { - .pixelclock = 65000000, - .hactive = 1024, - .hfront_porch = 20, - .hback_porch = 160, - .hsync_len = 136, - .vactive = 768, - .vfront_porch = 3, - .vback_porch = 29, - .vsync_len = 6, - }, - }, - }, -}; - -static const struct rcar_du_platform_data lager_du_pdata __initconst = { - .encoders = lager_du_encoders, - .num_encoders = ARRAY_SIZE(lager_du_encoders), -}; - -static const struct resource du_resources[] __initconst = { - DEFINE_RES_MEM(0xfeb00000, 0x70000), - DEFINE_RES_MEM_NAMED(0xfeb90000, 0x1c, "lvds.0"), - DEFINE_RES_MEM_NAMED(0xfeb94000, 0x1c, "lvds.1"), - DEFINE_RES_IRQ(gic_spi(256)), - DEFINE_RES_IRQ(gic_spi(268)), - DEFINE_RES_IRQ(gic_spi(269)), -}; - -static void __init lager_add_du_device(void) -{ - struct platform_device_info info = { - .name = "rcar-du-r8a7790", - .id = -1, - .res = du_resources, - .num_res = ARRAY_SIZE(du_resources), - .data = &lager_du_pdata, - .size_data = sizeof(lager_du_pdata), - .dma_mask = DMA_BIT_MASK(32), - }; - - platform_device_register_full(&info); -} - /* LEDS */ static struct gpio_led lager_leds[] = { { @@ -800,8 +744,6 @@ static void __init lager_add_standard_devices(void) platform_device_register_full(&ether_info); - lager_add_du_device(); - platform_device_register_resndata(NULL, "qspi", 0, qspi_resources, ARRAY_SIZE(qspi_resources), diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index 994dc7d86ae2..598f704f76ae 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -27,7 +27,6 @@ #include <linux/pinctrl/machine.h> #include <linux/platform_data/camera-rcar.h> #include <linux/platform_data/gpio-rcar.h> -#include <linux/platform_data/rcar-du.h> #include <linux/platform_data/usb-rcar-phy.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> @@ -171,62 +170,6 @@ static struct platform_device hspi_device = { .num_resources = ARRAY_SIZE(hspi_resources), }; -/* - * DU 
- * - * The panel only specifies the [hv]display and [hv]total values. The position - * and width of the sync pulses don't matter, they're copied from VESA timings. - */ -static struct rcar_du_encoder_data du_encoders[] = { - { - .type = RCAR_DU_ENCODER_VGA, - .output = RCAR_DU_OUTPUT_DPAD0, - }, { - .type = RCAR_DU_ENCODER_LVDS, - .output = RCAR_DU_OUTPUT_DPAD1, - .connector.lvds.panel = { - .width_mm = 210, - .height_mm = 158, - .mode = { - .pixelclock = 65000000, - .hactive = 1024, - .hfront_porch = 20, - .hback_porch = 160, - .hsync_len = 136, - .vactive = 768, - .vfront_porch = 3, - .vback_porch = 29, - .vsync_len = 6, - }, - }, - }, -}; - -static const struct rcar_du_platform_data du_pdata __initconst = { - .encoders = du_encoders, - .num_encoders = ARRAY_SIZE(du_encoders), -}; - -static const struct resource du_resources[] __initconst = { - DEFINE_RES_MEM(0xfff80000, 0x40000), - DEFINE_RES_IRQ(gic_iid(0x3f)), -}; - -static void __init marzen_add_du_device(void) -{ - struct platform_device_info info = { - .name = "rcar-du-r8a7779", - .id = -1, - .res = du_resources, - .num_res = ARRAY_SIZE(du_resources), - .data = &du_pdata, - .size_data = sizeof(du_pdata), - .dma_mask = DMA_BIT_MASK(32), - }; - - platform_device_register_full(&info); -} - /* LEDS */ static struct gpio_led marzen_leds[] = { { @@ -385,7 +328,6 @@ static void __init marzen_init(void) platform_device_register_full(&vin1_info); platform_device_register_full(&vin3_info); platform_add_devices(marzen_devices, ARRAY_SIZE(marzen_devices)); - marzen_add_du_device(); } static const char *marzen_boards_compat_dt[] __initdata = { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e8907117861e..7864797609b3 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1947,9 +1947,8 @@ EXPORT_SYMBOL_GPL(arm_iommu_release_mapping); * arm_iommu_create_mapping) * * Attaches specified io address space mapping to the provided device, - * this replaces the dma operations 
(dma_map_ops pointer) with the - * IOMMU aware version. More than one client might be attached to - * the same io address space mapping. + * More than one client might be attached to the same io address space + * mapping. */ int arm_iommu_attach_device(struct device *dev, struct dma_iommu_mapping *mapping) @@ -1962,7 +1961,6 @@ int arm_iommu_attach_device(struct device *dev, kref_get(&mapping->kref); dev->archdata.mapping = mapping; - set_dma_ops(dev, &iommu_ops); pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); return 0; @@ -1974,7 +1972,6 @@ EXPORT_SYMBOL_GPL(arm_iommu_attach_device); * @dev: valid struct device pointer * * Detaches the provided device from a previously attached map. - * This voids the dma operations (dma_map_ops pointer) */ void arm_iommu_detach_device(struct device *dev) { @@ -1989,10 +1986,83 @@ void arm_iommu_detach_device(struct device *dev) iommu_detach_device(mapping->domain, dev); kref_put(&mapping->kref, release_iommu_mapping); dev->archdata.mapping = NULL; - set_dma_ops(dev, NULL); pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); } EXPORT_SYMBOL_GPL(arm_iommu_detach_device); -#endif +static struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent) +{ + return coherent ? 
&iommu_coherent_ops : &iommu_ops; +} + +static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu) +{ + struct dma_iommu_mapping *mapping; + + if (!iommu) + return false; + + mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); + if (IS_ERR(mapping)) { + pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", + size, dev_name(dev)); + return false; + } + + if (arm_iommu_attach_device(dev, mapping)) { + pr_warn("Failed to attached device %s to IOMMU_mapping\n", + dev_name(dev)); + arm_iommu_release_mapping(mapping); + return false; + } + + return true; +} + +static void arm_teardown_iommu_dma_ops(struct device *dev) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + + arm_iommu_detach_device(dev); + arm_iommu_release_mapping(mapping); +} + +#else + +static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu) +{ + return false; +} + +static void arm_teardown_iommu_dma_ops(struct device *dev) { } + +#define arm_get_iommu_dma_map_ops arm_get_dma_map_ops + +#endif /* CONFIG_ARM_DMA_USE_IOMMU */ + +static struct dma_map_ops *arm_get_dma_map_ops(bool coherent) +{ + return coherent ? 
&arm_coherent_dma_ops : &arm_dma_ops; +} + +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent) +{ + struct dma_map_ops *dma_ops; + + dev->archdata.dma_coherent = coherent; + if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu)) + dma_ops = arm_get_iommu_dma_map_ops(coherent); + else + dma_ops = arm_get_dma_map_ops(coherent); + + set_dma_ops(dev, dma_ops); +} + +void arch_teardown_dma_ops(struct device *dev) +{ + arm_teardown_iommu_dma_ops(dev); +} diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 688db03ef5b8..b1f9a20a3677 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -14,7 +14,9 @@ config ARM64 select ARM_ARCH_TIMER select ARM_GIC select AUDIT_ARCH_COMPAT_GENERIC + select ARM_GIC_V2M if PCI_MSI select ARM_GIC_V3 + select ARM_GIC_V3_ITS if PCI_MSI select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index dd301be89ecc..5376d908eabe 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,6 +1,7 @@ # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y @@ -13,14 +14,12 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_RESOURCE_COUNTERS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_MEMCG_KMEM=y CONFIG_CGROUP_HUGETLB=y # CONFIG_UTS_NS is not set # CONFIG_IPC_NS is not set -# CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y @@ -92,7 +91,6 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_VIRTIO_CONSOLE=y # CONFIG_HW_RANDOM is not set -# CONFIG_HMC_DRV is not set CONFIG_SPI=y CONFIG_SPI_PL022=y CONFIG_GPIO_PL061=y @@ -133,6 +131,8 @@ CONFIG_EXT3_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y +CONFIG_QUOTA=y 
+CONFIG_AUTOFS4_FS=y CONFIG_FUSE_FS=y CONFIG_CUSE=y CONFIG_VFAT_FS=y @@ -152,14 +152,15 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_LOCKUP_DETECTOR=y # CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set # CONFIG_FTRACE is not set +CONFIG_KEYS=y CONFIG_SECURITY=y CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_ARM64_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y CONFIG_CRYPTO_GHASH_ARM64_CE=y -CONFIG_CRYPTO_AES_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE_BLK=y CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 6b61091c7f4c..55103e50c51b 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -27,6 +27,7 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h +generic-y += msi.h generic-y += mutex.h generic-y += pci.h generic-y += pci-bridge.h diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index d34189bceff7..9ce3e680ae1c 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -52,13 +52,14 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) dev->archdata.dma_ops = ops; } -static inline int set_arch_dma_coherent_ops(struct device *dev) +static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent) { - dev->archdata.dma_coherent = true; - set_dma_ops(dev, &coherent_swiotlb_dma_ops); - return 0; + dev->archdata.dma_coherent = coherent; + if (coherent) + set_dma_ops(dev, &coherent_swiotlb_dma_ops); } -#define set_arch_dma_coherent_ops set_arch_dma_coherent_ops +#define arch_setup_dma_ops arch_setup_dma_ops /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 
5674a55b5518..8127e45e2637 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; +} + static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2012c4ba8d67..0b7dfdb931df 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -165,8 +165,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); @@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); +void force_vm_exit(const cpumask_t *mask); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 0caf7a59f6a1..14a74f136272 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, @@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) } static 
inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); if (!icache_is_aliasing()) { /* PIPT */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index df22314f57cf..210d632aa5ad 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -298,7 +298,6 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, #define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) -#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) #define pud_write(pud) pte_write(pud_pte(pud)) #define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT) @@ -401,7 +400,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); } -#define pud_page(pud) pmd_page(pud_pmd(pud)) +#define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) #endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */ @@ -437,6 +436,8 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); } +#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) + #endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index c45b7b1b7197..cee128732435 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -99,12 +99,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - return 
!arch_spin_value_unlocked(ACCESS_ONCE(*lock)); + return !arch_spin_value_unlocked(READ_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - arch_spinlock_t lockval = ACCESS_ONCE(*lock); + arch_spinlock_t lockval = READ_ONCE(*lock); return (lockval.next - lockval.owner) > 1; } #define arch_spin_is_contended arch_spin_is_contended diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 3425f311c49e..f1dbca7d5c96 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -540,6 +540,8 @@ const struct cpu_operations cpu_psci_ops = { .name = "psci", #ifdef CONFIG_CPU_IDLE .cpu_init_idle = cpu_psci_cpu_init_idle, +#endif +#ifdef CONFIG_ARM64_CPU_SUSPEND .cpu_suspend = cpu_psci_cpu_suspend, #endif #ifdef CONFIG_SMP diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 3771b72b6569..2d6b6065fe7f 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -5,6 +5,7 @@ #include <asm/debug-monitors.h> #include <asm/pgtable.h> #include <asm/memory.h> +#include <asm/mmu_context.h> #include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/tlbflush.h> @@ -98,7 +99,18 @@ int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ ret = __cpu_suspend_enter(arg, fn); if (ret == 0) { - cpu_switch_mm(mm->pgd, mm); + /* + * We are resuming from reset with TTBR0_EL1 set to the + * idmap to enable the MMU; restore the active_mm mappings in + * TTBR0_EL1 unless the active_mm == &init_mm, in which case + * the thread entered __cpu_suspend with TTBR0_EL1 set to + * reserved TTBR0 page tables and should be restored as such. 
+ */ + if (mm == &init_mm) + cpu_set_reserved_ttbr0(); + else + cpu_switch_mm(mm->pgd, mm); + flush_tlb_all(); /* diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 76794692c20b..9535bd555d1d 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; return 0; } @@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void) return -EINVAL; } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - int phys_target = kvm_target_cpu(); - - if (init->target != phys_target) - return -EINVAL; - - vcpu->arch.target = phys_target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (init->features[i / 32] & (1 << (i % 32))) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. 
*/ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index bf69601be546..cf33f33333cc 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -182,9 +182,6 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, static const char units[] = "KMGTPE"; u64 prot = val & pg_level[level].mask; - if (addr < LOWEST_ADDR) - return; - if (!st->level) { st->level = level; st->current_prot = prot; @@ -272,7 +269,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) { - pgd_t *pgd = pgd_offset(mm, 0); + pgd_t *pgd = pgd_offset(mm, 0UL); unsigned i; unsigned long addr; diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c index b0a608da7bd1..b964c667aced 100644 --- a/arch/cris/arch-v10/lib/usercopy.c +++ b/arch/cris/arch-v10/lib/usercopy.c @@ -30,8 +30,7 @@ /* Copy to userspace. This is based on the memcpy used for kernel-to-kernel copying; see "string.c". */ -unsigned long -__copy_user (void __user *pdst, const void *psrc, unsigned long pn) +unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -187,13 +186,14 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn) return retn; } +EXPORT_SYMBOL(__copy_user); /* Copy from user to kernel, zeroing the bytes that were inaccessible in userland. The return-value is the number of bytes that were inaccessible. */ -unsigned long -__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn) +unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, + unsigned long pn) { /* We want the parameters put in special registers. 
Make sure the compiler is able to make something useful of this. @@ -369,11 +369,10 @@ copy_exception_bytes: return retn + n; } +EXPORT_SYMBOL(__copy_user_zeroing); /* Zero userspace. */ - -unsigned long -__do_clear_user (void __user *pto, unsigned long pn) +unsigned long __do_clear_user(void __user *pto, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -521,3 +520,4 @@ __do_clear_user (void __user *pto, unsigned long pn) return retn; } +EXPORT_SYMBOL(__do_clear_user); diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index 15a9ed1d579c..4fc16b44fff2 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -108,6 +108,7 @@ config ETRAX_AXISFLASHMAP select MTD_JEDECPROBE select MTD_BLOCK select MTD_COMPLEX_MAPPINGS + select MTD_MTDRAM help This option enables MTD mapping of flash devices. Needed to use flash memories. If unsure, say Y. @@ -358,13 +359,6 @@ config ETRAX_SPI_MMC default MMC select SPI select MMC_SPI - select ETRAX_SPI_MMC_BOARD - -# For the parts that can't be a module (due to restrictions in -# framework elsewhere). 
-config ETRAX_SPI_MMC_BOARD - boolean - default n # While the board info is MMC_SPI only, the drivers are written to be # independent of MMC_SPI, so we'll keep SPI non-dependent on the diff --git a/arch/cris/arch-v32/drivers/Makefile b/arch/cris/arch-v32/drivers/Makefile index 39aa3c117a86..15fbfefced2c 100644 --- a/arch/cris/arch-v32/drivers/Makefile +++ b/arch/cris/arch-v32/drivers/Makefile @@ -10,4 +10,3 @@ obj-$(CONFIG_ETRAX_IOP_FW_LOAD) += iop_fw_load.o obj-$(CONFIG_ETRAX_I2C) += i2c.o obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL) += sync_serial.o obj-$(CONFIG_PCI) += pci/ -obj-$(CONFIG_ETRAX_SPI_MMC_BOARD) += board_mmcspi.o diff --git a/arch/cris/arch-v32/drivers/i2c.h b/arch/cris/arch-v32/drivers/i2c.h index c073cf4ba016..d9cc856f89fb 100644 --- a/arch/cris/arch-v32/drivers/i2c.h +++ b/arch/cris/arch-v32/drivers/i2c.h @@ -2,7 +2,6 @@ #include <linux/init.h> /* High level I2C actions */ -int __init i2c_init(void); int i2c_write(unsigned char theSlave, void *data, size_t nbytes); int i2c_read(unsigned char theSlave, void *data, size_t nbytes); int i2c_writereg(unsigned char theSlave, unsigned char theReg, unsigned char theValue); diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index 5a149134cfb5..08a313fc2241 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -1,8 +1,7 @@ /* - * Simple synchronous serial port driver for ETRAX FS and Artpec-3. - * - * Copyright (c) 2005 Axis Communications AB + * Simple synchronous serial port driver for ETRAX FS and ARTPEC-3. 
* + * Copyright (c) 2005, 2008 Axis Communications AB * Author: Mikael Starvik * */ @@ -16,16 +15,17 @@ #include <linux/mutex.h> #include <linux/interrupt.h> #include <linux/poll.h> -#include <linux/init.h> -#include <linux/timer.h> -#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/device.h> #include <linux/wait.h> #include <asm/io.h> -#include <dma.h> +#include <mach/dma.h> #include <pinmux.h> #include <hwregs/reg_rdwr.h> #include <hwregs/sser_defs.h> +#include <hwregs/timer_defs.h> #include <hwregs/dma_defs.h> #include <hwregs/dma.h> #include <hwregs/intr_vect_defs.h> @@ -59,22 +59,23 @@ /* the rest of the data pointed out by Descr1 and set readp to the start */ /* of Descr2 */ -#define SYNC_SERIAL_MAJOR 125 - /* IN_BUFFER_SIZE should be a multiple of 6 to make sure that 24 bit */ /* words can be handled */ -#define IN_BUFFER_SIZE 12288 -#define IN_DESCR_SIZE 256 -#define NBR_IN_DESCR (IN_BUFFER_SIZE/IN_DESCR_SIZE) +#define IN_DESCR_SIZE SSP_INPUT_CHUNK_SIZE +#define NBR_IN_DESCR (8*6) +#define IN_BUFFER_SIZE (IN_DESCR_SIZE * NBR_IN_DESCR) -#define OUT_BUFFER_SIZE 1024*8 #define NBR_OUT_DESCR 8 +#define OUT_BUFFER_SIZE (1024 * NBR_OUT_DESCR) #define DEFAULT_FRAME_RATE 0 #define DEFAULT_WORD_RATE 7 +/* To be removed when we move to pure udev. */ +#define SYNC_SERIAL_MAJOR 125 + /* NOTE: Enabling some debug will likely cause overrun or underrun, - * especially if manual mode is use. + * especially if manual mode is used. */ #define DEBUG(x) #define DEBUGREAD(x) @@ -85,11 +86,28 @@ #define DEBUGTRDMA(x) #define DEBUGOUTBUF(x) -typedef struct sync_port -{ - reg_scope_instances regi_sser; - reg_scope_instances regi_dmain; - reg_scope_instances regi_dmaout; +enum syncser_irq_setup { + no_irq_setup = 0, + dma_irq_setup = 1, + manual_irq_setup = 2, +}; + +struct sync_port { + unsigned long regi_sser; + unsigned long regi_dmain; + unsigned long regi_dmaout; + + /* Interrupt vectors. 
*/ + unsigned long dma_in_intr_vect; /* Used for DMA in. */ + unsigned long dma_out_intr_vect; /* Used for DMA out. */ + unsigned long syncser_intr_vect; /* Used when no DMA. */ + + /* DMA number for in and out. */ + unsigned int dma_in_nbr; + unsigned int dma_out_nbr; + + /* DMA owner. */ + enum dma_owner req_dma; char started; /* 1 if port has been started */ char port_nbr; /* Port 0 or 1 */ @@ -99,22 +117,29 @@ typedef struct sync_port char use_dma; /* 1 if port uses dma */ char tr_running; - char init_irqs; + enum syncser_irq_setup init_irqs; int output; int input; /* Next byte to be read by application */ - volatile unsigned char *volatile readp; + unsigned char *readp; /* Next byte to be written by etrax */ - volatile unsigned char *volatile writep; + unsigned char *writep; unsigned int in_buffer_size; + unsigned int in_buffer_len; unsigned int inbufchunk; - unsigned char out_buffer[OUT_BUFFER_SIZE] __attribute__ ((aligned(32))); - unsigned char in_buffer[IN_BUFFER_SIZE]__attribute__ ((aligned(32))); - unsigned char flip[IN_BUFFER_SIZE] __attribute__ ((aligned(32))); - struct dma_descr_data* next_rx_desc; - struct dma_descr_data* prev_rx_desc; + /* Data buffers for in and output. 
*/ + unsigned char out_buffer[OUT_BUFFER_SIZE] __aligned(32); + unsigned char in_buffer[IN_BUFFER_SIZE] __aligned(32); + unsigned char flip[IN_BUFFER_SIZE] __aligned(32); + struct timespec timestamp[NBR_IN_DESCR]; + struct dma_descr_data *next_rx_desc; + struct dma_descr_data *prev_rx_desc; + + struct timeval last_timestamp; + int read_ts_idx; + int write_ts_idx; /* Pointer to the first available descriptor in the ring, * unless active_tr_descr == catch_tr_descr and a dma @@ -135,114 +160,138 @@ typedef struct sync_port /* Number of bytes currently locked for being read by DMA */ int out_buf_count; - dma_descr_data in_descr[NBR_IN_DESCR] __attribute__ ((__aligned__(16))); - dma_descr_context in_context __attribute__ ((__aligned__(32))); - dma_descr_data out_descr[NBR_OUT_DESCR] - __attribute__ ((__aligned__(16))); - dma_descr_context out_context __attribute__ ((__aligned__(32))); + dma_descr_context in_context __aligned(32); + dma_descr_context out_context __aligned(32); + dma_descr_data in_descr[NBR_IN_DESCR] __aligned(16); + dma_descr_data out_descr[NBR_OUT_DESCR] __aligned(16); + wait_queue_head_t out_wait_q; wait_queue_head_t in_wait_q; spinlock_t lock; -} sync_port; +}; static DEFINE_MUTEX(sync_serial_mutex); static int etrax_sync_serial_init(void); static void initialize_port(int portnbr); static inline int sync_data_avail(struct sync_port *port); -static int sync_serial_open(struct inode *, struct file*); -static int sync_serial_release(struct inode*, struct file*); +static int sync_serial_open(struct inode *, struct file *); +static int sync_serial_release(struct inode *, struct file *); static unsigned int sync_serial_poll(struct file *filp, poll_table *wait); -static int sync_serial_ioctl(struct file *, - unsigned int cmd, unsigned long arg); -static ssize_t sync_serial_write(struct file * file, const char * buf, +static long sync_serial_ioctl(struct file *file, + unsigned int cmd, unsigned long arg); +static int sync_serial_ioctl_unlocked(struct file 
*file, + unsigned int cmd, unsigned long arg); +static ssize_t sync_serial_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos); -static ssize_t sync_serial_read(struct file *file, char *buf, +static ssize_t sync_serial_read(struct file *file, char __user *buf, size_t count, loff_t *ppos); -#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \ - defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \ - (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \ - defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)) +#if ((defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \ + defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \ + (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \ + defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))) #define SYNC_SER_DMA +#else +#define SYNC_SER_MANUAL #endif -static void send_word(sync_port* port); -static void start_dma_out(struct sync_port *port, const char *data, int count); -static void start_dma_in(sync_port* port); #ifdef SYNC_SER_DMA +static void start_dma_out(struct sync_port *port, const char *data, int count); +static void start_dma_in(struct sync_port *port); static irqreturn_t tr_interrupt(int irq, void *dev_id); static irqreturn_t rx_interrupt(int irq, void *dev_id); #endif - -#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \ - !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \ - (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \ - !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)) -#define SYNC_SER_MANUAL -#endif #ifdef SYNC_SER_MANUAL +static void send_word(struct sync_port *port); static irqreturn_t manual_interrupt(int irq, void *dev_id); #endif -#ifdef CONFIG_ETRAXFS /* ETRAX FS */ -#define OUT_DMA_NBR 4 -#define IN_DMA_NBR 5 -#define PINMUX_SSER pinmux_sser0 -#define SYNCSER_INST regi_sser0 -#define SYNCSER_INTR_VECT SSER0_INTR_VECT -#define OUT_DMA_INST regi_dma4 -#define IN_DMA_INST regi_dma5 -#define DMA_OUT_INTR_VECT DMA4_INTR_VECT -#define DMA_IN_INTR_VECT DMA5_INTR_VECT -#define 
REQ_DMA_SYNCSER dma_sser0 -#else /* Artpec-3 */ -#define OUT_DMA_NBR 6 -#define IN_DMA_NBR 7 -#define PINMUX_SSER pinmux_sser -#define SYNCSER_INST regi_sser -#define SYNCSER_INTR_VECT SSER_INTR_VECT -#define OUT_DMA_INST regi_dma6 -#define IN_DMA_INST regi_dma7 -#define DMA_OUT_INTR_VECT DMA6_INTR_VECT -#define DMA_IN_INTR_VECT DMA7_INTR_VECT -#define REQ_DMA_SYNCSER dma_sser +#define artpec_pinmux_alloc_fixed crisv32_pinmux_alloc_fixed +#define artpec_request_dma crisv32_request_dma +#define artpec_free_dma crisv32_free_dma + +#ifdef CONFIG_ETRAXFS +/* ETRAX FS */ +#define DMA_OUT_NBR0 SYNC_SER0_TX_DMA_NBR +#define DMA_IN_NBR0 SYNC_SER0_RX_DMA_NBR +#define DMA_OUT_NBR1 SYNC_SER1_TX_DMA_NBR +#define DMA_IN_NBR1 SYNC_SER1_RX_DMA_NBR +#define PINMUX_SSER0 pinmux_sser0 +#define PINMUX_SSER1 pinmux_sser1 +#define SYNCSER_INST0 regi_sser0 +#define SYNCSER_INST1 regi_sser1 +#define SYNCSER_INTR_VECT0 SSER0_INTR_VECT +#define SYNCSER_INTR_VECT1 SSER1_INTR_VECT +#define OUT_DMA_INST0 regi_dma4 +#define IN_DMA_INST0 regi_dma5 +#define DMA_OUT_INTR_VECT0 DMA4_INTR_VECT +#define DMA_OUT_INTR_VECT1 DMA7_INTR_VECT +#define DMA_IN_INTR_VECT0 DMA5_INTR_VECT +#define DMA_IN_INTR_VECT1 DMA6_INTR_VECT +#define REQ_DMA_SYNCSER0 dma_sser0 +#define REQ_DMA_SYNCSER1 dma_sser1 +#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA) +#define PORT1_DMA 1 +#else +#define PORT1_DMA 0 +#endif +#elif defined(CONFIG_CRIS_MACH_ARTPEC3) +/* ARTPEC-3 */ +#define DMA_OUT_NBR0 SYNC_SER_TX_DMA_NBR +#define DMA_IN_NBR0 SYNC_SER_RX_DMA_NBR +#define PINMUX_SSER0 pinmux_sser +#define SYNCSER_INST0 regi_sser +#define SYNCSER_INTR_VECT0 SSER_INTR_VECT +#define OUT_DMA_INST0 regi_dma6 +#define IN_DMA_INST0 regi_dma7 +#define DMA_OUT_INTR_VECT0 DMA6_INTR_VECT +#define DMA_IN_INTR_VECT0 DMA7_INTR_VECT +#define REQ_DMA_SYNCSER0 dma_sser +#define REQ_DMA_SYNCSER1 dma_sser #endif -/* The ports */ -static struct sync_port ports[]= -{ - { - .regi_sser = SYNCSER_INST, - .regi_dmaout = OUT_DMA_INST, - .regi_dmain = 
IN_DMA_INST, #if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA) - .use_dma = 1, +#define PORT0_DMA 1 #else - .use_dma = 0, +#define PORT0_DMA 0 #endif - } -#ifdef CONFIG_ETRAXFS - , +/* The ports */ +static struct sync_port ports[] = { { - .regi_sser = regi_sser1, - .regi_dmaout = regi_dma6, - .regi_dmain = regi_dma7, -#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA) - .use_dma = 1, -#else - .use_dma = 0, -#endif - } + .regi_sser = SYNCSER_INST0, + .regi_dmaout = OUT_DMA_INST0, + .regi_dmain = IN_DMA_INST0, + .use_dma = PORT0_DMA, + .dma_in_intr_vect = DMA_IN_INTR_VECT0, + .dma_out_intr_vect = DMA_OUT_INTR_VECT0, + .dma_in_nbr = DMA_IN_NBR0, + .dma_out_nbr = DMA_OUT_NBR0, + .req_dma = REQ_DMA_SYNCSER0, + .syncser_intr_vect = SYNCSER_INTR_VECT0, + }, +#ifdef CONFIG_ETRAXFS + { + .regi_sser = SYNCSER_INST1, + .regi_dmaout = regi_dma6, + .regi_dmain = regi_dma7, + .use_dma = PORT1_DMA, + .dma_in_intr_vect = DMA_IN_INTR_VECT1, + .dma_out_intr_vect = DMA_OUT_INTR_VECT1, + .dma_in_nbr = DMA_IN_NBR1, + .dma_out_nbr = DMA_OUT_NBR1, + .req_dma = REQ_DMA_SYNCSER1, + .syncser_intr_vect = SYNCSER_INTR_VECT1, + }, #endif }; #define NBR_PORTS ARRAY_SIZE(ports) -static const struct file_operations sync_serial_fops = { +static const struct file_operations syncser_fops = { .owner = THIS_MODULE, .write = sync_serial_write, .read = sync_serial_read, @@ -253,61 +302,40 @@ static const struct file_operations sync_serial_fops = { .llseek = noop_llseek, }; -static int __init etrax_sync_serial_init(void) -{ - ports[0].enabled = 0; -#ifdef CONFIG_ETRAXFS - ports[1].enabled = 0; -#endif - if (register_chrdev(SYNC_SERIAL_MAJOR, "sync serial", - &sync_serial_fops) < 0) { - printk(KERN_WARNING - "Unable to get major for synchronous serial port\n"); - return -EBUSY; - } - - /* Initialize Ports */ -#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) - if (crisv32_pinmux_alloc_fixed(PINMUX_SSER)) { - printk(KERN_WARNING - "Unable to alloc pins for synchronous serial port 0\n"); - return -EIO; - } 
- ports[0].enabled = 1; - initialize_port(0); -#endif - -#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) - if (crisv32_pinmux_alloc_fixed(pinmux_sser1)) { - printk(KERN_WARNING - "Unable to alloc pins for synchronous serial port 0\n"); - return -EIO; - } - ports[1].enabled = 1; - initialize_port(1); -#endif +static dev_t syncser_first; +static int minor_count = NBR_PORTS; +#define SYNCSER_NAME "syncser" +static struct cdev *syncser_cdev; +static struct class *syncser_class; -#ifdef CONFIG_ETRAXFS - printk(KERN_INFO "ETRAX FS synchronous serial port driver\n"); -#else - printk(KERN_INFO "Artpec-3 synchronous serial port driver\n"); -#endif - return 0; +static void sync_serial_start_port(struct sync_port *port) +{ + reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg); + reg_sser_rw_tr_cfg tr_cfg = + REG_RD(sser, port->regi_sser, rw_tr_cfg); + reg_sser_rw_rec_cfg rec_cfg = + REG_RD(sser, port->regi_sser, rw_rec_cfg); + cfg.en = regk_sser_yes; + tr_cfg.tr_en = regk_sser_yes; + rec_cfg.rec_en = regk_sser_yes; + REG_WR(sser, port->regi_sser, rw_cfg, cfg); + REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg); + REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg); + port->started = 1; } static void __init initialize_port(int portnbr) { - int __attribute__((unused)) i; struct sync_port *port = &ports[portnbr]; - reg_sser_rw_cfg cfg = {0}; - reg_sser_rw_frm_cfg frm_cfg = {0}; - reg_sser_rw_tr_cfg tr_cfg = {0}; - reg_sser_rw_rec_cfg rec_cfg = {0}; + reg_sser_rw_cfg cfg = { 0 }; + reg_sser_rw_frm_cfg frm_cfg = { 0 }; + reg_sser_rw_tr_cfg tr_cfg = { 0 }; + reg_sser_rw_rec_cfg rec_cfg = { 0 }; - DEBUG(printk(KERN_DEBUG "Init sync serial port %d\n", portnbr)); + DEBUG(pr_info("Init sync serial port %d\n", portnbr)); port->port_nbr = portnbr; - port->init_irqs = 1; + port->init_irqs = no_irq_setup; port->out_rd_ptr = port->out_buffer; port->out_buf_count = 0; @@ -318,10 +346,11 @@ static void __init initialize_port(int portnbr) port->readp = port->flip; port->writep = 
port->flip; port->in_buffer_size = IN_BUFFER_SIZE; + port->in_buffer_len = 0; port->inbufchunk = IN_DESCR_SIZE; - port->next_rx_desc = &port->in_descr[0]; - port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR-1]; - port->prev_rx_desc->eol = 1; + + port->read_ts_idx = 0; + port->write_ts_idx = 0; init_waitqueue_head(&port->out_wait_q); init_waitqueue_head(&port->in_wait_q); @@ -368,14 +397,18 @@ static void __init initialize_port(int portnbr) REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg); #ifdef SYNC_SER_DMA - /* Setup the descriptor ring for dma out/transmit. */ - for (i = 0; i < NBR_OUT_DESCR; i++) { - port->out_descr[i].wait = 0; - port->out_descr[i].intr = 1; - port->out_descr[i].eol = 0; - port->out_descr[i].out_eop = 0; - port->out_descr[i].next = - (dma_descr_data *)virt_to_phys(&port->out_descr[i+1]); + { + int i; + /* Setup the descriptor ring for dma out/transmit. */ + for (i = 0; i < NBR_OUT_DESCR; i++) { + dma_descr_data *descr = &port->out_descr[i]; + descr->wait = 0; + descr->intr = 1; + descr->eol = 0; + descr->out_eop = 0; + descr->next = + (dma_descr_data *)virt_to_phys(&descr[i+1]); + } } /* Create a ring from the list. 
*/ @@ -391,201 +424,116 @@ static void __init initialize_port(int portnbr) static inline int sync_data_avail(struct sync_port *port) { - int avail; - unsigned char *start; - unsigned char *end; - - start = (unsigned char*)port->readp; /* cast away volatile */ - end = (unsigned char*)port->writep; /* cast away volatile */ - /* 0123456789 0123456789 - * ----- - ----- - * ^rp ^wp ^wp ^rp - */ - - if (end >= start) - avail = end - start; - else - avail = port->in_buffer_size - (start - end); - return avail; -} - -static inline int sync_data_avail_to_end(struct sync_port *port) -{ - int avail; - unsigned char *start; - unsigned char *end; - - start = (unsigned char*)port->readp; /* cast away volatile */ - end = (unsigned char*)port->writep; /* cast away volatile */ - /* 0123456789 0123456789 - * ----- ----- - * ^rp ^wp ^wp ^rp - */ - - if (end >= start) - avail = end - start; - else - avail = port->flip + port->in_buffer_size - start; - return avail; + return port->in_buffer_len; } static int sync_serial_open(struct inode *inode, struct file *file) { + int ret = 0; int dev = iminor(inode); - int ret = -EBUSY; - sync_port *port; - reg_dma_rw_cfg cfg = {.en = regk_dma_yes}; - reg_dma_rw_intr_mask intr_mask = {.data = regk_dma_yes}; + struct sync_port *port; +#ifdef SYNC_SER_DMA + reg_dma_rw_cfg cfg = { .en = regk_dma_yes }; + reg_dma_rw_intr_mask intr_mask = { .data = regk_dma_yes }; +#endif - mutex_lock(&sync_serial_mutex); - DEBUG(printk(KERN_DEBUG "Open sync serial port %d\n", dev)); + DEBUG(pr_debug("Open sync serial port %d\n", dev)); - if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) - { - DEBUG(printk(KERN_DEBUG "Invalid minor %d\n", dev)); - ret = -ENODEV; - goto out; + if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { + DEBUG(pr_info("Invalid minor %d\n", dev)); + return -ENODEV; } port = &ports[dev]; /* Allow open this device twice (assuming one reader and one writer) */ - if (port->busy == 2) - { - DEBUG(printk(KERN_DEBUG "Device is busy.. 
\n")); - goto out; + if (port->busy == 2) { + DEBUG(pr_info("syncser%d is busy\n", dev)); + return -EBUSY; } + mutex_lock(&sync_serial_mutex); - if (port->init_irqs) { - if (port->use_dma) { - if (port == &ports[0]) { -#ifdef SYNC_SER_DMA - if (request_irq(DMA_OUT_INTR_VECT, - tr_interrupt, - 0, - "synchronous serial 0 dma tr", - &ports[0])) { - printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ"); - goto out; - } else if (request_irq(DMA_IN_INTR_VECT, - rx_interrupt, - 0, - "synchronous serial 1 dma rx", - &ports[0])) { - free_irq(DMA_OUT_INTR_VECT, &port[0]); - printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ"); - goto out; - } else if (crisv32_request_dma(OUT_DMA_NBR, - "synchronous serial 0 dma tr", - DMA_VERBOSE_ON_ERROR, - 0, - REQ_DMA_SYNCSER)) { - free_irq(DMA_OUT_INTR_VECT, &port[0]); - free_irq(DMA_IN_INTR_VECT, &port[0]); - printk(KERN_CRIT "Can't allocate sync serial port 0 TX DMA channel"); - goto out; - } else if (crisv32_request_dma(IN_DMA_NBR, - "synchronous serial 0 dma rec", - DMA_VERBOSE_ON_ERROR, - 0, - REQ_DMA_SYNCSER)) { - crisv32_free_dma(OUT_DMA_NBR); - free_irq(DMA_OUT_INTR_VECT, &port[0]); - free_irq(DMA_IN_INTR_VECT, &port[0]); - printk(KERN_CRIT "Can't allocate sync serial port 1 RX DMA channel"); - goto out; - } -#endif - } -#ifdef CONFIG_ETRAXFS - else if (port == &ports[1]) { + /* Clear any stale date left in the flip buffer */ + port->readp = port->writep = port->flip; + port->in_buffer_len = 0; + port->read_ts_idx = 0; + port->write_ts_idx = 0; + + if (port->init_irqs != no_irq_setup) { + /* Init only on first call. 
*/ + port->busy++; + mutex_unlock(&sync_serial_mutex); + return 0; + } + if (port->use_dma) { #ifdef SYNC_SER_DMA - if (request_irq(DMA6_INTR_VECT, - tr_interrupt, - 0, - "synchronous serial 1 dma tr", - &ports[1])) { - printk(KERN_CRIT "Can't allocate sync serial port 1 IRQ"); - goto out; - } else if (request_irq(DMA7_INTR_VECT, - rx_interrupt, - 0, - "synchronous serial 1 dma rx", - &ports[1])) { - free_irq(DMA6_INTR_VECT, &ports[1]); - printk(KERN_CRIT "Can't allocate sync serial port 3 IRQ"); - goto out; - } else if (crisv32_request_dma( - SYNC_SER1_TX_DMA_NBR, - "synchronous serial 1 dma tr", - DMA_VERBOSE_ON_ERROR, - 0, - dma_sser1)) { - free_irq(DMA6_INTR_VECT, &ports[1]); - free_irq(DMA7_INTR_VECT, &ports[1]); - printk(KERN_CRIT "Can't allocate sync serial port 3 TX DMA channel"); - goto out; - } else if (crisv32_request_dma( - SYNC_SER1_RX_DMA_NBR, - "synchronous serial 3 dma rec", - DMA_VERBOSE_ON_ERROR, - 0, - dma_sser1)) { - crisv32_free_dma(SYNC_SER1_TX_DMA_NBR); - free_irq(DMA6_INTR_VECT, &ports[1]); - free_irq(DMA7_INTR_VECT, &ports[1]); - printk(KERN_CRIT "Can't allocate sync serial port 3 RX DMA channel"); - goto out; - } -#endif - } + const char *tmp; + DEBUG(pr_info("Using DMA for syncser%d\n", dev)); + + tmp = dev == 0 ? "syncser0 tx" : "syncser1 tx"; + if (request_irq(port->dma_out_intr_vect, tr_interrupt, 0, + tmp, port)) { + pr_err("Can't alloc syncser%d TX IRQ", dev); + ret = -EBUSY; + goto unlock_and_exit; + } + if (artpec_request_dma(port->dma_out_nbr, tmp, + DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) { + free_irq(port->dma_out_intr_vect, port); + pr_err("Can't alloc syncser%d TX DMA", dev); + ret = -EBUSY; + goto unlock_and_exit; + } + tmp = dev == 0 ? 
"syncser0 rx" : "syncser1 rx"; + if (request_irq(port->dma_in_intr_vect, rx_interrupt, 0, + tmp, port)) { + artpec_free_dma(port->dma_out_nbr); + free_irq(port->dma_out_intr_vect, port); + pr_err("Can't alloc syncser%d RX IRQ", dev); + ret = -EBUSY; + goto unlock_and_exit; + } + if (artpec_request_dma(port->dma_in_nbr, tmp, + DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) { + artpec_free_dma(port->dma_out_nbr); + free_irq(port->dma_out_intr_vect, port); + free_irq(port->dma_in_intr_vect, port); + pr_err("Can't alloc syncser%d RX DMA", dev); + ret = -EBUSY; + goto unlock_and_exit; + } + /* Enable DMAs */ + REG_WR(dma, port->regi_dmain, rw_cfg, cfg); + REG_WR(dma, port->regi_dmaout, rw_cfg, cfg); + /* Enable DMA IRQs */ + REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask); + REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask); + /* Set up wordsize = 1 for DMAs. */ + DMA_WR_CMD(port->regi_dmain, regk_dma_set_w_size1); + DMA_WR_CMD(port->regi_dmaout, regk_dma_set_w_size1); + + start_dma_in(port); + port->init_irqs = dma_irq_setup; #endif - /* Enable DMAs */ - REG_WR(dma, port->regi_dmain, rw_cfg, cfg); - REG_WR(dma, port->regi_dmaout, rw_cfg, cfg); - /* Enable DMA IRQs */ - REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask); - REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask); - /* Set up wordsize = 1 for DMAs. 
*/ - DMA_WR_CMD (port->regi_dmain, regk_dma_set_w_size1); - DMA_WR_CMD (port->regi_dmaout, regk_dma_set_w_size1); - - start_dma_in(port); - port->init_irqs = 0; - } else { /* !port->use_dma */ + } else { /* !port->use_dma */ #ifdef SYNC_SER_MANUAL - if (port == &ports[0]) { - if (request_irq(SYNCSER_INTR_VECT, - manual_interrupt, - 0, - "synchronous serial manual irq", - &ports[0])) { - printk("Can't allocate sync serial manual irq"); - goto out; - } - } -#ifdef CONFIG_ETRAXFS - else if (port == &ports[1]) { - if (request_irq(SSER1_INTR_VECT, - manual_interrupt, - 0, - "synchronous serial manual irq", - &ports[1])) { - printk(KERN_CRIT "Can't allocate sync serial manual irq"); - goto out; - } - } -#endif - port->init_irqs = 0; + const char *tmp = dev == 0 ? "syncser0 manual irq" : + "syncser1 manual irq"; + if (request_irq(port->syncser_intr_vect, manual_interrupt, + 0, tmp, port)) { + pr_err("Can't alloc syncser%d manual irq", + dev); + ret = -EBUSY; + goto unlock_and_exit; + } + port->init_irqs = manual_irq_setup; #else - panic("sync_serial: Manual mode not supported.\n"); + panic("sync_serial: Manual mode not supported\n"); #endif /* SYNC_SER_MANUAL */ - } - - } /* port->init_irqs */ - + } port->busy++; ret = 0; -out: + +unlock_and_exit: mutex_unlock(&sync_serial_mutex); return ret; } @@ -593,18 +541,17 @@ out: static int sync_serial_release(struct inode *inode, struct file *file) { int dev = iminor(inode); - sync_port *port; + struct sync_port *port; - if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) - { - DEBUG(printk("Invalid minor %d\n", dev)); + if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { + DEBUG(pr_info("Invalid minor %d\n", dev)); return -ENODEV; } port = &ports[dev]; if (port->busy) port->busy--; if (!port->busy) - /* XXX */ ; + /* XXX */; return 0; } @@ -612,21 +559,15 @@ static unsigned int sync_serial_poll(struct file *file, poll_table *wait) { int dev = iminor(file_inode(file)); unsigned int mask = 0; - sync_port *port; - 
DEBUGPOLL( static unsigned int prev_mask = 0; ); + struct sync_port *port; + DEBUGPOLL( + static unsigned int prev_mask; + ); port = &ports[dev]; - if (!port->started) { - reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg); - reg_sser_rw_rec_cfg rec_cfg = - REG_RD(sser, port->regi_sser, rw_rec_cfg); - cfg.en = regk_sser_yes; - rec_cfg.rec_en = port->input; - REG_WR(sser, port->regi_sser, rw_cfg, cfg); - REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg); - port->started = 1; - } + if (!port->started) + sync_serial_start_port(port); poll_wait(file, &port->out_wait_q, wait); poll_wait(file, &port->in_wait_q, wait); @@ -645,33 +586,175 @@ static unsigned int sync_serial_poll(struct file *file, poll_table *wait) if (port->input && sync_data_avail(port) >= port->inbufchunk) mask |= POLLIN | POLLRDNORM; - DEBUGPOLL(if (mask != prev_mask) - printk("sync_serial_poll: mask 0x%08X %s %s\n", mask, - mask&POLLOUT?"POLLOUT":"", mask&POLLIN?"POLLIN":""); - prev_mask = mask; - ); + DEBUGPOLL( + if (mask != prev_mask) + pr_info("sync_serial_poll: mask 0x%08X %s %s\n", + mask, + mask & POLLOUT ? "POLLOUT" : "", + mask & POLLIN ? 
"POLLIN" : ""); + prev_mask = mask; + ); return mask; } -static int sync_serial_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) +static ssize_t __sync_serial_read(struct file *file, + char __user *buf, + size_t count, + loff_t *ppos, + struct timespec *ts) +{ + unsigned long flags; + int dev = MINOR(file->f_dentry->d_inode->i_rdev); + int avail; + struct sync_port *port; + unsigned char *start; + unsigned char *end; + + if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { + DEBUG(pr_info("Invalid minor %d\n", dev)); + return -ENODEV; + } + port = &ports[dev]; + + if (!port->started) + sync_serial_start_port(port); + + /* Calculate number of available bytes */ + /* Save pointers to avoid that they are modified by interrupt */ + spin_lock_irqsave(&port->lock, flags); + start = port->readp; + end = port->writep; + spin_unlock_irqrestore(&port->lock, flags); + + while ((start == end) && !port->in_buffer_len) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + wait_event_interruptible(port->in_wait_q, + !(start == end && !port->full)); + + if (signal_pending(current)) + return -EINTR; + + spin_lock_irqsave(&port->lock, flags); + start = port->readp; + end = port->writep; + spin_unlock_irqrestore(&port->lock, flags); + } + + DEBUGREAD(pr_info("R%d c %d ri %u wi %u /%u\n", + dev, count, + start - port->flip, end - port->flip, + port->in_buffer_size)); + + /* Lazy read, never return wrapped data. */ + if (end > start) + avail = end - start; + else + avail = port->flip + port->in_buffer_size - start; + + count = count > avail ? avail : count; + if (copy_to_user(buf, start, count)) + return -EFAULT; + + /* If timestamp requested, find timestamp of first returned byte + * and copy it. + * N.B: Applications that request timstamps MUST read data in + * chunks that are multiples of IN_DESCR_SIZE. + * Otherwise the timestamps will not be aligned to the data read. 
+ */ + if (ts != NULL) { + int idx = port->read_ts_idx; + memcpy(ts, &port->timestamp[idx], sizeof(struct timespec)); + port->read_ts_idx += count / IN_DESCR_SIZE; + if (port->read_ts_idx >= NBR_IN_DESCR) + port->read_ts_idx = 0; + } + + spin_lock_irqsave(&port->lock, flags); + port->readp += count; + /* Check for wrap */ + if (port->readp >= port->flip + port->in_buffer_size) + port->readp = port->flip; + port->in_buffer_len -= count; + port->full = 0; + spin_unlock_irqrestore(&port->lock, flags); + + DEBUGREAD(pr_info("r %d\n", count)); + + return count; +} + +static ssize_t sync_serial_input(struct file *file, unsigned long arg) +{ + struct ssp_request req; + int count; + int ret; + + /* Copy the request structure from user-mode. */ + ret = copy_from_user(&req, (struct ssp_request __user *)arg, + sizeof(struct ssp_request)); + + if (ret) { + DEBUG(pr_info("sync_serial_input copy from user failed\n")); + return -EFAULT; + } + + /* To get the timestamps aligned, make sure that 'len' + * is a multiple of IN_DESCR_SIZE. + */ + if ((req.len % IN_DESCR_SIZE) != 0) { + DEBUG(pr_info("sync_serial: req.len %x, IN_DESCR_SIZE %x\n", + req.len, IN_DESCR_SIZE)); + return -EFAULT; + } + + /* Do the actual read. */ + /* Note that req.buf is actually a pointer to user space. */ + count = __sync_serial_read(file, req.buf, req.len, + NULL, &req.ts); + + if (count < 0) { + DEBUG(pr_info("sync_serial_input read failed\n")); + return count; + } + + /* Copy the request back to user-mode. */ + ret = copy_to_user((struct ssp_request __user *)arg, &req, + sizeof(struct ssp_request)); + + if (ret) { + DEBUG(pr_info("syncser input copy2user failed\n")); + return -EFAULT; + } + + /* Return the number of bytes read. 
*/ + return count; +} + + +static int sync_serial_ioctl_unlocked(struct file *file, + unsigned int cmd, unsigned long arg) { int return_val = 0; int dma_w_size = regk_dma_set_w_size1; int dev = iminor(file_inode(file)); - sync_port *port; + struct sync_port *port; reg_sser_rw_tr_cfg tr_cfg; reg_sser_rw_rec_cfg rec_cfg; reg_sser_rw_frm_cfg frm_cfg; reg_sser_rw_cfg gen_cfg; reg_sser_rw_intr_mask intr_mask; - if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) - { - DEBUG(printk("Invalid minor %d\n", dev)); + if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { + DEBUG(pr_info("Invalid minor %d\n", dev)); return -1; } - port = &ports[dev]; + + if (cmd == SSP_INPUT) + return sync_serial_input(file, arg); + + port = &ports[dev]; spin_lock_irq(&port->lock); tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); @@ -680,11 +763,9 @@ static int sync_serial_ioctl(struct file *file, gen_cfg = REG_RD(sser, port->regi_sser, rw_cfg); intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask); - switch(cmd) - { + switch (cmd) { case SSP_SPEED: - if (GET_SPEED(arg) == CODEC) - { + if (GET_SPEED(arg) == CODEC) { unsigned int freq; gen_cfg.base_freq = regk_sser_f32; @@ -701,15 +782,25 @@ static int sync_serial_ioctl(struct file *file, case FREQ_256kHz: gen_cfg.clk_div = 125 * (1 << (freq - FREQ_256kHz)) - 1; - break; + break; case FREQ_512kHz: gen_cfg.clk_div = 62; - break; + break; case FREQ_1MHz: case FREQ_2MHz: case FREQ_4MHz: gen_cfg.clk_div = 8 * (1 << freq) - 1; - break; + break; + } + } else if (GET_SPEED(arg) == CODEC_f32768) { + gen_cfg.base_freq = regk_sser_f32_768; + switch (GET_FREQ(arg)) { + case FREQ_4096kHz: + gen_cfg.clk_div = 7; + break; + default: + spin_unlock_irq(&port->lock); + return -EINVAL; } } else { gen_cfg.base_freq = regk_sser_f29_493; @@ -767,62 +858,64 @@ static int sync_serial_ioctl(struct file *file, break; case SSP_MODE: - switch(arg) - { - case MASTER_OUTPUT: - port->output = 1; - port->input = 0; - frm_cfg.out_on = regk_sser_tr; - 
frm_cfg.frame_pin_dir = regk_sser_out; - gen_cfg.clk_dir = regk_sser_out; - break; - case SLAVE_OUTPUT: - port->output = 1; - port->input = 0; - frm_cfg.frame_pin_dir = regk_sser_in; - gen_cfg.clk_dir = regk_sser_in; - break; - case MASTER_INPUT: - port->output = 0; - port->input = 1; - frm_cfg.frame_pin_dir = regk_sser_out; - frm_cfg.out_on = regk_sser_intern_tb; - gen_cfg.clk_dir = regk_sser_out; - break; - case SLAVE_INPUT: - port->output = 0; - port->input = 1; - frm_cfg.frame_pin_dir = regk_sser_in; - gen_cfg.clk_dir = regk_sser_in; - break; - case MASTER_BIDIR: - port->output = 1; - port->input = 1; - frm_cfg.frame_pin_dir = regk_sser_out; - frm_cfg.out_on = regk_sser_intern_tb; - gen_cfg.clk_dir = regk_sser_out; - break; - case SLAVE_BIDIR: - port->output = 1; - port->input = 1; - frm_cfg.frame_pin_dir = regk_sser_in; - gen_cfg.clk_dir = regk_sser_in; - break; - default: - spin_unlock_irq(&port->lock); - return -EINVAL; + switch (arg) { + case MASTER_OUTPUT: + port->output = 1; + port->input = 0; + frm_cfg.out_on = regk_sser_tr; + frm_cfg.frame_pin_dir = regk_sser_out; + gen_cfg.clk_dir = regk_sser_out; + break; + case SLAVE_OUTPUT: + port->output = 1; + port->input = 0; + frm_cfg.frame_pin_dir = regk_sser_in; + gen_cfg.clk_dir = regk_sser_in; + break; + case MASTER_INPUT: + port->output = 0; + port->input = 1; + frm_cfg.frame_pin_dir = regk_sser_out; + frm_cfg.out_on = regk_sser_intern_tb; + gen_cfg.clk_dir = regk_sser_out; + break; + case SLAVE_INPUT: + port->output = 0; + port->input = 1; + frm_cfg.frame_pin_dir = regk_sser_in; + gen_cfg.clk_dir = regk_sser_in; + break; + case MASTER_BIDIR: + port->output = 1; + port->input = 1; + frm_cfg.frame_pin_dir = regk_sser_out; + frm_cfg.out_on = regk_sser_intern_tb; + gen_cfg.clk_dir = regk_sser_out; + break; + case SLAVE_BIDIR: + port->output = 1; + port->input = 1; + frm_cfg.frame_pin_dir = regk_sser_in; + gen_cfg.clk_dir = regk_sser_in; + break; + default: + spin_unlock_irq(&port->lock); + return -EINVAL; } - 
if (!port->use_dma || (arg == MASTER_OUTPUT || arg == SLAVE_OUTPUT)) + if (!port->use_dma || arg == MASTER_OUTPUT || + arg == SLAVE_OUTPUT) intr_mask.rdav = regk_sser_yes; break; case SSP_FRAME_SYNC: if (arg & NORMAL_SYNC) { frm_cfg.rec_delay = 1; frm_cfg.tr_delay = 1; - } - else if (arg & EARLY_SYNC) + } else if (arg & EARLY_SYNC) frm_cfg.rec_delay = frm_cfg.tr_delay = 0; - else if (arg & SECOND_WORD_SYNC) { + else if (arg & LATE_SYNC) { + frm_cfg.tr_delay = 2; + frm_cfg.rec_delay = 2; + } else if (arg & SECOND_WORD_SYNC) { frm_cfg.rec_delay = 7; frm_cfg.tr_delay = 1; } @@ -914,15 +1007,12 @@ static int sync_serial_ioctl(struct file *file, frm_cfg.type = regk_sser_level; frm_cfg.tr_delay = 1; frm_cfg.level = regk_sser_neg_lo; - if (arg & SPI_SLAVE) - { + if (arg & SPI_SLAVE) { rec_cfg.clk_pol = regk_sser_neg; gen_cfg.clk_dir = regk_sser_in; port->input = 1; port->output = 0; - } - else - { + } else { gen_cfg.out_clk_pol = regk_sser_pos; port->input = 0; port->output = 1; @@ -965,19 +1055,19 @@ static int sync_serial_ioctl(struct file *file, } static long sync_serial_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) + unsigned int cmd, unsigned long arg) { - long ret; + long ret; - mutex_lock(&sync_serial_mutex); - ret = sync_serial_ioctl_unlocked(file, cmd, arg); - mutex_unlock(&sync_serial_mutex); + mutex_lock(&sync_serial_mutex); + ret = sync_serial_ioctl_unlocked(file, cmd, arg); + mutex_unlock(&sync_serial_mutex); - return ret; + return ret; } /* NOTE: sync_serial_write does not support concurrency */ -static ssize_t sync_serial_write(struct file *file, const char *buf, +static ssize_t sync_serial_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { int dev = iminor(file_inode(file)); @@ -993,7 +1083,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, unsigned char *buf_stop_ptr; /* Last byte + 1 */ if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { - DEBUG(printk("Invalid minor %d\n", dev)); 
+ DEBUG(pr_info("Invalid minor %d\n", dev)); return -ENODEV; } port = &ports[dev]; @@ -1006,9 +1096,9 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, * |_________|___________________|________________________| * ^ rd_ptr ^ wr_ptr */ - DEBUGWRITE(printk(KERN_DEBUG "W d%d c %lu a: %p c: %p\n", - port->port_nbr, count, port->active_tr_descr, - port->catch_tr_descr)); + DEBUGWRITE(pr_info("W d%d c %u a: %p c: %p\n", + port->port_nbr, count, port->active_tr_descr, + port->catch_tr_descr)); /* Read variables that may be updated by interrupts */ spin_lock_irqsave(&port->lock, flags); @@ -1020,7 +1110,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, if (port->tr_running && ((port->use_dma && port->active_tr_descr == port->catch_tr_descr) || out_buf_count >= OUT_BUFFER_SIZE)) { - DEBUGWRITE(printk(KERN_DEBUG "sser%d full\n", dev)); + DEBUGWRITE(pr_info("sser%d full\n", dev)); return -EAGAIN; } @@ -1043,15 +1133,16 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, if (copy_from_user(wr_ptr, buf, trunc_count)) return -EFAULT; - DEBUGOUTBUF(printk(KERN_DEBUG "%-4d + %-4d = %-4d %p %p %p\n", - out_buf_count, trunc_count, - port->out_buf_count, port->out_buffer, - wr_ptr, buf_stop_ptr)); + DEBUGOUTBUF(pr_info("%-4d + %-4d = %-4d %p %p %p\n", + out_buf_count, trunc_count, + port->out_buf_count, port->out_buffer, + wr_ptr, buf_stop_ptr)); /* Make sure transmitter/receiver is running */ if (!port->started) { reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg); - reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg); + reg_sser_rw_rec_cfg rec_cfg = + REG_RD(sser, port->regi_sser, rw_rec_cfg); cfg.en = regk_sser_yes; rec_cfg.rec_en = port->input; REG_WR(sser, port->regi_sser, rw_cfg, cfg); @@ -1068,8 +1159,11 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, spin_lock_irqsave(&port->lock, flags); port->out_buf_count += trunc_count; if (port->use_dma) { +#ifdef 
SYNC_SER_DMA start_dma_out(port, wr_ptr, trunc_count); +#endif } else if (!port->tr_running) { +#ifdef SYNC_SER_MANUAL reg_sser_rw_intr_mask intr_mask; intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask); /* Start sender by writing data */ @@ -1077,14 +1171,15 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, /* and enable transmitter ready IRQ */ intr_mask.trdy = 1; REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask); +#endif } spin_unlock_irqrestore(&port->lock, flags); /* Exit if non blocking */ if (file->f_flags & O_NONBLOCK) { - DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu %08x\n", - port->port_nbr, trunc_count, - REG_RD_INT(dma, port->regi_dmaout, r_intr))); + DEBUGWRITE(pr_info("w d%d c %u %08x\n", + port->port_nbr, trunc_count, + REG_RD_INT(dma, port->regi_dmaout, r_intr))); return trunc_count; } @@ -1094,105 +1189,32 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, if (signal_pending(current)) return -EINTR; - DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu\n", - port->port_nbr, trunc_count)); + DEBUGWRITE(pr_info("w d%d c %u\n", port->port_nbr, trunc_count)); return trunc_count; } -static ssize_t sync_serial_read(struct file * file, char * buf, +static ssize_t sync_serial_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - int dev = iminor(file_inode(file)); - int avail; - sync_port *port; - unsigned char* start; - unsigned char* end; - unsigned long flags; - - if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) - { - DEBUG(printk("Invalid minor %d\n", dev)); - return -ENODEV; - } - port = &ports[dev]; - - DEBUGREAD(printk("R%d c %d ri %lu wi %lu /%lu\n", dev, count, port->readp - port->flip, port->writep - port->flip, port->in_buffer_size)); - - if (!port->started) - { - reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg); - reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); - reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg); - cfg.en = 
regk_sser_yes; - tr_cfg.tr_en = regk_sser_yes; - rec_cfg.rec_en = regk_sser_yes; - REG_WR(sser, port->regi_sser, rw_cfg, cfg); - REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg); - REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg); - port->started = 1; - } - - /* Calculate number of available bytes */ - /* Save pointers to avoid that they are modified by interrupt */ - spin_lock_irqsave(&port->lock, flags); - start = (unsigned char*)port->readp; /* cast away volatile */ - end = (unsigned char*)port->writep; /* cast away volatile */ - spin_unlock_irqrestore(&port->lock, flags); - while ((start == end) && !port->full) /* No data */ - { - DEBUGREAD(printk(KERN_DEBUG "&")); - if (file->f_flags & O_NONBLOCK) - return -EAGAIN; - - wait_event_interruptible(port->in_wait_q, - !(start == end && !port->full)); - if (signal_pending(current)) - return -EINTR; - - spin_lock_irqsave(&port->lock, flags); - start = (unsigned char*)port->readp; /* cast away volatile */ - end = (unsigned char*)port->writep; /* cast away volatile */ - spin_unlock_irqrestore(&port->lock, flags); - } - - /* Lazy read, never return wrapped data. */ - if (port->full) - avail = port->in_buffer_size; - else if (end > start) - avail = end - start; - else - avail = port->flip + port->in_buffer_size - start; - - count = count > avail ? avail : count; - if (copy_to_user(buf, start, count)) - return -EFAULT; - /* Disable interrupts while updating readp */ - spin_lock_irqsave(&port->lock, flags); - port->readp += count; - if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? 
*/ - port->readp = port->flip; - port->full = 0; - spin_unlock_irqrestore(&port->lock, flags); - DEBUGREAD(printk("r %d\n", count)); - return count; + return __sync_serial_read(file, buf, count, ppos, NULL); } -static void send_word(sync_port* port) +#ifdef SYNC_SER_MANUAL +static void send_word(struct sync_port *port) { reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); reg_sser_rw_tr_data tr_data = {0}; - switch(tr_cfg.sample_size) + switch (tr_cfg.sample_size) { + case 8: + port->out_buf_count--; + tr_data.data = *port->out_rd_ptr++; + REG_WR(sser, port->regi_sser, rw_tr_data, tr_data); + if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE) + port->out_rd_ptr = port->out_buffer; + break; + case 12: { - case 8: - port->out_buf_count--; - tr_data.data = *port->out_rd_ptr++; - REG_WR(sser, port->regi_sser, rw_tr_data, tr_data); - if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE) - port->out_rd_ptr = port->out_buffer; - break; - case 12: - { int data = (*port->out_rd_ptr++) << 8; data |= *port->out_rd_ptr++; port->out_buf_count -= 2; @@ -1200,8 +1222,8 @@ static void send_word(sync_port* port) REG_WR(sser, port->regi_sser, rw_tr_data, tr_data); if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE) port->out_rd_ptr = port->out_buffer; + break; } - break; case 16: port->out_buf_count -= 2; tr_data.data = *(unsigned short *)port->out_rd_ptr; @@ -1233,27 +1255,28 @@ static void send_word(sync_port* port) break; } } +#endif -static void start_dma_out(struct sync_port *port, - const char *data, int count) +#ifdef SYNC_SER_DMA +static void start_dma_out(struct sync_port *port, const char *data, int count) { - port->active_tr_descr->buf = (char *) virt_to_phys((char *) data); + port->active_tr_descr->buf = (char *)virt_to_phys((char *)data); port->active_tr_descr->after = port->active_tr_descr->buf + count; port->active_tr_descr->intr = 1; port->active_tr_descr->eol = 1; port->prev_tr_descr->eol = 0; - DEBUGTRDMA(printk(KERN_DEBUG 
"Inserting eolr:%p eol@:%p\n", + DEBUGTRDMA(pr_info("Inserting eolr:%p eol@:%p\n", port->prev_tr_descr, port->active_tr_descr)); port->prev_tr_descr = port->active_tr_descr; - port->active_tr_descr = phys_to_virt((int) port->active_tr_descr->next); + port->active_tr_descr = phys_to_virt((int)port->active_tr_descr->next); if (!port->tr_running) { reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); - port->out_context.next = 0; + port->out_context.next = NULL; port->out_context.saved_data = (dma_descr_data *)virt_to_phys(port->prev_tr_descr); port->out_context.saved_data_buf = port->prev_tr_descr->buf; @@ -1263,57 +1286,58 @@ static void start_dma_out(struct sync_port *port, tr_cfg.tr_en = regk_sser_yes; REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg); - DEBUGTRDMA(printk(KERN_DEBUG "dma s\n");); + DEBUGTRDMA(pr_info(KERN_INFO "dma s\n");); } else { DMA_CONTINUE_DATA(port->regi_dmaout); - DEBUGTRDMA(printk(KERN_DEBUG "dma c\n");); + DEBUGTRDMA(pr_info("dma c\n");); } port->tr_running = 1; } -static void start_dma_in(sync_port *port) +static void start_dma_in(struct sync_port *port) { int i; char *buf; + unsigned long flags; + spin_lock_irqsave(&port->lock, flags); port->writep = port->flip; + spin_unlock_irqrestore(&port->lock, flags); - if (port->writep > port->flip + port->in_buffer_size) { - panic("Offset too large in sync serial driver\n"); - return; - } - buf = (char*)virt_to_phys(port->in_buffer); + buf = (char *)virt_to_phys(port->in_buffer); for (i = 0; i < NBR_IN_DESCR; i++) { port->in_descr[i].buf = buf; port->in_descr[i].after = buf + port->inbufchunk; port->in_descr[i].intr = 1; - port->in_descr[i].next = (dma_descr_data*)virt_to_phys(&port->in_descr[i+1]); + port->in_descr[i].next = + (dma_descr_data *)virt_to_phys(&port->in_descr[i+1]); port->in_descr[i].buf = buf; buf += port->inbufchunk; } /* Link the last descriptor to the first */ - port->in_descr[i-1].next = (dma_descr_data*)virt_to_phys(&port->in_descr[0]); + 
port->in_descr[i-1].next = + (dma_descr_data *)virt_to_phys(&port->in_descr[0]); port->in_descr[i-1].eol = regk_sser_yes; port->next_rx_desc = &port->in_descr[0]; port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR - 1]; - port->in_context.saved_data = (dma_descr_data*)virt_to_phys(&port->in_descr[0]); + port->in_context.saved_data = + (dma_descr_data *)virt_to_phys(&port->in_descr[0]); port->in_context.saved_data_buf = port->in_descr[0].buf; DMA_START_CONTEXT(port->regi_dmain, virt_to_phys(&port->in_context)); } -#ifdef SYNC_SER_DMA static irqreturn_t tr_interrupt(int irq, void *dev_id) { reg_dma_r_masked_intr masked; - reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes}; + reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes }; reg_dma_rw_stat stat; int i; int found = 0; int stop_sser = 0; for (i = 0; i < NBR_PORTS; i++) { - sync_port *port = &ports[i]; - if (!port->enabled || !port->use_dma) + struct sync_port *port = &ports[i]; + if (!port->enabled || !port->use_dma) continue; /* IRQ active for the port? */ @@ -1338,19 +1362,20 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) int sent; sent = port->catch_tr_descr->after - port->catch_tr_descr->buf; - DEBUGTXINT(printk(KERN_DEBUG "%-4d - %-4d = %-4d\t" - "in descr %p (ac: %p)\n", - port->out_buf_count, sent, - port->out_buf_count - sent, - port->catch_tr_descr, - port->active_tr_descr);); + DEBUGTXINT(pr_info("%-4d - %-4d = %-4d\t" + "in descr %p (ac: %p)\n", + port->out_buf_count, sent, + port->out_buf_count - sent, + port->catch_tr_descr, + port->active_tr_descr);); port->out_buf_count -= sent; port->catch_tr_descr = phys_to_virt((int) port->catch_tr_descr->next); port->out_rd_ptr = phys_to_virt((int) port->catch_tr_descr->buf); } else { - int i, sent; + reg_sser_rw_tr_cfg tr_cfg; + int j, sent; /* EOL handler. 
* Note that if an EOL was encountered during the irq * locked section of sync_ser_write the DMA will be @@ -1358,11 +1383,11 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) * The remaining descriptors will be traversed by * the descriptor interrupts as usual. */ - i = 0; + j = 0; while (!port->catch_tr_descr->eol) { sent = port->catch_tr_descr->after - port->catch_tr_descr->buf; - DEBUGOUTBUF(printk(KERN_DEBUG + DEBUGOUTBUF(pr_info( "traversing descr %p -%d (%d)\n", port->catch_tr_descr, sent, @@ -1370,16 +1395,15 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) port->out_buf_count -= sent; port->catch_tr_descr = phys_to_virt( (int)port->catch_tr_descr->next); - i++; - if (i >= NBR_OUT_DESCR) { + j++; + if (j >= NBR_OUT_DESCR) { /* TODO: Reset and recover */ panic("sync_serial: missing eol"); } } sent = port->catch_tr_descr->after - port->catch_tr_descr->buf; - DEBUGOUTBUF(printk(KERN_DEBUG - "eol at descr %p -%d (%d)\n", + DEBUGOUTBUF(pr_info("eol at descr %p -%d (%d)\n", port->catch_tr_descr, sent, port->out_buf_count)); @@ -1394,15 +1418,13 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) OUT_BUFFER_SIZE) port->out_rd_ptr = port->out_buffer; - reg_sser_rw_tr_cfg tr_cfg = - REG_RD(sser, port->regi_sser, rw_tr_cfg); - DEBUGTXINT(printk(KERN_DEBUG + tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); + DEBUGTXINT(pr_info( "tr_int DMA stop %d, set catch @ %p\n", port->out_buf_count, port->active_tr_descr)); if (port->out_buf_count != 0) - printk(KERN_CRIT "sync_ser: buffer not " - "empty after eol.\n"); + pr_err("sync_ser: buf not empty after eol\n"); port->catch_tr_descr = port->active_tr_descr; port->tr_running = 0; tr_cfg.tr_en = regk_sser_no; @@ -1414,62 +1436,79 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) return IRQ_RETVAL(found); } /* tr_interrupt */ + +static inline void handle_rx_packet(struct sync_port *port) +{ + int idx; + reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes }; + unsigned long flags; + + 
DEBUGRXINT(pr_info(KERN_INFO "!")); + spin_lock_irqsave(&port->lock, flags); + + /* If we overrun the user experience is crap regardless if we + * drop new or old data. Its much easier to get it right when + * dropping new data so lets do that. + */ + if ((port->writep + port->inbufchunk <= + port->flip + port->in_buffer_size) && + (port->in_buffer_len + port->inbufchunk < IN_BUFFER_SIZE)) { + memcpy(port->writep, + phys_to_virt((unsigned)port->next_rx_desc->buf), + port->inbufchunk); + port->writep += port->inbufchunk; + if (port->writep >= port->flip + port->in_buffer_size) + port->writep = port->flip; + + /* Timestamp the new data chunk. */ + if (port->write_ts_idx == NBR_IN_DESCR) + port->write_ts_idx = 0; + idx = port->write_ts_idx++; + do_posix_clock_monotonic_gettime(&port->timestamp[idx]); + port->in_buffer_len += port->inbufchunk; + } + spin_unlock_irqrestore(&port->lock, flags); + + port->next_rx_desc->eol = 1; + port->prev_rx_desc->eol = 0; + /* Cache bug workaround */ + flush_dma_descr(port->prev_rx_desc, 0); + port->prev_rx_desc = port->next_rx_desc; + port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next); + /* Cache bug workaround */ + flush_dma_descr(port->prev_rx_desc, 1); + /* wake up the waiting process */ + wake_up_interruptible(&port->in_wait_q); + DMA_CONTINUE(port->regi_dmain); + REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr); + +} + static irqreturn_t rx_interrupt(int irq, void *dev_id) { reg_dma_r_masked_intr masked; - reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes}; int i; int found = 0; - for (i = 0; i < NBR_PORTS; i++) - { - sync_port *port = &ports[i]; + DEBUG(pr_info("rx_interrupt\n")); + + for (i = 0; i < NBR_PORTS; i++) { + struct sync_port *port = &ports[i]; - if (!port->enabled || !port->use_dma ) + if (!port->enabled || !port->use_dma) continue; masked = REG_RD(dma, port->regi_dmain, r_masked_intr); - if (masked.data) /* Descriptor interrupt */ - { - found = 1; - while (REG_RD(dma, port->regi_dmain, 
rw_data) != - virt_to_phys(port->next_rx_desc)) { - DEBUGRXINT(printk(KERN_DEBUG "!")); - if (port->writep + port->inbufchunk > port->flip + port->in_buffer_size) { - int first_size = port->flip + port->in_buffer_size - port->writep; - memcpy((char*)port->writep, phys_to_virt((unsigned)port->next_rx_desc->buf), first_size); - memcpy(port->flip, phys_to_virt((unsigned)port->next_rx_desc->buf+first_size), port->inbufchunk - first_size); - port->writep = port->flip + port->inbufchunk - first_size; - } else { - memcpy((char*)port->writep, - phys_to_virt((unsigned)port->next_rx_desc->buf), - port->inbufchunk); - port->writep += port->inbufchunk; - if (port->writep >= port->flip + port->in_buffer_size) - port->writep = port->flip; - } - if (port->writep == port->readp) - { - port->full = 1; - } - - port->next_rx_desc->eol = 1; - port->prev_rx_desc->eol = 0; - /* Cache bug workaround */ - flush_dma_descr(port->prev_rx_desc, 0); - port->prev_rx_desc = port->next_rx_desc; - port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next); - /* Cache bug workaround */ - flush_dma_descr(port->prev_rx_desc, 1); - /* wake up the waiting process */ - wake_up_interruptible(&port->in_wait_q); - DMA_CONTINUE(port->regi_dmain); - REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr); + if (!masked.data) + continue; - } - } + /* Descriptor interrupt */ + found = 1; + while (REG_RD(dma, port->regi_dmain, rw_data) != + virt_to_phys(port->next_rx_desc)) + handle_rx_packet(port); } return IRQ_RETVAL(found); } /* rx_interrupt */ @@ -1478,75 +1517,83 @@ static irqreturn_t rx_interrupt(int irq, void *dev_id) #ifdef SYNC_SER_MANUAL static irqreturn_t manual_interrupt(int irq, void *dev_id) { + unsigned long flags; int i; int found = 0; reg_sser_r_masked_intr masked; - for (i = 0; i < NBR_PORTS; i++) - { - sync_port *port = &ports[i]; + for (i = 0; i < NBR_PORTS; i++) { + struct sync_port *port = &ports[i]; if (!port->enabled || port->use_dma) - { continue; - } masked = REG_RD(sser, 
port->regi_sser, r_masked_intr); - if (masked.rdav) /* Data received? */ - { - reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg); - reg_sser_r_rec_data data = REG_RD(sser, port->regi_sser, r_rec_data); + /* Data received? */ + if (masked.rdav) { + reg_sser_rw_rec_cfg rec_cfg = + REG_RD(sser, port->regi_sser, rw_rec_cfg); + reg_sser_r_rec_data data = REG_RD(sser, + port->regi_sser, r_rec_data); found = 1; /* Read data */ - switch(rec_cfg.sample_size) - { + spin_lock_irqsave(&port->lock, flags); + switch (rec_cfg.sample_size) { case 8: *port->writep++ = data.data & 0xff; break; case 12: *port->writep = (data.data & 0x0ff0) >> 4; *(port->writep + 1) = data.data & 0x0f; - port->writep+=2; + port->writep += 2; break; case 16: - *(unsigned short*)port->writep = data.data; - port->writep+=2; + *(unsigned short *)port->writep = data.data; + port->writep += 2; break; case 24: - *(unsigned int*)port->writep = data.data; - port->writep+=3; + *(unsigned int *)port->writep = data.data; + port->writep += 3; break; case 32: - *(unsigned int*)port->writep = data.data; - port->writep+=4; + *(unsigned int *)port->writep = data.data; + port->writep += 4; break; } - if (port->writep >= port->flip + port->in_buffer_size) /* Wrap? */ + /* Wrap? */ + if (port->writep >= port->flip + port->in_buffer_size) port->writep = port->flip; if (port->writep == port->readp) { - /* receive buffer overrun, discard oldest data - */ + /* Receive buf overrun, discard oldest data */ port->readp++; - if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */ + /* Wrap? */ + if (port->readp >= port->flip + + port->in_buffer_size) port->readp = port->flip; } + spin_unlock_irqrestore(&port->lock, flags); if (sync_data_avail(port) >= port->inbufchunk) - wake_up_interruptible(&port->in_wait_q); /* Wake up application */ + /* Wake up application */ + wake_up_interruptible(&port->in_wait_q); } - if (masked.trdy) /* Transmitter ready? */ - { + /* Transmitter ready? 
*/ + if (masked.trdy) { found = 1; - if (port->out_buf_count > 0) /* More data to send */ + /* More data to send */ + if (port->out_buf_count > 0) send_word(port); - else /* transmission finished */ - { + else { + /* Transmission finished */ reg_sser_rw_intr_mask intr_mask; - intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask); + intr_mask = REG_RD(sser, port->regi_sser, + rw_intr_mask); intr_mask.trdy = 0; - REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask); - wake_up_interruptible(&port->out_wait_q); /* Wake up application */ + REG_WR(sser, port->regi_sser, + rw_intr_mask, intr_mask); + /* Wake up application */ + wake_up_interruptible(&port->out_wait_q); } } } @@ -1554,4 +1601,109 @@ static irqreturn_t manual_interrupt(int irq, void *dev_id) } #endif +static int __init etrax_sync_serial_init(void) +{ +#if 1 + /* This code will be removed when we move to udev for all devices. */ + syncser_first = MKDEV(SYNC_SERIAL_MAJOR, 0); + if (register_chrdev_region(syncser_first, minor_count, SYNCSER_NAME)) { + pr_err("Failed to register major %d\n", SYNC_SERIAL_MAJOR); + return -1; + } +#else + /* Allocate dynamic major number. */ + if (alloc_chrdev_region(&syncser_first, 0, minor_count, SYNCSER_NAME)) { + pr_err("Failed to allocate character device region\n"); + return -1; + } +#endif + syncser_cdev = cdev_alloc(); + if (!syncser_cdev) { + pr_err("Failed to allocate cdev for syncser\n"); + unregister_chrdev_region(syncser_first, minor_count); + return -1; + } + cdev_init(syncser_cdev, &syncser_fops); + + /* Create a sysfs class for syncser */ + syncser_class = class_create(THIS_MODULE, "syncser_class"); + + /* Initialize Ports */ +#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) + if (artpec_pinmux_alloc_fixed(PINMUX_SSER0)) { + pr_warn("Unable to alloc pins for synchronous serial port 0\n"); + unregister_chrdev_region(syncser_first, minor_count); + return -EIO; + } + initialize_port(0); + ports[0].enabled = 1; + /* Register with sysfs so udev can pick it up. 
*/ + device_create(syncser_class, NULL, syncser_first, NULL, + "%s%d", SYNCSER_NAME, 0); +#endif + +#if defined(CONFIG_ETRAXFS) && defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) + if (artpec_pinmux_alloc_fixed(PINMUX_SSER1)) { + pr_warn("Unable to alloc pins for synchronous serial port 1\n"); + unregister_chrdev_region(syncser_first, minor_count); + class_destroy(syncser_class); + return -EIO; + } + initialize_port(1); + ports[1].enabled = 1; + /* Register with sysfs so udev can pick it up. */ + device_create(syncser_class, NULL, syncser_first, NULL, + "%s%d", SYNCSER_NAME, 0); +#endif + + /* Add it to system */ + if (cdev_add(syncser_cdev, syncser_first, minor_count) < 0) { + pr_err("Failed to add syncser as char device\n"); + device_destroy(syncser_class, syncser_first); + class_destroy(syncser_class); + cdev_del(syncser_cdev); + unregister_chrdev_region(syncser_first, minor_count); + return -1; + } + + + pr_info("ARTPEC synchronous serial port (%s: %d, %d)\n", + SYNCSER_NAME, MAJOR(syncser_first), MINOR(syncser_first)); + + return 0; +} + +static void __exit etrax_sync_serial_exit(void) +{ + int i; + device_destroy(syncser_class, syncser_first); + class_destroy(syncser_class); + + if (syncser_cdev) { + cdev_del(syncser_cdev); + unregister_chrdev_region(syncser_first, minor_count); + } + for (i = 0; i < NBR_PORTS; i++) { + struct sync_port *port = &ports[i]; + if (port->init_irqs == dma_irq_setup) { + /* Free dma irqs and dma channels. */ +#ifdef SYNC_SER_DMA + artpec_free_dma(port->dma_in_nbr); + artpec_free_dma(port->dma_out_nbr); + free_irq(port->dma_out_intr_vect, port); + free_irq(port->dma_in_intr_vect, port); +#endif + } else if (port->init_irqs == manual_irq_setup) { + /* Free manual irq. 
*/ + free_irq(port->syncser_intr_vect, port); + } + } + + pr_info("ARTPEC synchronous serial port unregistered\n"); +} + module_init(etrax_sync_serial_init); +module_exit(etrax_sync_serial_exit); + +MODULE_LICENSE("GPL"); + diff --git a/arch/cris/arch-v32/kernel/debugport.c b/arch/cris/arch-v32/kernel/debugport.c index 610909b003f6..02e33ebe51ec 100644 --- a/arch/cris/arch-v32/kernel/debugport.c +++ b/arch/cris/arch-v32/kernel/debugport.c @@ -3,7 +3,9 @@ */ #include <linux/console.h> +#include <linux/kernel.h> #include <linux/init.h> +#include <linux/string.h> #include <hwregs/reg_rdwr.h> #include <hwregs/reg_map.h> #include <hwregs/ser_defs.h> @@ -65,6 +67,7 @@ struct dbg_port ports[] = }, #endif }; + static struct dbg_port *port = #if defined(CONFIG_ETRAX_DEBUG_PORT0) &ports[0]; @@ -97,14 +100,19 @@ static struct dbg_port *kgdb_port = #endif #endif -static void -start_port(struct dbg_port* p) +static void start_port(struct dbg_port *p) { - if (!p) - return; + /* Set up serial port registers */ + reg_ser_rw_tr_ctrl tr_ctrl = {0}; + reg_ser_rw_tr_dma_en tr_dma_en = {0}; - if (p->started) + reg_ser_rw_rec_ctrl rec_ctrl = {0}; + reg_ser_rw_tr_baud_div tr_baud_div = {0}; + reg_ser_rw_rec_baud_div rec_baud_div = {0}; + + if (!p || p->started) return; + p->started = 1; if (p->nbr == 1) @@ -118,36 +126,24 @@ start_port(struct dbg_port* p) crisv32_pinmux_alloc_fixed(pinmux_ser4); #endif - /* Set up serial port registers */ - reg_ser_rw_tr_ctrl tr_ctrl = {0}; - reg_ser_rw_tr_dma_en tr_dma_en = {0}; - - reg_ser_rw_rec_ctrl rec_ctrl = {0}; - reg_ser_rw_tr_baud_div tr_baud_div = {0}; - reg_ser_rw_rec_baud_div rec_baud_div = {0}; - tr_ctrl.base_freq = rec_ctrl.base_freq = regk_ser_f29_493; tr_dma_en.en = rec_ctrl.dma_mode = regk_ser_no; tr_baud_div.div = rec_baud_div.div = 29493000 / p->baudrate / 8; tr_ctrl.en = rec_ctrl.en = 1; - if (p->parity == 'O') - { + if (p->parity == 'O') { tr_ctrl.par_en = regk_ser_yes; tr_ctrl.par = regk_ser_odd; rec_ctrl.par_en = regk_ser_yes; 
rec_ctrl.par = regk_ser_odd; - } - else if (p->parity == 'E') - { + } else if (p->parity == 'E') { tr_ctrl.par_en = regk_ser_yes; tr_ctrl.par = regk_ser_even; rec_ctrl.par_en = regk_ser_yes; rec_ctrl.par = regk_ser_odd; } - if (p->bits == 7) - { + if (p->bits == 7) { tr_ctrl.data_bits = regk_ser_bits7; rec_ctrl.data_bits = regk_ser_bits7; } @@ -161,8 +157,7 @@ start_port(struct dbg_port* p) #ifdef CONFIG_ETRAX_KGDB /* Use polling to get a single character from the kernel debug port */ -int -getDebugChar(void) +int getDebugChar(void) { reg_ser_rs_stat_din stat; reg_ser_rw_ack_intr ack_intr = { 0 }; @@ -179,8 +174,7 @@ getDebugChar(void) } /* Use polling to put a single character to the kernel debug port */ -void -putDebugChar(int val) +void putDebugChar(int val) { reg_ser_r_stat_din stat; do { @@ -190,12 +184,48 @@ putDebugChar(int val) } #endif /* CONFIG_ETRAX_KGDB */ +static void __init early_putch(int c) +{ + reg_ser_r_stat_din stat; + /* Wait until transmitter is ready and send. */ + do + stat = REG_RD(ser, port->instance, r_stat_din); + while (!stat.tr_rdy); + REG_WR_INT(ser, port->instance, rw_dout, c); +} + +static void __init +early_console_write(struct console *con, const char *s, unsigned n) +{ + extern void reset_watchdog(void); + int i; + + /* Send data. */ + for (i = 0; i < n; i++) { + /* TODO: the '\n' -> '\n\r' translation should be done at the + receiver. Remove it when the serial driver removes it. */ + if (s[i] == '\n') + early_putch('\r'); + early_putch(s[i]); + reset_watchdog(); + } +} + +static struct console early_console_dev __initdata = { + .name = "early", + .write = early_console_write, + .flags = CON_PRINTBUFFER | CON_BOOT, + .index = -1 +}; + /* Register console for printk's, etc. */ -int __init -init_etrax_debug(void) +int __init init_etrax_debug(void) { start_port(port); + /* Register an early console if a debug port was chosen. 
*/ + register_console(&early_console_dev); + #ifdef CONFIG_ETRAX_KGDB start_port(kgdb_port); #endif /* CONFIG_ETRAX_KGDB */ diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c index ee66866538f8..eb74dabbeb96 100644 --- a/arch/cris/arch-v32/kernel/time.c +++ b/arch/cris/arch-v32/kernel/time.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/threads.h> #include <linux/cpufreq.h> +#include <linux/mm.h> #include <asm/types.h> #include <asm/signal.h> #include <asm/io.h> @@ -56,7 +57,6 @@ static int __init etrax_init_cont_rotime(void) } arch_initcall(etrax_init_cont_rotime); - unsigned long timer_regs[NR_CPUS] = { regi_timer0, @@ -68,9 +68,8 @@ unsigned long timer_regs[NR_CPUS] = extern int set_rtc_mmss(unsigned long nowtime); #ifdef CONFIG_CPU_FREQ -static int -cris_time_freq_notifier(struct notifier_block *nb, unsigned long val, - void *data); +static int cris_time_freq_notifier(struct notifier_block *nb, + unsigned long val, void *data); static struct notifier_block cris_time_freq_notifier_block = { .notifier_call = cris_time_freq_notifier, @@ -87,7 +86,6 @@ unsigned long get_ns_in_jiffie(void) return ns; } - /* From timer MDS describing the hardware watchdog: * 4.3.1 Watchdog Operation * The watchdog timer is an 8-bit timer with a configurable start value. @@ -109,11 +107,18 @@ static short int watchdog_key = 42; /* arbitrary 7 bit number */ * is used though, so set this really low. */ #define WATCHDOG_MIN_FREE_PAGES 8 +/* for reliable NICE_DOGGY behaviour */ +static int bite_in_progress; + void reset_watchdog(void) { #if defined(CONFIG_ETRAX_WATCHDOG) reg_timer_rw_wd_ctrl wd_ctrl = { 0 }; +#if defined(CONFIG_ETRAX_WATCHDOG_NICE_DOGGY) + if (unlikely(bite_in_progress)) + return; +#endif /* Only keep watchdog happy as long as we have memory left! 
*/ if(nr_free_pages() > WATCHDOG_MIN_FREE_PAGES) { /* Reset the watchdog with the inverse of the old key */ @@ -148,7 +153,9 @@ void handle_watchdog_bite(struct pt_regs *regs) #if defined(CONFIG_ETRAX_WATCHDOG) extern int cause_of_death; + nmi_enter(); oops_in_progress = 1; + bite_in_progress = 1; printk(KERN_WARNING "Watchdog bite\n"); /* Check if forced restart or unexpected watchdog */ @@ -170,6 +177,7 @@ void handle_watchdog_bite(struct pt_regs *regs) printk(KERN_WARNING "Oops: bitten by watchdog\n"); show_registers(regs); oops_in_progress = 0; + printk("\n"); /* Flush mtdoops. */ #ifndef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY reset_watchdog(); #endif @@ -202,7 +210,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id) /* Reset watchdog otherwise it resets us! */ reset_watchdog(); - /* Update statistics. */ + /* Update statistics. */ update_process_times(user_mode(regs)); cris_do_profile(regs); /* Save profiling information */ @@ -213,7 +221,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id) /* Call the real timer interrupt handler */ xtime_update(1); - return IRQ_HANDLED; + return IRQ_HANDLED; } /* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */ @@ -293,14 +301,13 @@ void __init time_init(void) #ifdef CONFIG_CPU_FREQ cpufreq_register_notifier(&cris_time_freq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); + CPUFREQ_TRANSITION_NOTIFIER); #endif } #ifdef CONFIG_CPU_FREQ -static int -cris_time_freq_notifier(struct notifier_block *nb, unsigned long val, - void *data) +static int cris_time_freq_notifier(struct notifier_block *nb, + unsigned long val, void *data) { struct cpufreq_freqs *freqs = data; if (val == CPUFREQ_POSTCHANGE) { diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c index 0b5b70d5f58a..f0f335d8aa79 100644 --- a/arch/cris/arch-v32/lib/usercopy.c +++ b/arch/cris/arch-v32/lib/usercopy.c @@ -26,8 +26,7 @@ /* Copy to userspace. 
This is based on the memcpy used for kernel-to-kernel copying; see "string.c". */ -unsigned long -__copy_user (void __user *pdst, const void *psrc, unsigned long pn) +unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -155,13 +154,13 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn) return retn; } +EXPORT_SYMBOL(__copy_user); /* Copy from user to kernel, zeroing the bytes that were inaccessible in userland. The return-value is the number of bytes that were inaccessible. */ - -unsigned long -__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn) +unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, + unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -321,11 +320,10 @@ copy_exception_bytes: return retn + n; } +EXPORT_SYMBOL(__copy_user_zeroing); /* Zero userspace. */ - -unsigned long -__do_clear_user (void __user *pto, unsigned long pn) +unsigned long __do_clear_user(void __user *pto, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. 
@@ -468,3 +466,4 @@ __do_clear_user (void __user *pto, unsigned long pn) return retn; } +EXPORT_SYMBOL(__do_clear_user); diff --git a/arch/cris/arch-v32/mach-fs/pinmux.c b/arch/cris/arch-v32/mach-fs/pinmux.c index 38f29eec14a6..05a04708b8eb 100644 --- a/arch/cris/arch-v32/mach-fs/pinmux.c +++ b/arch/cris/arch-v32/mach-fs/pinmux.c @@ -26,7 +26,29 @@ static DEFINE_SPINLOCK(pinmux_lock); static void crisv32_pinmux_set(int port); -int crisv32_pinmux_init(void) +static int __crisv32_pinmux_alloc(int port, int first_pin, int last_pin, + enum pin_mode mode) +{ + int i; + + for (i = first_pin; i <= last_pin; i++) { + if ((pins[port][i] != pinmux_none) + && (pins[port][i] != pinmux_gpio) + && (pins[port][i] != mode)) { +#ifdef DEBUG + panic("Pinmux alloc failed!\n"); +#endif + return -EPERM; + } + } + + for (i = first_pin; i <= last_pin; i++) + pins[port][i] = mode; + + crisv32_pinmux_set(port); +} + +static int crisv32_pinmux_init(void) { static int initialized; @@ -37,20 +59,20 @@ int crisv32_pinmux_init(void) pa.pa0 = pa.pa1 = pa.pa2 = pa.pa3 = pa.pa4 = pa.pa5 = pa.pa6 = pa.pa7 = regk_pinmux_yes; REG_WR(pinmux, regi_pinmux, rw_pa, pa); - crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio); - crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio); - crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio); - crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio); + __crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio); + __crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio); + __crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio); + __crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio); } return 0; } -int -crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode mode) +int crisv32_pinmux_alloc(int port, int first_pin, int last_pin, + enum pin_mode mode) { - int i; unsigned long flags; + int ret; crisv32_pinmux_init(); @@ -59,26 +81,11 @@ crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum 
pin_mode mode) spin_lock_irqsave(&pinmux_lock, flags); - for (i = first_pin; i <= last_pin; i++) { - if ((pins[port][i] != pinmux_none) - && (pins[port][i] != pinmux_gpio) - && (pins[port][i] != mode)) { - spin_unlock_irqrestore(&pinmux_lock, flags); -#ifdef DEBUG - panic("Pinmux alloc failed!\n"); -#endif - return -EPERM; - } - } - - for (i = first_pin; i <= last_pin; i++) - pins[port][i] = mode; - - crisv32_pinmux_set(port); + ret = __crisv32_pinmux_alloc(port, first_pin, last_pin, mode); spin_unlock_irqrestore(&pinmux_lock, flags); - return 0; + return ret; } int crisv32_pinmux_alloc_fixed(enum fixed_function function) @@ -98,58 +105,58 @@ int crisv32_pinmux_alloc_fixed(enum fixed_function function) switch (function) { case pinmux_ser1: - ret = crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed); hwprot.ser1 = regk_pinmux_yes; break; case pinmux_ser2: - ret = crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed); hwprot.ser2 = regk_pinmux_yes; break; case pinmux_ser3: - ret = crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed); hwprot.ser3 = regk_pinmux_yes; break; case pinmux_sser0: - ret = crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed); hwprot.sser0 = regk_pinmux_yes; break; case pinmux_sser1: - ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed); hwprot.sser1 = regk_pinmux_yes; break; case pinmux_ata0: - ret = crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed); 
hwprot.ata0 = regk_pinmux_yes; break; case pinmux_ata1: - ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed); hwprot.ata1 = regk_pinmux_yes; break; case pinmux_ata2: - ret = crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed); hwprot.ata2 = regk_pinmux_yes; break; case pinmux_ata3: - ret = crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed); hwprot.ata2 = regk_pinmux_yes; break; case pinmux_ata: - ret = crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed); hwprot.ata = regk_pinmux_yes; break; case pinmux_eth1: - ret = crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed); hwprot.eth1 = regk_pinmux_yes; hwprot.eth1_mgm = regk_pinmux_yes; break; case pinmux_timer: - ret = crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed); hwprot.timer = regk_pinmux_yes; spin_unlock_irqrestore(&pinmux_lock, flags); return ret; @@ -188,9 +195,19 @@ void crisv32_pinmux_set(int port) #endif } -int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin) +static int __crisv32_pinmux_dealloc(int port, int first_pin, int last_pin) { int i; + + for (i = first_pin; i <= last_pin; i++) + pins[port][i] = pinmux_none; + + crisv32_pinmux_set(port); + return 0; 
+} + +int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin) +{ unsigned long flags; crisv32_pinmux_init(); @@ -199,11 +216,7 @@ int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin) return -EINVAL; spin_lock_irqsave(&pinmux_lock, flags); - - for (i = first_pin; i <= last_pin; i++) - pins[port][i] = pinmux_none; - - crisv32_pinmux_set(port); + __crisv32_pinmux_dealloc(port, first_pin, last_pin); spin_unlock_irqrestore(&pinmux_lock, flags); return 0; @@ -226,58 +239,58 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function) switch (function) { case pinmux_ser1: - ret = crisv32_pinmux_dealloc(PORT_C, 4, 7); + ret = __crisv32_pinmux_dealloc(PORT_C, 4, 7); hwprot.ser1 = regk_pinmux_no; break; case pinmux_ser2: - ret = crisv32_pinmux_dealloc(PORT_C, 8, 11); + ret = __crisv32_pinmux_dealloc(PORT_C, 8, 11); hwprot.ser2 = regk_pinmux_no; break; case pinmux_ser3: - ret = crisv32_pinmux_dealloc(PORT_C, 12, 15); + ret = __crisv32_pinmux_dealloc(PORT_C, 12, 15); hwprot.ser3 = regk_pinmux_no; break; case pinmux_sser0: - ret = crisv32_pinmux_dealloc(PORT_C, 0, 3); - ret |= crisv32_pinmux_dealloc(PORT_C, 16, 16); + ret = __crisv32_pinmux_dealloc(PORT_C, 0, 3); + ret |= __crisv32_pinmux_dealloc(PORT_C, 16, 16); hwprot.sser0 = regk_pinmux_no; break; case pinmux_sser1: - ret = crisv32_pinmux_dealloc(PORT_D, 0, 4); + ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4); hwprot.sser1 = regk_pinmux_no; break; case pinmux_ata0: - ret = crisv32_pinmux_dealloc(PORT_D, 5, 7); - ret |= crisv32_pinmux_dealloc(PORT_D, 15, 17); + ret = __crisv32_pinmux_dealloc(PORT_D, 5, 7); + ret |= __crisv32_pinmux_dealloc(PORT_D, 15, 17); hwprot.ata0 = regk_pinmux_no; break; case pinmux_ata1: - ret = crisv32_pinmux_dealloc(PORT_D, 0, 4); - ret |= crisv32_pinmux_dealloc(PORT_E, 17, 17); + ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4); + ret |= __crisv32_pinmux_dealloc(PORT_E, 17, 17); hwprot.ata1 = regk_pinmux_no; break; case pinmux_ata2: - ret = crisv32_pinmux_dealloc(PORT_C, 11, 
15); - ret |= crisv32_pinmux_dealloc(PORT_E, 3, 3); + ret = __crisv32_pinmux_dealloc(PORT_C, 11, 15); + ret |= __crisv32_pinmux_dealloc(PORT_E, 3, 3); hwprot.ata2 = regk_pinmux_no; break; case pinmux_ata3: - ret = crisv32_pinmux_dealloc(PORT_C, 8, 10); - ret |= crisv32_pinmux_dealloc(PORT_C, 0, 2); + ret = __crisv32_pinmux_dealloc(PORT_C, 8, 10); + ret |= __crisv32_pinmux_dealloc(PORT_C, 0, 2); hwprot.ata2 = regk_pinmux_no; break; case pinmux_ata: - ret = crisv32_pinmux_dealloc(PORT_B, 0, 15); - ret |= crisv32_pinmux_dealloc(PORT_D, 8, 15); + ret = __crisv32_pinmux_dealloc(PORT_B, 0, 15); + ret |= __crisv32_pinmux_dealloc(PORT_D, 8, 15); hwprot.ata = regk_pinmux_no; break; case pinmux_eth1: - ret = crisv32_pinmux_dealloc(PORT_E, 0, 17); + ret = __crisv32_pinmux_dealloc(PORT_E, 0, 17); hwprot.eth1 = regk_pinmux_no; hwprot.eth1_mgm = regk_pinmux_no; break; case pinmux_timer: - ret = crisv32_pinmux_dealloc(PORT_C, 16, 16); + ret = __crisv32_pinmux_dealloc(PORT_C, 16, 16); hwprot.timer = regk_pinmux_no; spin_unlock_irqrestore(&pinmux_lock, flags); return ret; @@ -293,7 +306,8 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function) return ret; } -void crisv32_pinmux_dump(void) +#ifdef DEBUG +static void crisv32_pinmux_dump(void) { int i, j; @@ -305,5 +319,5 @@ void crisv32_pinmux_dump(void) printk(KERN_DEBUG " Pin %d = %d\n", j, pins[i][j]); } } - +#endif __initcall(crisv32_pinmux_init); diff --git a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h index c2b3036779df..09bf0c90d2d3 100644 --- a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h +++ b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h @@ -28,11 +28,9 @@ enum fixed_function { pinmux_timer }; -int crisv32_pinmux_init(void); int crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode); int crisv32_pinmux_alloc_fixed(enum fixed_function function); int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin); int 
crisv32_pinmux_dealloc_fixed(enum fixed_function function); -void crisv32_pinmux_dump(void); #endif diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index d5f124832fd1..889f2de050a3 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -1,8 +1,4 @@ -header-y += arch-v10/ -header-y += arch-v32/ - - generic-y += barrier.h generic-y += clkdev.h generic-y += cputime.h diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index 7d47b366ad82..01f66b8f15e5 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -1,8 +1,8 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm -header-y += arch-v10/ -header-y += arch-v32/ +header-y += ../arch-v10/arch/ +header-y += ../arch-v32/arch/ header-y += auxvec.h header-y += bitsperlong.h header-y += byteorder.h diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c index 5868cee20ebd..3908b942fd4c 100644 --- a/arch/cris/kernel/crisksyms.c +++ b/arch/cris/kernel/crisksyms.c @@ -47,16 +47,16 @@ EXPORT_SYMBOL(__negdi2); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(iounmap); -/* Userspace access functions */ -EXPORT_SYMBOL(__copy_user_zeroing); -EXPORT_SYMBOL(__copy_user); - #undef memcpy #undef memset extern void * memset(void *, int, __kernel_size_t); extern void * memcpy(void *, const void *, __kernel_size_t); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); +#ifdef CONFIG_ETRAX_ARCH_V32 +#undef strcmp +EXPORT_SYMBOL(strcmp); +#endif #ifdef CONFIG_ETRAX_FAST_TIMER /* Fast timer functions */ @@ -66,3 +66,4 @@ EXPORT_SYMBOL(del_fast_timer); EXPORT_SYMBOL(schedule_usleep); #endif EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_from_user); diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index 0ffda73734f5..da4c72401e27 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -14,6 +14,10 @@ #include <linux/init.h> #include <linux/module.h> +#include 
<linux/utsname.h> +#ifdef CONFIG_KALLSYMS +#include <linux/kallsyms.h> +#endif #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -34,25 +38,24 @@ static int kstack_depth_to_print = 24; void (*nmi_handler)(struct pt_regs *); -void -show_trace(unsigned long *stack) +void show_trace(unsigned long *stack) { unsigned long addr, module_start, module_end; extern char _stext, _etext; int i; - printk("\nCall Trace: "); + pr_err("\nCall Trace: "); i = 1; module_start = VMALLOC_START; module_end = VMALLOC_END; - while (((long)stack & (THREAD_SIZE-1)) != 0) { + while (((long)stack & (THREAD_SIZE - 1)) != 0) { if (__get_user(addr, stack)) { /* This message matches "failing address" marked s390 in ksymoops, so lines containing it will not be filtered out by ksymoops. */ - printk("Failing address 0x%lx\n", (unsigned long)stack); + pr_err("Failing address 0x%lx\n", (unsigned long)stack); break; } stack++; @@ -68,10 +71,14 @@ show_trace(unsigned long *stack) if (((addr >= (unsigned long)&_stext) && (addr <= (unsigned long)&_etext)) || ((addr >= module_start) && (addr <= module_end))) { +#ifdef CONFIG_KALLSYMS + print_ip_sym(addr); +#else if (i && ((i % 8) == 0)) - printk("\n "); - printk("[<%08lx>] ", addr); + pr_err("\n "); + pr_err("[<%08lx>] ", addr); i++; +#endif } } } @@ -111,21 +118,21 @@ show_stack(struct task_struct *task, unsigned long *sp) stack = sp; - printk("\nStack from %08lx:\n ", (unsigned long)stack); + pr_err("\nStack from %08lx:\n ", (unsigned long)stack); for (i = 0; i < kstack_depth_to_print; i++) { if (((long)stack & (THREAD_SIZE-1)) == 0) break; if (i && ((i % 8) == 0)) - printk("\n "); + pr_err("\n "); if (__get_user(addr, stack)) { /* This message matches "failing address" marked s390 in ksymoops, so lines containing it will not be filtered out by ksymoops. 
*/ - printk("Failing address 0x%lx\n", (unsigned long)stack); + pr_err("Failing address 0x%lx\n", (unsigned long)stack); break; } stack++; - printk("%08lx ", addr); + pr_err("%08lx ", addr); } show_trace(sp); } @@ -139,33 +146,32 @@ show_stack(void) unsigned long *sp = (unsigned long *)rdusp(); int i; - printk("Stack dump [0x%08lx]:\n", (unsigned long)sp); + pr_err("Stack dump [0x%08lx]:\n", (unsigned long)sp); for (i = 0; i < 16; i++) - printk("sp + %d: 0x%08lx\n", i*4, sp[i]); + pr_err("sp + %d: 0x%08lx\n", i*4, sp[i]); return 0; } #endif -void -set_nmi_handler(void (*handler)(struct pt_regs *)) +void set_nmi_handler(void (*handler)(struct pt_regs *)) { nmi_handler = handler; arch_enable_nmi(); } #ifdef CONFIG_DEBUG_NMI_OOPS -void -oops_nmi_handler(struct pt_regs *regs) +void oops_nmi_handler(struct pt_regs *regs) { stop_watchdog(); oops_in_progress = 1; - printk("NMI!\n"); + pr_err("NMI!\n"); show_registers(regs); oops_in_progress = 0; + oops_exit(); + pr_err("\n"); /* Flush mtdoops. */ } -static int __init -oops_nmi_register(void) +static int __init oops_nmi_register(void) { set_nmi_handler(oops_nmi_handler); return 0; @@ -180,8 +186,7 @@ __initcall(oops_nmi_register); * similar to an Oops dump, and if the kernel is configured to be a nice * doggy, then halt instead of reboot. */ -void -watchdog_bite_hook(struct pt_regs *regs) +void watchdog_bite_hook(struct pt_regs *regs) { #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY local_irq_disable(); @@ -196,8 +201,7 @@ watchdog_bite_hook(struct pt_regs *regs) } /* This is normally the Oops function. 
*/ -void -die_if_kernel(const char *str, struct pt_regs *regs, long err) +void die_if_kernel(const char *str, struct pt_regs *regs, long err) { if (user_mode(regs)) return; @@ -211,13 +215,17 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) stop_watchdog(); #endif + oops_enter(); handle_BUG(regs); - printk("%s: %04lx\n", str, err & 0xffff); + pr_err("Linux %s %s\n", utsname()->release, utsname()->version); + pr_err("%s: %04lx\n", str, err & 0xffff); show_registers(regs); + oops_exit(); oops_in_progress = 0; + pr_err("\n"); /* Flush mtdoops. */ #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY reset_watchdog(); @@ -225,8 +233,7 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) do_exit(SIGSEGV); } -void __init -trap_init(void) +void __init trap_init(void) { /* Nothing needs to be done */ } diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index c81af5bd9167..1e7fd45b60f8 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c @@ -11,13 +11,15 @@ #include <linux/gfp.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/proc_fs.h> +#include <linux/kcore.h> #include <asm/tlb.h> #include <asm/sections.h> unsigned long empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); -void __init -mem_init(void) +void __init mem_init(void) { BUG_ON(!mem_map); @@ -31,10 +33,36 @@ mem_init(void) mem_init_print_info(NULL); } -/* free the pages occupied by initialization code */ +/* Free a range of init pages. Virtual addresses. */ -void -free_initmem(void) +void free_init_pages(const char *what, unsigned long begin, unsigned long end) +{ + unsigned long addr; + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + free_page(addr); + totalram_pages++; + } + + printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); +} + +/* Free the pages occupied by initialization code. 
*/ + +void free_initmem(void) { free_initmem_default(-1); } + +/* Free the pages occupied by initrd code. */ + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", + start, + end); +} +#endif diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c index f9ca44bdea20..80fdb995a8ce 100644 --- a/arch/cris/mm/ioremap.c +++ b/arch/cris/mm/ioremap.c @@ -76,10 +76,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l * Must be freed with iounmap. */ -void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) +void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size) { return __ioremap(phys_addr | MEM_NON_CACHEABLE, size, 0); } +EXPORT_SYMBOL(ioremap_nocache); void iounmap(volatile void __iomem *addr) { diff --git a/arch/hexagon/include/asm/cache.h b/arch/hexagon/include/asm/cache.h index 263511719a4a..69952c184207 100644 --- a/arch/hexagon/include/asm/cache.h +++ b/arch/hexagon/include/asm/cache.h @@ -1,7 +1,7 @@ /* * Cache definitions for the Hexagon architecture * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. + * Copyright (c) 2010-2011,2014 The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -25,6 +25,8 @@ #define L1_CACHE_SHIFT (5) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES + #define __cacheline_aligned __aligned(L1_CACHE_BYTES) #define ____cacheline_aligned __aligned(L1_CACHE_BYTES) diff --git a/arch/hexagon/include/asm/cacheflush.h b/arch/hexagon/include/asm/cacheflush.h index 49e0896ec240..b86f9f300e94 100644 --- a/arch/hexagon/include/asm/cacheflush.h +++ b/arch/hexagon/include/asm/cacheflush.h @@ -21,10 +21,7 @@ #ifndef _ASM_CACHEFLUSH_H #define _ASM_CACHEFLUSH_H -#include <linux/cache.h> -#include <linux/mm.h> -#include <asm/string.h> -#include <asm-generic/cacheflush.h> +#include <linux/mm_types.h> /* Cache flushing: * @@ -41,6 +38,20 @@ #define LINESIZE 32 #define LINEBITS 5 +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_page(vma, pg) do { } while (0) +#define flush_icache_user_range(vma, pg, adr, len) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + /* * Flush Dcache range through current map. */ @@ -49,7 +60,6 @@ extern void flush_dcache_range(unsigned long start, unsigned long end); /* * Flush Icache range through current map. 
*/ -#undef flush_icache_range extern void flush_icache_range(unsigned long start, unsigned long end); /* @@ -79,19 +89,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, /* generic_ptrace_pokedata doesn't wind up here, does it? */ } -#undef copy_to_user_page -static inline void copy_to_user_page(struct vm_area_struct *vma, - struct page *page, - unsigned long vaddr, - void *dst, void *src, int len) -{ - memcpy(dst, src, len); - if (vma->vm_flags & VM_EXEC) { - flush_icache_range((unsigned long) dst, - (unsigned long) dst + len); - } -} +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long vaddr, void *dst, void *src, int len); +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) extern void hexagon_inv_dcache_range(unsigned long start, unsigned long end); extern void hexagon_clean_dcache_range(unsigned long start, unsigned long end); diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h index 70298996e9b2..66f5e9a61efc 100644 --- a/arch/hexagon/include/asm/io.h +++ b/arch/hexagon/include/asm/io.h @@ -24,14 +24,9 @@ #ifdef __KERNEL__ #include <linux/types.h> -#include <linux/delay.h> -#include <linux/vmalloc.h> -#include <asm/string.h> -#include <asm/mem-layout.h> #include <asm/iomap.h> #include <asm/page.h> #include <asm/cacheflush.h> -#include <asm/tlbflush.h> /* * We don't have PCI yet. 
diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c index 0e7c1dbb37b2..6981949f5df3 100644 --- a/arch/hexagon/kernel/setup.c +++ b/arch/hexagon/kernel/setup.c @@ -19,6 +19,7 @@ */ #include <linux/init.h> +#include <linux/delay.h> #include <linux/bootmem.h> #include <linux/mmzone.h> #include <linux/mm.h> diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 7858663352b9..110dab152f82 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -1,7 +1,7 @@ /* * Kernel traps/events for Hexagon processor * - * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -423,7 +423,7 @@ void do_trap0(struct pt_regs *regs) */ info.si_code = TRAP_BRKPT; info.si_addr = (void __user *) pt_elr(regs); - send_sig_info(SIGTRAP, &info, current); + force_sig_info(SIGTRAP, &info, current); } else { #ifdef CONFIG_KGDB kgdb_handle_exception(pt_cause(regs), SIGTRAP, diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 44d8c47bae2f..5f268c1071b3 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -1,7 +1,7 @@ /* * Linker script for Hexagon kernel * - * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -59,7 +59,7 @@ SECTIONS INIT_DATA_SECTION(PAGE_SIZE) _sdata = .; - RW_DATA_SECTION(32,PAGE_SIZE,PAGE_SIZE) + RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE) RO_DATA_SECTION(PAGE_SIZE) _edata = .; diff --git a/arch/hexagon/mm/cache.c b/arch/hexagon/mm/cache.c index 0c76c802e31c..a7c6d827d8b6 100644 --- a/arch/hexagon/mm/cache.c +++ b/arch/hexagon/mm/cache.c @@ -127,3 +127,13 @@ void flush_cache_all_hexagon(void) local_irq_restore(flags); mb(); } + +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long vaddr, void *dst, void *src, int len) +{ + memcpy(dst, src, len); + if (vma->vm_flags & VM_EXEC) { + flush_icache_range((unsigned long) dst, + (unsigned long) dst + len); + } +} diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c index 5905fd5f97f6..d27d67224046 100644 --- a/arch/hexagon/mm/ioremap.c +++ b/arch/hexagon/mm/ioremap.c @@ -20,6 +20,7 @@ #include <linux/io.h> #include <linux/vmalloc.h> +#include <linux/mm.h> void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size) { diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 536d13b0bea6..074e52bf815c 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -20,7 +20,6 @@ config IA64 select HAVE_DYNAMIC_FTRACE if (!ITANIUM) select HAVE_FUNCTION_TRACER select HAVE_DMA_ATTRS - select HAVE_KVM select TTY select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG @@ -232,7 +231,7 @@ config IA64_SGI_UV config IA64_HP_SIM bool "Ski-simulator" select SWIOTLB - depends on !PM_RUNTIME + depends on !PM endchoice @@ -640,8 +639,6 @@ source "security/Kconfig" source "crypto/Kconfig" -source "arch/ia64/kvm/Kconfig" - source "lib/Kconfig" config IOMMU_HELPER diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 5441b14994fc..970d0bd99621 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -53,7 +53,6 @@ 
core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ -core-$(CONFIG_KVM) += arch/ia64/kvm/ drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h deleted file mode 100644 index 4729752b7256..000000000000 --- a/arch/ia64/include/asm/kvm_host.h +++ /dev/null @@ -1,609 +0,0 @@ -/* - * kvm_host.h: used for kvm module, and hold ia64-specific sections. - * - * Copyright (C) 2007, Intel Corporation. - * - * Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#ifndef __ASM_KVM_HOST_H -#define __ASM_KVM_HOST_H - -#define KVM_USER_MEM_SLOTS 32 - -#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 -#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS - -/* define exit reasons from vmm to kvm*/ -#define EXIT_REASON_VM_PANIC 0 -#define EXIT_REASON_MMIO_INSTRUCTION 1 -#define EXIT_REASON_PAL_CALL 2 -#define EXIT_REASON_SAL_CALL 3 -#define EXIT_REASON_SWITCH_RR6 4 -#define EXIT_REASON_VM_DESTROY 5 -#define EXIT_REASON_EXTERNAL_INTERRUPT 6 -#define EXIT_REASON_IPI 7 -#define EXIT_REASON_PTC_G 8 -#define EXIT_REASON_DEBUG 20 - -/*Define vmm address space and vm data space.*/ -#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20) -#define KVM_VMM_SHIFT 24 -#define KVM_VMM_BASE 0xD000000000000000 -#define VMM_SIZE (__IA64_UL_CONST(8)<<20) - -/* - * Define vm_buffer, used by PAL Services, base address. - * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M - */ -#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) -#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20) - -/* - * kvm guest's data area looks as follow: - * - * +----------------------+ ------- KVM_VM_DATA_SIZE - * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET - * | | | / | - * | .......... | | /vcpu's struct&stack | - * | .......... | | /---------------------|---- 0 - * | vcpu[5]'s data | | / vpd | - * | vcpu[4]'s data | |/-----------------------| - * | vcpu[3]'s data | / vtlb | - * | vcpu[2]'s data | /|------------------------| - * | vcpu[1]'s data |/ | vhpt | - * | vcpu[0]'s data |____________________________| - * +----------------------+ | - * | memory dirty log | | - * +----------------------+ | - * | vm's data struct | | - * +----------------------+ | - * | | | - * | | | - * | | | - * | | | - * | | | - * | | | - * | | | - * | vm's p2m table | | - * | | | - * | | | - * | | | | - * vm's data->| | | | - * +----------------------+ ------- 0 - * To support large memory, needs to increase the size of p2m. 
- * To support more vcpus, needs to ensure it has enough space to - * hold vcpus' data. - */ - -#define KVM_VM_DATA_SHIFT 26 -#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT) -#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE) - -#define KVM_P2M_BASE KVM_VM_DATA_BASE -#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20) - -#define VHPT_SHIFT 16 -#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT) -#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5)) - -#define VTLB_SHIFT 16 -#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT) -#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5)) - -#define VPD_SHIFT 16 -#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT) - -#define VCPU_STRUCT_SHIFT 16 -#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT) - -/* - * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h - */ -#define KVM_STK_SHIFT 16 -#define KVM_STK_OFFSET (__IA64_UL_CONST(1)<< KVM_STK_SHIFT) - -#define KVM_VM_STRUCT_SHIFT 19 -#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT) - -#define KVM_MEM_DIRY_LOG_SHIFT 19 -#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT) - -#ifndef __ASSEMBLY__ - -/*Define the max vcpus and memory for Guests.*/ -#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\ - KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data) -#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT) - -#define VMM_LOG_LEN 256 - -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/kvm.h> -#include <linux/kvm_para.h> -#include <linux/kvm_types.h> - -#include <asm/pal.h> -#include <asm/sal.h> -#include <asm/page.h> - -struct kvm_vcpu_data { - char vcpu_vhpt[VHPT_SIZE]; - char vcpu_vtlb[VTLB_SIZE]; - char vcpu_vpd[VPD_SIZE]; - char vcpu_struct[VCPU_STRUCT_SIZE]; -}; - -struct kvm_vm_data { - char kvm_p2m[KVM_P2M_SIZE]; - char kvm_vm_struct[KVM_VM_STRUCT_SIZE]; - char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE]; - 
struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS]; -}; - -#define VCPU_BASE(n) (KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, vcpu_data[n])) -#define KVM_VM_BASE (KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, kvm_vm_struct)) -#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, kvm_mem_dirty_log) - -#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt)) -#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb)) -#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd)) -#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \ - offsetof(struct kvm_vcpu_data, vcpu_struct)) - -/*IO section definitions*/ -#define IOREQ_READ 1 -#define IOREQ_WRITE 0 - -#define STATE_IOREQ_NONE 0 -#define STATE_IOREQ_READY 1 -#define STATE_IOREQ_INPROCESS 2 -#define STATE_IORESP_READY 3 - -/*Guest Physical address layout.*/ -#define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */ -#define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */ -#define GPFN_LOW_MMIO (2UL << 60) /* Low MMIO range */ -#define GPFN_PIB (3UL << 60) /* PIB base */ -#define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */ -#define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */ -#define GPFN_GFW (6UL << 60) /* Guest Firmware */ -#define GPFN_PHYS_MMIO (7UL << 60) /* Directed MMIO Range */ - -#define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */ -#define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */ -#define INVALID_MFN (~0UL) -#define MEM_G (1UL << 30) -#define MEM_M (1UL << 20) -#define MMIO_START (3 * MEM_G) -#define MMIO_SIZE (512 * MEM_M) -#define VGA_IO_START 0xA0000UL -#define VGA_IO_SIZE 0x20000 -#define LEGACY_IO_START (MMIO_START + MMIO_SIZE) -#define LEGACY_IO_SIZE (64 * MEM_M) -#define IO_SAPIC_START 0xfec00000UL -#define IO_SAPIC_SIZE 0x100000 -#define PIB_START 0xfee00000UL -#define PIB_SIZE 0x200000 -#define GFW_START (4 * MEM_G - 16 * MEM_M) -#define GFW_SIZE (16 * MEM_M) - -/*Deliver mode, 
defined for ioapic.c*/ -#define dest_Fixed IOSAPIC_FIXED -#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY - -#define NMI_VECTOR 2 -#define ExtINT_VECTOR 0 -#define NULL_VECTOR (-1) -#define IA64_SPURIOUS_INT_VECTOR 0x0f - -#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24) - -/* - *Delivery mode - */ -#define SAPIC_DELIV_SHIFT 8 -#define SAPIC_FIXED 0x0 -#define SAPIC_LOWEST_PRIORITY 0x1 -#define SAPIC_PMI 0x2 -#define SAPIC_NMI 0x4 -#define SAPIC_INIT 0x5 -#define SAPIC_EXTINT 0x7 - -/* - * vcpu->requests bit members for arch - */ -#define KVM_REQ_PTC_G 32 -#define KVM_REQ_RESUME 33 - -struct kvm_mmio_req { - uint64_t addr; /* physical address */ - uint64_t size; /* size in bytes */ - uint64_t data; /* data (or paddr of data) */ - uint8_t state:4; - uint8_t dir:1; /* 1=read, 0=write */ -}; - -/*Pal data struct */ -struct kvm_pal_call{ - /*In area*/ - uint64_t gr28; - uint64_t gr29; - uint64_t gr30; - uint64_t gr31; - /*Out area*/ - struct ia64_pal_retval ret; -}; - -/* Sal data structure */ -struct kvm_sal_call{ - /*In area*/ - uint64_t in0; - uint64_t in1; - uint64_t in2; - uint64_t in3; - uint64_t in4; - uint64_t in5; - uint64_t in6; - uint64_t in7; - struct sal_ret_values ret; -}; - -/*Guest change rr6*/ -struct kvm_switch_rr6 { - uint64_t old_rr; - uint64_t new_rr; -}; - -union ia64_ipi_a{ - unsigned long val; - struct { - unsigned long rv : 3; - unsigned long ir : 1; - unsigned long eid : 8; - unsigned long id : 8; - unsigned long ib_base : 44; - }; -}; - -union ia64_ipi_d { - unsigned long val; - struct { - unsigned long vector : 8; - unsigned long dm : 3; - unsigned long ig : 53; - }; -}; - -/*ipi check exit data*/ -struct kvm_ipi_data{ - union ia64_ipi_a addr; - union ia64_ipi_d data; -}; - -/*global purge data*/ -struct kvm_ptc_g { - unsigned long vaddr; - unsigned long rr; - unsigned long ps; - struct kvm_vcpu *vcpu; -}; - -/*Exit control data */ -struct exit_ctl_data{ - uint32_t exit_reason; - uint32_t vm_status; - union { - struct kvm_mmio_req ioreq; - 
struct kvm_pal_call pal_data; - struct kvm_sal_call sal_data; - struct kvm_switch_rr6 rr_data; - struct kvm_ipi_data ipi_data; - struct kvm_ptc_g ptc_g_data; - } u; -}; - -union pte_flags { - unsigned long val; - struct { - unsigned long p : 1; /*0 */ - unsigned long : 1; /* 1 */ - unsigned long ma : 3; /* 2-4 */ - unsigned long a : 1; /* 5 */ - unsigned long d : 1; /* 6 */ - unsigned long pl : 2; /* 7-8 */ - unsigned long ar : 3; /* 9-11 */ - unsigned long ppn : 38; /* 12-49 */ - unsigned long : 2; /* 50-51 */ - unsigned long ed : 1; /* 52 */ - }; -}; - -union ia64_pta { - unsigned long val; - struct { - unsigned long ve : 1; - unsigned long reserved0 : 1; - unsigned long size : 6; - unsigned long vf : 1; - unsigned long reserved1 : 6; - unsigned long base : 49; - }; -}; - -struct thash_cb { - /* THASH base information */ - struct thash_data *hash; /* hash table pointer */ - union ia64_pta pta; - int num; -}; - -struct kvm_vcpu_stat { - u32 halt_wakeup; -}; - -struct kvm_vcpu_arch { - int launched; - int last_exit; - int last_run_cpu; - int vmm_tr_slot; - int vm_tr_slot; - int sn_rtc_tr_slot; - -#define KVM_MP_STATE_RUNNABLE 0 -#define KVM_MP_STATE_UNINITIALIZED 1 -#define KVM_MP_STATE_INIT_RECEIVED 2 -#define KVM_MP_STATE_HALTED 3 - int mp_state; - -#define MAX_PTC_G_NUM 3 - int ptc_g_count; - struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM]; - - /*halt timer to wake up sleepy vcpus*/ - struct hrtimer hlt_timer; - long ht_active; - - struct kvm_lapic *apic; /* kernel irqchip context */ - struct vpd *vpd; - - /* Exit data for vmm_transition*/ - struct exit_ctl_data exit_data; - - cpumask_t cache_coherent_map; - - unsigned long vmm_rr; - unsigned long host_rr6; - unsigned long psbits[8]; - unsigned long cr_iipa; - unsigned long cr_isr; - unsigned long vsa_base; - unsigned long dirty_log_lock_pa; - unsigned long __gp; - /* TR and TC. */ - struct thash_data itrs[NITRS]; - struct thash_data dtrs[NDTRS]; - /* Bit is set if there is a tr/tc for the region. 
*/ - unsigned char itr_regions; - unsigned char dtr_regions; - unsigned char tc_regions; - /* purge all */ - unsigned long ptce_base; - unsigned long ptce_count[2]; - unsigned long ptce_stride[2]; - /* itc/itm */ - unsigned long last_itc; - long itc_offset; - unsigned long itc_check; - unsigned long timer_check; - unsigned int timer_pending; - unsigned int timer_fired; - - unsigned long vrr[8]; - unsigned long ibr[8]; - unsigned long dbr[8]; - unsigned long insvc[4]; /* Interrupt in service. */ - unsigned long xtp; - - unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_saved_rr0; /* from kvm_arch */ - unsigned long metaphysical_saved_rr4; /* from kvm_arch */ - unsigned long fp_psr; /*used for lazy float register */ - unsigned long saved_gp; - /*for phycial emulation */ - int mode_flags; - struct thash_cb vtlb; - struct thash_cb vhpt; - char irq_check; - char irq_new_pending; - - unsigned long opcode; - unsigned long cause; - char log_buf[VMM_LOG_LEN]; - union context host; - union context guest; - - char mmio_data[8]; -}; - -struct kvm_vm_stat { - u64 remote_tlb_flush; -}; - -struct kvm_sal_data { - unsigned long boot_ip; - unsigned long boot_gp; -}; - -struct kvm_arch_memory_slot { -}; - -struct kvm_arch { - spinlock_t dirty_log_lock; - - unsigned long vm_base; - unsigned long metaphysical_rr0; - unsigned long metaphysical_rr4; - unsigned long vmm_init_rr; - - int is_sn2; - - struct kvm_ioapic *vioapic; - struct kvm_vm_stat stat; - struct kvm_sal_data rdv_sal_data; - - struct list_head assigned_dev_head; - struct iommu_domain *iommu_domain; - bool iommu_noncoherent; - - unsigned long irq_sources_bitmap; - unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; -}; - -union cpuid3_t { - u64 value; - struct { - u64 number : 8; - u64 revision : 8; - u64 model : 8; - u64 family : 8; - u64 archrev : 8; - u64 rv : 24; - }; -}; - -struct kvm_pt_regs { - /* The 
following registers are saved by SAVE_MIN: */ - unsigned long b6; /* scratch */ - unsigned long b7; /* scratch */ - - unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ - unsigned long ar_ssd; /* reserved for future use (scratch) */ - - unsigned long r8; /* scratch (return value register 0) */ - unsigned long r9; /* scratch (return value register 1) */ - unsigned long r10; /* scratch (return value register 2) */ - unsigned long r11; /* scratch (return value register 3) */ - - unsigned long cr_ipsr; /* interrupted task's psr */ - unsigned long cr_iip; /* interrupted task's instruction pointer */ - unsigned long cr_ifs; /* interrupted task's function state */ - - unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ - unsigned long ar_pfs; /* prev function state */ - unsigned long ar_rsc; /* RSE configuration */ - /* The following two are valid only if cr_ipsr.cpl > 0: */ - unsigned long ar_rnat; /* RSE NaT */ - unsigned long ar_bspstore; /* RSE bspstore */ - - unsigned long pr; /* 64 predicate registers (1 bit each) */ - unsigned long b0; /* return pointer (bp) */ - unsigned long loadrs; /* size of dirty partition << 16 */ - - unsigned long r1; /* the gp pointer */ - unsigned long r12; /* interrupted task's memory stack pointer */ - unsigned long r13; /* thread pointer */ - - unsigned long ar_fpsr; /* floating point status (preserved) */ - unsigned long r15; /* scratch */ - - /* The remaining registers are NOT saved for system calls. 
*/ - unsigned long r14; /* scratch */ - unsigned long r2; /* scratch */ - unsigned long r3; /* scratch */ - unsigned long r16; /* scratch */ - unsigned long r17; /* scratch */ - unsigned long r18; /* scratch */ - unsigned long r19; /* scratch */ - unsigned long r20; /* scratch */ - unsigned long r21; /* scratch */ - unsigned long r22; /* scratch */ - unsigned long r23; /* scratch */ - unsigned long r24; /* scratch */ - unsigned long r25; /* scratch */ - unsigned long r26; /* scratch */ - unsigned long r27; /* scratch */ - unsigned long r28; /* scratch */ - unsigned long r29; /* scratch */ - unsigned long r30; /* scratch */ - unsigned long r31; /* scratch */ - unsigned long ar_ccv; /* compare/exchange value (scratch) */ - - /* - * Floating point registers that the kernel considers scratch: - */ - struct ia64_fpreg f6; /* scratch */ - struct ia64_fpreg f7; /* scratch */ - struct ia64_fpreg f8; /* scratch */ - struct ia64_fpreg f9; /* scratch */ - struct ia64_fpreg f10; /* scratch */ - struct ia64_fpreg f11; /* scratch */ - - unsigned long r4; /* preserved */ - unsigned long r5; /* preserved */ - unsigned long r6; /* preserved */ - unsigned long r7; /* preserved */ - unsigned long eml_unat; /* used for emulating instruction */ - unsigned long pad0; /* alignment pad */ -}; - -static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) -{ - return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1; -} - -typedef int kvm_vmm_entry(void); -typedef void kvm_tramp_entry(union context *host, union context *guest); - -struct kvm_vmm_info{ - struct module *module; - kvm_vmm_entry *vmm_entry; - kvm_tramp_entry *tramp_entry; - unsigned long vmm_ivt; - unsigned long patch_mov_ar; - unsigned long patch_mov_ar_sn2; -}; - -int kvm_highest_pending_irq(struct kvm_vcpu *vcpu); -int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); -void kvm_sal_emul(struct kvm_vcpu *vcpu); - -#define __KVM_HAVE_ARCH_VM_ALLOC 1 
-struct kvm *kvm_arch_alloc_vm(void); -void kvm_arch_free_vm(struct kvm *kvm); - -static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_free_memslot(struct kvm *kvm, - struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} -static inline void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) {} -static inline void kvm_arch_hardware_unsetup(void) {} - -#endif /* __ASSEMBLY__*/ - -#endif diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h index 14aa1c58912b..0ec484d2dcbc 100644 --- a/arch/ia64/include/asm/percpu.h +++ b/arch/ia64/include/asm/percpu.h @@ -35,8 +35,8 @@ extern void *per_cpu_init(void); /* * Be extremely careful when taking the address of this variable! Due to virtual - * remapping, it is different from the canonical address returned by __get_cpu_var(var)! - * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly + * remapping, it is different from the canonical address returned by this_cpu_ptr(&var)! + * On the positive side, using __ia64_per_cpu_var() instead of this_cpu_ptr() is slightly * more efficient. */ #define __ia64_per_cpu_var(var) (*({ \ diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h deleted file mode 100644 index 42b233bedeb5..000000000000 --- a/arch/ia64/include/asm/pvclock-abi.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * same structure to x86's - * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic. - * For now, define same duplicated definitions. - */ - -#ifndef _ASM_IA64__PVCLOCK_ABI_H -#define _ASM_IA64__PVCLOCK_ABI_H -#ifndef __ASSEMBLY__ - -/* - * These structs MUST NOT be changed. 
- * They are the ABI between hypervisor and guest OS. - * KVM is using this. - * - * pvclock_vcpu_time_info holds the system time and the tsc timestamp - * of the last update. So the guest can use the tsc delta to get a - * more precise system time. There is one per virtual cpu. - * - * pvclock_wall_clock references the point in time when the system - * time was zero (usually boot time), thus the guest calculates the - * current wall clock by adding the system time. - * - * Protocol for the "version" fields is: hypervisor raises it (making - * it uneven) before it starts updating the fields and raises it again - * (making it even) when it is done. Thus the guest can make sure the - * time values it got are consistent by checking the version before - * and after reading them. - */ - -struct pvclock_vcpu_time_info { - u32 version; - u32 pad0; - u64 tsc_timestamp; - u64 system_time; - u32 tsc_to_system_mul; - s8 tsc_shift; - u8 pad[3]; -} __attribute__((__packed__)); /* 32 bytes */ - -struct pvclock_wall_clock { - u32 version; - u32 sec; - u32 nsec; -} __attribute__((__packed__)); - -#endif /* __ASSEMBLY__ */ -#endif /* _ASM_IA64__PVCLOCK_ABI_H */ diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h deleted file mode 100644 index 99503c284400..000000000000 --- a/arch/ia64/include/uapi/asm/kvm.h +++ /dev/null @@ -1,268 +0,0 @@ -#ifndef __ASM_IA64_KVM_H -#define __ASM_IA64_KVM_H - -/* - * kvm structure definitions for ia64 - * - * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <linux/types.h> -#include <linux/ioctl.h> - -/* Select x86 specific features in <linux/kvm.h> */ -#define __KVM_HAVE_IOAPIC -#define __KVM_HAVE_IRQ_LINE - -/* Architectural interrupt line count. */ -#define KVM_NR_INTERRUPTS 256 - -#define KVM_IOAPIC_NUM_PINS 48 - -struct kvm_ioapic_state { - __u64 base_address; - __u32 ioregsel; - __u32 id; - __u32 irr; - __u32 pad; - union { - __u64 bits; - struct { - __u8 vector; - __u8 delivery_mode:3; - __u8 dest_mode:1; - __u8 delivery_status:1; - __u8 polarity:1; - __u8 remote_irr:1; - __u8 trig_mode:1; - __u8 mask:1; - __u8 reserve:7; - __u8 reserved[4]; - __u8 dest_id; - } fields; - } redirtbl[KVM_IOAPIC_NUM_PINS]; -}; - -#define KVM_IRQCHIP_PIC_MASTER 0 -#define KVM_IRQCHIP_PIC_SLAVE 1 -#define KVM_IRQCHIP_IOAPIC 2 -#define KVM_NR_IRQCHIPS 3 - -#define KVM_CONTEXT_SIZE 8*1024 - -struct kvm_fpreg { - union { - unsigned long bits[2]; - long double __dummy; /* force 16-byte alignment */ - } u; -}; - -union context { - /* 8K size */ - char dummy[KVM_CONTEXT_SIZE]; - struct { - unsigned long psr; - unsigned long pr; - unsigned long caller_unat; - unsigned long pad; - unsigned long gr[32]; - unsigned long ar[128]; - unsigned long br[8]; - unsigned long cr[128]; - unsigned long rr[8]; - unsigned long ibr[8]; - unsigned long dbr[8]; - unsigned long pkr[8]; - struct kvm_fpreg fr[128]; - }; -}; - -struct thash_data { - union { - struct { - unsigned long p : 1; /* 0 */ - unsigned long rv1 : 1; /* 1 */ - unsigned long ma : 3; /* 2-4 */ - unsigned long a : 1; /* 5 */ - unsigned long d : 1; /* 6 */ - unsigned long pl : 2; /* 7-8 */ - unsigned long ar : 3; /* 9-11 */ - unsigned long ppn : 38; /* 12-49 */ - unsigned long rv2 : 2; /* 50-51 */ - unsigned long ed : 1; /* 52 */ - unsigned long ig1 : 11; /* 
53-63 */ - }; - struct { - unsigned long __rv1 : 53; /* 0-52 */ - unsigned long contiguous : 1; /*53 */ - unsigned long tc : 1; /* 54 TR or TC */ - unsigned long cl : 1; - /* 55 I side or D side cache line */ - unsigned long len : 4; /* 56-59 */ - unsigned long io : 1; /* 60 entry is for io or not */ - unsigned long nomap : 1; - /* 61 entry cann't be inserted into machine TLB.*/ - unsigned long checked : 1; - /* 62 for VTLB/VHPT sanity check */ - unsigned long invalid : 1; - /* 63 invalid entry */ - }; - unsigned long page_flags; - }; /* same for VHPT and TLB */ - - union { - struct { - unsigned long rv3 : 2; - unsigned long ps : 6; - unsigned long key : 24; - unsigned long rv4 : 32; - }; - unsigned long itir; - }; - union { - struct { - unsigned long ig2 : 12; - unsigned long vpn : 49; - unsigned long vrn : 3; - }; - unsigned long ifa; - unsigned long vadr; - struct { - unsigned long tag : 63; - unsigned long ti : 1; - }; - unsigned long etag; - }; - union { - struct thash_data *next; - unsigned long rid; - unsigned long gpaddr; - }; -}; - -#define NITRS 8 -#define NDTRS 8 - -struct saved_vpd { - unsigned long vhpi; - unsigned long vgr[16]; - unsigned long vbgr[16]; - unsigned long vnat; - unsigned long vbnat; - unsigned long vcpuid[5]; - unsigned long vpsr; - unsigned long vpr; - union { - unsigned long vcr[128]; - struct { - unsigned long dcr; - unsigned long itm; - unsigned long iva; - unsigned long rsv1[5]; - unsigned long pta; - unsigned long rsv2[7]; - unsigned long ipsr; - unsigned long isr; - unsigned long rsv3; - unsigned long iip; - unsigned long ifa; - unsigned long itir; - unsigned long iipa; - unsigned long ifs; - unsigned long iim; - unsigned long iha; - unsigned long rsv4[38]; - unsigned long lid; - unsigned long ivr; - unsigned long tpr; - unsigned long eoi; - unsigned long irr[4]; - unsigned long itv; - unsigned long pmv; - unsigned long cmcv; - unsigned long rsv5[5]; - unsigned long lrr0; - unsigned long lrr1; - unsigned long rsv6[46]; - }; - }; 
-}; - -struct kvm_regs { - struct saved_vpd vpd; - /*Arch-regs*/ - int mp_state; - unsigned long vmm_rr; - /* TR and TC. */ - struct thash_data itrs[NITRS]; - struct thash_data dtrs[NDTRS]; - /* Bit is set if there is a tr/tc for the region. */ - unsigned char itr_regions; - unsigned char dtr_regions; - unsigned char tc_regions; - - char irq_check; - unsigned long saved_itc; - unsigned long itc_check; - unsigned long timer_check; - unsigned long timer_pending; - unsigned long last_itc; - - unsigned long vrr[8]; - unsigned long ibr[8]; - unsigned long dbr[8]; - unsigned long insvc[4]; /* Interrupt in service. */ - unsigned long xtp; - - unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ - unsigned long metaphysical_saved_rr0; /* from kvm_arch */ - unsigned long metaphysical_saved_rr4; /* from kvm_arch */ - unsigned long fp_psr; /*used for lazy float register */ - unsigned long saved_gp; - /*for phycial emulation */ - - union context saved_guest; - - unsigned long reserved[64]; /* for future use */ -}; - -struct kvm_sregs { -}; - -struct kvm_fpu { -}; - -#define KVM_IA64_VCPU_STACK_SHIFT 16 -#define KVM_IA64_VCPU_STACK_SIZE (1UL << KVM_IA64_VCPU_STACK_SHIFT) - -struct kvm_ia64_vcpu_stack { - unsigned char stack[KVM_IA64_VCPU_STACK_SIZE]; -}; - -struct kvm_debug_exit_arch { -}; - -/* for KVM_SET_GUEST_DEBUG */ -struct kvm_guest_debug_arch { -}; - -/* definition of registers in kvm_run */ -struct kvm_sync_regs { -}; - -#endif diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index dc063fe6646a..5f4243f0acfa 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2145,22 +2145,12 @@ doit: return 0; } -static int -pfm_no_open(struct inode *irrelevant, struct file *dontcare) -{ - DPRINT(("pfm_no_open called\n")); - return -ENXIO; -} - - - static const struct file_operations pfm_file_ops = { .llseek = no_llseek, .read = pfm_read, .write = pfm_write, 
.poll = pfm_poll, .unlocked_ioctl = pfm_ioctl, - .open = pfm_no_open, /* special open code to disallow open via /proc */ .fasync = pfm_fasync, .release = pfm_close, .flush = pfm_flush diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig deleted file mode 100644 index 3d50ea955c4c..000000000000 --- a/arch/ia64/kvm/Kconfig +++ /dev/null @@ -1,66 +0,0 @@ -# -# KVM configuration -# - -source "virt/kvm/Kconfig" - -menuconfig VIRTUALIZATION - bool "Virtualization" - depends on HAVE_KVM || IA64 - default y - ---help--- - Say Y here to get to see options for using your Linux host to run other - operating systems inside virtual machines (guests). - This option alone does not add any kernel code. - - If you say N, all options in this submenu will be skipped and disabled. - -if VIRTUALIZATION - -config KVM - tristate "Kernel-based Virtual Machine (KVM) support" - depends on BROKEN - depends on HAVE_KVM && MODULES - depends on BROKEN - select PREEMPT_NOTIFIERS - select ANON_INODES - select HAVE_KVM_IRQCHIP - select HAVE_KVM_IRQFD - select HAVE_KVM_IRQ_ROUTING - select KVM_APIC_ARCHITECTURE - select KVM_MMIO - ---help--- - Support hosting fully virtualized guest machines using hardware - virtualization extensions. You will need a fairly recent - processor equipped with virtualization extensions. You will also - need to select one or more of the processor modules below. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. - - To compile this as a module, choose M here: the module - will be called kvm. - - If unsure, say N. - -config KVM_INTEL - tristate "KVM for Intel Itanium 2 processors support" - depends on KVM && m - ---help--- - Provides support for KVM on Itanium 2 processors equipped with the VT - extensions. 
- -config KVM_DEVICE_ASSIGNMENT - bool "KVM legacy PCI device assignment support" - depends on KVM && PCI && IOMMU_API - default y - ---help--- - Provide support for legacy PCI device assignment through KVM. The - kernel now also supports a full featured userspace device driver - framework through VFIO, which supersedes much of this support. - - If unsure, say Y. - -source drivers/vhost/Kconfig - -endif # VIRTUALIZATION diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile deleted file mode 100644 index 18e45ec49bbf..000000000000 --- a/arch/ia64/kvm/Makefile +++ /dev/null @@ -1,67 +0,0 @@ -#This Make file is to generate asm-offsets.h and build source. -# - -#Generate asm-offsets.h for vmm module build -offsets-file := asm-offsets.h - -always := $(offsets-file) -targets := $(offsets-file) -targets += arch/ia64/kvm/asm-offsets.s - -# Default sed regexp - multiline due to syntax constraints -define sed-y - "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" -endef - -quiet_cmd_offsets = GEN $@ -define cmd_offsets - (set -e; \ - echo "#ifndef __ASM_KVM_OFFSETS_H__"; \ - echo "#define __ASM_KVM_OFFSETS_H__"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Makefile"; \ - echo " *"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y) $<; \ - echo ""; \ - echo "#endif" ) > $@ -endef - -# We use internal rules to avoid the "is up to date" message from make -arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \ - $(wildcard $(srctree)/arch/ia64/include/asm/*.h)\ - $(wildcard $(srctree)/include/linux/*.h) - $(call if_changed_dep,cc_s_c) - -$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s - $(call cmd,offsets) - -FORCE : $(obj)/$(offsets-file) - -# -# Makefile for Kernel-based Virtual Machine module -# - -ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ -asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ -KVM := ../../../virt/kvm - -common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \ - 
$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o - -ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) -common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o -endif - -kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o -obj-$(CONFIG_KVM) += kvm.o - -CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 -kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ - vtlb.o process.o kvm_lib.o -#Add link memcpy and memset to avoid possible structure assignment error -kvm-intel-objs += memcpy.o memset.o -obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c deleted file mode 100644 index 9324c875caf5..000000000000 --- a/arch/ia64/kvm/asm-offsets.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * asm-offsets.c Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed - * to extract and format the required data. - * - * Anthony Xu <anthony.xu@intel.com> - * Xiantao Zhang <xiantao.zhang@intel.com> - * Copyright (c) 2007 Intel Corporation KVM support. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#include <linux/kvm_host.h> -#include <linux/kbuild.h> - -#include "vcpu.h" - -void foo(void) -{ - DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); - DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs)); - - BLANK(); - - DEFINE(VMM_VCPU_META_RR0_OFFSET, - offsetof(struct kvm_vcpu, arch.metaphysical_rr0)); - DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, - offsetof(struct kvm_vcpu, - arch.metaphysical_saved_rr0)); - DEFINE(VMM_VCPU_VRR0_OFFSET, - offsetof(struct kvm_vcpu, arch.vrr[0])); - DEFINE(VMM_VPD_IRR0_OFFSET, - offsetof(struct vpd, irr[0])); - DEFINE(VMM_VCPU_ITC_CHECK_OFFSET, - offsetof(struct kvm_vcpu, arch.itc_check)); - DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET, - offsetof(struct kvm_vcpu, arch.irq_check)); - DEFINE(VMM_VPD_VHPI_OFFSET, - offsetof(struct vpd, vhpi)); - DEFINE(VMM_VCPU_VSA_BASE_OFFSET, - offsetof(struct kvm_vcpu, arch.vsa_base)); - DEFINE(VMM_VCPU_VPD_OFFSET, - offsetof(struct kvm_vcpu, arch.vpd)); - DEFINE(VMM_VCPU_IRQ_CHECK, - offsetof(struct kvm_vcpu, arch.irq_check)); - DEFINE(VMM_VCPU_TIMER_PENDING, - offsetof(struct kvm_vcpu, arch.timer_pending)); - DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, - offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0)); - DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, - offsetof(struct kvm_vcpu, arch.mode_flags)); - DEFINE(VMM_VCPU_ITC_OFS_OFFSET, - offsetof(struct kvm_vcpu, arch.itc_offset)); - DEFINE(VMM_VCPU_LAST_ITC_OFFSET, - offsetof(struct kvm_vcpu, arch.last_itc)); - DEFINE(VMM_VCPU_SAVED_GP_OFFSET, - offsetof(struct kvm_vcpu, arch.saved_gp)); - - BLANK(); - - DEFINE(VMM_PT_REGS_B6_OFFSET, - offsetof(struct kvm_pt_regs, b6)); - DEFINE(VMM_PT_REGS_B7_OFFSET, - offsetof(struct kvm_pt_regs, b7)); - DEFINE(VMM_PT_REGS_AR_CSD_OFFSET, - offsetof(struct kvm_pt_regs, ar_csd)); - DEFINE(VMM_PT_REGS_AR_SSD_OFFSET, - offsetof(struct kvm_pt_regs, ar_ssd)); - DEFINE(VMM_PT_REGS_R8_OFFSET, - offsetof(struct kvm_pt_regs, r8)); - DEFINE(VMM_PT_REGS_R9_OFFSET, - offsetof(struct kvm_pt_regs, r9)); - DEFINE(VMM_PT_REGS_R10_OFFSET, - 
offsetof(struct kvm_pt_regs, r10)); - DEFINE(VMM_PT_REGS_R11_OFFSET, - offsetof(struct kvm_pt_regs, r11)); - DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET, - offsetof(struct kvm_pt_regs, cr_ipsr)); - DEFINE(VMM_PT_REGS_CR_IIP_OFFSET, - offsetof(struct kvm_pt_regs, cr_iip)); - DEFINE(VMM_PT_REGS_CR_IFS_OFFSET, - offsetof(struct kvm_pt_regs, cr_ifs)); - DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET, - offsetof(struct kvm_pt_regs, ar_unat)); - DEFINE(VMM_PT_REGS_AR_PFS_OFFSET, - offsetof(struct kvm_pt_regs, ar_pfs)); - DEFINE(VMM_PT_REGS_AR_RSC_OFFSET, - offsetof(struct kvm_pt_regs, ar_rsc)); - DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET, - offsetof(struct kvm_pt_regs, ar_rnat)); - - DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET, - offsetof(struct kvm_pt_regs, ar_bspstore)); - DEFINE(VMM_PT_REGS_PR_OFFSET, - offsetof(struct kvm_pt_regs, pr)); - DEFINE(VMM_PT_REGS_B0_OFFSET, - offsetof(struct kvm_pt_regs, b0)); - DEFINE(VMM_PT_REGS_LOADRS_OFFSET, - offsetof(struct kvm_pt_regs, loadrs)); - DEFINE(VMM_PT_REGS_R1_OFFSET, - offsetof(struct kvm_pt_regs, r1)); - DEFINE(VMM_PT_REGS_R12_OFFSET, - offsetof(struct kvm_pt_regs, r12)); - DEFINE(VMM_PT_REGS_R13_OFFSET, - offsetof(struct kvm_pt_regs, r13)); - DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET, - offsetof(struct kvm_pt_regs, ar_fpsr)); - DEFINE(VMM_PT_REGS_R15_OFFSET, - offsetof(struct kvm_pt_regs, r15)); - DEFINE(VMM_PT_REGS_R14_OFFSET, - offsetof(struct kvm_pt_regs, r14)); - DEFINE(VMM_PT_REGS_R2_OFFSET, - offsetof(struct kvm_pt_regs, r2)); - DEFINE(VMM_PT_REGS_R3_OFFSET, - offsetof(struct kvm_pt_regs, r3)); - DEFINE(VMM_PT_REGS_R16_OFFSET, - offsetof(struct kvm_pt_regs, r16)); - DEFINE(VMM_PT_REGS_R17_OFFSET, - offsetof(struct kvm_pt_regs, r17)); - DEFINE(VMM_PT_REGS_R18_OFFSET, - offsetof(struct kvm_pt_regs, r18)); - DEFINE(VMM_PT_REGS_R19_OFFSET, - offsetof(struct kvm_pt_regs, r19)); - DEFINE(VMM_PT_REGS_R20_OFFSET, - offsetof(struct kvm_pt_regs, r20)); - DEFINE(VMM_PT_REGS_R21_OFFSET, - offsetof(struct kvm_pt_regs, r21)); - DEFINE(VMM_PT_REGS_R22_OFFSET, - 
offsetof(struct kvm_pt_regs, r22)); - DEFINE(VMM_PT_REGS_R23_OFFSET, - offsetof(struct kvm_pt_regs, r23)); - DEFINE(VMM_PT_REGS_R24_OFFSET, - offsetof(struct kvm_pt_regs, r24)); - DEFINE(VMM_PT_REGS_R25_OFFSET, - offsetof(struct kvm_pt_regs, r25)); - DEFINE(VMM_PT_REGS_R26_OFFSET, - offsetof(struct kvm_pt_regs, r26)); - DEFINE(VMM_PT_REGS_R27_OFFSET, - offsetof(struct kvm_pt_regs, r27)); - DEFINE(VMM_PT_REGS_R28_OFFSET, - offsetof(struct kvm_pt_regs, r28)); - DEFINE(VMM_PT_REGS_R29_OFFSET, - offsetof(struct kvm_pt_regs, r29)); - DEFINE(VMM_PT_REGS_R30_OFFSET, - offsetof(struct kvm_pt_regs, r30)); - DEFINE(VMM_PT_REGS_R31_OFFSET, - offsetof(struct kvm_pt_regs, r31)); - DEFINE(VMM_PT_REGS_AR_CCV_OFFSET, - offsetof(struct kvm_pt_regs, ar_ccv)); - DEFINE(VMM_PT_REGS_F6_OFFSET, - offsetof(struct kvm_pt_regs, f6)); - DEFINE(VMM_PT_REGS_F7_OFFSET, - offsetof(struct kvm_pt_regs, f7)); - DEFINE(VMM_PT_REGS_F8_OFFSET, - offsetof(struct kvm_pt_regs, f8)); - DEFINE(VMM_PT_REGS_F9_OFFSET, - offsetof(struct kvm_pt_regs, f9)); - DEFINE(VMM_PT_REGS_F10_OFFSET, - offsetof(struct kvm_pt_regs, f10)); - DEFINE(VMM_PT_REGS_F11_OFFSET, - offsetof(struct kvm_pt_regs, f11)); - DEFINE(VMM_PT_REGS_R4_OFFSET, - offsetof(struct kvm_pt_regs, r4)); - DEFINE(VMM_PT_REGS_R5_OFFSET, - offsetof(struct kvm_pt_regs, r5)); - DEFINE(VMM_PT_REGS_R6_OFFSET, - offsetof(struct kvm_pt_regs, r6)); - DEFINE(VMM_PT_REGS_R7_OFFSET, - offsetof(struct kvm_pt_regs, r7)); - DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET, - offsetof(struct kvm_pt_regs, eml_unat)); - DEFINE(VMM_VCPU_IIPA_OFFSET, - offsetof(struct kvm_vcpu, arch.cr_iipa)); - DEFINE(VMM_VCPU_OPCODE_OFFSET, - offsetof(struct kvm_vcpu, arch.opcode)); - DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause)); - DEFINE(VMM_VCPU_ISR_OFFSET, - offsetof(struct kvm_vcpu, arch.cr_isr)); - DEFINE(VMM_PT_REGS_R16_SLOT, - (((offsetof(struct kvm_pt_regs, r16) - - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f)); - DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, - offsetof(struct 
kvm_vcpu, arch.mode_flags)); - DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp)); - BLANK(); - - DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd)); - DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs)); - DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET, - offsetof(struct kvm_vcpu, arch.insvc[0])); - DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta)); - DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr)); - - DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4])); - DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5])); - DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12])); - DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13])); - DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0])); - DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1])); - DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0])); - DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1])); - DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2])); - DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0])); - DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16])); - DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18])); - DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19])); - DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21])); - DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24])); - DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27])); - DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28])); - DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29])); - DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30])); - DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36])); - DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40])); - DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64])); - DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65])); - DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0])); - DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union 
context, cr[2])); - DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8])); - DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0])); - DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0])); - DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2])); - DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3])); - DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32])); - DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33])); - DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0])); - DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr)); - BLANK(); -} diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h deleted file mode 100644 index c0785a728271..000000000000 --- a/arch/ia64/kvm/irq.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * irq.h: In-kernel interrupt controller related definitions - * Copyright (c) 2008, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Authors: - * Xiantao Zhang <xiantao.zhang@intel.com> - * - */ - -#ifndef __IRQ_H -#define __IRQ_H - -#include "lapic.h" - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 1; -} - -#endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c deleted file mode 100644 index dbe46f43884d..000000000000 --- a/arch/ia64/kvm/kvm-ia64.c +++ /dev/null @@ -1,1942 +0,0 @@ -/* - * kvm_ia64.c: Basic KVM support On Itanium series processors - * - * - * Copyright (C) 2007, Intel Corporation. - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/percpu.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/kvm_host.h> -#include <linux/kvm.h> -#include <linux/bitops.h> -#include <linux/hrtimer.h> -#include <linux/uaccess.h> -#include <linux/iommu.h> -#include <linux/intel-iommu.h> -#include <linux/pci.h> - -#include <asm/pgtable.h> -#include <asm/gcc_intrin.h> -#include <asm/pal.h> -#include <asm/cacheflush.h> -#include <asm/div64.h> -#include <asm/tlb.h> -#include <asm/elf.h> -#include <asm/sn/addrs.h> -#include <asm/sn/clksupport.h> -#include <asm/sn/shub_mmr.h> - -#include "misc.h" -#include "vti.h" -#include "iodev.h" -#include "ioapic.h" -#include "lapic.h" -#include "irq.h" - -static unsigned long kvm_vmm_base; -static unsigned long kvm_vsa_base; -static unsigned long kvm_vm_buffer; -static unsigned long kvm_vm_buffer_size; -unsigned long kvm_vmm_gp; - -static long vp_env_info; - -static struct kvm_vmm_info *kvm_vmm_info; - -static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); - -struct kvm_stats_debugfs_item debugfs_entries[] = { - { NULL } -}; - -static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu) -{ -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - if (vcpu->kvm->arch.is_sn2) - return rtc_time(); - else -#endif - return ia64_getreg(_IA64_REG_AR_ITC); -} - -static void kvm_flush_icache(unsigned long start, unsigned long len) -{ - int l; - - for (l = 0; l < (len + 32); l += 32) - ia64_fc((void *)(start + l)); - - ia64_sync_i(); - ia64_srlz_i(); -} - -static void kvm_flush_tlb_all(void) -{ - unsigned long i, j, count0, count1, stride0, stride1, addr; - long flags; - - addr = local_cpu_data->ptce_base; - count0 = local_cpu_data->ptce_count[0]; - count1 = local_cpu_data->ptce_count[1]; - stride0 = local_cpu_data->ptce_stride[0]; - stride1 = local_cpu_data->ptce_stride[1]; - - local_irq_save(flags); - for (i = 0; i < count0; ++i) { - for (j = 0; j < count1; ++j) { - 
ia64_ptce(addr); - addr += stride1; - } - addr += stride0; - } - local_irq_restore(flags); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, - (u64)opt_handler); - - return iprv.status; -} - -static DEFINE_SPINLOCK(vp_lock); - -int kvm_arch_hardware_enable(void) -{ - long status; - long tmp_base; - unsigned long pte; - unsigned long saved_psr; - int slot; - - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); - local_irq_save(saved_psr); - slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); - local_irq_restore(saved_psr); - if (slot < 0) - return -EINVAL; - - spin_lock(&vp_lock); - status = ia64_pal_vp_init_env(kvm_vsa_base ? - VP_INIT_ENV : VP_INIT_ENV_INITALIZE, - __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); - if (status != 0) { - spin_unlock(&vp_lock); - printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); - return -EINVAL; - } - - if (!kvm_vsa_base) { - kvm_vsa_base = tmp_base; - printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); - } - spin_unlock(&vp_lock); - ia64_ptr_entry(0x3, slot); - - return 0; -} - -void kvm_arch_hardware_disable(void) -{ - - long status; - int slot; - unsigned long pte; - unsigned long saved_psr; - unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); - - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), - PAGE_KERNEL)); - - local_irq_save(saved_psr); - slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); - local_irq_restore(saved_psr); - if (slot < 0) - return; - - status = ia64_pal_vp_exit_env(host_iva); - if (status) - printk(KERN_DEBUG"kvm: Failed to disable VT support! 
:%ld\n", - status); - ia64_ptr_entry(0x3, slot); -} - -void kvm_arch_check_processor_compat(void *rtn) -{ - *(int *)rtn = 0; -} - -int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) -{ - - int r; - - switch (ext) { - case KVM_CAP_IRQCHIP: - case KVM_CAP_MP_STATE: - case KVM_CAP_IRQ_INJECT_STATUS: - case KVM_CAP_IOAPIC_POLARITY_IGNORED: - r = 1; - break; - case KVM_CAP_COALESCED_MMIO: - r = KVM_COALESCED_MMIO_PAGE_OFFSET; - break; -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT - case KVM_CAP_IOMMU: - r = iommu_present(&pci_bus_type); - break; -#endif - default: - r = 0; - } - return r; - -} - -static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = 1; - return 0; -} - -static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct kvm_mmio_req *p; - struct kvm_io_device *mmio_dev; - int r; - - p = kvm_get_vcpu_ioreq(vcpu); - - if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) - goto mmio; - vcpu->mmio_needed = 1; - vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr; - vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size; - vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; - - if (vcpu->mmio_is_write) - memcpy(vcpu->arch.mmio_data, &p->data, p->size); - memcpy(kvm_run->mmio.data, &p->data, p->size); - kvm_run->exit_reason = KVM_EXIT_MMIO; - return 0; -mmio: - if (p->dir) - r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr, - p->size, &p->data); - else - r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr, - p->size, &p->data); - if (r) - printk(KERN_ERR"kvm: No iodevice found! 
addr:%lx\n", p->addr); - p->state = STATE_IORESP_READY; - - return 1; -} - -static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - - if (p->exit_reason == EXIT_REASON_PAL_CALL) - return kvm_pal_emul(vcpu, kvm_run); - else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = 2; - return 0; - } -} - -static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - kvm_sal_emul(vcpu); - return 1; - } else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = 3; - return 0; - } - -} - -static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (!test_and_set_bit(vector, &vpd->irr[0])) { - vcpu->arch.irq_new_pending = 1; - kvm_vcpu_kick(vcpu); - return 1; - } - return 0; -} - -/* - * offset: address offset to IPI space. - * value: deliver value. 
- */ -static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, - uint64_t vector) -{ - switch (dm) { - case SAPIC_FIXED: - break; - case SAPIC_NMI: - vector = 2; - break; - case SAPIC_EXTINT: - vector = 0; - break; - case SAPIC_INIT: - case SAPIC_PMI: - default: - printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); - return; - } - __apic_accept_irq(vcpu, vector); -} - -static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, - unsigned long eid) -{ - union ia64_lid lid; - int i; - struct kvm_vcpu *vcpu; - - kvm_for_each_vcpu(i, vcpu, kvm) { - lid.val = VCPU_LID(vcpu); - if (lid.id == id && lid.eid == eid) - return vcpu; - } - - return NULL; -} - -static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p = kvm_get_exit_data(vcpu); - struct kvm_vcpu *target_vcpu; - struct kvm_pt_regs *regs; - union ia64_ipi_a addr = p->u.ipi_data.addr; - union ia64_ipi_d data = p->u.ipi_data.data; - - target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); - if (!target_vcpu) - return handle_vm_error(vcpu, kvm_run); - - if (!target_vcpu->arch.launched) { - regs = vcpu_regs(target_vcpu); - - regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; - regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; - - target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - if (waitqueue_active(&target_vcpu->wq)) - wake_up_interruptible(&target_vcpu->wq); - } else { - vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); - if (target_vcpu != vcpu) - kvm_vcpu_kick(target_vcpu); - } - - return 1; -} - -struct call_data { - struct kvm_ptc_g ptc_g_data; - struct kvm_vcpu *vcpu; -}; - -static void vcpu_global_purge(void *info) -{ - struct call_data *p = (struct call_data *)info; - struct kvm_vcpu *vcpu = p->vcpu; - - if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) - return; - - set_bit(KVM_REQ_PTC_G, &vcpu->requests); - if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { - vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = - p->ptc_g_data; - } 
else { - clear_bit(KVM_REQ_PTC_G, &vcpu->requests); - vcpu->arch.ptc_g_count = 0; - set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); - } -} - -static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - struct exit_ctl_data *p = kvm_get_exit_data(vcpu); - struct kvm *kvm = vcpu->kvm; - struct call_data call_data; - int i; - struct kvm_vcpu *vcpui; - - call_data.ptc_g_data = p->u.ptc_g_data; - - kvm_for_each_vcpu(i, vcpui, kvm) { - if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED || - vcpu == vcpui) - continue; - - if (waitqueue_active(&vcpui->wq)) - wake_up_interruptible(&vcpui->wq); - - if (vcpui->cpu != -1) { - call_data.vcpu = vcpui; - smp_call_function_single(vcpui->cpu, - vcpu_global_purge, &call_data, 1); - } else - printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); - - } - return 1; -} - -static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - return 1; -} - -static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu) -{ - unsigned long pte, rtc_phys_addr, map_addr; - int slot; - - map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT); - rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC; - pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC)); - slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT); - vcpu->arch.sn_rtc_tr_slot = slot; - if (slot < 0) { - printk(KERN_ERR "Mayday mayday! 
RTC mapping failed!\n"); - slot = 0; - } - return slot; -} - -int kvm_emulate_halt(struct kvm_vcpu *vcpu) -{ - - ktime_t kt; - long itc_diff; - unsigned long vcpu_now_itc; - unsigned long expires; - struct hrtimer *p_ht = &vcpu->arch.hlt_timer; - unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (irqchip_in_kernel(vcpu->kvm)) { - - vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset; - - if (time_after(vcpu_now_itc, vpd->itm)) { - vcpu->arch.timer_check = 1; - return 1; - } - itc_diff = vpd->itm - vcpu_now_itc; - if (itc_diff < 0) - itc_diff = -itc_diff; - - expires = div64_u64(itc_diff, cyc_per_usec); - kt = ktime_set(0, 1000 * expires); - - vcpu->arch.ht_active = 1; - hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); - - vcpu->arch.mp_state = KVM_MP_STATE_HALTED; - kvm_vcpu_block(vcpu); - hrtimer_cancel(p_ht); - vcpu->arch.ht_active = 0; - - if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) || - kvm_cpu_has_pending_timer(vcpu)) - if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - - if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) - return -EINTR; - return 1; - } else { - printk(KERN_ERR"kvm: Unsupported userspace halt!"); - return 0; - } -} - -static int handle_vm_shutdown(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; - return 0; -} - -static int handle_external_interrupt(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - return 1; -} - -static int handle_vcpu_debug(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - printk("VMM: %s", vcpu->arch.log_buf); - return 1; -} - -static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) = { - [EXIT_REASON_VM_PANIC] = handle_vm_error, - [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, - [EXIT_REASON_PAL_CALL] = handle_pal_call, - [EXIT_REASON_SAL_CALL] = handle_sal_call, - [EXIT_REASON_SWITCH_RR6] = 
handle_switch_rr6, - [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, - [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, - [EXIT_REASON_IPI] = handle_ipi, - [EXIT_REASON_PTC_G] = handle_global_purge, - [EXIT_REASON_DEBUG] = handle_vcpu_debug, - -}; - -static const int kvm_vti_max_exit_handlers = - sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); - -static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p_exit_data; - - p_exit_data = kvm_get_exit_data(vcpu); - return p_exit_data->exit_reason; -} - -/* - * The guest has exited. See if we can fix it or if we need userspace - * assistance. - */ -static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) -{ - u32 exit_reason = kvm_get_exit_reason(vcpu); - vcpu->arch.last_exit = exit_reason; - - if (exit_reason < kvm_vti_max_exit_handlers - && kvm_vti_exit_handlers[exit_reason]) - return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); - else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = exit_reason; - } - return 0; -} - -static inline void vti_set_rr6(unsigned long rr6) -{ - ia64_set_rr(RR6, rr6); - ia64_srlz_i(); -} - -static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) -{ - unsigned long pte; - struct kvm *kvm = vcpu->kvm; - int r; - - /*Insert a pair of tr to map vmm*/ - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); - r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); - if (r < 0) - goto out; - vcpu->arch.vmm_tr_slot = r; - /*Insert a pairt of tr to map data of vm*/ - pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); - r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, - pte, KVM_VM_DATA_SHIFT); - if (r < 0) - goto out; - vcpu->arch.vm_tr_slot = r; - -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - if (kvm->arch.is_sn2) { - r = kvm_sn2_setup_mappings(vcpu); - if (r < 0) - goto out; - } -#endif - - r = 0; -out: - return r; -} - -static void 
kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = vcpu->kvm; - ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); - ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - if (kvm->arch.is_sn2) - ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot); -#endif -} - -static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - int r; - int cpu = smp_processor_id(); - - if (vcpu->arch.last_run_cpu != cpu || - per_cpu(last_vcpu, cpu) != vcpu) { - per_cpu(last_vcpu, cpu) = vcpu; - vcpu->arch.last_run_cpu = cpu; - kvm_flush_tlb_all(); - } - - vcpu->arch.host_rr6 = ia64_get_rr(RR6); - vti_set_rr6(vcpu->arch.vmm_rr); - local_irq_save(psr); - r = kvm_insert_vmm_mapping(vcpu); - local_irq_restore(psr); - return r; -} - -static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) -{ - kvm_purge_vmm_mapping(vcpu); - vti_set_rr6(vcpu->arch.host_rr6); -} - -static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - union context *host_ctx, *guest_ctx; - int r, idx; - - idx = srcu_read_lock(&vcpu->kvm->srcu); - -again: - if (signal_pending(current)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - goto out; - } - - preempt_disable(); - local_irq_disable(); - - /*Get host and guest context with guest address space.*/ - host_ctx = kvm_get_host_context(vcpu); - guest_ctx = kvm_get_guest_context(vcpu); - - clear_bit(KVM_REQ_KICK, &vcpu->requests); - - r = kvm_vcpu_pre_transition(vcpu); - if (r < 0) - goto vcpu_run_fail; - - srcu_read_unlock(&vcpu->kvm->srcu, idx); - vcpu->mode = IN_GUEST_MODE; - kvm_guest_enter(); - - /* - * Transition to the guest - */ - kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); - - kvm_vcpu_post_transition(vcpu); - - vcpu->arch.launched = 1; - set_bit(KVM_REQ_KICK, &vcpu->requests); - local_irq_enable(); - - /* - * We must have an instruction between local_irq_enable() and - * kvm_guest_exit(), so the timer interrupt isn't delayed by - * the interrupt 
shadow. The stat.exits increment will do nicely. - * But we need to prevent reordering, hence this barrier(): - */ - barrier(); - kvm_guest_exit(); - vcpu->mode = OUTSIDE_GUEST_MODE; - preempt_enable(); - - idx = srcu_read_lock(&vcpu->kvm->srcu); - - r = kvm_handle_exit(kvm_run, vcpu); - - if (r > 0) { - if (!need_resched()) - goto again; - } - -out: - srcu_read_unlock(&vcpu->kvm->srcu, idx); - if (r > 0) { - cond_resched(); - idx = srcu_read_lock(&vcpu->kvm->srcu); - goto again; - } - - return r; - -vcpu_run_fail: - local_irq_enable(); - preempt_enable(); - kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; - goto out; -} - -static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) -{ - struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); - - if (!vcpu->mmio_is_write) - memcpy(&p->data, vcpu->arch.mmio_data, 8); - p->state = STATE_IORESP_READY; -} - -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - int r; - sigset_t sigsaved; - - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - - if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { - kvm_vcpu_block(vcpu); - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); - r = -EAGAIN; - goto out; - } - - if (vcpu->mmio_needed) { - memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8); - kvm_set_mmio_data(vcpu); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - } - r = __vcpu_run(vcpu, kvm_run); -out: - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return r; -} - -struct kvm *kvm_arch_alloc_vm(void) -{ - - struct kvm *kvm; - uint64_t vm_base; - - BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE); - - vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); - - if (!vm_base) - return NULL; - - memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); - kvm = (struct kvm *)(vm_base + - offsetof(struct kvm_vm_data, kvm_vm_struct)); - kvm->arch.vm_base = vm_base; - printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base); - - return kvm; -} - 
-struct kvm_ia64_io_range { - unsigned long start; - unsigned long size; - unsigned long type; -}; - -static const struct kvm_ia64_io_range io_ranges[] = { - {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, - {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, - {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, - {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, - {PIB_START, PIB_SIZE, GPFN_PIB}, -}; - -static void kvm_build_io_pmt(struct kvm *kvm) -{ - unsigned long i, j; - - /* Mark I/O ranges */ - for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); - i++) { - for (j = io_ranges[i].start; - j < io_ranges[i].start + io_ranges[i].size; - j += PAGE_SIZE) - kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, - io_ranges[i].type, 0); - } - -} - -/*Use unused rids to virtualize guest rid.*/ -#define GUEST_PHYSICAL_RR0 0x1739 -#define GUEST_PHYSICAL_RR4 0x2739 -#define VMM_INIT_RR 0x1660 - -int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) -{ - BUG_ON(!kvm); - - if (type) - return -EINVAL; - - kvm->arch.is_sn2 = ia64_platform_is("sn2"); - - kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; - kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; - kvm->arch.vmm_init_rr = VMM_INIT_RR; - - /* - *Fill P2M entries for MMIO/IO ranges - */ - kvm_build_io_pmt(kvm); - - INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); - - /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ - set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); - - return 0; -} - -static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, - struct kvm_irqchip *chip) -{ - int r; - - r = 0; - switch (chip->chip_id) { - case KVM_IRQCHIP_IOAPIC: - r = kvm_get_ioapic(kvm, &chip->chip.ioapic); - break; - default: - r = -EINVAL; - break; - } - return r; -} - -static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) -{ - int r; - - r = 0; - switch (chip->chip_id) { - case KVM_IRQCHIP_IOAPIC: - r = kvm_set_ioapic(kvm, &chip->chip.ioapic); - break; - default: - r = -EINVAL; - break; - } - 
return r; -} - -#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x - -int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - int i; - - for (i = 0; i < 16; i++) { - vpd->vgr[i] = regs->vpd.vgr[i]; - vpd->vbgr[i] = regs->vpd.vbgr[i]; - } - for (i = 0; i < 128; i++) - vpd->vcr[i] = regs->vpd.vcr[i]; - vpd->vhpi = regs->vpd.vhpi; - vpd->vnat = regs->vpd.vnat; - vpd->vbnat = regs->vpd.vbnat; - vpd->vpsr = regs->vpd.vpsr; - - vpd->vpr = regs->vpd.vpr; - - memcpy(&vcpu->arch.guest, ®s->saved_guest, sizeof(union context)); - - RESTORE_REGS(mp_state); - RESTORE_REGS(vmm_rr); - memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); - memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); - RESTORE_REGS(itr_regions); - RESTORE_REGS(dtr_regions); - RESTORE_REGS(tc_regions); - RESTORE_REGS(irq_check); - RESTORE_REGS(itc_check); - RESTORE_REGS(timer_check); - RESTORE_REGS(timer_pending); - RESTORE_REGS(last_itc); - for (i = 0; i < 8; i++) { - vcpu->arch.vrr[i] = regs->vrr[i]; - vcpu->arch.ibr[i] = regs->ibr[i]; - vcpu->arch.dbr[i] = regs->dbr[i]; - } - for (i = 0; i < 4; i++) - vcpu->arch.insvc[i] = regs->insvc[i]; - RESTORE_REGS(xtp); - RESTORE_REGS(metaphysical_rr0); - RESTORE_REGS(metaphysical_rr4); - RESTORE_REGS(metaphysical_saved_rr0); - RESTORE_REGS(metaphysical_saved_rr4); - RESTORE_REGS(fp_psr); - RESTORE_REGS(saved_gp); - - vcpu->arch.irq_new_pending = 1; - vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); - set_bit(KVM_REQ_RESUME, &vcpu->requests); - - return 0; -} - -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, - bool line_status) -{ - if (!irqchip_in_kernel(kvm)) - return -ENXIO; - - irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event->irq, irq_event->level, - line_status); - return 0; -} - -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - 
struct kvm *kvm = filp->private_data; - void __user *argp = (void __user *)arg; - int r = -ENOTTY; - - switch (ioctl) { - case KVM_CREATE_IRQCHIP: - r = -EFAULT; - r = kvm_ioapic_init(kvm); - if (r) - goto out; - r = kvm_setup_default_irq_routing(kvm); - if (r) { - mutex_lock(&kvm->slots_lock); - kvm_ioapic_destroy(kvm); - mutex_unlock(&kvm->slots_lock); - goto out; - } - break; - case KVM_GET_IRQCHIP: { - /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ - struct kvm_irqchip chip; - - r = -EFAULT; - if (copy_from_user(&chip, argp, sizeof chip)) - goto out; - r = -ENXIO; - if (!irqchip_in_kernel(kvm)) - goto out; - r = kvm_vm_ioctl_get_irqchip(kvm, &chip); - if (r) - goto out; - r = -EFAULT; - if (copy_to_user(argp, &chip, sizeof chip)) - goto out; - r = 0; - break; - } - case KVM_SET_IRQCHIP: { - /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ - struct kvm_irqchip chip; - - r = -EFAULT; - if (copy_from_user(&chip, argp, sizeof chip)) - goto out; - r = -ENXIO; - if (!irqchip_in_kernel(kvm)) - goto out; - r = kvm_vm_ioctl_set_irqchip(kvm, &chip); - if (r) - goto out; - r = 0; - break; - } - default: - ; - } -out: - return r; -} - -int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return -EINVAL; -} - -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return -EINVAL; - -} -int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - - return -EINVAL; -} - -static int kvm_alloc_vmm_area(void) -{ - if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { - kvm_vmm_base = __get_free_pages(GFP_KERNEL, - get_order(KVM_VMM_SIZE)); - if (!kvm_vmm_base) - return -ENOMEM; - - memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); - kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; - - printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", - kvm_vmm_base, kvm_vm_buffer); - } - - return 0; -} - -static void kvm_free_vmm_area(void) -{ - if (kvm_vmm_base) { - /*Zero this area 
before free to avoid bits leak!!*/ - memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); - free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); - kvm_vmm_base = 0; - kvm_vm_buffer = 0; - kvm_vsa_base = 0; - } -} - -static int vti_init_vpd(struct kvm_vcpu *vcpu) -{ - int i; - union cpuid3_t cpuid3; - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (IS_ERR(vpd)) - return PTR_ERR(vpd); - - /* CPUID init */ - for (i = 0; i < 5; i++) - vpd->vcpuid[i] = ia64_get_cpuid(i); - - /* Limit the CPUID number to 5 */ - cpuid3.value = vpd->vcpuid[3]; - cpuid3.number = 4; /* 5 - 1 */ - vpd->vcpuid[3] = cpuid3.value; - - /*Set vac and vdc fields*/ - vpd->vac.a_from_int_cr = 1; - vpd->vac.a_to_int_cr = 1; - vpd->vac.a_from_psr = 1; - vpd->vac.a_from_cpuid = 1; - vpd->vac.a_cover = 1; - vpd->vac.a_bsw = 1; - vpd->vac.a_int = 1; - vpd->vdc.d_vmsw = 1; - - /*Set virtual buffer*/ - vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; - - return 0; -} - -static int vti_create_vp(struct kvm_vcpu *vcpu) -{ - long ret; - struct vpd *vpd = vcpu->arch.vpd; - unsigned long vmm_ivt; - - vmm_ivt = kvm_vmm_info->vmm_ivt; - - printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); - - ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); - - if (ret) { - printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); - return -EINVAL; - } - return 0; -} - -static void init_ptce_info(struct kvm_vcpu *vcpu) -{ - ia64_ptce_info_t ptce = {0}; - - ia64_get_ptce(&ptce); - vcpu->arch.ptce_base = ptce.base; - vcpu->arch.ptce_count[0] = ptce.count[0]; - vcpu->arch.ptce_count[1] = ptce.count[1]; - vcpu->arch.ptce_stride[0] = ptce.stride[0]; - vcpu->arch.ptce_stride[1] = ptce.stride[1]; -} - -static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) -{ - struct hrtimer *p_ht = &vcpu->arch.hlt_timer; - - if (hrtimer_cancel(p_ht)) - hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS); -} - -static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) -{ - struct kvm_vcpu *vcpu; - wait_queue_head_t *q; - - vcpu = 
container_of(data, struct kvm_vcpu, arch.hlt_timer); - q = &vcpu->wq; - - if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) - goto out; - - if (waitqueue_active(q)) - wake_up_interruptible(q); - -out: - vcpu->arch.timer_fired = 1; - vcpu->arch.timer_check = 1; - return HRTIMER_NORESTART; -} - -#define PALE_RESET_ENTRY 0x80000000ffffffb0UL - -bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) -{ - return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); -} - -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu *v; - int r; - int i; - long itc_offset; - struct kvm *kvm = vcpu->kvm; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - union context *p_ctx = &vcpu->arch.guest; - struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); - - /*Init vcpu context for first run.*/ - if (IS_ERR(vmm_vcpu)) - return PTR_ERR(vmm_vcpu); - - if (kvm_vcpu_is_bsp(vcpu)) { - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - - /*Set entry address for first run.*/ - regs->cr_iip = PALE_RESET_ENTRY; - - /*Initialize itc offset for vcpus*/ - itc_offset = 0UL - kvm_get_itc(vcpu); - for (i = 0; i < KVM_MAX_VCPUS; i++) { - v = (struct kvm_vcpu *)((char *)vcpu + - sizeof(struct kvm_vcpu_data) * i); - v->arch.itc_offset = itc_offset; - v->arch.last_itc = 0; - } - } else - vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; - - r = -ENOMEM; - vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); - if (!vcpu->arch.apic) - goto out; - vcpu->arch.apic->vcpu = vcpu; - - p_ctx->gr[1] = 0; - p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET); - p_ctx->gr[13] = (unsigned long)vmm_vcpu; - p_ctx->psr = 0x1008522000UL; - p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ - p_ctx->caller_unat = 0; - p_ctx->pr = 0x0; - p_ctx->ar[36] = 0x0; /*unat*/ - p_ctx->ar[19] = 0x0; /*rnat*/ - p_ctx->ar[18] = (unsigned long)vmm_vcpu + - ((sizeof(struct kvm_vcpu)+15) & ~15); - p_ctx->ar[64] = 0x0; /*pfs*/ - p_ctx->cr[0] = 0x7e04UL; - p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; - 
p_ctx->cr[8] = 0x3c; - - /*Initialize region register*/ - p_ctx->rr[0] = 0x30; - p_ctx->rr[1] = 0x30; - p_ctx->rr[2] = 0x30; - p_ctx->rr[3] = 0x30; - p_ctx->rr[4] = 0x30; - p_ctx->rr[5] = 0x30; - p_ctx->rr[7] = 0x30; - - /*Initialize branch register 0*/ - p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; - - vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; - vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; - vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; - - hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - vcpu->arch.hlt_timer.function = hlt_timer_fn; - - vcpu->arch.last_run_cpu = -1; - vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id); - vcpu->arch.vsa_base = kvm_vsa_base; - vcpu->arch.__gp = kvm_vmm_gp; - vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); - vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id); - vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id); - init_ptce_info(vcpu); - - r = 0; -out: - return r; -} - -static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) -{ - unsigned long psr; - int r; - - local_irq_save(psr); - r = kvm_insert_vmm_mapping(vcpu); - local_irq_restore(psr); - if (r) - goto fail; - r = kvm_vcpu_init(vcpu, vcpu->kvm, id); - if (r) - goto fail; - - r = vti_init_vpd(vcpu); - if (r) { - printk(KERN_DEBUG"kvm: vpd init error!!\n"); - goto uninit; - } - - r = vti_create_vp(vcpu); - if (r) - goto uninit; - - kvm_purge_vmm_mapping(vcpu); - - return 0; -uninit: - kvm_vcpu_uninit(vcpu); -fail: - return r; -} - -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, - unsigned int id) -{ - struct kvm_vcpu *vcpu; - unsigned long vm_base = kvm->arch.vm_base; - int r; - int cpu; - - BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2); - - r = -EINVAL; - if (id >= KVM_MAX_VCPUS) { - printk(KERN_ERR"kvm: Can't configure vcpus > %ld", - KVM_MAX_VCPUS); - goto fail; - } - - r = -ENOMEM; - if (!vm_base) { - printk(KERN_ERR"kvm: Create vcpu[%d] 
error!\n", id); - goto fail; - } - vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data, - vcpu_data[id].vcpu_struct)); - vcpu->kvm = kvm; - - cpu = get_cpu(); - r = vti_vcpu_setup(vcpu, id); - put_cpu(); - - if (r) { - printk(KERN_DEBUG"kvm: vcpu_setup error!!\n"); - goto fail; - } - - return vcpu; -fail: - return ERR_PTR(r); -} - -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - -int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) -{ - return 0; -} - -int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - return -EINVAL; -} - -int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - return -EINVAL; -} - -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) -{ - return -EINVAL; -} - -void kvm_arch_free_vm(struct kvm *kvm) -{ - unsigned long vm_base = kvm->arch.vm_base; - - if (vm_base) { - memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); - free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); - } - -} - -static void kvm_release_vm_pages(struct kvm *kvm) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - int j; - - slots = kvm_memslots(kvm); - kvm_for_each_memslot(memslot, slots) { - for (j = 0; j < memslot->npages; j++) { - if (memslot->rmap[j]) - put_page((struct page *)memslot->rmap[j]); - } - } -} - -void kvm_arch_destroy_vm(struct kvm *kvm) -{ - kvm_iommu_unmap_guest(kvm); - kvm_free_all_assigned_devices(kvm); - kfree(kvm->arch.vioapic); - kvm_release_vm_pages(kvm); -} - -void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ - if (cpu != vcpu->cpu) { - vcpu->cpu = cpu; - if (vcpu->arch.ht_active) - kvm_migrate_hlt_timer(vcpu); - } -} - -#define SAVE_REGS(_x) regs->_x = vcpu->arch._x - -int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - int i; - - vcpu_load(vcpu); - - for (i = 0; i < 16; i++) { - regs->vpd.vgr[i] = vpd->vgr[i]; - 
regs->vpd.vbgr[i] = vpd->vbgr[i]; - } - for (i = 0; i < 128; i++) - regs->vpd.vcr[i] = vpd->vcr[i]; - regs->vpd.vhpi = vpd->vhpi; - regs->vpd.vnat = vpd->vnat; - regs->vpd.vbnat = vpd->vbnat; - regs->vpd.vpsr = vpd->vpsr; - regs->vpd.vpr = vpd->vpr; - - memcpy(®s->saved_guest, &vcpu->arch.guest, sizeof(union context)); - - SAVE_REGS(mp_state); - SAVE_REGS(vmm_rr); - memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); - memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); - SAVE_REGS(itr_regions); - SAVE_REGS(dtr_regions); - SAVE_REGS(tc_regions); - SAVE_REGS(irq_check); - SAVE_REGS(itc_check); - SAVE_REGS(timer_check); - SAVE_REGS(timer_pending); - SAVE_REGS(last_itc); - for (i = 0; i < 8; i++) { - regs->vrr[i] = vcpu->arch.vrr[i]; - regs->ibr[i] = vcpu->arch.ibr[i]; - regs->dbr[i] = vcpu->arch.dbr[i]; - } - for (i = 0; i < 4; i++) - regs->insvc[i] = vcpu->arch.insvc[i]; - regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu); - SAVE_REGS(xtp); - SAVE_REGS(metaphysical_rr0); - SAVE_REGS(metaphysical_rr4); - SAVE_REGS(metaphysical_saved_rr0); - SAVE_REGS(metaphysical_saved_rr4); - SAVE_REGS(fp_psr); - SAVE_REGS(saved_gp); - - vcpu_put(vcpu); - return 0; -} - -int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu, - struct kvm_ia64_vcpu_stack *stack) -{ - memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack)); - return 0; -} - -int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu, - struct kvm_ia64_vcpu_stack *stack) -{ - memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu), - sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu)); - - vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data; - return 0; -} - -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - - hrtimer_cancel(&vcpu->arch.hlt_timer); - kfree(vcpu->arch.apic); -} - -long kvm_arch_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - struct kvm_vcpu *vcpu = filp->private_data; - void __user *argp = (void 
__user *)arg; - struct kvm_ia64_vcpu_stack *stack = NULL; - long r; - - switch (ioctl) { - case KVM_IA64_VCPU_GET_STACK: { - struct kvm_ia64_vcpu_stack __user *user_stack; - void __user *first_p = argp; - - r = -EFAULT; - if (copy_from_user(&user_stack, first_p, sizeof(void *))) - goto out; - - if (!access_ok(VERIFY_WRITE, user_stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: " - "Illegal user destination address for stack\n"); - goto out; - } - stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); - if (!stack) { - r = -ENOMEM; - goto out; - } - - r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack); - if (r) - goto out; - - if (copy_to_user(user_stack, stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - r = -EFAULT; - goto out; - } - - break; - } - case KVM_IA64_VCPU_SET_STACK: { - struct kvm_ia64_vcpu_stack __user *user_stack; - void __user *first_p = argp; - - r = -EFAULT; - if (copy_from_user(&user_stack, first_p, sizeof(void *))) - goto out; - - if (!access_ok(VERIFY_READ, user_stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: " - "Illegal user address for stack\n"); - goto out; - } - stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); - if (!stack) { - r = -ENOMEM; - goto out; - } - if (copy_from_user(stack, user_stack, - sizeof(struct kvm_ia64_vcpu_stack))) - goto out; - - r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack); - break; - } - - default: - r = -EINVAL; - } - -out: - kfree(stack); - return r; -} - -int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) -{ - return VM_FAULT_SIGBUS; -} - -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - -int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, - enum kvm_mr_change change) -{ - unsigned long i; - unsigned long pfn; - int npages = memslot->npages; - 
unsigned long base_gfn = memslot->base_gfn; - - if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) - return -ENOMEM; - - for (i = 0; i < npages; i++) { - pfn = gfn_to_pfn(kvm, base_gfn + i); - if (!kvm_is_reserved_pfn(pfn)) { - kvm_set_pmt_entry(kvm, base_gfn + i, - pfn << PAGE_SHIFT, - _PAGE_AR_RWX | _PAGE_MA_WB); - memslot->rmap[i] = (unsigned long)pfn_to_page(pfn); - } else { - kvm_set_pmt_entry(kvm, base_gfn + i, - GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT), - _PAGE_MA_UC); - memslot->rmap[i] = 0; - } - } - - return 0; -} - -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ - kvm_flush_remote_tlbs(kvm); -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_arch_flush_shadow_all(); -} - -long kvm_arch_dev_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - return -EINVAL; -} - -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_vcpu_uninit(vcpu); -} - -static int vti_cpu_has_kvm_support(void) -{ - long avail = 1, status = 1, control = 1; - long ret; - - ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); - if (ret) - goto out; - - if (!(avail & PAL_PROC_VM_BIT)) - goto out; - - printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); - - ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); - if (ret) - goto out; - printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size); - - if (!(vp_env_info & VP_OPCODE)) { - printk(KERN_WARNING"kvm: No opcode ability on hardware, " - "vm_env_info:0x%lx\n", vp_env_info); - } - - return 1; -out: - return 0; -} - - -/* - * On SN2, the ITC isn't stable, so copy in fast path code to use the - * SN2 RTC, replacing the ITC based default verion. 
- */ -static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info, - struct module *module) -{ - unsigned long new_ar, new_ar_sn2; - unsigned long module_base; - - if (!ia64_platform_is("sn2")) - return; - - module_base = (unsigned long)module->module_core; - - new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base; - new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base; - - printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC " - "as source\n"); - - /* - * Copy the SN2 version of mov_ar into place. They are both - * the same size, so 6 bundles is sufficient (6 * 0x10). - */ - memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60); -} - -static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, - struct module *module) -{ - unsigned long module_base; - unsigned long vmm_size; - - unsigned long vmm_offset, func_offset, fdesc_offset; - struct fdesc *p_fdesc; - - BUG_ON(!module); - - if (!kvm_vmm_base) { - printk("kvm: kvm area hasn't been initialized yet!!\n"); - return -EFAULT; - } - - /*Calculate new position of relocated vmm module.*/ - module_base = (unsigned long)module->module_core; - vmm_size = module->core_size; - if (unlikely(vmm_size > KVM_VMM_SIZE)) - return -EFAULT; - - memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); - kvm_patch_vmm(vmm_info, module); - kvm_flush_icache(kvm_vmm_base, vmm_size); - - /*Recalculate kvm_vmm_info based on new VMM*/ - vmm_offset = vmm_info->vmm_ivt - module_base; - kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; - printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", - kvm_vmm_info->vmm_ivt); - - fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; - kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + - fdesc_offset); - func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; - p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); - p_fdesc->ip = KVM_VMM_BASE + func_offset; - p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); - - 
printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", - KVM_VMM_BASE+func_offset); - - fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; - kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + - fdesc_offset); - func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; - p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); - p_fdesc->ip = KVM_VMM_BASE + func_offset; - p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); - - kvm_vmm_gp = p_fdesc->gp; - - printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", - kvm_vmm_info->vmm_entry); - printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", - KVM_VMM_BASE + func_offset); - - return 0; -} - -int kvm_arch_init(void *opaque) -{ - int r; - struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; - - if (!vti_cpu_has_kvm_support()) { - printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); - r = -EOPNOTSUPP; - goto out; - } - - if (kvm_vmm_info) { - printk(KERN_ERR "kvm: Already loaded VMM module!\n"); - r = -EEXIST; - goto out; - } - - r = -ENOMEM; - kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); - if (!kvm_vmm_info) - goto out; - - if (kvm_alloc_vmm_area()) - goto out_free0; - - r = kvm_relocate_vmm(vmm_info, vmm_info->module); - if (r) - goto out_free1; - - return 0; - -out_free1: - kvm_free_vmm_area(); -out_free0: - kfree(kvm_vmm_info); -out: - return r; -} - -void kvm_arch_exit(void) -{ - kvm_free_vmm_area(); - kfree(kvm_vmm_info); - kvm_vmm_info = NULL; -} - -static void kvm_ia64_sync_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot) -{ - int i; - long base; - unsigned long n; - unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + - offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); - - n = kvm_dirty_bitmap_bytes(memslot); - base = memslot->base_gfn / BITS_PER_LONG; - - spin_lock(&kvm->arch.dirty_log_lock); - for (i = 0; i < n/sizeof(long); ++i) { - memslot->dirty_bitmap[i] = 
dirty_bitmap[base + i]; - dirty_bitmap[base + i] = 0; - } - spin_unlock(&kvm->arch.dirty_log_lock); -} - -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log) -{ - int r; - unsigned long n; - struct kvm_memory_slot *memslot; - int is_dirty = 0; - - mutex_lock(&kvm->slots_lock); - - r = -EINVAL; - if (log->slot >= KVM_USER_MEM_SLOTS) - goto out; - - memslot = id_to_memslot(kvm->memslots, log->slot); - r = -ENOENT; - if (!memslot->dirty_bitmap) - goto out; - - kvm_ia64_sync_dirty_log(kvm, memslot); - r = kvm_get_dirty_log(kvm, log, &is_dirty); - if (r) - goto out; - - /* If nothing is dirty, don't bother messing with page tables. */ - if (is_dirty) { - kvm_flush_remote_tlbs(kvm); - n = kvm_dirty_bitmap_bytes(memslot); - memset(memslot->dirty_bitmap, 0, n); - } - r = 0; -out: - mutex_unlock(&kvm->slots_lock); - return r; -} - -int kvm_arch_hardware_setup(void) -{ - return 0; -} - -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) -{ - return __apic_accept_irq(vcpu, irq->vector); -} - -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) -{ - return apic->vcpu->vcpu_id == dest; -} - -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) -{ - return 0; -} - -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) -{ - return vcpu1->arch.xtp - vcpu2->arch.xtp; -} - -int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode) -{ - struct kvm_lapic *target = vcpu->arch.apic; - return (dest_mode == 0) ? 
- kvm_apic_match_physical_addr(target, dest) : - kvm_apic_match_logical_addr(target, dest); -} - -static int find_highest_bits(int *dat) -{ - u32 bits, bitnum; - int i; - - /* loop for all 256 bits */ - for (i = 7; i >= 0 ; i--) { - bits = dat[i]; - if (bits) { - bitnum = fls(bits); - return i * 32 + bitnum - 1; - } - } - - return -1; -} - -int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) -{ - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (vpd->irr[0] & (1UL << NMI_VECTOR)) - return NMI_VECTOR; - if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) - return ExtINT_VECTOR; - - return find_highest_bits((int *)&vpd->irr[0]); -} - -int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.timer_fired; -} - -int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || - (kvm_highest_pending_irq(vcpu) != -1); -} - -int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) -{ - return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)); -} - -int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - mp_state->mp_state = vcpu->arch.mp_state; - return 0; -} - -static int vcpu_reset(struct kvm_vcpu *vcpu) -{ - int r; - long psr; - local_irq_save(psr); - r = kvm_insert_vmm_mapping(vcpu); - local_irq_restore(psr); - if (r) - goto fail; - - vcpu->arch.launched = 0; - kvm_arch_vcpu_uninit(vcpu); - r = kvm_arch_vcpu_init(vcpu); - if (r) - goto fail; - - kvm_purge_vmm_mapping(vcpu); - r = 0; -fail: - return r; -} - -int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - int r = 0; - - vcpu->arch.mp_state = mp_state->mp_state; - if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) - r = vcpu_reset(vcpu); - return r; -} diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c deleted file mode 100644 index cb548ee9fcae..000000000000 --- a/arch/ia64/kvm/kvm_fw.c +++ /dev/null @@ -1,674 +0,0 @@ -/* - * PAL/SAL call delegation - * - 
* Copyright (c) 2004 Li Susie <susie.li@intel.com> - * Copyright (c) 2005 Yu Ke <ke.yu@intel.com> - * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ - -#include <linux/kvm_host.h> -#include <linux/smp.h> -#include <asm/sn/addrs.h> -#include <asm/sn/clksupport.h> -#include <asm/sn/shub_mmr.h> - -#include "vti.h" -#include "misc.h" - -#include <asm/pal.h> -#include <asm/sal.h> -#include <asm/tlb.h> - -/* - * Handy macros to make sure that the PAL return values start out - * as something meaningful. 
- */ -#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \ - { \ - x.status = PAL_STATUS_UNIMPLEMENTED; \ - x.v0 = 0; \ - x.v1 = 0; \ - x.v2 = 0; \ - } - -#define INIT_PAL_STATUS_SUCCESS(x) \ - { \ - x.status = PAL_STATUS_SUCCESS; \ - x.v0 = 0; \ - x.v1 = 0; \ - x.v2 = 0; \ - } - -static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu, - u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) { - struct exit_ctl_data *p; - - if (vcpu) { - p = &vcpu->arch.exit_data; - if (p->exit_reason == EXIT_REASON_PAL_CALL) { - *gr28 = p->u.pal_data.gr28; - *gr29 = p->u.pal_data.gr29; - *gr30 = p->u.pal_data.gr30; - *gr31 = p->u.pal_data.gr31; - return ; - } - } - printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n"); -} - -static void set_pal_result(struct kvm_vcpu *vcpu, - struct ia64_pal_retval result) { - - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - if (p->exit_reason == EXIT_REASON_PAL_CALL) { - p->u.pal_data.ret = result; - return ; - } - INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret); -} - -static void set_sal_result(struct kvm_vcpu *vcpu, - struct sal_ret_values result) { - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - p->u.sal_data.ret = result; - return ; - } - printk(KERN_WARNING"Failed to set sal result!!\n"); -} - -struct cache_flush_args { - u64 cache_type; - u64 operation; - u64 progress; - long status; -}; - -cpumask_t cpu_cache_coherent_map; - -static void remote_pal_cache_flush(void *data) -{ - struct cache_flush_args *args = data; - long status; - u64 progress = args->progress; - - status = ia64_pal_cache_flush(args->cache_type, args->operation, - &progress, NULL); - if (status != 0) - args->status = status; -} - -static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) -{ - u64 gr28, gr29, gr30, gr31; - struct ia64_pal_retval result = {0, 0, 0, 0}; - struct cache_flush_args args = {0, 0, 0, 0}; - long psr; - - gr28 = gr29 = gr30 = gr31 = 0; - kvm_get_pal_call_data(vcpu, &gr28, &gr29, 
&gr30, &gr31); - - if (gr31 != 0) - printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu); - - /* Always call Host Pal in int=1 */ - gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS; - args.cache_type = gr29; - args.operation = gr30; - smp_call_function(remote_pal_cache_flush, - (void *)&args, 1); - if (args.status != 0) - printk(KERN_ERR"pal_cache_flush error!," - "status:0x%lx\n", args.status); - /* - * Call Host PAL cache flush - * Clear psr.ic when call PAL_CACHE_FLUSH - */ - local_irq_save(psr); - result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1, - &result.v0); - local_irq_restore(psr); - if (result.status != 0) - printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld" - "in1:%lx,in2:%lx\n", - vcpu, result.status, gr29, gr30); - -#if 0 - if (gr29 == PAL_CACHE_TYPE_COHERENT) { - cpus_setall(vcpu->arch.cache_coherent_map); - cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map); - cpus_setall(cpu_cache_coherent_map); - cpu_clear(vcpu->cpu, cpu_cache_coherent_map); - } -#endif - return result; -} - -struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0); - return result; -} - -static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0); - - /* - * PAL_FREQ_BASE may not be implemented in some platforms, - * call SAL instead. - */ - if (result.v0 == 0) { - result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, - &result.v0, - &result.v1); - result.v2 = 0; - } - - return result; -} - -/* - * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2 - * RTC is used instead. This function patches the ratios from SAL - * to match the RTC before providing them to the guest. 
- */ -static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result) -{ - struct pal_freq_ratio *ratio; - unsigned long sal_freq, sal_drift, factor; - - result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, - &sal_freq, &sal_drift); - ratio = (struct pal_freq_ratio *)&result->v2; - factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) / - sn_rtc_cycles_per_second; - - ratio->num = 3; - ratio->den = factor; -} - -static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); - - if (vcpu->kvm->arch.is_sn2) - sn2_patch_itc_freq_ratios(&result); - - return result; -} - -static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result; - - INIT_PAL_STATUS_UNIMPLEMENTED(result); - return result; -} - -static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result; - - INIT_PAL_STATUS_SUCCESS(result); - return result; -} - -static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result = {0, 0, 0, 0}; - long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - result.status = ia64_pal_proc_get_features(&result.v0, &result.v1, - &result.v2, in2); - - return result; -} - -static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu) -{ - - struct ia64_pal_retval result = {0, 0, 0, 0}; - long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - result.status = ia64_pal_register_info(in1, &result.v1, &result.v2); - - return result; -} - -static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu) -{ - - pal_cache_config_info_t ci; - long status; - unsigned long in0, in1, in2, in3, r9, r10; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - status = ia64_pal_cache_config_info(in1, in2, &ci); - r9 = ci.pcci_info_1.pcci1_data; - r10 = 
ci.pcci_info_2.pcci2_data; - return ((struct ia64_pal_retval){status, r9, r10, 0}); -} - -#define GUEST_IMPL_VA_MSB 59 -#define GUEST_RID_BITS 18 - -static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu) -{ - - pal_vm_info_1_u_t vminfo1; - pal_vm_info_2_u_t vminfo2; - struct ia64_pal_retval result; - - PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0); - if (!result.status) { - vminfo1.pvi1_val = result.v0; - vminfo1.pal_vm_info_1_s.max_itr_entry = 8; - vminfo1.pal_vm_info_1_s.max_dtr_entry = 8; - result.v0 = vminfo1.pvi1_val; - vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB; - vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS; - result.v1 = vminfo2.pvi2_val; - } - - return result; -} - -static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result; - unsigned long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - - result.status = ia64_pal_vm_info(in1, in2, - (pal_tc_info_u_t *)&result.v1, &result.v2); - - return result; -} - -static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu) -{ - u64 index = 0; - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - if (p->exit_reason == EXIT_REASON_PAL_CALL) - index = p->u.pal_data.gr28; - - return index; -} - -static void prepare_for_halt(struct kvm_vcpu *vcpu) -{ - vcpu->arch.timer_pending = 1; - vcpu->arch.timer_fired = 0; -} - -static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu) -{ - long status; - unsigned long in0, in1, in2, in3, r9; - unsigned long pm_buffer[16]; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - status = ia64_pal_perf_mon_info(pm_buffer, - (pal_perf_mon_info_u_t *) &r9); - if (status != 0) { - printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status); - } else { - if (in1) - memcpy((void *)in1, pm_buffer, sizeof(pm_buffer)); - else { - status = PAL_STATUS_EINVAL; - printk(KERN_WARNING"Invalid parameters " - "for PAL call:0x%lx!\n", in0); - } - } - return (struct 
ia64_pal_retval){status, r9, 0, 0}; -} - -static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu) -{ - unsigned long in0, in1, in2, in3; - long status; - unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32) - | (1UL << 61) | (1UL << 60); - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - if (in1) { - memcpy((void *)in1, &res, sizeof(res)); - status = 0; - } else{ - status = PAL_STATUS_EINVAL; - printk(KERN_WARNING"Invalid parameters " - "for PAL call:0x%lx!\n", in0); - } - - return (struct ia64_pal_retval){status, 0, 0, 0}; -} - -static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu) -{ - unsigned long r9; - long status; - - status = ia64_pal_mem_attrib(&r9); - - return (struct ia64_pal_retval){status, r9, 0, 0}; -} - -static void remote_pal_prefetch_visibility(void *v) -{ - s64 trans_type = (s64)v; - ia64_pal_prefetch_visibility(trans_type); -} - -static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result = {0, 0, 0, 0}; - unsigned long in0, in1, in2, in3; - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - result.status = ia64_pal_prefetch_visibility(in1); - if (result.status == 0) { - /* Must be performed on all remote processors - in the coherence domain. 
*/ - smp_call_function(remote_pal_prefetch_visibility, - (void *)in1, 1); - /* Unnecessary on remote processor for other vcpus!*/ - result.status = 1; - } - return result; -} - -static void remote_pal_mc_drain(void *v) -{ - ia64_pal_mc_drain(); -} - -static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu) -{ - struct ia64_pal_retval result = {0, 0, 0, 0}; - unsigned long in0, in1, in2, in3; - - kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); - - if (in1 == 0 && in2) { - char brand_info[128]; - result.status = ia64_pal_get_brand_info(brand_info); - if (result.status == PAL_STATUS_SUCCESS) - memcpy((void *)in2, brand_info, 128); - } else { - result.status = PAL_STATUS_REQUIRES_MEMORY; - printk(KERN_WARNING"Invalid parameters for " - "PAL call:0x%lx!\n", in0); - } - - return result; -} - -int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - - u64 gr28; - struct ia64_pal_retval result; - int ret = 1; - - gr28 = kvm_get_pal_call_index(vcpu); - switch (gr28) { - case PAL_CACHE_FLUSH: - result = pal_cache_flush(vcpu); - break; - case PAL_MEM_ATTRIB: - result = pal_mem_attrib(vcpu); - break; - case PAL_CACHE_SUMMARY: - result = pal_cache_summary(vcpu); - break; - case PAL_PERF_MON_INFO: - result = pal_perf_mon_info(vcpu); - break; - case PAL_HALT_INFO: - result = pal_halt_info(vcpu); - break; - case PAL_HALT_LIGHT: - { - INIT_PAL_STATUS_SUCCESS(result); - prepare_for_halt(vcpu); - if (kvm_highest_pending_irq(vcpu) == -1) - ret = kvm_emulate_halt(vcpu); - } - break; - - case PAL_PREFETCH_VISIBILITY: - result = pal_prefetch_visibility(vcpu); - break; - case PAL_MC_DRAIN: - result.status = ia64_pal_mc_drain(); - /* FIXME: All vcpus likely call PAL_MC_DRAIN. - That causes the congestion. 
*/ - smp_call_function(remote_pal_mc_drain, NULL, 1); - break; - - case PAL_FREQ_RATIOS: - result = pal_freq_ratios(vcpu); - break; - - case PAL_FREQ_BASE: - result = pal_freq_base(vcpu); - break; - - case PAL_LOGICAL_TO_PHYSICAL : - result = pal_logical_to_physica(vcpu); - break; - - case PAL_VM_SUMMARY : - result = pal_vm_summary(vcpu); - break; - - case PAL_VM_INFO : - result = pal_vm_info(vcpu); - break; - case PAL_PLATFORM_ADDR : - result = pal_platform_addr(vcpu); - break; - case PAL_CACHE_INFO: - result = pal_cache_info(vcpu); - break; - case PAL_PTCE_INFO: - INIT_PAL_STATUS_SUCCESS(result); - result.v1 = (1L << 32) | 1L; - break; - case PAL_REGISTER_INFO: - result = pal_register_info(vcpu); - break; - case PAL_VM_PAGE_SIZE: - result.status = ia64_pal_vm_page_size(&result.v0, - &result.v1); - break; - case PAL_RSE_INFO: - result.status = ia64_pal_rse_info(&result.v0, - (pal_hints_u_t *)&result.v1); - break; - case PAL_PROC_GET_FEATURES: - result = pal_proc_get_features(vcpu); - break; - case PAL_DEBUG_INFO: - result.status = ia64_pal_debug_info(&result.v0, - &result.v1); - break; - case PAL_VERSION: - result.status = ia64_pal_version( - (pal_version_u_t *)&result.v0, - (pal_version_u_t *)&result.v1); - break; - case PAL_FIXED_ADDR: - result.status = PAL_STATUS_SUCCESS; - result.v0 = vcpu->vcpu_id; - break; - case PAL_BRAND_INFO: - result = pal_get_brand_info(vcpu); - break; - case PAL_GET_PSTATE: - case PAL_CACHE_SHARED_INFO: - INIT_PAL_STATUS_UNIMPLEMENTED(result); - break; - default: - INIT_PAL_STATUS_UNIMPLEMENTED(result); - printk(KERN_WARNING"kvm: Unsupported pal call," - " index:0x%lx\n", gr28); - } - set_pal_result(vcpu, result); - return ret; -} - -static struct sal_ret_values sal_emulator(struct kvm *kvm, - long index, unsigned long in1, - unsigned long in2, unsigned long in3, - unsigned long in4, unsigned long in5, - unsigned long in6, unsigned long in7) -{ - unsigned long r9 = 0; - unsigned long r10 = 0; - long r11 = 0; - long status; - - status = 
0; - switch (index) { - case SAL_FREQ_BASE: - status = ia64_sal_freq_base(in1, &r9, &r10); - break; - case SAL_PCI_CONFIG_READ: - printk(KERN_WARNING"kvm: Not allowed to call here!" - " SAL_PCI_CONFIG_READ\n"); - break; - case SAL_PCI_CONFIG_WRITE: - printk(KERN_WARNING"kvm: Not allowed to call here!" - " SAL_PCI_CONFIG_WRITE\n"); - break; - case SAL_SET_VECTORS: - if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { - if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { - status = -2; - } else { - kvm->arch.rdv_sal_data.boot_ip = in2; - kvm->arch.rdv_sal_data.boot_gp = in3; - } - printk("Rendvous called! iip:%lx\n\n", in2); - } else - printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu." - "ignored...\n", in1); - break; - case SAL_GET_STATE_INFO: - /* No more info. */ - status = -5; - r9 = 0; - break; - case SAL_GET_STATE_INFO_SIZE: - /* Return a dummy size. */ - status = 0; - r9 = 128; - break; - case SAL_CLEAR_STATE_INFO: - /* Noop. */ - break; - case SAL_MC_RENDEZ: - printk(KERN_WARNING - "kvm: called SAL_MC_RENDEZ. ignored...\n"); - break; - case SAL_MC_SET_PARAMS: - printk(KERN_WARNING - "kvm: called SAL_MC_SET_PARAMS.ignored!\n"); - break; - case SAL_CACHE_FLUSH: - if (1) { - /*Flush using SAL. - This method is faster but has a side - effect on other vcpu running on - this cpu. */ - status = ia64_sal_cache_flush(in1); - } else { - /*Maybe need to implement the method - without side effect!*/ - status = 0; - } - break; - case SAL_CACHE_INIT: - printk(KERN_WARNING - "kvm: called SAL_CACHE_INIT. ignored...\n"); - break; - case SAL_UPDATE_PAL: - printk(KERN_WARNING - "kvm: CALLED SAL_UPDATE_PAL. ignored...\n"); - break; - default: - printk(KERN_WARNING"kvm: called SAL_CALL with unknown index." 
- " index:%ld\n", index); - status = -1; - break; - } - return ((struct sal_ret_values) {status, r9, r10, r11}); -} - -static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1, - u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){ - - struct exit_ctl_data *p; - - p = kvm_get_exit_data(vcpu); - - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - *in0 = p->u.sal_data.in0; - *in1 = p->u.sal_data.in1; - *in2 = p->u.sal_data.in2; - *in3 = p->u.sal_data.in3; - *in4 = p->u.sal_data.in4; - *in5 = p->u.sal_data.in5; - *in6 = p->u.sal_data.in6; - *in7 = p->u.sal_data.in7; - return ; - } - *in0 = 0; -} - -void kvm_sal_emul(struct kvm_vcpu *vcpu) -{ - - struct sal_ret_values result; - u64 index, in1, in2, in3, in4, in5, in6, in7; - - kvm_get_sal_call_data(vcpu, &index, &in1, &in2, - &in3, &in4, &in5, &in6, &in7); - result = sal_emulator(vcpu->kvm, index, in1, in2, in3, - in4, in5, in6, in7); - set_sal_result(vcpu, result); -} diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c deleted file mode 100644 index f1268b8e6f9e..000000000000 --- a/arch/ia64/kvm/kvm_lib.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * kvm_lib.c: Compile some libraries for kvm-intel module. - * - * Just include kernel's library, and disable symbols export. - * Copyright (C) 2008, Intel Corporation. - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - */ -#undef CONFIG_MODULES -#include <linux/module.h> -#undef CONFIG_KALLSYMS -#undef EXPORT_SYMBOL -#undef EXPORT_SYMBOL_GPL -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#include "../../../lib/vsprintf.c" -#include "../../../lib/ctype.c" diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h deleted file mode 100644 index b2bcaa2787aa..000000000000 --- a/arch/ia64/kvm/kvm_minstate.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * kvm_minstate.h: min save macros - * Copyright (c) 2007, Intel Corporation. - * - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - - -#include <asm/asmmacro.h> -#include <asm/types.h> -#include <asm/kregs.h> -#include <asm/kvm_host.h> - -#include "asm-offsets.h" - -#define KVM_MINSTATE_START_SAVE_MIN \ - mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\ - ;; \ - mov.m r28 = ar.rnat; \ - addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ - ;; \ - lfetch.fault.excl.nt1 [r22]; \ - addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \ - mov r23 = ar.bspstore; /* save ar.bspstore */ \ - ;; \ - mov ar.bspstore = r22; /* switch to kernel RBS */\ - ;; \ - mov r18 = ar.bsp; \ - mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ - - - -#define KVM_MINSTATE_END_SAVE_MIN \ - bsw.1; /* switch back to bank 1 (must be last in insn group) */\ - ;; - - -#define PAL_VSA_SYNC_READ \ - /* begin to call pal vps sync_read */ \ -{.mii; \ - add r25 = VMM_VPD_BASE_OFFSET, r21; \ - nop 0x0; \ - mov r24=ip; \ - ;; \ -} \ -{.mmb \ - add r24=0x20, r24; \ - ld8 r25 = [r25]; /* read vpd base */ \ - br.cond.sptk kvm_vps_sync_read; /*call the service*/ \ - ;; \ -}; \ - - -#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 - -/* - * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves - * the minimum state necessary that allows us to turn psr.ic back - * on. - * - * Assumed state upon entry: - * psr.ic: off - * r31: contains saved predicates (pr) - * - * Upon exit, the state is as follows: - * psr.ic: off - * r2 = points to &pt_regs.r16 - * r8 = contents of ar.ccv - * r9 = contents of ar.csd - * r10 = contents of ar.ssd - * r11 = FPSR_DEFAULT - * r12 = kernel sp (kernel virtual address) - * r13 = points to current task_struct (kernel virtual address) - * p15 = TRUE if psr.i is set in cr.ipsr - * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: - * preserved - * - * Note that psr.ic is NOT turned on by this macro. This is so that - * we can pass interruption state as arguments to a handler. 
- */ - - -#define PT(f) (VMM_PT_REGS_##f##_OFFSET) - -#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ - KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ - mov r27 = ar.rsc; /* M */ \ - mov r20 = r1; /* A */ \ - mov r25 = ar.unat; /* M */ \ - mov r29 = cr.ipsr; /* M */ \ - mov r26 = ar.pfs; /* I */ \ - mov r18 = cr.isr; \ - COVER; /* B;; (or nothing) */ \ - ;; \ - tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \ - mov r1 = r16; \ -/* mov r21=r16; */ \ - /* switch from user to kernel RBS: */ \ - ;; \ - invala; /* M */ \ - SAVE_IFS; \ - ;; \ - KVM_MINSTATE_START_SAVE_MIN \ - adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \ - adds r16 = PT(CR_IPSR),r1; \ - ;; \ - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ - st8 [r16] = r29; /* save cr.ipsr */ \ - ;; \ - lfetch.fault.excl.nt1 [r17]; \ - tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \ - mov r29 = b0 \ - ;; \ - adds r16 = PT(R8),r1; /* initialize first base pointer */\ - adds r17 = PT(R9),r1; /* initialize second base pointer */\ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r8,16; \ -.mem.offset 8,0; st8.spill [r17] = r9,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r10,24; \ -.mem.offset 8,0; st8.spill [r17] = r11,24; \ - ;; \ - mov r9 = cr.iip; /* M */ \ - mov r10 = ar.fpsr; /* M */ \ - ;; \ - st8 [r16] = r9,16; /* save cr.iip */ \ - st8 [r17] = r30,16; /* save cr.ifs */ \ - sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \ - ;; \ - st8 [r16] = r25,16; /* save ar.unat */ \ - st8 [r17] = r26,16; /* save ar.pfs */ \ - shl r18 = r18,16; /* calu ar.rsc used for "loadrs" */\ - ;; \ - st8 [r16] = r27,16; /* save ar.rsc */ \ - st8 [r17] = r28,16; /* save ar.rnat */ \ - ;; /* avoid RAW on r16 & r17 */ \ - st8 [r16] = r23,16; /* save ar.bspstore */ \ - st8 [r17] = r31,16; /* save predicates */ \ - ;; \ - st8 [r16] = r29,16; /* save b0 */ \ - st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \ -.mem.offset 8,0; st8.spill [r17] = r12,16; \ - adds r12 = -16,r1; /* 
switch to kernel memory stack */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r13,16; \ -.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\ - mov r13 = r21; /* establish `current' */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r15,16; \ -.mem.offset 8,0; st8.spill [r17] = r14,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16] = r2,16; \ -.mem.offset 8,0; st8.spill [r17] = r3,16; \ - adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \ - ;; \ - adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \ - adds r17 = VMM_VCPU_ISR_OFFSET,r13; \ - mov r26 = cr.iipa; \ - mov r27 = cr.isr; \ - ;; \ - st8 [r16] = r26; \ - st8 [r17] = r27; \ - ;; \ - EXTRA; \ - mov r8 = ar.ccv; \ - mov r9 = ar.csd; \ - mov r10 = ar.ssd; \ - movl r11 = FPSR_DEFAULT; /* L-unit */ \ - adds r17 = VMM_VCPU_GP_OFFSET,r13; \ - ;; \ - ld8 r1 = [r17];/* establish kernel global pointer */ \ - ;; \ - PAL_VSA_SYNC_READ \ - KVM_MINSTATE_END_SAVE_MIN - -/* - * SAVE_REST saves the remainder of pt_regs (with psr.ic on). - * - * Assumed state upon entry: - * psr.ic: on - * r2: points to &pt_regs.f6 - * r3: points to &pt_regs.f7 - * r8: contents of ar.ccv - * r9: contents of ar.csd - * r10: contents of ar.ssd - * r11: FPSR_DEFAULT - * - * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. 
- */ -#define KVM_SAVE_REST \ -.mem.offset 0,0; st8.spill [r2] = r16,16; \ -.mem.offset 8,0; st8.spill [r3] = r17,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r18,16; \ -.mem.offset 8,0; st8.spill [r3] = r19,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r20,16; \ -.mem.offset 8,0; st8.spill [r3] = r21,16; \ - mov r18=b6; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r22,16; \ -.mem.offset 8,0; st8.spill [r3] = r23,16; \ - mov r19 = b7; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r24,16; \ -.mem.offset 8,0; st8.spill [r3] = r25,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r26,16; \ -.mem.offset 8,0; st8.spill [r3] = r27,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r28,16; \ -.mem.offset 8,0; st8.spill [r3] = r29,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r30,16; \ -.mem.offset 8,0; st8.spill [r3] = r31,32; \ - ;; \ - mov ar.fpsr = r11; \ - st8 [r2] = r8,8; \ - adds r24 = PT(B6)-PT(F7),r3; \ - adds r25 = PT(B7)-PT(F7),r3; \ - ;; \ - st8 [r24] = r18,16; /* b6 */ \ - st8 [r25] = r19,16; /* b7 */ \ - adds r2 = PT(R4)-PT(F6),r2; \ - adds r3 = PT(R5)-PT(F7),r3; \ - ;; \ - st8 [r24] = r9; /* ar.csd */ \ - st8 [r25] = r10; /* ar.ssd */ \ - ;; \ - mov r18 = ar.unat; \ - adds r19 = PT(EML_UNAT)-PT(R4),r2; \ - ;; \ - st8 [r19] = r18; /* eml_unat */ \ - - -#define KVM_SAVE_EXTRA \ -.mem.offset 0,0; st8.spill [r2] = r4,16; \ -.mem.offset 8,0; st8.spill [r3] = r5,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2] = r6,16; \ -.mem.offset 8,0; st8.spill [r3] = r7; \ - ;; \ - mov r26 = ar.unat; \ - ;; \ - st8 [r2] = r26;/* eml_unat */ \ - -#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,) -#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19) -#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, ) diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h deleted file mode 100644 index c5f92a926a9a..000000000000 --- a/arch/ia64/kvm/lapic.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef __KVM_IA64_LAPIC_H -#define 
__KVM_IA64_LAPIC_H - -#include <linux/kvm_host.h> - -/* - * vlsapic - */ -struct kvm_lapic{ - struct kvm_vcpu *vcpu; - uint64_t insvc[4]; - uint64_t vhpi; - uint8_t xtp; - uint8_t pal_init_pending; - uint8_t pad[2]; -}; - -int kvm_create_lapic(struct kvm_vcpu *vcpu); -void kvm_free_lapic(struct kvm_vcpu *vcpu); - -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode); -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); -#define kvm_apic_present(x) (true) -#define kvm_lapic_enabled(x) (true) - -#endif diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S deleted file mode 100644 index c04cdbe9f80f..000000000000 --- a/arch/ia64/kvm/memcpy.S +++ /dev/null @@ -1 +0,0 @@ -#include "../lib/memcpy.S" diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S deleted file mode 100644 index 83c3066d844a..000000000000 --- a/arch/ia64/kvm/memset.S +++ /dev/null @@ -1 +0,0 @@ -#include "../lib/memset.S" diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h deleted file mode 100644 index dd979e00b574..000000000000 --- a/arch/ia64/kvm/misc.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef __KVM_IA64_MISC_H -#define __KVM_IA64_MISC_H - -#include <linux/kvm_host.h> -/* - * misc.h - * Copyright (C) 2007, Intel Corporation. - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -/* - *Return p2m base address at host side! - */ -static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) -{ - return (uint64_t *)(kvm->arch.vm_base + - offsetof(struct kvm_vm_data, kvm_p2m)); -} - -static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, - u64 paddr, u64 mem_flags) -{ - uint64_t *pmt_base = kvm_host_get_pmt(kvm); - unsigned long pte; - - pte = PAGE_ALIGN(paddr) | mem_flags; - pmt_base[gfn] = pte; -} - -/*Function for translating host address to guest address*/ - -static inline void *to_guest(struct kvm *kvm, void *addr) -{ - return (void *)((unsigned long)(addr) - kvm->arch.vm_base + - KVM_VM_DATA_BASE); -} - -/*Function for translating guest address to host address*/ - -static inline void *to_host(struct kvm *kvm, void *addr) -{ - return (void *)((unsigned long)addr - KVM_VM_DATA_BASE - + kvm->arch.vm_base); -} - -/* Get host context of the vcpu */ -static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu) -{ - union context *ctx = &vcpu->arch.host; - return to_guest(vcpu->kvm, ctx); -} - -/* Get guest context of the vcpu */ -static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu) -{ - union context *ctx = &vcpu->arch.guest; - return to_guest(vcpu->kvm, ctx); -} - -/* kvm get exit data from gvmm! 
*/ -static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu) -{ - return &vcpu->arch.exit_data; -} - -/*kvm get vcpu ioreq for kvm module!*/ -static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p_ctl_data; - - if (vcpu) { - p_ctl_data = kvm_get_exit_data(vcpu); - if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION) - return &p_ctl_data->u.ioreq; - } - - return NULL; -} - -#endif diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c deleted file mode 100644 index f1e17d3d6cd9..000000000000 --- a/arch/ia64/kvm/mmio.c +++ /dev/null @@ -1,336 +0,0 @@ -/* - * mmio.c: MMIO emulation components. - * Copyright (c) 2004, Intel Corporation. - * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) - * Kun Tian (Kevin Tian) (Kevin.tian@intel.com) - * - * Copyright (c) 2007 Intel Corporation KVM support. - * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <linux/kvm_host.h> - -#include "vcpu.h" - -static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val) -{ - VLSAPIC_XTP(v) = val; -} - -/* - * LSAPIC OFFSET - */ -#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20)) -#define PIB_OFST_INTA 0x1E0000 -#define PIB_OFST_XTP 0x1E0008 - -/* - * execute write IPI op. 
- */ -static void vlsapic_write_ipi(struct kvm_vcpu *vcpu, - uint64_t addr, uint64_t data) -{ - struct exit_ctl_data *p = &current_vcpu->arch.exit_data; - unsigned long psr; - - local_irq_save(psr); - - p->exit_reason = EXIT_REASON_IPI; - p->u.ipi_data.addr.val = addr; - p->u.ipi_data.data.val = data; - vmm_transition(current_vcpu); - - local_irq_restore(psr); - -} - -void lsapic_write(struct kvm_vcpu *v, unsigned long addr, - unsigned long length, unsigned long val) -{ - addr &= (PIB_SIZE - 1); - - switch (addr) { - case PIB_OFST_INTA: - panic_vm(v, "Undefined write on PIB INTA\n"); - break; - case PIB_OFST_XTP: - if (length == 1) { - vlsapic_write_xtp(v, val); - } else { - panic_vm(v, "Undefined write on PIB XTP\n"); - } - break; - default: - if (PIB_LOW_HALF(addr)) { - /*Lower half */ - if (length != 8) - panic_vm(v, "Can't LHF write with size %ld!\n", - length); - else - vlsapic_write_ipi(v, addr, val); - } else { /*Upper half */ - panic_vm(v, "IPI-UHF write %lx\n", addr); - } - break; - } -} - -unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, - unsigned long length) -{ - uint64_t result = 0; - - addr &= (PIB_SIZE - 1); - - switch (addr) { - case PIB_OFST_INTA: - if (length == 1) /* 1 byte load */ - ; /* There is no i8259, there is no INTA access*/ - else - panic_vm(v, "Undefined read on PIB INTA\n"); - - break; - case PIB_OFST_XTP: - if (length == 1) { - result = VLSAPIC_XTP(v); - } else { - panic_vm(v, "Undefined read on PIB XTP\n"); - } - break; - default: - panic_vm(v, "Undefined addr access for lsapic!\n"); - break; - } - return result; -} - -static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, - u16 s, int ma, int dir) -{ - unsigned long iot; - struct exit_ctl_data *p = &vcpu->arch.exit_data; - unsigned long psr; - - iot = __gpfn_is_io(src_pa >> PAGE_SHIFT); - - local_irq_save(psr); - - /*Intercept the access for PIB range*/ - if (iot == GPFN_PIB) { - if (!dir) - lsapic_write(vcpu, src_pa, s, *dest); - else - *dest = 
lsapic_read(vcpu, src_pa, s); - goto out; - } - p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION; - p->u.ioreq.addr = src_pa; - p->u.ioreq.size = s; - p->u.ioreq.dir = dir; - if (dir == IOREQ_WRITE) - p->u.ioreq.data = *dest; - p->u.ioreq.state = STATE_IOREQ_READY; - vmm_transition(vcpu); - - if (p->u.ioreq.state == STATE_IORESP_READY) { - if (dir == IOREQ_READ) - /* it's necessary to ensure zero extending */ - *dest = p->u.ioreq.data & (~0UL >> (64-(s*8))); - } else - panic_vm(vcpu, "Unhandled mmio access returned!\n"); -out: - local_irq_restore(psr); - return ; -} - -/* - dir 1: read 0:write - inst_type 0:integer 1:floating point - */ -#define SL_INTEGER 0 /* store/load interger*/ -#define SL_FLOATING 1 /* store/load floating*/ - -void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) -{ - struct kvm_pt_regs *regs; - IA64_BUNDLE bundle; - int slot, dir = 0; - int inst_type = -1; - u16 size = 0; - u64 data, slot1a, slot1b, temp, update_reg; - s32 imm; - INST64 inst; - - regs = vcpu_regs(vcpu); - - if (fetch_code(vcpu, regs->cr_iip, &bundle)) { - /* if fetch code fail, return and try again */ - return; - } - slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; - if (!slot) - inst.inst = bundle.slot0; - else if (slot == 1) { - slot1a = bundle.slot1a; - slot1b = bundle.slot1b; - inst.inst = slot1a + (slot1b << 18); - } else if (slot == 2) - inst.inst = bundle.slot2; - - /* Integer Load/Store */ - if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) { - inst_type = SL_INTEGER; - size = (inst.M1.x6 & 0x3); - if ((inst.M1.x6 >> 2) > 0xb) { - /*write*/ - dir = IOREQ_WRITE; - data = vcpu_get_gr(vcpu, inst.M4.r2); - } else if ((inst.M1.x6 >> 2) < 0xb) { - /*read*/ - dir = IOREQ_READ; - } - } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) { - /* Integer Load + Reg update */ - inst_type = SL_INTEGER; - dir = IOREQ_READ; - size = (inst.M2.x6 & 0x3); - temp = vcpu_get_gr(vcpu, inst.M2.r3); - update_reg = vcpu_get_gr(vcpu, inst.M2.r2); - temp += 
update_reg; - vcpu_set_gr(vcpu, inst.M2.r3, temp, 0); - } else if (inst.M3.major == 5) { - /*Integer Load/Store + Imm update*/ - inst_type = SL_INTEGER; - size = (inst.M3.x6&0x3); - if ((inst.M5.x6 >> 2) > 0xb) { - /*write*/ - dir = IOREQ_WRITE; - data = vcpu_get_gr(vcpu, inst.M5.r2); - temp = vcpu_get_gr(vcpu, inst.M5.r3); - imm = (inst.M5.s << 31) | (inst.M5.i << 30) | - (inst.M5.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M5.r3, temp, 0); - - } else if ((inst.M3.x6 >> 2) < 0xb) { - /*read*/ - dir = IOREQ_READ; - temp = vcpu_get_gr(vcpu, inst.M3.r3); - imm = (inst.M3.s << 31) | (inst.M3.i << 30) | - (inst.M3.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M3.r3, temp, 0); - - } - } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B - && inst.M9.m == 0 && inst.M9.x == 0) { - /* Floating-point spill*/ - struct ia64_fpreg v; - - inst_type = SL_FLOATING; - dir = IOREQ_WRITE; - vcpu_get_fpreg(vcpu, inst.M9.f2, &v); - /* Write high word. FIXME: this is a kludge! */ - v.u.bits[1] &= 0x3ffff; - mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8, - ma, IOREQ_WRITE); - data = v.u.bits[0]; - size = 3; - } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { - /* Floating-point spill + Imm update */ - struct ia64_fpreg v; - - inst_type = SL_FLOATING; - dir = IOREQ_WRITE; - vcpu_get_fpreg(vcpu, inst.M10.f2, &v); - temp = vcpu_get_gr(vcpu, inst.M10.r3); - imm = (inst.M10.s << 31) | (inst.M10.i << 30) | - (inst.M10.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); - - /* Write high word.FIXME: this is a kludge! */ - v.u.bits[1] &= 0x3ffff; - mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], - 8, ma, IOREQ_WRITE); - data = v.u.bits[0]; - size = 3; - } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { - /* Floating-point stf8 + Imm update */ - struct ia64_fpreg v; - inst_type = SL_FLOATING; - dir = IOREQ_WRITE; - size = 3; - vcpu_get_fpreg(vcpu, inst.M10.f2, &v); - data = v.u.bits[0]; /* Significand. 
*/ - temp = vcpu_get_gr(vcpu, inst.M10.r3); - imm = (inst.M10.s << 31) | (inst.M10.i << 30) | - (inst.M10.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); - } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c - && inst.M15.x6 <= 0x2f) { - temp = vcpu_get_gr(vcpu, inst.M15.r3); - imm = (inst.M15.s << 31) | (inst.M15.i << 30) | - (inst.M15.imm7 << 23); - temp += imm >> 23; - vcpu_set_gr(vcpu, inst.M15.r3, temp, 0); - - vcpu_increment_iip(vcpu); - return; - } else if (inst.M12.major == 6 && inst.M12.m == 1 - && inst.M12.x == 1 && inst.M12.x6 == 1) { - /* Floating-point Load Pair + Imm ldfp8 M12*/ - struct ia64_fpreg v; - - inst_type = SL_FLOATING; - dir = IOREQ_READ; - size = 8; /*ldfd*/ - mmio_access(vcpu, padr, &data, size, ma, dir); - v.u.bits[0] = data; - v.u.bits[1] = 0x1003E; - vcpu_set_fpreg(vcpu, inst.M12.f1, &v); - padr += 8; - mmio_access(vcpu, padr, &data, size, ma, dir); - v.u.bits[0] = data; - v.u.bits[1] = 0x1003E; - vcpu_set_fpreg(vcpu, inst.M12.f2, &v); - padr += 8; - vcpu_set_gr(vcpu, inst.M12.r3, padr, 0); - vcpu_increment_iip(vcpu); - return; - } else { - inst_type = -1; - panic_vm(vcpu, "Unsupported MMIO access instruction! 
" - "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n", - bundle.i64[0], bundle.i64[1]); - } - - size = 1 << size; - if (dir == IOREQ_WRITE) { - mmio_access(vcpu, padr, &data, size, ma, dir); - } else { - mmio_access(vcpu, padr, &data, size, ma, dir); - if (inst_type == SL_INTEGER) - vcpu_set_gr(vcpu, inst.M1.r1, data, 0); - else - panic_vm(vcpu, "Unsupported instruction type!\n"); - - } - vcpu_increment_iip(vcpu); -} diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S deleted file mode 100644 index f793be3effff..000000000000 --- a/arch/ia64/kvm/optvfault.S +++ /dev/null @@ -1,1090 +0,0 @@ -/* - * arch/ia64/kvm/optvfault.S - * optimize virtualization fault handler - * - * Copyright (C) 2006 Intel Co - * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> - * Copyright (C) 2008 Intel Co - * Add the support for Tukwila processors. - * Xiantao Zhang <xiantao.zhang@intel.com> - */ - -#include <asm/asmmacro.h> -#include <asm/processor.h> -#include <asm/kvm_host.h> - -#include "vti.h" -#include "asm-offsets.h" - -#define ACCE_MOV_FROM_AR -#define ACCE_MOV_FROM_RR -#define ACCE_MOV_TO_RR -#define ACCE_RSM -#define ACCE_SSM -#define ACCE_MOV_TO_PSR -#define ACCE_THASH - -#define VMX_VPS_SYNC_READ \ - add r16=VMM_VPD_BASE_OFFSET,r21; \ - mov r17 = b0; \ - mov r18 = r24; \ - mov r19 = r25; \ - mov r20 = r31; \ - ;; \ -{.mii; \ - ld8 r16 = [r16]; \ - nop 0x0; \ - mov r24 = ip; \ - ;; \ -}; \ -{.mmb; \ - add r24=0x20, r24; \ - mov r25 =r16; \ - br.sptk.many kvm_vps_sync_read; \ -}; \ - mov b0 = r17; \ - mov r24 = r18; \ - mov r25 = r19; \ - mov r31 = r20 - -ENTRY(kvm_vps_entry) - adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21 - ;; - ld8 r29 = [r29] - ;; - add r29 = r29, r30 - ;; - mov b0 = r29 - br.sptk.many b0 -END(kvm_vps_entry) - -/* - * Inputs: - * r24 : return address - * r25 : vpd - * r29 : scratch - * - */ -GLOBAL_ENTRY(kvm_vps_sync_read) - movl r30 = PAL_VPS_SYNC_READ - ;; - br.sptk.many kvm_vps_entry -END(kvm_vps_sync_read) - -/* - * Inputs: - * r24 : return address - * r25 : vpd 
- * r29 : scratch - * - */ -GLOBAL_ENTRY(kvm_vps_sync_write) - movl r30 = PAL_VPS_SYNC_WRITE - ;; - br.sptk.many kvm_vps_entry -END(kvm_vps_sync_write) - -/* - * Inputs: - * r23 : pr - * r24 : guest b0 - * r25 : vpd - * - */ -GLOBAL_ENTRY(kvm_vps_resume_normal) - movl r30 = PAL_VPS_RESUME_NORMAL - ;; - mov pr=r23,-2 - br.sptk.many kvm_vps_entry -END(kvm_vps_resume_normal) - -/* - * Inputs: - * r23 : pr - * r24 : guest b0 - * r25 : vpd - * r17 : isr - */ -GLOBAL_ENTRY(kvm_vps_resume_handler) - movl r30 = PAL_VPS_RESUME_HANDLER - ;; - ld8 r26=[r25] - shr r17=r17,IA64_ISR_IR_BIT - ;; - dep r26=r17,r26,63,1 // bit 63 of r26 indicate whether enable CFLE - mov pr=r23,-2 - br.sptk.many kvm_vps_entry -END(kvm_vps_resume_handler) - -//mov r1=ar3 -GLOBAL_ENTRY(kvm_asm_mov_from_ar) -#ifndef ACCE_MOV_FROM_AR - br.many kvm_virtualization_fault_back -#endif - add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 - add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 - extr.u r17=r25,6,7 - ;; - ld8 r18=[r18] - mov r19=ar.itc - mov r24=b0 - ;; - add r19=r19,r18 - addl r20=@gprel(asm_mov_to_reg),gp - ;; - st8 [r16] = r19 - adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 - shladd r17=r17,4,r20 - ;; - mov b0=r17 - br.sptk.few b0 - ;; -END(kvm_asm_mov_from_ar) - -/* - * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC - * clock as it's source for emulating the ITC. This version will be - * copied on top of the original version if the host is determined to - * be an SN2. 
- */ -GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2) - add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 - movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT)) - - add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 - extr.u r17=r25,6,7 - mov r24=b0 - ;; - ld8 r18=[r18] - ld8 r19=[r19] - addl r20=@gprel(asm_mov_to_reg),gp - ;; - add r19=r19,r18 - shladd r17=r17,4,r20 - ;; - adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 - st8 [r16] = r19 - mov b0=r17 - br.sptk.few b0 - ;; -END(kvm_asm_mov_from_ar_sn2) - - - -// mov r1=rr[r3] -GLOBAL_ENTRY(kvm_asm_mov_from_rr) -#ifndef ACCE_MOV_FROM_RR - br.many kvm_virtualization_fault_back -#endif - extr.u r16=r25,20,7 - extr.u r17=r25,6,7 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20 - shladd r16=r16,4,r20 - mov r24=b0 - ;; - add r27=VMM_VCPU_VRR0_OFFSET,r21 - mov b0=r16 - br.many b0 - ;; -kvm_asm_mov_from_rr_back_1: - adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 - adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 - shr.u r26=r19,61 - ;; - shladd r17=r17,4,r22 - shladd r27=r26,3,r27 - ;; - ld8 r19=[r27] - mov b0=r17 - br.many b0 -END(kvm_asm_mov_from_rr) - - -// mov rr[r3]=r2 -GLOBAL_ENTRY(kvm_asm_mov_to_rr) -#ifndef ACCE_MOV_TO_RR - br.many kvm_virtualization_fault_back -#endif - extr.u r16=r25,20,7 - extr.u r17=r25,13,7 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20 - shladd r16=r16,4,r20 - mov r22=b0 - ;; - add r27=VMM_VCPU_VRR0_OFFSET,r21 - mov b0=r16 - br.many b0 - ;; -kvm_asm_mov_to_rr_back_1: - adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20 - shr.u r23=r19,61 - shladd r17=r17,4,r20 - ;; - //if rr6, go back - cmp.eq p6,p0=6,r23 - mov b0=r22 - (p6) br.cond.dpnt.many kvm_virtualization_fault_back - ;; - mov r28=r19 - mov b0=r17 - br.many b0 -kvm_asm_mov_to_rr_back_2: - adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 - shladd r27=r23,3,r27 - ;; // vrr.rid<<4 |0xe - st8 [r27]=r19 - mov b0=r30 - ;; - extr.u r16=r19,8,26 - extr.u r18 =r19,2,6 - mov r17 
=0xe - ;; - shladd r16 = r16, 4, r17 - extr.u r19 =r19,0,8 - ;; - shl r16 = r16,8 - ;; - add r19 = r19, r16 - ;; //set ve 1 - dep r19=-1,r19,0,1 - cmp.lt p6,p0=14,r18 - ;; - (p6) mov r18=14 - ;; - (p6) dep r19=r18,r19,2,6 - ;; - cmp.eq p6,p0=0,r23 - ;; - cmp.eq.or p6,p0=4,r23 - ;; - adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 - ;; - ld4 r16=[r16] - cmp.eq p7,p0=r0,r0 - (p6) shladd r17=r23,1,r17 - ;; - (p6) st8 [r17]=r19 - (p6) tbit.nz p6,p7=r16,0 - ;; - (p7) mov rr[r28]=r19 - mov r24=r22 - br.many b0 -END(kvm_asm_mov_to_rr) - - -//rsm -GLOBAL_ENTRY(kvm_asm_rsm) -#ifndef ACCE_RSM - br.many kvm_virtualization_fault_back -#endif - VMX_VPS_SYNC_READ - ;; - extr.u r26=r25,6,21 - extr.u r27=r25,31,2 - ;; - extr.u r28=r25,36,1 - dep r26=r27,r26,21,2 - ;; - add r17=VPD_VPSR_START_OFFSET,r16 - add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - //r26 is imm24 - dep r26=r28,r26,23,1 - ;; - ld8 r18=[r17] - movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI - ld4 r23=[r22] - sub r27=-1,r26 - mov r24=b0 - ;; - mov r20=cr.ipsr - or r28=r27,r28 - and r19=r18,r27 - ;; - st8 [r17]=r19 - and r20=r20,r28 - /* Comment it out due to short of fp lazy alorgithm support - adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 - ;; - ld8 r27=[r27] - ;; - tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT - ;; - (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 - */ - ;; - mov cr.ipsr=r20 - tbit.nz p6,p0=r23,0 - ;; - tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT - (p6) br.dptk kvm_resume_to_guest_with_sync - ;; - add r26=VMM_VCPU_META_RR0_OFFSET,r21 - add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 - dep r23=-1,r23,0,1 - ;; - ld8 r26=[r26] - ld8 r27=[r27] - st4 [r22]=r23 - dep.z r28=4,61,3 - ;; - mov rr[r0]=r26 - ;; - mov rr[r28]=r27 - ;; - srlz.d - br.many kvm_resume_to_guest_with_sync -END(kvm_asm_rsm) - - -//ssm -GLOBAL_ENTRY(kvm_asm_ssm) -#ifndef ACCE_SSM - br.many kvm_virtualization_fault_back -#endif - VMX_VPS_SYNC_READ - ;; - extr.u r26=r25,6,21 - extr.u r27=r25,31,2 - ;; - extr.u r28=r25,36,1 - dep 
r26=r27,r26,21,2 - ;; //r26 is imm24 - add r27=VPD_VPSR_START_OFFSET,r16 - dep r26=r28,r26,23,1 - ;; //r19 vpsr - ld8 r29=[r27] - mov r24=b0 - ;; - add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - mov r20=cr.ipsr - or r19=r29,r26 - ;; - ld4 r23=[r22] - st8 [r27]=r19 - or r20=r20,r26 - ;; - mov cr.ipsr=r20 - movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT - ;; - and r19=r28,r19 - tbit.z p6,p0=r23,0 - ;; - cmp.ne.or p6,p0=r28,r19 - (p6) br.dptk kvm_asm_ssm_1 - ;; - add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 - add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 - dep r23=0,r23,0,1 - ;; - ld8 r26=[r26] - ld8 r27=[r27] - st4 [r22]=r23 - dep.z r28=4,61,3 - ;; - mov rr[r0]=r26 - ;; - mov rr[r28]=r27 - ;; - srlz.d - ;; -kvm_asm_ssm_1: - tbit.nz p6,p0=r29,IA64_PSR_I_BIT - ;; - tbit.z.or p6,p0=r19,IA64_PSR_I_BIT - (p6) br.dptk kvm_resume_to_guest_with_sync - ;; - add r29=VPD_VTPR_START_OFFSET,r16 - add r30=VPD_VHPI_START_OFFSET,r16 - ;; - ld8 r29=[r29] - ld8 r30=[r30] - ;; - extr.u r17=r29,4,4 - extr.u r18=r29,16,1 - ;; - dep r17=r18,r17,4,1 - ;; - cmp.gt p6,p0=r30,r17 - (p6) br.dpnt.few kvm_asm_dispatch_vexirq - br.many kvm_resume_to_guest_with_sync -END(kvm_asm_ssm) - - -//mov psr.l=r2 -GLOBAL_ENTRY(kvm_asm_mov_to_psr) -#ifndef ACCE_MOV_TO_PSR - br.many kvm_virtualization_fault_back -#endif - VMX_VPS_SYNC_READ - ;; - extr.u r26=r25,13,7 //r2 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 - shladd r26=r26,4,r20 - mov r24=b0 - ;; - add r27=VPD_VPSR_START_OFFSET,r16 - mov b0=r26 - br.many b0 - ;; -kvm_asm_mov_to_psr_back: - ld8 r17=[r27] - add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 - dep r19=0,r19,32,32 - ;; - ld4 r23=[r22] - dep r18=0,r17,0,32 - ;; - add r30=r18,r19 - movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT - ;; - st8 [r27]=r30 - and r27=r28,r30 - and r29=r28,r17 - ;; - cmp.eq p5,p0=r29,r27 - cmp.eq p6,p7=r28,r27 - (p5) br.many kvm_asm_mov_to_psr_1 - ;; - //virtual to physical - (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21 - (p7) add 
r27=VMM_VCPU_META_RR0_OFFSET+8,r21 - (p7) dep r23=-1,r23,0,1 - ;; - //physical to virtual - (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 - (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 - (p6) dep r23=0,r23,0,1 - ;; - ld8 r26=[r26] - ld8 r27=[r27] - st4 [r22]=r23 - dep.z r28=4,61,3 - ;; - mov rr[r0]=r26 - ;; - mov rr[r28]=r27 - ;; - srlz.d - ;; -kvm_asm_mov_to_psr_1: - mov r20=cr.ipsr - movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT - ;; - or r19=r19,r28 - dep r20=0,r20,0,32 - ;; - add r20=r19,r20 - mov b0=r24 - ;; - /* Comment it out due to short of fp lazy algorithm support - adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 - ;; - ld8 r27=[r27] - ;; - tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT - ;; - (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 - ;; - */ - mov cr.ipsr=r20 - cmp.ne p6,p0=r0,r0 - ;; - tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT - tbit.z.or p6,p0=r30,IA64_PSR_I_BIT - (p6) br.dpnt.few kvm_resume_to_guest_with_sync - ;; - add r29=VPD_VTPR_START_OFFSET,r16 - add r30=VPD_VHPI_START_OFFSET,r16 - ;; - ld8 r29=[r29] - ld8 r30=[r30] - ;; - extr.u r17=r29,4,4 - extr.u r18=r29,16,1 - ;; - dep r17=r18,r17,4,1 - ;; - cmp.gt p6,p0=r30,r17 - (p6) br.dpnt.few kvm_asm_dispatch_vexirq - br.many kvm_resume_to_guest_with_sync -END(kvm_asm_mov_to_psr) - - -ENTRY(kvm_asm_dispatch_vexirq) -//increment iip - mov r17 = b0 - mov r18 = r31 -{.mii - add r25=VMM_VPD_BASE_OFFSET,r21 - nop 0x0 - mov r24 = ip - ;; -} -{.mmb - add r24 = 0x20, r24 - ld8 r25 = [r25] - br.sptk.many kvm_vps_sync_write -} - mov b0 =r17 - mov r16=cr.ipsr - mov r31 = r18 - mov r19 = 37 - ;; - extr.u r17=r16,IA64_PSR_RI_BIT,2 - tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 - ;; - (p6) mov r18=cr.iip - (p6) mov r17=r0 - (p7) add r17=1,r17 - ;; - (p6) add r18=0x10,r18 - dep r16=r17,r16,IA64_PSR_RI_BIT,2 - ;; - (p6) mov cr.iip=r18 - mov cr.ipsr=r16 - mov r30 =1 - br.many kvm_dispatch_vexirq -END(kvm_asm_dispatch_vexirq) - -// thash -// TODO: add support when pta.vf = 1 -GLOBAL_ENTRY(kvm_asm_thash) -#ifndef ACCE_THASH - 
br.many kvm_virtualization_fault_back -#endif - extr.u r17=r25,20,7 // get r3 from opcode in r25 - extr.u r18=r25,6,7 // get r1 from opcode in r25 - addl r20=@gprel(asm_mov_from_reg),gp - ;; - adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20 - shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17) - adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs - ;; - mov r24=b0 - ;; - ld8 r16=[r16] // get VPD addr - mov b0=r17 - br.many b0 // r19 return value - ;; -kvm_asm_thash_back1: - shr.u r23=r19,61 // get RR number - adds r28=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr - adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta - ;; - shladd r27=r23,3,r28 // get vcpu->arch.vrr[r23]'s addr - ld8 r17=[r16] // get PTA - mov r26=1 - ;; - extr.u r29=r17,2,6 // get pta.size - ld8 r28=[r27] // get vcpu->arch.vrr[r23]'s value - ;; - mov b0=r24 - //Fallback to C if pta.vf is set - tbit.nz p6,p0=r17, 8 - ;; - (p6) mov r24=EVENT_THASH - (p6) br.cond.dpnt.many kvm_virtualization_fault_back - extr.u r28=r28,2,6 // get rr.ps - shl r22=r26,r29 // 1UL << pta.size - ;; - shr.u r23=r19,r28 // vaddr >> rr.ps - adds r26=3,r29 // pta.size + 3 - shl r27=r17,3 // pta << 3 - ;; - shl r23=r23,3 // (vaddr >> rr.ps) << 3 - shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) - movl r16=7<<61 - ;; - adds r22=-1,r22 // (1UL << pta.size) - 1 - shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size - and r19=r19,r16 // vaddr & VRN_MASK - ;; - and r22=r22,r23 // vhpt_offset - or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size) - adds r26=asm_mov_to_reg-asm_mov_from_reg,r20 - ;; - or r19=r19,r22 // calc pval - shladd r17=r18,4,r26 - adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 - ;; - mov b0=r17 - br.many b0 -END(kvm_asm_thash) - -#define MOV_TO_REG0 \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - nop.b 0x0; \ - ;; \ -}; - - -#define MOV_TO_REG(n) \ -{; \ - mov r##n##=r19; \ - mov b0=r30; \ - br.sptk.many b0; \ - ;; \ -}; - - -#define MOV_FROM_REG(n) \ -{; \ - mov r19=r##n##; \ - 
mov b0=r30; \ - br.sptk.many b0; \ - ;; \ -}; - - -#define MOV_TO_BANK0_REG(n) \ -ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \ -{; \ - mov r26=r2; \ - mov r2=r19; \ - bsw.1; \ - ;; \ -}; \ -{; \ - mov r##n##=r2; \ - nop.b 0x0; \ - bsw.0; \ - ;; \ -}; \ -{; \ - mov r2=r26; \ - mov b0=r30; \ - br.sptk.many b0; \ - ;; \ -}; \ -END(asm_mov_to_bank0_reg##n##) - - -#define MOV_FROM_BANK0_REG(n) \ -ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \ -{; \ - mov r26=r2; \ - nop.b 0x0; \ - bsw.1; \ - ;; \ -}; \ -{; \ - mov r2=r##n##; \ - nop.b 0x0; \ - bsw.0; \ - ;; \ -}; \ -{; \ - mov r19=r2; \ - mov r2=r26; \ - mov b0=r30; \ -}; \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - br.sptk.many b0; \ - ;; \ -}; \ -END(asm_mov_from_bank0_reg##n##) - - -#define JMP_TO_MOV_TO_BANK0_REG(n) \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - br.sptk.many asm_mov_to_bank0_reg##n##; \ - ;; \ -} - - -#define JMP_TO_MOV_FROM_BANK0_REG(n) \ -{; \ - nop.b 0x0; \ - nop.b 0x0; \ - br.sptk.many asm_mov_from_bank0_reg##n##; \ - ;; \ -} - - -MOV_FROM_BANK0_REG(16) -MOV_FROM_BANK0_REG(17) -MOV_FROM_BANK0_REG(18) -MOV_FROM_BANK0_REG(19) -MOV_FROM_BANK0_REG(20) -MOV_FROM_BANK0_REG(21) -MOV_FROM_BANK0_REG(22) -MOV_FROM_BANK0_REG(23) -MOV_FROM_BANK0_REG(24) -MOV_FROM_BANK0_REG(25) -MOV_FROM_BANK0_REG(26) -MOV_FROM_BANK0_REG(27) -MOV_FROM_BANK0_REG(28) -MOV_FROM_BANK0_REG(29) -MOV_FROM_BANK0_REG(30) -MOV_FROM_BANK0_REG(31) - - -// mov from reg table -ENTRY(asm_mov_from_reg) - MOV_FROM_REG(0) - MOV_FROM_REG(1) - MOV_FROM_REG(2) - MOV_FROM_REG(3) - MOV_FROM_REG(4) - MOV_FROM_REG(5) - MOV_FROM_REG(6) - MOV_FROM_REG(7) - MOV_FROM_REG(8) - MOV_FROM_REG(9) - MOV_FROM_REG(10) - MOV_FROM_REG(11) - MOV_FROM_REG(12) - MOV_FROM_REG(13) - MOV_FROM_REG(14) - MOV_FROM_REG(15) - JMP_TO_MOV_FROM_BANK0_REG(16) - JMP_TO_MOV_FROM_BANK0_REG(17) - JMP_TO_MOV_FROM_BANK0_REG(18) - JMP_TO_MOV_FROM_BANK0_REG(19) - JMP_TO_MOV_FROM_BANK0_REG(20) - JMP_TO_MOV_FROM_BANK0_REG(21) - JMP_TO_MOV_FROM_BANK0_REG(22) - JMP_TO_MOV_FROM_BANK0_REG(23) - 
JMP_TO_MOV_FROM_BANK0_REG(24) - JMP_TO_MOV_FROM_BANK0_REG(25) - JMP_TO_MOV_FROM_BANK0_REG(26) - JMP_TO_MOV_FROM_BANK0_REG(27) - JMP_TO_MOV_FROM_BANK0_REG(28) - JMP_TO_MOV_FROM_BANK0_REG(29) - JMP_TO_MOV_FROM_BANK0_REG(30) - JMP_TO_MOV_FROM_BANK0_REG(31) - MOV_FROM_REG(32) - MOV_FROM_REG(33) - MOV_FROM_REG(34) - MOV_FROM_REG(35) - MOV_FROM_REG(36) - MOV_FROM_REG(37) - MOV_FROM_REG(38) - MOV_FROM_REG(39) - MOV_FROM_REG(40) - MOV_FROM_REG(41) - MOV_FROM_REG(42) - MOV_FROM_REG(43) - MOV_FROM_REG(44) - MOV_FROM_REG(45) - MOV_FROM_REG(46) - MOV_FROM_REG(47) - MOV_FROM_REG(48) - MOV_FROM_REG(49) - MOV_FROM_REG(50) - MOV_FROM_REG(51) - MOV_FROM_REG(52) - MOV_FROM_REG(53) - MOV_FROM_REG(54) - MOV_FROM_REG(55) - MOV_FROM_REG(56) - MOV_FROM_REG(57) - MOV_FROM_REG(58) - MOV_FROM_REG(59) - MOV_FROM_REG(60) - MOV_FROM_REG(61) - MOV_FROM_REG(62) - MOV_FROM_REG(63) - MOV_FROM_REG(64) - MOV_FROM_REG(65) - MOV_FROM_REG(66) - MOV_FROM_REG(67) - MOV_FROM_REG(68) - MOV_FROM_REG(69) - MOV_FROM_REG(70) - MOV_FROM_REG(71) - MOV_FROM_REG(72) - MOV_FROM_REG(73) - MOV_FROM_REG(74) - MOV_FROM_REG(75) - MOV_FROM_REG(76) - MOV_FROM_REG(77) - MOV_FROM_REG(78) - MOV_FROM_REG(79) - MOV_FROM_REG(80) - MOV_FROM_REG(81) - MOV_FROM_REG(82) - MOV_FROM_REG(83) - MOV_FROM_REG(84) - MOV_FROM_REG(85) - MOV_FROM_REG(86) - MOV_FROM_REG(87) - MOV_FROM_REG(88) - MOV_FROM_REG(89) - MOV_FROM_REG(90) - MOV_FROM_REG(91) - MOV_FROM_REG(92) - MOV_FROM_REG(93) - MOV_FROM_REG(94) - MOV_FROM_REG(95) - MOV_FROM_REG(96) - MOV_FROM_REG(97) - MOV_FROM_REG(98) - MOV_FROM_REG(99) - MOV_FROM_REG(100) - MOV_FROM_REG(101) - MOV_FROM_REG(102) - MOV_FROM_REG(103) - MOV_FROM_REG(104) - MOV_FROM_REG(105) - MOV_FROM_REG(106) - MOV_FROM_REG(107) - MOV_FROM_REG(108) - MOV_FROM_REG(109) - MOV_FROM_REG(110) - MOV_FROM_REG(111) - MOV_FROM_REG(112) - MOV_FROM_REG(113) - MOV_FROM_REG(114) - MOV_FROM_REG(115) - MOV_FROM_REG(116) - MOV_FROM_REG(117) - MOV_FROM_REG(118) - MOV_FROM_REG(119) - MOV_FROM_REG(120) - MOV_FROM_REG(121) - 
MOV_FROM_REG(122) - MOV_FROM_REG(123) - MOV_FROM_REG(124) - MOV_FROM_REG(125) - MOV_FROM_REG(126) - MOV_FROM_REG(127) -END(asm_mov_from_reg) - - -/* must be in bank 0 - * parameter: - * r31: pr - * r24: b0 - */ -ENTRY(kvm_resume_to_guest_with_sync) - adds r19=VMM_VPD_BASE_OFFSET,r21 - mov r16 = r31 - mov r17 = r24 - ;; -{.mii - ld8 r25 =[r19] - nop 0x0 - mov r24 = ip - ;; -} -{.mmb - add r24 =0x20, r24 - nop 0x0 - br.sptk.many kvm_vps_sync_write -} - - mov r31 = r16 - mov r24 =r17 - ;; - br.sptk.many kvm_resume_to_guest -END(kvm_resume_to_guest_with_sync) - -ENTRY(kvm_resume_to_guest) - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 =[r16] - adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21 - ;; - mov r16=cr.ipsr - ;; - ld8 r20 = [r20] - adds r19=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r25=[r19] - extr.u r17=r16,IA64_PSR_RI_BIT,2 - tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 - ;; - (p6) mov r18=cr.iip - (p6) mov r17=r0 - ;; - (p6) add r18=0x10,r18 - (p7) add r17=1,r17 - ;; - (p6) mov cr.iip=r18 - dep r16=r17,r16,IA64_PSR_RI_BIT,2 - ;; - mov cr.ipsr=r16 - adds r19= VPD_VPSR_START_OFFSET,r25 - add r28=PAL_VPS_RESUME_NORMAL,r20 - add r29=PAL_VPS_RESUME_HANDLER,r20 - ;; - ld8 r19=[r19] - mov b0=r29 - mov r27=cr.isr - ;; - tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p7=vpsr.ic - shr r27=r27,IA64_ISR_IR_BIT - ;; - (p6) ld8 r26=[r25] - (p7) mov b0=r28 - ;; - (p6) dep r26=r27,r26,63,1 - mov pr=r31,-2 - br.sptk.many b0 // call pal service - ;; -END(kvm_resume_to_guest) - - -MOV_TO_BANK0_REG(16) -MOV_TO_BANK0_REG(17) -MOV_TO_BANK0_REG(18) -MOV_TO_BANK0_REG(19) -MOV_TO_BANK0_REG(20) -MOV_TO_BANK0_REG(21) -MOV_TO_BANK0_REG(22) -MOV_TO_BANK0_REG(23) -MOV_TO_BANK0_REG(24) -MOV_TO_BANK0_REG(25) -MOV_TO_BANK0_REG(26) -MOV_TO_BANK0_REG(27) -MOV_TO_BANK0_REG(28) -MOV_TO_BANK0_REG(29) -MOV_TO_BANK0_REG(30) -MOV_TO_BANK0_REG(31) - - -// mov to reg table -ENTRY(asm_mov_to_reg) - MOV_TO_REG0 - MOV_TO_REG(1) - MOV_TO_REG(2) - MOV_TO_REG(3) - MOV_TO_REG(4) - MOV_TO_REG(5) - MOV_TO_REG(6) - MOV_TO_REG(7) - 
MOV_TO_REG(8) - MOV_TO_REG(9) - MOV_TO_REG(10) - MOV_TO_REG(11) - MOV_TO_REG(12) - MOV_TO_REG(13) - MOV_TO_REG(14) - MOV_TO_REG(15) - JMP_TO_MOV_TO_BANK0_REG(16) - JMP_TO_MOV_TO_BANK0_REG(17) - JMP_TO_MOV_TO_BANK0_REG(18) - JMP_TO_MOV_TO_BANK0_REG(19) - JMP_TO_MOV_TO_BANK0_REG(20) - JMP_TO_MOV_TO_BANK0_REG(21) - JMP_TO_MOV_TO_BANK0_REG(22) - JMP_TO_MOV_TO_BANK0_REG(23) - JMP_TO_MOV_TO_BANK0_REG(24) - JMP_TO_MOV_TO_BANK0_REG(25) - JMP_TO_MOV_TO_BANK0_REG(26) - JMP_TO_MOV_TO_BANK0_REG(27) - JMP_TO_MOV_TO_BANK0_REG(28) - JMP_TO_MOV_TO_BANK0_REG(29) - JMP_TO_MOV_TO_BANK0_REG(30) - JMP_TO_MOV_TO_BANK0_REG(31) - MOV_TO_REG(32) - MOV_TO_REG(33) - MOV_TO_REG(34) - MOV_TO_REG(35) - MOV_TO_REG(36) - MOV_TO_REG(37) - MOV_TO_REG(38) - MOV_TO_REG(39) - MOV_TO_REG(40) - MOV_TO_REG(41) - MOV_TO_REG(42) - MOV_TO_REG(43) - MOV_TO_REG(44) - MOV_TO_REG(45) - MOV_TO_REG(46) - MOV_TO_REG(47) - MOV_TO_REG(48) - MOV_TO_REG(49) - MOV_TO_REG(50) - MOV_TO_REG(51) - MOV_TO_REG(52) - MOV_TO_REG(53) - MOV_TO_REG(54) - MOV_TO_REG(55) - MOV_TO_REG(56) - MOV_TO_REG(57) - MOV_TO_REG(58) - MOV_TO_REG(59) - MOV_TO_REG(60) - MOV_TO_REG(61) - MOV_TO_REG(62) - MOV_TO_REG(63) - MOV_TO_REG(64) - MOV_TO_REG(65) - MOV_TO_REG(66) - MOV_TO_REG(67) - MOV_TO_REG(68) - MOV_TO_REG(69) - MOV_TO_REG(70) - MOV_TO_REG(71) - MOV_TO_REG(72) - MOV_TO_REG(73) - MOV_TO_REG(74) - MOV_TO_REG(75) - MOV_TO_REG(76) - MOV_TO_REG(77) - MOV_TO_REG(78) - MOV_TO_REG(79) - MOV_TO_REG(80) - MOV_TO_REG(81) - MOV_TO_REG(82) - MOV_TO_REG(83) - MOV_TO_REG(84) - MOV_TO_REG(85) - MOV_TO_REG(86) - MOV_TO_REG(87) - MOV_TO_REG(88) - MOV_TO_REG(89) - MOV_TO_REG(90) - MOV_TO_REG(91) - MOV_TO_REG(92) - MOV_TO_REG(93) - MOV_TO_REG(94) - MOV_TO_REG(95) - MOV_TO_REG(96) - MOV_TO_REG(97) - MOV_TO_REG(98) - MOV_TO_REG(99) - MOV_TO_REG(100) - MOV_TO_REG(101) - MOV_TO_REG(102) - MOV_TO_REG(103) - MOV_TO_REG(104) - MOV_TO_REG(105) - MOV_TO_REG(106) - MOV_TO_REG(107) - MOV_TO_REG(108) - MOV_TO_REG(109) - MOV_TO_REG(110) - MOV_TO_REG(111) - 
MOV_TO_REG(112) - MOV_TO_REG(113) - MOV_TO_REG(114) - MOV_TO_REG(115) - MOV_TO_REG(116) - MOV_TO_REG(117) - MOV_TO_REG(118) - MOV_TO_REG(119) - MOV_TO_REG(120) - MOV_TO_REG(121) - MOV_TO_REG(122) - MOV_TO_REG(123) - MOV_TO_REG(124) - MOV_TO_REG(125) - MOV_TO_REG(126) - MOV_TO_REG(127) -END(asm_mov_to_reg) diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c deleted file mode 100644 index b0398740b48d..000000000000 --- a/arch/ia64/kvm/process.c +++ /dev/null @@ -1,1024 +0,0 @@ -/* - * process.c: handle interruption inject for guests. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Shaofan Li (Susue Li) <susie.li@intel.com> - * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - */ -#include "vcpu.h" - -#include <asm/pal.h> -#include <asm/sal.h> -#include <asm/fpswa.h> -#include <asm/kregs.h> -#include <asm/tlb.h> - -fpswa_interface_t *vmm_fpswa_interface; - -#define IA64_VHPT_TRANS_VECTOR 0x0000 -#define IA64_INST_TLB_VECTOR 0x0400 -#define IA64_DATA_TLB_VECTOR 0x0800 -#define IA64_ALT_INST_TLB_VECTOR 0x0c00 -#define IA64_ALT_DATA_TLB_VECTOR 0x1000 -#define IA64_DATA_NESTED_TLB_VECTOR 0x1400 -#define IA64_INST_KEY_MISS_VECTOR 0x1800 -#define IA64_DATA_KEY_MISS_VECTOR 0x1c00 -#define IA64_DIRTY_BIT_VECTOR 0x2000 -#define IA64_INST_ACCESS_BIT_VECTOR 0x2400 -#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800 -#define IA64_BREAK_VECTOR 0x2c00 -#define IA64_EXTINT_VECTOR 0x3000 -#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000 -#define IA64_KEY_PERMISSION_VECTOR 0x5100 -#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200 -#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300 -#define IA64_GENEX_VECTOR 0x5400 -#define IA64_DISABLED_FPREG_VECTOR 0x5500 -#define IA64_NAT_CONSUMPTION_VECTOR 0x5600 -#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */ -#define IA64_DEBUG_VECTOR 0x5900 -#define IA64_UNALIGNED_REF_VECTOR 0x5a00 -#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00 -#define IA64_FP_FAULT_VECTOR 0x5c00 -#define IA64_FP_TRAP_VECTOR 0x5d00 -#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00 -#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00 -#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000 - -/* SDM vol2 5.5 - IVA based interruption handling */ -#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\ - IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \ - IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT) - -#define DOMN_PAL_REQUEST 0x110000 -#define DOMN_SAL_REQUEST 0x110001 - -static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800, - 0x1c00, 0x2000, 0x2400, 
0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, - 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, - 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00, - 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, - 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, - 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, - 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00 -}; - -static void collect_interruption(struct kvm_vcpu *vcpu) -{ - u64 ipsr; - u64 vdcr; - u64 vifs; - unsigned long vpsr; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - vpsr = vcpu_get_psr(vcpu); - vcpu_bsw0(vcpu); - if (vpsr & IA64_PSR_IC) { - - /* Sync mpsr id/da/dd/ss/ed bits to vipsr - * since after guest do rfi, we still want these bits on in - * mpsr - */ - - ipsr = regs->cr_ipsr; - vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA - | IA64_PSR_DD | IA64_PSR_SS - | IA64_PSR_ED)); - vcpu_set_ipsr(vcpu, vpsr); - - /* Currently, for trap, we do not advance IIP to next - * instruction. 
That's because we assume caller already - * set up IIP correctly - */ - - vcpu_set_iip(vcpu , regs->cr_iip); - - /* set vifs.v to zero */ - vifs = VCPU(vcpu, ifs); - vifs &= ~IA64_IFS_V; - vcpu_set_ifs(vcpu, vifs); - - vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa)); - } - - vdcr = VCPU(vcpu, dcr); - - /* Set guest psr - * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged - * be: set to the value of dcr.be - * pp: set to the value of dcr.pp - */ - vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION; - vpsr |= (vdcr & IA64_DCR_BE); - - /* VDCR pp bit position is different from VPSR pp bit */ - if (vdcr & IA64_DCR_PP) { - vpsr |= IA64_PSR_PP; - } else { - vpsr &= ~IA64_PSR_PP; - } - - vcpu_set_psr(vcpu, vpsr); - -} - -void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec) -{ - u64 viva; - struct kvm_pt_regs *regs; - union ia64_isr pt_isr; - - regs = vcpu_regs(vcpu); - - /* clear cr.isr.ir (incomplete register frame)*/ - pt_isr.val = VMX(vcpu, cr_isr); - pt_isr.ir = 0; - VMX(vcpu, cr_isr) = pt_isr.val; - - collect_interruption(vcpu); - - viva = vcpu_get_iva(vcpu); - regs->cr_iip = viva + vec; -} - -static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa) -{ - union ia64_rr rr, rr1; - - rr.val = vcpu_get_rr(vcpu, ifa); - rr1.val = 0; - rr1.ps = rr.ps; - rr1.rid = rr.rid; - return (rr1.val); -} - -/* - * Set vIFA & vITIR & vIHA, when vPSR.ic =1 - * Parameter: - * set_ifa: if true, set vIFA - * set_itir: if true, set vITIR - * set_iha: if true, set vIHA - */ -void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr, - int set_ifa, int set_itir, int set_iha) -{ - long vpsr; - u64 value; - - vpsr = VCPU(vcpu, vpsr); - /* Vol2, Table 8-1 */ - if (vpsr & IA64_PSR_IC) { - if (set_ifa) - vcpu_set_ifa(vcpu, vadr); - if (set_itir) { - value = vcpu_get_itir_on_fault(vcpu, vadr); - vcpu_set_itir(vcpu, value); - } - - if (set_iha) { - value = vcpu_thash(vcpu, vadr); - vcpu_set_iha(vcpu, value); - } - } -} - -/* - * Data TLB Fault - * @ Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ 
-void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR); -} - -/* - * Instruction TLB Fault - * @ Instruction TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR); -} - -/* - * Data Nested TLB Fault - * @ Data Nested TLB Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void nested_dtlb(struct kvm_vcpu *vcpu) -{ - inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR); -} - -/* - * Alternate Data TLB Fault - * @ Alternate Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr) -{ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR); -} - -/* - * Data TLB Fault - * @ Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr) -{ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR); -} - -/* Deal with: - * VHPT Translation Vector - */ -static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA*/ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR); -} - -/* - * VHPT Instruction Fault - * @ VHPT Translation vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - _vhpt_fault(vcpu, vadr); -} - -/* - * VHPT Data Fault - * @ VHPT Translation vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) -{ - _vhpt_fault(vcpu, vadr); -} - -/* - * Deal with: - * General Exception vector - */ -void _general_exception(struct kvm_vcpu *vcpu) -{ - inject_guest_interruption(vcpu, IA64_GENEX_VECTOR); -} - -/* 
- * Illegal Operation Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void illegal_op(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Illegal Dependency Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void illegal_dep(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Reserved Register/Field Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void rsv_reg_field(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} -/* - * Privileged Operation Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ - -void privilege_op(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Unimplement Data Address Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void unimpl_daddr(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Privileged Register Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void privilege_reg(struct kvm_vcpu *vcpu) -{ - _general_exception(vcpu); -} - -/* Deal with - * Nat consumption vector - * Parameter: - * vaddr: Optional, if t == REGISTER - */ -static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr, - enum tlb_miss_type t) -{ - /* If vPSR.ic && t == DATA/INST, IFA */ - if (t == DATA || t == INSTRUCTION) { - /* IFA */ - set_ifa_itir_iha(vcpu, vadr, 1, 0, 0); - } - - inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR); -} - -/* - * Instruction Nat Page Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) -{ - _nat_consumption_fault(vcpu, vadr, INSTRUCTION); -} - -/* - * Register Nat Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void rnat_consumption(struct kvm_vcpu *vcpu) -{ - _nat_consumption_fault(vcpu, 0, REGISTER); -} - -/* - * Data Nat Page 
Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) -{ - _nat_consumption_fault(vcpu, vadr, DATA); -} - -/* Deal with - * Page not present vector - */ -static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); -} - -void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) -{ - __page_not_present(vcpu, vadr); -} - -void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) -{ - __page_not_present(vcpu, vadr); -} - -/* Deal with - * Data access rights vector - */ -void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR */ - set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR); -} - -fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, - unsigned long *fpsr, unsigned long *isr, unsigned long *pr, - unsigned long *ifs, struct kvm_pt_regs *regs) -{ - fp_state_t fp_state; - fpswa_ret_t ret; - struct kvm_vcpu *vcpu = current_vcpu; - - uint64_t old_rr7 = ia64_get_rr(7UL<<61); - - if (!vmm_fpswa_interface) - return (fpswa_ret_t) {-1, 0, 0, 0}; - - memset(&fp_state, 0, sizeof(fp_state_t)); - - /* - * compute fp_state. only FP registers f6 - f11 are used by the - * vmm, so set those bits in the mask and set the low volatile - * pointer to point to these registers. - */ - fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ - - fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6; - - /* - * unsigned long (*EFI_FPSWA) ( - * unsigned long trap_type, - * void *Bundle, - * unsigned long *pipsr, - * unsigned long *pfsr, - * unsigned long *pisr, - * unsigned long *ppreds, - * unsigned long *pifs, - * void *fp_state); - */ - /*Call host fpswa interface directly to virtualize - *guest fpswa request! 
- */ - ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]); - ia64_srlz_d(); - - ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle, - ipsr, fpsr, isr, pr, ifs, &fp_state); - ia64_set_rr(7UL << 61, old_rr7); - ia64_srlz_d(); - return ret; -} - -/* - * Handle floating-point assist faults and traps for domain. - */ -unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs, - unsigned long isr) -{ - struct kvm_vcpu *v = current_vcpu; - IA64_BUNDLE bundle; - unsigned long fault_ip; - fpswa_ret_t ret; - - fault_ip = regs->cr_iip; - /* - * When the FP trap occurs, the trapping instruction is completed. - * If ipsr.ri == 0, there is the trapping instruction in previous - * bundle. - */ - if (!fp_fault && (ia64_psr(regs)->ri == 0)) - fault_ip -= 16; - - if (fetch_code(v, fault_ip, &bundle)) - return -EAGAIN; - - if (!bundle.i64[0] && !bundle.i64[1]) - return -EACCES; - - ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr, - &isr, &regs->pr, &regs->cr_ifs, regs); - return ret.status; -} - -void reflect_interruption(u64 ifa, u64 isr, u64 iim, - u64 vec, struct kvm_pt_regs *regs) -{ - u64 vector; - int status ; - struct kvm_vcpu *vcpu = current_vcpu; - u64 vpsr = VCPU(vcpu, vpsr); - - vector = vec2off[vec]; - - if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { - panic_vm(vcpu, "Interruption with vector :0x%lx occurs " - "with psr.ic = 0\n", vector); - return; - } - - switch (vec) { - case 32: /*IA64_FP_FAULT_VECTOR*/ - status = vmm_handle_fpu_swa(1, regs, isr); - if (!status) { - vcpu_increment_iip(vcpu); - return; - } else if (-EAGAIN == status) - return; - break; - case 33: /*IA64_FP_TRAP_VECTOR*/ - status = vmm_handle_fpu_swa(0, regs, isr); - if (!status) - return ; - break; - } - - VCPU(vcpu, isr) = isr; - VCPU(vcpu, iipa) = regs->cr_iip; - if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) - VCPU(vcpu, iim) = iim; - else - set_ifa_itir_iha(vcpu, ifa, 1, 1, 1); - - inject_guest_interruption(vcpu, vector); -} - 
-static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu, - unsigned long arg) -{ - struct thash_data *data; - unsigned long gpa, poff; - - if (!is_physical_mode(vcpu)) { - /* Depends on caller to provide the DTR or DTC mapping.*/ - data = vtlb_lookup(vcpu, arg, D_TLB); - if (data) - gpa = data->page_flags & _PAGE_PPN_MASK; - else { - data = vhpt_lookup(arg); - if (!data) - return 0; - gpa = data->gpaddr & _PAGE_PPN_MASK; - } - - poff = arg & (PSIZE(data->ps) - 1); - arg = PAGEALIGN(gpa, data->ps) | poff; - } - arg = kvm_gpa_to_mpa(arg << 1 >> 1); - - return (unsigned long)__va(arg); -} - -static void set_pal_call_data(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - unsigned long gr28 = vcpu_get_gr(vcpu, 28); - unsigned long gr29 = vcpu_get_gr(vcpu, 29); - unsigned long gr30 = vcpu_get_gr(vcpu, 30); - - /*FIXME:For static and stacked convention, firmware - * has put the parameters in gr28-gr31 before - * break to vmm !!*/ - - switch (gr28) { - case PAL_PERF_MON_INFO: - case PAL_HALT_INFO: - p->u.pal_data.gr29 = kvm_trans_pal_call_args(vcpu, gr29); - p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); - break; - case PAL_BRAND_INFO: - p->u.pal_data.gr29 = gr29; - p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30); - break; - default: - p->u.pal_data.gr29 = gr29; - p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); - } - p->u.pal_data.gr28 = gr28; - p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31); - - p->exit_reason = EXIT_REASON_PAL_CALL; -} - -static void get_pal_call_result(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - if (p->exit_reason == EXIT_REASON_PAL_CALL) { - vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0); - vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0); - vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); - vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); - } else - panic_vm(vcpu, "Mis-set for exit reason!\n"); -} - -static void set_sal_call_data(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data 
*p = &vcpu->arch.exit_data; - - p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32); - p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33); - p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34); - p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35); - p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36); - p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37); - p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38); - p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39); - p->exit_reason = EXIT_REASON_SAL_CALL; -} - -static void get_sal_call_result(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - if (p->exit_reason == EXIT_REASON_SAL_CALL) { - vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0); - vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0); - vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); - vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); - } else - panic_vm(vcpu, "Mis-set for exit reason!\n"); -} - -void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, - unsigned long isr, unsigned long iim) -{ - struct kvm_vcpu *v = current_vcpu; - long psr; - - if (ia64_psr(regs)->cpl == 0) { - /* Allow hypercalls only when cpl = 0. 
*/ - if (iim == DOMN_PAL_REQUEST) { - local_irq_save(psr); - set_pal_call_data(v); - vmm_transition(v); - get_pal_call_result(v); - vcpu_increment_iip(v); - local_irq_restore(psr); - return; - } else if (iim == DOMN_SAL_REQUEST) { - local_irq_save(psr); - set_sal_call_data(v); - vmm_transition(v); - get_sal_call_result(v); - vcpu_increment_iip(v); - local_irq_restore(psr); - return; - } - } - reflect_interruption(ifa, isr, iim, 11, regs); -} - -void check_pending_irq(struct kvm_vcpu *vcpu) -{ - int mask, h_pending, h_inservice; - u64 isr; - unsigned long vpsr; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - h_pending = highest_pending_irq(vcpu); - if (h_pending == NULL_VECTOR) { - update_vhpi(vcpu, NULL_VECTOR); - return; - } - h_inservice = highest_inservice_irq(vcpu); - - vpsr = VCPU(vcpu, vpsr); - mask = irq_masked(vcpu, h_pending, h_inservice); - if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) { - isr = vpsr & IA64_PSR_RI; - update_vhpi(vcpu, h_pending); - reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ - } else if (mask == IRQ_MASKED_BY_INSVC) { - if (VCPU(vcpu, vhpi)) - update_vhpi(vcpu, NULL_VECTOR); - } else { - /* masked by vpsr.i or vtpr.*/ - update_vhpi(vcpu, h_pending); - } -} - -static void generate_exirq(struct kvm_vcpu *vcpu) -{ - unsigned vpsr; - uint64_t isr; - - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - vpsr = VCPU(vcpu, vpsr); - isr = vpsr & IA64_PSR_RI; - if (!(vpsr & IA64_PSR_IC)) - panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n"); - reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ -} - -void vhpi_detection(struct kvm_vcpu *vcpu) -{ - uint64_t threshold, vhpi; - union ia64_tpr vtpr; - struct ia64_psr vpsr; - - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - vtpr.val = VCPU(vcpu, tpr); - - threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; - vhpi = VCPU(vcpu, vhpi); - if (vhpi > threshold) { - /* interrupt actived*/ - generate_exirq(vcpu); - } -} - -void leave_hypervisor_tail(void) -{ - struct 
kvm_vcpu *v = current_vcpu; - - if (VMX(v, timer_check)) { - VMX(v, timer_check) = 0; - if (VMX(v, itc_check)) { - if (vcpu_get_itc(v) > VCPU(v, itm)) { - if (!(VCPU(v, itv) & (1 << 16))) { - vcpu_pend_interrupt(v, VCPU(v, itv) - & 0xff); - VMX(v, itc_check) = 0; - } else { - v->arch.timer_pending = 1; - } - VMX(v, last_itc) = VCPU(v, itm) + 1; - } - } - } - - rmb(); - if (v->arch.irq_new_pending) { - v->arch.irq_new_pending = 0; - VMX(v, irq_check) = 0; - check_pending_irq(v); - return; - } - if (VMX(v, irq_check)) { - VMX(v, irq_check) = 0; - vhpi_detection(v); - } -} - -static inline void handle_lds(struct kvm_pt_regs *regs) -{ - regs->cr_ipsr |= IA64_PSR_ED; -} - -void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type) -{ - unsigned long pte; - union ia64_rr rr; - - rr.val = ia64_get_rr(vadr); - pte = vadr & _PAGE_PPN_MASK; - pte = pte | PHY_PAGE_WB; - thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type); - return; -} - -void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs) -{ - unsigned long vpsr; - int type; - - u64 vhpt_adr, gppa, pteval, rr, itir; - union ia64_isr misr; - union ia64_pta vpta; - struct thash_data *data; - struct kvm_vcpu *v = current_vcpu; - - vpsr = VCPU(v, vpsr); - misr.val = VMX(v, cr_isr); - - type = vec; - - if (is_physical_mode(v) && (!(vadr << 1 >> 62))) { - if (vec == 2) { - if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) { - emulate_io_inst(v, ((vadr << 1) >> 1), 4); - return; - } - } - physical_tlb_miss(v, vadr, type); - return; - } - data = vtlb_lookup(v, vadr, type); - if (data != 0) { - if (type == D_TLB) { - gppa = (vadr & ((1UL << data->ps) - 1)) - + (data->ppn >> (data->ps - 12) << data->ps); - if (__gpfn_is_io(gppa >> PAGE_SHIFT)) { - if (data->pl >= ((regs->cr_ipsr >> - IA64_PSR_CPL0_BIT) & 3)) - emulate_io_inst(v, gppa, data->ma); - else { - vcpu_set_isr(v, misr.val); - data_access_rights(v, vadr); - } - return ; - } - } - thash_vhpt_insert(v, data->page_flags, data->itir, vadr, 
type); - - } else if (type == D_TLB) { - if (misr.sp) { - handle_lds(regs); - return; - } - - rr = vcpu_get_rr(v, vadr); - itir = rr & (RR_RID_MASK | RR_PS_MASK); - - if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) { - if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - alt_dtlb(v, vadr); - } else { - nested_dtlb(v); - } - return ; - } - - vpta.val = vcpu_get_pta(v); - /* avoid recursively walking (short format) VHPT */ - - vhpt_adr = vcpu_thash(v, vadr); - if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { - /* VHPT successfully read. */ - if (!(pteval & _PAGE_P)) { - if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - dtlb_fault(v, vadr); - } else { - nested_dtlb(v); - } - } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { - thash_purge_and_insert(v, pteval, itir, - vadr, D_TLB); - } else if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - dtlb_fault(v, vadr); - } else { - nested_dtlb(v); - } - } else { - /* Can't read VHPT. */ - if (vpsr & IA64_PSR_IC) { - vcpu_set_isr(v, misr.val); - dvhpt_fault(v, vadr); - } else { - nested_dtlb(v); - } - } - } else if (type == I_TLB) { - if (!(vpsr & IA64_PSR_IC)) - misr.ni = 1; - if (!vhpt_enabled(v, vadr, INST_REF)) { - vcpu_set_isr(v, misr.val); - alt_itlb(v, vadr); - return; - } - - vpta.val = vcpu_get_pta(v); - - vhpt_adr = vcpu_thash(v, vadr); - if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { - /* VHPT successfully read. 
*/ - if (pteval & _PAGE_P) { - if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) { - vcpu_set_isr(v, misr.val); - itlb_fault(v, vadr); - return ; - } - rr = vcpu_get_rr(v, vadr); - itir = rr & (RR_RID_MASK | RR_PS_MASK); - thash_purge_and_insert(v, pteval, itir, - vadr, I_TLB); - } else { - vcpu_set_isr(v, misr.val); - inst_page_not_present(v, vadr); - } - } else { - vcpu_set_isr(v, misr.val); - ivhpt_fault(v, vadr); - } - } -} - -void kvm_vexirq(struct kvm_vcpu *vcpu) -{ - u64 vpsr, isr; - struct kvm_pt_regs *regs; - - regs = vcpu_regs(vcpu); - vpsr = VCPU(vcpu, vpsr); - isr = vpsr & IA64_PSR_RI; - reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/ -} - -void kvm_ia64_handle_irq(struct kvm_vcpu *v) -{ - struct exit_ctl_data *p = &v->arch.exit_data; - long psr; - - local_irq_save(psr); - p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; - vmm_transition(v); - local_irq_restore(psr); - - VMX(v, timer_check) = 1; - -} - -static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos) -{ - u64 oldrid, moldrid, oldpsbits, vaddr; - struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos]; - vaddr = p->vaddr; - - oldrid = VMX(v, vrr[0]); - VMX(v, vrr[0]) = p->rr; - oldpsbits = VMX(v, psbits[0]); - VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]); - moldrid = ia64_get_rr(0x0); - ia64_set_rr(0x0, vrrtomrr(p->rr)); - ia64_srlz_d(); - - vaddr = PAGEALIGN(vaddr, p->ps); - thash_purge_entries_remote(v, vaddr, p->ps); - - VMX(v, vrr[0]) = oldrid; - VMX(v, psbits[0]) = oldpsbits; - ia64_set_rr(0x0, moldrid); - ia64_dv_serialize_data(); -} - -static void vcpu_do_resume(struct kvm_vcpu *vcpu) -{ - /*Re-init VHPT and VTLB once from resume*/ - vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES; - thash_init(&vcpu->arch.vhpt, VHPT_SHIFT); - vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES; - thash_init(&vcpu->arch.vtlb, VTLB_SHIFT); - - ia64_set_pta(vcpu->arch.vhpt.pta.val); -} - -static void vmm_sanity_check(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - - if (!vmm_sanity && 
p->exit_reason != EXIT_REASON_DEBUG) { - panic_vm(vcpu, "Failed to do vmm sanity check," - "it maybe caused by crashed vmm!!\n\n"); - } -} - -static void kvm_do_resume_op(struct kvm_vcpu *vcpu) -{ - vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/ - - if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { - vcpu_do_resume(vcpu); - return; - } - - if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) { - thash_purge_all(vcpu); - return; - } - - if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) { - while (vcpu->arch.ptc_g_count > 0) - ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count); - } -} - -void vmm_transition(struct kvm_vcpu *vcpu) -{ - ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, - 1, 0, 0, 0, 0, 0); - vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); - ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, - 1, 0, 0, 0, 0, 0); - kvm_do_resume_op(vcpu); -} - -void vmm_panic_handler(u64 vec) -{ - struct kvm_vcpu *vcpu = current_vcpu; - vmm_sanity = 0; - panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n", - vec2off[vec]); -} diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S deleted file mode 100644 index 30897d44d61e..000000000000 --- a/arch/ia64/kvm/trampoline.S +++ /dev/null @@ -1,1038 +0,0 @@ -/* Save all processor states - * - * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com> - * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com> - */ - -#include <asm/asmmacro.h> -#include "asm-offsets.h" - - -#define CTX(name) VMM_CTX_##name##_OFFSET - - /* - * r32: context_t base address - */ -#define SAVE_BRANCH_REGS \ - add r2 = CTX(B0),r32; \ - add r3 = CTX(B1),r32; \ - mov r16 = b0; \ - mov r17 = b1; \ - ;; \ - st8 [r2]=r16,16; \ - st8 [r3]=r17,16; \ - ;; \ - mov r16 = b2; \ - mov r17 = b3; \ - ;; \ - st8 [r2]=r16,16; \ - st8 [r3]=r17,16; \ - ;; \ - mov r16 = b4; \ - mov r17 = b5; \ - ;; \ - st8 [r2]=r16; \ - st8 [r3]=r17; \ - ;; - - /* - * r33: 
context_t base address - */ -#define RESTORE_BRANCH_REGS \ - add r2 = CTX(B0),r33; \ - add r3 = CTX(B1),r33; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov b0 = r16; \ - mov b1 = r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov b2 = r16; \ - mov b3 = r17; \ - ;; \ - ld8 r16=[r2]; \ - ld8 r17=[r3]; \ - ;; \ - mov b4=r16; \ - mov b5=r17; \ - ;; - - - /* - * r32: context_t base address - * bsw == 1 - * Save all bank1 general registers, r4 ~ r7 - */ -#define SAVE_GENERAL_REGS \ - add r2=CTX(R4),r32; \ - add r3=CTX(R5),r32; \ - ;; \ -.mem.offset 0,0; \ - st8.spill [r2]=r4,16; \ -.mem.offset 8,0; \ - st8.spill [r3]=r5,16; \ - ;; \ -.mem.offset 0,0; \ - st8.spill [r2]=r6,48; \ -.mem.offset 8,0; \ - st8.spill [r3]=r7,48; \ - ;; \ -.mem.offset 0,0; \ - st8.spill [r2]=r12; \ -.mem.offset 8,0; \ - st8.spill [r3]=r13; \ - ;; - - /* - * r33: context_t base address - * bsw == 1 - */ -#define RESTORE_GENERAL_REGS \ - add r2=CTX(R4),r33; \ - add r3=CTX(R5),r33; \ - ;; \ - ld8.fill r4=[r2],16; \ - ld8.fill r5=[r3],16; \ - ;; \ - ld8.fill r6=[r2],48; \ - ld8.fill r7=[r3],48; \ - ;; \ - ld8.fill r12=[r2]; \ - ld8.fill r13 =[r3]; \ - ;; - - - - - /* - * r32: context_t base address - */ -#define SAVE_KERNEL_REGS \ - add r2 = CTX(KR0),r32; \ - add r3 = CTX(KR1),r32; \ - mov r16 = ar.k0; \ - mov r17 = ar.k1; \ - ;; \ - st8 [r2] = r16,16; \ - st8 [r3] = r17,16; \ - ;; \ - mov r16 = ar.k2; \ - mov r17 = ar.k3; \ - ;; \ - st8 [r2] = r16,16; \ - st8 [r3] = r17,16; \ - ;; \ - mov r16 = ar.k4; \ - mov r17 = ar.k5; \ - ;; \ - st8 [r2] = r16,16; \ - st8 [r3] = r17,16; \ - ;; \ - mov r16 = ar.k6; \ - mov r17 = ar.k7; \ - ;; \ - st8 [r2] = r16; \ - st8 [r3] = r17; \ - ;; - - - - /* - * r33: context_t base address - */ -#define RESTORE_KERNEL_REGS \ - add r2 = CTX(KR0),r33; \ - add r3 = CTX(KR1),r33; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k0=r16; \ - mov ar.k1=r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov 
ar.k2=r16; \ - mov ar.k3=r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k4=r16; \ - mov ar.k5=r17; \ - ;; \ - ld8 r16=[r2],16; \ - ld8 r17=[r3],16; \ - ;; \ - mov ar.k6=r16; \ - mov ar.k7=r17; \ - ;; - - - - /* - * r32: context_t base address - */ -#define SAVE_APP_REGS \ - add r2 = CTX(BSPSTORE),r32; \ - mov r16 = ar.bspstore; \ - ;; \ - st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\ - mov r16 = ar.rnat; \ - ;; \ - st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \ - mov r16 = ar.fcr; \ - ;; \ - st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \ - mov r16 = ar.eflag; \ - ;; \ - st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \ - mov r16 = ar.cflg; \ - ;; \ - st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \ - mov r16 = ar.fsr; \ - ;; \ - st8 [r2] = r16,CTX(FIR)-CTX(FSR); \ - mov r16 = ar.fir; \ - ;; \ - st8 [r2] = r16,CTX(FDR)-CTX(FIR); \ - mov r16 = ar.fdr; \ - ;; \ - st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \ - mov r16 = ar.unat; \ - ;; \ - st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \ - mov r16 = ar.fpsr; \ - ;; \ - st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \ - mov r16 = ar.pfs; \ - ;; \ - st8 [r2] = r16,CTX(LC)-CTX(PFS); \ - mov r16 = ar.lc; \ - ;; \ - st8 [r2] = r16; \ - ;; - - /* - * r33: context_t base address - */ -#define RESTORE_APP_REGS \ - add r2=CTX(BSPSTORE),r33; \ - ;; \ - ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \ - ;; \ - mov ar.bspstore=r16; \ - ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \ - ;; \ - mov ar.rnat=r16; \ - ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \ - ;; \ - mov ar.fcr=r16; \ - ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \ - ;; \ - mov ar.eflag=r16; \ - ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \ - ;; \ - mov ar.cflg=r16; \ - ld8 r16=[r2],CTX(FIR)-CTX(FSR); \ - ;; \ - mov ar.fsr=r16; \ - ld8 r16=[r2],CTX(FDR)-CTX(FIR); \ - ;; \ - mov ar.fir=r16; \ - ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \ - ;; \ - mov ar.fdr=r16; \ - ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \ - ;; \ - mov ar.unat=r16; \ - ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \ - ;; \ - mov ar.fpsr=r16; \ - ld8 r16=[r2],CTX(LC)-CTX(PFS); \ - ;; \ - mov ar.pfs=r16; \ - ld8 r16=[r2]; \ - ;; \ 
- mov ar.lc=r16; \ - ;; - - /* - * r32: context_t base address - */ -#define SAVE_CTL_REGS \ - add r2 = CTX(DCR),r32; \ - mov r16 = cr.dcr; \ - ;; \ - st8 [r2] = r16,CTX(IVA)-CTX(DCR); \ - ;; \ - mov r16 = cr.iva; \ - ;; \ - st8 [r2] = r16,CTX(PTA)-CTX(IVA); \ - ;; \ - mov r16 = cr.pta; \ - ;; \ - st8 [r2] = r16 ; \ - ;; - - /* - * r33: context_t base address - */ -#define RESTORE_CTL_REGS \ - add r2 = CTX(DCR),r33; \ - ;; \ - ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \ - ;; \ - mov cr.dcr = r16; \ - dv_serialize_data; \ - ;; \ - ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \ - ;; \ - mov cr.iva = r16; \ - dv_serialize_data; \ - ;; \ - ld8 r16 = [r2]; \ - ;; \ - mov cr.pta = r16; \ - dv_serialize_data; \ - ;; - - - /* - * r32: context_t base address - */ -#define SAVE_REGION_REGS \ - add r2=CTX(RR0),r32; \ - mov r16=rr[r0]; \ - dep.z r18=1,61,3; \ - ;; \ - st8 [r2]=r16,8; \ - mov r17=rr[r18]; \ - dep.z r18=2,61,3; \ - ;; \ - st8 [r2]=r17,8; \ - mov r16=rr[r18]; \ - dep.z r18=3,61,3; \ - ;; \ - st8 [r2]=r16,8; \ - mov r17=rr[r18]; \ - dep.z r18=4,61,3; \ - ;; \ - st8 [r2]=r17,8; \ - mov r16=rr[r18]; \ - dep.z r18=5,61,3; \ - ;; \ - st8 [r2]=r16,8; \ - mov r17=rr[r18]; \ - dep.z r18=7,61,3; \ - ;; \ - st8 [r2]=r17,16; \ - mov r16=rr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - ;; - - /* - * r33:context_t base address - */ -#define RESTORE_REGION_REGS \ - add r2=CTX(RR0),r33;\ - mov r18=r0; \ - ;; \ - ld8 r20=[r2],8; \ - ;; /* rr0 */ \ - ld8 r21=[r2],8; \ - ;; /* rr1 */ \ - ld8 r22=[r2],8; \ - ;; /* rr2 */ \ - ld8 r23=[r2],8; \ - ;; /* rr3 */ \ - ld8 r24=[r2],8; \ - ;; /* rr4 */ \ - ld8 r25=[r2],16; \ - ;; /* rr5 */ \ - ld8 r27=[r2]; \ - ;; /* rr7 */ \ - mov rr[r18]=r20; \ - dep.z r18=1,61,3; \ - ;; /* rr1 */ \ - mov rr[r18]=r21; \ - dep.z r18=2,61,3; \ - ;; /* rr2 */ \ - mov rr[r18]=r22; \ - dep.z r18=3,61,3; \ - ;; /* rr3 */ \ - mov rr[r18]=r23; \ - dep.z r18=4,61,3; \ - ;; /* rr4 */ \ - mov rr[r18]=r24; \ - dep.z r18=5,61,3; \ - ;; /* rr5 */ \ - mov rr[r18]=r25; \ - dep.z r18=7,61,3; \ - ;; 
/* rr7 */ \ - mov rr[r18]=r27; \ - ;; \ - srlz.i; \ - ;; - - - - /* - * r32: context_t base address - * r36~r39:scratch registers - */ -#define SAVE_DEBUG_REGS \ - add r2=CTX(IBR0),r32; \ - add r3=CTX(DBR0),r32; \ - mov r16=ibr[r0]; \ - mov r17=dbr[r0]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=1,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=2,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=2,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=3,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=4,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=5,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=6,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - add r18=7,r0; \ - ;; \ - mov r16=ibr[r18]; \ - mov r17=dbr[r18]; \ - ;; \ - st8 [r2]=r16,8; \ - st8 [r3]=r17,8; \ - ;; - - -/* - * r33: point to context_t structure - * ar.lc are corrupted. 
- */ -#define RESTORE_DEBUG_REGS \ - add r2=CTX(IBR0),r33; \ - add r3=CTX(DBR0),r33; \ - mov r16=7; \ - mov r17=r0; \ - ;; \ - mov ar.lc = r16; \ - ;; \ -1: \ - ld8 r18=[r2],8; \ - ld8 r19=[r3],8; \ - ;; \ - mov ibr[r17]=r18; \ - mov dbr[r17]=r19; \ - ;; \ - srlz.i; \ - ;; \ - add r17=1,r17; \ - br.cloop.sptk 1b; \ - ;; - - - /* - * r32: context_t base address - */ -#define SAVE_FPU_LOW \ - add r2=CTX(F2),r32; \ - add r3=CTX(F3),r32; \ - ;; \ - stf.spill.nta [r2]=f2,32; \ - stf.spill.nta [r3]=f3,32; \ - ;; \ - stf.spill.nta [r2]=f4,32; \ - stf.spill.nta [r3]=f5,32; \ - ;; \ - stf.spill.nta [r2]=f6,32; \ - stf.spill.nta [r3]=f7,32; \ - ;; \ - stf.spill.nta [r2]=f8,32; \ - stf.spill.nta [r3]=f9,32; \ - ;; \ - stf.spill.nta [r2]=f10,32; \ - stf.spill.nta [r3]=f11,32; \ - ;; \ - stf.spill.nta [r2]=f12,32; \ - stf.spill.nta [r3]=f13,32; \ - ;; \ - stf.spill.nta [r2]=f14,32; \ - stf.spill.nta [r3]=f15,32; \ - ;; \ - stf.spill.nta [r2]=f16,32; \ - stf.spill.nta [r3]=f17,32; \ - ;; \ - stf.spill.nta [r2]=f18,32; \ - stf.spill.nta [r3]=f19,32; \ - ;; \ - stf.spill.nta [r2]=f20,32; \ - stf.spill.nta [r3]=f21,32; \ - ;; \ - stf.spill.nta [r2]=f22,32; \ - stf.spill.nta [r3]=f23,32; \ - ;; \ - stf.spill.nta [r2]=f24,32; \ - stf.spill.nta [r3]=f25,32; \ - ;; \ - stf.spill.nta [r2]=f26,32; \ - stf.spill.nta [r3]=f27,32; \ - ;; \ - stf.spill.nta [r2]=f28,32; \ - stf.spill.nta [r3]=f29,32; \ - ;; \ - stf.spill.nta [r2]=f30; \ - stf.spill.nta [r3]=f31; \ - ;; - - /* - * r32: context_t base address - */ -#define SAVE_FPU_HIGH \ - add r2=CTX(F32),r32; \ - add r3=CTX(F33),r32; \ - ;; \ - stf.spill.nta [r2]=f32,32; \ - stf.spill.nta [r3]=f33,32; \ - ;; \ - stf.spill.nta [r2]=f34,32; \ - stf.spill.nta [r3]=f35,32; \ - ;; \ - stf.spill.nta [r2]=f36,32; \ - stf.spill.nta [r3]=f37,32; \ - ;; \ - stf.spill.nta [r2]=f38,32; \ - stf.spill.nta [r3]=f39,32; \ - ;; \ - stf.spill.nta [r2]=f40,32; \ - stf.spill.nta [r3]=f41,32; \ - ;; \ - stf.spill.nta [r2]=f42,32; \ - stf.spill.nta [r3]=f43,32; \ 
- ;; \ - stf.spill.nta [r2]=f44,32; \ - stf.spill.nta [r3]=f45,32; \ - ;; \ - stf.spill.nta [r2]=f46,32; \ - stf.spill.nta [r3]=f47,32; \ - ;; \ - stf.spill.nta [r2]=f48,32; \ - stf.spill.nta [r3]=f49,32; \ - ;; \ - stf.spill.nta [r2]=f50,32; \ - stf.spill.nta [r3]=f51,32; \ - ;; \ - stf.spill.nta [r2]=f52,32; \ - stf.spill.nta [r3]=f53,32; \ - ;; \ - stf.spill.nta [r2]=f54,32; \ - stf.spill.nta [r3]=f55,32; \ - ;; \ - stf.spill.nta [r2]=f56,32; \ - stf.spill.nta [r3]=f57,32; \ - ;; \ - stf.spill.nta [r2]=f58,32; \ - stf.spill.nta [r3]=f59,32; \ - ;; \ - stf.spill.nta [r2]=f60,32; \ - stf.spill.nta [r3]=f61,32; \ - ;; \ - stf.spill.nta [r2]=f62,32; \ - stf.spill.nta [r3]=f63,32; \ - ;; \ - stf.spill.nta [r2]=f64,32; \ - stf.spill.nta [r3]=f65,32; \ - ;; \ - stf.spill.nta [r2]=f66,32; \ - stf.spill.nta [r3]=f67,32; \ - ;; \ - stf.spill.nta [r2]=f68,32; \ - stf.spill.nta [r3]=f69,32; \ - ;; \ - stf.spill.nta [r2]=f70,32; \ - stf.spill.nta [r3]=f71,32; \ - ;; \ - stf.spill.nta [r2]=f72,32; \ - stf.spill.nta [r3]=f73,32; \ - ;; \ - stf.spill.nta [r2]=f74,32; \ - stf.spill.nta [r3]=f75,32; \ - ;; \ - stf.spill.nta [r2]=f76,32; \ - stf.spill.nta [r3]=f77,32; \ - ;; \ - stf.spill.nta [r2]=f78,32; \ - stf.spill.nta [r3]=f79,32; \ - ;; \ - stf.spill.nta [r2]=f80,32; \ - stf.spill.nta [r3]=f81,32; \ - ;; \ - stf.spill.nta [r2]=f82,32; \ - stf.spill.nta [r3]=f83,32; \ - ;; \ - stf.spill.nta [r2]=f84,32; \ - stf.spill.nta [r3]=f85,32; \ - ;; \ - stf.spill.nta [r2]=f86,32; \ - stf.spill.nta [r3]=f87,32; \ - ;; \ - stf.spill.nta [r2]=f88,32; \ - stf.spill.nta [r3]=f89,32; \ - ;; \ - stf.spill.nta [r2]=f90,32; \ - stf.spill.nta [r3]=f91,32; \ - ;; \ - stf.spill.nta [r2]=f92,32; \ - stf.spill.nta [r3]=f93,32; \ - ;; \ - stf.spill.nta [r2]=f94,32; \ - stf.spill.nta [r3]=f95,32; \ - ;; \ - stf.spill.nta [r2]=f96,32; \ - stf.spill.nta [r3]=f97,32; \ - ;; \ - stf.spill.nta [r2]=f98,32; \ - stf.spill.nta [r3]=f99,32; \ - ;; \ - stf.spill.nta [r2]=f100,32; \ - stf.spill.nta 
[r3]=f101,32; \ - ;; \ - stf.spill.nta [r2]=f102,32; \ - stf.spill.nta [r3]=f103,32; \ - ;; \ - stf.spill.nta [r2]=f104,32; \ - stf.spill.nta [r3]=f105,32; \ - ;; \ - stf.spill.nta [r2]=f106,32; \ - stf.spill.nta [r3]=f107,32; \ - ;; \ - stf.spill.nta [r2]=f108,32; \ - stf.spill.nta [r3]=f109,32; \ - ;; \ - stf.spill.nta [r2]=f110,32; \ - stf.spill.nta [r3]=f111,32; \ - ;; \ - stf.spill.nta [r2]=f112,32; \ - stf.spill.nta [r3]=f113,32; \ - ;; \ - stf.spill.nta [r2]=f114,32; \ - stf.spill.nta [r3]=f115,32; \ - ;; \ - stf.spill.nta [r2]=f116,32; \ - stf.spill.nta [r3]=f117,32; \ - ;; \ - stf.spill.nta [r2]=f118,32; \ - stf.spill.nta [r3]=f119,32; \ - ;; \ - stf.spill.nta [r2]=f120,32; \ - stf.spill.nta [r3]=f121,32; \ - ;; \ - stf.spill.nta [r2]=f122,32; \ - stf.spill.nta [r3]=f123,32; \ - ;; \ - stf.spill.nta [r2]=f124,32; \ - stf.spill.nta [r3]=f125,32; \ - ;; \ - stf.spill.nta [r2]=f126; \ - stf.spill.nta [r3]=f127; \ - ;; - - /* - * r33: point to context_t structure - */ -#define RESTORE_FPU_LOW \ - add r2 = CTX(F2), r33; \ - add r3 = CTX(F3), r33; \ - ;; \ - ldf.fill.nta f2 = [r2], 32; \ - ldf.fill.nta f3 = [r3], 32; \ - ;; \ - ldf.fill.nta f4 = [r2], 32; \ - ldf.fill.nta f5 = [r3], 32; \ - ;; \ - ldf.fill.nta f6 = [r2], 32; \ - ldf.fill.nta f7 = [r3], 32; \ - ;; \ - ldf.fill.nta f8 = [r2], 32; \ - ldf.fill.nta f9 = [r3], 32; \ - ;; \ - ldf.fill.nta f10 = [r2], 32; \ - ldf.fill.nta f11 = [r3], 32; \ - ;; \ - ldf.fill.nta f12 = [r2], 32; \ - ldf.fill.nta f13 = [r3], 32; \ - ;; \ - ldf.fill.nta f14 = [r2], 32; \ - ldf.fill.nta f15 = [r3], 32; \ - ;; \ - ldf.fill.nta f16 = [r2], 32; \ - ldf.fill.nta f17 = [r3], 32; \ - ;; \ - ldf.fill.nta f18 = [r2], 32; \ - ldf.fill.nta f19 = [r3], 32; \ - ;; \ - ldf.fill.nta f20 = [r2], 32; \ - ldf.fill.nta f21 = [r3], 32; \ - ;; \ - ldf.fill.nta f22 = [r2], 32; \ - ldf.fill.nta f23 = [r3], 32; \ - ;; \ - ldf.fill.nta f24 = [r2], 32; \ - ldf.fill.nta f25 = [r3], 32; \ - ;; \ - ldf.fill.nta f26 = [r2], 32; \ - ldf.fill.nta f27 = 
[r3], 32; \ - ;; \ - ldf.fill.nta f28 = [r2], 32; \ - ldf.fill.nta f29 = [r3], 32; \ - ;; \ - ldf.fill.nta f30 = [r2], 32; \ - ldf.fill.nta f31 = [r3], 32; \ - ;; - - - - /* - * r33: point to context_t structure - */ -#define RESTORE_FPU_HIGH \ - add r2 = CTX(F32), r33; \ - add r3 = CTX(F33), r33; \ - ;; \ - ldf.fill.nta f32 = [r2], 32; \ - ldf.fill.nta f33 = [r3], 32; \ - ;; \ - ldf.fill.nta f34 = [r2], 32; \ - ldf.fill.nta f35 = [r3], 32; \ - ;; \ - ldf.fill.nta f36 = [r2], 32; \ - ldf.fill.nta f37 = [r3], 32; \ - ;; \ - ldf.fill.nta f38 = [r2], 32; \ - ldf.fill.nta f39 = [r3], 32; \ - ;; \ - ldf.fill.nta f40 = [r2], 32; \ - ldf.fill.nta f41 = [r3], 32; \ - ;; \ - ldf.fill.nta f42 = [r2], 32; \ - ldf.fill.nta f43 = [r3], 32; \ - ;; \ - ldf.fill.nta f44 = [r2], 32; \ - ldf.fill.nta f45 = [r3], 32; \ - ;; \ - ldf.fill.nta f46 = [r2], 32; \ - ldf.fill.nta f47 = [r3], 32; \ - ;; \ - ldf.fill.nta f48 = [r2], 32; \ - ldf.fill.nta f49 = [r3], 32; \ - ;; \ - ldf.fill.nta f50 = [r2], 32; \ - ldf.fill.nta f51 = [r3], 32; \ - ;; \ - ldf.fill.nta f52 = [r2], 32; \ - ldf.fill.nta f53 = [r3], 32; \ - ;; \ - ldf.fill.nta f54 = [r2], 32; \ - ldf.fill.nta f55 = [r3], 32; \ - ;; \ - ldf.fill.nta f56 = [r2], 32; \ - ldf.fill.nta f57 = [r3], 32; \ - ;; \ - ldf.fill.nta f58 = [r2], 32; \ - ldf.fill.nta f59 = [r3], 32; \ - ;; \ - ldf.fill.nta f60 = [r2], 32; \ - ldf.fill.nta f61 = [r3], 32; \ - ;; \ - ldf.fill.nta f62 = [r2], 32; \ - ldf.fill.nta f63 = [r3], 32; \ - ;; \ - ldf.fill.nta f64 = [r2], 32; \ - ldf.fill.nta f65 = [r3], 32; \ - ;; \ - ldf.fill.nta f66 = [r2], 32; \ - ldf.fill.nta f67 = [r3], 32; \ - ;; \ - ldf.fill.nta f68 = [r2], 32; \ - ldf.fill.nta f69 = [r3], 32; \ - ;; \ - ldf.fill.nta f70 = [r2], 32; \ - ldf.fill.nta f71 = [r3], 32; \ - ;; \ - ldf.fill.nta f72 = [r2], 32; \ - ldf.fill.nta f73 = [r3], 32; \ - ;; \ - ldf.fill.nta f74 = [r2], 32; \ - ldf.fill.nta f75 = [r3], 32; \ - ;; \ - ldf.fill.nta f76 = [r2], 32; \ - ldf.fill.nta f77 = [r3], 32; \ - ;; \ - 
ldf.fill.nta f78 = [r2], 32; \ - ldf.fill.nta f79 = [r3], 32; \ - ;; \ - ldf.fill.nta f80 = [r2], 32; \ - ldf.fill.nta f81 = [r3], 32; \ - ;; \ - ldf.fill.nta f82 = [r2], 32; \ - ldf.fill.nta f83 = [r3], 32; \ - ;; \ - ldf.fill.nta f84 = [r2], 32; \ - ldf.fill.nta f85 = [r3], 32; \ - ;; \ - ldf.fill.nta f86 = [r2], 32; \ - ldf.fill.nta f87 = [r3], 32; \ - ;; \ - ldf.fill.nta f88 = [r2], 32; \ - ldf.fill.nta f89 = [r3], 32; \ - ;; \ - ldf.fill.nta f90 = [r2], 32; \ - ldf.fill.nta f91 = [r3], 32; \ - ;; \ - ldf.fill.nta f92 = [r2], 32; \ - ldf.fill.nta f93 = [r3], 32; \ - ;; \ - ldf.fill.nta f94 = [r2], 32; \ - ldf.fill.nta f95 = [r3], 32; \ - ;; \ - ldf.fill.nta f96 = [r2], 32; \ - ldf.fill.nta f97 = [r3], 32; \ - ;; \ - ldf.fill.nta f98 = [r2], 32; \ - ldf.fill.nta f99 = [r3], 32; \ - ;; \ - ldf.fill.nta f100 = [r2], 32; \ - ldf.fill.nta f101 = [r3], 32; \ - ;; \ - ldf.fill.nta f102 = [r2], 32; \ - ldf.fill.nta f103 = [r3], 32; \ - ;; \ - ldf.fill.nta f104 = [r2], 32; \ - ldf.fill.nta f105 = [r3], 32; \ - ;; \ - ldf.fill.nta f106 = [r2], 32; \ - ldf.fill.nta f107 = [r3], 32; \ - ;; \ - ldf.fill.nta f108 = [r2], 32; \ - ldf.fill.nta f109 = [r3], 32; \ - ;; \ - ldf.fill.nta f110 = [r2], 32; \ - ldf.fill.nta f111 = [r3], 32; \ - ;; \ - ldf.fill.nta f112 = [r2], 32; \ - ldf.fill.nta f113 = [r3], 32; \ - ;; \ - ldf.fill.nta f114 = [r2], 32; \ - ldf.fill.nta f115 = [r3], 32; \ - ;; \ - ldf.fill.nta f116 = [r2], 32; \ - ldf.fill.nta f117 = [r3], 32; \ - ;; \ - ldf.fill.nta f118 = [r2], 32; \ - ldf.fill.nta f119 = [r3], 32; \ - ;; \ - ldf.fill.nta f120 = [r2], 32; \ - ldf.fill.nta f121 = [r3], 32; \ - ;; \ - ldf.fill.nta f122 = [r2], 32; \ - ldf.fill.nta f123 = [r3], 32; \ - ;; \ - ldf.fill.nta f124 = [r2], 32; \ - ldf.fill.nta f125 = [r3], 32; \ - ;; \ - ldf.fill.nta f126 = [r2], 32; \ - ldf.fill.nta f127 = [r3], 32; \ - ;; - - /* - * r32: context_t base address - */ -#define SAVE_PTK_REGS \ - add r2=CTX(PKR0), r32; \ - mov r16=7; \ - ;; \ - mov ar.lc=r16; \ - mov r17=r0; 
\ - ;; \ -1: \ - mov r18=pkr[r17]; \ - ;; \ - srlz.i; \ - ;; \ - st8 [r2]=r18, 8; \ - ;; \ - add r17 =1,r17; \ - ;; \ - br.cloop.sptk 1b; \ - ;; - -/* - * r33: point to context_t structure - * ar.lc are corrupted. - */ -#define RESTORE_PTK_REGS \ - add r2=CTX(PKR0), r33; \ - mov r16=7; \ - ;; \ - mov ar.lc=r16; \ - mov r17=r0; \ - ;; \ -1: \ - ld8 r18=[r2], 8; \ - ;; \ - mov pkr[r17]=r18; \ - ;; \ - srlz.i; \ - ;; \ - add r17 =1,r17; \ - ;; \ - br.cloop.sptk 1b; \ - ;; - - -/* - * void vmm_trampoline( context_t * from, - * context_t * to) - * - * from: r32 - * to: r33 - * note: interrupt disabled before call this function. - */ -GLOBAL_ENTRY(vmm_trampoline) - mov r16 = psr - adds r2 = CTX(PSR), r32 - ;; - st8 [r2] = r16, 8 // psr - mov r17 = pr - ;; - st8 [r2] = r17, 8 // pr - mov r18 = ar.unat - ;; - st8 [r2] = r18 - mov r17 = ar.rsc - ;; - adds r2 = CTX(RSC),r32 - ;; - st8 [r2]= r17 - mov ar.rsc =0 - flushrs - ;; - SAVE_GENERAL_REGS - ;; - SAVE_KERNEL_REGS - ;; - SAVE_APP_REGS - ;; - SAVE_BRANCH_REGS - ;; - SAVE_CTL_REGS - ;; - SAVE_REGION_REGS - ;; - //SAVE_DEBUG_REGS - ;; - rsm psr.dfl - ;; - srlz.d - ;; - SAVE_FPU_LOW - ;; - rsm psr.dfh - ;; - srlz.d - ;; - SAVE_FPU_HIGH - ;; - SAVE_PTK_REGS - ;; - RESTORE_PTK_REGS - ;; - RESTORE_FPU_HIGH - ;; - RESTORE_FPU_LOW - ;; - //RESTORE_DEBUG_REGS - ;; - RESTORE_REGION_REGS - ;; - RESTORE_CTL_REGS - ;; - RESTORE_BRANCH_REGS - ;; - RESTORE_APP_REGS - ;; - RESTORE_KERNEL_REGS - ;; - RESTORE_GENERAL_REGS - ;; - adds r2=CTX(PSR), r33 - ;; - ld8 r16=[r2], 8 // psr - ;; - mov psr.l=r16 - ;; - srlz.d - ;; - ld8 r16=[r2], 8 // pr - ;; - mov pr =r16,-1 - ld8 r16=[r2] // unat - ;; - mov ar.unat=r16 - ;; - adds r2=CTX(RSC),r33 - ;; - ld8 r16 =[r2] - ;; - mov ar.rsc = r16 - ;; - br.ret.sptk.few b0 -END(vmm_trampoline) diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c deleted file mode 100644 index 958815c9787d..000000000000 --- a/arch/ia64/kvm/vcpu.c +++ /dev/null @@ -1,2209 +0,0 @@ -/* - * kvm_vcpu.c: handling all virtual 
cpu related thing. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Shaofan Li (Susue Li) <susie.li@intel.com> - * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang <xiantao.zhang@intel.com> - */ - -#include <linux/kvm_host.h> -#include <linux/types.h> - -#include <asm/processor.h> -#include <asm/ia64regs.h> -#include <asm/gcc_intrin.h> -#include <asm/kregs.h> -#include <asm/pgtable.h> -#include <asm/tlb.h> - -#include "asm-offsets.h" -#include "vcpu.h" - -/* - * Special notes: - * - Index by it/dt/rt sequence - * - Only existing mode transitions are allowed in this table - * - RSE is placed at lazy mode when emulating guest partial mode - * - If gva happens to be rr0 and rr4, only allowed case is identity - * mapping (gva=gpa), or panic! (How?) - */ -int mm_switch_table[8][8] = { - /* 2004/09/12(Kevin): Allow switch to self */ - /* - * (it,dt,rt): (0,0,0) -> (1,1,1) - * This kind of transition usually occurs in the very early - * stage of Linux boot up procedure. Another case is in efi - * and pal calls. (see "arch/ia64/kernel/head.S") - * - * (it,dt,rt): (0,0,0) -> (0,1,1) - * This kind of transition is found when OSYa exits efi boot - * service. 
Due to gva = gpa in this case (Same region), - * data access can be satisfied though itlb entry for physical - * emulation is hit. - */ - {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - /* - * (it,dt,rt): (0,1,1) -> (1,1,1) - * This kind of transition is found in OSYa. - * - * (it,dt,rt): (0,1,1) -> (0,0,0) - * This kind of transition is found in OSYa - */ - {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V}, - /* (1,0,0)->(1,1,1) */ - {0, 0, 0, 0, 0, 0, 0, SW_P2V}, - /* - * (it,dt,rt): (1,0,1) -> (1,1,1) - * This kind of transition usually occurs when Linux returns - * from the low level TLB miss handlers. - * (see "arch/ia64/kernel/ivt.S") - */ - {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V}, - {0, 0, 0, 0, 0, 0, 0, 0}, - /* - * (it,dt,rt): (1,1,1) -> (1,0,1) - * This kind of transition usually occurs in Linux low level - * TLB miss handler. (see "arch/ia64/kernel/ivt.S") - * - * (it,dt,rt): (1,1,1) -> (0,0,0) - * This kind of transition usually occurs in pal and efi calls, - * which requires running in physical mode. 
- * (see "arch/ia64/kernel/head.S") - * (1,1,1)->(1,0,0) - */ - - {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF}, -}; - -void physical_mode_init(struct kvm_vcpu *vcpu) -{ - vcpu->arch.mode_flags = GUEST_IN_PHY; -} - -void switch_to_physical_rid(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - - /* Save original virtual mode rr[0] and rr[4] */ - psr = ia64_clear_ic(); - ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0); - ia64_srlz_d(); - ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4); - ia64_srlz_d(); - - ia64_set_psr(psr); - return; -} - -void switch_to_virtual_rid(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - - psr = ia64_clear_ic(); - ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); - ia64_srlz_d(); - ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); - ia64_srlz_d(); - ia64_set_psr(psr); - return; -} - -static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr) -{ - return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; -} - -void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, - struct ia64_psr new_psr) -{ - int act; - act = mm_switch_action(old_psr, new_psr); - switch (act) { - case SW_V2P: - /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n", - old_psr.val, new_psr.val);*/ - switch_to_physical_rid(vcpu); - /* - * Set rse to enforced lazy, to prevent active rse - *save/restor when guest physical mode. - */ - vcpu->arch.mode_flags |= GUEST_IN_PHY; - break; - case SW_P2V: - switch_to_virtual_rid(vcpu); - /* - * recover old mode which is saved when entering - * guest physical mode - */ - vcpu->arch.mode_flags &= ~GUEST_IN_PHY; - break; - case SW_SELF: - break; - case SW_NOP: - break; - default: - /* Sanity check */ - break; - } - return; -} - -/* - * In physical mode, insert tc/tr for region 0 and 4 uses - * RID[0] and RID[4] which is for physical mode emulation. - * However what those inserted tc/tr wants is rid for - * virtual mode. 
So original virtual rid needs to be restored - * before insert. - * - * Operations which required such switch include: - * - insertions (itc.*, itr.*) - * - purges (ptc.* and ptr.*) - * - tpa - * - tak - * - thash?, ttag? - * All above needs actual virtual rid for destination entry. - */ - -void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, - struct ia64_psr new_psr) -{ - - if ((old_psr.dt != new_psr.dt) - || (old_psr.it != new_psr.it) - || (old_psr.rt != new_psr.rt)) - switch_mm_mode(vcpu, old_psr, new_psr); - - return; -} - - -/* - * In physical mode, insert tc/tr for region 0 and 4 uses - * RID[0] and RID[4] which is for physical mode emulation. - * However what those inserted tc/tr wants is rid for - * virtual mode. So original virtual rid needs to be restored - * before insert. - * - * Operations which required such switch include: - * - insertions (itc.*, itr.*) - * - purges (ptc.* and ptr.*) - * - tpa - * - tak - * - thash?, ttag? - * All above needs actual virtual rid for destination entry. 
- */ - -void prepare_if_physical_mode(struct kvm_vcpu *vcpu) -{ - if (is_physical_mode(vcpu)) { - vcpu->arch.mode_flags |= GUEST_PHY_EMUL; - switch_to_virtual_rid(vcpu); - } - return; -} - -/* Recover always follows prepare */ -void recover_if_physical_mode(struct kvm_vcpu *vcpu) -{ - if (is_physical_mode(vcpu)) - switch_to_physical_rid(vcpu); - vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; - return; -} - -#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x) - -static u16 gr_info[32] = { - 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ - RPT(r1), RPT(r2), RPT(r3), - RPT(r4), RPT(r5), RPT(r6), RPT(r7), - RPT(r8), RPT(r9), RPT(r10), RPT(r11), - RPT(r12), RPT(r13), RPT(r14), RPT(r15), - RPT(r16), RPT(r17), RPT(r18), RPT(r19), - RPT(r20), RPT(r21), RPT(r22), RPT(r23), - RPT(r24), RPT(r25), RPT(r26), RPT(r27), - RPT(r28), RPT(r29), RPT(r30), RPT(r31) -}; - -#define IA64_FIRST_STACKED_GR 32 -#define IA64_FIRST_ROTATING_FR 32 - -static inline unsigned long -rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg) -{ - reg += rrb; - if (reg >= sor) - reg -= sor; - return reg; -} - -/* - * Return the (rotated) index for floating point register - * be in the REGNUM (REGNUM must range from 32-127, - * result is in the range from 0-95. - */ -static inline unsigned long fph_index(struct kvm_pt_regs *regs, - long regnum) -{ - unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; - return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); -} - -/* - * The inverse of the above: given bspstore and the number of - * registers, calculate ar.bsp. 
- */ -static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr, - long num_regs) -{ - long delta = ia64_rse_slot_num(addr) + num_regs; - int i = 0; - - if (num_regs < 0) - delta -= 0x3e; - if (delta < 0) { - while (delta <= -0x3f) { - i--; - delta += 0x3f; - } - } else { - while (delta >= 0x3f) { - i++; - delta -= 0x3f; - } - } - - return addr + num_regs + i; -} - -static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, - unsigned long *val, int *nat) -{ - unsigned long *bsp, *addr, *rnat_addr, *bspstore; - unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; - unsigned long nat_mask; - unsigned long old_rsc, new_rsc; - long sof = (regs->cr_ifs) & 0x7f; - long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); - long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; - long ridx = r1 - 32; - - if (ridx < sor) - ridx = rotate_reg(sor, rrb_gr, ridx); - - old_rsc = ia64_getreg(_IA64_REG_AR_RSC); - new_rsc = old_rsc&(~(0x3)); - ia64_setreg(_IA64_REG_AR_RSC, new_rsc); - - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - bsp = kbs + (regs->loadrs >> 19); - - addr = kvm_rse_skip_regs(bsp, -sof + ridx); - nat_mask = 1UL << ia64_rse_slot_num(addr); - rnat_addr = ia64_rse_rnat_addr(addr); - - if (addr >= bspstore) { - ia64_flushrs(); - ia64_mf(); - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - } - *val = *addr; - if (nat) { - if (bspstore < rnat_addr) - *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT) - & nat_mask); - else - *nat = (int)!!((*rnat_addr) & nat_mask); - ia64_setreg(_IA64_REG_AR_RSC, old_rsc); - } -} - -void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, - unsigned long val, unsigned long nat) -{ - unsigned long *bsp, *bspstore, *addr, *rnat_addr; - unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; - unsigned long nat_mask; - unsigned long old_rsc, new_rsc, psr; - unsigned long rnat; - long sof = (regs->cr_ifs) & 0x7f; - long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); - long rrb_gr = (regs->cr_ifs >> 
18) & 0x7f; - long ridx = r1 - 32; - - if (ridx < sor) - ridx = rotate_reg(sor, rrb_gr, ridx); - - old_rsc = ia64_getreg(_IA64_REG_AR_RSC); - /* put RSC to lazy mode, and set loadrs 0 */ - new_rsc = old_rsc & (~0x3fff0003); - ia64_setreg(_IA64_REG_AR_RSC, new_rsc); - bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ - - addr = kvm_rse_skip_regs(bsp, -sof + ridx); - nat_mask = 1UL << ia64_rse_slot_num(addr); - rnat_addr = ia64_rse_rnat_addr(addr); - - local_irq_save(psr); - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - if (addr >= bspstore) { - - ia64_flushrs(); - ia64_mf(); - *addr = val; - bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); - rnat = ia64_getreg(_IA64_REG_AR_RNAT); - if (bspstore < rnat_addr) - rnat = rnat & (~nat_mask); - else - *rnat_addr = (*rnat_addr)&(~nat_mask); - - ia64_mf(); - ia64_loadrs(); - ia64_setreg(_IA64_REG_AR_RNAT, rnat); - } else { - rnat = ia64_getreg(_IA64_REG_AR_RNAT); - *addr = val; - if (bspstore < rnat_addr) - rnat = rnat&(~nat_mask); - else - *rnat_addr = (*rnat_addr) & (~nat_mask); - - ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore); - ia64_setreg(_IA64_REG_AR_RNAT, rnat); - } - local_irq_restore(psr); - ia64_setreg(_IA64_REG_AR_RSC, old_rsc); -} - -void getreg(unsigned long regnum, unsigned long *val, - int *nat, struct kvm_pt_regs *regs) -{ - unsigned long addr, *unat; - if (regnum >= IA64_FIRST_STACKED_GR) { - get_rse_reg(regs, regnum, val, nat); - return; - } - - /* - * Now look at registers in [0-31] range and init correct UNAT - */ - addr = (unsigned long)regs; - unat = ®s->eml_unat; - - addr += gr_info[regnum]; - - *val = *(unsigned long *)addr; - /* - * do it only when requested - */ - if (nat) - *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL; -} - -void setreg(unsigned long regnum, unsigned long val, - int nat, struct kvm_pt_regs *regs) -{ - unsigned long addr; - unsigned long bitmask; - unsigned long *unat; - - /* - * First takes care of stacked registers - */ - if (regnum 
>= IA64_FIRST_STACKED_GR) { - set_rse_reg(regs, regnum, val, nat); - return; - } - - /* - * Now look at registers in [0-31] range and init correct UNAT - */ - addr = (unsigned long)regs; - unat = ®s->eml_unat; - /* - * add offset from base of struct - * and do it ! - */ - addr += gr_info[regnum]; - - *(unsigned long *)addr = val; - - /* - * We need to clear the corresponding UNAT bit to fully emulate the load - * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 - */ - bitmask = 1UL << ((addr >> 3) & 0x3f); - if (nat) - *unat |= bitmask; - else - *unat &= ~bitmask; - -} - -u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - unsigned long val; - - if (!reg) - return 0; - getreg(reg, &val, 0, regs); - return val; -} - -void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - long sof = (regs->cr_ifs) & 0x7f; - - if (!reg) - return; - if (reg >= sof + 32) - return; - setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/ -} - -void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, - struct kvm_pt_regs *regs) -{ - /* Take floating register rotation into consideration*/ - if (regnum >= IA64_FIRST_ROTATING_FR) - regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); -#define CASE_FIXED_FP(reg) \ - case (reg) : \ - ia64_stf_spill(fpval, reg); \ - break - - switch (regnum) { - CASE_FIXED_FP(0); - CASE_FIXED_FP(1); - CASE_FIXED_FP(2); - CASE_FIXED_FP(3); - CASE_FIXED_FP(4); - CASE_FIXED_FP(5); - - CASE_FIXED_FP(6); - CASE_FIXED_FP(7); - CASE_FIXED_FP(8); - CASE_FIXED_FP(9); - CASE_FIXED_FP(10); - CASE_FIXED_FP(11); - - CASE_FIXED_FP(12); - CASE_FIXED_FP(13); - CASE_FIXED_FP(14); - CASE_FIXED_FP(15); - CASE_FIXED_FP(16); - CASE_FIXED_FP(17); - CASE_FIXED_FP(18); - CASE_FIXED_FP(19); - CASE_FIXED_FP(20); - CASE_FIXED_FP(21); - CASE_FIXED_FP(22); - CASE_FIXED_FP(23); - CASE_FIXED_FP(24); - CASE_FIXED_FP(25); - CASE_FIXED_FP(26); - 
CASE_FIXED_FP(27); - CASE_FIXED_FP(28); - CASE_FIXED_FP(29); - CASE_FIXED_FP(30); - CASE_FIXED_FP(31); - CASE_FIXED_FP(32); - CASE_FIXED_FP(33); - CASE_FIXED_FP(34); - CASE_FIXED_FP(35); - CASE_FIXED_FP(36); - CASE_FIXED_FP(37); - CASE_FIXED_FP(38); - CASE_FIXED_FP(39); - CASE_FIXED_FP(40); - CASE_FIXED_FP(41); - CASE_FIXED_FP(42); - CASE_FIXED_FP(43); - CASE_FIXED_FP(44); - CASE_FIXED_FP(45); - CASE_FIXED_FP(46); - CASE_FIXED_FP(47); - CASE_FIXED_FP(48); - CASE_FIXED_FP(49); - CASE_FIXED_FP(50); - CASE_FIXED_FP(51); - CASE_FIXED_FP(52); - CASE_FIXED_FP(53); - CASE_FIXED_FP(54); - CASE_FIXED_FP(55); - CASE_FIXED_FP(56); - CASE_FIXED_FP(57); - CASE_FIXED_FP(58); - CASE_FIXED_FP(59); - CASE_FIXED_FP(60); - CASE_FIXED_FP(61); - CASE_FIXED_FP(62); - CASE_FIXED_FP(63); - CASE_FIXED_FP(64); - CASE_FIXED_FP(65); - CASE_FIXED_FP(66); - CASE_FIXED_FP(67); - CASE_FIXED_FP(68); - CASE_FIXED_FP(69); - CASE_FIXED_FP(70); - CASE_FIXED_FP(71); - CASE_FIXED_FP(72); - CASE_FIXED_FP(73); - CASE_FIXED_FP(74); - CASE_FIXED_FP(75); - CASE_FIXED_FP(76); - CASE_FIXED_FP(77); - CASE_FIXED_FP(78); - CASE_FIXED_FP(79); - CASE_FIXED_FP(80); - CASE_FIXED_FP(81); - CASE_FIXED_FP(82); - CASE_FIXED_FP(83); - CASE_FIXED_FP(84); - CASE_FIXED_FP(85); - CASE_FIXED_FP(86); - CASE_FIXED_FP(87); - CASE_FIXED_FP(88); - CASE_FIXED_FP(89); - CASE_FIXED_FP(90); - CASE_FIXED_FP(91); - CASE_FIXED_FP(92); - CASE_FIXED_FP(93); - CASE_FIXED_FP(94); - CASE_FIXED_FP(95); - CASE_FIXED_FP(96); - CASE_FIXED_FP(97); - CASE_FIXED_FP(98); - CASE_FIXED_FP(99); - CASE_FIXED_FP(100); - CASE_FIXED_FP(101); - CASE_FIXED_FP(102); - CASE_FIXED_FP(103); - CASE_FIXED_FP(104); - CASE_FIXED_FP(105); - CASE_FIXED_FP(106); - CASE_FIXED_FP(107); - CASE_FIXED_FP(108); - CASE_FIXED_FP(109); - CASE_FIXED_FP(110); - CASE_FIXED_FP(111); - CASE_FIXED_FP(112); - CASE_FIXED_FP(113); - CASE_FIXED_FP(114); - CASE_FIXED_FP(115); - CASE_FIXED_FP(116); - CASE_FIXED_FP(117); - CASE_FIXED_FP(118); - CASE_FIXED_FP(119); - CASE_FIXED_FP(120); - 
CASE_FIXED_FP(121); - CASE_FIXED_FP(122); - CASE_FIXED_FP(123); - CASE_FIXED_FP(124); - CASE_FIXED_FP(125); - CASE_FIXED_FP(126); - CASE_FIXED_FP(127); - } -#undef CASE_FIXED_FP -} - -void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, - struct kvm_pt_regs *regs) -{ - /* Take floating register rotation into consideration*/ - if (regnum >= IA64_FIRST_ROTATING_FR) - regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); - -#define CASE_FIXED_FP(reg) \ - case (reg) : \ - ia64_ldf_fill(reg, fpval); \ - break - - switch (regnum) { - CASE_FIXED_FP(2); - CASE_FIXED_FP(3); - CASE_FIXED_FP(4); - CASE_FIXED_FP(5); - - CASE_FIXED_FP(6); - CASE_FIXED_FP(7); - CASE_FIXED_FP(8); - CASE_FIXED_FP(9); - CASE_FIXED_FP(10); - CASE_FIXED_FP(11); - - CASE_FIXED_FP(12); - CASE_FIXED_FP(13); - CASE_FIXED_FP(14); - CASE_FIXED_FP(15); - CASE_FIXED_FP(16); - CASE_FIXED_FP(17); - CASE_FIXED_FP(18); - CASE_FIXED_FP(19); - CASE_FIXED_FP(20); - CASE_FIXED_FP(21); - CASE_FIXED_FP(22); - CASE_FIXED_FP(23); - CASE_FIXED_FP(24); - CASE_FIXED_FP(25); - CASE_FIXED_FP(26); - CASE_FIXED_FP(27); - CASE_FIXED_FP(28); - CASE_FIXED_FP(29); - CASE_FIXED_FP(30); - CASE_FIXED_FP(31); - CASE_FIXED_FP(32); - CASE_FIXED_FP(33); - CASE_FIXED_FP(34); - CASE_FIXED_FP(35); - CASE_FIXED_FP(36); - CASE_FIXED_FP(37); - CASE_FIXED_FP(38); - CASE_FIXED_FP(39); - CASE_FIXED_FP(40); - CASE_FIXED_FP(41); - CASE_FIXED_FP(42); - CASE_FIXED_FP(43); - CASE_FIXED_FP(44); - CASE_FIXED_FP(45); - CASE_FIXED_FP(46); - CASE_FIXED_FP(47); - CASE_FIXED_FP(48); - CASE_FIXED_FP(49); - CASE_FIXED_FP(50); - CASE_FIXED_FP(51); - CASE_FIXED_FP(52); - CASE_FIXED_FP(53); - CASE_FIXED_FP(54); - CASE_FIXED_FP(55); - CASE_FIXED_FP(56); - CASE_FIXED_FP(57); - CASE_FIXED_FP(58); - CASE_FIXED_FP(59); - CASE_FIXED_FP(60); - CASE_FIXED_FP(61); - CASE_FIXED_FP(62); - CASE_FIXED_FP(63); - CASE_FIXED_FP(64); - CASE_FIXED_FP(65); - CASE_FIXED_FP(66); - CASE_FIXED_FP(67); - CASE_FIXED_FP(68); - CASE_FIXED_FP(69); - CASE_FIXED_FP(70); - 
CASE_FIXED_FP(71); - CASE_FIXED_FP(72); - CASE_FIXED_FP(73); - CASE_FIXED_FP(74); - CASE_FIXED_FP(75); - CASE_FIXED_FP(76); - CASE_FIXED_FP(77); - CASE_FIXED_FP(78); - CASE_FIXED_FP(79); - CASE_FIXED_FP(80); - CASE_FIXED_FP(81); - CASE_FIXED_FP(82); - CASE_FIXED_FP(83); - CASE_FIXED_FP(84); - CASE_FIXED_FP(85); - CASE_FIXED_FP(86); - CASE_FIXED_FP(87); - CASE_FIXED_FP(88); - CASE_FIXED_FP(89); - CASE_FIXED_FP(90); - CASE_FIXED_FP(91); - CASE_FIXED_FP(92); - CASE_FIXED_FP(93); - CASE_FIXED_FP(94); - CASE_FIXED_FP(95); - CASE_FIXED_FP(96); - CASE_FIXED_FP(97); - CASE_FIXED_FP(98); - CASE_FIXED_FP(99); - CASE_FIXED_FP(100); - CASE_FIXED_FP(101); - CASE_FIXED_FP(102); - CASE_FIXED_FP(103); - CASE_FIXED_FP(104); - CASE_FIXED_FP(105); - CASE_FIXED_FP(106); - CASE_FIXED_FP(107); - CASE_FIXED_FP(108); - CASE_FIXED_FP(109); - CASE_FIXED_FP(110); - CASE_FIXED_FP(111); - CASE_FIXED_FP(112); - CASE_FIXED_FP(113); - CASE_FIXED_FP(114); - CASE_FIXED_FP(115); - CASE_FIXED_FP(116); - CASE_FIXED_FP(117); - CASE_FIXED_FP(118); - CASE_FIXED_FP(119); - CASE_FIXED_FP(120); - CASE_FIXED_FP(121); - CASE_FIXED_FP(122); - CASE_FIXED_FP(123); - CASE_FIXED_FP(124); - CASE_FIXED_FP(125); - CASE_FIXED_FP(126); - CASE_FIXED_FP(127); - } -} - -void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - getfpreg(reg, val, regs); /* FIXME: handle NATs later*/ -} - -void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - if (reg > 1) - setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ -} - -/* - * The Altix RTC is mapped specially here for the vmm module - */ -#define SN_RTC_BASE (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT)) -static long kvm_get_itc(struct kvm_vcpu *vcpu) -{ -#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) - struct kvm *kvm = (struct kvm *)KVM_VM_BASE; - - if (kvm->arch.is_sn2) - return 
(*SN_RTC_BASE); - else -#endif - return ia64_getreg(_IA64_REG_AR_ITC); -} - -/************************************************************************ - * lsapic timer - ***********************************************************************/ -u64 vcpu_get_itc(struct kvm_vcpu *vcpu) -{ - unsigned long guest_itc; - guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu); - - if (guest_itc >= VMX(vcpu, last_itc)) { - VMX(vcpu, last_itc) = guest_itc; - return guest_itc; - } else - return VMX(vcpu, last_itc); -} - -static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val); -static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) -{ - struct kvm_vcpu *v; - struct kvm *kvm; - int i; - long itc_offset = val - kvm_get_itc(vcpu); - unsigned long vitv = VCPU(vcpu, itv); - - kvm = (struct kvm *)KVM_VM_BASE; - - if (kvm_vcpu_is_bsp(vcpu)) { - for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) { - v = (struct kvm_vcpu *)((char *)vcpu + - sizeof(struct kvm_vcpu_data) * i); - VMX(v, itc_offset) = itc_offset; - VMX(v, last_itc) = 0; - } - } - VMX(vcpu, last_itc) = 0; - if (VCPU(vcpu, itm) <= val) { - VMX(vcpu, itc_check) = 0; - vcpu_unpend_interrupt(vcpu, vitv); - } else { - VMX(vcpu, itc_check) = 1; - vcpu_set_itm(vcpu, VCPU(vcpu, itm)); - } - -} - -static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, itm)); -} - -static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val) -{ - unsigned long vitv = VCPU(vcpu, itv); - VCPU(vcpu, itm) = val; - - if (val > vcpu_get_itc(vcpu)) { - VMX(vcpu, itc_check) = 1; - vcpu_unpend_interrupt(vcpu, vitv); - VMX(vcpu, timer_pending) = 0; - } else - VMX(vcpu, itc_check) = 0; -} - -#define ITV_VECTOR(itv) (itv&0xff) -#define ITV_IRQ_MASK(itv) (itv&(1<<16)) - -static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, itv) = val; - if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) { - vcpu_pend_interrupt(vcpu, ITV_VECTOR(val)); - vcpu->arch.timer_pending = 0; - } -} - -static inline 
void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val) -{ - int vec; - - vec = highest_inservice_irq(vcpu); - if (vec == NULL_VECTOR) - return; - VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63)); - VCPU(vcpu, eoi) = 0; - vcpu->arch.irq_new_pending = 1; - -} - -/* See Table 5-8 in SDM vol2 for the definition */ -int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice) -{ - union ia64_tpr vtpr; - - vtpr.val = VCPU(vcpu, tpr); - - if (h_inservice == NMI_VECTOR) - return IRQ_MASKED_BY_INSVC; - - if (h_pending == NMI_VECTOR) { - /* Non Maskable Interrupt */ - return IRQ_NO_MASKED; - } - - if (h_inservice == ExtINT_VECTOR) - return IRQ_MASKED_BY_INSVC; - - if (h_pending == ExtINT_VECTOR) { - if (vtpr.mmi) { - /* mask all external IRQ */ - return IRQ_MASKED_BY_VTPR; - } else - return IRQ_NO_MASKED; - } - - if (is_higher_irq(h_pending, h_inservice)) { - if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4))) - return IRQ_NO_MASKED; - else - return IRQ_MASKED_BY_VTPR; - } else { - return IRQ_MASKED_BY_INSVC; - } -} - -void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec) -{ - long spsr; - int ret; - - local_irq_save(spsr); - ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0])); - local_irq_restore(spsr); - - vcpu->arch.irq_new_pending = 1; -} - -void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec) -{ - long spsr; - int ret; - - local_irq_save(spsr); - ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0])); - local_irq_restore(spsr); - if (ret) { - vcpu->arch.irq_new_pending = 1; - wmb(); - } -} - -void update_vhpi(struct kvm_vcpu *vcpu, int vec) -{ - u64 vhpi; - - if (vec == NULL_VECTOR) - vhpi = 0; - else if (vec == NMI_VECTOR) - vhpi = 32; - else if (vec == ExtINT_VECTOR) - vhpi = 16; - else - vhpi = vec >> 4; - - VCPU(vcpu, vhpi) = vhpi; - if (VCPU(vcpu, vac).a_int) - ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT, - (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0); -} - -u64 vcpu_get_ivr(struct kvm_vcpu *vcpu) -{ - int vec, h_inservice, mask; - - vec = 
highest_pending_irq(vcpu); - h_inservice = highest_inservice_irq(vcpu); - mask = irq_masked(vcpu, vec, h_inservice); - if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) { - if (VCPU(vcpu, vhpi)) - update_vhpi(vcpu, NULL_VECTOR); - return IA64_SPURIOUS_INT_VECTOR; - } - if (mask == IRQ_MASKED_BY_VTPR) { - update_vhpi(vcpu, vec); - return IA64_SPURIOUS_INT_VECTOR; - } - VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63)); - vcpu_unpend_interrupt(vcpu, vec); - return (u64)vec; -} - -/************************************************************************** - Privileged operation emulation routines - **************************************************************************/ -u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr) -{ - union ia64_pta vpta; - union ia64_rr vrr; - u64 pval; - u64 vhpt_offset; - - vpta.val = vcpu_get_pta(vcpu); - vrr.val = vcpu_get_rr(vcpu, vadr); - vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1); - if (vpta.vf) { - pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val, - vpta.val, 0, 0, 0, 0); - } else { - pval = (vadr & VRN_MASK) | vhpt_offset | - (vpta.val << 3 >> (vpta.size + 3) << (vpta.size)); - } - return pval; -} - -u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr) -{ - union ia64_rr vrr; - union ia64_pta vpta; - u64 pval; - - vpta.val = vcpu_get_pta(vcpu); - vrr.val = vcpu_get_rr(vcpu, vadr); - if (vpta.vf) { - pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val, - 0, 0, 0, 0, 0); - } else - pval = 1; - - return pval; -} - -u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr) -{ - struct thash_data *data; - union ia64_pta vpta; - u64 key; - - vpta.val = vcpu_get_pta(vcpu); - if (vpta.vf == 0) { - key = 1; - return key; - } - data = vtlb_lookup(vcpu, vadr, D_TLB); - if (!data || !data->p) - key = 1; - else - key = data->key; - - return key; -} - -void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long thash, vadr; - - vadr = vcpu_get_gr(vcpu, inst.M46.r3); - thash = vcpu_thash(vcpu, vadr); - vcpu_set_gr(vcpu, 
inst.M46.r1, thash, 0); -} - -void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long tag, vadr; - - vadr = vcpu_get_gr(vcpu, inst.M46.r3); - tag = vcpu_ttag(vcpu, vadr); - vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); -} - -int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr) -{ - struct thash_data *data; - union ia64_isr visr, pt_isr; - struct kvm_pt_regs *regs; - struct ia64_psr vpsr; - - regs = vcpu_regs(vcpu); - pt_isr.val = VMX(vcpu, cr_isr); - visr.val = 0; - visr.ei = pt_isr.ei; - visr.ir = pt_isr.ir; - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - visr.na = 1; - - data = vhpt_lookup(vadr); - if (data) { - if (data->p == 0) { - vcpu_set_isr(vcpu, visr.val); - data_page_not_present(vcpu, vadr); - return IA64_FAULT; - } else if (data->ma == VA_MATTR_NATPAGE) { - vcpu_set_isr(vcpu, visr.val); - dnat_page_consumption(vcpu, vadr); - return IA64_FAULT; - } else { - *padr = (data->gpaddr >> data->ps << data->ps) | - (vadr & (PSIZE(data->ps) - 1)); - return IA64_NO_FAULT; - } - } - - data = vtlb_lookup(vcpu, vadr, D_TLB); - if (data) { - if (data->p == 0) { - vcpu_set_isr(vcpu, visr.val); - data_page_not_present(vcpu, vadr); - return IA64_FAULT; - } else if (data->ma == VA_MATTR_NATPAGE) { - vcpu_set_isr(vcpu, visr.val); - dnat_page_consumption(vcpu, vadr); - return IA64_FAULT; - } else{ - *padr = ((data->ppn >> (data->ps - 12)) << data->ps) - | (vadr & (PSIZE(data->ps) - 1)); - return IA64_NO_FAULT; - } - } - if (!vhpt_enabled(vcpu, vadr, NA_REF)) { - if (vpsr.ic) { - vcpu_set_isr(vcpu, visr.val); - alt_dtlb(vcpu, vadr); - return IA64_FAULT; - } else { - nested_dtlb(vcpu); - return IA64_FAULT; - } - } else { - if (vpsr.ic) { - vcpu_set_isr(vcpu, visr.val); - dvhpt_fault(vcpu, vadr); - return IA64_FAULT; - } else{ - nested_dtlb(vcpu); - return IA64_FAULT; - } - } - - return IA64_NO_FAULT; -} - -int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r1, r3; - - r3 = vcpu_get_gr(vcpu, inst.M46.r3); - - if (vcpu_tpa(vcpu, r3, &r1)) - 
return IA64_FAULT; - - vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); - return(IA64_NO_FAULT); -} - -void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r1, r3; - - r3 = vcpu_get_gr(vcpu, inst.M46.r3); - r1 = vcpu_tak(vcpu, r3); - vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); -} - -/************************************ - * Insert/Purge translation register/cache - ************************************/ -void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) -{ - thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB); -} - -void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) -{ - thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB); -} - -void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) -{ - u64 ps, va, rid; - struct thash_data *p_itr; - - ps = itir_ps(itir); - va = PAGEALIGN(ifa, ps); - pte &= ~PAGE_FLAGS_RV_MASK; - rid = vcpu_get_rr(vcpu, ifa); - rid = rid & RR_RID_MASK; - p_itr = (struct thash_data *)&vcpu->arch.itrs[slot]; - vcpu_set_tr(p_itr, pte, itir, va, rid); - vcpu_quick_region_set(VMX(vcpu, itr_regions), va); -} - - -void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) -{ - u64 gpfn; - u64 ps, va, rid; - struct thash_data *p_dtr; - - ps = itir_ps(itir); - va = PAGEALIGN(ifa, ps); - pte &= ~PAGE_FLAGS_RV_MASK; - - if (ps != _PAGE_SIZE_16M) - thash_purge_entries(vcpu, va, ps); - gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; - if (__gpfn_is_io(gpfn)) - pte |= VTLB_PTE_IO; - rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK; - p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot]; - vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot], - pte, itir, va, rid); - vcpu_quick_region_set(VMX(vcpu, dtr_regions), va); -} - -void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) -{ - int index; - u64 va; - - va = PAGEALIGN(ifa, ps); - while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0) - vcpu->arch.dtrs[index].page_flags = 0; - - thash_purge_entries(vcpu, va, ps); -} - -void 
vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) -{ - int index; - u64 va; - - va = PAGEALIGN(ifa, ps); - while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0) - vcpu->arch.itrs[index].page_flags = 0; - - thash_purge_entries(vcpu, va, ps); -} - -void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps) -{ - va = PAGEALIGN(va, ps); - thash_purge_entries(vcpu, va, ps); -} - -void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va) -{ - thash_purge_all(vcpu); -} - -void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - long psr; - local_irq_save(psr); - p->exit_reason = EXIT_REASON_PTC_G; - - p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va); - p->u.ptc_g_data.vaddr = va; - p->u.ptc_g_data.ps = ps; - vmm_transition(vcpu); - /* Do Local Purge Here*/ - vcpu_ptc_l(vcpu, va, ps); - local_irq_restore(psr); -} - - -void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps) -{ - vcpu_ptc_ga(vcpu, va, ps); -} - -void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - vcpu_ptc_e(vcpu, ifa); -} - -void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptc_g(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptc_ga(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptc_l(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptr_d(vcpu, ifa, itir_ps(itir)); -} - -void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst) 
-{ - unsigned long ifa, itir; - - ifa = vcpu_get_gr(vcpu, inst.M45.r3); - itir = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_ptr_i(vcpu, ifa, itir_ps(itir)); -} - -void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte, slot; - - slot = vcpu_get_gr(vcpu, inst.M45.r3); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - vcpu_itr_d(vcpu, slot, pte, itir, ifa); -} - - - -void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte, slot; - - slot = vcpu_get_gr(vcpu, inst.M45.r3); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - vcpu_itr_i(vcpu, slot, pte, itir, ifa); -} - -void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte; - - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_itc_d(vcpu, pte, itir, ifa); -} - -void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long itir, ifa, pte; - - itir = vcpu_get_itir(vcpu); - ifa = vcpu_get_ifa(vcpu); - pte = vcpu_get_gr(vcpu, inst.M45.r2); - vcpu_itc_i(vcpu, pte, itir, ifa); -} - -/************************************* - * Moves to semi-privileged registers - *************************************/ - -void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long imm; - - if (inst.M30.s) - imm = -inst.M30.imm; - else - imm = inst.M30.imm; - - vcpu_set_itc(vcpu, imm); -} - -void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r2; - - r2 = vcpu_get_gr(vcpu, inst.M29.r2); - vcpu_set_itc(vcpu, r2); -} - -void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r1; - - r1 = vcpu_get_itc(vcpu); - vcpu_set_gr(vcpu, inst.M31.r1, r1, 0); -} - -/************************************************************************** - struct kvm_vcpu protection key register access routines - 
**************************************************************************/ - -unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg) -{ - return ((unsigned long)ia64_get_pkr(reg)); -} - -void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val) -{ - ia64_set_pkr(reg, val); -} - -/******************************** - * Moves to privileged registers - ********************************/ -unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg, - unsigned long val) -{ - union ia64_rr oldrr, newrr; - unsigned long rrval; - struct exit_ctl_data *p = &vcpu->arch.exit_data; - unsigned long psr; - - oldrr.val = vcpu_get_rr(vcpu, reg); - newrr.val = val; - vcpu->arch.vrr[reg >> VRN_SHIFT] = val; - - switch ((unsigned long)(reg >> VRN_SHIFT)) { - case VRN6: - vcpu->arch.vmm_rr = vrrtomrr(val); - local_irq_save(psr); - p->exit_reason = EXIT_REASON_SWITCH_RR6; - vmm_transition(vcpu); - local_irq_restore(psr); - break; - case VRN4: - rrval = vrrtomrr(val); - vcpu->arch.metaphysical_saved_rr4 = rrval; - if (!is_physical_mode(vcpu)) - ia64_set_rr(reg, rrval); - break; - case VRN0: - rrval = vrrtomrr(val); - vcpu->arch.metaphysical_saved_rr0 = rrval; - if (!is_physical_mode(vcpu)) - ia64_set_rr(reg, rrval); - break; - default: - ia64_set_rr(reg, vrrtomrr(val)); - break; - } - - return (IA64_NO_FAULT); -} - -void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_rr(vcpu, r3, r2); -} - -void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst) -{ -} - -void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst) -{ -} - -void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_pmc(vcpu, r3, r2); -} - -void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r2; - - r3 = vcpu_get_gr(vcpu, 
inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_pmd(vcpu, r3, r2); -} - -void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst) -{ - u64 r3, r2; - - r3 = vcpu_get_gr(vcpu, inst.M42.r3); - r2 = vcpu_get_gr(vcpu, inst.M42.r2); - vcpu_set_pkr(vcpu, r3, r2); -} - -void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_rr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_pkr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_dbr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_ibr(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_pmc(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg) -{ - /* FIXME: This could get called as a result of a rsvd-reg fault */ - if (reg > (ia64_get_cpuid(3) & 0xff)) - return 0; - else - return ia64_get_cpuid(reg); -} - -void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r3, r1; - - r3 = vcpu_get_gr(vcpu, inst.M43.r3); - r1 = vcpu_get_cpuid(vcpu, r3); - vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); -} - -void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val) -{ - VCPU(vcpu, tpr) = val; - vcpu->arch.irq_check = 1; -} - -unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long r2; - - r2 = vcpu_get_gr(vcpu, inst.M32.r2); - 
VCPU(vcpu, vcr[inst.M32.cr3]) = r2; - - switch (inst.M32.cr3) { - case 0: - vcpu_set_dcr(vcpu, r2); - break; - case 1: - vcpu_set_itm(vcpu, r2); - break; - case 66: - vcpu_set_tpr(vcpu, r2); - break; - case 67: - vcpu_set_eoi(vcpu, r2); - break; - default: - break; - } - - return 0; -} - -unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long tgt = inst.M33.r1; - unsigned long val; - - switch (inst.M33.cr3) { - case 65: - val = vcpu_get_ivr(vcpu); - vcpu_set_gr(vcpu, tgt, val, 0); - break; - - case 67: - vcpu_set_gr(vcpu, tgt, 0L, 0); - break; - default: - val = VCPU(vcpu, vcr[inst.M33.cr3]); - vcpu_set_gr(vcpu, tgt, val, 0); - break; - } - - return 0; -} - -void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) -{ - - unsigned long mask; - struct kvm_pt_regs *regs; - struct ia64_psr old_psr, new_psr; - - old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - - regs = vcpu_regs(vcpu); - /* We only support guest as: - * vpsr.pk = 0 - * vpsr.is = 0 - * Otherwise panic - */ - if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) - panic_vm(vcpu, "Only support guests with vpsr.pk =0 " - "& vpsr.is=0\n"); - - /* - * For those IA64_PSR bits: id/da/dd/ss/ed/ia - * Since these bits will become 0, after success execution of each - * instruction, we will change set them to mIA64_PSR - */ - VCPU(vcpu, vpsr) = val - & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | - IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA)); - - if (!old_psr.i && (val & IA64_PSR_I)) { - /* vpsr.i 0->1 */ - vcpu->arch.irq_check = 1; - } - new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - - /* - * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) - * , except for the following bits: - * ic/i/dt/si/rt/mc/it/bn/vm - */ - mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + - IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + - IA64_PSR_VM; - - regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask)); - - check_mm_mode_switch(vcpu, old_psr, new_psr); - - return ; 
-} - -unsigned long vcpu_cover(struct kvm_vcpu *vcpu) -{ - struct ia64_psr vpsr; - - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - - if (!vpsr.ic) - VCPU(vcpu, ifs) = regs->cr_ifs; - regs->cr_ifs = IA64_IFS_V; - return (IA64_NO_FAULT); -} - - - -/************************************************************************** - VCPU banked general register access routines - **************************************************************************/ -#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ - do { \ - __asm__ __volatile__ ( \ - ";;extr.u %0 = %3,%6,16;;\n" \ - "dep %1 = %0, %1, 0, 16;;\n" \ - "st8 [%4] = %1\n" \ - "extr.u %0 = %2, 16, 16;;\n" \ - "dep %3 = %0, %3, %6, 16;;\n" \ - "st8 [%5] = %3\n" \ - ::"r"(i), "r"(*b1unat), "r"(*b0unat), \ - "r"(*runat), "r"(b1unat), "r"(runat), \ - "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ - } while (0) - -void vcpu_bsw0(struct kvm_vcpu *vcpu) -{ - unsigned long i; - - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - unsigned long *r = ®s->r16; - unsigned long *b0 = &VCPU(vcpu, vbgr[0]); - unsigned long *b1 = &VCPU(vcpu, vgr[0]); - unsigned long *runat = ®s->eml_unat; - unsigned long *b0unat = &VCPU(vcpu, vbnat); - unsigned long *b1unat = &VCPU(vcpu, vnat); - - - if (VCPU(vcpu, vpsr) & IA64_PSR_BN) { - for (i = 0; i < 16; i++) { - *b1++ = *r; - *r++ = *b0++; - } - vcpu_bsw0_unat(i, b0unat, b1unat, runat, - VMM_PT_REGS_R16_SLOT); - VCPU(vcpu, vpsr) &= ~IA64_PSR_BN; - } -} - -#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ - do { \ - __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \ - "dep %1 = %0, %1, 16, 16;;\n" \ - "st8 [%4] = %1\n" \ - "extr.u %0 = %2, 0, 16;;\n" \ - "dep %3 = %0, %3, %6, 16;;\n" \ - "st8 [%5] = %3\n" \ - ::"r"(i), "r"(*b0unat), "r"(*b1unat), \ - "r"(*runat), "r"(b0unat), "r"(runat), \ - "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ - } while (0) - -void vcpu_bsw1(struct kvm_vcpu *vcpu) -{ - unsigned long i; - struct 
kvm_pt_regs *regs = vcpu_regs(vcpu); - unsigned long *r = ®s->r16; - unsigned long *b0 = &VCPU(vcpu, vbgr[0]); - unsigned long *b1 = &VCPU(vcpu, vgr[0]); - unsigned long *runat = ®s->eml_unat; - unsigned long *b0unat = &VCPU(vcpu, vbnat); - unsigned long *b1unat = &VCPU(vcpu, vnat); - - if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) { - for (i = 0; i < 16; i++) { - *b0++ = *r; - *r++ = *b1++; - } - vcpu_bsw1_unat(i, b0unat, b1unat, runat, - VMM_PT_REGS_R16_SLOT); - VCPU(vcpu, vpsr) |= IA64_PSR_BN; - } -} - -void vcpu_rfi(struct kvm_vcpu *vcpu) -{ - unsigned long ifs, psr; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - psr = VCPU(vcpu, ipsr); - if (psr & IA64_PSR_BN) - vcpu_bsw1(vcpu); - else - vcpu_bsw0(vcpu); - vcpu_set_psr(vcpu, psr); - ifs = VCPU(vcpu, ifs); - if (ifs >> 63) - regs->cr_ifs = ifs; - regs->cr_iip = VCPU(vcpu, iip); -} - -/* - VPSR can't keep track of below bits of guest PSR - This function gets guest PSR - */ - -unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu) -{ - unsigned long mask; - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - - mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | - IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI; - return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask); -} - -void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long vpsr; - unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21) - | inst.M44.imm; - - vpsr = vcpu_get_psr(vcpu); - vpsr &= (~imm24); - vcpu_set_psr(vcpu, vpsr); -} - -void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long vpsr; - unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) - | inst.M44.imm; - - vpsr = vcpu_get_psr(vcpu); - vpsr |= imm24; - vcpu_set_psr(vcpu, vpsr); -} - -/* Generate Mask - * Parameter: - * bit -- starting bit - * len -- how many bits - */ -#define MASK(bit,len) \ -({ \ - __u64 ret; \ - \ - __asm __volatile("dep %0=-1, r0, %1, %2"\ - : "=r" (ret): \ - "M" (bit), \ - "M" (len)); \ - ret; \ -}) - -void vcpu_set_psr_l(struct kvm_vcpu 
*vcpu, unsigned long val) -{ - val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32)); - vcpu_set_psr(vcpu, val); -} - -void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long val; - - val = vcpu_get_gr(vcpu, inst.M35.r2); - vcpu_set_psr_l(vcpu, val); -} - -void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst) -{ - unsigned long val; - - val = vcpu_get_psr(vcpu); - val = (val & MASK(0, 32)) | (val & MASK(35, 2)); - vcpu_set_gr(vcpu, inst.M33.r1, val, 0); -} - -void vcpu_increment_iip(struct kvm_vcpu *vcpu) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; - if (ipsr->ri == 2) { - ipsr->ri = 0; - regs->cr_iip += 16; - } else - ipsr->ri++; -} - -void vcpu_decrement_iip(struct kvm_vcpu *vcpu) -{ - struct kvm_pt_regs *regs = vcpu_regs(vcpu); - struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; - - if (ipsr->ri == 0) { - ipsr->ri = 2; - regs->cr_iip -= 16; - } else - ipsr->ri--; -} - -/** Emulate a privileged operation. - * - * - * @param vcpu virtual cpu - * @cause the reason cause virtualization fault - * @opcode the instruction code which cause virtualization fault - */ - -void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs) -{ - unsigned long status, cause, opcode ; - INST64 inst; - - status = IA64_NO_FAULT; - cause = VMX(vcpu, cause); - opcode = VMX(vcpu, opcode); - inst.inst = opcode; - /* - * Switch to actual virtual rid in rr0 and rr4, - * which is required by some tlb related instructions. 
- */ - prepare_if_physical_mode(vcpu); - - switch (cause) { - case EVENT_RSM: - kvm_rsm(vcpu, inst); - break; - case EVENT_SSM: - kvm_ssm(vcpu, inst); - break; - case EVENT_MOV_TO_PSR: - kvm_mov_to_psr(vcpu, inst); - break; - case EVENT_MOV_FROM_PSR: - kvm_mov_from_psr(vcpu, inst); - break; - case EVENT_MOV_FROM_CR: - kvm_mov_from_cr(vcpu, inst); - break; - case EVENT_MOV_TO_CR: - kvm_mov_to_cr(vcpu, inst); - break; - case EVENT_BSW_0: - vcpu_bsw0(vcpu); - break; - case EVENT_BSW_1: - vcpu_bsw1(vcpu); - break; - case EVENT_COVER: - vcpu_cover(vcpu); - break; - case EVENT_RFI: - vcpu_rfi(vcpu); - break; - case EVENT_ITR_D: - kvm_itr_d(vcpu, inst); - break; - case EVENT_ITR_I: - kvm_itr_i(vcpu, inst); - break; - case EVENT_PTR_D: - kvm_ptr_d(vcpu, inst); - break; - case EVENT_PTR_I: - kvm_ptr_i(vcpu, inst); - break; - case EVENT_ITC_D: - kvm_itc_d(vcpu, inst); - break; - case EVENT_ITC_I: - kvm_itc_i(vcpu, inst); - break; - case EVENT_PTC_L: - kvm_ptc_l(vcpu, inst); - break; - case EVENT_PTC_G: - kvm_ptc_g(vcpu, inst); - break; - case EVENT_PTC_GA: - kvm_ptc_ga(vcpu, inst); - break; - case EVENT_PTC_E: - kvm_ptc_e(vcpu, inst); - break; - case EVENT_MOV_TO_RR: - kvm_mov_to_rr(vcpu, inst); - break; - case EVENT_MOV_FROM_RR: - kvm_mov_from_rr(vcpu, inst); - break; - case EVENT_THASH: - kvm_thash(vcpu, inst); - break; - case EVENT_TTAG: - kvm_ttag(vcpu, inst); - break; - case EVENT_TPA: - status = kvm_tpa(vcpu, inst); - break; - case EVENT_TAK: - kvm_tak(vcpu, inst); - break; - case EVENT_MOV_TO_AR_IMM: - kvm_mov_to_ar_imm(vcpu, inst); - break; - case EVENT_MOV_TO_AR: - kvm_mov_to_ar_reg(vcpu, inst); - break; - case EVENT_MOV_FROM_AR: - kvm_mov_from_ar_reg(vcpu, inst); - break; - case EVENT_MOV_TO_DBR: - kvm_mov_to_dbr(vcpu, inst); - break; - case EVENT_MOV_TO_IBR: - kvm_mov_to_ibr(vcpu, inst); - break; - case EVENT_MOV_TO_PMC: - kvm_mov_to_pmc(vcpu, inst); - break; - case EVENT_MOV_TO_PMD: - kvm_mov_to_pmd(vcpu, inst); - break; - case EVENT_MOV_TO_PKR: - 
kvm_mov_to_pkr(vcpu, inst); - break; - case EVENT_MOV_FROM_DBR: - kvm_mov_from_dbr(vcpu, inst); - break; - case EVENT_MOV_FROM_IBR: - kvm_mov_from_ibr(vcpu, inst); - break; - case EVENT_MOV_FROM_PMC: - kvm_mov_from_pmc(vcpu, inst); - break; - case EVENT_MOV_FROM_PKR: - kvm_mov_from_pkr(vcpu, inst); - break; - case EVENT_MOV_FROM_CPUID: - kvm_mov_from_cpuid(vcpu, inst); - break; - case EVENT_VMSW: - status = IA64_FAULT; - break; - default: - break; - }; - /*Assume all status is NO_FAULT ?*/ - if (status == IA64_NO_FAULT && cause != EVENT_RFI) - vcpu_increment_iip(vcpu); - - recover_if_physical_mode(vcpu); -} - -void init_vcpu(struct kvm_vcpu *vcpu) -{ - int i; - - vcpu->arch.mode_flags = GUEST_IN_PHY; - VMX(vcpu, vrr[0]) = 0x38; - VMX(vcpu, vrr[1]) = 0x38; - VMX(vcpu, vrr[2]) = 0x38; - VMX(vcpu, vrr[3]) = 0x38; - VMX(vcpu, vrr[4]) = 0x38; - VMX(vcpu, vrr[5]) = 0x38; - VMX(vcpu, vrr[6]) = 0x38; - VMX(vcpu, vrr[7]) = 0x38; - VCPU(vcpu, vpsr) = IA64_PSR_BN; - VCPU(vcpu, dcr) = 0; - /* pta.size must not be 0. The minimum is 15 (32k) */ - VCPU(vcpu, pta) = 15 << 2; - VCPU(vcpu, itv) = 0x10000; - VCPU(vcpu, itm) = 0; - VMX(vcpu, last_itc) = 0; - - VCPU(vcpu, lid) = VCPU_LID(vcpu); - VCPU(vcpu, ivr) = 0; - VCPU(vcpu, tpr) = 0x10000; - VCPU(vcpu, eoi) = 0; - VCPU(vcpu, irr[0]) = 0; - VCPU(vcpu, irr[1]) = 0; - VCPU(vcpu, irr[2]) = 0; - VCPU(vcpu, irr[3]) = 0; - VCPU(vcpu, pmv) = 0x10000; - VCPU(vcpu, cmcv) = 0x10000; - VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */ - VCPU(vcpu, lrr1) = 0x10000; /* default reset value? 
*/ - update_vhpi(vcpu, NULL_VECTOR); - VLSAPIC_XTP(vcpu) = 0x80; /* disabled */ - - for (i = 0; i < 4; i++) - VLSAPIC_INSVC(vcpu, i) = 0; -} - -void kvm_init_all_rr(struct kvm_vcpu *vcpu) -{ - unsigned long psr; - - local_irq_save(psr); - - /* WARNING: not allow co-exist of both virtual mode and physical - * mode in same region - */ - - vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0])); - vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4])); - - if (is_physical_mode(vcpu)) { - if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) - panic_vm(vcpu, "Machine Status conflicts!\n"); - - ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); - ia64_dv_serialize_data(); - ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); - ia64_dv_serialize_data(); - } else { - ia64_set_rr((VRN0 << VRN_SHIFT), - vcpu->arch.metaphysical_saved_rr0); - ia64_dv_serialize_data(); - ia64_set_rr((VRN4 << VRN_SHIFT), - vcpu->arch.metaphysical_saved_rr4); - ia64_dv_serialize_data(); - } - ia64_set_rr((VRN1 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN1]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN2 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN2]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN3 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN3]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN5 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN5]))); - ia64_dv_serialize_data(); - ia64_set_rr((VRN7 << VRN_SHIFT), - vrrtomrr(VMX(vcpu, vrr[VRN7]))); - ia64_dv_serialize_data(); - ia64_srlz_d(); - ia64_set_psr(psr); -} - -int vmm_entry(void) -{ - struct kvm_vcpu *v; - v = current_vcpu; - - ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd, - 0, 0, 0, 0, 0, 0); - kvm_init_vtlb(v); - kvm_init_vhpt(v); - init_vcpu(v); - kvm_init_all_rr(v); - vmm_reset_entry(); - - return 0; -} - -static void kvm_show_registers(struct kvm_pt_regs *regs) -{ - unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; - - struct kvm_vcpu *vcpu = current_vcpu; - if (vcpu != NULL) - 
printk("vcpu 0x%p vcpu %d\n", - vcpu, vcpu->vcpu_id); - - printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n", - regs->cr_ipsr, regs->cr_ifs, ip); - - printk("unat: %016lx pfs : %016lx rsc : %016lx\n", - regs->ar_unat, regs->ar_pfs, regs->ar_rsc); - printk("rnat: %016lx bspstore: %016lx pr : %016lx\n", - regs->ar_rnat, regs->ar_bspstore, regs->pr); - printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", - regs->loadrs, regs->ar_ccv, regs->ar_fpsr); - printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); - printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, - regs->b6, regs->b7); - printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", - regs->f6.u.bits[1], regs->f6.u.bits[0], - regs->f7.u.bits[1], regs->f7.u.bits[0]); - printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", - regs->f8.u.bits[1], regs->f8.u.bits[0], - regs->f9.u.bits[1], regs->f9.u.bits[0]); - printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", - regs->f10.u.bits[1], regs->f10.u.bits[0], - regs->f11.u.bits[1], regs->f11.u.bits[0]); - - printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, - regs->r2, regs->r3); - printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, - regs->r9, regs->r10); - printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, - regs->r12, regs->r13); - printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, - regs->r15, regs->r16); - printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, - regs->r18, regs->r19); - printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, - regs->r21, regs->r22); - printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, - regs->r24, regs->r25); - printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, - regs->r27, regs->r28); - printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, - regs->r30, regs->r31); - -} - -void panic_vm(struct kvm_vcpu *v, const char *fmt, ...) 
-{ - va_list args; - char buf[256]; - - struct kvm_pt_regs *regs = vcpu_regs(v); - struct exit_ctl_data *p = &v->arch.exit_data; - va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); - va_end(args); - printk(buf); - kvm_show_registers(regs); - p->exit_reason = EXIT_REASON_VM_PANIC; - vmm_transition(v); - /*Never to return*/ - while (1); -} diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h deleted file mode 100644 index 988911b4cc7a..000000000000 --- a/arch/ia64/kvm/vcpu.h +++ /dev/null @@ -1,752 +0,0 @@ -/* - * vcpu.h: vcpu routines - * Copyright (c) 2005, Intel Corporation. - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) - * - * Copyright (c) 2007, Intel Corporation. - * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - - -#ifndef __KVM_VCPU_H__ -#define __KVM_VCPU_H__ - -#include <asm/types.h> -#include <asm/fpu.h> -#include <asm/processor.h> - -#ifndef __ASSEMBLY__ -#include "vti.h" - -#include <linux/kvm_host.h> -#include <linux/spinlock.h> - -typedef unsigned long IA64_INST; - -typedef union U_IA64_BUNDLE { - unsigned long i64[2]; - struct { unsigned long template:5, slot0:41, slot1a:18, - slot1b:23, slot2:41; }; - /* NOTE: following doesn't work because bitfields can't cross natural - size boundaries - struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */ -} IA64_BUNDLE; - -typedef union U_INST64_A5 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5, - imm9d:9, s:1, major:4; }; -} INST64_A5; - -typedef union U_INST64_B4 { - IA64_INST inst; - struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, - wh:2, d:1, un1:1, major:4; }; -} INST64_B4; - -typedef union U_INST64_B8 { - IA64_INST inst; - struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; }; -} INST64_B8; - -typedef union U_INST64_B9 { - IA64_INST inst; - struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; }; -} INST64_B9; - -typedef union U_INST64_I19 { - IA64_INST inst; - struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; }; -} INST64_I19; - -typedef union U_INST64_I26 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_I26; - -typedef union U_INST64_I27 { - IA64_INST inst; - struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; }; -} INST64_I27; - -typedef union U_INST64_I28 { /* not privileged (mov from AR) */ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_I28; - -typedef union U_INST64_M28 { - IA64_INST inst; - struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M28; - -typedef union U_INST64_M29 { - IA64_INST inst; - struct { unsigned long qp:6, :7, 
r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M29; - -typedef union U_INST64_M30 { - IA64_INST inst; - struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2, - x3:3, s:1, major:4; }; -} INST64_M30; - -typedef union U_INST64_M31 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M31; - -typedef union U_INST64_M32 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M32; - -typedef union U_INST64_M33 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M33; - -typedef union U_INST64_M35 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; - -} INST64_M35; - -typedef union U_INST64_M36 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; }; -} INST64_M36; - -typedef union U_INST64_M37 { - IA64_INST inst; - struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3, - i:1, major:4; }; -} INST64_M37; - -typedef union U_INST64_M41 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; -} INST64_M41; - -typedef union U_INST64_M42 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M42; - -typedef union U_INST64_M43 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M43; - -typedef union U_INST64_M44 { - IA64_INST inst; - struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; }; -} INST64_M44; - -typedef union U_INST64_M45 { - IA64_INST inst; - struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; -} INST64_M45; - -typedef union U_INST64_M46 { - IA64_INST inst; - struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, - x3:3, un1:1, major:4; }; -} INST64_M46; - -typedef union U_INST64_M47 { - IA64_INST inst; - struct { unsigned long qp:6, un14:14, r3:7, x6:6, 
x3:3, un1:1, major:4; }; -} INST64_M47; - -typedef union U_INST64_M1{ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M1; - -typedef union U_INST64_M2{ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M2; - -typedef union U_INST64_M3{ - IA64_INST inst; - struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M3; - -typedef union U_INST64_M4 { - IA64_INST inst; - struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M4; - -typedef union U_INST64_M5 { - IA64_INST inst; - struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M5; - -typedef union U_INST64_M6 { - IA64_INST inst; - struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M6; - -typedef union U_INST64_M9 { - IA64_INST inst; - struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M9; - -typedef union U_INST64_M10 { - IA64_INST inst; - struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M10; - -typedef union U_INST64_M12 { - IA64_INST inst; - struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2, - x6:6, m:1, major:4; }; -} INST64_M12; - -typedef union U_INST64_M15 { - IA64_INST inst; - struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2, - x6:6, s:1, major:4; }; -} INST64_M15; - -typedef union U_INST64 { - IA64_INST inst; - struct { unsigned long :37, major:4; } generic; - INST64_A5 A5; /* used in build_hypercall_bundle only */ - INST64_B4 B4; /* used in build_hypercall_bundle only */ - INST64_B8 B8; /* rfi, bsw.[01] */ - INST64_B9 B9; /* break.b */ - INST64_I19 I19; /* used in build_hypercall_bundle only */ - INST64_I26 I26; /* mov register to ar (I unit) */ - INST64_I27 I27; /* mov immediate to ar (I unit) */ - 
INST64_I28 I28; /* mov from ar (I unit) */ - INST64_M1 M1; /* ld integer */ - INST64_M2 M2; - INST64_M3 M3; - INST64_M4 M4; /* st integer */ - INST64_M5 M5; - INST64_M6 M6; /* ldfd floating pointer */ - INST64_M9 M9; /* stfd floating pointer */ - INST64_M10 M10; /* stfd floating pointer */ - INST64_M12 M12; /* ldfd pair floating pointer */ - INST64_M15 M15; /* lfetch + imm update */ - INST64_M28 M28; /* purge translation cache entry */ - INST64_M29 M29; /* mov register to ar (M unit) */ - INST64_M30 M30; /* mov immediate to ar (M unit) */ - INST64_M31 M31; /* mov from ar (M unit) */ - INST64_M32 M32; /* mov reg to cr */ - INST64_M33 M33; /* mov from cr */ - INST64_M35 M35; /* mov to psr */ - INST64_M36 M36; /* mov from psr */ - INST64_M37 M37; /* break.m */ - INST64_M41 M41; /* translation cache insert */ - INST64_M42 M42; /* mov to indirect reg/translation reg insert*/ - INST64_M43 M43; /* mov from indirect reg */ - INST64_M44 M44; /* set/reset system mask */ - INST64_M45 M45; /* translation purge */ - INST64_M46 M46; /* translation access (tpa,tak) */ - INST64_M47 M47; /* purge translation entry */ -} INST64; - -#define MASK_41 ((unsigned long)0x1ffffffffff) - -/* Virtual address memory attributes encoding */ -#define VA_MATTR_WB 0x0 -#define VA_MATTR_UC 0x4 -#define VA_MATTR_UCE 0x5 -#define VA_MATTR_WC 0x6 -#define VA_MATTR_NATPAGE 0x7 - -#define PMASK(size) (~((size) - 1)) -#define PSIZE(size) (1UL<<(size)) -#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits)) -#define PAGEALIGN(va, ps) CLEARLSB(va, ps) -#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53)) -#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */ - -#define ARCH_PAGE_SHIFT 12 - -#define INVALID_TI_TAG (1UL << 63) - -#define VTLB_PTE_P_BIT 0 -#define VTLB_PTE_IO_BIT 60 -#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT) -#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT) - -#define vcpu_quick_region_check(_tr_regions,_ifa) \ - (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) - 
-#define vcpu_quick_region_set(_tr_regions,_ifa) \ - do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) - -static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir, - u64 va, u64 rid) -{ - trp->page_flags = pte; - trp->itir = itir; - trp->vadr = va; - trp->rid = rid; -} - -extern u64 kvm_get_mpt_entry(u64 gpfn); - -/* Return I/ */ -static inline u64 __gpfn_is_io(u64 gpfn) -{ - u64 pte; - pte = kvm_get_mpt_entry(gpfn); - if (!(pte & GPFN_INV_MASK)) { - pte = pte & GPFN_IO_MASK; - if (pte != GPFN_PHYS_MMIO) - return pte; - } - return 0; -} -#endif -#define IA64_NO_FAULT 0 -#define IA64_FAULT 1 - -#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15) - -#define SW_BAD 0 /* Bad mode transitition */ -#define SW_V2P 1 /* Physical emulatino is activated */ -#define SW_P2V 2 /* Exit physical mode emulation */ -#define SW_SELF 3 /* No mode transition */ -#define SW_NOP 4 /* Mode transition, but without action required */ - -#define GUEST_IN_PHY 0x1 -#define GUEST_PHY_EMUL 0x2 - -#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP)) - -#define VRN_SHIFT 61 -#define VRN_MASK 0xe000000000000000 -#define VRN0 0x0UL -#define VRN1 0x1UL -#define VRN2 0x2UL -#define VRN3 0x3UL -#define VRN4 0x4UL -#define VRN5 0x5UL -#define VRN6 0x6UL -#define VRN7 0x7UL - -#define IRQ_NO_MASKED 0 -#define IRQ_MASKED_BY_VTPR 1 -#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */ - -#define PTA_BASE_SHIFT 15 - -#define IA64_PSR_VM_BIT 46 -#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT) - -/* Interruption Function State */ -#define IA64_IFS_V_BIT 63 -#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT) - -#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX) -#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX) - -#ifndef __ASSEMBLY__ - -#include <asm/gcc_intrin.h> - -#define is_physical_mode(v) \ - ((v->arch.mode_flags) & GUEST_IN_PHY) - -#define is_virtual_mode(v) \ - (!is_physical_mode(v)) - -#define 
MODE_IND(psr) \ - (((psr).it << 2) + ((psr).dt << 1) + (psr).rt) - -#ifndef CONFIG_SMP -#define _vmm_raw_spin_lock(x) do {}while(0) -#define _vmm_raw_spin_unlock(x) do {}while(0) -#else -typedef struct { - volatile unsigned int lock; -} vmm_spinlock_t; -#define _vmm_raw_spin_lock(x) \ - do { \ - __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ - __u64 ia64_spinlock_val; \ - ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ - if (unlikely(ia64_spinlock_val)) { \ - do { \ - while (*ia64_spinlock_ptr) \ - ia64_barrier(); \ - ia64_spinlock_val = \ - ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ - } while (ia64_spinlock_val); \ - } \ - } while (0) - -#define _vmm_raw_spin_unlock(x) \ - do { barrier(); \ - ((vmm_spinlock_t *)x)->lock = 0; } \ -while (0) -#endif - -void vmm_spin_lock(vmm_spinlock_t *lock); -void vmm_spin_unlock(vmm_spinlock_t *lock); -enum { - I_TLB = 1, - D_TLB = 2 -}; - -union kvm_va { - struct { - unsigned long off : 60; /* intra-region offset */ - unsigned long reg : 4; /* region number */ - } f; - unsigned long l; - void *p; -}; - -#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \ - _v.f.reg = 0; _v.l; }) -#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \ - _v.f.reg = -1; _v.p; }) - -#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \ - _v.rid; }) -#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \ - _v.ps; }) -#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \ - _v.ve; }) - -enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF }; -enum tlb_miss_type { INSTRUCTION, DATA, REGISTER }; - -#define VCPU(_v, _x) ((_v)->arch.vpd->_x) -#define VMX(_v, _x) ((_v)->arch._x) - -#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i]) -#define VLSAPIC_XTP(_v) VMX(_v, xtp) - -static inline unsigned long itir_ps(unsigned long itir) -{ - return ((itir >> 2) & 0x3f); -} - - -/************************************************************************** - VCPU control register access routines - 
**************************************************************************/ - -static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, itir)); -} - -static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, itir) = val; -} - -static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, ifa)); -} - -static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, ifa) = val; -} - -static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, iva)); -} - -static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, pta)); -} - -static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, lid)); -} - -static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, tpr)); -} - -static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu) -{ - return (0UL); /*reads of eoi always return 0 */ -} - -static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[0])); -} - -static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[1])); -} - -static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[2])); -} - -static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu) -{ - return ((u64)VCPU(vcpu, irr[3])); -} - -static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val) -{ - ia64_setreg(_IA64_REG_CR_DCR, val); -} - -static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, isr) = val; -} - -static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, lid) = val; -} - -static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, ipsr) = val; -} - -static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, iip) = val; -} - -static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, ifs) = val; -} - -static inline void 
vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, iipa) = val; -} - -static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val) -{ - VCPU(vcpu, iha) = val; -} - - -static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg) -{ - return vcpu->arch.vrr[reg>>61]; -} - -/************************************************************************** - VCPU debug breakpoint register access routines - **************************************************************************/ - -static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - __ia64_set_dbr(reg, val); -} - -static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - ia64_set_ibr(reg, val); -} - -static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg) -{ - return ((u64)__ia64_get_dbr(reg)); -} - -static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg) -{ - return ((u64)ia64_get_ibr(reg)); -} - -/************************************************************************** - VCPU performance monitor register access routines - **************************************************************************/ -static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - /* NOTE: Writes to unimplemented PMC registers are discarded */ - ia64_set_pmc(reg, val); -} - -static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val) -{ - /* NOTE: Writes to unimplemented PMD registers are discarded */ - ia64_set_pmd(reg, val); -} - -static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg) -{ - /* NOTE: Reads from unimplemented PMC registers return zero */ - return ((u64)ia64_get_pmc(reg)); -} - -static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg) -{ - /* NOTE: Reads from unimplemented PMD registers return zero */ - return ((u64)ia64_get_pmd(reg)); -} - -static inline unsigned long vrrtomrr(unsigned long val) -{ - union ia64_rr rr; - rr.val = val; - rr.rid = (rr.rid << 4) | 0xe; - if (rr.ps > PAGE_SHIFT) - rr.ps = PAGE_SHIFT; 
- rr.ve = 1; - return rr.val; -} - - -static inline int highest_bits(int *dat) -{ - u32 bits, bitnum; - int i; - - /* loop for all 256 bits */ - for (i = 7; i >= 0 ; i--) { - bits = dat[i]; - if (bits) { - bitnum = fls(bits); - return i * 32 + bitnum - 1; - } - } - return NULL_VECTOR; -} - -/* - * The pending irq is higher than the inservice one. - * - */ -static inline int is_higher_irq(int pending, int inservice) -{ - return ((pending > inservice) - || ((pending != NULL_VECTOR) - && (inservice == NULL_VECTOR))); -} - -static inline int is_higher_class(int pending, int mic) -{ - return ((pending >> 4) > mic); -} - -/* - * Return 0-255 for pending irq. - * NULL_VECTOR: when no pending. - */ -static inline int highest_pending_irq(struct kvm_vcpu *vcpu) -{ - if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR)) - return NMI_VECTOR; - if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR)) - return ExtINT_VECTOR; - - return highest_bits((int *)&VCPU(vcpu, irr[0])); -} - -static inline int highest_inservice_irq(struct kvm_vcpu *vcpu) -{ - if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR)) - return NMI_VECTOR; - if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR)) - return ExtINT_VECTOR; - - return highest_bits((int *)&(VMX(vcpu, insvc[0]))); -} - -extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val); -extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, - struct ia64_fpreg *val); -extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg); -extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, - u64 val, int nat); -extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu); -extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val); -extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); -extern void vcpu_bsw0(struct kvm_vcpu *vcpu); -extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, - u64 itir, u64 va, int type); -extern struct thash_data *vhpt_lookup(u64 va); -extern u64 guest_vhpt_lookup(u64 iha, u64 
*pte); -extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps); -extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps); -extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va); -extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, - u64 itir, u64 ifa, int type); -extern void thash_purge_all(struct kvm_vcpu *v); -extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v, - u64 va, int is_data); -extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, - u64 ps, int is_data); - -extern void vcpu_increment_iip(struct kvm_vcpu *v); -extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu); -extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec); -extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec); -extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr); -extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr); -extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr); -extern void nested_dtlb(struct kvm_vcpu *vcpu); -extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr); -extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref); - -extern void update_vhpi(struct kvm_vcpu *vcpu, int vec); -extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice); - -extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle); -extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma); -extern void vmm_transition(struct kvm_vcpu *vcpu); -extern void vmm_trampoline(union context *from, union context *to); -extern int vmm_entry(void); -extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu); - -extern void vmm_reset_entry(void); -void kvm_init_vtlb(struct kvm_vcpu *v); -void kvm_init_vhpt(struct kvm_vcpu *v); -void thash_init(struct thash_cb *hcb, u64 sz); - -void panic_vm(struct kvm_vcpu *v, const char *fmt, ...); -u64 kvm_gpa_to_mpa(u64 gpa); -extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, - u64 arg4, u64 arg5, u64 arg6, u64 
arg7); - -extern long vmm_sanity; - -#endif -#endif /* __VCPU_H__ */ diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c deleted file mode 100644 index 176a12cd56de..000000000000 --- a/arch/ia64/kvm/vmm.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * vmm.c: vmm module interface with kvm module - * - * Copyright (c) 2007, Intel Corporation. - * - * Xiantao Zhang (xiantao.zhang@intel.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- */ - - -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/fpswa.h> - -#include "vcpu.h" - -MODULE_AUTHOR("Intel"); -MODULE_LICENSE("GPL"); - -extern char kvm_ia64_ivt; -extern char kvm_asm_mov_from_ar; -extern char kvm_asm_mov_from_ar_sn2; -extern fpswa_interface_t *vmm_fpswa_interface; - -long vmm_sanity = 1; - -struct kvm_vmm_info vmm_info = { - .module = THIS_MODULE, - .vmm_entry = vmm_entry, - .tramp_entry = vmm_trampoline, - .vmm_ivt = (unsigned long)&kvm_ia64_ivt, - .patch_mov_ar = (unsigned long)&kvm_asm_mov_from_ar, - .patch_mov_ar_sn2 = (unsigned long)&kvm_asm_mov_from_ar_sn2, -}; - -static int __init kvm_vmm_init(void) -{ - - vmm_fpswa_interface = fpswa_interface; - - /*Register vmm data to kvm side*/ - return kvm_init(&vmm_info, 1024, 0, THIS_MODULE); -} - -static void __exit kvm_vmm_exit(void) -{ - kvm_exit(); - return ; -} - -void vmm_spin_lock(vmm_spinlock_t *lock) -{ - _vmm_raw_spin_lock(lock); -} - -void vmm_spin_unlock(vmm_spinlock_t *lock) -{ - _vmm_raw_spin_unlock(lock); -} - -static void vcpu_debug_exit(struct kvm_vcpu *vcpu) -{ - struct exit_ctl_data *p = &vcpu->arch.exit_data; - long psr; - - local_irq_save(psr); - p->exit_reason = EXIT_REASON_DEBUG; - vmm_transition(vcpu); - local_irq_restore(psr); -} - -asmlinkage int printk(const char *fmt, ...) 
-{ - struct kvm_vcpu *vcpu = current_vcpu; - va_list args; - int r; - - memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN); - va_start(args, fmt); - r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args); - va_end(args); - vcpu_debug_exit(vcpu); - return r; -} - -module_init(kvm_vmm_init) -module_exit(kvm_vmm_exit) diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S deleted file mode 100644 index 397e34a63e18..000000000000 --- a/arch/ia64/kvm/vmm_ivt.S +++ /dev/null @@ -1,1392 +0,0 @@ -/* - * arch/ia64/kvm/vmm_ivt.S - * - * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co - * Stephane Eranian <eranian@hpl.hp.com> - * David Mosberger <davidm@hpl.hp.com> - * Copyright (C) 2000, 2002-2003 Intel Co - * Asit Mallick <asit.k.mallick@intel.com> - * Suresh Siddha <suresh.b.siddha@intel.com> - * Kenneth Chen <kenneth.w.chen@intel.com> - * Fenghua Yu <fenghua.yu@intel.com> - * - * - * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling - * for SMP - * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB - * handler now uses virtual PT. - * - * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) - * Supporting Intel virtualization architecture - * - */ - -/* - * This file defines the interruption vector table used by the CPU. - * It does not include one entry per possible cause of interruption. - * - * The first 20 entries of the table contain 64 bundles each while the - * remaining 48 entries contain only 16 bundles each. - * - * The 64 bundles are used to allow inlining the whole handler for - * critical - * interruptions like TLB misses. 
- * - * For each entry, the comment is as follows: - * - * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss - * (12,51) - * entry offset ----/ / / / - * / - * entry number ---------/ / / - * / - * size of the entry -------------/ / - * / - * vector name -------------------------------------/ - * / - * interruptions triggering this vector - * ----------------------/ - * - * The table is 32KB in size and must be aligned on 32KB - * boundary. - * (The CPU ignores the 15 lower bits of the address) - * - * Table is based upon EAS2.6 (Oct 1999) - */ - - -#include <asm/asmmacro.h> -#include <asm/cache.h> -#include <asm/pgtable.h> - -#include "asm-offsets.h" -#include "vcpu.h" -#include "kvm_minstate.h" -#include "vti.h" - -#if 0 -# define PSR_DEFAULT_BITS psr.ac -#else -# define PSR_DEFAULT_BITS 0 -#endif - -#define KVM_FAULT(n) \ - kvm_fault_##n:; \ - mov r19=n;; \ - br.sptk.many kvm_vmm_panic; \ - ;; \ - -#define KVM_REFLECT(n) \ - mov r31=pr; \ - mov r19=n; /* prepare to save predicates */ \ - mov r29=cr.ipsr; \ - ;; \ - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ -(p7) br.sptk.many kvm_dispatch_reflection; \ - br.sptk.many kvm_vmm_panic; \ - -GLOBAL_ENTRY(kvm_vmm_panic) - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr. 
- addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - br.call.sptk.many b6=vmm_panic_handler; -END(kvm_vmm_panic) - - .section .text..ivt,"ax" - - .align 32768 // align on 32KB boundary - .global kvm_ia64_ivt -kvm_ia64_ivt: -/////////////////////////////////////////////////////////////// -// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) -ENTRY(kvm_vhpt_miss) - KVM_FAULT(0) -END(kvm_vhpt_miss) - - .org kvm_ia64_ivt+0x400 -//////////////////////////////////////////////////////////////// -// 0x0400 Entry 1 (size 64 bundles) ITLB (21) -ENTRY(kvm_itlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_itlb_miss - mov r19 = 1 - br.sptk kvm_itlb_miss_dispatch - KVM_FAULT(1); -END(kvm_itlb_miss) - - .org kvm_ia64_ivt+0x0800 -////////////////////////////////////////////////////////////////// -// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) -ENTRY(kvm_dtlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_dtlb_miss - br.sptk kvm_dtlb_miss_dispatch -END(kvm_dtlb_miss) - - .org kvm_ia64_ivt+0x0c00 -//////////////////////////////////////////////////////////////////// -// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) -ENTRY(kvm_alt_itlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - mov r24=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r17,r19 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi -END(kvm_alt_itlb_miss) - - .org kvm_ia64_ivt+0x1000 -///////////////////////////////////////////////////////////////////// -// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) -ENTRY(kvm_alt_dtlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - 
movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r24=cr.ipsr - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r19,r17 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi -END(kvm_alt_dtlb_miss) - - .org kvm_ia64_ivt+0x1400 -////////////////////////////////////////////////////////////////////// -// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) -ENTRY(kvm_nested_dtlb_miss) - KVM_FAULT(5) -END(kvm_nested_dtlb_miss) - - .org kvm_ia64_ivt+0x1800 -///////////////////////////////////////////////////////////////////// -// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) -ENTRY(kvm_ikey_miss) - KVM_REFLECT(6) -END(kvm_ikey_miss) - - .org kvm_ia64_ivt+0x1c00 -///////////////////////////////////////////////////////////////////// -// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) -ENTRY(kvm_dkey_miss) - KVM_REFLECT(7) -END(kvm_dkey_miss) - - .org kvm_ia64_ivt+0x2000 -//////////////////////////////////////////////////////////////////// -// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) -ENTRY(kvm_dirty_bit) - KVM_REFLECT(8) -END(kvm_dirty_bit) - - .org kvm_ia64_ivt+0x2400 -//////////////////////////////////////////////////////////////////// -// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) -ENTRY(kvm_iaccess_bit) - KVM_REFLECT(9) -END(kvm_iaccess_bit) - - .org kvm_ia64_ivt+0x2800 -/////////////////////////////////////////////////////////////////// -// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) -ENTRY(kvm_daccess_bit) - KVM_REFLECT(10) -END(kvm_daccess_bit) - - .org kvm_ia64_ivt+0x2c00 -///////////////////////////////////////////////////////////////// -// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) -ENTRY(kvm_break_fault) - mov r31=pr - mov r19=11 - mov r29=cr.ipsr - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,4,0 //(must be first in insn 
group!) - mov out0=cr.ifa - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.iim // FIXME: pity to make this slow access twice - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15)ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out1=16,sp - br.call.sptk.many b6=kvm_ia64_handle_break - ;; -END(kvm_break_fault) - - .org kvm_ia64_ivt+0x3000 -///////////////////////////////////////////////////////////////// -// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) -ENTRY(kvm_interrupt) - mov r31=pr // prepare to save predicates - mov r19=12 - mov r29=cr.ipsr - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT - tbit.z p0,p15=r29,IA64_PSR_I_BIT - ;; -(p7) br.sptk kvm_dispatch_interrupt - ;; - mov r27=ar.rsc /* M */ - mov r20=r1 /* A */ - mov r25=ar.unat /* M */ - mov r26=ar.pfs /* I */ - mov r28=cr.iip /* M */ - cover /* B (or nothing) */ - ;; - mov r1=sp - ;; - invala /* M */ - mov r30=cr.ifs - ;; - addl r1=-VMM_PT_REGS_SIZE,r1 - ;; - adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ - adds r16=PT(CR_IPSR),r1 - ;; - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES - st8 [r16]=r29 /* save cr.ipsr */ - ;; - lfetch.fault.excl.nt1 [r17] - mov r29=b0 - ;; - adds r16=PT(R8),r1 /* initialize first base pointer */ - adds r17=PT(R9),r1 /* initialize second base pointer */ - mov r18=r0 /* make sure r18 isn't NaT */ - ;; -.mem.offset 0,0; st8.spill [r16]=r8,16 -.mem.offset 8,0; st8.spill [r17]=r9,16 - ;; -.mem.offset 0,0; st8.spill [r16]=r10,24 -.mem.offset 8,0; st8.spill [r17]=r11,24 - ;; - st8 [r16]=r28,16 /* save cr.iip */ - st8 [r17]=r30,16 /* save cr.ifs */ - mov r8=ar.fpsr /* M */ - mov r9=ar.csd - mov r10=ar.ssd - movl r11=FPSR_DEFAULT /* L-unit */ - ;; - st8 [r16]=r25,16 /* save ar.unat */ - st8 [r17]=r26,16 /* save ar.pfs */ - shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ - ;; - 
st8 [r16]=r27,16 /* save ar.rsc */ - adds r17=16,r17 /* skip over ar_rnat field */ - ;; - st8 [r17]=r31,16 /* save predicates */ - adds r16=16,r16 /* skip over ar_bspstore field */ - ;; - st8 [r16]=r29,16 /* save b0 */ - st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ - ;; -.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ -.mem.offset 8,0; st8.spill [r17]=r12,16 - adds r12=-16,r1 - /* switch to kernel memory stack (with 16 bytes of scratch) */ - ;; -.mem.offset 0,0; st8.spill [r16]=r13,16 -.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ - ;; -.mem.offset 0,0; st8.spill [r16]=r15,16 -.mem.offset 8,0; st8.spill [r17]=r14,16 - dep r14=-1,r0,60,4 - ;; -.mem.offset 0,0; st8.spill [r16]=r2,16 -.mem.offset 8,0; st8.spill [r17]=r3,16 - adds r2=VMM_PT_REGS_R16_OFFSET,r1 - adds r14 = VMM_VCPU_GP_OFFSET,r13 - ;; - mov r8=ar.ccv - ld8 r14 = [r14] - ;; - mov r1=r14 /* establish kernel global pointer */ - ;; \ - bsw.1 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - mov out0=r13 - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; -.mem.offset 0,0; st8.spill [r2]=r16,16 -.mem.offset 8,0; st8.spill [r3]=r17,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r18,16 -.mem.offset 8,0; st8.spill [r3]=r19,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r20,16 -.mem.offset 8,0; st8.spill [r3]=r21,16 - mov r18=b6 - ;; -.mem.offset 0,0; st8.spill [r2]=r22,16 -.mem.offset 8,0; st8.spill [r3]=r23,16 - mov r19=b7 - ;; -.mem.offset 0,0; st8.spill [r2]=r24,16 -.mem.offset 8,0; st8.spill [r3]=r25,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r26,16 -.mem.offset 8,0; st8.spill [r3]=r27,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r28,16 -.mem.offset 8,0; st8.spill [r3]=r29,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r30,16 -.mem.offset 8,0; st8.spill [r3]=r31,32 - ;; - mov ar.fpsr=r11 /* M-unit */ - st8 [r2]=r8,8 /* ar.ccv */ - adds r24=PT(B6)-PT(F7),r3 - 
;; - stf.spill [r2]=f6,32 - stf.spill [r3]=f7,32 - ;; - stf.spill [r2]=f8,32 - stf.spill [r3]=f9,32 - ;; - stf.spill [r2]=f10 - stf.spill [r3]=f11 - adds r25=PT(B7)-PT(F11),r3 - ;; - st8 [r24]=r18,16 /* b6 */ - st8 [r25]=r19,16 /* b7 */ - ;; - st8 [r24]=r9 /* ar.csd */ - st8 [r25]=r10 /* ar.ssd */ - ;; - srlz.d // make sure we see the effect of cr.ivr - addl r14=@gprel(ia64_leave_nested),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_ia64_handle_irq - ;; -END(kvm_interrupt) - - .global kvm_dispatch_vexirq - .org kvm_ia64_ivt+0x3400 -////////////////////////////////////////////////////////////////////// -// 0x3400 Entry 13 (size 64 bundles) Reserved -ENTRY(kvm_virtual_exirq) - mov r31=pr - mov r19=13 - mov r30 =r0 - ;; -kvm_dispatch_vexirq: - cmp.eq p6,p0 = 1,r30 - ;; -(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; -(p6) ld8 r1 = [r29] - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r13 - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - KVM_SAVE_REST - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_vexirq -END(kvm_virtual_exirq) - - .org kvm_ia64_ivt+0x3800 -///////////////////////////////////////////////////////////////////// -// 0x3800 Entry 14 (size 64 bundles) Reserved - KVM_FAULT(14) - // this code segment is from 2.6.16.13 - - .org kvm_ia64_ivt+0x3c00 -/////////////////////////////////////////////////////////////////////// -// 0x3c00 Entry 15 (size 64 bundles) Reserved - KVM_FAULT(15) - - .org kvm_ia64_ivt+0x4000 -/////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - KVM_FAULT(16) - - .org kvm_ia64_ivt+0x4400 -////////////////////////////////////////////////////////////////////// -// 0x4400 Entry 17 (size 64 bundles) Reserved - KVM_FAULT(17) - - .org kvm_ia64_ivt+0x4800 
-////////////////////////////////////////////////////////////////////// -// 0x4800 Entry 18 (size 64 bundles) Reserved - KVM_FAULT(18) - - .org kvm_ia64_ivt+0x4c00 -////////////////////////////////////////////////////////////////////// -// 0x4c00 Entry 19 (size 64 bundles) Reserved - KVM_FAULT(19) - - .org kvm_ia64_ivt+0x5000 -////////////////////////////////////////////////////////////////////// -// 0x5000 Entry 20 (size 16 bundles) Page Not Present -ENTRY(kvm_page_not_present) - KVM_REFLECT(20) -END(kvm_page_not_present) - - .org kvm_ia64_ivt+0x5100 -/////////////////////////////////////////////////////////////////////// -// 0x5100 Entry 21 (size 16 bundles) Key Permission vector -ENTRY(kvm_key_permission) - KVM_REFLECT(21) -END(kvm_key_permission) - - .org kvm_ia64_ivt+0x5200 -////////////////////////////////////////////////////////////////////// -// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) -ENTRY(kvm_iaccess_rights) - KVM_REFLECT(22) -END(kvm_iaccess_rights) - - .org kvm_ia64_ivt+0x5300 -////////////////////////////////////////////////////////////////////// -// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) -ENTRY(kvm_daccess_rights) - KVM_REFLECT(23) -END(kvm_daccess_rights) - - .org kvm_ia64_ivt+0x5400 -///////////////////////////////////////////////////////////////////// -// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) -ENTRY(kvm_general_exception) - KVM_REFLECT(24) - KVM_FAULT(24) -END(kvm_general_exception) - - .org kvm_ia64_ivt+0x5500 -////////////////////////////////////////////////////////////////////// -// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) -ENTRY(kvm_disabled_fp_reg) - KVM_REFLECT(25) -END(kvm_disabled_fp_reg) - - .org kvm_ia64_ivt+0x5600 -//////////////////////////////////////////////////////////////////// -// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) -ENTRY(kvm_nat_consumption) - KVM_REFLECT(26) -END(kvm_nat_consumption) - - .org 
kvm_ia64_ivt+0x5700 -///////////////////////////////////////////////////////////////////// -// 0x5700 Entry 27 (size 16 bundles) Speculation (40) -ENTRY(kvm_speculation_vector) - KVM_REFLECT(27) -END(kvm_speculation_vector) - - .org kvm_ia64_ivt+0x5800 -///////////////////////////////////////////////////////////////////// -// 0x5800 Entry 28 (size 16 bundles) Reserved - KVM_FAULT(28) - - .org kvm_ia64_ivt+0x5900 -/////////////////////////////////////////////////////////////////// -// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) -ENTRY(kvm_debug_vector) - KVM_FAULT(29) -END(kvm_debug_vector) - - .org kvm_ia64_ivt+0x5a00 -/////////////////////////////////////////////////////////////// -// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) -ENTRY(kvm_unaligned_access) - KVM_REFLECT(30) -END(kvm_unaligned_access) - - .org kvm_ia64_ivt+0x5b00 -////////////////////////////////////////////////////////////////////// -// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -ENTRY(kvm_unsupported_data_reference) - KVM_REFLECT(31) -END(kvm_unsupported_data_reference) - - .org kvm_ia64_ivt+0x5c00 -//////////////////////////////////////////////////////////////////// -// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) -ENTRY(kvm_floating_point_fault) - KVM_REFLECT(32) -END(kvm_floating_point_fault) - - .org kvm_ia64_ivt+0x5d00 -///////////////////////////////////////////////////////////////////// -// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) -ENTRY(kvm_floating_point_trap) - KVM_REFLECT(33) -END(kvm_floating_point_trap) - - .org kvm_ia64_ivt+0x5e00 -////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -ENTRY(kvm_lower_privilege_trap) - KVM_REFLECT(34) -END(kvm_lower_privilege_trap) - - .org kvm_ia64_ivt+0x5f00 -////////////////////////////////////////////////////////////////////// -// 0x5f00 Entry 35 (size 16 bundles) Taken Branch 
Trap (68) -ENTRY(kvm_taken_branch_trap) - KVM_REFLECT(35) -END(kvm_taken_branch_trap) - - .org kvm_ia64_ivt+0x6000 -//////////////////////////////////////////////////////////////////// -// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) -ENTRY(kvm_single_step_trap) - KVM_REFLECT(36) -END(kvm_single_step_trap) - .global kvm_virtualization_fault_back - .org kvm_ia64_ivt+0x6100 -///////////////////////////////////////////////////////////////////// -// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault -ENTRY(kvm_virtualization_fault) - mov r31=pr - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - st8 [r16] = r1 - adds r17 = VMM_VCPU_GP_OFFSET, r21 - ;; - ld8 r1 = [r17] - cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 - cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 - cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 - cmp.eq p9,p0=EVENT_RSM,r24 - cmp.eq p10,p0=EVENT_SSM,r24 - cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 - cmp.eq p12,p0=EVENT_THASH,r24 -(p6) br.dptk.many kvm_asm_mov_from_ar -(p7) br.dptk.many kvm_asm_mov_from_rr -(p8) br.dptk.many kvm_asm_mov_to_rr -(p9) br.dptk.many kvm_asm_rsm -(p10) br.dptk.many kvm_asm_ssm -(p11) br.dptk.many kvm_asm_mov_to_psr -(p12) br.dptk.many kvm_asm_thash - ;; -kvm_virtualization_fault_back: - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 = [r16] - ;; - mov r19=37 - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - cmp.ne p6,p0=EVENT_RFI, r24 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] - ;; - adds r18=VMM_VPD_VIFS_OFFSET,r18 - ;; - ld8 r18=[r18] - ;; - tbit.z p6,p0=r18,63 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; -//if vifs.v=1 desert current register frame - alloc r18=ar.pfs,0,0,0,0 - br.sptk kvm_dispatch_virtualization_fault -END(kvm_virtualization_fault) - - .org kvm_ia64_ivt+0x6200 -////////////////////////////////////////////////////////////// -// 0x6200 Entry 38 (size 16 bundles) Reserved - KVM_FAULT(38) 
- - .org kvm_ia64_ivt+0x6300 -///////////////////////////////////////////////////////////////// -// 0x6300 Entry 39 (size 16 bundles) Reserved - KVM_FAULT(39) - - .org kvm_ia64_ivt+0x6400 -///////////////////////////////////////////////////////////////// -// 0x6400 Entry 40 (size 16 bundles) Reserved - KVM_FAULT(40) - - .org kvm_ia64_ivt+0x6500 -////////////////////////////////////////////////////////////////// -// 0x6500 Entry 41 (size 16 bundles) Reserved - KVM_FAULT(41) - - .org kvm_ia64_ivt+0x6600 -////////////////////////////////////////////////////////////////// -// 0x6600 Entry 42 (size 16 bundles) Reserved - KVM_FAULT(42) - - .org kvm_ia64_ivt+0x6700 -////////////////////////////////////////////////////////////////// -// 0x6700 Entry 43 (size 16 bundles) Reserved - KVM_FAULT(43) - - .org kvm_ia64_ivt+0x6800 -////////////////////////////////////////////////////////////////// -// 0x6800 Entry 44 (size 16 bundles) Reserved - KVM_FAULT(44) - - .org kvm_ia64_ivt+0x6900 -/////////////////////////////////////////////////////////////////// -// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception -//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) -ENTRY(kvm_ia32_exception) - KVM_FAULT(45) -END(kvm_ia32_exception) - - .org kvm_ia64_ivt+0x6a00 -//////////////////////////////////////////////////////////////////// -// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) -ENTRY(kvm_ia32_intercept) - KVM_FAULT(47) -END(kvm_ia32_intercept) - - .org kvm_ia64_ivt+0x6c00 -///////////////////////////////////////////////////////////////////// -// 0x6c00 Entry 48 (size 16 bundles) Reserved - KVM_FAULT(48) - - .org kvm_ia64_ivt+0x6d00 -////////////////////////////////////////////////////////////////////// -// 0x6d00 Entry 49 (size 16 bundles) Reserved - KVM_FAULT(49) - - .org kvm_ia64_ivt+0x6e00 -////////////////////////////////////////////////////////////////////// -// 0x6e00 Entry 50 (size 16 bundles) Reserved - KVM_FAULT(50) - - .org kvm_ia64_ivt+0x6f00 
-///////////////////////////////////////////////////////////////////// -// 0x6f00 Entry 51 (size 16 bundles) Reserved - KVM_FAULT(52) - - .org kvm_ia64_ivt+0x7100 -//////////////////////////////////////////////////////////////////// -// 0x7100 Entry 53 (size 16 bundles) Reserved - KVM_FAULT(53) - - .org kvm_ia64_ivt+0x7200 -///////////////////////////////////////////////////////////////////// -// 0x7200 Entry 54 (size 16 bundles) Reserved - KVM_FAULT(54) - - .org kvm_ia64_ivt+0x7300 -//////////////////////////////////////////////////////////////////// -// 0x7300 Entry 55 (size 16 bundles) Reserved - KVM_FAULT(55) - - .org kvm_ia64_ivt+0x7400 -//////////////////////////////////////////////////////////////////// -// 0x7400 Entry 56 (size 16 bundles) Reserved - KVM_FAULT(56) - - .org kvm_ia64_ivt+0x7500 -///////////////////////////////////////////////////////////////////// -// 0x7500 Entry 57 (size 16 bundles) Reserved - KVM_FAULT(57) - - .org kvm_ia64_ivt+0x7600 -///////////////////////////////////////////////////////////////////// -// 0x7600 Entry 58 (size 16 bundles) Reserved - KVM_FAULT(58) - - .org kvm_ia64_ivt+0x7700 -//////////////////////////////////////////////////////////////////// -// 0x7700 Entry 59 (size 16 bundles) Reserved - KVM_FAULT(59) - - .org kvm_ia64_ivt+0x7800 -//////////////////////////////////////////////////////////////////// -// 0x7800 Entry 60 (size 16 bundles) Reserved - KVM_FAULT(60) - - .org kvm_ia64_ivt+0x7900 -///////////////////////////////////////////////////////////////////// -// 0x7900 Entry 61 (size 16 bundles) Reserved - KVM_FAULT(61) - - .org kvm_ia64_ivt+0x7a00 -///////////////////////////////////////////////////////////////////// -// 0x7a00 Entry 62 (size 16 bundles) Reserved - KVM_FAULT(62) - - .org kvm_ia64_ivt+0x7b00 -///////////////////////////////////////////////////////////////////// -// 0x7b00 Entry 63 (size 16 bundles) Reserved - KVM_FAULT(63) - - .org kvm_ia64_ivt+0x7c00 
-//////////////////////////////////////////////////////////////////// -// 0x7c00 Entry 64 (size 16 bundles) Reserved - KVM_FAULT(64) - - .org kvm_ia64_ivt+0x7d00 -///////////////////////////////////////////////////////////////////// -// 0x7d00 Entry 65 (size 16 bundles) Reserved - KVM_FAULT(65) - - .org kvm_ia64_ivt+0x7e00 -///////////////////////////////////////////////////////////////////// -// 0x7e00 Entry 66 (size 16 bundles) Reserved - KVM_FAULT(66) - - .org kvm_ia64_ivt+0x7f00 -//////////////////////////////////////////////////////////////////// -// 0x7f00 Entry 67 (size 16 bundles) Reserved - KVM_FAULT(67) - - .org kvm_ia64_ivt+0x8000 -// There is no particular reason for this code to be here, other than that -// there happens to be space here that would go unused otherwise. If this -// fault ever gets "unreserved", simply moved the following code to a more -// suitable spot... - - -ENTRY(kvm_dtlb_miss_dispatch) - mov r19 = 2 - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault -END(kvm_dtlb_miss_dispatch) - -ENTRY(kvm_itlb_miss_dispatch) - - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault -END(kvm_itlb_miss_dispatch) - -ENTRY(kvm_dispatch_reflection) -/* - * Input: - * psr.ic: off - * r19: intr type (offset into ivt, see ia64_int.h) - * r31: contains saved 
predicates (pr) - */ - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out0=cr.ifa - mov out1=cr.isr - mov out2=cr.iim - mov out3=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out4=16,r12 - br.call.sptk.many b6=reflect_interruption -END(kvm_dispatch_reflection) - -ENTRY(kvm_dispatch_virtualization_fault) - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!) - mov out0=r13 //vcpu - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out1=16,sp //regs - br.call.sptk.many b6=kvm_emulate -END(kvm_dispatch_virtualization_fault) - - -ENTRY(kvm_dispatch_interrupt) - KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - ssm psr.ic - ;; - srlz.i - ;; - (p15) ssm psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - mov out0=r13 // pass pointer to pt_regs as second arg - br.call.sptk.many b6=kvm_ia64_handle_irq -END(kvm_dispatch_interrupt) - -GLOBAL_ENTRY(ia64_leave_nested) - rsm psr.i - ;; - adds r21=PT(PR)+16,r12 - ;; - lfetch [r21],PT(CR_IPSR)-PT(PR) - adds r2=PT(B6)+16,r12 - adds r3=PT(R16)+16,r12 - ;; - lfetch [r21] - ld8 r28=[r2],8 // load b6 - adds r29=PT(R24)+16,r12 - - ld8.fill r16=[r3] - adds r3=PT(AR_CSD)-PT(R16),r3 - adds r30=PT(AR_CCV)+16,r12 - ;; - ld8.fill r24=[r29] - ld8 r15=[r30] 
// load ar.ccv - ;; - ld8 r29=[r2],16 // load b7 - ld8 r30=[r3],16 // load ar.csd - ;; - ld8 r31=[r2],16 // load ar.ssd - ld8.fill r8=[r3],16 - ;; - ld8.fill r9=[r2],16 - ld8.fill r10=[r3],PT(R17)-PT(R10) - ;; - ld8.fill r11=[r2],PT(R18)-PT(R11) - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i - ;; - ld8.fill r22=[r2],24 - ld8.fill r23=[r3],24 - mov b6=r28 - ;; - ld8.fill r25=[r2],16 - ld8.fill r26=[r3],16 - mov b7=r29 - ;; - ld8.fill r27=[r2],16 - ld8.fill r28=[r3],16 - ;; - ld8.fill r29=[r2],16 - ld8.fill r30=[r3],24 - ;; - ld8.fill r31=[r2],PT(F9)-PT(R31) - adds r3=PT(F10)-PT(F6),r3 - ;; - ldf.fill f9=[r2],PT(F6)-PT(F9) - ldf.fill f10=[r3],PT(F8)-PT(F10) - ;; - ldf.fill f6=[r2],PT(F7)-PT(F6) - ;; - ldf.fill f7=[r2],PT(F11)-PT(F7) - ldf.fill f8=[r3],32 - ;; - srlz.i // ensure interruption collection is off - mov ar.ccv=r15 - ;; - bsw.0 // switch back to bank 0 (no stop bit required beforehand...) 
- ;; - ldf.fill f11=[r2] -// mov r18=r13 -// mov r21=r13 - adds r16=PT(CR_IPSR)+16,r12 - adds r17=PT(CR_IIP)+16,r12 - ;; - ld8 r29=[r16],16 // load cr.ipsr - ld8 r28=[r17],16 // load cr.iip - ;; - ld8 r30=[r16],16 // load cr.ifs - ld8 r25=[r17],16 // load ar.unat - ;; - ld8 r26=[r16],16 // load ar.pfs - ld8 r27=[r17],16 // load ar.rsc - cmp.eq p9,p0=r0,r0 - // set p9 to indicate that we should restore cr.ifs - ;; - ld8 r24=[r16],16 // load ar.rnat (may be garbage) - ld8 r23=[r17],16// load ar.bspstore (may be garbage) - ;; - ld8 r31=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r19=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 // load r1 - ;; - ld8.fill r12=[r16],16 - ld8.fill r13=[r17],16 - ;; - ld8 r20=[r16],16 // ar.fpsr - ld8.fill r15=[r17],16 - ;; - ld8.fill r14=[r16],16 - ld8.fill r2=[r17] - ;; - ld8.fill r3=[r16] - ;; - mov r16=ar.bsp // get existing backing store pointer - ;; - mov b0=r22 - mov ar.pfs=r26 - mov cr.ifs=r30 - mov cr.ipsr=r29 - mov ar.fpsr=r20 - mov cr.iip=r28 - ;; - mov ar.rsc=r27 - mov ar.unat=r25 - mov pr=r31,-1 - rfi -END(ia64_leave_nested) - -GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) -/* - * work.need_resched etc. 
mustn't get changed - *by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on: - */ - adds r2 = PT(R4)+16,r12 - adds r3 = PT(R5)+16,r12 - adds r8 = PT(EML_UNAT)+16,r12 - ;; - ld8 r8 = [r8] - ;; - mov ar.unat=r8 - ;; - ld8.fill r4=[r2],16 //load r4 - ld8.fill r5=[r3],16 //load r5 - ;; - ld8.fill r6=[r2] //load r6 - ld8.fill r7=[r3] //load r7 - ;; -END(ia64_leave_hypervisor_prepare) -//fall through -GLOBAL_ENTRY(ia64_leave_hypervisor) - rsm psr.i - ;; - br.call.sptk.many b0=leave_hypervisor_tail - ;; - adds r20=PT(PR)+16,r12 - adds r8=PT(EML_UNAT)+16,r12 - ;; - ld8 r8=[r8] - ;; - mov ar.unat=r8 - ;; - lfetch [r20],PT(CR_IPSR)-PT(PR) - adds r2 = PT(B6)+16,r12 - adds r3 = PT(B7)+16,r12 - ;; - lfetch [r20] - ;; - ld8 r24=[r2],16 /* B6 */ - ld8 r25=[r3],16 /* B7 */ - ;; - ld8 r26=[r2],16 /* ar_csd */ - ld8 r27=[r3],16 /* ar_ssd */ - mov b6 = r24 - ;; - ld8.fill r8=[r2],16 - ld8.fill r9=[r3],16 - mov b7 = r25 - ;; - mov ar.csd = r26 - mov ar.ssd = r27 - ;; - ld8.fill r10=[r2],PT(R15)-PT(R10) - ld8.fill r11=[r3],PT(R14)-PT(R11) - ;; - ld8.fill r15=[r2],PT(R16)-PT(R15) - ld8.fill r14=[r3],PT(R17)-PT(R14) - ;; - ld8.fill r16=[r2],16 - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - ;; - ld8.fill r22=[r2],16 - ld8.fill r23=[r3],16 - ;; - ld8.fill r24=[r2],16 - ld8.fill r25=[r3],16 - ;; - ld8.fill r26=[r2],16 - ld8.fill r27=[r3],16 - ;; - ld8.fill r28=[r2],16 - ld8.fill r29=[r3],16 - ;; - ld8.fill r30=[r2],PT(F6)-PT(R30) - ld8.fill r31=[r3],PT(F7)-PT(R31) - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i // ensure interruption collection is off - ;; - bsw.0 - ;; - adds r16 = PT(CR_IPSR)+16,r12 - adds r17 = PT(CR_IIP)+16,r12 - mov r21=r13 // get current - ;; - ld8 r31=[r16],16 // load cr.ipsr - ld8 r30=[r17],16 // load cr.iip - ;; - ld8 r29=[r16],16 // load cr.ifs - ld8 
r28=[r17],16 // load ar.unat - ;; - ld8 r27=[r16],16 // load ar.pfs - ld8 r26=[r17],16 // load ar.rsc - ;; - ld8 r25=[r16],16 // load ar.rnat - ld8 r24=[r17],16 // load ar.bspstore - ;; - ld8 r23=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r20=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 //load r1 - ;; - ld8.fill r12=[r16],16 //load r12 - ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 - ;; - ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr - ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 - ;; - ld8.fill r3=[r16] //load r3 - ld8 r18=[r17] //load ar_ccv - ;; - mov ar.fpsr=r19 - mov ar.ccv=r18 - shr.u r18=r20,16 - ;; -kvm_rbs_switch: - mov r19=96 - -kvm_dont_preserve_current_frame: -/* - * To prevent leaking bits between the hypervisor and guest domain, - * we must clear the stacked registers in the "invalid" partition here. - * 5 registers/cycle on McKinley). - */ -# define pRecurse p6 -# define pReturn p7 -# define Nregs 14 - - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r19 - mov in1=0 - ;; - TEXT_ALIGN(32) -kvm_rse_clear_invalid: - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 - // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 -(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 - // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 -(pReturn) br.ret.dptk.many b0 - -# undef pRecurse -# undef pReturn - -// loadrs has already been shifted - alloc r16=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; - mov ar.bspstore=r24 - ;; - mov ar.unat=r28 - mov 
ar.rnat=r25 - mov ar.rsc=r26 - ;; - mov cr.ipsr=r31 - mov cr.iip=r30 - mov cr.ifs=r29 - mov ar.pfs=r27 - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] //vpd - adds r17=VMM_VCPU_ISR_OFFSET,r21 - ;; - ld8 r17=[r17] - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] //vpsr - mov r25=r18 - adds r16= VMM_VCPU_GP_OFFSET,r21 - ;; - ld8 r16= [r16] // Put gp in r24 - movl r24=@gprel(ia64_vmm_entry) // calculate return address - ;; - add r24=r24,r16 - ;; - br.sptk.many kvm_vps_sync_write // call the service - ;; -END(ia64_leave_hypervisor) -// fall through -GLOBAL_ENTRY(ia64_vmm_entry) -/* - * must be at bank 0 - * parameter: - * r17:cr.isr - * r18:vpd - * r19:vpsr - * r22:b0 - * r23:predicate - */ - mov r24=r22 - mov r25=r18 - tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic -(p1) br.cond.sptk.few kvm_vps_resume_normal -(p2) br.cond.sptk.many kvm_vps_resume_handler - ;; -END(ia64_vmm_entry) - -/* - * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, - * u64 arg3, u64 arg4, u64 arg5, - * u64 arg6, u64 arg7); - * - * XXX: The currently defined services use only 4 args at the max. The - * rest are not consumed. 
- */ -GLOBAL_ENTRY(ia64_call_vsa) - .regstk 4,4,0,0 - -rpsave = loc0 -pfssave = loc1 -psrsave = loc2 -entry = loc3 -hostret = r24 - - alloc pfssave=ar.pfs,4,4,0,0 - mov rpsave=rp - adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 - ;; - ld8 entry=[entry] -1: mov hostret=ip - mov r25=in1 // copy arguments - mov r26=in2 - mov r27=in3 - mov psrsave=psr - ;; - tbit.nz p6,p0=psrsave,14 // IA64_PSR_I - tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC - ;; - add hostret=2f-1b,hostret // calculate return address - add entry=entry,in0 - ;; - rsm psr.i | psr.ic - ;; - srlz.i - mov b6=entry - br.cond.sptk b6 // call the service -2: -// Architectural sequence for enabling interrupts if necessary -(p7) ssm psr.ic - ;; -(p7) srlz.i - ;; -(p6) ssm psr.i - ;; - mov rp=rpsave - mov ar.pfs=pfssave - mov r8=r31 - ;; - srlz.d - br.ret.sptk rp - -END(ia64_call_vsa) - -#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) - -GLOBAL_ENTRY(vmm_reset_entry) - //set up ipsr, iip, vpd.vpsr, dcr - // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 - // For DCR: all bits 0 - bsw.0 - ;; - mov r21 =r13 - adds r14=-VMM_PT_REGS_SIZE, r12 - ;; - movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 - movl r10=0x8000000000000000 - adds r16=PT(CR_IIP), r14 - adds r20=PT(R1), r14 - ;; - rsm psr.ic | psr.i - ;; - srlz.i - ;; - mov ar.rsc = 0 - ;; - flushrs - ;; - mov ar.bspstore = 0 - // clear BSPSTORE - ;; - mov cr.ipsr=r6 - mov cr.ifs=r10 - ld8 r4 = [r16] // Set init iip for first run. - ld8 r1 = [r20] - ;; - mov cr.iip=r4 - adds r16=VMM_VPD_BASE_OFFSET,r13 - ;; - ld8 r18=[r16] - ;; - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] - mov r17=r0 - mov r22=r0 - mov r23=r0 - br.cond.sptk ia64_vmm_entry - br.ret.sptk b0 -END(vmm_reset_entry) diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h deleted file mode 100644 index b214b5b0432d..000000000000 --- a/arch/ia64/kvm/vti.h +++ /dev/null @@ -1,290 +0,0 @@ -/* - * vti.h: prototype for generial vt related interface - * Copyright (c) 2004, Intel Corporation. 
- * - * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) - * Fred Yang (fred.yang@intel.com) - * Kun Tian (Kevin Tian) (kevin.tian@intel.com) - * - * Copyright (c) 2007, Intel Corporation. - * Zhang xiantao <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ -#ifndef _KVM_VT_I_H -#define _KVM_VT_I_H - -#ifndef __ASSEMBLY__ -#include <asm/page.h> - -#include <linux/kvm_host.h> - -/* define itr.i and itr.d in ia64_itr function */ -#define ITR 0x01 -#define DTR 0x02 -#define IaDTR 0x03 - -#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/ -#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/ - -#define RR6 (6UL<<61) -#define RR7 (7UL<<61) - - -/* config_options in pal_vp_init_env */ -#define VP_INITIALIZE 1UL -#define VP_FR_PMC 1UL<<1 -#define VP_OPCODE 1UL<<8 -#define VP_CAUSE 1UL<<9 -#define VP_FW_ACC 1UL<<63 - -/* init vp env with initializing vm_buffer */ -#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\ - VP_OPCODE | VP_CAUSE | VP_FW_ACC) -/* init vp env without initializing vm_buffer */ -#define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC - -#define PAL_VP_CREATE 265 -/* Stacked Virt. Initializes a new VPD for the operation of - * a new virtual processor in the virtual environment. - */ -#define PAL_VP_ENV_INFO 266 -/*Stacked Virt. 
Returns the parameters needed to enter a virtual environment.*/ -#define PAL_VP_EXIT_ENV 267 -/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/ -#define PAL_VP_INIT_ENV 268 -/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/ -#define PAL_VP_REGISTER 269 -/*Stacked Virt. Register a different host IVT for the virtual processor.*/ -#define PAL_VP_RESUME 270 -/* Renamed from PAL_VP_RESUME */ -#define PAL_VP_RESTORE 270 -/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/ -#define PAL_VP_SUSPEND 271 -/* Renamed from PAL_VP_SUSPEND */ -#define PAL_VP_SAVE 271 -/* Stacked Virt. Suspends operation for the specified virtual processor on - * the logical processor. - */ -#define PAL_VP_TERMINATE 272 -/* Stacked Virt. Terminates operation for the specified virtual processor.*/ - -union vac { - unsigned long value; - struct { - unsigned int a_int:1; - unsigned int a_from_int_cr:1; - unsigned int a_to_int_cr:1; - unsigned int a_from_psr:1; - unsigned int a_from_cpuid:1; - unsigned int a_cover:1; - unsigned int a_bsw:1; - long reserved:57; - }; -}; - -union vdc { - unsigned long value; - struct { - unsigned int d_vmsw:1; - unsigned int d_extint:1; - unsigned int d_ibr_dbr:1; - unsigned int d_pmc:1; - unsigned int d_to_pmd:1; - unsigned int d_itm:1; - long reserved:58; - }; -}; - -struct vpd { - union vac vac; - union vdc vdc; - unsigned long virt_env_vaddr; - unsigned long reserved1[29]; - unsigned long vhpi; - unsigned long reserved2[95]; - unsigned long vgr[16]; - unsigned long vbgr[16]; - unsigned long vnat; - unsigned long vbnat; - unsigned long vcpuid[5]; - unsigned long reserved3[11]; - unsigned long vpsr; - unsigned long vpr; - unsigned long reserved4[76]; - union { - unsigned long vcr[128]; - struct { - unsigned long dcr; - unsigned long itm; - unsigned long iva; - unsigned long rsv1[5]; - unsigned long pta; - unsigned long rsv2[7]; - unsigned long ipsr; - unsigned long isr; - unsigned long 
rsv3; - unsigned long iip; - unsigned long ifa; - unsigned long itir; - unsigned long iipa; - unsigned long ifs; - unsigned long iim; - unsigned long iha; - unsigned long rsv4[38]; - unsigned long lid; - unsigned long ivr; - unsigned long tpr; - unsigned long eoi; - unsigned long irr[4]; - unsigned long itv; - unsigned long pmv; - unsigned long cmcv; - unsigned long rsv5[5]; - unsigned long lrr0; - unsigned long lrr1; - unsigned long rsv6[46]; - }; - }; - unsigned long reserved5[128]; - unsigned long reserved6[3456]; - unsigned long vmm_avail[128]; - unsigned long reserved7[4096]; -}; - -#define PAL_PROC_VM_BIT (1UL << 40) -#define PAL_PROC_VMSW_BIT (1UL << 54) - -static inline s64 ia64_pal_vp_env_info(u64 *buffer_size, - u64 *vp_env_info) -{ - struct ia64_pal_retval iprv; - PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0); - *buffer_size = iprv.v0; - *vp_env_info = iprv.v1; - return iprv.status; -} - -static inline s64 ia64_pal_vp_exit_env(u64 iva) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0); - return iprv.status; -} - -static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr, - u64 vbase_addr, u64 *vsa_base) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr, - vbase_addr); - *vsa_base = iprv.v0; - - return iprv.status; -} - -static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0); - - return iprv.status; -} - -static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector) -{ - struct ia64_pal_retval iprv; - - PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0); - - return iprv.status; -} - -#endif - -/*VPD field offset*/ -#define VPD_VAC_START_OFFSET 0 -#define VPD_VDC_START_OFFSET 8 -#define VPD_VHPI_START_OFFSET 256 -#define VPD_VGR_START_OFFSET 1024 -#define VPD_VBGR_START_OFFSET 1152 -#define VPD_VNAT_START_OFFSET 1280 
-#define VPD_VBNAT_START_OFFSET 1288 -#define VPD_VCPUID_START_OFFSET 1296 -#define VPD_VPSR_START_OFFSET 1424 -#define VPD_VPR_START_OFFSET 1432 -#define VPD_VRSE_CFLE_START_OFFSET 1440 -#define VPD_VCR_START_OFFSET 2048 -#define VPD_VTPR_START_OFFSET 2576 -#define VPD_VRR_START_OFFSET 3072 -#define VPD_VMM_VAIL_START_OFFSET 31744 - -/*Virtualization faults*/ - -#define EVENT_MOV_TO_AR 1 -#define EVENT_MOV_TO_AR_IMM 2 -#define EVENT_MOV_FROM_AR 3 -#define EVENT_MOV_TO_CR 4 -#define EVENT_MOV_FROM_CR 5 -#define EVENT_MOV_TO_PSR 6 -#define EVENT_MOV_FROM_PSR 7 -#define EVENT_ITC_D 8 -#define EVENT_ITC_I 9 -#define EVENT_MOV_TO_RR 10 -#define EVENT_MOV_TO_DBR 11 -#define EVENT_MOV_TO_IBR 12 -#define EVENT_MOV_TO_PKR 13 -#define EVENT_MOV_TO_PMC 14 -#define EVENT_MOV_TO_PMD 15 -#define EVENT_ITR_D 16 -#define EVENT_ITR_I 17 -#define EVENT_MOV_FROM_RR 18 -#define EVENT_MOV_FROM_DBR 19 -#define EVENT_MOV_FROM_IBR 20 -#define EVENT_MOV_FROM_PKR 21 -#define EVENT_MOV_FROM_PMC 22 -#define EVENT_MOV_FROM_CPUID 23 -#define EVENT_SSM 24 -#define EVENT_RSM 25 -#define EVENT_PTC_L 26 -#define EVENT_PTC_G 27 -#define EVENT_PTC_GA 28 -#define EVENT_PTR_D 29 -#define EVENT_PTR_I 30 -#define EVENT_THASH 31 -#define EVENT_TTAG 32 -#define EVENT_TPA 33 -#define EVENT_TAK 34 -#define EVENT_PTC_E 35 -#define EVENT_COVER 36 -#define EVENT_RFI 37 -#define EVENT_BSW_0 38 -#define EVENT_BSW_1 39 -#define EVENT_VMSW 40 - -/**PAL virtual services offsets */ -#define PAL_VPS_RESUME_NORMAL 0x0000 -#define PAL_VPS_RESUME_HANDLER 0x0400 -#define PAL_VPS_SYNC_READ 0x0800 -#define PAL_VPS_SYNC_WRITE 0x0c00 -#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000 -#define PAL_VPS_THASH 0x1400 -#define PAL_VPS_TTAG 0x1800 -#define PAL_VPS_RESTORE 0x1c00 -#define PAL_VPS_SAVE 0x2000 - -#endif/* _VT_I_H*/ diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c deleted file mode 100644 index a7869f8f49a6..000000000000 --- a/arch/ia64/kvm/vtlb.c +++ /dev/null @@ -1,640 +0,0 @@ -/* - * vtlb.c: guest virtual tlb 
handling module. - * Copyright (c) 2004, Intel Corporation. - * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com> - * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> - * - * Copyright (c) 2007, Intel Corporation. - * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> - * Xiantao Zhang <xiantao.zhang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include "vcpu.h" - -#include <linux/rwsem.h> - -#include <asm/tlb.h> - -/* - * Check to see if the address rid:va is translated by the TLB - */ - -static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va) -{ - return ((trp->p) && (trp->rid == rid) - && ((va-trp->vadr) < PSIZE(trp->ps))); -} - -/* - * Only for GUEST TR format. 
- */ -static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva) -{ - u64 sa1, ea1; - - if (!trp->p || trp->rid != rid) - return 0; - - sa1 = trp->vadr; - ea1 = sa1 + PSIZE(trp->ps) - 1; - eva -= 1; - if ((sva > ea1) || (sa1 > eva)) - return 0; - else - return 1; - -} - -void machine_tlb_purge(u64 va, u64 ps) -{ - ia64_ptcl(va, ps << 2); -} - -void local_flush_tlb_all(void) -{ - int i, j; - unsigned long flags, count0, count1; - unsigned long stride0, stride1, addr; - - addr = current_vcpu->arch.ptce_base; - count0 = current_vcpu->arch.ptce_count[0]; - count1 = current_vcpu->arch.ptce_count[1]; - stride0 = current_vcpu->arch.ptce_stride[0]; - stride1 = current_vcpu->arch.ptce_stride[1]; - - local_irq_save(flags); - for (i = 0; i < count0; ++i) { - for (j = 0; j < count1; ++j) { - ia64_ptce(addr); - addr += stride1; - } - addr += stride0; - } - local_irq_restore(flags); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref) -{ - union ia64_rr vrr; - union ia64_pta vpta; - struct ia64_psr vpsr; - - vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); - vrr.val = vcpu_get_rr(vcpu, vadr); - vpta.val = vcpu_get_pta(vcpu); - - if (vrr.ve & vpta.ve) { - switch (ref) { - case DATA_REF: - case NA_REF: - return vpsr.dt; - case INST_REF: - return vpsr.dt && vpsr.it && vpsr.ic; - case RSE_REF: - return vpsr.dt && vpsr.rt; - - } - } - return 0; -} - -struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag) -{ - u64 index, pfn, rid, pfn_bits; - - pfn_bits = vpta.size - 5 - 8; - pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr); - rid = _REGION_ID(vrr); - index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1)); - *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16); - - return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) + - (index << 5)); -} - -struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) -{ - - struct thash_data *trp; - int i; - u64 
rid; - - rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK; - if (type == D_TLB) { - if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; - i < NDTRS; i++, trp++) { - if (__is_tr_translated(trp, rid, va)) - return trp; - } - } - } else { - if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; - i < NITRS; i++, trp++) { - if (__is_tr_translated(trp, rid, va)) - return trp; - } - } - } - - return NULL; -} - -static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) -{ - union ia64_rr rr; - struct thash_data *head; - unsigned long ps, gpaddr; - - ps = itir_ps(itir); - rr.val = ia64_get_rr(ifa); - - gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | - (ifa & ((1UL << ps) - 1)); - - head = (struct thash_data *)ia64_thash(ifa); - head->etag = INVALID_TI_TAG; - ia64_mf(); - head->page_flags = pte & ~PAGE_FLAGS_RV_MASK; - head->itir = rr.ps << 2; - head->etag = ia64_ttag(ifa); - head->gpaddr = gpaddr; -} - -void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) -{ - u64 i, dirty_pages = 1; - u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; - vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); - void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; - - dirty_pages <<= ps <= PAGE_SHIFT ? 
0 : ps - PAGE_SHIFT; - - vmm_spin_lock(lock); - for (i = 0; i < dirty_pages; i++) { - /* avoid RMW */ - if (!test_bit(base_gfn + i, dirty_bitmap)) - set_bit(base_gfn + i , dirty_bitmap); - } - vmm_spin_unlock(lock); -} - -void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) -{ - u64 phy_pte, psr; - union ia64_rr mrr; - - mrr.val = ia64_get_rr(va); - phy_pte = translate_phy_pte(&pte, itir, va); - - if (itir_ps(itir) >= mrr.ps) { - vhpt_insert(phy_pte, itir, va, pte); - } else { - phy_pte &= ~PAGE_FLAGS_RV_MASK; - psr = ia64_clear_ic(); - ia64_itc(type, va, phy_pte, itir_ps(itir)); - paravirt_dv_serialize_data(); - ia64_set_psr(psr); - } - - if (!(pte&VTLB_PTE_IO)) - mark_pages_dirty(v, pte, itir_ps(itir)); -} - -/* - * vhpt lookup - */ -struct thash_data *vhpt_lookup(u64 va) -{ - struct thash_data *head; - u64 tag; - - head = (struct thash_data *)ia64_thash(va); - tag = ia64_ttag(va); - if (head->etag == tag) - return head; - return NULL; -} - -u64 guest_vhpt_lookup(u64 iha, u64 *pte) -{ - u64 ret; - struct thash_data *data; - - data = __vtr_lookup(current_vcpu, iha, D_TLB); - if (data != NULL) - thash_vhpt_insert(current_vcpu, data->page_flags, - data->itir, iha, D_TLB); - - asm volatile ("rsm psr.ic|psr.i;;" - "srlz.d;;" - "ld8.s r9=[%1];;" - "tnat.nz p6,p7=r9;;" - "(p6) mov %0=1;" - "(p6) mov r9=r0;" - "(p7) extr.u r9=r9,0,53;;" - "(p7) mov %0=r0;" - "(p7) st8 [%2]=r9;;" - "ssm psr.ic;;" - "srlz.d;;" - "ssm psr.i;;" - "srlz.d;;" - : "=&r"(ret) : "r"(iha), "r"(pte) : "memory"); - - return ret; -} - -/* - * purge software guest tlb - */ - -static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps) -{ - struct thash_data *cur; - u64 start, curadr, size, psbits, tag, rr_ps, num; - union ia64_rr vrr; - struct thash_cb *hcb = &v->arch.vtlb; - - vrr.val = vcpu_get_rr(v, va); - psbits = VMX(v, psbits[(va >> 61)]); - start = va & ~((1UL << ps) - 1); - while (psbits) { - curadr = start; - rr_ps = __ffs(psbits); - psbits &= ~(1UL << rr_ps); - num 
= 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps)); - size = PSIZE(rr_ps); - vrr.ps = rr_ps; - while (num) { - cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag); - if (cur->etag == tag && cur->ps == rr_ps) - cur->etag = INVALID_TI_TAG; - curadr += size; - num--; - } - } -} - - -/* - * purge VHPT and machine TLB - */ -static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps) -{ - struct thash_data *cur; - u64 start, size, tag, num; - union ia64_rr rr; - - start = va & ~((1UL << ps) - 1); - rr.val = ia64_get_rr(va); - size = PSIZE(rr.ps); - num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps)); - while (num) { - cur = (struct thash_data *)ia64_thash(start); - tag = ia64_ttag(start); - if (cur->etag == tag) - cur->etag = INVALID_TI_TAG; - start += size; - num--; - } - machine_tlb_purge(va, ps); -} - -/* - * Insert an entry into hash TLB or VHPT. - * NOTES: - * 1: When inserting VHPT to thash, "va" is a must covered - * address by the inserted machine VHPT entry. - * 2: The format of entry is always in TLB. - * 3: The caller need to make sure the new entry will not overlap - * with any existed entry. 
- */ -void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va) -{ - struct thash_data *head; - union ia64_rr vrr; - u64 tag; - struct thash_cb *hcb = &v->arch.vtlb; - - vrr.val = vcpu_get_rr(v, va); - vrr.ps = itir_ps(itir); - VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); - head = vsa_thash(hcb->pta, va, vrr.val, &tag); - head->page_flags = pte; - head->itir = itir; - head->etag = tag; -} - -int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type) -{ - struct thash_data *trp; - int i; - u64 end, rid; - - rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK; - end = va + PSIZE(ps); - if (type == D_TLB) { - if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; - i < NDTRS; i++, trp++) { - if (__is_tr_overlap(trp, rid, va, end)) - return i; - } - } - } else { - if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { - for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; - i < NITRS; i++, trp++) { - if (__is_tr_overlap(trp, rid, va, end)) - return i; - } - } - } - return -1; -} - -/* - * Purge entries in VTLB and VHPT - */ -void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps) -{ - if (vcpu_quick_region_check(v->arch.tc_regions, va)) - vtlb_purge(v, va, ps); - vhpt_purge(v, va, ps); -} - -void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) -{ - u64 old_va = va; - va = REGION_OFFSET(va); - if (vcpu_quick_region_check(v->arch.tc_regions, old_va)) - vtlb_purge(v, va, ps); - vhpt_purge(v, va, ps); -} - -u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) -{ - u64 ps, ps_mask, paddr, maddr, io_mask; - union pte_flags phy_pte; - - ps = itir_ps(itir); - ps_mask = ~((1UL << ps) - 1); - phy_pte.val = *pte; - paddr = *pte; - paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); - maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT); - io_mask = maddr & GPFN_IO_MASK; - if (io_mask && (io_mask != GPFN_PHYS_MMIO)) { - *pte |= VTLB_PTE_IO; - return -1; - } - maddr = 
((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | - (paddr & ~PAGE_MASK); - phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; - return phy_pte.val; -} - -/* - * Purge overlap TCs and then insert the new entry to emulate itc ops. - * Notes: Only TC entry can purge and insert. - */ -void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, - u64 ifa, int type) -{ - u64 ps; - u64 phy_pte, io_mask, index; - union ia64_rr vrr, mrr; - - ps = itir_ps(itir); - vrr.val = vcpu_get_rr(v, ifa); - mrr.val = ia64_get_rr(ifa); - - index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; - io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK; - phy_pte = translate_phy_pte(&pte, itir, ifa); - - /* Ensure WB attribute if pte is related to a normal mem page, - * which is required by vga acceleration since qemu maps shared - * vram buffer with WB. - */ - if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) && - io_mask != GPFN_PHYS_MMIO) { - pte &= ~_PAGE_MA_MASK; - phy_pte &= ~_PAGE_MA_MASK; - } - - vtlb_purge(v, ifa, ps); - vhpt_purge(v, ifa, ps); - - if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) { - vtlb_insert(v, pte, itir, ifa); - vcpu_quick_region_set(VMX(v, tc_regions), ifa); - } - if (pte & VTLB_PTE_IO) - return; - - if (ps >= mrr.ps) - vhpt_insert(phy_pte, itir, ifa, pte); - else { - u64 psr; - phy_pte &= ~PAGE_FLAGS_RV_MASK; - psr = ia64_clear_ic(); - ia64_itc(type, ifa, phy_pte, ps); - paravirt_dv_serialize_data(); - ia64_set_psr(psr); - } - if (!(pte&VTLB_PTE_IO)) - mark_pages_dirty(v, pte, ps); - -} - -/* - * Purge all TCs or VHPT entries including those in Hash table. 
- * - */ - -void thash_purge_all(struct kvm_vcpu *v) -{ - int i; - struct thash_data *head; - struct thash_cb *vtlb, *vhpt; - vtlb = &v->arch.vtlb; - vhpt = &v->arch.vhpt; - - for (i = 0; i < 8; i++) - VMX(v, psbits[i]) = 0; - - head = vtlb->hash; - for (i = 0; i < vtlb->num; i++) { - head->page_flags = 0; - head->etag = INVALID_TI_TAG; - head->itir = 0; - head->next = 0; - head++; - }; - - head = vhpt->hash; - for (i = 0; i < vhpt->num; i++) { - head->page_flags = 0; - head->etag = INVALID_TI_TAG; - head->itir = 0; - head->next = 0; - head++; - }; - - local_flush_tlb_all(); -} - -/* - * Lookup the hash table and its collision chain to find an entry - * covering this address rid:va or the entry. - * - * INPUT: - * in: TLB format for both VHPT & TLB. - */ -struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) -{ - struct thash_data *cch; - u64 psbits, ps, tag; - union ia64_rr vrr; - - struct thash_cb *hcb = &v->arch.vtlb; - - cch = __vtr_lookup(v, va, is_data); - if (cch) - return cch; - - if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0) - return NULL; - - psbits = VMX(v, psbits[(va >> 61)]); - vrr.val = vcpu_get_rr(v, va); - while (psbits) { - ps = __ffs(psbits); - psbits &= ~(1UL << ps); - vrr.ps = ps; - cch = vsa_thash(hcb->pta, va, vrr.val, &tag); - if (cch->etag == tag && cch->ps == ps) - return cch; - } - - return NULL; -} - -/* - * Initialize internal control data before service. 
- */ -void thash_init(struct thash_cb *hcb, u64 sz) -{ - int i; - struct thash_data *head; - - hcb->pta.val = (unsigned long)hcb->hash; - hcb->pta.vf = 1; - hcb->pta.ve = 1; - hcb->pta.size = sz; - head = hcb->hash; - for (i = 0; i < hcb->num; i++) { - head->page_flags = 0; - head->itir = 0; - head->etag = INVALID_TI_TAG; - head->next = 0; - head++; - } -} - -u64 kvm_get_mpt_entry(u64 gpfn) -{ - u64 *base = (u64 *) KVM_P2M_BASE; - - if (gpfn >= (KVM_P2M_SIZE >> 3)) - panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn); - - return *(base + gpfn); -} - -u64 kvm_lookup_mpa(u64 gpfn) -{ - u64 maddr; - maddr = kvm_get_mpt_entry(gpfn); - return maddr&_PAGE_PPN_MASK; -} - -u64 kvm_gpa_to_mpa(u64 gpa) -{ - u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); - return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); -} - -/* - * Fetch guest bundle code. - * INPUT: - * gip: guest ip - * pbundle: used to return fetched bundle. - */ -int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) -{ - u64 gpip = 0; /* guest physical IP*/ - u64 *vpa; - struct thash_data *tlb; - u64 maddr; - - if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) { - /* I-side physical mode */ - gpip = gip; - } else { - tlb = vtlb_lookup(vcpu, gip, I_TLB); - if (tlb) - gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | - (gip & (PSIZE(tlb->ps) - 1)); - } - if (gpip) { - maddr = kvm_gpa_to_mpa(gpip); - } else { - tlb = vhpt_lookup(gip); - if (tlb == NULL) { - ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); - return IA64_FAULT; - } - maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) - | (gip & (PSIZE(tlb->ps) - 1)); - } - vpa = (u64 *)__kvm_va(maddr); - - pbundle->i64[0] = *vpa++; - pbundle->i64[1] = *vpa; - - return IA64_NO_FAULT; -} - -void kvm_init_vhpt(struct kvm_vcpu *v) -{ - v->arch.vhpt.num = VHPT_NUM_ENTRIES; - thash_init(&v->arch.vhpt, VHPT_SHIFT); - ia64_set_pta(v->arch.vhpt.pta.val); - /*Enable VHPT here?*/ -} - -void kvm_init_vtlb(struct kvm_vcpu *v) -{ - v->arch.vtlb.num = VTLB_NUM_ENTRIES; - 
thash_init(&v->arch.vtlb, VTLB_SHIFT); -} diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 95cef0b5f836..df19d0c47be8 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -565,6 +565,7 @@ void consistent_free(size_t size, void *vaddr); void consistent_sync(void *vaddr, size_t size, int direction); void consistent_sync_page(struct page *page, unsigned long offset, size_t size, int direction); +unsigned long consistent_virt_to_pfn(void *vaddr); void setup_memory(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index 4633c36c1b32..ed7ba8a11822 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -154,9 +154,36 @@ dma_direct_sync_sg_for_device(struct device *dev, __dma_sync(sg->dma_address, sg->length, direction); } +int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t handle, size_t size, + struct dma_attrs *attrs) +{ +#ifdef CONFIG_MMU + unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + unsigned long pfn; + + if (off >= count || user_count > (count - off)) + return -ENXIO; + +#ifdef NOT_COHERENT_CACHE + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = consistent_virt_to_pfn(cpu_addr); +#else + pfn = virt_to_pfn(cpu_addr); +#endif + return remap_pfn_range(vma, vma->vm_start, pfn + off, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +#else + return -ENXIO; +#endif +} + struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc_coherent, .free = dma_direct_free_coherent, + .mmap = dma_direct_mmap_coherent, .map_sg = dma_direct_map_sg, .dma_supported = dma_direct_dma_supported, .map_page = dma_direct_map_page, diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index 
e10ad930895e..b06c3a7faf20 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -156,6 +156,25 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle) } EXPORT_SYMBOL(consistent_alloc); +#ifdef CONFIG_MMU +static pte_t *consistent_virt_to_pte(void *vaddr) +{ + unsigned long addr = (unsigned long)vaddr; + + return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr); +} + +unsigned long consistent_virt_to_pfn(void *vaddr) +{ + pte_t *ptep = consistent_virt_to_pte(vaddr); + + if (pte_none(*ptep) || !pte_present(*ptep)) + return 0; + + return pte_pfn(*ptep); +} +#endif + /* * free page(s) as defined by the above mapping. */ @@ -181,13 +200,9 @@ void consistent_free(size_t size, void *vaddr) } while (size -= PAGE_SIZE); #else do { - pte_t *ptep; + pte_t *ptep = consistent_virt_to_pte(vaddr); unsigned long pfn; - ptep = pte_offset_kernel(pmd_offset(pgd_offset_k( - (unsigned int)vaddr), - (unsigned int)vaddr), - (unsigned int)vaddr); if (!pte_none(*ptep) && pte_present(*ptep)) { pfn = pte_pfn(*ptep); pte_clear(&init_mm, (unsigned int)vaddr, ptep); diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index 203e4403c366..48a9dfc55b51 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -374,7 +374,7 @@ static long alchemy_calc_div(unsigned long rate, unsigned long prate, static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate, unsigned long *best_parent_rate, - struct clk **best_parent_clk, + struct clk_hw **best_parent_clk, int scale, int maxdiv) { struct clk *pc, *bpc, *free; @@ -453,7 +453,7 @@ static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate, } *best_parent_rate = bpr; - *best_parent_clk = bpc; + *best_parent_clk = __clk_get_hw(bpc); return br; } @@ -547,7 +547,7 @@ static unsigned long alchemy_clk_fgv1_recalc(struct clk_hw *hw, static long alchemy_clk_fgv1_detr(struct clk_hw *hw, unsigned long rate, unsigned 
long *best_parent_rate, - struct clk **best_parent_clk) + struct clk_hw **best_parent_clk) { return alchemy_clk_fgcs_detr(hw, rate, best_parent_rate, best_parent_clk, 2, 512); @@ -679,7 +679,7 @@ static unsigned long alchemy_clk_fgv2_recalc(struct clk_hw *hw, static long alchemy_clk_fgv2_detr(struct clk_hw *hw, unsigned long rate, unsigned long *best_parent_rate, - struct clk **best_parent_clk) + struct clk_hw **best_parent_clk) { struct alchemy_fgcs_clk *c = to_fgcs_clk(hw); int scale, maxdiv; @@ -898,7 +898,7 @@ static int alchemy_clk_csrc_setr(struct clk_hw *hw, unsigned long rate, static long alchemy_clk_csrc_detr(struct clk_hw *hw, unsigned long rate, unsigned long *best_parent_rate, - struct clk **best_parent_clk) + struct clk_hw **best_parent_clk) { struct alchemy_fgcs_clk *c = to_fgcs_clk(hw); int scale = c->dt[2] == 3 ? 1 : 2; /* au1300 check */ diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig index 46e8f7676a15..3bdb72a70364 100644 --- a/arch/mips/configs/db1xxx_defconfig +++ b/arch/mips/configs/db1xxx_defconfig @@ -36,7 +36,7 @@ CONFIG_PCI=y CONFIG_PCI_REALLOC_ENABLE_AUTO=y CONFIG_PCCARD=y CONFIG_PCMCIA_ALCHEMY_DEVBOARD=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=y diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 227a9de32246..e51aad9a94b1 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -37,7 +37,6 @@ CONFIG_MIPS32_N32=y CONFIG_PM=y CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="/dev/hda3" -CONFIG_PM_RUNTIME=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_DEBUG=y CONFIG_CPU_FREQ_STAT=m diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 1c6191ebd583..7eabcd2031ea 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -58,7 +58,7 @@ CONFIG_BINFMT_MISC=m CONFIG_MIPS32_COMPAT=y CONFIG_MIPS32_O32=y CONFIG_MIPS32_N32=y 
-CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig index 70509a48df82..b3d1d37f85ea 100644 --- a/arch/mips/configs/nlm_xlp_defconfig +++ b/arch/mips/configs/nlm_xlp_defconfig @@ -61,7 +61,7 @@ CONFIG_BINFMT_MISC=y CONFIG_MIPS32_COMPAT=y CONFIG_MIPS32_O32=y CONFIG_MIPS32_N32=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_PM_DEBUG=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig index 82207e8079f3..3d8016d6cf3e 100644 --- a/arch/mips/configs/nlm_xlr_defconfig +++ b/arch/mips/configs/nlm_xlr_defconfig @@ -41,7 +41,7 @@ CONFIG_PCI=y CONFIG_PCI_MSI=y CONFIG_PCI_DEBUG=y CONFIG_BINFMT_MISC=m -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_PM_DEBUG=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 7cba480568c8..70795a67a276 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -30,7 +30,7 @@ retry: return pte; #else - return ACCESS_ONCE(*ptep); + return READ_ONCE(*ptep); #endif } diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile index e142c9ee51fa..2328f82ba2a8 100644 --- a/arch/nios2/Makefile +++ b/arch/nios2/Makefile @@ -14,6 +14,8 @@ # Nios2 port by Wind River Systems Inc trough: # fredrik.markstrom@gmail.com and ivarholmqvist@gmail.com +KBUILD_DEFCONFIG := 3c120_defconfig + UTS_SYSNAME = Linux export MMU diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h index 9102bfd3fa1c..6e24d7cceb0c 100644 --- a/arch/nios2/include/asm/io.h +++ b/arch/nios2/include/asm/io.h @@ -45,6 +45,8 @@ static inline void iounmap(void __iomem *addr) __iounmap(addr); } +#define ioremap_wc ioremap_nocache + /* Pages to physical address... 
*/ #define page_to_phys(page) virt_to_phys(page_to_virt(page)) #define page_to_bus(page) page_to_virt(page) diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index acedc0a2860e..caa51ff85a3c 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -168,7 +168,7 @@ do { \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ unsigned long __gu_val; \ __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\ - (x) = (__typeof__(x))__gu_val; \ + (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ }) @@ -180,7 +180,7 @@ do { \ if (access_ok(VERIFY_READ, __gu_ptr, sizeof(*__gu_ptr))) \ __get_user_common(__gu_val, sizeof(*__gu_ptr), \ __gu_ptr, __gu_err); \ - (x) = (__typeof__(x))__gu_val; \ + (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ }) diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index d2d11b7055ba..8121aa6db2ff 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -33,11 +33,18 @@ #endif /*!CONFIG_PA20*/ -/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. */ +/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. + We don't explicitly expose that "*a" may be written as reload + fails to find a register in class R1_REGS when "a" needs to be + reloaded when generating 64-bit PIC code. Instead, we clobber + memory to indicate to the compiler that the assembly code reads + or writes to items other than those listed in the input and output + operands. This may pessimize the code somewhat but __ldcw is + usually used within code blocks surrounded by memory barriors. 
*/ #define __ldcw(a) ({ \ unsigned __ret; \ - __asm__ __volatile__(__LDCW " 0(%2),%0" \ - : "=r" (__ret), "+m" (*(a)) : "r" (a)); \ + __asm__ __volatile__(__LDCW " 0(%1),%0" \ + : "=r" (__ret) : "r" (a) : "memory"); \ __ret; \ }) diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 2e637c881d2b..879de5efb073 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -36,7 +36,7 @@ CONFIG_KEXEC=y CONFIG_SCHED_SMT=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_PM_DEBUG=y # CONFIG_SECCOMP is not set # CONFIG_PCI is not set diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h new file mode 100644 index 000000000000..d2f99ca1e3a6 --- /dev/null +++ b/arch/powerpc/include/asm/cpuidle.h @@ -0,0 +1,20 @@ +#ifndef _ASM_POWERPC_CPUIDLE_H +#define _ASM_POWERPC_CPUIDLE_H + +#ifdef CONFIG_PPC_POWERNV +/* Used in powernv idle state management */ +#define PNV_THREAD_RUNNING 0 +#define PNV_THREAD_NAP 1 +#define PNV_THREAD_SLEEP 2 +#define PNV_THREAD_WINKLE 3 +#define PNV_CORE_IDLE_LOCK_BIT 0x100 +#define PNV_CORE_IDLE_THREAD_BITS 0x0FF + +#ifndef __ASSEMBLY__ +extern u32 pnv_fastsleep_workaround_at_entry[]; +extern u32 pnv_fastsleep_workaround_at_exit[]; +#endif + +#endif + +#endif diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6acf0c2a0f99..942c7b1678e3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, unsigned long gpa, bool dirty); -extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, 
unsigned long ptel, pgd_t *pgdir, bool realmode, unsigned long *idx_ret); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 0aa817933e6a..2d81e202bdcc 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ -extern unsigned long kvm_rma_pages; #endif #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ @@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ - rb |= v >> (62 - 8); /* B field */ + rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ /* * AVA in v had cleared lower 23 bits. We need to derive * that from pteg index diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 047855619cc4..7efd666a3fa7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table { struct page *pages[0]; }; -struct kvm_rma_info { - atomic_t use_count; - unsigned long base_pfn; -}; - /* XICS components, defined in book3s_xics.c */ struct kvmppc_xics; struct kvmppc_icp; @@ -214,16 +209,9 @@ struct revmap_entry { #define KVMPPC_RMAP_PRESENT 0x100000000ul #define KVMPPC_RMAP_INDEX 0xfffffffful -/* Low-order bits in memslot->arch.slot_phys[] */ -#define KVMPPC_PAGE_ORDER_MASK 0x1f -#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */ -#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ -#define KVMPPC_GOT_PAGE 0x80 - struct kvm_arch_memory_slot { #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE unsigned long *rmap; - unsigned long *slot_phys; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ }; @@ -242,14 +230,12 @@ struct kvm_arch { struct kvm_rma_info *rma; unsigned long vrma_slb_v; 
int rma_setup_done; - int using_mmu_notifiers; u32 hpt_order; atomic_t vcpus_running; u32 online_vcores; unsigned long hpt_npte; unsigned long hpt_mask; atomic_t hpte_mod_interest; - spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ @@ -297,6 +283,7 @@ struct kvmppc_vcore { struct list_head runnable_threads; spinlock_t lock; wait_queue_head_t wq; + spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; struct kvm_vcpu *runner; @@ -308,6 +295,7 @@ struct kvmppc_vcore { ulong dpdes; /* doorbell state (POWER8) */ void *mpp_buffer; /* Micro Partition Prefetch buffer */ bool mpp_buffer_is_valid; + ulong conferring_threads; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -664,6 +652,8 @@ struct kvm_vcpu_arch { spinlock_t tbacct_lock; u64 busy_stolen; u64 busy_preempt; + + u32 emul_inst; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a6dcdb6d13c1..46bf652c9169 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce); extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba); -extern struct kvm_rma_info *kvm_alloc_rma(void); -extern void kvm_release_rma(struct kvm_rma_info *ri); extern struct page *kvm_alloc_hpt(unsigned long nr_pages); extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 5cd8d2fddba9..eb95b675109b 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -56,6 +56,14 @@ struct opal_sg_list { #define OPAL_HARDWARE_FROZEN -13 #define OPAL_WRONG_STATE -14 #define OPAL_ASYNC_COMPLETION -15 +#define 
OPAL_I2C_TIMEOUT -17 +#define OPAL_I2C_INVALID_CMD -18 +#define OPAL_I2C_LBUS_PARITY -19 +#define OPAL_I2C_BKEND_OVERRUN -20 +#define OPAL_I2C_BKEND_ACCESS -21 +#define OPAL_I2C_ARBT_LOST -22 +#define OPAL_I2C_NACK_RCVD -23 +#define OPAL_I2C_STOP_ERR -24 /* API Tokens (in r0) */ #define OPAL_INVALID_CALL -1 @@ -152,12 +160,25 @@ struct opal_sg_list { #define OPAL_PCI_ERR_INJECT 96 #define OPAL_PCI_EEH_FREEZE_SET 97 #define OPAL_HANDLE_HMI 98 +#define OPAL_CONFIG_CPU_IDLE_STATE 99 +#define OPAL_SLW_SET_REG 100 #define OPAL_REGISTER_DUMP_REGION 101 #define OPAL_UNREGISTER_DUMP_REGION 102 #define OPAL_WRITE_TPO 103 #define OPAL_READ_TPO 104 #define OPAL_IPMI_SEND 107 #define OPAL_IPMI_RECV 108 +#define OPAL_I2C_REQUEST 109 + +/* Device tree flags */ + +/* Flags set in power-mgmt nodes in device tree if + * respective idle states are supported in the platform. + */ +#define OPAL_PM_NAP_ENABLED 0x00010000 +#define OPAL_PM_SLEEP_ENABLED 0x00020000 +#define OPAL_PM_WINKLE_ENABLED 0x00040000 +#define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 #ifndef __ASSEMBLY__ @@ -712,6 +733,24 @@ typedef struct oppanel_line { uint64_t line_len; } oppanel_line_t; +/* OPAL I2C request */ +struct opal_i2c_request { + uint8_t type; +#define OPAL_I2C_RAW_READ 0 +#define OPAL_I2C_RAW_WRITE 1 +#define OPAL_I2C_SM_READ 2 +#define OPAL_I2C_SM_WRITE 3 + uint8_t flags; +#define OPAL_I2C_ADDR_10 0x01 /* Not supported yet */ + uint8_t subaddr_sz; /* Max 4 */ + uint8_t reserved; + __be16 addr; /* 7 or 10 bit address */ + __be16 reserved2; + __be32 subaddr; /* Sub-address if any */ + __be32 size; /* Data size */ + __be64 buffer_ra; /* Buffer real address */ +}; + /* /sys/firmware/opal */ extern struct kobject *opal_kobj; @@ -876,11 +915,14 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); int64_t opal_handle_hmi(void); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); +int64_t 
opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t msg_len); int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t *msg_len); +int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id, + struct opal_i2c_request *oreq); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 24a386cbb928..e5f22c6c4bf9 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -152,6 +152,16 @@ struct paca_struct { u64 tm_scratch; /* TM scratch area for reclaim */ #endif +#ifdef CONFIG_PPC_POWERNV + /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */ + u32 *core_idle_state_ptr; + u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */ + /* Mask to indicate thread id in core */ + u8 thread_mask; + /* Mask to denote subcore sibling threads */ + u8 subcore_sibling_mask; +#endif + #ifdef CONFIG_PPC_BOOK3S_64 /* Exclusive emergency stack pointer for machine check exception. 
*/ void *mc_emergency_sp; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1a5287759fc8..03cd858a401c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -194,6 +194,7 @@ #define PPC_INST_NAP 0x4c000364 #define PPC_INST_SLEEP 0x4c0003a4 +#define PPC_INST_WINKLE 0x4c0003e4 /* A2 specific instructions */ #define PPC_INST_ERATWE 0x7c0001a6 @@ -375,6 +376,7 @@ #define PPC_NAP stringify_in_c(.long PPC_INST_NAP) #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) +#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE) /* BHRB instructions */ #define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 29c3798cf800..bf117d8fb45f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -452,7 +452,8 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern unsigned long power7_nap(int check_irq); -extern void power7_sleep(void); +extern unsigned long power7_sleep(void); +extern unsigned long power7_winkle(void); extern void flush_instruction_cache(void); extern void hard_reset_now(void); extern void poweroff_now(void); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c998279bd85b..1c874fb533bb 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -118,8 +118,10 @@ #define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV) #ifdef __BIG_ENDIAN__ #define MSR_ __MSR +#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV) #else #define MSR_ (__MSR | MSR_LE) +#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV | MSR_LE) #endif #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) @@ -371,6 +373,7 @@ #define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */ #define SPRN_DBAT7U 0x23E 
/* Data BAT 7 Upper Register */ #define SPRN_PPR 0x380 /* SMT Thread status Register */ +#define SPRN_TSCR 0x399 /* Thread Switch Control Register */ #define SPRN_DEC 0x016 /* Decrement Register */ #define SPRN_DER 0x095 /* Debug Enable Regsiter */ @@ -728,6 +731,7 @@ #define SPRN_BESCR 806 /* Branch event status and control register */ #define BESCR_GE 0x8000000000000000ULL /* Global Enable */ #define SPRN_WORT 895 /* Workload optimization register - thread */ +#define SPRN_WORC 863 /* Workload optimization register - core */ #define SPRN_PMC1 787 #define SPRN_PMC2 788 diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 6240698fee9a..ff21b7a2f0cc 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task, static inline int syscall_get_arch(void) { - return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64; + int arch = is_32bit_task() ? 
AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64; +#ifdef __LITTLE_ENDIAN__ + arch |= __AUDIT_ARCH_LE; +#endif + return arch; } #endif /* _ASM_SYSCALL_H */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 9485b43a7c00..a0c071d24e0e 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -284,7 +284,7 @@ do { \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) #endif /* __powerpc64__ */ @@ -297,7 +297,7 @@ do { \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) @@ -308,7 +308,7 @@ do { \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c161ef3f28a1..e624f9646350 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -489,7 +489,6 @@ int main(void) DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); - DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); @@ -499,6 +498,7 @@ int main(void) DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); 
DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); + DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); @@ -726,5 +726,16 @@ int main(void) arch.timing_last_enter.tv32.tbl)); #endif +#ifdef CONFIG_PPC_POWERNV + DEFINE(PACA_CORE_IDLE_STATE_PTR, + offsetof(struct paca_struct, core_idle_state_ptr)); + DEFINE(PACA_THREAD_IDLE_STATE, + offsetof(struct paca_struct, thread_idle_state)); + DEFINE(PACA_THREAD_MASK, + offsetof(struct paca_struct, thread_mask)); + DEFINE(PACA_SUBCORE_SIBLING_MASK, + offsetof(struct paca_struct, subcore_sibling_mask)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index db08382e19f1..c2df8150bd7a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -15,6 +15,7 @@ #include <asm/hw_irq.h> #include <asm/exception-64s.h> #include <asm/ptrace.h> +#include <asm/cpuidle.h> /* * We layout physical memory as follows: @@ -101,23 +102,34 @@ system_reset_pSeries: #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION /* Running native on arch 2.06 or later, check if we are - * waking up from nap. We only handle no state loss and - * supervisor state loss. We do -not- handle hypervisor - * state loss at this time. + * waking up from nap/sleep/winkle. */ mfspr r13,SPRN_SRR1 rlwinm. r13,r13,47-31,30,31 beq 9f - /* waking up from powersave (nap) state */ - cmpwi cr1,r13,2 - /* Total loss of HV state is fatal, we could try to use the - * PIR to locate a PACA, then use an emergency stack etc... - * OPAL v3 based powernv platforms have new idle states - * which fall in this catagory. + cmpwi cr3,r13,2 + + /* + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. 
*/ - bgt cr1,8f GET_PACA(r13) + clrldi r5,r13,63 + clrrdi r13,r13,1 + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,8f /* Either sleep or Winkle */ + + /* Waking up from nap should not cause hypervisor state loss */ + bgt cr3,. + + /* Waking up from nap */ + li r0,PNV_THREAD_RUNNING + stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE li r0,KVM_HWTHREAD_IN_KERNEL @@ -133,7 +145,7 @@ BEGIN_FTR_SECTION /* Return SRR1 from power7_nap() */ mfspr r3,SPRN_SRR1 - beq cr1,2f + beq cr3,2f b power7_wakeup_noloss 2: b power7_wakeup_loss @@ -1382,6 +1394,7 @@ machine_check_handle_early: MACHINE_CHECK_HANDLER_WINDUP GET_PACA(r13) ld r1,PACAR1(r13) + li r3,PNV_THREAD_NAP b power7_enter_nap_mode 4: #endif diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 18c0687e5ab3..05adc8bbdef8 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -18,9 +18,25 @@ #include <asm/hw_irq.h> #include <asm/kvm_book3s_asm.h> #include <asm/opal.h> +#include <asm/cpuidle.h> +#include <asm/mmu-hash64.h> #undef DEBUG +/* + * Use unused space in the interrupt stack to save and restore + * registers for winkle support. + */ +#define _SDR1 GPR3 +#define _RPR GPR4 +#define _SPURR GPR5 +#define _PURR GPR6 +#define _TSCR GPR7 +#define _DSCR GPR8 +#define _AMOR GPR9 +#define _WORT GPR10 +#define _WORC GPR11 + /* Idle state entry routines */ #define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ @@ -37,8 +53,7 @@ /* * Pass requested state in r3: - * 0 - nap - * 1 - sleep + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE * * To check IRQ_HAPPENED in r4 * 0 - don't check @@ -101,18 +116,105 @@ _GLOBAL(power7_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) -_GLOBAL(power7_enter_nap_mode) + /* + * Go to real mode to do the nap, as required by the architecture. 
+ * Also, we need to be in real mode before setting hwthread_state, + * because as soon as we do that, another thread can switch + * the MMU context to the guest. + */ + LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + li r6, MSR_RI + andc r6, r9, r6 + LOAD_REG_ADDR(r7, power7_enter_nap_mode) + mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r5 + rfid + + .globl power7_enter_nap_mode +power7_enter_nap_mode: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP stb r4,HSTATE_HWTHREAD_STATE(r13) #endif - cmpwi cr0,r3,1 - beq 2f + stb r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr3,r3,PNV_THREAD_SLEEP + bge cr3,2f IDLE_STATE_ENTER_SEQ(PPC_NAP) /* No return */ -2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP) - /* No return */ +2: + /* Sleep or winkle */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop1: + lwarx r15,0,r14 + andc r15,r15,r7 /* Clear thread bit */ + + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + +/* + * If cr0 = 0, then current thread is the last thread of the core entering + * sleep. Last thread needs to execute the hardware bug workaround code if + * required by the platform. + * Make the workaround call unconditionally here. The below branch call is + * patched out when the idle states are discovered if the platform does not + * require it. + */ +.global pnv_fastsleep_workaround_at_entry +pnv_fastsleep_workaround_at_entry: + beq fastsleep_workaround_at_entry + + stwcx. r15,0,r14 + bne- lwarx_loop1 + isync + +common_enter: /* common code for all the threads entering sleep or winkle */ + bgt cr3,enter_winkle + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + +fastsleep_workaround_at_entry: + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. 
r15,0,r14 + bne- lwarx_loop1 + isync + + /* Fast sleep workaround */ + li r3,1 + li r4,1 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + + /* Clear Lock bit */ + li r0,0 + lwsync + stw r0,0(r14) + b common_enter + +enter_winkle: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ @@ -125,48 +227,21 @@ _GLOBAL(power7_idle) _GLOBAL(power7_nap) mr r4,r3 - li r3,0 + li r3,PNV_THREAD_NAP b power7_powersave_common /* No return */ _GLOBAL(power7_sleep) - li r3,1 + li r3,PNV_THREAD_SLEEP li r4,1 b power7_powersave_common /* No return */ -/* - * Make opal call in realmode. This is a generic function to be called - * from realmode from reset vector. It handles endianess. 
- * - * r13 - paca pointer - * r1 - stack pointer - * r3 - opal token - */ -opal_call_realmode: - mflr r12 - std r12,_LINK(r1) - ld r2,PACATOC(r13) - /* Set opal return address */ - LOAD_REG_ADDR(r0,return_from_opal_call) - mtlr r0 - /* Handle endian-ness */ - li r0,MSR_LE - mfmsr r12 - andc r12,r12,r0 - mtspr SPRN_HSRR1,r12 - mr r0,r3 /* Move opal token to r0 */ - LOAD_REG_ADDR(r11,opal) - ld r12,8(r11) - ld r2,0(r11) - mtspr SPRN_HSRR0,r12 - hrfid - -return_from_opal_call: - FIXUP_ENDIAN - ld r0,_LINK(r1) - mtlr r0 - blr +_GLOBAL(power7_winkle) + li r3,3 + li r4,1 + b power7_powersave_common + /* No return */ #define CHECK_HMI_INTERRUPT \ mfspr r0,SPRN_SRR1; \ @@ -181,7 +256,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r2,PACATOC(r13); \ ld r1,PACAR1(r13); \ std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ + li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ bl opal_call_realmode; \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; @@ -190,16 +265,190 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ _GLOBAL(power7_wakeup_tb_loss) ld r2,PACATOC(r13); ld r1,PACAR1(r13) + /* + * Before entering any idle state, the NVGPRs are saved in the stack + * and they are restored before switching to the process context. Hence + * until they are restored, they are free to be used. + * + * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode + * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the + * wakeup reason if we branch to kvm_start_guest. + */ + mfspr r16,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop2: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + /* + * Lock bit is set in one of the 2 cases- + * a. In the sleep/winkle enter path, the last thread is executing + * fastsleep workaround code. + * b. 
In the wake up path, another thread is executing fastsleep + * workaround undo code or resyncing timebase or restoring context + * In either case loop until the lock bit is cleared. + */ + bne core_idle_lock_held + + cmpwi cr2,r15,0 + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi cr1,r4,0 /* Check if first in subcore */ + + /* + * At this stage + * cr1 - 0b0100 if first thread to wakeup in subcore + * cr2 - 0b0100 if first thread to wakeup in core + * cr3- 0b0010 if waking up from sleep or winkle + * cr4 - 0b0100 if waking up from winkle + */ + + or r15,r15,r7 /* Set thread bit */ + + beq cr1,first_thread_in_subcore + + /* Not first thread in subcore to wake up */ + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + b common_exit + +core_idle_lock_held: + HMT_LOW +core_idle_lock_loop: + lwz r15,0(14) + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bne core_idle_lock_loop + HMT_MEDIUM + b lwarx_loop2 + +first_thread_in_subcore: + /* First thread in subcore to wakeup */ + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + + /* + * If waking up from sleep, subcore state is not lost. Hence + * skip subcore state restore + */ + bne cr4,subcore_state_restored + + /* Restore per-subcore state */ + ld r4,_SDR1(r1) + mtspr SPRN_SDR1,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 + ld r4,_AMOR(r1) + mtspr SPRN_AMOR,r4 + +subcore_state_restored: + /* + * Check if the thread is also the first thread in the core. If not, + * skip to clear_lock. + */ + bne cr2,clear_lock + +first_thread_in_core: + + /* + * First thread in the core waking up from fastsleep. It needs to + * call the fastsleep workaround code if the platform requires it. + * Call it unconditionally here. The below branch instruction will + * be patched out when the idle states are discovered if platform + * does not require workaround. 
+ */ +.global pnv_fastsleep_workaround_at_exit +pnv_fastsleep_workaround_at_exit: + b fastsleep_workaround_at_exit + +timebase_resync: + /* Do timebase resync if we are waking up from sleep. Use cr3 value + * set in exceptions-64s.S */ + ble cr3,clear_lock /* Time base re-sync */ - li r3,OPAL_RESYNC_TIMEBASE + li r0,OPAL_RESYNC_TIMEBASE bl opal_call_realmode; - /* TODO: Check r3 for failure */ + /* + * If waking up from sleep, per core state is not lost, skip to + * clear_lock. + */ + bne cr4,clear_lock + + /* Restore per core state */ + ld r4,_TSCR(r1) + mtspr SPRN_TSCR,r4 + ld r4,_WORC(r1) + mtspr SPRN_WORC,r4 + +clear_lock: + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + lwsync + stw r15,0(r14) + +common_exit: + /* + * Common to all threads. + * + * If waking up from sleep, hypervisor state is not lost. Hence + * skip hypervisor state restore. + */ + bne cr4,hypervisor_state_restored + + /* Waking up from winkle */ + + /* Restore per thread state */ + bl __restore_cpu_power8 + + /* Restore SLB from PACA */ + ld r8,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r8,r8,16 + .endr + + ld r4,_SPURR(r1) + mtspr SPRN_SPURR,r4 + ld r4,_PURR(r1) + mtspr SPRN_PURR,r4 + ld r4,_DSCR(r1) + mtspr SPRN_DSCR,r4 + ld r4,_WORT(r1) + mtspr SPRN_WORT,r4 + +hypervisor_state_restored: + + li r5,PNV_THREAD_RUNNING + stb r5,PACA_THREAD_IDLE_STATE(r13) + + mtspr SPRN_SRR1,r16 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. 
testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beq 6f + b kvm_start_guest +6: +#endif + REST_NVGPRS(r1) REST_GPR(2, r1) ld r3,_CCR(r1) @@ -212,6 +461,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) mtspr SPRN_SRR0,r5 rfid +fastsleep_workaround_at_exit: + li r3,1 + li r4,0 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + b timebase_resync + /* * R3 here contains the value that will be returned to the caller * of power7_nap. diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8b2d2dc8ef10..8ec017cb4446 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -700,7 +700,6 @@ void start_secondary(void *unused) smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); preempt_disable(); - cpu_callin_map[cpu] = 1; if (smp_ops->setup_cpu) smp_ops->setup_cpu(cpu); @@ -739,6 +738,14 @@ void start_secondary(void *unused) notify_cpu_starting(cpu); set_cpu_online(cpu, true); + /* + * CPU must be marked active and online before we signal back to the + * master, because the scheduler needs to see the cpu_online and + * cpu_active bits set. 
+ */ + smp_wmb(); + cpu_callin_map[cpu] = 1; + local_irq_enable(); cpu_startup_entry(CPUHP_ONLINE); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 602eb51d20bc..f5769f19ae25 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -172,6 +172,7 @@ config KVM_XICS depends on KVM_BOOK3S_64 && !KVM_MPIC select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD + default y ---help--- Include support for the XICS (eXternal Interrupt Controller Specification) interrupt controller architecture used on diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b32db4b95361..888bf466d8c6 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ -} - void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) { if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index cd0b0730e29e..a2eb6d354a57 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw) return (sr_raw & 0x20000000) ? true: false; } -static inline bool sr_nx(u32 sr_raw) -{ - return (sr_raw & 0x10000000) ? 
true: false; -} - static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data, bool iswrite); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d40770248b6a..534acb3c6c3d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,8 +37,7 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> -/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ -#define MAX_LPID_970 63 +#include "trace_hv.h" /* Power architecture requires HPT is at least 256kB */ #define PPC_MIN_HPT_ORDER 18 @@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void) if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; - /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) { - host_lpid = mfspr(SPRN_LPID); /* POWER7 */ - rsvd_lpid = LPID_RSVD; - } else { - host_lpid = 0; /* PPC970 */ - rsvd_lpid = MAX_LPID_970; - } + /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ + host_lpid = mfspr(SPRN_LPID); + rsvd_lpid = LPID_RSVD; kvmppc_init_lpid(rsvd_lpid + 1); @@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, msr); } -/* - * This is called to get a reference to a guest page if there isn't - * one already in the memslot->arch.slot_phys[] array. 
- */ -static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, - struct kvm_memory_slot *memslot, - unsigned long psize) -{ - unsigned long start; - long np, err; - struct page *page, *hpage, *pages[1]; - unsigned long s, pgsize; - unsigned long *physp; - unsigned int is_io, got, pgorder; - struct vm_area_struct *vma; - unsigned long pfn, i, npages; - - physp = memslot->arch.slot_phys; - if (!physp) - return -EINVAL; - if (physp[gfn - memslot->base_gfn]) - return 0; - - is_io = 0; - got = 0; - page = NULL; - pgsize = psize; - err = -EINVAL; - start = gfn_to_hva_memslot(memslot, gfn); - - /* Instantiate and get the page we want access to */ - np = get_user_pages_fast(start, 1, 1, pages); - if (np != 1) { - /* Look up the vma for the page */ - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, start); - if (!vma || vma->vm_start > start || - start + psize > vma->vm_end || - !(vma->vm_flags & VM_PFNMAP)) - goto up_err; - is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); - pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); - /* check alignment of pfn vs. 
requested page size */ - if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1))) - goto up_err; - up_read(&current->mm->mmap_sem); - - } else { - page = pages[0]; - got = KVMPPC_GOT_PAGE; - - /* See if this is a large page */ - s = PAGE_SIZE; - if (PageHuge(page)) { - hpage = compound_head(page); - s <<= compound_order(hpage); - /* Get the whole large page if slot alignment is ok */ - if (s > psize && slot_is_aligned(memslot, s) && - !(memslot->userspace_addr & (s - 1))) { - start &= ~(s - 1); - pgsize = s; - get_page(hpage); - put_page(page); - page = hpage; - } - } - if (s < psize) - goto out; - pfn = page_to_pfn(page); - } - - npages = pgsize >> PAGE_SHIFT; - pgorder = __ilog2(npages); - physp += (gfn - memslot->base_gfn) & ~(npages - 1); - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) { - if (!physp[i]) { - physp[i] = ((pfn + i) << PAGE_SHIFT) + - got + is_io + pgorder; - got = 0; - } - } - spin_unlock(&kvm->arch.slot_phys_lock); - err = 0; - - out: - if (got) - put_page(page); - return err; - - up_err: - up_read(&current->mm->mmap_sem); - return err; -} - long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) { - unsigned long psize, gpa, gfn; - struct kvm_memory_slot *memslot; long ret; - if (kvm->arch.using_mmu_notifiers) - goto do_insert; - - psize = hpte_page_size(pteh, ptel); - if (!psize) - return H_PARAMETER; - - pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); - - /* Find the memslot (if any) for this address */ - gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); - gfn = gpa >> PAGE_SHIFT; - memslot = gfn_to_memslot(kvm, gfn); - if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) { - if (!slot_is_aligned(memslot, psize)) - return H_PARAMETER; - if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) - return H_PARAMETER; - } - - do_insert: /* Protect linux PTE lookup from page table destruction */ rcu_read_lock_sched(); /* this 
disables preemption too */ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, @@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, } -/* - * We come here on a H_ENTER call from the guest when we are not - * using mmu notifiers and we don't have the requested page pinned - * already. - */ -long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, - unsigned long ptel) -{ - return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index, - pteh, ptel, &vcpu->arch.gpr[4]); -} - static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, gva_t eaddr) { @@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); /* Storage key permission check for POWER7 */ - if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { + if (data && virtmode) { int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); if (amrfield & 1) gpte->may_read = 0; @@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, gfn = gpa >> PAGE_SHIFT; memslot = gfn_to_memslot(kvm, gfn); + trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr); + /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, dsisr & DSISR_ISSTORE); - if (!kvm->arch.using_mmu_notifiers) - return -EFAULT; /* should never get here */ - /* * This should never happen, because of the slot_is_aligned() * check in kvmppc_do_h_enter(). 
@@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); + ret = -EFAULT; is_io = 0; pfn = 0; page = NULL; @@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } up_read(&current->mm->mmap_sem); if (!pfn) - return -EFAULT; + goto out_put; } else { page = pages[0]; pfn = page_to_pfn(page); @@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - ret = -EFAULT; if (psize > pte_size) goto out_put; /* Check WIMG vs. the actual page we're accessing */ if (!hpte_cache_flags_ok(r, is_io)) { if (is_io) - return -EFAULT; + goto out_put; + /* * Allow guest to map emulated device memory as * uncacheable, but actually make it cacheable. @@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, SetPageDirty(page); out_put: + trace_kvm_page_fault_exit(vcpu, hpte, ret); + if (page) { /* * We drop pages[0] here, not page because page might @@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { - if (kvm->arch.using_mmu_notifiers) - hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); @@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) { - if (kvm->arch.using_mmu_notifiers) - kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); return 0; } int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - if (kvm->arch.using_mmu_notifiers) - kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); + kvm_handle_hva_range(kvm, start,
end, kvm_unmap_rmapp); return 0; } @@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.using_mmu_notifiers) - return 0; return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } @@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) { - if (!kvm->arch.using_mmu_notifiers) - return 0; return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); } void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) { - if (!kvm->arch.using_mmu_notifiers) - return; kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); } @@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) } /* Now check and modify the HPTE */ - if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) { + /* unlock and continue */ + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); continue; + } /* need to make it temporarily absent so C is stable */ hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); @@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, struct page *page, *pages[1]; int npages; unsigned long hva, offset; - unsigned long pa; - unsigned long *physp; int srcu_idx; srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; - if (!kvm->arch.using_mmu_notifiers) { - physp = memslot->arch.slot_phys; - if (!physp) - goto err; - physp += gfn - memslot->base_gfn; - pa = *physp; - if (!pa) { - if (kvmppc_get_guest_page(kvm, gfn, memslot, - PAGE_SIZE) < 0) - goto err; - pa = *physp; - } - page = pfn_to_page(pa >> PAGE_SHIFT); - get_page(page); - } else { - hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, 1, pages); - if (npages < 1) - goto err; - page = pages[0]; - } + hva = 
gfn_to_hva_memslot(memslot, gfn); + npages = get_user_pages_fast(hva, 1, 1, pages); + if (npages < 1) + goto err; + page = pages[0]; srcu_read_unlock(&kvm->srcu, srcu_idx); offset = gpa & (PAGE_SIZE - 1); @@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, put_page(page); - if (!dirty || !kvm->arch.using_mmu_notifiers) + if (!dirty) return; /* We need to mark this page dirty in the rmap chain */ @@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for (j = 0; j < hdr.n_valid; ++j) { + __be64 hpte_v; + __be64 hpte_r; + err = -EFAULT; - if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) + if (__get_user(hpte_v, lbuf) || + __get_user(hpte_r, lbuf + 1)) goto out; + v = be64_to_cpu(hpte_v); + r = be64_to_cpu(hpte_r); err = -EINVAL; if (!(v & HPTE_V_VALID)) goto out; @@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; - if (cpu_has_feature(CPU_FTR_ARCH_206)) - vcpu->arch.slb_nr = 32; /* POWER7 */ - else - vcpu->arch.slb_nr = 64; + vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e63587d30b70..de4018a1bc4b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -58,6 +58,9 @@ #include "book3s.h" +#define CREATE_TRACE_POINTS +#include "trace_hv.h" + /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ @@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) * stolen. * * Updates to busy_stolen are protected by arch.tbacct_lock; - * updates to vc->stolen_tb are protected by the arch.tbacct_lock - * of the vcpu that has taken responsibility for running the vcore - * (i.e. 
vc->runner). The stolen times are measured in units of - * timebase ticks. (Note that the != TB_NIL checks below are - * purely defensive; they should never fail.) + * updates to vc->stolen_tb are protected by the vcore->stoltb_lock + * lock. The stolen times are measured in units of timebase ticks. + * (Note that the != TB_NIL checks below are purely defensive; + * they should never fail.) */ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) @@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE && - vc->preempt_tb != TB_NIL) { - vc->stolen_tb += mftb() - vc->preempt_tb; - vc->preempt_tb = TB_NIL; + /* + * We can test vc->runner without taking the vcore lock, + * because only this task ever sets vc->runner to this + * vcpu, and once it is set to this vcpu, only this task + * ever sets it to NULL. 
+ */ + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { + spin_lock_irqsave(&vc->stoltb_lock, flags); + if (vc->preempt_tb != TB_NIL) { + vc->stolen_tb += mftb() - vc->preempt_tb; + vc->preempt_tb = TB_NIL; + } + spin_unlock_irqrestore(&vc->stoltb_lock, flags); } + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && vcpu->arch.busy_preempt != TB_NIL) { vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; @@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { + spin_lock_irqsave(&vc->stoltb_lock, flags); vc->preempt_tb = mftb(); + spin_unlock_irqrestore(&vc->stoltb_lock, flags); + } + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) vcpu->arch.busy_preempt = mftb(); spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); @@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) struct kvmppc_vcore *vc = vcpu->arch.vcore; if (arch_compat) { - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - return -EINVAL; /* 970 has no compat mode support */ - switch (arch_compat) { case PVR_ARCH_205: /* @@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) { u64 p; + unsigned long flags; - /* - * If we are the task running the vcore, then since we hold - * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb - * can't be updated, so we don't need the tbacct_lock. - * If the vcore is inactive, it can't become active (since we - * hold the vcore lock), so the vcpu load/put functions won't - * update stolen_tb/preempt_tb, and we don't need tbacct_lock. 
- */ + spin_lock_irqsave(&vc->stoltb_lock, flags); + p = vc->stolen_tb; if (vc->vcore_state != VCORE_INACTIVE && - vc->runner->arch.run_task != current) { - spin_lock_irq(&vc->runner->arch.tbacct_lock); - p = vc->stolen_tb; - if (vc->preempt_tb != TB_NIL) - p += now - vc->preempt_tb; - spin_unlock_irq(&vc->runner->arch.tbacct_lock); - } else { - p = vc->stolen_tb; - } + vc->preempt_tb != TB_NIL) + p += now - vc->preempt_tb; + spin_unlock_irqrestore(&vc->stoltb_lock, flags); return p; } @@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, } } +static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) +{ + struct kvmppc_vcore *vcore = target->arch.vcore; + + /* + * We expect to have been called by the real mode handler + * (kvmppc_rm_h_confer()) which would have directly returned + * H_SUCCESS if the source vcore wasn't idle (e.g. if it may + * have useful work to do and should not confer) so we don't + * recheck that here. + */ + + spin_lock(&vcore->lock); + if (target->arch.state == KVMPPC_VCPU_RUNNABLE && + vcore->vcore_state != VCORE_INACTIVE) + target = vcore->runner; + spin_unlock(&vcore->lock); + + return kvm_vcpu_yield_to(target); +} + +static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) +{ + int yield_count = 0; + struct lppaca *lppaca; + + spin_lock(&vcpu->arch.vpa_update_lock); + lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr; + if (lppaca) + yield_count = lppaca->yield_count; + spin_unlock(&vcpu->arch.vpa_update_lock); + return yield_count; +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; + int yield_count; struct kvm_vcpu *tvcpu; int idx, rc; @@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_HOST; switch (req) { - case H_ENTER: - idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), - kvmppc_get_gpr(vcpu, 5), - 
kvmppc_get_gpr(vcpu, 6), - kvmppc_get_gpr(vcpu, 7)); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - break; case H_CEDE: break; case H_PROD: @@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = H_PARAMETER; break; } - kvm_vcpu_yield_to(tvcpu); + yield_count = kvmppc_get_gpr(vcpu, 5); + if (kvmppc_get_yield_count(tvcpu) != yield_count) + break; + kvm_arch_vcpu_yield_to(tvcpu); break; case H_REGISTER_VPA: ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), @@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/ + case BOOK3S_INTERRUPT_HMI: case BOOK3S_INTERRUPT_PERFMON: r = RESUME_GUEST; break; @@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, * Accordingly return to Guest or Host. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: + if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED) + vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ? 
+ swab32(vcpu->arch.emul_inst) : + vcpu->arch.emul_inst; if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { r = kvmppc_emulate_debug_inst(run, vcpu); } else { @@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); + spin_lock_init(&vcore->stoltb_lock); init_waitqueue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; @@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->n_woken = 0; vc->nap_count = 0; vc->entry_exit_count = 0; + vc->preempt_tb = TB_NIL; vc->vcore_state = VCORE_STARTING; vc->in_guest = 0; vc->napping_threads = 0; + vc->conferring_threads = 0; /* * Updating any of the vpas requires calling kvmppc_pin_guest_page, @@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { kvmppc_start_thread(vcpu); kvmppc_create_dtl_entry(vcpu, vc); + trace_kvm_guest_enter(vcpu); } /* Set this explicitly in case thread 0 doesn't have a vcpu */ @@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_RUNNING; preempt_disable(); + + trace_kvmppc_run_core(vc, 0); + spin_unlock(&vc->lock); kvm_guest_enter(); @@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_core_pending_dec(vcpu)) kvmppc_core_dequeue_dec(vcpu); + trace_kvm_guest_exit(vcpu); + ret = RESUME_GUEST; if (vcpu->arch.trap) ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, @@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) wake_up(&vcpu->arch.cpu_run); } } + + trace_kvmppc_run_core(vc, 1); } /* @@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) */ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { + struct kvm_vcpu *vcpu; + int do_sleep = 1; + DEFINE_WAIT(wait); prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + + 
/* + * Check one last time for pending exceptions and ceded state after + * we put ourselves on the wait queue + */ + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { + do_sleep = 0; + break; + } + } + + if (!do_sleep) { + finish_wait(&vc->wq, &wait); + return; + } + vc->vcore_state = VCORE_SLEEPING; + trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); finish_wait(&vc->wq, &wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; + trace_kvmppc_vcore_blocked(vc, 1); } static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) @@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) struct kvmppc_vcore *vc; struct kvm_vcpu *v, *vn; + trace_kvmppc_run_vcpu_enter(vcpu); + kvm_run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; @@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) VCORE_EXIT_COUNT(vc) == 0) { kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu); + trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { wake_up(&vc->wq); } @@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) wake_up(&v->arch.cpu_run); } + trace_kvmppc_run_vcpu_exit(vcpu, kvm_run); spin_unlock(&vc->lock); return vcpu->arch.ret; } @@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Order vcpus_running vs. 
rma_setup_done, see kvmppc_alloc_reset_hpt */ smp_mb(); - /* On the first time here, set up HTAB and VRMA or RMA */ + /* On the first time here, set up HTAB and VRMA */ if (!vcpu->kvm->arch.rma_setup_done) { r = kvmppc_hv_setup_htab_rma(vcpu); if (r) @@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_PAPR_HCALL && !(vcpu->arch.shregs.msr & MSR_PR)) { + trace_kvm_hcall_enter(vcpu); r = kvmppc_pseries_do_hcall(vcpu); + trace_kvm_hcall_exit(vcpu, r); kvmppc_core_prepare_to_enter(vcpu); } else if (r == RESUME_PAGE_FAULT) { srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } - -/* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7 or PPC970. */ -static inline int lpcr_rmls(unsigned long rma_size) -{ - switch (rma_size) { - case 32ul << 20: /* 32 MB */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return 8; /* only supported on POWER7 */ - return -1; - case 64ul << 20: /* 64 MB */ - return 3; - case 128ul << 20: /* 128 MB */ - return 7; - case 256ul << 20: /* 256 MB */ - return 4; - case 1ul << 30: /* 1 GB */ - return 2; - case 16ul << 30: /* 16 GB */ - return 1; - case 256ul << 30: /* 256 GB */ - return 0; - default: - return -1; - } -} - -static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *page; - struct kvm_rma_info *ri = vma->vm_file->private_data; - - if (vmf->pgoff >= kvm_rma_pages) - return VM_FAULT_SIGBUS; - - page = pfn_to_page(ri->base_pfn + vmf->pgoff); - get_page(page); - vmf->page = page; - return 0; -} - -static const struct vm_operations_struct kvm_rma_vm_ops = { - .fault = kvm_rma_fault, -}; - -static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - vma->vm_ops = &kvm_rma_vm_ops; - return 0; -} - -static int kvm_rma_release(struct 
inode *inode, struct file *filp) -{ - struct kvm_rma_info *ri = filp->private_data; - - kvm_release_rma(ri); - return 0; -} - -static const struct file_operations kvm_rma_fops = { - .mmap = kvm_rma_mmap, - .release = kvm_rma_release, -}; - -static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, - struct kvm_allocate_rma *ret) -{ - long fd; - struct kvm_rma_info *ri; - /* - * Only do this on PPC970 in HV mode - */ - if (!cpu_has_feature(CPU_FTR_HVMODE) || - !cpu_has_feature(CPU_FTR_ARCH_201)) - return -EINVAL; - - if (!kvm_rma_pages) - return -EINVAL; - - ri = kvm_alloc_rma(); - if (!ri) - return -ENOMEM; - - fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC); - if (fd < 0) - kvm_release_rma(ri); - - ret->rma_size = kvm_rma_pages << PAGE_SHIFT; - return fd; -} - static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, int linux_psize) { @@ -2167,26 +2150,6 @@ out: return r; } -static void unpin_slot(struct kvm_memory_slot *memslot) -{ - unsigned long *physp; - unsigned long j, npages, pfn; - struct page *page; - - physp = memslot->arch.slot_phys; - npages = memslot->npages; - if (!physp) - return; - for (j = 0; j < npages; j++) { - if (!(physp[j] & KVMPPC_GOT_PAGE)) - continue; - pfn = physp[j] >> PAGE_SHIFT; - page = pfn_to_page(pfn); - SetPageDirty(page); - put_page(page); - } -} - static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { @@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, vfree(free->arch.rmap); free->arch.rmap = NULL; } - if (!dont || free->arch.slot_phys != dont->arch.slot_phys) { - unpin_slot(free); - vfree(free->arch.slot_phys); - free->arch.slot_phys = NULL; - } } static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, @@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); if (!slot->arch.rmap) return 
-ENOMEM; - slot->arch.slot_phys = NULL; return 0; } @@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { - unsigned long *phys; - - /* Allocate a slot_phys array if needed */ - phys = memslot->arch.slot_phys; - if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) { - phys = vzalloc(memslot->npages * sizeof(unsigned long)); - if (!phys) - return -ENOMEM; - memslot->arch.slot_phys = phys; - } - return 0; } @@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; - struct kvm_rma_info *ri = NULL; unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; unsigned long lpcr = 0, senc; - unsigned long lpcr_mask = 0; unsigned long psize, porder; - unsigned long rma_size; - unsigned long rmls; - unsigned long *physp; - unsigned long i, npages; int srcu_idx; mutex_lock(&kvm->lock); @@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) psize = vma_kernel_pagesize(vma); porder = __ilog2(psize); - /* Is this one of our preallocated RMAs? 
*/ - if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && - hva == vma->vm_start) - ri = vma->vm_file->private_data; - up_read(&current->mm->mmap_sem); - if (!ri) { - /* On POWER7, use VRMA; on PPC970, give up */ - err = -EPERM; - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - pr_err("KVM: CPU requires an RMO\n"); - goto out_srcu; - } + /* We can handle 4k, 64k or 16M pages in the VRMA */ + err = -EINVAL; + if (!(psize == 0x1000 || psize == 0x10000 || + psize == 0x1000000)) + goto out_srcu; - /* We can handle 4k, 64k or 16M pages in the VRMA */ - err = -EINVAL; - if (!(psize == 0x1000 || psize == 0x10000 || - psize == 0x1000000)) - goto out_srcu; + /* Update VRMASD field in the LPCR */ + senc = slb_pgsize_encoding(psize); + kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + /* the -4 is to account for senc values starting at 0x10 */ + lpcr = senc << (LPCR_VRMASD_SH - 4); - /* Update VRMASD field in the LPCR */ - senc = slb_pgsize_encoding(psize); - kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | - (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr_mask = LPCR_VRMASD; - /* the -4 is to account for senc values starting at 0x10 */ - lpcr = senc << (LPCR_VRMASD_SH - 4); + /* Create HPTEs in the hash page table for the VRMA */ + kvmppc_map_vrma(vcpu, memslot, porder); - /* Create HPTEs in the hash page table for the VRMA */ - kvmppc_map_vrma(vcpu, memslot, porder); - - } else { - /* Set up to use an RMO region */ - rma_size = kvm_rma_pages; - if (rma_size > memslot->npages) - rma_size = memslot->npages; - rma_size <<= PAGE_SHIFT; - rmls = lpcr_rmls(rma_size); - err = -EINVAL; - if ((long)rmls < 0) { - pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); - goto out_srcu; - } - atomic_inc(&ri->use_count); - kvm->arch.rma = ri; - - /* Update LPCR and RMOR */ - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970; insert RMLS value (split field) in HID4 */ - lpcr_mask = (1ul << HID4_RMLS0_SH) | - (3ul << HID4_RMLS2_SH) | HID4_RMOR; - lpcr = ((rmls >> 2)
<< HID4_RMLS0_SH) | - ((rmls & 3) << HID4_RMLS2_SH); - /* RMOR is also in HID4 */ - lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) - << HID4_RMOR_SH; - } else { - /* POWER7 */ - lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS; - lpcr = rmls << LPCR_RMLS_SH; - kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; - } - pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", - ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); - - /* Initialize phys addrs of pages in RMO */ - npages = kvm_rma_pages; - porder = __ilog2(npages); - physp = memslot->arch.slot_phys; - if (physp) { - if (npages > memslot->npages) - npages = memslot->npages; - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) - physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + - porder; - spin_unlock(&kvm->arch.slot_phys_lock); - } - } - - kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ smp_wmb(); @@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, sizeof(kvm->arch.enabled_hcalls)); - kvm->arch.rma = NULL; - kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970; HID4 is effectively the LPCR */ - kvm->arch.host_lpid = 0; - kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); - lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); - lpcr |= ((lpid >> 4) << HID4_LPID1_SH) | - ((lpid & 0xf) << HID4_LPID5_SH); - } else { - /* POWER7; init LPCR for virtual RMA mode */ - kvm->arch.host_lpid = mfspr(SPRN_LPID); - kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); - lpcr &= LPCR_PECE | LPCR_LPES; - lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | - LPCR_VPM0 | LPCR_VPM1; - kvm->arch.vrma_slb_v = SLB_VSID_B_1T | - (VRMA_VSID << SLB_VSID_SHIFT_1T); - /* On POWER8 turn on online bit to enable PURR/SPURR */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - lpcr |= LPCR_ONL; - } + /* Init LPCR for virtual 
RMA mode */ + kvm->arch.host_lpid = mfspr(SPRN_LPID); + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); + lpcr &= LPCR_PECE | LPCR_LPES; + lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | + LPCR_VPM0 | LPCR_VPM1; + kvm->arch.vrma_slb_v = SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + /* On POWER8 turn on online bit to enable PURR/SPURR */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + lpcr |= LPCR_ONL; kvm->arch.lpcr = lpcr; - kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206); - spin_lock_init(&kvm->arch.slot_phys_lock); - /* * Track that we now have a HV mode VM active. This blocks secondary * CPU threads from coming online. @@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvm_hv_vm_deactivated(); kvmppc_free_vcores(kvm); - if (kvm->arch.rma) { - kvm_release_rma(kvm->arch.rma); - kvm->arch.rma = NULL; - } kvmppc_free_hpt(kvm); } @@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn, static int kvmppc_core_check_processor_compat_hv(void) { - if (!cpu_has_feature(CPU_FTR_HVMODE)) + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_206)) return -EIO; return 0; } @@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, switch (ioctl) { - case KVM_ALLOCATE_RMA: { - struct kvm_allocate_rma rma; - struct kvm *kvm = filp->private_data; - - r = kvm_vm_ioctl_allocate_rma(kvm, &rma); - if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) - r = -EFAULT; - break; - } - case KVM_PPC_ALLOCATE_HTAB: { u32 htab_order; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 3f1bb5a36c27..1f083ff8a61a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -16,6 +16,7 @@ #include <linux/memblock.h> #include <linux/sizes.h> #include <linux/cma.h> +#include <linux/bitops.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> @@ -32,95 +33,9 @@ * By default we reserve 5% of memory for 
hash pagetable allocation. */ static unsigned long kvm_cma_resv_ratio = 5; -/* - * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area. - * Each RMA has to be physically contiguous and of a size that the - * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, - * and other larger sizes. Since we are unlikely to be allocate that - * much physically contiguous memory after the system is up and running, - * we preallocate a set of RMAs in early boot using CMA. - * should be power of 2. - */ -unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ -EXPORT_SYMBOL_GPL(kvm_rma_pages); static struct cma *kvm_cma; -/* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7 or PPC970. */ -static inline int lpcr_rmls(unsigned long rma_size) -{ - switch (rma_size) { - case 32ul << 20: /* 32 MB */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return 8; /* only supported on POWER7 */ - return -1; - case 64ul << 20: /* 64 MB */ - return 3; - case 128ul << 20: /* 128 MB */ - return 7; - case 256ul << 20: /* 256 MB */ - return 4; - case 1ul << 30: /* 1 GB */ - return 2; - case 16ul << 30: /* 16 GB */ - return 1; - case 256ul << 30: /* 256 GB */ - return 0; - default: - return -1; - } -} - -static int __init early_parse_rma_size(char *p) -{ - unsigned long kvm_rma_size; - - pr_debug("%s(%s)\n", __func__, p); - if (!p) - return -EINVAL; - kvm_rma_size = memparse(p, &p); - /* - * Check that the requested size is one supported in hardware - */ - if (lpcr_rmls(kvm_rma_size) < 0) { - pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); - return -EINVAL; - } - kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT; - return 0; -} -early_param("kvm_rma_size", early_parse_rma_size); - -struct kvm_rma_info *kvm_alloc_rma() -{ - struct page *page; - struct kvm_rma_info *ri; - - ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); - if (!ri) - return NULL; - page = cma_alloc(kvm_cma, kvm_rma_pages, 
order_base_2(kvm_rma_pages)); - if (!page) - goto err_out; - atomic_set(&ri->use_count, 1); - ri->base_pfn = page_to_pfn(page); - return ri; -err_out: - kfree(ri); - return NULL; -} -EXPORT_SYMBOL_GPL(kvm_alloc_rma); - -void kvm_release_rma(struct kvm_rma_info *ri) -{ - if (atomic_dec_and_test(&ri->use_count)) { - cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages); - kfree(ri); - } -} -EXPORT_SYMBOL_GPL(kvm_release_rma); - static int __init early_parse_kvm_cma_resv(char *p) { pr_debug("%s(%s)\n", __func__, p); @@ -132,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); struct page *kvm_alloc_hpt(unsigned long nr_pages) { - unsigned long align_pages = HPT_ALIGN_PAGES; - VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - /* Old CPUs require HPT aligned on a multiple of its size */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - align_pages = nr_pages; - return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages)); + return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES)); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt); @@ -180,22 +90,44 @@ void __init kvm_cma_reserve(void) if (selected_size) { pr_debug("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); - /* - * Old CPUs require HPT aligned on a multiple of its size. So for them - * make the alignment as max size we could request. - */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - align_size = __rounddown_pow_of_two(selected_size); - else - align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; - - align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); + align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; cma_declare_contiguous(0, selected_size, 0, align_size, KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); } } /* + * Real-mode H_CONFER implementation. + * We check if we are the only vcpu out of this virtual core + * still running in the guest and not ceded. 
If so, we pop up + * to the virtual-mode implementation; if not, just return to + * the guest. + */ +long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, + unsigned int yield_count) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + int threads_running; + int threads_ceded; + int threads_conferring; + u64 stop = get_tb() + 10 * tb_ticks_per_usec; + int rv = H_SUCCESS; /* => don't yield */ + + set_bit(vcpu->arch.ptid, &vc->conferring_threads); + while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) { + threads_running = VCORE_ENTRY_COUNT(vc); + threads_ceded = hweight32(vc->napping_threads); + threads_conferring = hweight32(vc->conferring_threads); + if (threads_ceded + threads_conferring >= threads_running) { + rv = H_TOO_HARD; /* => do yield */ + break; + } + } + clear_bit(vcpu->arch.ptid, &vc->conferring_threads); + return rv; +} + +/* * When running HV mode KVM we need to block certain operations while KVM VMs * exist in the system. We use a counter of VMs to track this. * diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 731be7478b27..36540a99d178 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry) std r3, _CCR(r1) /* Save host DSCR */ -BEGIN_FTR_SECTION mfspr r3, SPRN_DSCR std r3, HSTATE_DSCR(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Save host DABR */ @@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r7, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ + /* Clear MMCRA in order to disable SDAR updates */ li r5, 0 mtspr SPRN_MMCRA, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? 
*/ lbz r5, LPPACA_PMCINUSE(r3) @@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r7, SPRN_PMC4 mfspr r8, SPRN_PMC5 mfspr r9, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, HSTATE_PMC(r13) stw r5, HSTATE_PMC + 4(r13) stw r6, HSTATE_PMC + 8(r13) stw r7, HSTATE_PMC + 12(r13) stw r8, HSTATE_PMC + 16(r13) stw r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION - stw r10, HSTATE_PMC + 24(r13) - stw r11, HSTATE_PMC + 28(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 31: /* @@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) add r8,r8,r7 std r8,HSTATE_DECEXP(r13) -#ifdef CONFIG_SMP - /* - * On PPC970, if the guest vcpu has an external interrupt pending, - * send ourselves an IPI so as to interrupt the guest once it - * enables interrupts. (It must have interrupts disabled, - * otherwise we would already have delivered the interrupt.) - * - * XXX If this is a UP build, smp_send_reschedule is not available, - * so the interrupt will be delayed until the next time the vcpu - * enters the guest with interrupts enabled. - */ -BEGIN_FTR_SECTION - ld r4, HSTATE_KVM_VCPU(r13) - ld r0, VCPU_PENDING_EXC(r4) - li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL) - oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h - and. 
r0, r0, r7 - beq 32f - lhz r3, PACAPACAINDEX(r13) - bl smp_send_reschedule - nop -32: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -#endif /* CONFIG_SMP */ - /* Jump to partition switch code */ bl kvmppc_hv_entry_trampoline nop diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index d562c8e2bc30..60081bd75847 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -138,8 +138,5 @@ out: long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) { - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return kvmppc_realmode_mc_power7(vcpu); - - return 0; + return kvmppc_realmode_mc_power7(vcpu); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 084ad54c73cd..510bdfbc4073 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags) * as indicated by local_paca->kvm_hstate.kvm_vcpu being set, * we can use tlbiel as long as we mark all other physical * cores as potentially having stale TLB entries for this lpid. - * If we're not using MMU notifiers, we never take pages away - * from the guest, so we can use tlbiel if requested. * Otherwise, don't use tlbiel. */ if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu) global = 0; - else if (kvm->arch.using_mmu_notifiers) - global = 1; else - global = !(flags & H_LOCAL); + global = 1; if (!global) { /* any other core might now have stale TLB entries... 
*/ @@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; - unsigned long *physp, pte_size; + unsigned long pte_size; unsigned long is_io; unsigned long *rmap; pte_t pte; @@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, is_io = ~0ul; rmap = NULL; if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { - /* PPC970 can't do emulated MMIO */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - return H_PARAMETER; /* Emulated MMIO - mark this with key=31 */ pteh |= HPTE_V_ABSENT; ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; @@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, slot_fn = gfn - memslot->base_gfn; rmap = &memslot->arch.rmap[slot_fn]; - if (!kvm->arch.using_mmu_notifiers) { - physp = memslot->arch.slot_phys; - if (!physp) - return H_PARAMETER; - physp += slot_fn; - if (realmode) - physp = real_vmalloc_addr(physp); - pa = *physp; - if (!pa) - return H_TOO_HARD; - is_io = pa & (HPTE_R_I | HPTE_R_W); - pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); - pa &= PAGE_MASK; + /* Translate to host virtual address */ + hva = __gfn_to_hva_memslot(memslot, gfn); + + /* Look up the Linux PTE for the backing page */ + pte_size = psize; + pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); + if (pte_present(pte) && !pte_numa(pte)) { + if (writing && !pte_write(pte)) + /* make the actual HPTE be read-only */ + ptel = hpte_make_readonly(ptel); + is_io = hpte_cache_bits(pte_val(pte)); + pa = pte_pfn(pte) << PAGE_SHIFT; + pa |= hva & (pte_size - 1); pa |= gpa & ~PAGE_MASK; - } else { - /* Translate to host virtual address */ - hva = __gfn_to_hva_memslot(memslot, gfn); - - /* Look up the Linux PTE for the backing page */ - pte_size = psize; - pte = lookup_linux_pte_and_update(pgdir, hva, writing, - &pte_size); - if (pte_present(pte) && !pte_numa(pte)) { - if (writing && !pte_write(pte)) - /* make the 
actual HPTE be read-only */ - ptel = hpte_make_readonly(ptel); - is_io = hpte_cache_bits(pte_val(pte)); - pa = pte_pfn(pte) << PAGE_SHIFT; - pa |= hva & (pte_size - 1); - pa |= gpa & ~PAGE_MASK; - } } if (pte_size < psize) @@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ - if (kvm->arch.using_mmu_notifiers && - mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_notifier_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; @@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } -/* - * tlbie/tlbiel is a bit different on the PPC970 compared to later - * processors such as POWER7; the large page bit is in the instruction - * not RB, and the top 16 bits and the bottom 12 bits of the VA - * in RB must be 0. - */ -static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues, - long npages, int global, bool need_sync) -{ - long i; - - if (global) { - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - if (need_sync) - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < npages; ++i) { - unsigned long rb = rbvalues[i]; - - if (rb & 1) /* large page */ - asm volatile("tlbie %0,1" : : - "r" (rb & 0x0000fffffffff000ul)); - else - asm volatile("tlbie %0,0" : : - "r" (rb & 0x0000fffffffff000ul)); - } - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - if (need_sync) - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < npages; ++i) { - unsigned long rb = rbvalues[i]; - - if (rb & 1) /* large page */ - asm volatile("tlbiel %0,1" : : - "r" (rb & 0x0000fffffffff000ul)); - else - asm volatile("tlbiel %0,0" : : - "r" (rb & 0x0000fffffffff000ul)); - } - asm volatile("ptesync" : : : "memory"); - } -} - static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, long 
npages, int global, bool need_sync) { long i; - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970 tlbie instruction is a bit different */ - do_tlbies_970(kvm, rbvalues, npages, global, need_sync); - return; - } if (global) { while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) cpu_relax(); @@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rev->guest_rpte = r; note_hpte_modification(kvm, rev); } - r = (be64_to_cpu(hpte[1]) & ~mask) | bits; /* Update HPTE */ if (v & HPTE_V_VALID) { - rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* - * If the host has this page as readonly but the guest - * wants to make it read/write, reduce the permissions. - * Checking the host permissions involves finding the - * memslot and then the Linux PTE for the page. + * If the page is valid, don't let it transition from + * readonly to writable. If it should be writable, we'll + * take a trap and let the page fault code sort it out. 
*/ - if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) { - unsigned long psize, gfn, hva; - struct kvm_memory_slot *memslot; - pgd_t *pgdir = vcpu->arch.pgdir; - pte_t pte; - - psize = hpte_page_size(v, r); - gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); - if (memslot) { - hva = __gfn_to_hva_memslot(memslot, gfn); - pte = lookup_linux_pte_and_update(pgdir, hva, - 1, &psize); - if (pte_present(pte) && !pte_write(pte)) - r = hpte_make_readonly(r); - } + pte = be64_to_cpu(hpte[1]); + r = (pte & ~mask) | bits; + if (hpte_is_writable(r) && !hpte_is_writable(pte)) + r = hpte_make_readonly(r); + /* If the PTE is changing, invalidate it first */ + if (r != pte) { + rb = compute_tlbie_rb(v, r, pte_index); + hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) | + HPTE_V_ABSENT); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), + true); + hpte[1] = cpu_to_be64(r); } } - hpte[1] = cpu_to_be64(r); - eieio(); - hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK); + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 3ee38e6e884f..7b066f6b02ad 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, * state update in HW (ie bus transactions) so we can handle them * separately here as well. */ - if (resend) + if (resend) { icp->rm_action |= XICS_RM_CHECK_RESEND; + icp->rm_resend_icp = icp; + } } @@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, * nothing needs to be done as there can be no XISR to * reject. * + * ICP state: Check_IPI + * * If the CPPR is less favored, then we might be replacing - * an interrupt, and thus need to possibly reject it as in + * an interrupt, and thus need to possibly reject it. 
* - * ICP state: Check_IPI + * ICP State: IPI + * + * Besides rejecting any pending interrupts, we also + * update XISR and pending_pri to mark IPI as pending. + * + * PAPR does not describe this state, but if the MFRR is being + * made less favored than its earlier value, there might be + * a previously-rejected interrupt needing to be resent. + * Ideally, we would want to resend only if + * prio(pending_interrupt) < mfrr && + * prio(pending_interrupt) < cppr + * where pending interrupt is the one that was rejected. But + * we don't have that state, so we simply trigger a resend + * whenever the MFRR is made less favored. */ do { old_state = new_state = ACCESS_ONCE(icp->state); @@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, resend = false; if (mfrr < new_state.cppr) { /* Reject a pending interrupt if not an IPI */ - if (mfrr <= new_state.pending_pri) + if (mfrr <= new_state.pending_pri) { reject = new_state.xisr; - new_state.pending_pri = mfrr; - new_state.xisr = XICS_IPI; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } } - if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + if (mfrr > old_state.mfrr) { resend = new_state.need_resend; new_state.need_resend = 0; } @@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, } /* Pass resends to virtual mode */ - if (resend) + if (resend) { this_icp->rm_action |= XICS_RM_CHECK_RESEND; + this_icp->rm_resend_icp = icp; + } return check_too_hard(xics, this_icp); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 65c105b17a25..10554df13852 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) lwz r6, HSTATE_PMC + 12(r13) lwz r8, HSTATE_PMC + 16(r13) lwz r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION - lwz r10, HSTATE_PMC + 24(r13) - lwz r11, HSTATE_PMC + 28(r13) 
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r4 mtspr SPRN_PMC3, r5 mtspr SPRN_PMC4, r6 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, HSTATE_MMCR(r13) ld r4, HSTATE_MMCR + 8(r13) ld r5, HSTATE_MMCR + 16(r13) @@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL -BEGIN_FTR_SECTION beq 11f cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI beq cr2, 14f /* HMI check */ -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* RFI into the highmem handler, or branch to interrupt handler */ mfmsr r6 @@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - beqa 0x500 /* external interrupt (PPC970) */ beq cr1, 13f /* machine check */ RFI @@ -393,11 +382,8 @@ kvmppc_hv_entry: slbia ptesync -BEGIN_FTR_SECTION - b 30f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* - * POWER7 host -> guest partition switch code. + * POWER7/POWER8 host -> guest partition switch code. * We don't have to lock against concurrent tlbies, * but we do have to coordinate across hardware threads. */ @@ -505,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi r3,512 /* 1 microsecond */ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER blt hdec_soon - b 31f - - /* - * PPC970 host -> guest partition switch code. - * We have to lock against concurrent tlbies, - * using native_tlbie_lock to lock against host tlbies - * and kvm->arch.tlbie_lock to lock against guest tlbies. - * We also have to invalidate the TLB since its - * entries aren't tagged with the LPID. 
- */ -30: ld r5,HSTATE_KVM_VCORE(r13) - ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ - - /* first take native_tlbie_lock */ - .section ".toc","aw" -toc_tlbie_lock: - .tc native_tlbie_lock[TC],native_tlbie_lock - .previous - ld r3,toc_tlbie_lock@toc(r2) -#ifdef __BIG_ENDIAN__ - lwz r8,PACA_LOCK_TOKEN(r13) -#else - lwz r8,PACAPACAINDEX(r13) -#endif -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - - ld r5,HSTATE_KVM_VCORE(r13) - ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */ - li r0,0x18f - rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ - or r0,r7,r0 - ptesync - sync - mtspr SPRN_HID4,r0 /* switch to reserved LPID */ - isync - li r0,0 - stw r0,0(r3) /* drop native_tlbie_lock */ - - /* invalidate the whole TLB */ - li r0,256 - mtctr r0 - li r6,0 -25: tlbiel r6 - addi r6,r6,0x1000 - bdnz 25b - ptesync - /* Take the guest's tlbie_lock */ - addi r3,r9,KVM_TLBIE_LOCK -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - ld r6,KVM_SDR1(r9) - mtspr SPRN_SDR1,r6 /* switch to partition page table */ - - /* Set up HID4 with the guest's LPID etc. */ - sync - mtspr SPRN_HID4,r7 - isync - - /* drop the guest's tlbie_lock */ - li r0,0 - stw r0,0(r3) - - /* Check if HDEC expires soon */ - mfspr r3,SPRN_HDEC - cmpwi r3,10 - li r12,BOOK3S_INTERRUPT_HV_DECREMENTER - blt hdec_soon - - /* Enable HDEC interrupts */ - mfspr r0,SPRN_HID0 - li r3,1 - rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 - sync - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 -31: /* Do we have a guest vcpu to run? 
*/ cmpdi r4, 0 beq kvmppc_primary_no_guest @@ -625,7 +521,6 @@ kvmppc_got_guest: stb r6, VCPU_VPA_DIRTY(r4) 25: -BEGIN_FTR_SECTION /* Save purr/spurr */ mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR @@ -635,7 +530,6 @@ BEGIN_FTR_SECTION ld r8,VCPU_SPURR(r4) mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Set partition DABR */ @@ -644,9 +538,7 @@ BEGIN_FTR_SECTION ld r6,VCPU_DABR(r4) mtspr SPRN_DABRX,r5 mtspr SPRN_DABR,r6 - BEGIN_FTR_SECTION_NESTED(89) isync - END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -777,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) lwz r7, VCPU_PMC + 12(r4) lwz r8, VCPU_PMC + 16(r4) lwz r9, VCPU_PMC + 20(r4) -BEGIN_FTR_SECTION - lwz r10, VCPU_PMC + 24(r4) - lwz r11, VCPU_PMC + 28(r4) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r5 mtspr SPRN_PMC3, r6 mtspr SPRN_PMC4, r7 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) ld r6, VCPU_MMCR + 16(r4) @@ -837,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ld r30, VCPU_GPR(R30)(r4) ld r31, VCPU_GPR(R31)(r4) -BEGIN_FTR_SECTION /* Switch DSCR to guest value */ ld r5, VCPU_DSCR(r4) mtspr SPRN_DSCR, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION - /* Skip next section on POWER7 or PPC970 */ + /* Skip next section on POWER7 */ b 8f END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ @@ -920,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_DAR, r5 mtspr SPRN_DSISR, r6 -BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) ld r6,VCPU_UAMOR(r4) @@ -928,7 +809,6 @@ BEGIN_FTR_SECTION mtspr SPRN_AMR,r5 mtspr SPRN_UAMOR,r6 mtspr SPRN_AMOR,r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 
/* Restore state of CTRL run bit; assume 1 on entry */ lwz r5,VCPU_CTRL(r4) @@ -963,13 +843,11 @@ deliver_guest_interrupt: rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 cmpdi cr1, r0, 0 andi. r8, r11, MSR_EE -BEGIN_FTR_SECTION mfspr r8, SPRN_LPCR /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH mtspr SPRN_LPCR, r8 isync -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) beq 5f li r0, BOOK3S_INTERRUPT_EXTERNAL bne cr1, 12f @@ -1124,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) stw r12,VCPU_TRAP(r9) - /* Save HEIR (HV emulation assist reg) in last_inst + /* Save HEIR (HV emulation assist reg) in emul_inst if this is an HEI (HV emulation interrupt, e40) */ li r3,KVM_INST_FETCH_FAILED -BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST bne 11f mfspr r3,SPRN_HEIR -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) -11: stw r3,VCPU_LAST_INST(r9) +11: stw r3,VCPU_HEIR(r9) /* these are volatile across C function calls */ mfctr r3 @@ -1140,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r3, VCPU_CTR(r9) stw r4, VCPU_XER(r9) -BEGIN_FTR_SECTION /* If this is a page table miss then see if it's theirs or ours */ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE beq kvmppc_hdsi cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE beq kvmppc_hisi -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* See if this is a leftover HDEC interrupt */ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER @@ -1159,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) cmpwi r12,BOOK3S_INTERRUPT_SYSCALL beq hcall_try_real_mode - /* Only handle external interrupts here on arch 206 and later */ -BEGIN_FTR_SECTION - b ext_interrupt_to_host -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) - /* External interrupt ? 
*/ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL bne+ ext_interrupt_to_host @@ -1193,11 +1062,9 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ mfdsisr r7 std r6, VCPU_DAR(r9) stw r7, VCPU_DSISR(r9) -BEGIN_FTR_SECTION /* don't overwrite fault_dar/fault_dsisr if HDSI */ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE beq 6f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r6, VCPU_FAULT_DAR(r9) stw r7, VCPU_FAULT_DSISR(r9) @@ -1236,7 +1103,6 @@ mc_cont: /* * Save the guest PURR/SPURR */ -BEGIN_FTR_SECTION mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR ld r7,VCPU_PURR(r9) @@ -1256,7 +1122,6 @@ BEGIN_FTR_SECTION add r4,r4,r6 mtspr SPRN_PURR,r3 mtspr SPRN_SPURR,r4 -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) /* Save DEC */ mfspr r5,SPRN_DEC @@ -1306,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 8: /* Save and reset AMR and UAMOR before turning on the MMU */ -BEGIN_FTR_SECTION mfspr r5,SPRN_AMR mfspr r6,SPRN_UAMOR std r5,VCPU_AMR(r9) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Switch DSCR back to host value */ -BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR ld r7, HSTATE_DSCR(r13) std r8, VCPU_DSCR(r9) mtspr SPRN_DSCR, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save non-volatile GPRs */ std r14, VCPU_GPR(R14)(r9) @@ -1503,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r4, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ + /* Clear MMCRA in order to disable SDAR updates */ li r7, 0 mtspr SPRN_MMCRA, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync beq 21f /* if no VPA, save PMU stuff anyway */ lbz r7, LPPACA_PMCINUSE(r8) @@ -1532,10 +1391,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r6, SPRN_PMC4 mfspr r7, SPRN_PMC5 mfspr r8, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, VCPU_PMC(r9) stw r4, VCPU_PMC + 
4(r9) stw r5, VCPU_PMC + 8(r9) @@ -1543,10 +1398,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r7, VCPU_PMC + 16(r9) stw r8, VCPU_PMC + 20(r9) BEGIN_FTR_SECTION - stw r10, VCPU_PMC + 24(r9) - stw r11, VCPU_PMC + 28(r9) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -BEGIN_FTR_SECTION mfspr r5, SPRN_SIER mfspr r6, SPRN_SPMC1 mfspr r7, SPRN_SPMC2 @@ -1566,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ptesync hdec_soon: /* r12 = trap, r13 = paca */ -BEGIN_FTR_SECTION - b 32f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* - * POWER7 guest -> host partition switch code. + * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do * have to coordinate the hardware threads. */ @@ -1698,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync - b 33f - - /* - * PPC970 guest -> host partition switch code. - * We have to lock against concurrent tlbies, and - * we have to flush the whole TLB. - */ -32: ld r5,HSTATE_KVM_VCORE(r13) - ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ - - /* Take the guest's tlbie_lock */ -#ifdef __BIG_ENDIAN__ - lwz r8,PACA_LOCK_TOKEN(r13) -#else - lwz r8,PACAPACAINDEX(r13) -#endif - addi r3,r4,KVM_TLBIE_LOCK -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - - ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */ - li r0,0x18f - rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ - or r0,r7,r0 - ptesync - sync - mtspr SPRN_HID4,r0 /* switch to reserved LPID */ - isync - li r0,0 - stw r0,0(r3) /* drop guest tlbie_lock */ - - /* invalidate the whole TLB */ - li r0,256 - mtctr r0 - li r6,0 -25: tlbiel r6 - addi r6,r6,0x1000 - bdnz 25b - ptesync - - /* take native_tlbie_lock */ - ld r3,toc_tlbie_lock@toc(2) -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. 
r8,0,r3 - bne 24b - isync - - ld r6,KVM_HOST_SDR1(r4) - mtspr SPRN_SDR1,r6 /* switch to host page table */ - - /* Set up host HID4 value */ - sync - mtspr SPRN_HID4,r7 - isync - li r0,0 - stw r0,0(r3) /* drop native_tlbie_lock */ - - lis r8,0x7fff /* MAX_INT@h */ - mtspr SPRN_HDEC,r8 - - /* Disable HDEC interrupts */ - mfspr r0,SPRN_HID0 - li r3,0 - rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 - sync - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 /* load host SLB entries */ -33: ld r8,PACA_SLBSHADOWPTR(r13) + ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED li r3, SLBSHADOW_SAVEAREA @@ -2047,7 +1817,7 @@ hcall_real_table: .long 0 /* 0xd8 */ .long 0 /* 0xdc */ .long DOTSYM(kvmppc_h_cede) - hcall_real_table - .long 0 /* 0xe4 */ + .long DOTSYM(kvmppc_rm_h_confer) - hcall_real_table .long 0 /* 0xe8 */ .long 0 /* 0xec */ .long 0 /* 0xf0 */ @@ -2126,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede) stw r0,VCPU_TRAP(r3) li r0,H_SUCCESS std r0,VCPU_GPR(R3)(r3) -BEGIN_FTR_SECTION - b kvm_cede_exit /* just send it up to host on 970 */ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) /* * Set our bit in the bitmask of napping threads unless all the @@ -2455,7 +2222,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 - isync addi r3,r3,VCPU_FPRS bl store_fp_state #ifdef CONFIG_ALTIVEC @@ -2491,7 +2257,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 - isync addi r3,r4,VCPU_FPRS bl load_fp_state #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index bfb8035314e3..bd6ab1672ae6 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb) return kvmppc_get_field(inst, msb + 32, lsb + 32); } -/* - * Replaces inst bits with ordering according to spec. 
- */ -static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) -{ - return kvmppc_set_field(inst, msb + 32, lsb + 32, value); -} - bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) { if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cf2eb16846d1..f57383941d03 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -static inline int get_fpr_index(int i) -{ - return i * TS_FPRWIDTH; -} - /* Give up external provider (FPU, Altivec, VSX) */ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) { diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index eaeb78047fb8..807351f76f84 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, * there might be a previously-rejected interrupt needing * to be resent. * + * ICP state: Check_IPI + * * If the CPPR is less favored, then we might be replacing - * an interrupt, and thus need to possibly reject it as in + * an interrupt, and thus need to possibly reject it. * - * ICP state: Check_IPI + * ICP State: IPI + * + * Besides rejecting any pending interrupts, we also + * update XISR and pending_pri to mark IPI as pending. + * + * PAPR does not describe this state, but if the MFRR is being + * made less favored than its earlier value, there might be + * a previously-rejected interrupt needing to be resent. + * Ideally, we would want to resend only if + * prio(pending_interrupt) < mfrr && + * prio(pending_interrupt) < cppr + * where pending interrupt is the one that was rejected. But + * we don't have that state, so we simply trigger a resend + * whenever the MFRR is made less favored. 
*/ do { old_state = new_state = ACCESS_ONCE(icp->state); @@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, resend = false; if (mfrr < new_state.cppr) { /* Reject a pending interrupt if not an IPI */ - if (mfrr <= new_state.pending_pri) + if (mfrr <= new_state.pending_pri) { reject = new_state.xisr; - new_state.pending_pri = mfrr; - new_state.xisr = XICS_IPI; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } } - if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + if (mfrr > old_state.mfrr) { resend = new_state.need_resend; new_state.need_resend = 0; } @@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) if (icp->rm_action & XICS_RM_KICK_VCPU) kvmppc_fast_vcpu_kick(icp->rm_kick_target); if (icp->rm_action & XICS_RM_CHECK_RESEND) - icp_check_resend(xics, icp); + icp_check_resend(xics, icp->rm_resend_icp); if (icp->rm_action & XICS_RM_REJECT) icp_deliver_irq(xics, icp, icp->rm_reject); if (icp->rm_action & XICS_RM_NOTIFY_EOI) diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index e8aaa7a3f209..73f0f2723c07 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -74,6 +74,7 @@ struct kvmppc_icp { #define XICS_RM_NOTIFY_EOI 0x8 u32 rm_action; struct kvm_vcpu *rm_kick_target; + struct kvmppc_icp *rm_resend_icp; u32 rm_reject; u32 rm_eoied_irq; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 16095841afe1..b29ce752c7d6 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -78,7 +78,7 @@ static inline int local_sid_setup_one(struct id *entry) sid = __this_cpu_inc_return(pcpu_last_used_sid); if (sid < NUM_TIDS) { - __this_cpu_write(pcpu_sids)entry[sid], entry); + __this_cpu_write(pcpu_sids.entry[sid], entry); entry->val = sid; entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); ret = sid; @@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) 
kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); } -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ -} - static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu) { kvmppc_booke_vcpu_load(vcpu, cpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c1f8f53cd312..c45eaab752b0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 0; break; case KVM_CAP_PPC_RMA: - r = hv_enabled; - /* PPC970 requires an RMA */ - if (r && cpu_has_feature(CPU_FTR_ARCH_201)) - r = 2; + r = 0; break; #endif case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - if (hv_enabled) - r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; - else - r = 0; + r = hv_enabled; #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) r = 1; #else diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h new file mode 100644 index 000000000000..f647ce0f428b --- /dev/null +++ b/arch/powerpc/kvm/trace_book3s.h @@ -0,0 +1,32 @@ +#if !defined(_TRACE_KVM_BOOK3S_H) +#define _TRACE_KVM_BOOK3S_H + +/* + * Common defines used by the trace macros in trace_pr.h and trace_hv.h + */ + +#define kvm_trace_symbol_exit \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} + +#endif diff --git a/arch/powerpc/kvm/trace_booke.h 
b/arch/powerpc/kvm/trace_booke.h index f7537cf26ce7..7ec534d1db9f 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release, __entry->pfn, __entry->flags) ); +#ifdef CONFIG_SPE_POSSIBLE +#define kvm_trace_symbol_irqprio_spe \ + {BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \ + {BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \ + {BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"}, +#else +#define kvm_trace_symbol_irqprio_spe +#endif + +#ifdef CONFIG_PPC_E500MC +#define kvm_trace_symbol_irqprio_e500mc \ + {BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \ + {BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"}, +#else +#define kvm_trace_symbol_irqprio_e500mc +#endif + +#define kvm_trace_symbol_irqprio \ + kvm_trace_symbol_irqprio_spe \ + kvm_trace_symbol_irqprio_e500mc \ + {BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \ + {BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \ + {BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \ + {BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \ + {BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \ + {BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \ + {BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \ + {BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \ + {BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \ + {BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \ + {BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \ + {BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \ + {BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \ + {BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \ + {BOOKE_IRQPRIO_FIT, "FIT"}, \ + {BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \ + {BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \ + {BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \ + {BOOKE_IRQPRIO_DBELL, "DBELL"}, \ + {BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \ + TRACE_EVENT(kvm_booke_queue_irqprio, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), TP_ARGS(vcpu, priority), @@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio, __entry->pending = vcpu->arch.pending_exceptions; ), - TP_printk("vcpu=%x 
prio=%x pending=%lx", - __entry->cpu_nr, __entry->priority, __entry->pending) + TP_printk("vcpu=%x prio=%s pending=%lx", + __entry->cpu_nr, + __print_symbolic(__entry->priority, kvm_trace_symbol_irqprio), + __entry->pending) ); #endif diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h new file mode 100644 index 000000000000..33d9daff5783 --- /dev/null +++ b/arch/powerpc/kvm/trace_hv.h @@ -0,0 +1,477 @@ +#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_HV_H + +#include <linux/tracepoint.h> +#include "trace_book3s.h" +#include <asm/hvcall.h> +#include <asm/kvm_asm.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm_hv +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + +#define kvm_trace_symbol_hcall \ + {H_REMOVE, "H_REMOVE"}, \ + {H_ENTER, "H_ENTER"}, \ + {H_READ, "H_READ"}, \ + {H_CLEAR_MOD, "H_CLEAR_MOD"}, \ + {H_CLEAR_REF, "H_CLEAR_REF"}, \ + {H_PROTECT, "H_PROTECT"}, \ + {H_GET_TCE, "H_GET_TCE"}, \ + {H_PUT_TCE, "H_PUT_TCE"}, \ + {H_SET_SPRG0, "H_SET_SPRG0"}, \ + {H_SET_DABR, "H_SET_DABR"}, \ + {H_PAGE_INIT, "H_PAGE_INIT"}, \ + {H_SET_ASR, "H_SET_ASR"}, \ + {H_ASR_ON, "H_ASR_ON"}, \ + {H_ASR_OFF, "H_ASR_OFF"}, \ + {H_LOGICAL_CI_LOAD, "H_LOGICAL_CI_LOAD"}, \ + {H_LOGICAL_CI_STORE, "H_LOGICAL_CI_STORE"}, \ + {H_LOGICAL_CACHE_LOAD, "H_LOGICAL_CACHE_LOAD"}, \ + {H_LOGICAL_CACHE_STORE, "H_LOGICAL_CACHE_STORE"}, \ + {H_LOGICAL_ICBI, "H_LOGICAL_ICBI"}, \ + {H_LOGICAL_DCBF, "H_LOGICAL_DCBF"}, \ + {H_GET_TERM_CHAR, "H_GET_TERM_CHAR"}, \ + {H_PUT_TERM_CHAR, "H_PUT_TERM_CHAR"}, \ + {H_REAL_TO_LOGICAL, "H_REAL_TO_LOGICAL"}, \ + {H_HYPERVISOR_DATA, "H_HYPERVISOR_DATA"}, \ + {H_EOI, "H_EOI"}, \ + {H_CPPR, "H_CPPR"}, \ + {H_IPI, "H_IPI"}, \ + {H_IPOLL, "H_IPOLL"}, \ + {H_XIRR, "H_XIRR"}, \ + {H_PERFMON, "H_PERFMON"}, \ + {H_MIGRATE_DMA, "H_MIGRATE_DMA"}, \ + {H_REGISTER_VPA, "H_REGISTER_VPA"}, \ + {H_CEDE, "H_CEDE"}, \ + {H_CONFER, "H_CONFER"}, \ + {H_PROD, "H_PROD"}, \ + {H_GET_PPP, "H_GET_PPP"}, \ + 
{H_SET_PPP, "H_SET_PPP"}, \ + {H_PURR, "H_PURR"}, \ + {H_PIC, "H_PIC"}, \ + {H_REG_CRQ, "H_REG_CRQ"}, \ + {H_FREE_CRQ, "H_FREE_CRQ"}, \ + {H_VIO_SIGNAL, "H_VIO_SIGNAL"}, \ + {H_SEND_CRQ, "H_SEND_CRQ"}, \ + {H_COPY_RDMA, "H_COPY_RDMA"}, \ + {H_REGISTER_LOGICAL_LAN, "H_REGISTER_LOGICAL_LAN"}, \ + {H_FREE_LOGICAL_LAN, "H_FREE_LOGICAL_LAN"}, \ + {H_ADD_LOGICAL_LAN_BUFFER, "H_ADD_LOGICAL_LAN_BUFFER"}, \ + {H_SEND_LOGICAL_LAN, "H_SEND_LOGICAL_LAN"}, \ + {H_BULK_REMOVE, "H_BULK_REMOVE"}, \ + {H_MULTICAST_CTRL, "H_MULTICAST_CTRL"}, \ + {H_SET_XDABR, "H_SET_XDABR"}, \ + {H_STUFF_TCE, "H_STUFF_TCE"}, \ + {H_PUT_TCE_INDIRECT, "H_PUT_TCE_INDIRECT"}, \ + {H_CHANGE_LOGICAL_LAN_MAC, "H_CHANGE_LOGICAL_LAN_MAC"}, \ + {H_VTERM_PARTNER_INFO, "H_VTERM_PARTNER_INFO"}, \ + {H_REGISTER_VTERM, "H_REGISTER_VTERM"}, \ + {H_FREE_VTERM, "H_FREE_VTERM"}, \ + {H_RESET_EVENTS, "H_RESET_EVENTS"}, \ + {H_ALLOC_RESOURCE, "H_ALLOC_RESOURCE"}, \ + {H_FREE_RESOURCE, "H_FREE_RESOURCE"}, \ + {H_MODIFY_QP, "H_MODIFY_QP"}, \ + {H_QUERY_QP, "H_QUERY_QP"}, \ + {H_REREGISTER_PMR, "H_REREGISTER_PMR"}, \ + {H_REGISTER_SMR, "H_REGISTER_SMR"}, \ + {H_QUERY_MR, "H_QUERY_MR"}, \ + {H_QUERY_MW, "H_QUERY_MW"}, \ + {H_QUERY_HCA, "H_QUERY_HCA"}, \ + {H_QUERY_PORT, "H_QUERY_PORT"}, \ + {H_MODIFY_PORT, "H_MODIFY_PORT"}, \ + {H_DEFINE_AQP1, "H_DEFINE_AQP1"}, \ + {H_GET_TRACE_BUFFER, "H_GET_TRACE_BUFFER"}, \ + {H_DEFINE_AQP0, "H_DEFINE_AQP0"}, \ + {H_RESIZE_MR, "H_RESIZE_MR"}, \ + {H_ATTACH_MCQP, "H_ATTACH_MCQP"}, \ + {H_DETACH_MCQP, "H_DETACH_MCQP"}, \ + {H_CREATE_RPT, "H_CREATE_RPT"}, \ + {H_REMOVE_RPT, "H_REMOVE_RPT"}, \ + {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \ + {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \ + {H_ERROR_DATA, "H_ERROR_DATA"}, \ + {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ + {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ + {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \ + {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \ + {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \ + {H_POLL_PENDING, "H_POLL_PENDING"}, \ + 
{H_ILLAN_ATTRIBUTES, "H_ILLAN_ATTRIBUTES"}, \ + {H_MODIFY_HEA_QP, "H_MODIFY_HEA_QP"}, \ + {H_QUERY_HEA_QP, "H_QUERY_HEA_QP"}, \ + {H_QUERY_HEA, "H_QUERY_HEA"}, \ + {H_QUERY_HEA_PORT, "H_QUERY_HEA_PORT"}, \ + {H_MODIFY_HEA_PORT, "H_MODIFY_HEA_PORT"}, \ + {H_REG_BCMC, "H_REG_BCMC"}, \ + {H_DEREG_BCMC, "H_DEREG_BCMC"}, \ + {H_REGISTER_HEA_RPAGES, "H_REGISTER_HEA_RPAGES"}, \ + {H_DISABLE_AND_GET_HEA, "H_DISABLE_AND_GET_HEA"}, \ + {H_GET_HEA_INFO, "H_GET_HEA_INFO"}, \ + {H_ALLOC_HEA_RESOURCE, "H_ALLOC_HEA_RESOURCE"}, \ + {H_ADD_CONN, "H_ADD_CONN"}, \ + {H_DEL_CONN, "H_DEL_CONN"}, \ + {H_JOIN, "H_JOIN"}, \ + {H_VASI_STATE, "H_VASI_STATE"}, \ + {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \ + {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \ + {H_SET_MPP, "H_SET_MPP"}, \ + {H_GET_MPP, "H_GET_MPP"}, \ + {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \ + {H_BEST_ENERGY, "H_BEST_ENERGY"}, \ + {H_XIRR_X, "H_XIRR_X"}, \ + {H_RANDOM, "H_RANDOM"}, \ + {H_COP, "H_COP"}, \ + {H_GET_MPP_X, "H_GET_MPP_X"}, \ + {H_SET_MODE, "H_SET_MODE"}, \ + {H_RTAS, "H_RTAS"} + +#define kvm_trace_symbol_kvmret \ + {RESUME_GUEST, "RESUME_GUEST"}, \ + {RESUME_GUEST_NV, "RESUME_GUEST_NV"}, \ + {RESUME_HOST, "RESUME_HOST"}, \ + {RESUME_HOST_NV, "RESUME_HOST_NV"} + +#define kvm_trace_symbol_hcall_rc \ + {H_SUCCESS, "H_SUCCESS"}, \ + {H_BUSY, "H_BUSY"}, \ + {H_CLOSED, "H_CLOSED"}, \ + {H_NOT_AVAILABLE, "H_NOT_AVAILABLE"}, \ + {H_CONSTRAINED, "H_CONSTRAINED"}, \ + {H_PARTIAL, "H_PARTIAL"}, \ + {H_IN_PROGRESS, "H_IN_PROGRESS"}, \ + {H_PAGE_REGISTERED, "H_PAGE_REGISTERED"}, \ + {H_PARTIAL_STORE, "H_PARTIAL_STORE"}, \ + {H_PENDING, "H_PENDING"}, \ + {H_CONTINUE, "H_CONTINUE"}, \ + {H_LONG_BUSY_START_RANGE, "H_LONG_BUSY_START_RANGE"}, \ + {H_LONG_BUSY_ORDER_1_MSEC, "H_LONG_BUSY_ORDER_1_MSEC"}, \ + {H_LONG_BUSY_ORDER_10_MSEC, "H_LONG_BUSY_ORDER_10_MSEC"}, \ + {H_LONG_BUSY_ORDER_100_MSEC, "H_LONG_BUSY_ORDER_100_MSEC"}, \ + {H_LONG_BUSY_ORDER_1_SEC, "H_LONG_BUSY_ORDER_1_SEC"}, \ + {H_LONG_BUSY_ORDER_10_SEC, 
"H_LONG_BUSY_ORDER_10_SEC"}, \ + {H_LONG_BUSY_ORDER_100_SEC, "H_LONG_BUSY_ORDER_100_SEC"}, \ + {H_LONG_BUSY_END_RANGE, "H_LONG_BUSY_END_RANGE"}, \ + {H_TOO_HARD, "H_TOO_HARD"}, \ + {H_HARDWARE, "H_HARDWARE"}, \ + {H_FUNCTION, "H_FUNCTION"}, \ + {H_PRIVILEGE, "H_PRIVILEGE"}, \ + {H_PARAMETER, "H_PARAMETER"}, \ + {H_BAD_MODE, "H_BAD_MODE"}, \ + {H_PTEG_FULL, "H_PTEG_FULL"}, \ + {H_NOT_FOUND, "H_NOT_FOUND"}, \ + {H_RESERVED_DABR, "H_RESERVED_DABR"}, \ + {H_NO_MEM, "H_NO_MEM"}, \ + {H_AUTHORITY, "H_AUTHORITY"}, \ + {H_PERMISSION, "H_PERMISSION"}, \ + {H_DROPPED, "H_DROPPED"}, \ + {H_SOURCE_PARM, "H_SOURCE_PARM"}, \ + {H_DEST_PARM, "H_DEST_PARM"}, \ + {H_REMOTE_PARM, "H_REMOTE_PARM"}, \ + {H_RESOURCE, "H_RESOURCE"}, \ + {H_ADAPTER_PARM, "H_ADAPTER_PARM"}, \ + {H_RH_PARM, "H_RH_PARM"}, \ + {H_RCQ_PARM, "H_RCQ_PARM"}, \ + {H_SCQ_PARM, "H_SCQ_PARM"}, \ + {H_EQ_PARM, "H_EQ_PARM"}, \ + {H_RT_PARM, "H_RT_PARM"}, \ + {H_ST_PARM, "H_ST_PARM"}, \ + {H_SIGT_PARM, "H_SIGT_PARM"}, \ + {H_TOKEN_PARM, "H_TOKEN_PARM"}, \ + {H_MLENGTH_PARM, "H_MLENGTH_PARM"}, \ + {H_MEM_PARM, "H_MEM_PARM"}, \ + {H_MEM_ACCESS_PARM, "H_MEM_ACCESS_PARM"}, \ + {H_ATTR_PARM, "H_ATTR_PARM"}, \ + {H_PORT_PARM, "H_PORT_PARM"}, \ + {H_MCG_PARM, "H_MCG_PARM"}, \ + {H_VL_PARM, "H_VL_PARM"}, \ + {H_TSIZE_PARM, "H_TSIZE_PARM"}, \ + {H_TRACE_PARM, "H_TRACE_PARM"}, \ + {H_MASK_PARM, "H_MASK_PARM"}, \ + {H_MCG_FULL, "H_MCG_FULL"}, \ + {H_ALIAS_EXIST, "H_ALIAS_EXIST"}, \ + {H_P_COUNTER, "H_P_COUNTER"}, \ + {H_TABLE_FULL, "H_TABLE_FULL"}, \ + {H_ALT_TABLE, "H_ALT_TABLE"}, \ + {H_MR_CONDITION, "H_MR_CONDITION"}, \ + {H_NOT_ENOUGH_RESOURCES, "H_NOT_ENOUGH_RESOURCES"}, \ + {H_R_STATE, "H_R_STATE"}, \ + {H_RESCINDED, "H_RESCINDED"}, \ + {H_P2, "H_P2"}, \ + {H_P3, "H_P3"}, \ + {H_P4, "H_P4"}, \ + {H_P5, "H_P5"}, \ + {H_P6, "H_P6"}, \ + {H_P7, "H_P7"}, \ + {H_P8, "H_P8"}, \ + {H_P9, "H_P9"}, \ + {H_TOO_BIG, "H_TOO_BIG"}, \ + {H_OVERLAP, "H_OVERLAP"}, \ + {H_INTERRUPT, "H_INTERRUPT"}, \ + {H_BAD_DATA, "H_BAD_DATA"}, \ + 
{H_NOT_ACTIVE, "H_NOT_ACTIVE"}, \ + {H_SG_LIST, "H_SG_LIST"}, \ + {H_OP_MODE, "H_OP_MODE"}, \ + {H_COP_HW, "H_COP_HW"}, \ + {H_UNSUPPORTED_FLAG_START, "H_UNSUPPORTED_FLAG_START"}, \ + {H_UNSUPPORTED_FLAG_END, "H_UNSUPPORTED_FLAG_END"}, \ + {H_MULTI_THREADS_ACTIVE, "H_MULTI_THREADS_ACTIVE"}, \ + {H_OUTSTANDING_COP_OPS, "H_OUTSTANDING_COP_OPS"} + +TRACE_EVENT(kvm_guest_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, pc) + __field(unsigned long, pending_exceptions) + __field(u8, ceded) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->ceded = vcpu->arch.ceded; + __entry->pending_exceptions = vcpu->arch.pending_exceptions; + ), + + TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d", + __entry->vcpu_id, + __entry->pc, + __entry->pending_exceptions, __entry->ceded) +); + +TRACE_EVENT(kvm_guest_exit, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, trap) + __field(unsigned long, pc) + __field(unsigned long, msr) + __field(u8, ceded) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->trap = vcpu->arch.trap; + __entry->ceded = vcpu->arch.ceded; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->msr = vcpu->arch.shregs.msr; + ), + + TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d", + __entry->vcpu_id, + __print_symbolic(__entry->trap, kvm_trace_symbol_exit), + __entry->pc, __entry->msr, __entry->ceded + ) +); + +TRACE_EVENT(kvm_page_fault_enter, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, + struct kvm_memory_slot *memslot, unsigned long ea, + unsigned long dsisr), + + TP_ARGS(vcpu, hptep, memslot, ea, dsisr), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, hpte_v) + __field(unsigned long, hpte_r) + __field(unsigned long, gpte_r) + __field(unsigned long, ea) + __field(u64, base_gfn) + __field(u32, slot_flags) + 
__field(u32, dsisr) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->hpte_v = hptep[0]; + __entry->hpte_r = hptep[1]; + __entry->gpte_r = hptep[2]; + __entry->ea = ea; + __entry->dsisr = dsisr; + __entry->base_gfn = memslot ? memslot->base_gfn : -1UL; + __entry->slot_flags = memslot ? memslot->flags : 0; + ), + + TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x", + __entry->vcpu_id, + __entry->hpte_v, __entry->hpte_r, __entry->gpte_r, + __entry->ea, __entry->dsisr, + __entry->base_gfn, __entry->slot_flags) +); + +TRACE_EVENT(kvm_page_fault_exit, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret), + + TP_ARGS(vcpu, hptep, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, hpte_v) + __field(unsigned long, hpte_r) + __field(long, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->hpte_v = hptep[0]; + __entry->hpte_r = hptep[1]; + __entry->ret = ret; + ), + + TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx", + __entry->vcpu_id, + __entry->hpte_v, __entry->hpte_r, __entry->ret) +); + +TRACE_EVENT(kvm_hcall_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, req) + __field(unsigned long, gpr4) + __field(unsigned long, gpr5) + __field(unsigned long, gpr6) + __field(unsigned long, gpr7) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->req = kvmppc_get_gpr(vcpu, 3); + __entry->gpr4 = kvmppc_get_gpr(vcpu, 4); + __entry->gpr5 = kvmppc_get_gpr(vcpu, 5); + __entry->gpr6 = kvmppc_get_gpr(vcpu, 6); + __entry->gpr7 = kvmppc_get_gpr(vcpu, 7); + ), + + TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx", + __entry->vcpu_id, + __print_symbolic(__entry->req, kvm_trace_symbol_hcall), + __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7) +); + +TRACE_EVENT(kvm_hcall_exit, + TP_PROTO(struct kvm_vcpu *vcpu, int ret), + + TP_ARGS(vcpu, ret), + + 
TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, ret) + __field(unsigned long, hcall_rc) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->ret = ret; + __entry->hcall_rc = kvmppc_get_gpr(vcpu, 3); + ), + + TP_printk("VCPU %d: ret=%s hcall_rc=%s", + __entry->vcpu_id, + __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret), + __print_symbolic(__entry->ret & RESUME_FLAG_HOST ? + H_TOO_HARD : __entry->hcall_rc, + kvm_trace_symbol_hcall_rc)) +); + +TRACE_EVENT(kvmppc_run_core, + TP_PROTO(struct kvmppc_vcore *vc, int where), + + TP_ARGS(vc, where), + + TP_STRUCT__entry( + __field(int, n_runnable) + __field(int, runner_vcpu) + __field(int, where) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->runner_vcpu = vc->runner->vcpu_id; + __entry->n_runnable = vc->n_runnable; + __entry->where = where; + __entry->tgid = current->tgid; + ), + + TP_printk("%s runner_vcpu==%d runnable=%d tgid=%d", + __entry->where ? "Exit" : "Enter", + __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) +); + +TRACE_EVENT(kvmppc_vcore_blocked, + TP_PROTO(struct kvmppc_vcore *vc, int where), + + TP_ARGS(vc, where), + + TP_STRUCT__entry( + __field(int, n_runnable) + __field(int, runner_vcpu) + __field(int, where) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->runner_vcpu = vc->runner->vcpu_id; + __entry->n_runnable = vc->n_runnable; + __entry->where = where; + __entry->tgid = current->tgid; + ), + + TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d", + __entry->where ? 
"Exit" : "Enter", + __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) +); + +TRACE_EVENT(kvmppc_run_vcpu_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->tgid = current->tgid; + ), + + TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid) +); + +TRACE_EVENT(kvmppc_run_vcpu_exit, + TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run), + + TP_ARGS(vcpu, run), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, exit) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->exit = run->exit_reason; + __entry->ret = vcpu->arch.ret; + ), + + TP_printk("VCPU %d: exit=%d, ret=%d", + __entry->vcpu_id, __entry->exit, __entry->ret) +); + +#endif /* _TRACE_KVM_HV_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index e1357cd8dc1f..810507cb688a 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -3,36 +3,13 @@ #define _TRACE_KVM_PR_H #include <linux/tracepoint.h> +#include "trace_book3s.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr #define TRACE_INCLUDE_PATH . 
#define TRACE_INCLUDE_FILE trace_pr -#define kvm_trace_symbol_exit \ - {0x100, "SYSTEM_RESET"}, \ - {0x200, "MACHINE_CHECK"}, \ - {0x300, "DATA_STORAGE"}, \ - {0x380, "DATA_SEGMENT"}, \ - {0x400, "INST_STORAGE"}, \ - {0x480, "INST_SEGMENT"}, \ - {0x500, "EXTERNAL"}, \ - {0x501, "EXTERNAL_LEVEL"}, \ - {0x502, "EXTERNAL_HV"}, \ - {0x600, "ALIGNMENT"}, \ - {0x700, "PROGRAM"}, \ - {0x800, "FP_UNAVAIL"}, \ - {0x900, "DECREMENTER"}, \ - {0x980, "HV_DECREMENTER"}, \ - {0xc00, "SYSCALL"}, \ - {0xd00, "TRACE"}, \ - {0xe00, "H_DATA_STORAGE"}, \ - {0xe20, "H_INST_STORAGE"}, \ - {0xe40, "H_EMUL_ASSIST"}, \ - {0xf00, "PERFMON"}, \ - {0xf20, "ALTIVEC"}, \ - {0xf40, "VSX"} - TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), TP_ARGS(r, vcpu), diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index dba34088da28..f162d0b8eea3 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -177,7 +177,7 @@ static ssize_t _name##_show(struct device *dev, \ } \ ret = sprintf(buf, _fmt, _expr); \ e_free: \ - kfree(page); \ + kmem_cache_free(hv_page_cache, page); \ return ret; \ } \ static DEVICE_ATTR_RO(_name) @@ -217,11 +217,14 @@ static bool is_physical_domain(int domain) domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE; } +DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096); +DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096); + static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, u16 lpar, u64 *res, bool success_expected) { - unsigned long ret = -ENOMEM; + unsigned long ret; /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -243,13 +246,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, BUILD_BUG_ON(sizeof(*request_buffer) > 4096); BUILD_BUG_ON(sizeof(*result_buffer) > 4096); - request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); - if (!request_buffer) - goto out; + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void 
*)get_cpu_var(hv_24x7_resb); - result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); - if (!result_buffer) - goto out_free_request_buffer; + memset(request_buffer, 0, 4096); + memset(result_buffer, 0, 4096); *request_buffer = (struct reqb) { .buf = { @@ -278,15 +279,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, domain, offset, ix, lpar, ret, ret, result_buffer->buf.detailed_rc, result_buffer->buf.failing_request_ix); - goto out_free_result_buffer; + goto out; } *res = be64_to_cpu(result_buffer->result); -out_free_result_buffer: - kfree(result_buffer); -out_free_request_buffer: - kfree(request_buffer); out: return ret; } diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 0a299be588af..54eca8b3b288 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -158,6 +158,43 @@ opal_tracepoint_return: blr #endif +/* + * Make opal call in realmode. This is a generic function to be called + * from realmode. It handles endianness. 
+ * + * r13 - paca pointer + * r1 - stack pointer + * r0 - opal token + */ +_GLOBAL(opal_call_realmode) + mflr r12 + std r12,PPC_LR_STKOFF(r1) + ld r2,PACATOC(r13) + /* Set opal return address */ + LOAD_REG_ADDR(r12,return_from_opal_call) + mtlr r12 + + mfmsr r12 +#ifdef __LITTLE_ENDIAN__ + /* Handle endian-ness */ + li r11,MSR_LE + andc r12,r12,r11 +#endif + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +return_from_opal_call: +#ifdef __LITTLE_ENDIAN__ + FIXUP_ENDIAN +#endif + ld r12,PPC_LR_STKOFF(r1) + mtlr r12 + blr + OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); @@ -247,6 +284,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); @@ -254,3 +292,4 @@ OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO); OPAL_CALL(opal_tpo_read, OPAL_READ_TPO); OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); +OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index cb0b6de79cd4..f10b9ec8c1f5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -9,8 +9,9 @@ * 2 of the License, or (at your option) any later version. 
*/ -#undef DEBUG +#define pr_fmt(fmt) "opal: " fmt +#include <linux/printk.h> #include <linux/types.h> #include <linux/of.h> #include <linux/of_fdt.h> @@ -625,6 +626,39 @@ static int opal_sysfs_init(void) return 0; } +static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + return memory_read_from_buffer(buf, count, &off, bin_attr->private, + bin_attr->size); +} + +static BIN_ATTR_RO(symbol_map, 0); + +static void opal_export_symmap(void) +{ + const __be64 *syms; + unsigned int size; + struct device_node *fw; + int rc; + + fw = of_find_node_by_path("/ibm,opal/firmware"); + if (!fw) + return; + syms = of_get_property(fw, "symbol-map", &size); + if (!syms || size != 2 * sizeof(__be64)) + return; + + /* Setup attributes */ + bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0])); + bin_attr_symbol_map.size = be64_to_cpu(syms[1]); + + rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map); + if (rc) + pr_warn("Error %d creating OPAL symbols file\n", rc); +} + static void __init opal_dump_region_init(void) { void *addr; @@ -653,6 +687,14 @@ static void opal_ipmi_init(struct device_node *opal_node) of_platform_device_create(np, NULL, NULL); } +static void opal_i2c_create_devs(void) +{ + struct device_node *np; + + for_each_compatible_node(np, NULL, "ibm,opal-i2c") + of_platform_device_create(np, NULL, NULL); +} + static int __init opal_init(void) { struct device_node *np, *consoles; @@ -679,6 +721,9 @@ static int __init opal_init(void) of_node_put(consoles); } + /* Create i2c platform devices */ + opal_i2c_create_devs(); + /* Find all OPAL interrupts and request them */ irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); pr_debug("opal: Found %d interrupts reserved for OPAL\n", @@ -702,6 +747,8 @@ static int __init opal_init(void) /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { + /* Export symbol map to userspace */ + 
opal_export_symmap(); /* Setup dump region interface */ opal_dump_region_init(); /* Setup error log interface */ @@ -824,3 +871,4 @@ EXPORT_SYMBOL_GPL(opal_rtc_read); EXPORT_SYMBOL_GPL(opal_rtc_write); EXPORT_SYMBOL_GPL(opal_tpo_read); EXPORT_SYMBOL_GPL(opal_tpo_write); +EXPORT_SYMBOL_GPL(opal_i2c_request); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 6c8e2d188cd0..604c48e7879a 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) } #endif +extern u32 pnv_get_supported_cpuidle_states(void); + extern void pnv_lpc_init(void); bool cpu_core_split_required(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 30b1c3e298a6..b700a329c31d 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -36,8 +36,12 @@ #include <asm/opal.h> #include <asm/kexec.h> #include <asm/smp.h> +#include <asm/cputhreads.h> +#include <asm/cpuidle.h> +#include <asm/code-patching.h> #include "powernv.h" +#include "subcore.h" static void __init pnv_setup_arch(void) { @@ -288,6 +292,168 @@ static void __init pnv_setup_machdep_rtas(void) } #endif /* CONFIG_PPC_POWERNV_RTAS */ +static u32 supported_cpuidle_states; + +int pnv_save_sprs_for_winkle(void) +{ + int cpu; + int rc; + + /* + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross + * all cpus at boot. Get these reg values of current cpu and use the + * same accross all cpus. 
+ */ + uint64_t lpcr_val = mfspr(SPRN_LPCR); + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); + uint64_t hmeer_val = mfspr(SPRN_HMEER); + + for_each_possible_cpu(cpu) { + uint64_t pir = get_hard_smp_processor_id(cpu); + uint64_t hsprg0_val = (uint64_t)&paca[cpu]; + + /* + * HSPRG0 is used to store the cpu's pointer to paca. Hence last + * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 + * with 63rd bit set, so that when a thread wakes up at 0x100 we + * can use this bit to distinguish between fastsleep and + * deep winkle. + */ + hsprg0_val |= 1; + + rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); + if (rc != 0) + return rc; + + /* HIDs are per core registers */ + if (cpu_thread_in_core(cpu) == 0) { + + rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); + if (rc != 0) + return rc; + } + } + + return 0; +} + +static void pnv_alloc_idle_core_states(void) +{ + int i, j; + int nr_cores = cpu_nr_cores(); + u32 *core_idle_state; + + /* + * core_idle_state - First 8 bits track the idle state of each thread + * of the core. The 8th bit is the lock bit. Initially all thread bits + * are set. They are cleared when the thread enters deep idle state + * like sleep and winkle. Initially the lock bit is cleared. + * The lock bit has 2 purposes + * a. While the first thread is restoring core state, it prevents + * other threads in the core from switching to process context. + * b. 
While the last thread in the core is saving the core state, it + * prevents a different thread from waking up. + */ + for (i = 0; i < nr_cores; i++) { + int first_cpu = i * threads_per_core; + int node = cpu_to_node(first_cpu); + + core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); + *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; + + for (j = 0; j < threads_per_core; j++) { + int cpu = first_cpu + j; + + paca[cpu].core_idle_state_ptr = core_idle_state; + paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; + paca[cpu].thread_mask = 1 << j; + } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) + pnv_save_sprs_for_winkle(); +} + +u32 pnv_get_supported_cpuidle_states(void) +{ + return supported_cpuidle_states; +} +EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + +static int __init pnv_init_idle_states(void) +{ + struct device_node *power_mgt; + int dt_idle_states; + const __be32 *idle_state_flags; + u32 len_flags, flags; + int i; + + supported_cpuidle_states = 0; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + return 0; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + return 0; + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + return 0; + } + + idle_state_flags = of_get_property(power_mgt, + "ibm,cpu-idle-state-flags", &len_flags); + if (!idle_state_flags) { + pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); + return 0; + } + + dt_idle_states = len_flags / sizeof(u32); + + for (i = 0; i < dt_idle_states; i++) { + flags = be32_to_cpu(idle_state_flags[i]); + supported_cpuidle_states |= flags; + } + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + } + pnv_alloc_idle_core_states(); + return 0; +} + 
+subsys_initcall(pnv_init_idle_states); + static int __init pnv_probe(void) { unsigned long root = of_get_flat_dt_root(); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index b716f666e48a..fc34025ef822 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -150,6 +150,7 @@ static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; unsigned long srr1; + u32 idle_states; /* Standard hot unplug procedure */ local_irq_disable(); @@ -160,13 +161,23 @@ static void pnv_smp_cpu_kill_self(void) generic_set_cpu_dead(cpu); smp_wmb(); + idle_states = pnv_get_supported_cpuidle_states(); /* We don't want to take decrementer interrupts while we are offline, * so clear LPCR:PECE1. We keep PECE2 enabled. */ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); while (!generic_check_cpu_restart(cpu)) { + ppc64_runlatch_off(); - srr1 = power7_nap(1); + + if (idle_states & OPAL_PM_WINKLE_ENABLED) + srr1 = power7_winkle(); + else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || + (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) + srr1 = power7_sleep(); + else + srr1 = power7_nap(1); + ppc64_runlatch_on(); /* @@ -198,13 +209,27 @@ static void pnv_smp_cpu_kill_self(void) #endif /* CONFIG_HOTPLUG_CPU */ +static int pnv_cpu_bootable(unsigned int nr) +{ + /* + * Starting with POWER8, the subcore logic relies on all threads of a + * core being booted so that they can participate in split mode + * switches. So on those machines we ignore the smt_enabled_at_boot + * setting (smt-enabled on the kernel command line). 
+ */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return 1; + + return smp_generic_cpu_bootable(nr); +} + static struct smp_ops_t pnv_smp_ops = { .message_pass = smp_muxed_ipi_message_pass, .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */ .probe = xics_smp_probe, .kick_cpu = pnv_smp_kick_cpu, .setup_cpu = pnv_smp_setup_cpu, - .cpu_bootable = smp_generic_cpu_bootable, + .cpu_bootable = pnv_cpu_bootable, #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = pnv_smp_cpu_disable, .cpu_die = generic_cpu_die, diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index c87f96b79d1a..f60f80ada903 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -160,6 +160,18 @@ static void wait_for_sync_step(int step) mb(); } +static void update_hid_in_slw(u64 hid0) +{ + u64 idle_states = pnv_get_supported_cpuidle_states(); + + if (idle_states & OPAL_PM_WINKLE_ENABLED) { + /* OPAL call to patch slw with the new HID0 value */ + u64 cpu_pir = hard_smp_processor_id(); + + opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0); + } +} + static void unsplit_core(void) { u64 hid0, mask; @@ -179,6 +191,7 @@ static void unsplit_core(void) hid0 = mfspr(SPRN_HID0); hid0 &= ~HID0_POWER8_DYNLPARDIS; mtspr(SPRN_HID0, hid0); + update_hid_in_slw(hid0); while (mfspr(SPRN_HID0) & mask) cpu_relax(); @@ -215,6 +228,7 @@ static void split_core(int new_mode) hid0 = mfspr(SPRN_HID0); hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; mtspr(SPRN_HID0, hid0); + update_hid_in_slw(hid0); /* Wait for it to happen */ while (!(mfspr(SPRN_HID0) & split_parms[i].mask)) @@ -251,6 +265,25 @@ bool cpu_core_split_required(void) return true; } +void update_subcore_sibling_mask(void) +{ + int cpu; + /* + * sibling mask for the first cpu. Left shift this by required bits + * to get sibling mask for the rest of the cpus. 
+ */ + int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1; + + for_each_possible_cpu(cpu) { + int tid = cpu_thread_in_core(cpu); + int offset = (tid / threads_per_subcore) * threads_per_subcore; + int mask = sibling_mask_first_cpu << offset; + + paca[cpu].subcore_sibling_mask = mask; + + } +} + static int cpu_update_split_mode(void *data) { int cpu, new_mode = *(int *)data; @@ -284,6 +317,7 @@ static int cpu_update_split_mode(void *data) /* Make the new mode public */ subcores_per_core = new_mode; threads_per_subcore = threads_per_core / subcores_per_core; + update_subcore_sibling_mask(); /* Make sure the new mode is written before we exit */ mb(); diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h index 148abc91debf..84e02ae52895 100644 --- a/arch/powerpc/platforms/powernv/subcore.h +++ b/arch/powerpc/platforms/powernv/subcore.h @@ -14,5 +14,12 @@ #define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */ #ifndef __ASSEMBLY__ + +#ifdef CONFIG_SMP void split_core_secondary_loop(u8 *state); -#endif +extern void update_subcore_sibling_mask(void); +#else +static inline void update_subcore_sibling_mask(void) { }; +#endif /* CONFIG_SMP */ + +#endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2175f911a73a..9cba74d5d853 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -123,7 +123,7 @@ struct kvm_s390_sie_block { #define ICPT_PARTEXEC 0x38 #define ICPT_IOINST 0x40 __u8 icptcode; /* 0x0050 */ - __u8 reserved51; /* 0x0051 */ + __u8 icptstatus; /* 0x0051 */ __u16 ihcpu; /* 0x0052 */ __u8 reserved54[2]; /* 0x0054 */ __u16 ipa; /* 0x0056 */ @@ -226,10 +226,17 @@ struct kvm_vcpu_stat { u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; + u32 instruction_sigp_cond_emergency; + u32 instruction_sigp_start; u32 instruction_sigp_stop; + u32 
instruction_sigp_stop_store_status; + u32 instruction_sigp_store_status; u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; + u32 instruction_sigp_init_cpu_reset; + u32 instruction_sigp_cpu_reset; + u32 instruction_sigp_unknown; u32 diagnose_10; u32 diagnose_44; u32 diagnose_9c; @@ -288,6 +295,79 @@ struct kvm_vcpu_stat { #define PGM_PER 0x80 #define PGM_CRYPTO_OPERATION 0x119 +/* irq types in order of priority */ +enum irq_types { + IRQ_PEND_MCHK_EX = 0, + IRQ_PEND_SVC, + IRQ_PEND_PROG, + IRQ_PEND_MCHK_REP, + IRQ_PEND_EXT_IRQ_KEY, + IRQ_PEND_EXT_MALFUNC, + IRQ_PEND_EXT_EMERGENCY, + IRQ_PEND_EXT_EXTERNAL, + IRQ_PEND_EXT_CLOCK_COMP, + IRQ_PEND_EXT_CPU_TIMER, + IRQ_PEND_EXT_TIMING, + IRQ_PEND_EXT_SERVICE, + IRQ_PEND_EXT_HOST, + IRQ_PEND_PFAULT_INIT, + IRQ_PEND_PFAULT_DONE, + IRQ_PEND_VIRTIO, + IRQ_PEND_IO_ISC_0, + IRQ_PEND_IO_ISC_1, + IRQ_PEND_IO_ISC_2, + IRQ_PEND_IO_ISC_3, + IRQ_PEND_IO_ISC_4, + IRQ_PEND_IO_ISC_5, + IRQ_PEND_IO_ISC_6, + IRQ_PEND_IO_ISC_7, + IRQ_PEND_SIGP_STOP, + IRQ_PEND_RESTART, + IRQ_PEND_SET_PREFIX, + IRQ_PEND_COUNT +}; + +/* + * Repressible (non-floating) machine check interrupts + * subclass bits in MCIC + */ +#define MCHK_EXTD_BIT 58 +#define MCHK_DEGR_BIT 56 +#define MCHK_WARN_BIT 55 +#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \ + (1UL << MCHK_EXTD_BIT) | \ + (1UL << MCHK_WARN_BIT)) + +/* Exigent machine check interrupts subclass bits in MCIC */ +#define MCHK_SD_BIT 63 +#define MCHK_PD_BIT 62 +#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT)) + +#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY) | \ + (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \ + (1UL << IRQ_PEND_EXT_CPU_TIMER) | \ + (1UL << IRQ_PEND_EXT_MALFUNC) | \ + (1UL << IRQ_PEND_EXT_EMERGENCY) | \ + (1UL << IRQ_PEND_EXT_EXTERNAL) | \ + (1UL << IRQ_PEND_EXT_TIMING) | \ + (1UL << IRQ_PEND_EXT_HOST) | \ + (1UL << IRQ_PEND_EXT_SERVICE) | \ + (1UL << IRQ_PEND_VIRTIO) | \ + (1UL << IRQ_PEND_PFAULT_INIT) | \ + (1UL << 
IRQ_PEND_PFAULT_DONE)) + +#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \ + (1UL << IRQ_PEND_IO_ISC_1) | \ + (1UL << IRQ_PEND_IO_ISC_2) | \ + (1UL << IRQ_PEND_IO_ISC_3) | \ + (1UL << IRQ_PEND_IO_ISC_4) | \ + (1UL << IRQ_PEND_IO_ISC_5) | \ + (1UL << IRQ_PEND_IO_ISC_6) | \ + (1UL << IRQ_PEND_IO_ISC_7)) + +#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \ + (1UL << IRQ_PEND_MCHK_EX)) + struct kvm_s390_interrupt_info { struct list_head list; u64 type; @@ -306,14 +386,25 @@ struct kvm_s390_interrupt_info { #define ACTION_STORE_ON_STOP (1<<0) #define ACTION_STOP_ON_STOP (1<<1) +struct kvm_s390_irq_payload { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_mchk_info mchk; +}; + struct kvm_s390_local_interrupt { spinlock_t lock; - struct list_head list; - atomic_t active; struct kvm_s390_float_interrupt *float_int; wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; + DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); + struct kvm_s390_irq_payload irq; + unsigned long pending_irqs; }; struct kvm_s390_float_interrupt { @@ -434,6 +525,8 @@ struct kvm_arch{ int user_cpu_state_ctrl; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; + int ipte_lock_count; + struct mutex ipte_mutex; spinlock_t start_stop_lock; struct kvm_s390_crypto crypto; }; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index e510b9460efa..3009c2ba46d2 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -24,6 +24,7 @@ void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq); +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); static inline void 
clear_table(unsigned long *s, unsigned long val, size_t n) { diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 49576115dbb7..fad4ae23ece0 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -10,6 +10,7 @@ #define SIGP_RESTART 6 #define SIGP_STOP_AND_STORE_STATUS 9 #define SIGP_INITIAL_CPU_RESET 11 +#define SIGP_CPU_RESET 12 #define SIGP_SET_PREFIX 13 #define SIGP_STORE_STATUS_AT_ADDRESS 14 #define SIGP_SET_ARCHITECTURE 18 diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index ca38139423ae..437e61159279 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis struct group_info *group_info; int retval; - if (!capable(CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0f961a1c64b3..8a1be9017730 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -207,8 +207,6 @@ union raddress { unsigned long pfra : 52; /* Page-Frame Real Address */ }; -static int ipte_lock_count; -static DEFINE_MUTEX(ipte_mutex); int ipte_lock_held(struct kvm_vcpu *vcpu) { @@ -216,47 +214,48 @@ int ipte_lock_held(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->eca & 1) return ic->kh != 0; - return ipte_lock_count != 0; + return vcpu->kvm->arch.ipte_lock_count != 0; } static void ipte_lock_simple(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - mutex_lock(&ipte_mutex); - ipte_lock_count++; - if (ipte_lock_count > 1) + mutex_lock(&vcpu->kvm->arch.ipte_mutex); + vcpu->kvm->arch.ipte_lock_count++; + if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = ACCESS_ONCE(*ic); + old = READ_ONCE(*ic); while (old.k) { cond_resched(); - old = ACCESS_ONCE(*ic); + old = READ_ONCE(*ic); } new = old; 
new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); out: - mutex_unlock(&ipte_mutex); + mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } static void ipte_unlock_simple(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - mutex_lock(&ipte_mutex); - ipte_lock_count--; - if (ipte_lock_count) + mutex_lock(&vcpu->kvm->arch.ipte_mutex); + vcpu->kvm->arch.ipte_lock_count--; + if (vcpu->kvm->arch.ipte_lock_count) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - new = old = ACCESS_ONCE(*ic); + old = READ_ONCE(*ic); + new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); wake_up(&vcpu->kvm->arch.ipte_wq); out: - mutex_unlock(&ipte_mutex); + mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } static void ipte_lock_siif(struct kvm_vcpu *vcpu) @@ -265,10 +264,10 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = ACCESS_ONCE(*ic); + old = READ_ONCE(*ic); while (old.kg) { cond_resched(); - old = ACCESS_ONCE(*ic); + old = READ_ONCE(*ic); } new = old; new.k = 1; @@ -282,7 +281,8 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - new = old = ACCESS_ONCE(*ic); + old = READ_ONCE(*ic); + new = old; new.kh--; if (!new.kh) new.k = 0; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index eaf46291d361..81c77ab8102e 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -38,6 +38,19 @@ static const intercept_handler_t instruction_handlers[256] = { [0xeb] = kvm_s390_handle_eb, }; +void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) +{ + struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + + /* Use the length of the EXECUTE instruction if necessary */ + if (sie_block->icptstatus & 1) { + ilc = (sie_block->icptstatus >> 4) & 0x6; + if (!ilc) + ilc = 4; + } + sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc); +} + static int handle_noop(struct kvm_vcpu *vcpu) { switch 
(vcpu->arch.sie_block->icptcode) { @@ -244,7 +257,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) static int handle_external_interrupt(struct kvm_vcpu *vcpu) { u16 eic = vcpu->arch.sie_block->eic; - struct kvm_s390_interrupt irq; + struct kvm_s390_irq irq; psw_t newpsw; int rc; @@ -269,7 +282,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) if (kvm_s390_si_ext_call_pending(vcpu)) return 0; irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.parm = vcpu->arch.sie_block->extcpuaddr; + irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr; break; default: return -EOPNOTSUPP; @@ -288,7 +301,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) */ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; unsigned long srcaddr, dstaddr; int reg1, reg2, rc; @@ -310,7 +322,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) if (rc != 0) return rc; - psw->addr = __rewind_psw(*psw, 4); + kvm_s390_rewind_psw(vcpu, 4); return 0; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a39838457f01..f00f31e66cd8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -16,6 +16,7 @@ #include <linux/mmu_context.h> #include <linux/signal.h> #include <linux/slab.h> +#include <linux/bitmap.h> #include <asm/asm-offsets.h> #include <asm/uaccess.h> #include "kvm-s390.h" @@ -27,8 +28,8 @@ #define IOINT_CSSID_MASK 0x03fc0000 #define IOINT_AI_MASK 0x04000000 #define PFAULT_INIT 0x0600 - -static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu); +#define PFAULT_DONE 0x0680 +#define VIRTIO_PARAM 0x0d00 static int is_ioint(u64 type) { @@ -136,6 +137,31 @@ static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 0; } +static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.local_int.pending_irqs; +} + +static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) +{ + unsigned long active_mask = 
pending_local_irqs(vcpu); + + if (psw_extint_disabled(vcpu)) + active_mask &= ~IRQ_PEND_EXT_MASK; + if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul)) + __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul)) + __clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) + __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul)) + __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask); + if (psw_mchk_disabled(vcpu)) + active_mask &= ~IRQ_PEND_MCHK_MASK; + + return active_mask; +} + static void __set_cpu_idle(struct kvm_vcpu *vcpu) { atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); @@ -170,26 +196,45 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); } +static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) +{ + if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK)) + return; + if (psw_extint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_EXT_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR0; +} + +static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) +{ + if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) + return; + if (psw_mchk_disabled(vcpu)) + vcpu->arch.sie_block->ictl |= ICTL_LPSW; + else + vcpu->arch.sie_block->lctl |= LCTL_CR14; +} + +/* Set interception request for non-deliverable local interrupts */ +static void set_intercept_indicators_local(struct kvm_vcpu *vcpu) +{ + set_intercept_indicators_ext(vcpu); + set_intercept_indicators_mchk(vcpu); +} + static void __set_intercept_indicator(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { - case KVM_S390_INT_EXTERNAL_CALL: - case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_SERVICE: - case KVM_S390_INT_PFAULT_INIT: case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: - case KVM_S390_INT_CLOCK_COMP: - case KVM_S390_INT_CPU_TIMER: if 
(psw_extint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_EXT_INT); else vcpu->arch.sie_block->lctl |= LCTL_CR0; break; - case KVM_S390_SIGP_STOP: - __set_cpuflag(vcpu, CPUSTAT_STOP_INT); - break; case KVM_S390_MCHK: if (psw_mchk_disabled(vcpu)) vcpu->arch.sie_block->ictl |= ICTL_LPSW; @@ -226,13 +271,236 @@ static u16 get_ilc(struct kvm_vcpu *vcpu) } } -static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info) +static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, + 0, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, + 0, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP, + (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + return rc ? 
-EFAULT : 0; +} + +static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_ext_info ext; + int rc; + + spin_lock(&li->lock); + ext = li->irq.ext; + clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); + li->irq.ext.ext_params2 = 0; + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx", + 0, ext.ext_params2); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_INIT, + 0, ext.ext_params2); + + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_mchk_info mchk; + int rc; + + spin_lock(&li->lock); + mchk = li->irq.mchk; + /* + * If there was an exigent machine check pending, then any repressible + * machine checks that might have been pending are indicated along + * with it, so always clear both bits + */ + clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); + clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); + memset(&li->irq.mchk, 0, sizeof(mchk)); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, + mchk.cr14, mchk.mcic); + + rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest_lc(vcpu, mchk.mcic, + (u64 __user *) __LC_MCCK_CODE); + rc |= put_guest_lc(vcpu, mchk.failing_storage_address, + (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); + rc |= 
write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, + &mchk.fixed_logout, sizeof(mchk.fixed_logout)); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + vcpu->stat.deliver_restart_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); + + rc = write_guest_lc(vcpu, + offsetof(struct _lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + clear_bit(IRQ_PEND_RESTART, &li->pending_irqs); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_stop(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); + vcpu->stat.deliver_stop_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP, + 0, 0); + + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs); + return 0; +} + +static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_prefix_info prefix; + + spin_lock(&li->lock); + prefix = li->irq.prefix; + li->irq.prefix.address = 0; + clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address); + vcpu->stat.deliver_prefix_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_SIGP_SET_PREFIX, + prefix.address, 0); + + kvm_s390_set_prefix(vcpu, prefix.address); + return 0; +} + +static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu) +{ + 
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + int cpu_addr; + + spin_lock(&li->lock); + cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS); + clear_bit(cpu_addr, li->sigp_emerg_pending); + if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS)) + clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + vcpu->stat.deliver_emergency_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, + cpu_addr, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_extcall_info extcall; + int rc; + + spin_lock(&li->lock); + extcall = li->irq.extcall; + li->irq.extcall.code = 0; + clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + vcpu->stat.deliver_external_call++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_EXTERNAL_CALL, + extcall.code, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + return rc ? 
-EFAULT : 0; +} + +static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) { + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_pgm_info pgm_info; int rc = 0; u16 ilc = get_ilc(vcpu); - switch (pgm_info->code & ~PGM_PER) { + spin_lock(&li->lock); + pgm_info = li->irq.pgm; + clear_bit(IRQ_PEND_PROG, &li->pending_irqs); + memset(&li->irq.pgm, 0, sizeof(pgm_info)); + spin_unlock(&li->lock); + + VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + pgm_info.code, ilc); + vcpu->stat.deliver_program_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, + pgm_info.code, 0); + + switch (pgm_info.code & ~PGM_PER) { case PGM_AFX_TRANSLATION: case PGM_ASX_TRANSLATION: case PGM_EX_TRANSLATION: @@ -243,7 +511,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY: case PGM_SPACE_SWITCH: - rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); break; case PGM_ALEN_TRANSLATION: @@ -252,7 +520,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, case PGM_ASTE_SEQUENCE: case PGM_ASTE_VALIDITY: case PGM_EXTENDED_AUTHORITY: - rc = put_guest_lc(vcpu, pgm_info->exc_access_id, + rc = put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); break; case PGM_ASCE_TYPE: @@ -261,247 +529,208 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: case PGM_SEGMENT_TRANSLATION: - rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); - rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + rc |= put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); - rc |= put_guest_lc(vcpu, pgm_info->op_access_id, + rc |= put_guest_lc(vcpu, pgm_info.op_access_id, (u8 *)__LC_OP_ACCESS_ID); break; case 
PGM_MONITOR: - rc = put_guest_lc(vcpu, pgm_info->mon_class_nr, - (u64 *)__LC_MON_CLASS_NR); - rc |= put_guest_lc(vcpu, pgm_info->mon_code, + rc = put_guest_lc(vcpu, pgm_info.mon_class_nr, + (u16 *)__LC_MON_CLASS_NR); + rc |= put_guest_lc(vcpu, pgm_info.mon_code, (u64 *)__LC_MON_CODE); break; case PGM_DATA: - rc = put_guest_lc(vcpu, pgm_info->data_exc_code, + rc = put_guest_lc(vcpu, pgm_info.data_exc_code, (u32 *)__LC_DATA_EXC_CODE); break; case PGM_PROTECTION: - rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); - rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + rc |= put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); break; } - if (pgm_info->code & PGM_PER) { - rc |= put_guest_lc(vcpu, pgm_info->per_code, + if (pgm_info.code & PGM_PER) { + rc |= put_guest_lc(vcpu, pgm_info.per_code, (u8 *) __LC_PER_CODE); - rc |= put_guest_lc(vcpu, pgm_info->per_atmid, + rc |= put_guest_lc(vcpu, pgm_info.per_atmid, (u8 *)__LC_PER_ATMID); - rc |= put_guest_lc(vcpu, pgm_info->per_address, + rc |= put_guest_lc(vcpu, pgm_info.per_address, (u64 *) __LC_PER_ADDRESS); - rc |= put_guest_lc(vcpu, pgm_info->per_access_id, + rc |= put_guest_lc(vcpu, pgm_info.per_access_id, (u8 *) __LC_PER_ACCESS_ID); } rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); - rc |= put_guest_lc(vcpu, pgm_info->code, + rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_INT_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? 
-EFAULT : 0; +} - return rc; +static int __must_check __deliver_service(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + inti->ext.ext_params); + vcpu->stat.deliver_service_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, 0); + + rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + return rc ? -EFAULT : 0; } -static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) { - const unsigned short table[] = { 2, 4, 4, 6 }; - int rc = 0; + int rc; + + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_DONE, 0, + inti->ext.ext_params2); + + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + return rc ? 
-EFAULT : 0; +} + +static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, + inti->ext.ext_params2); + + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + return rc ? -EFAULT : 0; +} + +static int __must_check __deliver_io(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr, + ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word); + + rc = put_guest_lc(vcpu, inti->io.subchannel_id, + (u16 *)__LC_SUBCHANNEL_ID); + rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, + (u16 *)__LC_SUBCHANNEL_NR); + rc |= put_guest_lc(vcpu, inti->io.io_int_parm, + (u32 *)__LC_IO_INT_PARM); + rc |= put_guest_lc(vcpu, inti->io.io_int_word, + (u32 *)__LC_IO_INT_WORD); + rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? 
-EFAULT : 0; +} + +static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_mchk_info *mchk = &inti->mchk; + int rc; + + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + mchk->mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, + mchk->cr14, mchk->mcic); + + rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest_lc(vcpu, mchk->mcic, + (u64 __user *) __LC_MCCK_CODE); + rc |= put_guest_lc(vcpu, mchk->failing_storage_address, + (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); + rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, + &mchk->fixed_logout, sizeof(mchk->fixed_logout)); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + +typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); + +static const deliver_irq_t deliver_irq_funcs[] = { + [IRQ_PEND_MCHK_EX] = __deliver_machine_check, + [IRQ_PEND_PROG] = __deliver_prog, + [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, + [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, + [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, + [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, + [IRQ_PEND_RESTART] = __deliver_restart, + [IRQ_PEND_SIGP_STOP] = __deliver_stop, + [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, + [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, +}; + +static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + int rc; switch (inti->type) { - case KVM_S390_INT_EMERGENCY: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); - vcpu->stat.deliver_emergency_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->emerg.code, 0); - rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, inti->emerg.code, - (u16 
*)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - break; - case KVM_S390_INT_EXTERNAL_CALL: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); - vcpu->stat.deliver_external_call++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->extcall.code, 0); - rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, inti->extcall.code, - (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - break; - case KVM_S390_INT_CLOCK_COMP: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); - rc = deliver_ckc_interrupt(vcpu); - break; - case KVM_S390_INT_CPU_TIMER: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); - rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, - (u16 *)__LC_EXT_INT_CODE); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - break; case KVM_S390_INT_SERVICE: - VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", - inti->ext.ext_params); - vcpu->stat.deliver_service_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); - rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - break; - case 
KVM_S390_INT_PFAULT_INIT: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, - inti->ext.ext_params2); - rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, - (u16 *) __LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *) __LC_EXT_PARAMS2); + rc = __deliver_service(vcpu, inti); break; case KVM_S390_INT_PFAULT_DONE: - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, - inti->ext.ext_params2); - rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + rc = __deliver_pfault_done(vcpu, inti); break; case KVM_S390_INT_VIRTIO: - VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", - inti->ext.ext_params, inti->ext.ext_params2); - vcpu->stat.deliver_virtio_interrupt++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, - inti->ext.ext_params2); - rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); - break; - case KVM_S390_SIGP_STOP: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); - 
vcpu->stat.deliver_stop_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - 0, 0); - __set_intercept_indicator(vcpu, inti); - break; - - case KVM_S390_SIGP_SET_PREFIX: - VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", - inti->prefix.address); - vcpu->stat.deliver_prefix_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->prefix.address, 0); - kvm_s390_set_prefix(vcpu, inti->prefix.address); - break; - - case KVM_S390_RESTART: - VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); - vcpu->stat.deliver_restart_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - 0, 0); - rc = write_guest_lc(vcpu, - offsetof(struct _lowcore, restart_old_psw), - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); + rc = __deliver_virtio(vcpu, inti); break; - case KVM_S390_PROGRAM_INT: - VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", - inti->pgm.code, - table[vcpu->arch.sie_block->ipa >> 14]); - vcpu->stat.deliver_program_int++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->pgm.code, 0); - rc = __deliver_prog_irq(vcpu, &inti->pgm); - break; - case KVM_S390_MCHK: - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", - inti->mchk.mcic); - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->mchk.cr14, - inti->mchk.mcic); - rc = kvm_s390_vcpu_store_status(vcpu, - KVM_S390_STORE_STATUS_PREFIXED); - rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE); - rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc = __deliver_mchk_floating(vcpu, inti); break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - { - __u32 param0 = ((__u32)inti->io.subchannel_id << 16) | - inti->io.subchannel_nr; - __u64 param1 = 
((__u64)inti->io.io_int_parm << 32) | - inti->io.io_int_word; - VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); - vcpu->stat.deliver_io_int++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - param0, param1); - rc = put_guest_lc(vcpu, inti->io.subchannel_id, - (u16 *)__LC_SUBCHANNEL_ID); - rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, - (u16 *)__LC_SUBCHANNEL_NR); - rc |= put_guest_lc(vcpu, inti->io.io_int_parm, - (u32 *)__LC_IO_INT_PARM); - rc |= put_guest_lc(vcpu, inti->io.io_int_word, - (u32 *)__LC_IO_INT_WORD); - rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); + rc = __deliver_io(vcpu, inti); break; - } default: BUG(); } @@ -509,19 +738,6 @@ static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, return rc; } -static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu) -{ - int rc; - - rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - return rc; -} - /* Check whether SIGP interpretation facility has an external call pending */ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) { @@ -538,20 +754,11 @@ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) { - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_interrupt_info *inti; - int rc = 0; + int rc; - if (atomic_read(&li->active)) { - spin_lock(&li->lock); - list_for_each_entry(inti, &li->list, list) - if (__interrupt_is_deliverable(vcpu, inti)) { - rc = 1; - break; - } - spin_unlock(&li->lock); - } + rc = !!deliverable_local_irqs(vcpu); if ((!rc) && 
atomic_read(&fi->active)) { spin_lock(&fi->lock); @@ -643,18 +850,15 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_interrupt_info *n, *inti = NULL; spin_lock(&li->lock); - list_for_each_entry_safe(inti, n, &li->list, list) { - list_del(&inti->list); - kfree(inti); - } - atomic_set(&li->active, 0); + li->pending_irqs = 0; + bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS); + memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); /* clear pending external calls set by sigp interpretation facility */ - atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags); atomic_clear_mask(SIGP_CTRL_C, &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); } @@ -664,34 +868,35 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_interrupt_info *n, *inti = NULL; + deliver_irq_t func; int deliver; int rc = 0; + unsigned long irq_type; + unsigned long deliverable_irqs; __reset_intercept_indicators(vcpu); - if (atomic_read(&li->active)) { - do { - deliver = 0; - spin_lock(&li->lock); - list_for_each_entry_safe(inti, n, &li->list, list) { - if (__interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&li->list)) - atomic_set(&li->active, 0); - spin_unlock(&li->lock); - if (deliver) { - rc = __do_deliver_interrupt(vcpu, inti); - kfree(inti); - } - } while (!rc && deliver); - } - if (!rc && kvm_cpu_has_pending_timer(vcpu)) - rc = deliver_ckc_interrupt(vcpu); + /* pending ckc conditions might have been invalidated */ + clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + if 
(kvm_cpu_has_pending_timer(vcpu)) + set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + + do { + deliverable_irqs = deliverable_local_irqs(vcpu); + /* bits are in the order of interrupt priority */ + irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT); + if (irq_type == IRQ_PEND_COUNT) + break; + func = deliver_irq_funcs[irq_type]; + if (!func) { + WARN_ON_ONCE(func == NULL); + clear_bit(irq_type, &li->pending_irqs); + continue; + } + rc = func(vcpu); + } while (!rc && irq_type != IRQ_PEND_COUNT); + + set_intercept_indicators_local(vcpu); if (!rc && atomic_read(&fi->active)) { do { @@ -710,7 +915,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) atomic_set(&fi->active, 0); spin_unlock(&fi->lock); if (deliver) { - rc = __do_deliver_interrupt(vcpu, inti); + rc = __deliver_floating_interrupt(vcpu, inti); kfree(inti); } } while (!rc && deliver); @@ -719,23 +924,26 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) return rc; } -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_interrupt_info *inti; - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; + li->irq.pgm = irq->u.pgm; + set_bit(IRQ_PEND_PROG, &li->pending_irqs); + return 0; +} - inti->type = KVM_S390_PROGRAM_INT; - inti->pgm.code = code; +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_irq irq; VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code, + 0, 1); spin_lock(&li->lock); - list_add(&inti->list, &li->list); - atomic_set(&li->active, 1); + irq.u.pgm.code = code; + __inject_prog(vcpu, &irq); 
BUG_ON(waitqueue_active(li->wq)); spin_unlock(&li->lock); return 0; @@ -745,27 +953,166 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, struct kvm_s390_pgm_info *pgm_info) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_interrupt_info *inti; - - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; + struct kvm_s390_irq irq; + int rc; VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)", pgm_info->code); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, pgm_info->code, 0, 1); - - inti->type = KVM_S390_PROGRAM_INT; - memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm)); spin_lock(&li->lock); - list_add(&inti->list, &li->list); - atomic_set(&li->active, 1); + irq.u.pgm = *pgm_info; + rc = __inject_prog(vcpu, &irq); BUG_ON(waitqueue_active(li->wq)); spin_unlock(&li->lock); + return rc; +} + +static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx", + irq->u.ext.ext_params, irq->u.ext.ext_params2); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, + irq->u.ext.ext_params, + irq->u.ext.ext_params2, 2); + + li->irq.ext = irq->u.ext; + set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); return 0; } +int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_extcall_info *extcall = &li->irq.extcall; + + VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", + irq->u.extcall.code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, + irq->u.extcall.code, 0, 2); + + *extcall = irq->u.extcall; + set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + +static int __inject_set_prefix(struct kvm_vcpu 
*vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_prefix_info *prefix = &li->irq.prefix; + + VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", + prefix->address); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, + prefix->address, 0, 2); + + *prefix = irq->u.prefix; + set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); + return 0; +} + +static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2); + + li->action_bits |= ACTION_STOP_ON_STOP; + set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); + return 0; +} + +static int __inject_sigp_restart(struct kvm_vcpu *vcpu, + struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2); + + set_bit(IRQ_PEND_RESTART, &li->pending_irqs); + return 0; +} + +static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, + struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_emerg_info *emerg = &li->irq.emerg; + + VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", + irq->u.emerg.code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, + emerg->code, 0, 2); + + set_bit(emerg->code, li->sigp_emerg_pending); + set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + +static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_mchk_info *mchk = &li->irq.mchk; + + VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", + mchk->mcic); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, + 
mchk->mcic, 2); + + /* + * Because repressible machine checks can be indicated along with + * exigent machine checks (PoP, Chapter 11, Interruption action) + * we need to combine cr14, mcic and external damage code. + * Failing storage address and the logout area should not be or'ed + * together, we just indicate the last occurrence of the corresponding + * machine check + */ + mchk->cr14 |= irq->u.mchk.cr14; + mchk->mcic |= irq->u.mchk.mcic; + mchk->ext_damage_code |= irq->u.mchk.ext_damage_code; + mchk->failing_storage_address = irq->u.mchk.failing_storage_address; + memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout, + sizeof(mchk->fixed_logout)); + if (mchk->mcic & MCHK_EX_MASK) + set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); + else if (mchk->mcic & MCHK_REP_MASK) + set_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); + return 0; +} + +static int __inject_ckc(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, + 0, 0, 2); + + set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + +static int __inject_cpu_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, + 0, 0, 2); + + set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + return 0; +} + + struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid) { @@ -851,7 +1198,17 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) dst_vcpu = kvm_get_vcpu(kvm, sigcpu); li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + switch (inti->type) { + case 
KVM_S390_MCHK: + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags); + break; + default: + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + break; + } spin_unlock(&li->lock); kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); unlock_fi: @@ -920,92 +1277,85 @@ void kvm_s390_reinject_io_int(struct kvm *kvm, __inject_vm(kvm, inti); } -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int) +int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, + struct kvm_s390_irq *irq) { - struct kvm_s390_local_interrupt *li; - struct kvm_s390_interrupt_info *inti; + irq->type = s390int->type; + switch (irq->type) { + case KVM_S390_PROGRAM_INT: + if (s390int->parm & 0xffff0000) + return -EINVAL; + irq->u.pgm.code = s390int->parm; + break; + case KVM_S390_SIGP_SET_PREFIX: + irq->u.prefix.address = s390int->parm; + break; + case KVM_S390_INT_EXTERNAL_CALL: + if (irq->u.extcall.code & 0xffff0000) + return -EINVAL; + irq->u.extcall.code = s390int->parm; + break; + case KVM_S390_INT_EMERGENCY: + if (irq->u.emerg.code & 0xffff0000) + return -EINVAL; + irq->u.emerg.code = s390int->parm; + break; + case KVM_S390_MCHK: + irq->u.mchk.mcic = s390int->parm64; + break; + } + return 0; +} - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; - switch (s390int->type) { + spin_lock(&li->lock); + switch (irq->type) { case KVM_S390_PROGRAM_INT: - if (s390int->parm & 0xffff0000) { - kfree(inti); - return -EINVAL; - } - inti->type = s390int->type; - inti->pgm.code = s390int->parm; VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", - s390int->parm); + irq->u.pgm.code); + rc = __inject_prog(vcpu, irq); break; case KVM_S390_SIGP_SET_PREFIX: - inti->prefix.address = s390int->parm; - 
inti->type = s390int->type; - VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", - s390int->parm); + rc = __inject_set_prefix(vcpu, irq); break; case KVM_S390_SIGP_STOP: + rc = __inject_sigp_stop(vcpu, irq); + break; case KVM_S390_RESTART: + rc = __inject_sigp_restart(vcpu, irq); + break; case KVM_S390_INT_CLOCK_COMP: + rc = __inject_ckc(vcpu); + break; case KVM_S390_INT_CPU_TIMER: - VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); - inti->type = s390int->type; + rc = __inject_cpu_timer(vcpu); break; case KVM_S390_INT_EXTERNAL_CALL: - if (s390int->parm & 0xffff0000) { - kfree(inti); - return -EINVAL; - } - VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", - s390int->parm); - inti->type = s390int->type; - inti->extcall.code = s390int->parm; + rc = __inject_extcall(vcpu, irq); break; case KVM_S390_INT_EMERGENCY: - if (s390int->parm & 0xffff0000) { - kfree(inti); - return -EINVAL; - } - VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm); - inti->type = s390int->type; - inti->emerg.code = s390int->parm; + rc = __inject_sigp_emergency(vcpu, irq); break; case KVM_S390_MCHK: - VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", - s390int->parm64); - inti->type = s390int->type; - inti->mchk.mcic = s390int->parm64; + rc = __inject_mchk(vcpu, irq); break; case KVM_S390_INT_PFAULT_INIT: - inti->type = s390int->type; - inti->ext.ext_params2 = s390int->parm64; + rc = __inject_pfault_init(vcpu, irq); break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: default: - kfree(inti); - return -EINVAL; + rc = -EINVAL; } - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, - s390int->parm64, 2); - - li = &vcpu->arch.local_int; - spin_lock(&li->lock); - if (inti->type == KVM_S390_PROGRAM_INT) - list_add(&inti->list, &li->list); - else - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); - if (inti->type == KVM_S390_SIGP_STOP) - li->action_bits |= 
ACTION_STOP_ON_STOP; - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); spin_unlock(&li->lock); - kvm_s390_vcpu_wakeup(vcpu); - return 0; + if (!rc) + kvm_s390_vcpu_wakeup(vcpu); + return rc; } void kvm_s390_clear_float_irqs(struct kvm *kvm) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6b049ee75a56..3e09801e3104 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -81,10 +81,17 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, + { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, + { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, + { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, + { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, + { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, + { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { "diagnose_9c", VCPU_STAT(diagnose_9c) }, @@ -453,6 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) spin_lock_init(&kvm->arch.float_int.lock); INIT_LIST_HEAD(&kvm->arch.float_int.list); init_waitqueue_head(&kvm->arch.ipte_wq); + mutex_init(&kvm->arch.ipte_mutex); debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm 
created"); @@ -711,7 +719,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, } spin_lock_init(&vcpu->arch.local_int.lock); - INIT_LIST_HEAD(&vcpu->arch.local_int.list); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; @@ -1114,13 +1121,15 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, unsigned long token) { struct kvm_s390_interrupt inti; - inti.parm64 = token; + struct kvm_s390_irq irq; if (start_token) { - inti.type = KVM_S390_INT_PFAULT_INIT; - WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); + irq.u.ext.ext_params2 = token; + irq.type = KVM_S390_INT_PFAULT_INIT; + WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); } else { inti.type = KVM_S390_INT_PFAULT_DONE; + inti.parm64 = token; WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); } } @@ -1614,11 +1623,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; + struct kvm_s390_irq s390irq; r = -EFAULT; if (copy_from_user(&s390int, argp, sizeof(s390int))) break; - r = kvm_s390_inject_vcpu(vcpu, &s390int); + if (s390int_to_s390irq(&s390int, &s390irq)) + return -EINVAL; + r = kvm_s390_inject_vcpu(vcpu, &s390irq); break; } case KVM_S390_STORE_STATUS: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 244d02303182..a8f3d9b71c11 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -24,8 +24,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* declare vfacilities extern */ extern unsigned long *vfacilities; -int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); - /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) #define TDB_FORMAT1 1 @@ -144,7 +142,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm); int __must_check kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt 
*s390int); int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int); + struct kvm_s390_irq *irq); int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); @@ -152,6 +150,10 @@ void kvm_s390_reinject_io_int(struct kvm *kvm, struct kvm_s390_interrupt_info *inti); int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); +/* implemented in intercept.c */ +void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc); +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); + /* implemented in priv.c */ int is_valid_psw(psw_t *psw); int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); @@ -222,6 +224,9 @@ static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc) return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); } +int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, + struct kvm_s390_irq *s390irq); + /* implemented in interrupt.c */ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int psw_extint_disabled(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index f47cb0c6d906..1be578d64dfc 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -180,21 +180,18 @@ static int handle_skey(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - vcpu->arch.sie_block->gpsw.addr = - __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + kvm_s390_rewind_psw(vcpu, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } static int handle_ipte_interlock(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - vcpu->stat.instruction_ipte_interlock++; - if (psw_bits(*psw).p) + if (psw_bits(vcpu->arch.sie_block->gpsw).p) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); - psw->addr = 
__rewind_psw(*psw, 4); + kvm_s390_rewind_psw(vcpu, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); return 0; } @@ -650,10 +647,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (kvm_s390_check_low_addr_protection(vcpu, start)) - return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); - } + start = kvm_s390_logical_to_effective(vcpu, start); switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { case 0x00000000: @@ -669,6 +663,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) default: return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); } + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (kvm_s390_check_low_addr_protection(vcpu, start)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + } + while (start < end) { unsigned long useraddr, abs_addr; @@ -725,8 +725,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* Rewind PSW to repeat the ESSA instruction */ - vcpu->arch.sie_block->gpsw.addr = - __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + kvm_s390_rewind_psw(vcpu, 4); vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); down_read(&gmap->mm->mmap_sem); @@ -769,8 +768,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u32 val = 0; - int reg, rc; + int reg, rc, nr_regs; + u32 ctl_array[16]; u64 ga; vcpu->stat.instruction_lctl++; @@ -786,19 +785,20 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); + nr_regs = ((reg3 - reg1) & 0xf) + 1; + rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + if (rc) + return 
kvm_s390_inject_prog_cond(vcpu, rc); reg = reg1; + nr_regs = 0; do { - rc = read_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; - vcpu->arch.sie_block->gcr[reg] |= val; - ga += 4; + vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -806,9 +806,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int reg, rc, nr_regs; + u32 ctl_array[16]; u64 ga; - u32 val; - int reg, rc; vcpu->stat.instruction_stctl++; @@ -824,26 +824,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga); reg = reg1; + nr_regs = 0; do { - val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful; - rc = write_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - ga += 4; + ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - - return 0; + rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + return rc ? 
kvm_s390_inject_prog_cond(vcpu, rc) : 0; } static int handle_lctlg(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 ga, val; - int reg, rc; + int reg, rc, nr_regs; + u64 ctl_array[16]; + u64 ga; vcpu->stat.instruction_lctlg++; @@ -855,22 +853,22 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - reg = reg1; - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); + nr_regs = ((reg3 - reg1) & 0xf) + 1; + rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + reg = reg1; + nr_regs = 0; do { - rc = read_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - vcpu->arch.sie_block->gcr[reg] = val; - ga += 8; + vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -878,8 +876,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 ga, val; - int reg, rc; + int reg, rc, nr_regs; + u64 ctl_array[16]; + u64 ga; vcpu->stat.instruction_stctg++; @@ -891,23 +890,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - reg = reg1; - VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga); + reg = reg1; + nr_regs = 0; do { - val = vcpu->arch.sie_block->gcr[reg]; - rc = write_guest(vcpu, ga, &val, sizeof(val)); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - ga += 8; + ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg]; if (reg == reg3) break; reg = (reg + 1) % 16; } while (1); - - return 
0; + rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; } static const intercept_handler_t eb_handlers[256] = { diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index cf243ba3d50f..6651f9f73973 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -20,20 +20,13 @@ #include "kvm-s390.h" #include "trace.h" -static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, +static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, u64 *reg) { struct kvm_s390_local_interrupt *li; - struct kvm_vcpu *dst_vcpu = NULL; int cpuflags; int rc; - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; cpuflags = atomic_read(li->cpuflags); @@ -48,55 +41,53 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, rc = SIGP_CC_STATUS_STORED; } - VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); + VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id, + rc); return rc; } -static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) +static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu) { - struct kvm_s390_interrupt s390int = { + struct kvm_s390_irq irq = { .type = KVM_S390_INT_EMERGENCY, - .parm = vcpu->vcpu_id, + .u.emerg.code = vcpu->vcpu_id, }; - struct kvm_vcpu *dst_vcpu = NULL; int rc = 0; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - - rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); if (!rc) - VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", + dst_vcpu->vcpu_id); return rc ? 
rc : SIGP_CC_ORDER_CODE_ACCEPTED; } -static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, +static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) +{ + return __inject_sigp_emergency(vcpu, dst_vcpu); +} + +static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u16 asn, u64 *reg) { - struct kvm_vcpu *dst_vcpu = NULL; const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; u16 p_asn, s_asn; psw_t *psw; u32 flags; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); psw = &dst_vcpu->arch.sie_block->gpsw; p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ - /* Deliver the emergency signal? */ + /* Inject the emergency signal? */ if (!(flags & CPUSTAT_STOPPED) || (psw->mask & psw_int_mask) != psw_int_mask || ((flags & CPUSTAT_WAIT) && psw->addr != 0) || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { - return __sigp_emergency(vcpu, cpu_addr); + return __inject_sigp_emergency(vcpu, dst_vcpu); } else { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; @@ -104,23 +95,19 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, } } -static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) +static int __sigp_external_call(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu) { - struct kvm_s390_interrupt s390int = { + struct kvm_s390_irq irq = { .type = KVM_S390_INT_EXTERNAL_CALL, - .parm = vcpu->vcpu_id, + .u.extcall.code = vcpu->vcpu_id, }; - struct kvm_vcpu *dst_vcpu = NULL; int rc; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - - rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); if 
(!rc) - VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", + dst_vcpu->vcpu_id); return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } @@ -128,29 +115,20 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) { struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; - struct kvm_s390_interrupt_info *inti; int rc = SIGP_CC_ORDER_CODE_ACCEPTED; - inti = kzalloc(sizeof(*inti), GFP_ATOMIC); - if (!inti) - return -ENOMEM; - inti->type = KVM_S390_SIGP_STOP; - spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) { /* another SIGP STOP is pending */ - kfree(inti); rc = SIGP_CC_BUSY; goto out; } if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { - kfree(inti); if ((action & ACTION_STORE_ON_STOP) != 0) rc = -ESHUTDOWN; goto out; } - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); + set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); li->action_bits |= action; atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); kvm_s390_vcpu_wakeup(dst_vcpu); @@ -160,23 +138,27 @@ out: return rc; } -static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) +static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) { - struct kvm_vcpu *dst_vcpu = NULL; int rc; - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; + rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP); + VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id); - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; + return rc; +} - rc = __inject_sigp_stop(dst_vcpu, action); +static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u64 *reg) +{ + int rc; - VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP | + ACTION_STORE_ON_STOP); + VCPU_EVENT(vcpu, 4, 
"sent sigp stop and store status to cpu %x", + dst_vcpu->vcpu_id); - if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { + if (rc == -ESHUTDOWN) { /* If the CPU has already been stopped, we still have * to save the status when doing stop-and-store. This * has to be done after unlocking all spinlocks. */ @@ -212,18 +194,12 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) return rc; } -static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, - u64 *reg) +static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, + u32 address, u64 *reg) { struct kvm_s390_local_interrupt *li; - struct kvm_vcpu *dst_vcpu = NULL; - struct kvm_s390_interrupt_info *inti; int rc; - if (cpu_addr < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; /* @@ -238,46 +214,34 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, return SIGP_CC_STATUS_STORED; } - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return SIGP_CC_BUSY; - spin_lock(&li->lock); /* cpu must be in stopped state */ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; rc = SIGP_CC_STATUS_STORED; - kfree(inti); goto out_li; } - inti->type = KVM_S390_SIGP_SET_PREFIX; - inti->prefix.address = address; - - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); + li->irq.prefix.address = address; + set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); kvm_s390_vcpu_wakeup(dst_vcpu); rc = SIGP_CC_ORDER_CODE_ACCEPTED; - VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); + VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id, + address); out_li: spin_unlock(&li->lock); return rc; } -static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, - u32 addr, u64 *reg) +static int 
__sigp_store_status_at_addr(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, + u32 addr, u64 *reg) { - struct kvm_vcpu *dst_vcpu = NULL; int flags; int rc; - if (cpu_id < KVM_MAX_VCPUS) - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&dst_vcpu->arch.local_int.lock); flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); spin_unlock(&dst_vcpu->arch.local_int.lock); @@ -297,19 +261,12 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, return rc; } -static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, - u64 *reg) +static int __sigp_sense_running(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u64 *reg) { struct kvm_s390_local_interrupt *li; - struct kvm_vcpu *dst_vcpu = NULL; int rc; - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { /* running */ @@ -321,26 +278,19 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, rc = SIGP_CC_STATUS_STORED; } - VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, - rc); + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", + dst_vcpu->vcpu_id, rc); return rc; } -/* Test whether the destination CPU is available and not busy */ -static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) +static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u8 order_code) { - struct kvm_s390_local_interrupt *li; - int rc = SIGP_CC_ORDER_CODE_ACCEPTED; - struct kvm_vcpu *dst_vcpu = NULL; - - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; + struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; + /* handle (RE)START in user space */ + int rc = -EOPNOTSUPP; - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return 
SIGP_CC_NOT_OPERATIONAL; - li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; @@ -349,90 +299,131 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) return rc; } -int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu, u8 order_code) { - int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int r3 = vcpu->arch.sie_block->ipa & 0x000f; - u32 parameter; - u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; - u8 order_code; - int rc; + /* handle (INITIAL) CPU RESET in user space */ + return -EOPNOTSUPP; +} - /* sigp in userspace can exit */ - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); +static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu, + struct kvm_vcpu *dst_vcpu) +{ + /* handle unknown orders in user space */ + return -EOPNOTSUPP; +} - order_code = kvm_s390_get_base_disp_rs(vcpu); +static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, + u16 cpu_addr, u32 parameter, u64 *status_reg) +{ + int rc; + struct kvm_vcpu *dst_vcpu; - if (r1 % 2) - parameter = vcpu->run->s.regs.gprs[r1]; - else - parameter = vcpu->run->s.regs.gprs[r1 + 1]; + if (cpu_addr >= KVM_MAX_VCPUS) + return SIGP_CC_NOT_OPERATIONAL; + + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; - trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter); switch (order_code) { case SIGP_SENSE: vcpu->stat.instruction_sigp_sense++; - rc = __sigp_sense(vcpu, cpu_addr, - &vcpu->run->s.regs.gprs[r1]); + rc = __sigp_sense(vcpu, dst_vcpu, status_reg); break; case SIGP_EXTERNAL_CALL: vcpu->stat.instruction_sigp_external_call++; - rc = __sigp_external_call(vcpu, cpu_addr); + rc = __sigp_external_call(vcpu, dst_vcpu); break; case SIGP_EMERGENCY_SIGNAL: vcpu->stat.instruction_sigp_emergency++; - rc = __sigp_emergency(vcpu, 
cpu_addr); + rc = __sigp_emergency(vcpu, dst_vcpu); break; case SIGP_STOP: vcpu->stat.instruction_sigp_stop++; - rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP); + rc = __sigp_stop(vcpu, dst_vcpu); break; case SIGP_STOP_AND_STORE_STATUS: - vcpu->stat.instruction_sigp_stop++; - rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | - ACTION_STOP_ON_STOP); + vcpu->stat.instruction_sigp_stop_store_status++; + rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg); break; case SIGP_STORE_STATUS_AT_ADDRESS: - rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter, - &vcpu->run->s.regs.gprs[r1]); - break; - case SIGP_SET_ARCHITECTURE: - vcpu->stat.instruction_sigp_arch++; - rc = __sigp_set_arch(vcpu, parameter); + vcpu->stat.instruction_sigp_store_status++; + rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter, + status_reg); break; case SIGP_SET_PREFIX: vcpu->stat.instruction_sigp_prefix++; - rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, - &vcpu->run->s.regs.gprs[r1]); + rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg); break; case SIGP_COND_EMERGENCY_SIGNAL: - rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter, - &vcpu->run->s.regs.gprs[r1]); + vcpu->stat.instruction_sigp_cond_emergency++; + rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter, + status_reg); break; case SIGP_SENSE_RUNNING: vcpu->stat.instruction_sigp_sense_running++; - rc = __sigp_sense_running(vcpu, cpu_addr, - &vcpu->run->s.regs.gprs[r1]); + rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg); break; case SIGP_START: - rc = sigp_check_callable(vcpu, cpu_addr); - if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) - rc = -EOPNOTSUPP; /* Handle START in user space */ + vcpu->stat.instruction_sigp_start++; + rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code); break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; - rc = sigp_check_callable(vcpu, cpu_addr); - if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) { - VCPU_EVENT(vcpu, 4, - "sigp restart %x 
to handle userspace", - cpu_addr); - /* user space must know about restart */ - rc = -EOPNOTSUPP; - } + rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code); + break; + case SIGP_INITIAL_CPU_RESET: + vcpu->stat.instruction_sigp_init_cpu_reset++; + rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code); + break; + case SIGP_CPU_RESET: + vcpu->stat.instruction_sigp_cpu_reset++; + rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code); + break; + default: + vcpu->stat.instruction_sigp_unknown++; + rc = __prepare_sigp_unknown(vcpu, dst_vcpu); + } + + if (rc == -EOPNOTSUPP) + VCPU_EVENT(vcpu, 4, + "sigp order %u -> cpu %x: handled in user space", + order_code, dst_vcpu->vcpu_id); + + return rc; +} + +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +{ + int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + u32 parameter; + u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; + u8 order_code; + int rc; + + /* sigp in userspace can exit */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + order_code = kvm_s390_get_base_disp_rs(vcpu); + + if (r1 % 2) + parameter = vcpu->run->s.regs.gprs[r1]; + else + parameter = vcpu->run->s.regs.gprs[r1 + 1]; + + trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter); + switch (order_code) { + case SIGP_SET_ARCHITECTURE: + vcpu->stat.instruction_sigp_arch++; + rc = __sigp_set_arch(vcpu, parameter); break; default: - return -EOPNOTSUPP; + rc = handle_sigp_dst(vcpu, order_code, cpu_addr, + parameter, + &vcpu->run->s.regs.gprs[r1]); } if (rc < 0) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 71c7eff2c89f..be99357d238c 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -844,7 +844,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, down_read(&mm->mmap_sem); retry: - ptep = get_locked_pte(current->mm, addr, &ptl); + ptep = get_locked_pte(mm, addr, &ptl); if 
(unlikely(!ptep)) { up_read(&mm->mmap_sem); return -EFAULT; @@ -888,6 +888,45 @@ retry: } EXPORT_SYMBOL(set_guest_storage_key); +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +{ + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + uint64_t physaddr; + unsigned long key = 0; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + pgste = pgste_get_lock(ptep); + + if (pte_val(*ptep) & _PAGE_INVALID) { + key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; + key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; + key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; + key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; + } else { + physaddr = pte_val(*ptep) & PAGE_MASK; + key = page_get_storage_key(physaddr); + + /* Reflect guest's logical view, not physical */ + if (pgste_val(pgste) & PGSTE_GR_BIT) + key |= _PAGE_REFERENCED; + if (pgste_val(pgste) & PGSTE_GC_BIT) + key |= _PAGE_CHANGED; + } + + pgste_set_unlock(ptep, pgste); + pte_unmap_unlock(ptep, ptl); + up_read(&mm->mmap_sem); + return key; +} +EXPORT_SYMBOL(get_guest_storage_key); + #else /* CONFIG_PGSTE */ static inline int page_table_with_pgste(struct page *page) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index c6b6ee5f38b2..0f09f5285d5e 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -223,7 +223,7 @@ config CPU_SHX3 config ARCH_SHMOBILE bool select ARCH_SUSPEND_POSSIBLE - select PM_RUNTIME + select PM config CPU_HAS_PMU depends on CPU_SH4 || CPU_SH4A diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index ec70475da890..a8d975793b6d 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -47,7 +47,7 @@ CONFIG_PREEMPT=y CONFIG_BINFMT_MISC=y CONFIG_PM=y CONFIG_PM_DEBUG=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_CPU_IDLE=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/sh/configs/sdk7786_defconfig 
b/arch/sh/configs/sdk7786_defconfig index 76a76a295d74..e7e56a4131b4 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -82,7 +82,7 @@ CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_MISC=y CONFIG_PM=y CONFIG_PM_DEBUG=y -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_CPU_IDLE=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index be65f035d18a..5cbc96d801ff 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -460,10 +460,12 @@ static void __init sparc_context_init(int numctx) void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { + unsigned long flags; + if (mm->context == NO_CONTEXT) { - spin_lock(&srmmu_context_spinlock); + spin_lock_irqsave(&srmmu_context_spinlock, flags); alloc_context(old_mm, mm); - spin_unlock(&srmmu_context_spinlock); + spin_unlock_irqrestore(&srmmu_context_spinlock, flags); srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd); } @@ -986,14 +988,15 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { + unsigned long flags; if (mm->context != NO_CONTEXT) { flush_cache_mm(mm); srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir); flush_tlb_mm(mm); - spin_lock(&srmmu_context_spinlock); + spin_lock_irqsave(&srmmu_context_spinlock, flags); free_context(mm->context); - spin_unlock(&srmmu_context_spinlock); + spin_unlock_irqrestore(&srmmu_context_spinlock, flags); mm->context = NO_CONTEXT; } } diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c index 320ff5e6e61e..6f00e9850636 100644 --- a/arch/tile/gxio/mpipe.c +++ b/arch/tile/gxio/mpipe.c @@ -463,6 +463,7 @@ int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context, (uint64_t)ts->tv_nsec, (uint64_t)cycles); } +EXPORT_SYMBOL_GPL(gxio_mpipe_set_timestamp); int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context, struct timespec *ts) @@ -485,11 +486,13 @@ int 
gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context, } return ret; } +EXPORT_SYMBOL_GPL(gxio_mpipe_get_timestamp); int gxio_mpipe_adjust_timestamp(gxio_mpipe_context_t *context, int64_t delta) { return gxio_mpipe_adjust_timestamp_aux(context, delta); } +EXPORT_SYMBOL_GPL(gxio_mpipe_adjust_timestamp); /* Get our internal context used for link name access. This context is * special in that it is not associated with an mPIPE service domain. @@ -542,6 +545,7 @@ int gxio_mpipe_link_instance(const char *link_name) return gxio_mpipe_info_instance_aux(context, name); } +EXPORT_SYMBOL_GPL(gxio_mpipe_link_instance); int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac) { diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index d372641054d9..6ef4ecab1df2 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -396,8 +396,7 @@ extern void ioport_unmap(void __iomem *addr); static inline long ioport_panic(void) { #ifdef __tilegx__ - panic("PCI IO space support is disabled. Configure the kernel with" - " CONFIG_TILE_PCI_IO to enable it"); + panic("PCI IO space support is disabled. 
Configure the kernel with CONFIG_TILE_PCI_IO to enable it"); #else panic("inb/outb and friends do not exist on tile"); #endif @@ -406,7 +405,7 @@ static inline long ioport_panic(void) static inline void __iomem *ioport_map(unsigned long port, unsigned int len) { - pr_info("ioport_map: mapping IO resources is unsupported on tile.\n"); + pr_info("ioport_map: mapping IO resources is unsupported on tile\n"); return NULL; } diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 33587f16c152..5d1950788c69 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -235,9 +235,9 @@ static inline void __pte_clear(pte_t *ptep) #define pte_donemigrate(x) hv_pte_set_present(hv_pte_clear_migrating(x)) #define pte_ERROR(e) \ - pr_err("%s:%d: bad pte 0x%016llx.\n", __FILE__, __LINE__, pte_val(e)) + pr_err("%s:%d: bad pte 0x%016llx\n", __FILE__, __LINE__, pte_val(e)) #define pgd_ERROR(e) \ - pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd 0x%016llx\n", __FILE__, __LINE__, pgd_val(e)) /* Return PA and protection info for a given kernel VA. */ int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte); diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index 2c8a9cd102d3..e96cec52f6d8 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -86,7 +86,7 @@ static inline int pud_huge_page(pud_t pud) } #define pmd_ERROR(e) \ - pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e)) + pr_err("%s:%d: bad pmd 0x%016llx\n", __FILE__, __LINE__, pmd_val(e)) static inline void pud_clear(pud_t *pudp) { diff --git a/arch/tile/include/uapi/asm/ptrace.h b/arch/tile/include/uapi/asm/ptrace.h index 7757e1985fb6..d03b829857e8 100644 --- a/arch/tile/include/uapi/asm/ptrace.h +++ b/arch/tile/include/uapi/asm/ptrace.h @@ -52,12 +52,16 @@ typedef uint_reg_t pt_reg_t; * system call or exception. 
"struct sigcontext" has the same shape. */ struct pt_regs { - /* Saved main processor registers; 56..63 are special. */ - /* tp, sp, and lr must immediately follow regs[] for aliasing. */ - pt_reg_t regs[53]; - pt_reg_t tp; /* aliases regs[TREG_TP] */ - pt_reg_t sp; /* aliases regs[TREG_SP] */ - pt_reg_t lr; /* aliases regs[TREG_LR] */ + union { + /* Saved main processor registers; 56..63 are special. */ + pt_reg_t regs[56]; + struct { + pt_reg_t __regs[53]; + pt_reg_t tp; /* aliases regs[TREG_TP] */ + pt_reg_t sp; /* aliases regs[TREG_SP] */ + pt_reg_t lr; /* aliases regs[TREG_LR] */ + }; + }; /* Saved special registers. */ pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */ diff --git a/arch/tile/include/uapi/asm/sigcontext.h b/arch/tile/include/uapi/asm/sigcontext.h index 6348e59d3724..39ff5d1a232d 100644 --- a/arch/tile/include/uapi/asm/sigcontext.h +++ b/arch/tile/include/uapi/asm/sigcontext.h @@ -24,10 +24,16 @@ * but is simplified since we know the fault is from userspace. */ struct sigcontext { - __uint_reg_t gregs[53]; /* General-purpose registers. */ - __uint_reg_t tp; /* Aliases gregs[TREG_TP]. */ - __uint_reg_t sp; /* Aliases gregs[TREG_SP]. */ - __uint_reg_t lr; /* Aliases gregs[TREG_LR]. */ + __extension__ union { + /* General-purpose registers. */ + __uint_reg_t gregs[56]; + __extension__ struct { + __uint_reg_t __gregs[53]; + __uint_reg_t tp; /* Aliases gregs[TREG_TP]. */ + __uint_reg_t sp; /* Aliases gregs[TREG_SP]. */ + __uint_reg_t lr; /* Aliases gregs[TREG_LR]. */ + }; + }; __uint_reg_t pc; /* Program counter. */ __uint_reg_t ics; /* In Interrupt Critical Section? */ __uint_reg_t faultnum; /* Fault number. */ diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index aca6000bca75..c4646bb99342 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -365,8 +365,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) * to quiesce. 
*/ if (rect->teardown_in_progress) { - pr_notice("cpu %d: detected %s hardwall violation %#lx" - " while teardown already in progress\n", + pr_notice("cpu %d: detected %s hardwall violation %#lx while teardown already in progress\n", cpu, hwt->name, (long)mfspr_XDN(hwt, DIRECTION_PROTECT)); goto done; @@ -630,8 +629,7 @@ static void _hardwall_deactivate(struct hardwall_type *hwt, struct thread_struct *ts = &task->thread; if (cpumask_weight(&task->cpus_allowed) != 1) { - pr_err("pid %d (%s) releasing %s hardwall with" - " an affinity mask containing %d cpus!\n", + pr_err("pid %d (%s) releasing %s hardwall with an affinity mask containing %d cpus!\n", task->pid, task->comm, hwt->name, cpumask_weight(&task->cpus_allowed)); BUG(); diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index ba85765e1436..22044fc691ef 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -107,9 +107,8 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) { long sp = stack_pointer - (long) current_thread_info(); if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { - pr_emerg("tile_dev_intr: " - "stack overflow: %ld\n", - sp - sizeof(struct thread_info)); + pr_emerg("%s: stack overflow: %ld\n", + __func__, sp - sizeof(struct thread_info)); dump_stack(); } } diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c index 4cd88381a83e..ff5335ae050d 100644 --- a/arch/tile/kernel/kgdb.c +++ b/arch/tile/kernel/kgdb.c @@ -125,9 +125,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) { - int reg; struct pt_regs *thread_regs; - unsigned long *ptr = gdb_regs; if (task == NULL) return; @@ -136,9 +134,7 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) memset(gdb_regs, 0, NUMREGBYTES); thread_regs = task_pt_regs(task); - for (reg = 0; reg <= TREG_LAST_GPR; reg++) - *(ptr++) = thread_regs->regs[reg]; - + memcpy(gdb_regs, thread_regs, 
TREG_LAST_GPR * sizeof(unsigned long)); gdb_regs[TILEGX_PC_REGNUM] = thread_regs->pc; gdb_regs[TILEGX_FAULTNUM_REGNUM] = thread_regs->faultnum; } diff --git a/arch/tile/kernel/kprobes.c b/arch/tile/kernel/kprobes.c index 27cdcacbe81d..f8a45c51e9e4 100644 --- a/arch/tile/kernel/kprobes.c +++ b/arch/tile/kernel/kprobes.c @@ -90,8 +90,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -EINVAL; if (insn_has_control(*p->addr)) { - pr_notice("Kprobes for control instructions are not " - "supported\n"); + pr_notice("Kprobes for control instructions are not supported\n"); return -EINVAL; } diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index f0b54a934712..008aa2faef55 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c @@ -77,16 +77,13 @@ void machine_crash_shutdown(struct pt_regs *regs) int machine_kexec_prepare(struct kimage *image) { if (num_online_cpus() > 1) { - pr_warning("%s: detected attempt to kexec " - "with num_online_cpus() > 1\n", - __func__); + pr_warn("%s: detected attempt to kexec with num_online_cpus() > 1\n", + __func__); return -ENOSYS; } if (image->type != KEXEC_TYPE_DEFAULT) { - pr_warning("%s: detected attempt to kexec " - "with unsupported type: %d\n", - __func__, - image->type); + pr_warn("%s: detected attempt to kexec with unsupported type: %d\n", + __func__, image->type); return -ENOSYS; } return 0; @@ -131,8 +128,8 @@ static unsigned char *kexec_bn2cl(void *pg) */ csum = ip_compute_csum(pg, bhdrp->b_size); if (csum != 0) { - pr_warning("%s: bad checksum %#x (size %d)\n", - __func__, csum, bhdrp->b_size); + pr_warn("%s: bad checksum %#x (size %d)\n", + __func__, csum, bhdrp->b_size); return 0; } @@ -160,8 +157,7 @@ static unsigned char *kexec_bn2cl(void *pg) while (*desc != '\0') { desc++; if (((unsigned long)desc & PAGE_MASK) != (unsigned long)pg) { - pr_info("%s: ran off end of page\n", - __func__); + pr_info("%s: ran off end of page\n", __func__); return 0; } } @@ 
-195,20 +191,18 @@ static void kexec_find_and_set_command_line(struct kimage *image) } if (command_line != 0) { - pr_info("setting new command line to \"%s\"\n", - command_line); + pr_info("setting new command line to \"%s\"\n", command_line); hverr = hv_set_command_line( (HV_VirtAddr) command_line, strlen(command_line)); kunmap_atomic(command_line); } else { - pr_info("%s: no command line found; making empty\n", - __func__); + pr_info("%s: no command line found; making empty\n", __func__); hverr = hv_set_command_line((HV_VirtAddr) command_line, 0); } if (hverr) - pr_warning("%s: hv_set_command_line returned error: %d\n", - __func__, hverr); + pr_warn("%s: hv_set_command_line returned error: %d\n", + __func__, hverr); } /* diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index ac950be1318e..7475af3aacec 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c @@ -59,9 +59,8 @@ void hv_message_intr(struct pt_regs *regs, int intnum) { long sp = stack_pointer - (long) current_thread_info(); if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { - pr_emerg("hv_message_intr: " - "stack overflow: %ld\n", - sp - sizeof(struct thread_info)); + pr_emerg("%s: stack overflow: %ld\n", + __func__, sp - sizeof(struct thread_info)); dump_stack(); } } diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index d19b13e3a59f..96447c9160a0 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -96,8 +96,8 @@ void module_free(struct module *mod, void *module_region) static int validate_hw2_last(long value, struct module *me) { if (((value << 16) >> 16) != value) { - pr_warning("module %s: Out of range HW2_LAST value %#lx\n", - me->name, value); + pr_warn("module %s: Out of range HW2_LAST value %#lx\n", + me->name, value); return 0; } return 1; @@ -210,10 +210,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, value -= (unsigned long) location; /* pc-relative */ value = (long) value >> 3; /* count by 
instrs */ if (!validate_jumpoff(value)) { - pr_warning("module %s: Out of range jump to" - " %#llx at %#llx (%p)\n", me->name, - sym->st_value + rel[i].r_addend, - rel[i].r_offset, location); + pr_warn("module %s: Out of range jump to %#llx at %#llx (%p)\n", + me->name, + sym->st_value + rel[i].r_addend, + rel[i].r_offset, location); return -ENOEXEC; } MUNGE(create_JumpOff_X1); diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index 1f80a88c75a6..f70c7892fa25 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -178,8 +178,8 @@ int __init tile_pci_init(void) continue; hv_cfg_fd1 = tile_pcie_open(i, 1); if (hv_cfg_fd1 < 0) { - pr_err("PCI: Couldn't open config fd to HV " - "for controller %d\n", i); + pr_err("PCI: Couldn't open config fd to HV for controller %d\n", + i); goto err_cont; } @@ -423,8 +423,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) for (i = 0; i < 6; i++) { r = &dev->resource[i]; if (r->flags & IORESOURCE_UNSET) { - pr_err("PCI: Device %s not available " - "because of resource collisions\n", + pr_err("PCI: Device %s not available because of resource collisions\n", pci_name(dev)); return -EINVAL; } diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index e717af20dada..2c95f37ebbed 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -131,8 +131,7 @@ static int tile_irq_cpu(int irq) count = cpumask_weight(&intr_cpus_map); if (unlikely(count == 0)) { - pr_warning("intr_cpus_map empty, interrupts will be" - " delievered to dataplane tiles\n"); + pr_warn("intr_cpus_map empty, interrupts will be delievered to dataplane tiles\n"); return irq % (smp_height * smp_width); } @@ -197,16 +196,16 @@ static int tile_pcie_open(int trio_index) /* Get the properties of the PCIe ports on this TRIO instance. 
*/ ret = gxio_trio_get_port_property(context, &pcie_ports[trio_index]); if (ret < 0) { - pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d," - " on TRIO %d\n", ret, trio_index); + pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d, on TRIO %d\n", + ret, trio_index); goto get_port_property_failure; } context->mmio_base_mac = iorpc_ioremap(context->fd, 0, HV_TRIO_CONFIG_IOREMAP_SIZE); if (context->mmio_base_mac == NULL) { - pr_err("PCI: TRIO config space mapping failure, error %d," - " on TRIO %d\n", ret, trio_index); + pr_err("PCI: TRIO config space mapping failure, error %d, on TRIO %d\n", + ret, trio_index); ret = -ENOMEM; goto trio_mmio_mapping_failure; @@ -622,9 +621,8 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller) dev_control.max_read_req_sz, mac); if (err < 0) { - pr_err("PCI: PCIE_CONFIGURE_MAC_MPS_MRS failure, " - "MAC %d on TRIO %d\n", - mac, controller->trio_index); + pr_err("PCI: PCIE_CONFIGURE_MAC_MPS_MRS failure, MAC %d on TRIO %d\n", + mac, controller->trio_index); } } @@ -720,27 +718,24 @@ int __init pcibios_init(void) reg_offset); if (!port_status.dl_up) { if (rc_delay[trio_index][mac]) { - pr_info("Delaying PCIe RC TRIO init %d sec" - " on MAC %d on TRIO %d\n", + pr_info("Delaying PCIe RC TRIO init %d sec on MAC %d on TRIO %d\n", rc_delay[trio_index][mac], mac, trio_index); msleep(rc_delay[trio_index][mac] * 1000); } ret = gxio_trio_force_rc_link_up(trio_context, mac); if (ret < 0) - pr_err("PCI: PCIE_FORCE_LINK_UP failure, " - "MAC %d on TRIO %d\n", mac, trio_index); + pr_err("PCI: PCIE_FORCE_LINK_UP failure, MAC %d on TRIO %d\n", + mac, trio_index); } - pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n", i, - trio_index, controller->mac); + pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n", + i, trio_index, controller->mac); /* Delay the bus probe if needed. 
*/ if (rc_delay[trio_index][mac]) { - pr_info("Delaying PCIe RC bus enumerating %d sec" - " on MAC %d on TRIO %d\n", - rc_delay[trio_index][mac], mac, - trio_index); + pr_info("Delaying PCIe RC bus enumerating %d sec on MAC %d on TRIO %d\n", + rc_delay[trio_index][mac], mac, trio_index); msleep(rc_delay[trio_index][mac] * 1000); } else { /* @@ -758,11 +753,10 @@ int __init pcibios_init(void) if (pcie_ports[trio_index].ports[mac].removable) { pr_info("PCI: link is down, MAC %d on TRIO %d\n", mac, trio_index); - pr_info("This is expected if no PCIe card" - " is connected to this link\n"); + pr_info("This is expected if no PCIe card is connected to this link\n"); } else pr_err("PCI: link is down, MAC %d on TRIO %d\n", - mac, trio_index); + mac, trio_index); continue; } @@ -829,8 +823,8 @@ int __init pcibios_init(void) /* Alloc a PIO region for PCI config access per MAC. */ ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); if (ret < 0) { - pr_err("PCI: PCI CFG PIO alloc failure for mac %d " - "on TRIO %d, give up\n", mac, trio_index); + pr_err("PCI: PCI CFG PIO alloc failure for mac %d on TRIO %d, give up\n", + mac, trio_index); continue; } @@ -842,8 +836,8 @@ int __init pcibios_init(void) trio_context->pio_cfg_index[mac], mac, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE); if (ret < 0) { - pr_err("PCI: PCI CFG PIO init failure for mac %d " - "on TRIO %d, give up\n", mac, trio_index); + pr_err("PCI: PCI CFG PIO init failure for mac %d on TRIO %d, give up\n", + mac, trio_index); continue; } @@ -865,7 +859,7 @@ int __init pcibios_init(void) (TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT - 1))); if (trio_context->mmio_base_pio_cfg[mac] == NULL) { pr_err("PCI: PIO map failure for mac %d on TRIO %d\n", - mac, trio_index); + mac, trio_index); continue; } @@ -925,9 +919,8 @@ int __init pcibios_init(void) /* Alloc a PIO region for PCI memory access for each RC port. 
*/ ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); if (ret < 0) { - pr_err("PCI: MEM PIO alloc failure on TRIO %d mac %d, " - "give up\n", controller->trio_index, - controller->mac); + pr_err("PCI: MEM PIO alloc failure on TRIO %d mac %d, give up\n", + controller->trio_index, controller->mac); continue; } @@ -944,9 +937,8 @@ int __init pcibios_init(void) 0, 0); if (ret < 0) { - pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, " - "give up\n", controller->trio_index, - controller->mac); + pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, give up\n", + controller->trio_index, controller->mac); continue; } @@ -957,9 +949,8 @@ int __init pcibios_init(void) */ ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); if (ret < 0) { - pr_err("PCI: I/O PIO alloc failure on TRIO %d mac %d, " - "give up\n", controller->trio_index, - controller->mac); + pr_err("PCI: I/O PIO alloc failure on TRIO %d mac %d, give up\n", + controller->trio_index, controller->mac); continue; } @@ -976,9 +967,8 @@ int __init pcibios_init(void) 0, HV_TRIO_PIO_FLAG_IO_SPACE); if (ret < 0) { - pr_err("PCI: I/O PIO init failure on TRIO %d mac %d, " - "give up\n", controller->trio_index, - controller->mac); + pr_err("PCI: I/O PIO init failure on TRIO %d mac %d, give up\n", + controller->trio_index, controller->mac); continue; } @@ -997,10 +987,9 @@ int __init pcibios_init(void) ret = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0); if (ret < 0) { - pr_err("PCI: Mem-Map alloc failure on TRIO %d " - "mac %d for MC %d, give up\n", - controller->trio_index, - controller->mac, j); + pr_err("PCI: Mem-Map alloc failure on TRIO %d mac %d for MC %d, give up\n", + controller->trio_index, controller->mac, + j); goto alloc_mem_map_failed; } @@ -1030,10 +1019,9 @@ int __init pcibios_init(void) j, GXIO_TRIO_ORDER_MODE_UNORDERED); if (ret < 0) { - pr_err("PCI: Mem-Map init failure on TRIO %d " - "mac %d for MC %d, give up\n", - controller->trio_index, - controller->mac, j); + pr_err("PCI: Mem-Map 
init failure on TRIO %d mac %d for MC %d, give up\n", + controller->trio_index, controller->mac, + j); goto alloc_mem_map_failed; } @@ -1510,9 +1498,7 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) * Most PCIe endpoint devices do support 64-bit message addressing. */ if (desc->msi_attrib.is_64 == 0) { - dev_printk(KERN_INFO, &pdev->dev, - "64-bit MSI message address not supported, " - "falling back to legacy interrupts.\n"); + dev_info(&pdev->dev, "64-bit MSI message address not supported, falling back to legacy interrupts\n"); ret = -ENOMEM; goto is_64_failure; @@ -1549,11 +1535,8 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) /* SQ regions are out, allocate from map mem regions. */ mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0); if (mem_map < 0) { - dev_printk(KERN_INFO, &pdev->dev, - "%s Mem-Map alloc failure. " - "Failed to initialize MSI interrupts. " - "Falling back to legacy interrupts.\n", - desc->msi_attrib.is_msix ? "MSI-X" : "MSI"); + dev_info(&pdev->dev, "%s Mem-Map alloc failure - failed to initialize MSI interrupts - falling back to legacy interrupts\n", + desc->msi_attrib.is_msix ? 
"MSI-X" : "MSI"); ret = -ENOMEM; goto msi_mem_map_alloc_failure; } @@ -1580,7 +1563,7 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) mem_map, mem_map_base, mem_map_limit, trio_context->asid); if (ret < 0) { - dev_printk(KERN_INFO, &pdev->dev, "HV MSI config failed.\n"); + dev_info(&pdev->dev, "HV MSI config failed\n"); goto hv_msi_config_failure; } diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 0050cbc1d9de..48e5773dd0b7 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -52,7 +52,7 @@ static int __init idle_setup(char *str) return -EINVAL; if (!strcmp(str, "poll")) { - pr_info("using polling idle threads.\n"); + pr_info("using polling idle threads\n"); cpu_idle_poll_ctrl(true); return 0; } else if (!strcmp(str, "halt")) { @@ -547,27 +547,25 @@ void show_regs(struct pt_regs *regs) struct task_struct *tsk = validate_current(); int i; - pr_err("\n"); if (tsk != &corrupt_current) show_regs_print_info(KERN_ERR); #ifdef __tilegx__ for (i = 0; i < 17; i++) - pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", + pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n", i, regs->regs[i], i+18, regs->regs[i+18], i+36, regs->regs[i+36]); - pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n", + pr_err(" r17: " REGFMT " r35: " REGFMT " tp : " REGFMT "\n", regs->regs[17], regs->regs[35], regs->tp); - pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr); + pr_err(" sp : " REGFMT " lr : " REGFMT "\n", regs->sp, regs->lr); #else for (i = 0; i < 13; i++) - pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT - " r%-2d: "REGFMT" r%-2d: "REGFMT"\n", + pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n", i, regs->regs[i], i+14, regs->regs[i+14], i+27, regs->regs[i+27], i+40, regs->regs[i+40]); - pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n", + pr_err(" r13: " REGFMT " tp : " REGFMT " sp : " REGFMT " lr : " REGFMT "\n", regs->regs[13], 
regs->tp, regs->sp, regs->lr); #endif - pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld\n", + pr_err(" pc : " REGFMT " ex1: %ld faultnum: %ld\n", regs->pc, regs->ex1, regs->faultnum); dump_stack_regs(regs); diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 7f079bbfdf4c..864eea69556d 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -130,7 +130,7 @@ static int __init setup_maxmem(char *str) maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used to no more than %dMB\n", - maxmem_pfn >> (20 - PAGE_SHIFT)); + maxmem_pfn >> (20 - PAGE_SHIFT)); return 0; } early_param("maxmem", setup_maxmem); @@ -149,7 +149,7 @@ static int __init setup_maxnodemem(char *str) maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used on node %ld to no more than %dMB\n", - node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT)); + node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT)); return 0; } early_param("maxnodemem", setup_maxnodemem); @@ -417,8 +417,7 @@ static void __init setup_memory(void) range.start = (start_pa + HPAGE_SIZE - 1) & HPAGE_MASK; range.size -= (range.start - start_pa); range.size &= HPAGE_MASK; - pr_err("Range not hugepage-aligned: %#llx..%#llx:" - " now %#llx-%#llx\n", + pr_err("Range not hugepage-aligned: %#llx..%#llx: now %#llx-%#llx\n", start_pa, start_pa + orig_size, range.start, range.start + range.size); } @@ -437,8 +436,8 @@ static void __init setup_memory(void) if (PFN_DOWN(range.size) > maxnodemem_pfn[i]) { int max_size = maxnodemem_pfn[i]; if (max_size > 0) { - pr_err("Maxnodemem reduced node %d to" - " %d pages\n", i, max_size); + pr_err("Maxnodemem reduced node %d to %d pages\n", + i, max_size); range.size = PFN_PHYS(max_size); } else { pr_err("Maxnodemem disabled node %d\n", i); @@ -490,8 +489,8 @@ static void __init setup_memory(void) NR_CPUS * (PFN_UP(per_cpu_size) >> PAGE_SHIFT); if (end < pci_reserve_end_pfn + percpu_pages) { end = 
pci_reserve_start_pfn; - pr_err("PCI mapping region reduced node %d to" - " %ld pages\n", i, end - start); + pr_err("PCI mapping region reduced node %d to %ld pages\n", + i, end - start); } } #endif @@ -555,10 +554,9 @@ static void __init setup_memory(void) MAXMEM_PFN : mappable_physpages; highmem_pages = (long) (physpages - lowmem_pages); - pr_notice("%ldMB HIGHMEM available.\n", - pages_to_mb(highmem_pages > 0 ? highmem_pages : 0)); - pr_notice("%ldMB LOWMEM available.\n", - pages_to_mb(lowmem_pages)); + pr_notice("%ldMB HIGHMEM available\n", + pages_to_mb(highmem_pages > 0 ? highmem_pages : 0)); + pr_notice("%ldMB LOWMEM available\n", pages_to_mb(lowmem_pages)); #else /* Set max_low_pfn based on what node 0 can directly address. */ max_low_pfn = node_end_pfn[0]; @@ -571,8 +569,8 @@ static void __init setup_memory(void) max_pfn = MAXMEM_PFN; node_end_pfn[0] = MAXMEM_PFN; } else { - pr_notice("%ldMB memory available.\n", - pages_to_mb(node_end_pfn[0])); + pr_notice("%ldMB memory available\n", + pages_to_mb(node_end_pfn[0])); } for (i = 1; i < MAX_NUMNODES; ++i) { node_start_pfn[i] = 0; @@ -587,8 +585,7 @@ static void __init setup_memory(void) if (pages) high_memory = pfn_to_kaddr(node_end_pfn[i]); } - pr_notice("%ldMB memory available.\n", - pages_to_mb(lowmem_pages)); + pr_notice("%ldMB memory available\n", pages_to_mb(lowmem_pages)); #endif #endif } @@ -1535,8 +1532,7 @@ static void __init pcpu_fc_populate_pte(unsigned long addr) BUG_ON(pgd_addr_invalid(addr)); if (addr < VMALLOC_START || addr >= VMALLOC_END) - panic("PCPU addr %#lx outside vmalloc range %#lx..%#lx;" - " try increasing CONFIG_VMALLOC_RESERVE\n", + panic("PCPU addr %#lx outside vmalloc range %#lx..%#lx; try increasing CONFIG_VMALLOC_RESERVE\n", addr, VMALLOC_START, VMALLOC_END); pgd = swapper_pg_dir + pgd_index(addr); @@ -1591,8 +1587,8 @@ void __init setup_per_cpu_areas(void) lowmem_va = (unsigned long)pfn_to_kaddr(pfn); ptep = virt_to_kpte(lowmem_va); if (pte_huge(*ptep)) { - printk(KERN_DEBUG 
"early shatter of huge page" - " at %#lx\n", lowmem_va); + printk(KERN_DEBUG "early shatter of huge page at %#lx\n", + lowmem_va); shatter_pmd((pmd_t *)ptep); ptep = virt_to_kpte(lowmem_va); BUG_ON(pte_huge(*ptep)); diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 7c2fecc52177..bb0a9ce7ae23 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -45,8 +45,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { - int err = 0; - int i; + int err; /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; @@ -57,9 +56,7 @@ int restore_sigcontext(struct pt_regs *regs, */ BUILD_BUG_ON(sizeof(struct sigcontext) != sizeof(struct pt_regs)); BUILD_BUG_ON(sizeof(struct sigcontext) % 8 != 0); - - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) - err |= __get_user(regs->regs[i], &sc->gregs[i]); + err = __copy_from_user(regs, sc, sizeof(*regs)); /* Ensure that the PL is always set to USER_PL. 
*/ regs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(regs->ex1)); @@ -110,12 +107,7 @@ badframe: int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) { - int i, err = 0; - - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) - err |= __put_user(regs->regs[i], &sc->gregs[i]); - - return err; + return __copy_to_user(sc, regs, sizeof(*regs)); } /* @@ -345,7 +337,6 @@ static void dump_mem(void __user *address) int i, j, k; int found_readable_mem = 0; - pr_err("\n"); if (!access_ok(VERIFY_READ, address, 1)) { pr_err("Not dumping at address 0x%lx (kernel address)\n", (unsigned long)address); @@ -367,7 +358,7 @@ static void dump_mem(void __user *address) (unsigned long)address); found_readable_mem = 1; } - j = sprintf(line, REGFMT":", (unsigned long)addr); + j = sprintf(line, REGFMT ":", (unsigned long)addr); for (k = 0; k < bytes_per_line; ++k) j += sprintf(&line[j], " %02x", buf[k]); pr_err("%s\n", line); @@ -411,8 +402,7 @@ void trace_unhandled_signal(const char *type, struct pt_regs *regs, case SIGFPE: case SIGSEGV: case SIGBUS: - pr_err("User crash: signal %d," - " trap %ld, address 0x%lx\n", + pr_err("User crash: signal %d, trap %ld, address 0x%lx\n", sig, regs->faultnum, address); show_regs(regs); dump_mem((void __user *)address); diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 6cb2ce31b5a2..862973074bf9 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -222,11 +222,9 @@ static tilepro_bundle_bits rewrite_load_store_unaligned( } if (unaligned_printk || unaligned_fixup_count == 0) { - pr_info("Process %d/%s: PC %#lx: Fixup of" - " unaligned %s at %#lx.\n", + pr_info("Process %d/%s: PC %#lx: Fixup of unaligned %s at %#lx\n", current->pid, current->comm, regs->pc, - (mem_op == MEMOP_LOAD || - mem_op == MEMOP_LOAD_POSTINCR) ? + mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR ? 
"load" : "store", (unsigned long)addr); if (!unaligned_printk) { diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 0d59a1b60c74..20d52a98e171 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -127,8 +127,7 @@ static __init int reset_init_affinity(void) { long rc = sched_setaffinity(current->pid, &init_affinity); if (rc != 0) - pr_warning("couldn't reset init affinity (%ld)\n", - rc); + pr_warn("couldn't reset init affinity (%ld)\n", rc); return 0; } late_initcall(reset_init_affinity); @@ -174,7 +173,7 @@ static void start_secondary(void) /* Indicate that we're ready to come up. */ /* Must not do this before we're ready to receive messages */ if (cpumask_test_and_set_cpu(cpuid, &cpu_started)) { - pr_warning("CPU#%d already started!\n", cpuid); + pr_warn("CPU#%d already started!\n", cpuid); for (;;) local_irq_enable(); } diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index c93977a62116..7ff5afdbd3aa 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -387,9 +387,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) * then bust_spinlocks() spit out a space in front of us * and it will mess up our KERN_ERR. 
*/ - pr_err("\n"); - pr_err("Starting stack dump of tid %d, pid %d (%s)" - " on cpu %d at cycle %lld\n", + pr_err("Starting stack dump of tid %d, pid %d (%s) on cpu %d at cycle %lld\n", kbt->task->pid, kbt->task->tgid, kbt->task->comm, raw_smp_processor_id(), get_cycles()); } @@ -411,8 +409,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) i++, address, namebuf, (unsigned long)(kbt->it.sp)); if (i >= 100) { - pr_err("Stack dump truncated" - " (%d frames)\n", i); + pr_err("Stack dump truncated (%d frames)\n", i); break; } } diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index b854a1cd0079..d412b0856c0a 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -98,8 +98,8 @@ void __init calibrate_delay(void) { loops_per_jiffy = get_clock_rate() / HZ; pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); + loops_per_jiffy / (500000 / HZ), + (loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy); } /* Called fairly late in init/main.c, but before we go smp. */ diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 86900ccd4977..bf841ca517bb 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -46,9 +46,9 @@ static int __init setup_unaligned_fixup(char *str) return 0; pr_info("Fixups for unaligned data accesses are %s\n", - unaligned_fixup >= 0 ? - (unaligned_fixup ? "enabled" : "disabled") : - "completely disabled"); + unaligned_fixup >= 0 ? + (unaligned_fixup ? 
"enabled" : "disabled") : + "completely disabled"); return 1; } __setup("unaligned_fixup=", setup_unaligned_fixup); @@ -305,8 +305,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, case INT_ILL: if (copy_from_user(&instr, (void __user *)regs->pc, sizeof(instr))) { - pr_err("Unreadable instruction for INT_ILL:" - " %#lx\n", regs->pc); + pr_err("Unreadable instruction for INT_ILL: %#lx\n", + regs->pc); do_exit(SIGKILL); return; } diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index c02ea2a45f67..7d9a83be0aca 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -969,8 +969,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle, unaligned_fixup_count++; if (unaligned_printk) { - pr_info("%s/%d. Unalign fixup for kernel access " - "to userspace %lx.", + pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n", current->comm, current->pid, regs->regs[ra]); } @@ -985,7 +984,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle, .si_addr = (unsigned char __user *)0 }; if (unaligned_printk) - pr_info("Unalign bundle: unexp @%llx, %llx", + pr_info("Unalign bundle: unexp @%llx, %llx\n", (unsigned long long)regs->pc, (unsigned long long)bundle); @@ -1370,8 +1369,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle, frag.bundle = bundle; if (unaligned_printk) { - pr_info("%s/%d, Unalign fixup: pc=%lx " - "bundle=%lx %d %d %d %d %d %d %d %d.", + pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n", current->comm, current->pid, (unsigned long)frag.pc, (unsigned long)frag.bundle, @@ -1380,8 +1378,8 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle, (int)y1_lr, (int)y1_br, (int)x1_add); for (k = 0; k < n; k += 2) - pr_info("[%d] %016llx %016llx", k, - (unsigned long long)frag.insn[k], + pr_info("[%d] %016llx %016llx\n", + k, (unsigned long long)frag.insn[k], (unsigned long long)frag.insn[k+1]); } @@ 
-1402,7 +1400,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle, .si_addr = (void __user *)&jit_code_area[idx] }; - pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx", + pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n", current->pid, current->comm, (unsigned long long)&jit_code_area[idx]); @@ -1485,7 +1483,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum) /* If exception came from kernel, try fix it up. */ if (fixup_exception(regs)) { if (unaligned_printk) - pr_info("Unalign fixup: %d %llx @%llx", + pr_info("Unalign fixup: %d %llx @%llx\n", (int)unaligned_fixup, (unsigned long long)regs->ex1, (unsigned long long)regs->pc); @@ -1519,7 +1517,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum) }; if (unaligned_printk) - pr_info("Unalign fixup: %d %llx @%llx", + pr_info("Unalign fixup: %d %llx @%llx\n", (int)unaligned_fixup, (unsigned long long)regs->ex1, (unsigned long long)regs->pc); @@ -1579,14 +1577,14 @@ void do_unaligned(struct pt_regs *regs, int vecnum) 0); if (IS_ERR((void __force *)user_page)) { - pr_err("Out of kernel pages trying do_mmap.\n"); + pr_err("Out of kernel pages trying do_mmap\n"); return; } /* Save the address in the thread_info struct */ info->unalign_jit_base = user_page; if (unaligned_printk) - pr_info("Unalign bundle: %d:%d, allocate page @%llx", + pr_info("Unalign bundle: %d:%d, allocate page @%llx\n", raw_smp_processor_id(), current->pid, (unsigned long long)user_page); } diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 6c0571216a9d..565e25a98334 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -169,8 +169,7 @@ static void wait_for_migration(pte_t *pte) while (pte_migrating(*pte)) { barrier(); if (++retries > bound) - panic("Hit migrating PTE (%#llx) and" - " page PFN %#lx still migrating", + panic("Hit migrating PTE (%#llx) and page PFN %#lx still migrating", pte->val, pte_pfn(*pte)); } } @@ -292,11 +291,10 @@ static int handle_page_fault(struct pt_regs *regs, */ 
stack_offset = stack_pointer & (THREAD_SIZE-1); if (stack_offset < THREAD_SIZE / 8) { - pr_alert("Potential stack overrun: sp %#lx\n", - stack_pointer); + pr_alert("Potential stack overrun: sp %#lx\n", stack_pointer); show_regs(regs); pr_alert("Killing current process %d/%s\n", - tsk->pid, tsk->comm); + tsk->pid, tsk->comm); do_group_exit(SIGKILL); } @@ -421,7 +419,7 @@ good_area: } else if (write) { #ifdef TEST_VERIFY_AREA if (!is_page_fault && regs->cs == KERNEL_CS) - pr_err("WP fault at "REGFMT"\n", regs->eip); + pr_err("WP fault at " REGFMT "\n", regs->eip); #endif if (!(vma->vm_flags & VM_WRITE)) goto bad_area; @@ -519,16 +517,15 @@ no_context: pte_t *pte = lookup_address(address); if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) - pr_crit("kernel tried to execute" - " non-executable page - exploit attempt?" - " (uid: %d)\n", current->uid); + pr_crit("kernel tried to execute non-executable page - exploit attempt? (uid: %d)\n", + current->uid); } #endif if (address < PAGE_SIZE) pr_alert("Unable to handle kernel NULL pointer dereference\n"); else pr_alert("Unable to handle kernel paging request\n"); - pr_alert(" at virtual address "REGFMT", pc "REGFMT"\n", + pr_alert(" at virtual address " REGFMT ", pc " REGFMT "\n", address, regs->pc); show_regs(regs); @@ -575,9 +572,10 @@ do_sigbus: #ifndef __tilegx__ /* We must release ICS before panicking or we won't get anywhere. */ -#define ics_panic(fmt, ...) do { \ - __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); \ - panic(fmt, __VA_ARGS__); \ +#define ics_panic(fmt, ...) 
\ +do { \ + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); \ + panic(fmt, ##__VA_ARGS__); \ } while (0) /* @@ -615,8 +613,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, fault_num != INT_DTLB_ACCESS)) { unsigned long old_pc = regs->pc; regs->pc = pc; - ics_panic("Bad ICS page fault args:" - " old PC %#lx, fault %d/%d at %#lx\n", + ics_panic("Bad ICS page fault args: old PC %#lx, fault %d/%d at %#lx", old_pc, fault_num, write, address); } @@ -669,8 +666,8 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, #endif fixup = search_exception_tables(pc); if (!fixup) - ics_panic("ICS atomic fault not in table:" - " PC %#lx, fault %d", pc, fault_num); + ics_panic("ICS atomic fault not in table: PC %#lx, fault %d", + pc, fault_num); regs->pc = fixup->fixup; regs->ex1 = PL_ICS_EX1(KERNEL_PL, 0); } @@ -826,8 +823,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num, set_thread_flag(TIF_ASYNC_TLB); if (async->fault_num != 0) { - panic("Second async fault %d;" - " old fault was %d (%#lx/%ld)", + panic("Second async fault %d; old fault was %d (%#lx/%ld)", fault_num, async->fault_num, address, write); } diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 33294fdc402e..cd3387370ebb 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -152,12 +152,10 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy); cpumask_scnprintf(tlb_buf, sizeof(tlb_buf), &tlb_cpumask_copy); - pr_err("hv_flush_remote(%#llx, %#lx, %p [%s]," - " %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n", + pr_err("hv_flush_remote(%#llx, %#lx, %p [%s], %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n", cache_pa, cache_control, cache_cpumask, cache_buf, (unsigned long)tlb_va, tlb_length, tlb_pgsize, - tlb_cpumask, tlb_buf, - asids, asidcount, rc); + tlb_cpumask, tlb_buf, asids, asidcount, rc); panic("Unsafe to continue."); } diff --git 
a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index e514899e1100..3270e0019266 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -284,22 +284,21 @@ static __init int __setup_hugepagesz(unsigned long ps) int level, base_shift; if ((1UL << log_ps) != ps || (log_ps & 1) != 0) { - pr_warn("Not enabling %ld byte huge pages;" - " must be a power of four.\n", ps); + pr_warn("Not enabling %ld byte huge pages; must be a power of four\n", + ps); return -EINVAL; } if (ps > 64*1024*1024*1024UL) { - pr_warn("Not enabling %ld MB huge pages;" - " largest legal value is 64 GB .\n", ps >> 20); + pr_warn("Not enabling %ld MB huge pages; largest legal value is 64 GB\n", + ps >> 20); return -EINVAL; } else if (ps >= PUD_SIZE) { static long hv_jpage_size; if (hv_jpage_size == 0) hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO); if (hv_jpage_size != PUD_SIZE) { - pr_warn("Not enabling >= %ld MB huge pages:" - " hypervisor reports size %ld\n", + pr_warn("Not enabling >= %ld MB huge pages: hypervisor reports size %ld\n", PUD_SIZE >> 20, hv_jpage_size); return -EINVAL; } @@ -320,14 +319,13 @@ static __init int __setup_hugepagesz(unsigned long ps) int shift_val = log_ps - base_shift; if (huge_shift[level] != 0) { int old_shift = base_shift + huge_shift[level]; - pr_warn("Not enabling %ld MB huge pages;" - " already have size %ld MB.\n", + pr_warn("Not enabling %ld MB huge pages; already have size %ld MB\n", ps >> 20, (1UL << old_shift) >> 20); return -EINVAL; } if (hv_set_pte_super_shift(level, shift_val) != 0) { - pr_warn("Not enabling %ld MB huge pages;" - " no hypervisor support.\n", ps >> 20); + pr_warn("Not enabling %ld MB huge pages; no hypervisor support\n", + ps >> 20); return -EINVAL; } printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20); diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index caa270165f86..be240cc4978d 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -357,11 +357,11 @@ static int __init 
setup_ktext(char *str) cpulist_scnprintf(buf, sizeof(buf), &ktext_mask); if (cpumask_weight(&ktext_mask) > 1) { ktext_small = 1; - pr_info("ktext: using caching neighborhood %s " - "with small pages\n", buf); + pr_info("ktext: using caching neighborhood %s with small pages\n", + buf); } else { pr_info("ktext: caching on cpu %s with one huge page\n", - buf); + buf); } } @@ -413,19 +413,16 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) int rc, i; if (ktext_arg_seen && ktext_hash) { - pr_warning("warning: \"ktext\" boot argument ignored" - " if \"kcache_hash\" sets up text hash-for-home\n"); + pr_warn("warning: \"ktext\" boot argument ignored if \"kcache_hash\" sets up text hash-for-home\n"); ktext_small = 0; } if (kdata_arg_seen && kdata_hash) { - pr_warning("warning: \"kdata\" boot argument ignored" - " if \"kcache_hash\" sets up data hash-for-home\n"); + pr_warn("warning: \"kdata\" boot argument ignored if \"kcache_hash\" sets up data hash-for-home\n"); } if (kdata_huge && !hash_default) { - pr_warning("warning: disabling \"kdata=huge\"; requires" - " kcache_hash=all or =allbutstack\n"); + pr_warn("warning: disabling \"kdata=huge\"; requires kcache_hash=all or =allbutstack\n"); kdata_huge = 0; } @@ -470,8 +467,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pte[pte_ofs] = pfn_pte(pfn, prot); } else { if (kdata_huge) - printk(KERN_DEBUG "pre-shattered huge" - " page at %#lx\n", address); + printk(KERN_DEBUG "pre-shattered huge page at %#lx\n", + address); for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE; pfn++, pte_ofs++, address += PAGE_SIZE) { pgprot_t prot = init_pgprot(address); @@ -501,8 +498,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pr_info("ktext: not using unavailable cpus %s\n", buf); } if (cpumask_empty(&ktext_mask)) { - pr_warning("ktext: no valid cpus; caching on %d.\n", - smp_processor_id()); + pr_warn("ktext: no valid cpus; caching on %d\n", + smp_processor_id()); cpumask_copy(&ktext_mask, 
cpumask_of(smp_processor_id())); } @@ -798,11 +795,9 @@ void __init mem_init(void) #ifdef CONFIG_HIGHMEM /* check that fixmap and pkmap do not overlap */ if (PKMAP_ADDR(LAST_PKMAP-1) >= FIXADDR_START) { - pr_err("fixmap and kmap areas overlap" - " - this will crash\n"); + pr_err("fixmap and kmap areas overlap - this will crash\n"); pr_err("pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1), - FIXADDR_START); + PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1), FIXADDR_START); BUG(); } #endif @@ -926,8 +921,7 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) unsigned long addr = (unsigned long) begin; if (kdata_huge && !initfree) { - pr_warning("Warning: ignoring initfree=0:" - " incompatible with kdata=huge\n"); + pr_warn("Warning: ignoring initfree=0: incompatible with kdata=huge\n"); initfree = 1; } end = (end + PAGE_SIZE - 1) & PAGE_MASK; diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 5e86eac4bfae..7bf2491a9c1f 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -44,9 +44,7 @@ void show_mem(unsigned int filter) { struct zone *zone; - pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu" - " free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu" - " pagecache:%lu swap:%lu\n", + pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n", (global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE)), (global_page_state(NR_INACTIVE_ANON) + diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d69f1cd87fd9..ba397bde7948 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -249,10 +249,6 @@ config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI -config X86_INTEL_MPX - def_bool y - depends on CPU_SUP_INTEL - config X86_32_SMP def_bool y depends on X86_32 && SMP @@ -887,11 +883,11 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on 
X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI + select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ config X86_IO_APIC - def_bool y - depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI - select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ + def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC + depends on X86_LOCAL_APIC select IRQ_DOMAIN config X86_REROUTE_FOR_BROKEN_BOOT_IRQS @@ -1594,6 +1590,32 @@ config X86_SMAP If unsure, say Y. +config X86_INTEL_MPX + prompt "Intel MPX (Memory Protection Extensions)" + def_bool n + depends on CPU_SUP_INTEL + ---help--- + MPX provides hardware features that can be used in + conjunction with compiler-instrumented code to check + memory references. It is designed to detect buffer + overflow or underflow bugs. + + This option enables running applications which are + instrumented or otherwise use MPX. It does not use MPX + itself inside the kernel or to protect the kernel + against bad memory references. + + Enabling this option will make the kernel larger: + ~8k of kernel text and 36 bytes of data on a 64-bit + defconfig. It adds a long to the 'mm_struct' which + will increase the kernel memory overhead of each + process and adds some branches to paths used during + exec() and munmap(). + + For details, see Documentation/x86/intel_mpx.txt + + If unsure, say N. 
+ config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 4615906d83df..9662290e0b20 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -94,30 +94,7 @@ extern void trace_call_function_single_interrupt(void); #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi #endif /* CONFIG_TRACING */ -/* IOAPIC */ -#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) -extern unsigned long io_apic_irqs; - -extern void setup_IO_APIC(void); -extern void disable_IO_APIC(void); - -struct io_apic_irq_attr { - int ioapic; - int ioapic_pin; - int trigger; - int polarity; -}; - -static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, - int ioapic, int ioapic_pin, - int trigger, int polarity) -{ - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; -} - +#ifdef CONFIG_IRQ_REMAP /* Intel specific interrupt remapping information */ struct irq_2_iommu { struct intel_iommu *iommu; @@ -131,14 +108,12 @@ struct irq_2_irte { u16 devid; /* Device ID for IRTE table */ u16 index; /* Index into IRTE table*/ }; +#endif /* CONFIG_IRQ_REMAP */ + +#ifdef CONFIG_X86_LOCAL_APIC +struct irq_data; -/* - * This is performance-critical, we want to do it O(1) - * - * Most irqs are mapped 1:1 with pins. 
- */ struct irq_cfg { - struct irq_pin_list *irq_2_pin; cpumask_var_t domain; cpumask_var_t old_domain; u8 vector; @@ -150,18 +125,39 @@ struct irq_cfg { struct irq_2_irte irq_2_irte; }; #endif + union { +#ifdef CONFIG_X86_IO_APIC + struct { + struct list_head irq_2_pin; + }; +#endif + }; }; +extern struct irq_cfg *irq_cfg(unsigned int irq); +extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); +extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void clear_irq_vector(int irq, struct irq_cfg *cfg); +extern void setup_vector_irq(int cpu); +#ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); +extern void irq_complete_move(struct irq_cfg *cfg); +#else +static inline void send_cleanup_vector(struct irq_cfg *c) { } +static inline void irq_complete_move(struct irq_cfg *c) { } +#endif -struct irq_data; -int __ioapic_set_affinity(struct irq_data *, const struct cpumask *, - unsigned int *dest_id); -extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); -extern void setup_ioapic_dest(void); - -extern void enable_IO_APIC(void); +extern int apic_retrigger_irq(struct irq_data *data); +extern void apic_ack_edge(struct irq_data *data); +extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, + unsigned int *dest_id); +#else /* CONFIG_X86_LOCAL_APIC */ +static inline void lock_vector_lock(void) {} +static inline void unlock_vector_lock(void) {} +#endif /* CONFIG_X86_LOCAL_APIC */ /* Statistics */ extern atomic_t irq_err_count; @@ -185,7 +181,8 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *); extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif -extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); +extern void (*__initconst 
interrupt[FIRST_SYSTEM_VECTOR + - FIRST_EXTERNAL_VECTOR])(void); #ifdef CONFIG_TRACING #define trace_interrupt interrupt #endif @@ -195,17 +192,6 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -extern void setup_vector_irq(int cpu); - -#ifdef CONFIG_X86_IO_APIC -extern void lock_vector_lock(void); -extern void unlock_vector_lock(void); -extern void __setup_vector_irq(int cpu); -#else -static inline void lock_vector_lock(void) {} -static inline void unlock_vector_lock(void) {} -static inline void __setup_vector_irq(int cpu) {} -#endif #endif /* !ASSEMBLY_ */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 1733ab49ac5e..bf006cce9418 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -132,6 +132,10 @@ extern int noioapicquirk; /* -1 if "noapic" boot option passed */ extern int noioapicreroute; +extern unsigned long io_apic_irqs; + +#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs)) + /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. 
@@ -139,18 +143,15 @@ extern int noioapicreroute; #define io_apic_assign_pci_irqs \ (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) -struct io_apic_irq_attr; struct irq_cfg; extern void ioapic_insert_resources(void); +extern int arch_early_ioapic_init(void); extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, unsigned int, int, struct io_apic_irq_attr *); extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); -extern void native_compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id); extern void native_eoi_ioapic_pin(int apic, int pin, int vector); extern int save_ioapic_entries(void); @@ -160,6 +161,13 @@ extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); extern void setup_ioapic_ids_from_mpc_nocheck(void); +struct io_apic_irq_attr { + int ioapic; + int ioapic_pin; + int trigger; + int polarity; +}; + enum ioapic_domain_type { IOAPIC_DOMAIN_INVALID, IOAPIC_DOMAIN_LEGACY, @@ -188,8 +196,10 @@ extern int mp_find_ioapic_pin(int ioapic, u32 gsi); extern u32 mp_pin_to_gsi(int ioapic, int pin); extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); extern void mp_unmap_irq(int irq); -extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, - struct ioapic_domain_cfg *cfg); +extern int mp_register_ioapic(int id, u32 address, u32 gsi_base, + struct ioapic_domain_cfg *cfg); +extern int mp_unregister_ioapic(u32 gsi_base); +extern int mp_ioapic_registered(u32 gsi_base); extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq); extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); @@ -227,19 +237,25 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned extern void io_apic_eoi(unsigned int apic, unsigned int vector); -extern bool mp_should_keep_irq(struct device *dev); - +extern void setup_IO_APIC(void); +extern void 
enable_IO_APIC(void); +extern void disable_IO_APIC(void); +extern void setup_ioapic_dest(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); +extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ +#define IO_APIC_IRQ(x) 0 #define io_apic_assign_pci_irqs 0 #define setup_ioapic_ids_from_mpc x86_init_noop static inline void ioapic_insert_resources(void) { } +static inline int arch_early_ioapic_init(void) { return 0; } +static inline void print_IO_APICs(void) {} #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } static inline void mp_unmap_irq(int irq) { } -static inline bool mp_should_keep_irq(struct device *dev) { return 1; } static inline int save_ioapic_entries(void) { @@ -262,7 +278,6 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_print_entries NULL #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL -#define native_compose_msi_msg NULL #define native_eoi_ioapic_pin NULL #endif diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 5702d7e3111d..666c89ec4bd7 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -126,6 +126,12 @@ #define NR_VECTORS 256 +#ifdef CONFIG_X86_LOCAL_APIC +#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#else +#define FIRST_SYSTEM_VECTOR NR_VECTORS +#endif + #define FPU_IRQ 13 #define FIRST_VM86_IRQ 3 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6ed0c30d6a0c..d89c6b828c96 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -33,7 +33,7 @@ #define KVM_MAX_VCPUS 255 #define KVM_SOFT_MAX_VCPUS 160 -#define KVM_USER_MEM_SLOTS 125 +#define KVM_USER_MEM_SLOTS 509 /* memory slots that are not exposed to userspace */ 
#define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) @@ -51,6 +51,7 @@ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL +#define CR3_PCID_INVD (1UL << 63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ @@ -361,6 +362,7 @@ struct kvm_vcpu_arch { int mp_state; u64 ia32_misc_enable_msr; bool tpr_access_reporting; + u64 ia32_xss; /* * Paging state of the vcpu @@ -542,7 +544,7 @@ struct kvm_apic_map { struct rcu_head rcu; u8 ldr_bits; /* fields bellow are used to decode ldr values in different modes */ - u32 cid_shift, cid_mask, lid_mask; + u32 cid_shift, cid_mask, lid_mask, broadcast; struct kvm_lapic *phys_map[256]; /* first index is cluster id second is cpu id in a cluster */ struct kvm_lapic *logical_map[16][16]; @@ -602,6 +604,9 @@ struct kvm_arch { struct kvm_xen_hvm_config xen_hvm_config; + /* reads protected by irq_srcu, writes by irq_lock */ + struct hlist_head mask_notifier_list; + /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; @@ -659,6 +664,16 @@ struct msr_data { u64 data; }; +struct kvm_lapic_irq { + u32 vector; + u32 delivery_mode; + u32 dest_mode; + u32 level; + u32 trig_mode; + u32 shorthand; + u32 dest_id; +}; + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -767,6 +782,7 @@ struct kvm_x86_ops { enum x86_intercept_stage stage); void (*handle_external_intr)(struct kvm_vcpu *vcpu); bool (*mpx_supported)(void); + bool (*xsaves_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); @@ -818,6 +834,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); +struct kvm_irq_mask_notifier { + void (*func)(struct kvm_irq_mask_notifier *kimn, bool 
masked); + int irq; + struct hlist_node link; +}; + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, + bool mask); + extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); @@ -863,7 +892,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); -void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); @@ -895,6 +924,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); +bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); static inline int __kvm_irq_line_state(unsigned long *irq_state, int irq_source_id, int level) @@ -1066,6 +1096,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 0892ea0e683f..4e370a5d8117 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -96,12 +96,15 @@ extern void pci_iommu_alloc(void); #ifdef CONFIG_PCI_MSI /* implemented in arch/x86/kernel/apic/io_apic. 
*/ struct msi_desc; +void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, + unsigned int dest, struct msi_msg *msg, u8 hpet_id); int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev); int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, unsigned int irq_base, unsigned int irq_offset); #else +#define native_compose_msi_msg NULL #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index fa1195dae425..164e3f8d3c3d 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); +extern bool mp_should_keep_irq(struct device *dev); + struct pci_raw_ops { int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index af447f95e3be..25bcd4a89517 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -452,6 +452,7 @@ static inline void update_page_count(int level, unsigned long pages) { } extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); +extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index a4efe477ceab..625660f8a2fc 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -92,7 
+92,7 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock) unsigned count = SPIN_THRESHOLD; do { - if (ACCESS_ONCE(lock->tickets.head) == inc.tail) + if (READ_ONCE(lock->tickets.head) == inc.tail) goto out; cpu_relax(); } while (--count); @@ -105,7 +105,7 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) { arch_spinlock_t old, new; - old.tickets = ACCESS_ONCE(lock->tickets); + old.tickets = READ_ONCE(lock->tickets); if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) return 0; @@ -162,14 +162,14 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tmp = READ_ONCE(lock->tickets); return tmp.tail != tmp.head; } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tmp = READ_ONCE(lock->tickets); return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index bcbfade26d8d..45afaee9555c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -69,6 +69,7 @@ #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_XSAVES 0x00100000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -159,6 +160,8 @@ enum vmcs_field { EOI_EXIT_BITMAP3_HIGH = 0x00002023, VMREAD_BITMAP = 0x00002026, VMWRITE_BITMAP = 0x00002028, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index f58ef6c0613b..5eea09915a15 100644 --- a/arch/x86/include/asm/xen/page.h +++ 
b/arch/x86/include/asm/xen/page.h @@ -41,10 +41,12 @@ typedef struct xpaddr { extern unsigned long *machine_to_phys_mapping; extern unsigned long machine_to_phys_nr; +extern unsigned long *xen_p2m_addr; +extern unsigned long xen_p2m_size; +extern unsigned long xen_max_p2m_pfn; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); -extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern unsigned long set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e); @@ -52,17 +54,52 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); -extern int m2p_add_override(unsigned long mfn, struct page *page, - struct gnttab_map_grant_ref *kmap_op); extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); -extern int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op, - unsigned long mfn); -extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); +/* + * Helper functions to write or read unsigned long values to/from + * memory, when the access may fault. + */ +static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) +{ + return __put_user(val, (unsigned long __user *)addr); +} + +static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val) +{ + return __get_user(*val, (unsigned long __user *)addr); +} + +/* + * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine(): + * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. 
No indicator + * bits (identity or foreign) are set. + * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set + * identity or foreign indicator will be still set. __pfn_to_mfn() is + * encapsulating get_phys_to_machine() which is called in special cases only. + * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special + * cases needing an extended handling. + */ +static inline unsigned long __pfn_to_mfn(unsigned long pfn) +{ + unsigned long mfn; + + if (pfn < xen_p2m_size) + mfn = xen_p2m_addr[pfn]; + else if (unlikely(pfn < xen_max_p2m_pfn)) + return get_phys_to_machine(pfn); + else + return IDENTITY_FRAME(pfn); + + if (unlikely(mfn == INVALID_P2M_ENTRY)) + return get_phys_to_machine(pfn); + + return mfn; +} + static inline unsigned long pfn_to_mfn(unsigned long pfn) { unsigned long mfn; @@ -70,7 +107,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - mfn = get_phys_to_machine(pfn); + mfn = __pfn_to_mfn(pfn); if (mfn != INVALID_P2M_ENTRY) mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); @@ -83,7 +120,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; - return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; + return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY; } static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) @@ -102,7 +139,7 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) * In such cases it doesn't matter what we return (we return garbage), * but we must handle the fault without crashing! 
*/ - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn); if (ret < 0) return ~0; @@ -117,7 +154,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) return mfn; pfn = mfn_to_pfn_no_overrides(mfn); - if (get_phys_to_machine(pfn) != mfn) { + if (__pfn_to_mfn(pfn) != mfn) { /* * If this appears to be a foreign mfn (because the pfn * doesn't map back to the mfn), then check the local override @@ -133,8 +170,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) * entry doesn't map back to the mfn and m2p_override doesn't have a * valid entry for it. */ - if (pfn == ~0 && - get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn)) + if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn)) pfn = mfn; return pfn; @@ -180,7 +216,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) return mfn; pfn = mfn_to_pfn(mfn); - if (get_phys_to_machine(pfn) != mfn) + if (__pfn_to_mfn(pfn) != mfn) return -1; /* force !pfn_valid() */ return pfn; } diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 7e7a79ada658..5fa9770035dc 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -16,6 +16,7 @@ #define XSTATE_Hi16_ZMM 0x80 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) /* Bit 63 of XCR0 is reserved for future expansion */ #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index 46727eb37bfe..6e1aaf73852a 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -28,6 +28,13 @@ struct user_desc { unsigned int seg_not_present:1; unsigned int useable:1; #ifdef __x86_64__ + /* + * Because this bit is not present in 32-bit user code, user + * programs can pass uninitialized values here. 
Therefore, in + * any context in which a user_desc comes from a 32-bit program, + * the kernel must act as though lm == 0, regardless of the + * actual value. + */ unsigned int lm:1; #endif }; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 990a2fe1588d..b813bf9da1e2 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -72,6 +72,8 @@ #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 #define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -116,6 +118,8 @@ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ { EXIT_REASON_INVVPID, "INVVPID" }, \ - { EXIT_REASON_INVPCID, "INVPCID" } + { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_XSAVES, "XSAVES" }, \ + { EXIT_REASON_XRSTORS, "XRSTORS" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a142e77693e1..4433a4be8171 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -76,6 +76,19 @@ int acpi_fix_pin2_polarity __initdata; static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #endif +/* + * Locks related to IOAPIC hotplug + * Hotplug side: + * ->device_hotplug_lock + * ->acpi_ioapic_lock + * ->ioapic_lock + * Interrupt mapping side: + * ->acpi_ioapic_lock + * ->ioapic_mutex + * ->ioapic_lock + */ +static DEFINE_MUTEX(acpi_ioapic_lock); + /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ @@ -395,10 +408,6 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return mp_map_gsi_to_irq(gsi, 
IOAPIC_MAP_ALLOC); - trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; node = dev ? dev_to_node(dev) : NUMA_NO_NODE; @@ -411,7 +420,8 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, if (irq < 0) return irq; - if (enable_update_mptable) + /* Don't set up the ACPI SCI because it's already set up */ + if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi) mp_config_acpi_gsi(dev, gsi, trigger, polarity); return irq; @@ -424,9 +434,6 @@ static void mp_unregister_gsi(u32 gsi) if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return; - if (acpi_gbl_FADT.sci_interrupt == gsi) - return; - irq = mp_map_gsi_to_irq(gsi, 0); if (irq > 0) mp_unmap_irq(irq); @@ -609,8 +616,10 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { *irqp = gsi; } else { + mutex_lock(&acpi_ioapic_lock); irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); + mutex_unlock(&acpi_ioapic_lock); if (irq < 0) return -1; *irqp = irq; @@ -650,7 +659,9 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, int irq = gsi; #ifdef CONFIG_X86_IO_APIC + mutex_lock(&acpi_ioapic_lock); irq = mp_register_gsi(dev, gsi, trigger, polarity); + mutex_unlock(&acpi_ioapic_lock); #endif return irq; @@ -659,7 +670,9 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, static void acpi_unregister_gsi_ioapic(u32 gsi) { #ifdef CONFIG_X86_IO_APIC + mutex_lock(&acpi_ioapic_lock); mp_unregister_gsi(gsi); + mutex_unlock(&acpi_ioapic_lock); #endif } @@ -690,6 +703,7 @@ void acpi_unregister_gsi(u32 gsi) } EXPORT_SYMBOL_GPL(acpi_unregister_gsi); +#ifdef CONFIG_X86_LOCAL_APIC static void __init acpi_set_irq_model_ioapic(void) { acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; @@ -697,6 +711,7 @@ static void __init acpi_set_irq_model_ioapic(void) __acpi_unregister_gsi = acpi_unregister_gsi_ioapic; acpi_ioapic = 1; } +#endif /* * ACPI based hotplug support for CPU @@ -759,20 +774,74 @@ 
EXPORT_SYMBOL(acpi_unmap_lsapic); int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) { - /* TBD */ - return -EINVAL; -} + int ret = -ENOSYS; +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC + int ioapic_id; + u64 addr; + struct ioapic_domain_cfg cfg = { + .type = IOAPIC_DOMAIN_DYNAMIC, + .ops = &acpi_irqdomain_ops, + }; + + ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr); + if (ioapic_id < 0) { + unsigned long long uid; + acpi_status status; + status = acpi_evaluate_integer(handle, METHOD_NAME__UID, + NULL, &uid); + if (ACPI_FAILURE(status)) { + acpi_handle_warn(handle, "failed to get IOAPIC ID.\n"); + return -EINVAL; + } + ioapic_id = (int)uid; + } + + mutex_lock(&acpi_ioapic_lock); + ret = mp_register_ioapic(ioapic_id, phys_addr, gsi_base, &cfg); + mutex_unlock(&acpi_ioapic_lock); +#endif + + return ret; +} EXPORT_SYMBOL(acpi_register_ioapic); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) { - /* TBD */ - return -EINVAL; -} + int ret = -ENOSYS; +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC + mutex_lock(&acpi_ioapic_lock); + ret = mp_unregister_ioapic(gsi_base); + mutex_unlock(&acpi_ioapic_lock); +#endif + + return ret; +} EXPORT_SYMBOL(acpi_unregister_ioapic); +/** + * acpi_ioapic_registered - Check whether IOAPIC associated with @gsi_base + * has been registered + * @handle: ACPI handle of the IOAPIC device + * @gsi_base: GSI base associated with the IOAPIC + * + * Assume caller holds some type of lock to serialize acpi_ioapic_registered() + * with acpi_register_ioapic()/acpi_unregister_ioapic(). 
+ */ +int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base) +{ + int ret = 0; + +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC + mutex_lock(&acpi_ioapic_lock); + ret = mp_ioapic_registered(gsi_base); + mutex_unlock(&acpi_ioapic_lock); +#endif + + return ret; +} + static int __init acpi_parse_sbf(struct acpi_table_header *table) { struct acpi_table_boot *sb; @@ -1185,7 +1254,9 @@ static void __init acpi_process_madt(void) /* * Parse MADT IO-APIC entries */ + mutex_lock(&acpi_ioapic_lock); error = acpi_parse_madt_ioapic_entries(); + mutex_unlock(&acpi_ioapic_lock); if (!error) { acpi_set_irq_model_ioapic(); diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index dcb5b15401ce..8bb12ddc5db8 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,10 +2,12 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o +obj-$(CONFIG_PCI_MSI) += msi.o +obj-$(CONFIG_HT_IRQ) += htirq.o obj-$(CONFIG_SMP) += ipi.o ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ba6cc041edb1..29b5b18afa27 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -196,7 +196,7 @@ static int disable_apic_timer __initdata; int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); -int first_system_vector = 0xfe; +int first_system_vector = FIRST_SYSTEM_VECTOR; /* * Debug level, exported for io_apic.c @@ -1930,7 +1930,7 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -static inline void __smp_spurious_interrupt(void) +static inline void __smp_spurious_interrupt(u8 vector) { u32 v; @@ -1939,30 +1939,32 @@ static inline void __smp_spurious_interrupt(void) * if it is a vectored one. Just in case... 
* Spurious interrupts should not be ACKed. */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); + if (v & (1 << (vector & 0x1f))) ack_APIC_irq(); inc_irq_stat(irq_spurious_count); /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - pr_info("spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); + pr_info("spurious APIC interrupt through vector %02x on CPU#%d, " + "should never happen.\n", vector, smp_processor_id()); } __visible void smp_spurious_interrupt(struct pt_regs *regs) { entering_irq(); - __smp_spurious_interrupt(); + __smp_spurious_interrupt(~regs->orig_ax); exiting_irq(); } __visible void smp_trace_spurious_interrupt(struct pt_regs *regs) { + u8 vector = ~regs->orig_ax; + entering_irq(); - trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR); - __smp_spurious_interrupt(); - trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR); + trace_spurious_apic_entry(vector); + __smp_spurious_interrupt(vector); + trace_spurious_apic_exit(vector); exiting_irq(); } diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c new file mode 100644 index 000000000000..816f36e979ad --- /dev/null +++ b/arch/x86/kernel/apic/htirq.c @@ -0,0 +1,107 @@ +/* + * Support Hypertransport IRQ + * + * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo + * Moved from arch/x86/kernel/apic/io_apic.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/htirq.h> +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/hypertransport.h> + +/* + * Hypertransport interrupt support + */ +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) +{ + struct ht_irq_msg msg; + + fetch_ht_irq_msg(irq, &msg); + + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); + + write_ht_irq_msg(irq, &msg); +} + +static int +ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) +{ + struct irq_cfg *cfg = irqd_cfg(data); + unsigned int dest; + int ret; + + ret = apic_set_affinity(data, mask, &dest); + if (ret) + return ret; + + target_ht_irq(data->irq, dest, cfg->vector); + return IRQ_SET_MASK_OK_NOCOPY; +} + +static struct irq_chip ht_irq_chip = { + .name = "PCI-HT", + .irq_mask = mask_ht_irq, + .irq_unmask = unmask_ht_irq, + .irq_ack = apic_ack_edge, + .irq_set_affinity = ht_set_affinity, + .irq_retrigger = apic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +{ + struct irq_cfg *cfg; + struct ht_irq_msg msg; + unsigned dest; + int err; + + if (disable_apic) + return -ENXIO; + + cfg = irq_cfg(irq); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + + msg.address_lo = + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_VECTOR(cfg->vector) | + ((apic->irq_dest_mode == 0) ? 
+ HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | + HT_IRQ_LOW_RQEOI_EDGE | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + HT_IRQ_LOW_MT_FIXED : + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; + + write_ht_irq_msg(irq, &msg); + + irq_set_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); + + dev_dbg(&dev->dev, "irq %d for HT\n", irq); + + return 0; +} diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7ffe0a2b870f..3f5f60406ab1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -32,15 +32,11 @@ #include <linux/module.h> #include <linux/syscore_ops.h> #include <linux/irqdomain.h> -#include <linux/msi.h> -#include <linux/htirq.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/jiffies.h> /* time_after() */ #include <linux/slab.h> #include <linux/bootmem.h> -#include <linux/dmar.h> -#include <linux/hpet.h> #include <asm/idle.h> #include <asm/io.h> @@ -52,17 +48,12 @@ #include <asm/dma.h> #include <asm/timer.h> #include <asm/i8259.h> -#include <asm/msidef.h> -#include <asm/hypertransport.h> #include <asm/setup.h> #include <asm/irq_remapping.h> -#include <asm/hpet.h> #include <asm/hw_irq.h> #include <asm/apic.h> -#define __apicdebuginit(type) static type __init - #define for_each_ioapic(idx) \ for ((idx) = 0; (idx) < nr_ioapics; (idx)++) #define for_each_ioapic_reverse(idx) \ @@ -74,7 +65,7 @@ for_each_pin((idx), (pin)) #define for_each_irq_pin(entry, head) \ - for (entry = head; entry; entry = entry->next) + list_for_each_entry(entry, &head, list) /* * Is the SiS APIC rmw bug present ? 
@@ -83,7 +74,6 @@ int sis_apic_bug = -1; static DEFINE_RAW_SPINLOCK(ioapic_lock); -static DEFINE_RAW_SPINLOCK(vector_lock); static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; static int ioapic_initialized; @@ -112,6 +102,7 @@ static struct ioapic { struct ioapic_domain_cfg irqdomain_cfg; struct irq_domain *irqdomain; struct mp_pin_info *pin_info; + struct resource *iomem_res; } ioapics[MAX_IO_APICS]; #define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver @@ -205,8 +196,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); - /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ void mp_save_irq(struct mpc_intsrc *m) { @@ -228,8 +217,8 @@ void mp_save_irq(struct mpc_intsrc *m) } struct irq_pin_list { + struct list_head list; int apic, pin; - struct irq_pin_list *next; }; static struct irq_pin_list *alloc_irq_pin_list(int node) @@ -237,7 +226,26 @@ static struct irq_pin_list *alloc_irq_pin_list(int node) return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); } -int __init arch_early_irq_init(void) +static void alloc_ioapic_saved_registers(int idx) +{ + size_t size; + + if (ioapics[idx].saved_registers) + return; + + size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers; + ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL); + if (!ioapics[idx].saved_registers) + pr_err("IOAPIC %d: suspend/resume impossible!\n", idx); +} + +static void free_ioapic_saved_registers(int idx) +{ + kfree(ioapics[idx].saved_registers); + ioapics[idx].saved_registers = NULL; +} + +int __init arch_early_ioapic_init(void) { struct irq_cfg *cfg; int i, node = cpu_to_node(0); @@ -245,13 +253,8 @@ int __init arch_early_irq_init(void) if (!nr_legacy_irqs()) io_apic_irqs = ~0UL; - for_each_ioapic(i) { - ioapics[i].saved_registers = - kzalloc(sizeof(struct IO_APIC_route_entry) * - ioapics[i].nr_registers, 
GFP_KERNEL); - if (!ioapics[i].saved_registers) - pr_err("IOAPIC %d: suspend/resume impossible!\n", i); - } + for_each_ioapic(i) + alloc_ioapic_saved_registers(i); /* * For legacy IRQ's, start with assigning irq0 to irq15 to @@ -266,61 +269,6 @@ int __init arch_early_irq_init(void) return 0; } -static inline struct irq_cfg *irq_cfg(unsigned int irq) -{ - return irq_get_chip_data(irq); -} - -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) -{ - struct irq_cfg *cfg; - - cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); - if (!cfg) - return NULL; - if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) - goto out_cfg; - if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) - goto out_domain; - return cfg; -out_domain: - free_cpumask_var(cfg->domain); -out_cfg: - kfree(cfg); - return NULL; -} - -static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) -{ - if (!cfg) - return; - irq_set_chip_data(at, NULL); - free_cpumask_var(cfg->domain); - free_cpumask_var(cfg->old_domain); - kfree(cfg); -} - -static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) -{ - int res = irq_alloc_desc_at(at, node); - struct irq_cfg *cfg; - - if (res < 0) { - if (res != -EEXIST) - return NULL; - cfg = irq_cfg(at); - if (cfg) - return cfg; - } - - cfg = alloc_irq_cfg(at, node); - if (cfg) - irq_set_chip_data(at, cfg); - else - irq_free_desc(at); - return cfg; -} - struct io_apic { unsigned int index; unsigned int unused[3]; @@ -445,15 +393,12 @@ static void ioapic_mask_entry(int apic, int pin) */ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { - struct irq_pin_list **last, *entry; + struct irq_pin_list *entry; /* don't allow duplicates */ - last = &cfg->irq_2_pin; - for_each_irq_pin(entry, cfg->irq_2_pin) { + for_each_irq_pin(entry, cfg->irq_2_pin) if (entry->apic == apic && entry->pin == pin) return 0; - last = &entry->next; - } entry = alloc_irq_pin_list(node); if (!entry) { @@ -464,22 +409,19 @@ 
static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi entry->apic = apic; entry->pin = pin; - *last = entry; + list_add_tail(&entry->list, &cfg->irq_2_pin); return 0; } static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) { - struct irq_pin_list **last, *entry; + struct irq_pin_list *tmp, *entry; - last = &cfg->irq_2_pin; - for_each_irq_pin(entry, cfg->irq_2_pin) + list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list) if (entry->apic == apic && entry->pin == pin) { - *last = entry->next; + list_del(&entry->list); kfree(entry); return; - } else { - last = &entry->next; } } @@ -559,7 +501,7 @@ static void mask_ioapic(struct irq_cfg *cfg) static void mask_ioapic_irq(struct irq_data *data) { - mask_ioapic(data->chip_data); + mask_ioapic(irqd_cfg(data)); } static void __unmask_ioapic(struct irq_cfg *cfg) @@ -578,7 +520,7 @@ static void unmask_ioapic(struct irq_cfg *cfg) static void unmask_ioapic_irq(struct irq_data *data) { - unmask_ioapic(data->chip_data); + unmask_ioapic(irqd_cfg(data)); } /* @@ -1164,8 +1106,7 @@ void mp_unmap_irq(int irq) * Find a specific PCI IRQ entry. * Not an __init, possibly needed by modules */ -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, - struct io_apic_irq_attr *irq_attr) +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) { int irq, i, best_ioapic = -1, best_idx = -1; @@ -1219,195 +1160,11 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, return -1; out: - irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, - IOAPIC_MAP_ALLOC); - if (irq > 0) - set_io_apic_irq_attr(irq_attr, best_ioapic, - mp_irqs[best_idx].dstirq, - irq_trigger(best_idx), - irq_polarity(best_idx)); - return irq; + return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, + IOAPIC_MAP_ALLOC); } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -void lock_vector_lock(void) -{ - /* Used to the online set of cpus does not change - * during assign_irq_vector. 
- */ - raw_spin_lock(&vector_lock); -} - -void unlock_vector_lock(void) -{ - raw_spin_unlock(&vector_lock); -} - -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) -{ - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; - static int current_offset = VECTOR_OFFSET_START % 16; - int cpu, err; - cpumask_var_t tmp_mask; - - if (cfg->move_in_progress) - return -EBUSY; - - if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) - return -ENOMEM; - - /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; - cpumask_clear(cfg->old_domain); - cpu = cpumask_first_and(mask, cpu_online_mask); - while (cpu < nr_cpu_ids) { - int new_cpu, vector, offset; - - apic->vector_allocation_domain(cpu, tmp_mask, mask); - - if (cpumask_subset(tmp_mask, cfg->domain)) { - err = 0; - if (cpumask_equal(tmp_mask, cfg->domain)) - break; - /* - * New cpumask using the vector is a proper subset of - * the current in use mask. So cleanup the vector - * allocation for the members that are not used anymore. 
- */ - cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); - cpumask_and(cfg->domain, cfg->domain, tmp_mask); - break; - } - - vector = current_vector; - offset = current_offset; -next: - vector += 16; - if (vector >= first_system_vector) { - offset = (offset + 1) % 16; - vector = FIRST_EXTERNAL_VECTOR + offset; - } - - if (unlikely(current_vector == vector)) { - cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); - cpumask_andnot(tmp_mask, mask, cfg->old_domain); - cpu = cpumask_first_and(tmp_mask, cpu_online_mask); - continue; - } - - if (test_bit(vector, used_vectors)) - goto next; - - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) { - if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED) - goto next; - } - /* Found one! */ - current_vector = vector; - current_offset = offset; - if (cfg->vector) { - cpumask_copy(cfg->old_domain, cfg->domain); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); - } - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); - err = 0; - break; - } - free_cpumask_var(tmp_mask); - return err; -} - -int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) -{ - int err; - unsigned long flags; - - raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, cfg, mask); - raw_spin_unlock_irqrestore(&vector_lock, flags); - return err; -} - -static void __clear_irq_vector(int irq, struct irq_cfg *cfg) -{ - int cpu, vector; - - BUG_ON(!cfg->vector); - - vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; - - cfg->vector = 0; - cpumask_clear(cfg->domain); - - if (likely(!cfg->move_in_progress)) - return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { - for (vector = 
FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - if (per_cpu(vector_irq, cpu)[vector] != irq) - continue; - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; - break; - } - } - cfg->move_in_progress = 0; -} - -void __setup_vector_irq(int cpu) -{ - /* Initialize vector_irq on a new cpu */ - int irq, vector; - struct irq_cfg *cfg; - - /* - * vector_lock will make sure that we don't run into irq vector - * assignments that might be happening on another cpu in parallel, - * while we setup our initial vector to irq mappings. - */ - raw_spin_lock(&vector_lock); - /* Mark the inuse vectors */ - for_each_active_irq(irq) { - cfg = irq_cfg(irq); - if (!cfg) - continue; - - if (!cpumask_test_cpu(cpu, cfg->domain)) - continue; - vector = cfg->vector; - per_cpu(vector_irq, cpu)[vector] = irq; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - irq = per_cpu(vector_irq, cpu)[vector]; - if (irq <= VECTOR_UNDEFINED) - continue; - - cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; - } - raw_spin_unlock(&vector_lock); -} - static struct irq_chip ioapic_chip; #ifdef CONFIG_X86_32 @@ -1496,7 +1253,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, &dest)) { pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - __clear_irq_vector(irq, cfg); + clear_irq_vector(irq, cfg); return; } @@ -1510,7 +1267,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - __clear_irq_vector(irq, cfg); + clear_irq_vector(irq, cfg); return; } @@ -1641,7 +1398,7 @@ void ioapic_zap_locks(void) raw_spin_lock_init(&ioapic_lock); } -__apicdebuginit(void) print_IO_APIC(int ioapic_idx) +static void __init print_IO_APIC(int 
ioapic_idx) { union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; @@ -1698,7 +1455,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries); } -__apicdebuginit(void) print_IO_APICs(void) +void __init print_IO_APICs(void) { int ioapic_idx; struct irq_cfg *cfg; @@ -1731,8 +1488,7 @@ __apicdebuginit(void) print_IO_APICs(void) cfg = irq_cfg(irq); if (!cfg) continue; - entry = cfg->irq_2_pin; - if (!entry) + if (list_empty(&cfg->irq_2_pin)) continue; printk(KERN_DEBUG "IRQ%d ", irq); for_each_irq_pin(entry, cfg->irq_2_pin) @@ -1743,205 +1499,6 @@ __apicdebuginit(void) print_IO_APICs(void) printk(KERN_INFO ".................................... done.\n"); } -__apicdebuginit(void) print_APIC_field(int base) -{ - int i; - - printk(KERN_DEBUG); - - for (i = 0; i < 8; i++) - pr_cont("%08x", apic_read(base + i*0x10)); - - pr_cont("\n"); -} - -__apicdebuginit(void) print_local_APIC(void *dummy) -{ - unsigned int i, v, ver, maxlvt; - u64 icr; - - printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", - smp_processor_id(), hard_smp_processor_id()); - v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); - v = apic_read(APIC_LVR); - printk(KERN_INFO "... APIC VERSION: %08x\n", v); - ver = GET_APIC_VERSION(v); - maxlvt = lapic_get_maxlvt(); - - v = apic_read(APIC_TASKPRI); - printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (!APIC_XAPIC(ver)) { - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); - } - v = apic_read(APIC_PROCPRI); - printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); - } - - /* - * Remote read supported only in the 82489DX and local APIC for - * Pentium processors. - */ - if (!APIC_INTEGRATED(ver) || maxlvt == 3) { - v = apic_read(APIC_RRR); - printk(KERN_DEBUG "... 
APIC RRR: %08x\n", v); - } - - v = apic_read(APIC_LDR); - printk(KERN_DEBUG "... APIC LDR: %08x\n", v); - if (!x2apic_enabled()) { - v = apic_read(APIC_DFR); - printk(KERN_DEBUG "... APIC DFR: %08x\n", v); - } - v = apic_read(APIC_SPIV); - printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); - - printk(KERN_DEBUG "... APIC ISR field:\n"); - print_APIC_field(APIC_ISR); - printk(KERN_DEBUG "... APIC TMR field:\n"); - print_APIC_field(APIC_TMR); - printk(KERN_DEBUG "... APIC IRR field:\n"); - print_APIC_field(APIC_IRR); - - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - - if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { - v = apic_read(APIC_EFEAT); - maxlvt = (v >> 16) & 0xff; - printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v); - v = apic_read(APIC_ECTRL); - printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v); - for (i = 0; i < maxlvt; i++) { - v = apic_read(APIC_EILVTn(i)); - printk(KERN_DEBUG "... 
APIC EILVT%d: %08x\n", i, v); - } - } - pr_cont("\n"); -} - -__apicdebuginit(void) print_local_APICs(int maxcpu) -{ - int cpu; - - if (!maxcpu) - return; - - preempt_disable(); - for_each_online_cpu(cpu) { - if (cpu >= maxcpu) - break; - smp_call_function_single(cpu, print_local_APIC, NULL, 1); - } - preempt_enable(); -} - -__apicdebuginit(void) print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (!nr_legacy_irqs()) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - raw_spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); -} - -static int __initdata show_lapic = 1; -static __init int setup_show_lapic(char *arg) -{ - int num = -1; - - if (strcmp(arg, "all") == 0) { - show_lapic = CONFIG_NR_CPUS; - } else { - get_option(&arg, &num); - if (num >= 0) - show_lapic = num; - } - - return 1; -} -__setup("show_lapic=", setup_show_lapic); - -__apicdebuginit(int) print_ICs(void) -{ - if (apic_verbosity == APIC_QUIET) - return 0; - - print_PIC(); - - /* don't print out if apic is not there */ - if (!cpu_has_apic && !apic_from_smp_config()) - return 0; - - print_local_APICs(show_lapic); - print_IO_APICs(); - - return 0; -} - -late_initcall(print_ICs); - - /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -2244,26 +1801,12 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) if (legacy_pic->irq_pending(irq)) was_pending = 1; } - __unmask_ioapic(data->chip_data); + __unmask_ioapic(irqd_cfg(data)); 
raw_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; } -static int ioapic_retrigger_irq(struct irq_data *data) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned long flags; - int cpu; - - raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(cfg->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), cfg->vector); - raw_spin_unlock_irqrestore(&vector_lock, flags); - - return 1; -} - /* * Level and edge triggered IO-APIC interrupts need different handling, * so we use two separate IRQ descriptors. Edge triggered IRQs can be @@ -2273,113 +1816,6 @@ static int ioapic_retrigger_irq(struct irq_data *data) * races. */ -#ifdef CONFIG_SMP -void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - -asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) -{ - unsigned vector, me; - - ack_APIC_irq(); - irq_enter(); - exit_idle(); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - int irq; - unsigned int irr; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __this_cpu_read(vector_irq[vector]); - - if (irq <= VECTOR_UNDEFINED) - continue; - - desc = irq_to_desc(irq); - if (!desc) - continue; - - cfg = irq_cfg(irq); - if (!cfg) - continue; - - raw_spin_lock(&desc->lock); - - /* - * Check if the irq migration is in progress. If so, we - * haven't received the cleanup request yet for this irq. 
- */ - if (cfg->move_in_progress) - goto unlock; - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - goto unlock; - - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - /* - * Check if the vector that needs to be cleanedup is - * registered at the cpu's IRR. If so, then this is not - * the best time to clean it up. Lets clean it up in the - * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR - * to myself. - */ - if (irr & (1 << (vector % 32))) { - apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); - goto unlock; - } - __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); -unlock: - raw_spin_unlock(&desc->lock); - } - - irq_exit(); -} - -static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) -{ - unsigned me; - - if (likely(!cfg->move_in_progress)) - return; - - me = smp_processor_id(); - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - send_cleanup_vector(cfg); -} - -static void irq_complete_move(struct irq_cfg *cfg) -{ - __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); -} - -void irq_force_complete_move(int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - if (!cfg) - return; - - __irq_complete_move(cfg, cfg->vector); -} -#else -static inline void irq_complete_move(struct irq_cfg *cfg) { } -#endif - static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; @@ -2400,41 +1836,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. 
- */ -int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int irq = data->irq; - int err; - - if (!config_enabled(CONFIG_SMP)) - return -EPERM; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - err = assign_irq_vector(irq, cfg, mask); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); - if (err) { - if (assign_irq_vector(irq, cfg, data->affinity)) - pr_err("Failed to recover vector for irq %d\n", irq); - return err; - } - - cpumask_copy(data->affinity, mask); - - return 0; -} - - int native_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -2447,24 +1848,17 @@ int native_ioapic_set_affinity(struct irq_data *data, return -EPERM; raw_spin_lock_irqsave(&ioapic_lock, flags); - ret = __ioapic_set_affinity(data, mask, &dest); + ret = apic_set_affinity(data, mask, &dest); if (!ret) { /* Only the high 8 bits are valid. 
*/ dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, data->chip_data); + __target_IO_APIC_irq(irq, dest, irqd_cfg(data)); ret = IRQ_SET_MASK_OK_NOCOPY; } raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; } -static void ack_apic_edge(struct irq_data *data) -{ - irq_complete_move(data->chip_data); - irq_move_irq(data); - ack_APIC_irq(); -} - atomic_t irq_mis_count; #ifdef CONFIG_GENERIC_PENDING_IRQ @@ -2547,9 +1941,9 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, } #endif -static void ack_apic_level(struct irq_data *data) +static void ack_ioapic_level(struct irq_data *data) { - struct irq_cfg *cfg = data->chip_data; + struct irq_cfg *cfg = irqd_cfg(data); int i, irq = data->irq; unsigned long v; bool masked; @@ -2619,10 +2013,10 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_startup = startup_ioapic_irq, .irq_mask = mask_ioapic_irq, .irq_unmask = unmask_ioapic_irq, - .irq_ack = ack_apic_edge, - .irq_eoi = ack_apic_level, + .irq_ack = apic_ack_edge, + .irq_eoi = ack_ioapic_level, .irq_set_affinity = native_ioapic_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, + .irq_retrigger = apic_retrigger_irq, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -2965,6 +2359,16 @@ static int mp_irqdomain_create(int ioapic) return 0; } +static void ioapic_destroy_irqdomain(int idx) +{ + if (ioapics[idx].irqdomain) { + irq_domain_remove(ioapics[idx].irqdomain); + ioapics[idx].irqdomain = NULL; + } + kfree(ioapics[idx].pin_info); + ioapics[idx].pin_info = NULL; +} + void __init setup_IO_APIC(void) { int ioapic; @@ -3044,399 +2448,6 @@ static int __init ioapic_init_ops(void) device_initcall(ioapic_init_ops); -/* - * Dynamic irq allocate and deallocation. Should be replaced by irq domains! 
- */ -int arch_setup_hwirq(unsigned int irq, int node) -{ - struct irq_cfg *cfg; - unsigned long flags; - int ret; - - cfg = alloc_irq_cfg(irq, node); - if (!cfg) - return -ENOMEM; - - raw_spin_lock_irqsave(&vector_lock, flags); - ret = __assign_irq_vector(irq, cfg, apic->target_cpus()); - raw_spin_unlock_irqrestore(&vector_lock, flags); - - if (!ret) - irq_set_chip_data(irq, cfg); - else - free_irq_cfg(irq, cfg); - return ret; -} - -void arch_teardown_hwirq(unsigned int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long flags; - - free_remapped_irq(irq); - raw_spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq, cfg); - raw_spin_unlock_irqrestore(&vector_lock, flags); - free_irq_cfg(irq, cfg); -} - -/* - * MSI message composition - */ -void native_compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - msg->address_hi = MSI_ADDR_BASE_HI; - - if (x2apic_enabled()) - msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); - - msg->address_lo = - MSI_ADDR_BASE_LO | - ((apic->irq_dest_mode == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? 
- MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); -} - -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, - struct msi_msg *msg, u8 hpet_id) -{ - struct irq_cfg *cfg; - int err; - unsigned dest; - - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; - - x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); - - return 0; -} - -static int -msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = data->chip_data; - struct msi_msg msg; - unsigned int dest; - int ret; - - ret = __ioapic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - __get_cached_msi_msg(data->msi_desc, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - __pci_write_msi_msg(data->msi_desc, &msg); - - return IRQ_SET_MASK_OK_NOCOPY; -} - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. 
- */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, - .irq_ack = ack_apic_edge, - .irq_set_affinity = msi_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset) -{ - struct irq_chip *chip = &msi_chip; - struct msi_msg msg; - unsigned int irq = irq_base + irq_offset; - int ret; - - ret = msi_compose_msg(dev, irq, &msg, -1); - if (ret < 0) - return ret; - - irq_set_msi_desc_off(irq_base, irq_offset, msidesc); - - /* - * MSI-X message is written per-IRQ, the offset is always 0. - * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. - */ - if (!irq_offset) - pci_write_msi_msg(irq, &msg); - - setup_remapped_irq(irq, irq_cfg(irq), chip); - - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); - - dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); - - return 0; -} - -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - struct msi_desc *msidesc; - unsigned int irq; - int node, ret; - - /* Multiple MSI vectors only supported with interrupt remapping */ - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - - node = dev_to_node(&dev->dev); - - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = irq_alloc_hwirq(node); - if (!irq) - return -ENOSPC; - - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) { - irq_free_hwirq(irq); - return ret; - } - - } - return 0; -} - -void native_teardown_msi_irq(unsigned int irq) -{ - irq_free_hwirq(irq); -} - -#ifdef CONFIG_DMAR_TABLE -static int -dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct msi_msg msg; - int ret; - - ret = __ioapic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - 
dmar_msi_read(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); - - dmar_msi_write(irq, &msg); - - return IRQ_SET_MASK_OK_NOCOPY; -} - -static struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .irq_unmask = dmar_msi_unmask, - .irq_mask = dmar_msi_mask, - .irq_ack = ack_apic_edge, - .irq_set_affinity = dmar_msi_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int arch_setup_dmar_msi(unsigned int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(NULL, irq, &msg, -1); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; -} -#endif - -#ifdef CONFIG_HPET_TIMER - -static int hpet_msi_set_affinity(struct irq_data *data, - const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = data->chip_data; - struct msi_msg msg; - unsigned int dest; - int ret; - - ret = __ioapic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - hpet_msi_read(data->handler_data, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - hpet_msi_write(data->handler_data, &msg); - - return IRQ_SET_MASK_OK_NOCOPY; -} - -static struct irq_chip hpet_msi_type = { - .name = "HPET_MSI", - .irq_unmask = hpet_msi_unmask, - .irq_mask = hpet_msi_mask, - .irq_ack = ack_apic_edge, - .irq_set_affinity = hpet_msi_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int default_setup_hpet_msi(unsigned int irq, unsigned int id) -{ - struct irq_chip *chip = &hpet_msi_type; - struct msi_msg msg; - int ret; - - ret = msi_compose_msg(NULL, irq, &msg, id); - if (ret < 0) - 
return ret; - - hpet_msi_write(irq_get_handler_data(irq), &msg); - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - setup_remapped_irq(irq, irq_cfg(irq), chip); - - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); - return 0; -} -#endif - -#endif /* CONFIG_PCI_MSI */ -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static int -ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest; - int ret; - - ret = __ioapic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - target_ht_irq(data->irq, dest, cfg->vector); - return IRQ_SET_MASK_OK_NOCOPY; -} - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .irq_mask = mask_ht_irq, - .irq_unmask = unmask_ht_irq, - .irq_ack = ack_apic_edge, - .irq_set_affinity = ht_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - struct irq_cfg *cfg; - struct ht_irq_msg msg; - unsigned dest; - int err; - - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((apic->irq_dest_mode == 0) ? 
- HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - irq_set_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - - dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); - - return 0; -} -#endif /* CONFIG_HT_IRQ */ - static int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) { @@ -3451,7 +2462,7 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) return ret; } -static int __init io_apic_get_redir_entries(int ioapic) +static int io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3476,28 +2487,8 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) return ioapic_initialized ? ioapic_dynirq_base : gsi_top; } -int __init arch_probe_nr_irqs(void) -{ - int nr; - - if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) - nr_irqs = NR_VECTORS * nr_cpu_ids; - - nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; -#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) - /* - * for MSI and HT dyn irq - */ - nr += gsi_top * 16; -#endif - if (nr < nr_irqs) - nr_irqs = nr; - - return 0; -} - #ifdef CONFIG_X86_32 -static int __init io_apic_get_unique_id(int ioapic, int apic_id) +static int io_apic_get_unique_id(int ioapic, int apic_id) { union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; @@ -3572,30 +2563,63 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id) return apic_id; } -static u8 __init io_apic_unique_id(u8 id) +static u8 io_apic_unique_id(int idx, u8 id) { if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); + return io_apic_get_unique_id(idx, id); else return id; } #else -static u8 __init io_apic_unique_id(u8 id) 
+static u8 io_apic_unique_id(int idx, u8 id) { - int i; + union IO_APIC_reg_00 reg_00; DECLARE_BITMAP(used, 256); + unsigned long flags; + u8 new_id; + int i; bitmap_zero(used, 256); for_each_ioapic(i) __set_bit(mpc_ioapic_id(i), used); + + /* Hand out the requested id if available */ if (!test_bit(id, used)) return id; - return find_first_zero_bit(used, 256); + + /* + * Read the current id from the ioapic and keep it if + * available. + */ + raw_spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(idx, 0); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + new_id = reg_00.bits.ID; + if (!test_bit(new_id, used)) { + apic_printk(APIC_VERBOSE, KERN_INFO + "IOAPIC[%d]: Using reg apic_id %d instead of %d\n", + idx, new_id, id); + return new_id; + } + + /* + * Get the next free id and write it to the ioapic. + */ + new_id = find_first_zero_bit(used, 256); + reg_00.bits.ID = new_id; + raw_spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(idx, 0, reg_00.raw); + reg_00.raw = io_apic_read(idx, 0); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + /* Sanity check */ + BUG_ON(reg_00.bits.ID != new_id); + + return new_id; } #endif -static int __init io_apic_get_version(int ioapic) +static int io_apic_get_version(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3702,6 +2726,7 @@ static struct resource * __init ioapic_setup_resources(void) snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; num++; + ioapics[i].iomem_res = res; } ioapic_resources = res; @@ -3799,21 +2824,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi) return gsi - gsi_cfg->gsi_base; } -static __init int bad_ioapic(unsigned long address) -{ - if (nr_ioapics >= MAX_IO_APICS) { - pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n", - MAX_IO_APICS, nr_ioapics); - return 1; - } - if (!address) { - pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n"); - return 1; - } - return 0; -} - 
-static __init int bad_ioapic_register(int idx) +static int bad_ioapic_register(int idx) { union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; @@ -3832,32 +2843,61 @@ static __init int bad_ioapic_register(int idx) return 0; } -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, - struct ioapic_domain_cfg *cfg) +static int find_free_ioapic_entry(void) { - int idx = 0; - int entries; + int idx; + + for (idx = 0; idx < MAX_IO_APICS; idx++) + if (ioapics[idx].nr_registers == 0) + return idx; + + return MAX_IO_APICS; +} + +/** + * mp_register_ioapic - Register an IOAPIC device + * @id: hardware IOAPIC ID + * @address: physical address of IOAPIC register area + * @gsi_base: base of GSI associated with the IOAPIC + * @cfg: configuration information for the IOAPIC + */ +int mp_register_ioapic(int id, u32 address, u32 gsi_base, + struct ioapic_domain_cfg *cfg) +{ + bool hotplug = !!ioapic_initialized; struct mp_ioapic_gsi *gsi_cfg; + int idx, ioapic, entries; + u32 gsi_end; - if (bad_ioapic(address)) - return; + if (!address) { + pr_warn("Bogus (zero) I/O APIC address found, skipping!\n"); + return -EINVAL; + } + for_each_ioapic(ioapic) + if (ioapics[ioapic].mp_config.apicaddr == address) { + pr_warn("address 0x%x conflicts with IOAPIC%d\n", + address, ioapic); + return -EEXIST; + } - idx = nr_ioapics; + idx = find_free_ioapic_entry(); + if (idx >= MAX_IO_APICS) { + pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n", + MAX_IO_APICS, idx); + return -ENOSPC; + } ioapics[idx].mp_config.type = MP_IOAPIC; ioapics[idx].mp_config.flags = MPC_APIC_USABLE; ioapics[idx].mp_config.apicaddr = address; - ioapics[idx].irqdomain = NULL; - ioapics[idx].irqdomain_cfg = *cfg; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - if (bad_ioapic_register(idx)) { clear_fixmap(FIX_IO_APIC_BASE_0 + idx); - return; + return -ENODEV; } - ioapics[idx].mp_config.apicid = io_apic_unique_id(id); + ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id); 
ioapics[idx].mp_config.apicver = io_apic_get_version(idx); /* @@ -3865,24 +2905,112 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ entries = io_apic_get_redir_entries(idx); + gsi_end = gsi_base + entries - 1; + for_each_ioapic(ioapic) { + gsi_cfg = mp_ioapic_gsi_routing(ioapic); + if ((gsi_base >= gsi_cfg->gsi_base && + gsi_base <= gsi_cfg->gsi_end) || + (gsi_end >= gsi_cfg->gsi_base && + gsi_end <= gsi_cfg->gsi_end)) { + pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n", + gsi_base, gsi_end, + gsi_cfg->gsi_base, gsi_cfg->gsi_end); + clear_fixmap(FIX_IO_APIC_BASE_0 + idx); + return -ENOSPC; + } + } gsi_cfg = mp_ioapic_gsi_routing(idx); gsi_cfg->gsi_base = gsi_base; - gsi_cfg->gsi_end = gsi_base + entries - 1; + gsi_cfg->gsi_end = gsi_end; + + ioapics[idx].irqdomain = NULL; + ioapics[idx].irqdomain_cfg = *cfg; /* - * The number of IO-APIC IRQ registers (== #pins): + * If mp_register_ioapic() is called during early boot stage when + * walking ACPI/SFI/DT tables, it's too early to create irqdomain, + * we are still using bootmem allocator. So delay it to setup_IO_APIC(). 
*/ - ioapics[idx].nr_registers = entries; + if (hotplug) { + if (mp_irqdomain_create(idx)) { + clear_fixmap(FIX_IO_APIC_BASE_0 + idx); + return -ENOMEM; + } + alloc_ioapic_saved_registers(idx); + } if (gsi_cfg->gsi_end >= gsi_top) gsi_top = gsi_cfg->gsi_end + 1; + if (nr_ioapics <= idx) + nr_ioapics = idx + 1; + + /* Set nr_registers to mark entry present */ + ioapics[idx].nr_registers = entries; pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n", idx, mpc_ioapic_id(idx), mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), gsi_cfg->gsi_base, gsi_cfg->gsi_end); - nr_ioapics++; + return 0; +} + +int mp_unregister_ioapic(u32 gsi_base) +{ + int ioapic, pin; + int found = 0; + struct mp_pin_info *pin_info; + + for_each_ioapic(ioapic) + if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) { + found = 1; + break; + } + if (!found) { + pr_warn("can't find IOAPIC for GSI %d\n", gsi_base); + return -ENODEV; + } + + for_each_pin(ioapic, pin) { + pin_info = mp_pin_info(ioapic, pin); + if (pin_info->count) { + pr_warn("pin%d on IOAPIC%d is still in use.\n", + pin, ioapic); + return -EBUSY; + } + } + + /* Mark entry not present */ + ioapics[ioapic].nr_registers = 0; + ioapic_destroy_irqdomain(ioapic); + free_ioapic_saved_registers(ioapic); + if (ioapics[ioapic].iomem_res) + release_resource(ioapics[ioapic].iomem_res); + clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic); + memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic])); + + return 0; +} + +int mp_ioapic_registered(u32 gsi_base) +{ + int ioapic; + + for_each_ioapic(ioapic) + if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) + return 1; + + return 0; +} + +static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, + int ioapic, int ioapic_pin, + int trigger, int polarity) +{ + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; } int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, @@ -3931,7 +3059,7 @@ void 
mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) ioapic_mask_entry(ioapic, pin); __remove_pin_from_irq(cfg, ioapic, pin); - WARN_ON(cfg->irq_2_pin != NULL); + WARN_ON(!list_empty(&cfg->irq_2_pin)); arch_teardown_hwirq(virq); } @@ -3964,18 +3092,6 @@ int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) return ret; } -bool mp_should_keep_irq(struct device *dev) -{ - if (dev->power.is_prepared) - return true; -#ifdef CONFIG_PM_RUNTIME - if (dev->power.runtime_status == RPM_SUSPENDING) - return true; -#endif - - return false; -} - /* Enable IOAPIC early just for system timer */ void __init pre_init_apic_IRQ0(void) { diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c new file mode 100644 index 000000000000..d6ba2d660dc5 --- /dev/null +++ b/arch/x86/kernel/apic/msi.c @@ -0,0 +1,286 @@ +/* + * Support of MSI, HPET and DMAR interrupts. + * + * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo + * Moved from arch/x86/kernel/apic/io_apic.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/dmar.h> +#include <linux/hpet.h> +#include <linux/msi.h> +#include <asm/msidef.h> +#include <asm/hpet.h> +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/irq_remapping.h> + +void native_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg = irq_cfg(irq); + + msg->address_hi = MSI_ADDR_BASE_HI; + + if (x2apic_enabled()) + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); + + msg->address_lo = + MSI_ADDR_BASE_LO | + ((apic->irq_dest_mode == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL : + MSI_ADDR_DEST_MODE_LOGICAL) | + ((apic->irq_delivery_mode != dest_LowestPrio) ? 
+ MSI_ADDR_REDIRECTION_CPU : + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED : + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); +} + +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg; + int err; + unsigned dest; + + if (disable_apic) + return -ENXIO; + + cfg = irq_cfg(irq); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; + + x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); + + return 0; +} + +static int +msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) +{ + struct irq_cfg *cfg = irqd_cfg(data); + struct msi_msg msg; + unsigned int dest; + int ret; + + ret = apic_set_affinity(data, mask, &dest); + if (ret) + return ret; + + __get_cached_msi_msg(data->msi_desc, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + __pci_write_msi_msg(data->msi_desc, &msg); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +/* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. 
+ */ +static struct irq_chip msi_chip = { + .name = "PCI-MSI", + .irq_unmask = pci_msi_unmask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_ack = apic_ack_edge, + .irq_set_affinity = msi_set_affinity, + .irq_retrigger = apic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset) +{ + struct irq_chip *chip = &msi_chip; + struct msi_msg msg; + unsigned int irq = irq_base + irq_offset; + int ret; + + ret = msi_compose_msg(dev, irq, &msg, -1); + if (ret < 0) + return ret; + + irq_set_msi_desc_off(irq_base, irq_offset, msidesc); + + /* + * MSI-X message is written per-IRQ, the offset is always 0. + * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. + */ + if (!irq_offset) + pci_write_msi_msg(irq, &msg); + + setup_remapped_irq(irq, irq_cfg(irq), chip); + + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); + + dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq); + + return 0; +} + +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + struct msi_desc *msidesc; + unsigned int irq; + int node, ret; + + /* Multiple MSI vectors only supported with interrupt remapping */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + + node = dev_to_node(&dev->dev); + + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = irq_alloc_hwirq(node); + if (!irq) + return -ENOSPC; + + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) { + irq_free_hwirq(irq); + return ret; + } + + } + return 0; +} + +void native_teardown_msi_irq(unsigned int irq) +{ + irq_free_hwirq(irq); +} + +#ifdef CONFIG_DMAR_TABLE +static int +dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_cfg *cfg = irqd_cfg(data); + unsigned int dest, irq = data->irq; + struct msi_msg msg; + int ret; + + ret = apic_set_affinity(data, mask, &dest); + if (ret) + return ret; + + dmar_msi_read(irq, &msg); 
+ + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +static struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .irq_unmask = dmar_msi_unmask, + .irq_mask = dmar_msi_mask, + .irq_ack = apic_ack_edge, + .irq_set_affinity = dmar_msi_set_affinity, + .irq_retrigger = apic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg, -1); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif + +/* + * MSI message composition + */ +#ifdef CONFIG_HPET_TIMER + +static int hpet_msi_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct irq_cfg *cfg = irqd_cfg(data); + struct msi_msg msg; + unsigned int dest; + int ret; + + ret = apic_set_affinity(data, mask, &dest); + if (ret) + return ret; + + hpet_msi_read(data->handler_data, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + hpet_msi_write(data->handler_data, &msg); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +static struct irq_chip hpet_msi_type = { + .name = "HPET_MSI", + .irq_unmask = hpet_msi_unmask, + .irq_mask = hpet_msi_mask, + .irq_ack = apic_ack_edge, + .irq_set_affinity = hpet_msi_set_affinity, + .irq_retrigger = apic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int default_setup_hpet_msi(unsigned int irq, unsigned int id) +{ + struct irq_chip *chip = &hpet_msi_type; + struct msi_msg msg; + int ret; + + ret = msi_compose_msg(NULL, irq, &msg, id); + if (ret < 0) + 
return ret; + + hpet_msi_write(irq_get_handler_data(irq), &msg); + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + setup_remapped_irq(irq, irq_cfg(irq), chip); + + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); + return 0; +} +#endif diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c new file mode 100644 index 000000000000..6cedd7914581 --- /dev/null +++ b/arch/x86/kernel/apic/vector.c @@ -0,0 +1,719 @@ +/* + * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc. + * + * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo + * Moved from arch/x86/kernel/apic/io_apic.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/compiler.h> +#include <linux/irqdomain.h> +#include <linux/slab.h> +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/i8259.h> +#include <asm/desc.h> +#include <asm/irq_remapping.h> + +static DEFINE_RAW_SPINLOCK(vector_lock); + +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. 
+ */ + raw_spin_lock(&vector_lock); +} + +void unlock_vector_lock(void) +{ + raw_spin_unlock(&vector_lock); +} + +struct irq_cfg *irq_cfg(unsigned int irq) +{ + return irq_get_chip_data(irq); +} + +struct irq_cfg *irqd_cfg(struct irq_data *irq_data) +{ + return irq_data->chip_data; +} + +static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) +{ + struct irq_cfg *cfg; + + cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); + if (!cfg) + return NULL; + if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) + goto out_cfg; + if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) + goto out_domain; +#ifdef CONFIG_X86_IO_APIC + INIT_LIST_HEAD(&cfg->irq_2_pin); +#endif + return cfg; +out_domain: + free_cpumask_var(cfg->domain); +out_cfg: + kfree(cfg); + return NULL; +} + +struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) +{ + int res = irq_alloc_desc_at(at, node); + struct irq_cfg *cfg; + + if (res < 0) { + if (res != -EEXIST) + return NULL; + cfg = irq_cfg(at); + if (cfg) + return cfg; + } + + cfg = alloc_irq_cfg(at, node); + if (cfg) + irq_set_chip_data(at, cfg); + else + irq_free_desc(at); + return cfg; +} + +static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) +{ + if (!cfg) + return; + irq_set_chip_data(at, NULL); + free_cpumask_var(cfg->domain); + free_cpumask_var(cfg->old_domain); + kfree(cfg); +} + +static int +__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +{ + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. 
;) + */ + static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; + static int current_offset = VECTOR_OFFSET_START % 16; + int cpu, err; + cpumask_var_t tmp_mask; + + if (cfg->move_in_progress) + return -EBUSY; + + if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) + return -ENOMEM; + + /* Only try and allocate irqs on cpus that are present */ + err = -ENOSPC; + cpumask_clear(cfg->old_domain); + cpu = cpumask_first_and(mask, cpu_online_mask); + while (cpu < nr_cpu_ids) { + int new_cpu, vector, offset; + + apic->vector_allocation_domain(cpu, tmp_mask, mask); + + if (cpumask_subset(tmp_mask, cfg->domain)) { + err = 0; + if (cpumask_equal(tmp_mask, cfg->domain)) + break; + /* + * New cpumask using the vector is a proper subset of + * the current in use mask. So cleanup the vector + * allocation for the members that are not used anymore. + */ + cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); + cfg->move_in_progress = + cpumask_intersects(cfg->old_domain, cpu_online_mask); + cpumask_and(cfg->domain, cfg->domain, tmp_mask); + break; + } + + vector = current_vector; + offset = current_offset; +next: + vector += 16; + if (vector >= first_system_vector) { + offset = (offset + 1) % 16; + vector = FIRST_EXTERNAL_VECTOR + offset; + } + + if (unlikely(current_vector == vector)) { + cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); + cpumask_andnot(tmp_mask, mask, cfg->old_domain); + cpu = cpumask_first_and(tmp_mask, cpu_online_mask); + continue; + } + + if (test_bit(vector, used_vectors)) + goto next; + + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) { + if (per_cpu(vector_irq, new_cpu)[vector] > + VECTOR_UNDEFINED) + goto next; + } + /* Found one! 
*/ + current_vector = vector; + current_offset = offset; + if (cfg->vector) { + cpumask_copy(cfg->old_domain, cfg->domain); + cfg->move_in_progress = + cpumask_intersects(cfg->old_domain, cpu_online_mask); + } + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) + per_cpu(vector_irq, new_cpu)[vector] = irq; + cfg->vector = vector; + cpumask_copy(cfg->domain, tmp_mask); + err = 0; + break; + } + free_cpumask_var(tmp_mask); + + return err; +} + +int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +{ + int err; + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + err = __assign_irq_vector(irq, cfg, mask); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return err; +} + +void clear_irq_vector(int irq, struct irq_cfg *cfg) +{ + int cpu, vector; + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + BUG_ON(!cfg->vector); + + vector = cfg->vector; + for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; + + cfg->vector = 0; + cpumask_clear(cfg->domain); + + if (likely(!cfg->move_in_progress)) { + raw_spin_unlock_irqrestore(&vector_lock, flags); + return; + } + + for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; + vector++) { + if (per_cpu(vector_irq, cpu)[vector] != irq) + continue; + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; + break; + } + } + cfg->move_in_progress = 0; + raw_spin_unlock_irqrestore(&vector_lock, flags); +} + +int __init arch_probe_nr_irqs(void) +{ + int nr; + + if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) + nr_irqs = NR_VECTORS * nr_cpu_ids; + + nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; +#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) + /* + * for MSI and HT dyn irq + */ + if (gsi_top <= NR_IRQS_LEGACY) + nr += 8 * nr_cpu_ids; + else + nr += gsi_top * 16; +#endif + if (nr < nr_irqs) + nr_irqs = nr; + + return nr_legacy_irqs(); +} + +int 
__init arch_early_irq_init(void) +{ + return arch_early_ioapic_init(); +} + +static void __setup_vector_irq(int cpu) +{ + /* Initialize vector_irq on a new cpu */ + int irq, vector; + struct irq_cfg *cfg; + + /* + * vector_lock will make sure that we don't run into irq vector + * assignments that might be happening on another cpu in parallel, + * while we setup our initial vector to irq mappings. + */ + raw_spin_lock(&vector_lock); + /* Mark the inuse vectors */ + for_each_active_irq(irq) { + cfg = irq_cfg(irq); + if (!cfg) + continue; + + if (!cpumask_test_cpu(cpu, cfg->domain)) + continue; + vector = cfg->vector; + per_cpu(vector_irq, cpu)[vector] = irq; + } + /* Mark the free vectors */ + for (vector = 0; vector < NR_VECTORS; ++vector) { + irq = per_cpu(vector_irq, cpu)[vector]; + if (irq <= VECTOR_UNDEFINED) + continue; + + cfg = irq_cfg(irq); + if (!cpumask_test_cpu(cpu, cfg->domain)) + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; + } + raw_spin_unlock(&vector_lock); +} + +/* + * Setup the vector to irq mappings. + */ +void setup_vector_irq(int cpu) +{ + int irq; + + /* + * On most of the platforms, legacy PIC delivers the interrupts on the + * boot cpu. But there are certain platforms where PIC interrupts are + * delivered to multiple cpu's. 
If the legacy IRQ is handled by the + * legacy PIC, for the new cpu that is coming online, setup the static + * legacy vector to irq mapping: + */ + for (irq = 0; irq < nr_legacy_irqs(); irq++) + per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; + + __setup_vector_irq(cpu); +} + +int apic_retrigger_irq(struct irq_data *data) +{ + struct irq_cfg *cfg = irqd_cfg(data); + unsigned long flags; + int cpu; + + raw_spin_lock_irqsave(&vector_lock, flags); + cpu = cpumask_first_and(cfg->domain, cpu_online_mask); + apic->send_IPI_mask(cpumask_of(cpu), cfg->vector); + raw_spin_unlock_irqrestore(&vector_lock, flags); + + return 1; +} + +void apic_ack_edge(struct irq_data *data) +{ + irq_complete_move(irqd_cfg(data)); + irq_move_irq(data); + ack_APIC_irq(); +} + +/* + * Either sets data->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and + * leaves data->affinity untouched. + */ +int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, + unsigned int *dest_id) +{ + struct irq_cfg *cfg = irqd_cfg(data); + unsigned int irq = data->irq; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -EPERM; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } + + cpumask_copy(data->affinity, mask); + + return 0; +} + +#ifdef CONFIG_SMP +void send_cleanup_vector(struct irq_cfg *cfg) +{ + cpumask_var_t cleanup_mask; + + if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { + unsigned int i; + + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + apic->send_IPI_mask(cpumask_of(i), + IRQ_MOVE_CLEANUP_VECTOR); + } else { + cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + apic->send_IPI_mask(cleanup_mask, 
IRQ_MOVE_CLEANUP_VECTOR); + free_cpumask_var(cleanup_mask); + } + cfg->move_in_progress = 0; +} + +asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) +{ + unsigned vector, me; + + ack_APIC_irq(); + irq_enter(); + exit_idle(); + + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + int irq; + unsigned int irr; + struct irq_desc *desc; + struct irq_cfg *cfg; + + irq = __this_cpu_read(vector_irq[vector]); + + if (irq <= VECTOR_UNDEFINED) + continue; + + desc = irq_to_desc(irq); + if (!desc) + continue; + + cfg = irq_cfg(irq); + if (!cfg) + continue; + + raw_spin_lock(&desc->lock); + + /* + * Check if the irq migration is in progress. If so, we + * haven't received the cleanup request yet for this irq. + */ + if (cfg->move_in_progress) + goto unlock; + + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + goto unlock; + + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + /* + * Check if the vector that needs to be cleanedup is + * registered at the cpu's IRR. If so, then this is not + * the best time to clean it up. Lets clean it up in the + * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR + * to myself. 
+ */ + if (irr & (1 << (vector % 32))) { + apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); + goto unlock; + } + __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); +unlock: + raw_spin_unlock(&desc->lock); + } + + irq_exit(); +} + +static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) +{ + unsigned me; + + if (likely(!cfg->move_in_progress)) + return; + + me = smp_processor_id(); + + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + send_cleanup_vector(cfg); +} + +void irq_complete_move(struct irq_cfg *cfg) +{ + __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); +} + +void irq_force_complete_move(int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + + if (!cfg) + return; + + __irq_complete_move(cfg, cfg->vector); +} +#endif + +/* + * Dynamic irq allocate and deallocation. Should be replaced by irq domains! + */ +int arch_setup_hwirq(unsigned int irq, int node) +{ + struct irq_cfg *cfg; + unsigned long flags; + int ret; + + cfg = alloc_irq_cfg(irq, node); + if (!cfg) + return -ENOMEM; + + raw_spin_lock_irqsave(&vector_lock, flags); + ret = __assign_irq_vector(irq, cfg, apic->target_cpus()); + raw_spin_unlock_irqrestore(&vector_lock, flags); + + if (!ret) + irq_set_chip_data(irq, cfg); + else + free_irq_cfg(irq, cfg); + return ret; +} + +void arch_teardown_hwirq(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + + free_remapped_irq(irq); + clear_irq_vector(irq, cfg); + free_irq_cfg(irq, cfg); +} + +static void __init print_APIC_field(int base) +{ + int i; + + printk(KERN_DEBUG); + + for (i = 0; i < 8; i++) + pr_cont("%08x", apic_read(base + i*0x10)); + + pr_cont("\n"); +} + +static void __init print_local_APIC(void *dummy) +{ + unsigned int i, v, ver, maxlvt; + u64 icr; + + pr_debug("printing local APIC contents on CPU#%d/%d:\n", + smp_processor_id(), hard_smp_processor_id()); + v = apic_read(APIC_ID); + pr_info("... APIC ID: %08x (%01x)\n", v, read_apic_id()); + v = apic_read(APIC_LVR); + pr_info("... 
APIC VERSION: %08x\n", v); + ver = GET_APIC_VERSION(v); + maxlvt = lapic_get_maxlvt(); + + v = apic_read(APIC_TASKPRI); + pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); + + /* !82489DX */ + if (APIC_INTEGRATED(ver)) { + if (!APIC_XAPIC(ver)) { + v = apic_read(APIC_ARBPRI); + pr_debug("... APIC ARBPRI: %08x (%02x)\n", + v, v & APIC_ARBPRI_MASK); + } + v = apic_read(APIC_PROCPRI); + pr_debug("... APIC PROCPRI: %08x\n", v); + } + + /* + * Remote read supported only in the 82489DX and local APIC for + * Pentium processors. + */ + if (!APIC_INTEGRATED(ver) || maxlvt == 3) { + v = apic_read(APIC_RRR); + pr_debug("... APIC RRR: %08x\n", v); + } + + v = apic_read(APIC_LDR); + pr_debug("... APIC LDR: %08x\n", v); + if (!x2apic_enabled()) { + v = apic_read(APIC_DFR); + pr_debug("... APIC DFR: %08x\n", v); + } + v = apic_read(APIC_SPIV); + pr_debug("... APIC SPIV: %08x\n", v); + + pr_debug("... APIC ISR field:\n"); + print_APIC_field(APIC_ISR); + pr_debug("... APIC TMR field:\n"); + print_APIC_field(APIC_TMR); + pr_debug("... APIC IRR field:\n"); + print_APIC_field(APIC_IRR); + + /* !82489DX */ + if (APIC_INTEGRATED(ver)) { + /* Due to the Pentium erratum 3AP. */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + + v = apic_read(APIC_ESR); + pr_debug("... APIC ESR: %08x\n", v); + } + + icr = apic_icr_read(); + pr_debug("... APIC ICR: %08x\n", (u32)icr); + pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32)); + + v = apic_read(APIC_LVTT); + pr_debug("... APIC LVTT: %08x\n", v); + + if (maxlvt > 3) { + /* PC is LVT#4. */ + v = apic_read(APIC_LVTPC); + pr_debug("... APIC LVTPC: %08x\n", v); + } + v = apic_read(APIC_LVT0); + pr_debug("... APIC LVT0: %08x\n", v); + v = apic_read(APIC_LVT1); + pr_debug("... APIC LVT1: %08x\n", v); + + if (maxlvt > 2) { + /* ERR is LVT#3. */ + v = apic_read(APIC_LVTERR); + pr_debug("... APIC LVTERR: %08x\n", v); + } + + v = apic_read(APIC_TMICT); + pr_debug("... APIC TMICT: %08x\n", v); + v = apic_read(APIC_TMCCT); + pr_debug("... 
APIC TMCCT: %08x\n", v); + v = apic_read(APIC_TDCR); + pr_debug("... APIC TDCR: %08x\n", v); + + if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { + v = apic_read(APIC_EFEAT); + maxlvt = (v >> 16) & 0xff; + pr_debug("... APIC EFEAT: %08x\n", v); + v = apic_read(APIC_ECTRL); + pr_debug("... APIC ECTRL: %08x\n", v); + for (i = 0; i < maxlvt; i++) { + v = apic_read(APIC_EILVTn(i)); + pr_debug("... APIC EILVT%d: %08x\n", i, v); + } + } + pr_cont("\n"); +} + +static void __init print_local_APICs(int maxcpu) +{ + int cpu; + + if (!maxcpu) + return; + + preempt_disable(); + for_each_online_cpu(cpu) { + if (cpu >= maxcpu) + break; + smp_call_function_single(cpu, print_local_APIC, NULL, 1); + } + preempt_enable(); +} + +static void __init print_PIC(void) +{ + unsigned int v; + unsigned long flags; + + if (!nr_legacy_irqs()) + return; + + pr_debug("\nprinting PIC contents\n"); + + raw_spin_lock_irqsave(&i8259A_lock, flags); + + v = inb(0xa1) << 8 | inb(0x21); + pr_debug("... PIC IMR: %04x\n", v); + + v = inb(0xa0) << 8 | inb(0x20); + pr_debug("... PIC IRR: %04x\n", v); + + outb(0x0b, 0xa0); + outb(0x0b, 0x20); + v = inb(0xa0) << 8 | inb(0x20); + outb(0x0a, 0xa0); + outb(0x0a, 0x20); + + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + + pr_debug("... PIC ISR: %04x\n", v); + + v = inb(0x4d1) << 8 | inb(0x4d0); + pr_debug("... 
PIC ELCR: %04x\n", v); +} + +static int show_lapic __initdata = 1; +static __init int setup_show_lapic(char *arg) +{ + int num = -1; + + if (strcmp(arg, "all") == 0) { + show_lapic = CONFIG_NR_CPUS; + } else { + get_option(&arg, &num); + if (num >= 0) + show_lapic = num; + } + + return 1; +} +__setup("show_lapic=", setup_show_lapic); + +static int __init print_ICs(void) +{ + if (apic_verbosity == APIC_QUIET) + return 0; + + print_PIC(); + + /* don't print out if apic is not there */ + if (!cpu_has_apic && !apic_from_smp_config()) + return 0; + + print_local_APICs(show_lapic); + print_IO_APICs(); + + return 0; +} + +late_initcall(print_ICs); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 08f3fed2b0f2..10b8d3eaaf15 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -276,6 +276,17 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, return box; } +/* + * Using uncore_pmu_event_init pmu event_init callback + * as a detection point for uncore events. 
+ */ +static int uncore_pmu_event_init(struct perf_event *event); + +static bool is_uncore_event(struct perf_event *event) +{ + return event->pmu->event_init == uncore_pmu_event_init; +} + static int uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) { @@ -290,13 +301,18 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, b return -EINVAL; n = box->n_events; - box->event_list[n] = leader; - n++; + + if (is_uncore_event(leader)) { + box->event_list[n] = leader; + n++; + } + if (!dogrp) return n; list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (event->state <= PERF_EVENT_STATE_OFF) + if (!is_uncore_event(event) || + event->state <= PERF_EVENT_STATE_OFF) continue; if (n >= max_count) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index f5ab56d14287..aceb2f90c716 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,6 +28,7 @@ #include <asm/nmi.h> #include <asm/hw_irq.h> #include <asm/apic.h> +#include <asm/io_apic.h> #include <asm/hpet.h> #include <linux/kdebug.h> #include <asm/cpu.h> diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 2e1a6853e00c..fe9f0b79a18b 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -455,6 +455,23 @@ struct intel_stolen_funcs { u32 (*base)(int num, int slot, int func, size_t size); }; +static size_t __init gen9_stolen_size(int num, int slot, int func) +{ + u16 gmch_ctrl; + + gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); + gmch_ctrl >>= BDW_GMCH_GMS_SHIFT; + gmch_ctrl &= BDW_GMCH_GMS_MASK; + + if (gmch_ctrl < 0xf0) + return gmch_ctrl << 25; /* 32 MB units */ + else + /* 4MB increments starting at 0xf0 for 4MB */ + return (gmch_ctrl - 0xf0 + 1) << 22; +} + +typedef size_t (*stolen_size_fn)(int num, int slot, int func); + static const struct intel_stolen_funcs i830_stolen_funcs __initconst = { .base = i830_stolen_base, .size = 
/*
 * Stolen-memory callbacks for gen9 graphics: the base address comes from
 * intel_stolen_base() and the size from gen9_stolen_size(). Referenced by
 * the INTEL_SKL_IDS() entry of intel_stolen_ids[].
 */
static const struct intel_stolen_funcs gen9_stolen_funcs __initconst = {
	.base = intel_stolen_base,
	.size = gen9_stolen_size,
};
x86_init.irqs.intr_init(); } -/* - * Setup the vector to irq mappings. - */ -void setup_vector_irq(int cpu) -{ -#ifndef CONFIG_X86_IO_APIC - int irq; - - /* - * On most of the platforms, legacy PIC delivers the interrupts on the - * boot cpu. But there are certain platforms where PIC interrupts are - * delivered to multiple cpu's. If the legacy IRQ is handled by the - * legacy PIC, for the new cpu that is coming online, setup the static - * legacy vector to irq mapping: - */ - for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; -#endif - - __setup_vector_irq(cpu); -} - static void __init smp_intr_init(void) { #ifdef CONFIG_SMP -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. @@ -144,7 +121,6 @@ static void __init smp_intr_init(void) /* IPI used for rebooting/stopping */ alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); -#endif #endif /* CONFIG_SMP */ } @@ -159,7 +135,7 @@ static void __init apic_intr_init(void) alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #endif -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) +#ifdef CONFIG_X86_LOCAL_APIC /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); @@ -197,10 +173,17 @@ void __init native_init_IRQ(void) * 'special' SMP interrupts) */ i = FIRST_EXTERNAL_VECTOR; - for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { +#ifndef CONFIG_X86_LOCAL_APIC +#define first_system_vector NR_VECTORS +#endif + for_each_clear_bit_from(i, used_vectors, first_system_vector) { /* IA32_SYSCALL_VECTOR could be used in trap_init already. 
*/ set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } +#ifdef CONFIG_X86_LOCAL_APIC + for_each_clear_bit_from(i, used_vectors, NR_VECTORS) + set_intr_gate(i, spurious_interrupt); +#endif if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f6945bef2cd1..94f643484300 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault); static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; - pv_info.paravirt_enabled = 1; + + /* + * KVM isn't paravirt in the sense of paravirt_enabled. A KVM + * guest kernel works like a bare metal kernel with additional + * features, and paravirt_enabled is about features that are + * missing. + */ + pv_info.paravirt_enabled = 0; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d9156ceecdff..42caaef897c8 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -59,13 +59,12 @@ static void kvm_get_wallclock(struct timespec *now) native_write_msr(msr_kvm_wall_clock, low, high); - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; pvclock_read_wallclock(&wall_clock, vcpu_time, now); - preempt_enable(); + put_cpu(); } static int kvm_set_wallclock(const struct timespec *now) @@ -107,11 +106,10 @@ static unsigned long kvm_get_tsc_khz(void) int cpu; unsigned long tsc_khz; - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); src = &hv_clock[cpu].pvti; tsc_khz = pvclock_tsc_khz(src); - preempt_enable(); + put_cpu(); return tsc_khz; } @@ -263,7 +261,6 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); - pv_info.paravirt_enabled = 1; pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) @@ -284,23 +281,22 
@@ int __init kvm_setup_vsyscall_timeinfo(void) size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; flags = pvclock_read_flags(vcpu_time); if (!(flags & PVCLOCK_TSC_STABLE_BIT)) { - preempt_enable(); + put_cpu(); return 1; } if ((ret = pvclock_init_vsyscall(hv_clock, size))) { - preempt_enable(); + put_cpu(); return ret; } - preempt_enable(); + put_cpu(); kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; #endif diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 72e8e310258d..469b23d6acc2 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -20,6 +20,7 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/apic.h> +#include <asm/io_apic.h> #include <asm/cpufeature.h> #include <asm/desc.h> #include <asm/cacheflush.h> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 485981059a40..415480d3ea84 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -22,6 +22,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/io_apic.h> #include <asm/debugreg.h> #include <asm/kexec-bzimage64.h> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 17962e667a91..bae6c609888e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -12,6 +12,7 @@ #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> +#include <asm/io_apic.h> #include <asm/desc.h> #include <asm/hpet.h> #include <asm/pgtable.h> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7a8f5845e8eb..6d7022c683e3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1084,7 +1084,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) { unsigned int i; - preempt_disable(); smp_cpu_index_default(); /* @@ 
-1102,22 +1101,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } set_cpu_sibling_map(0); - if (smp_sanity_check(max_cpus) < 0) { pr_info("SMP disabled\n"); disable_smp(); - goto out; + return; } default_setup_apic_routing(); - preempt_disable(); if (read_apic_id() != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", read_apic_id(), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } - preempt_enable(); connect_bsp_APIC(); @@ -1151,8 +1147,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) uv_system_init(); set_mtrr_aps_delayed_init(); -out: - preempt_enable(); } void arch_enable_nonboot_cpus_begin(void) diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 3e551eee87b9..4e942f31b1a7 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -55,12 +55,6 @@ static bool tls_desc_okay(const struct user_desc *info) if (info->seg_not_present) return false; -#ifdef CONFIG_X86_64 - /* The L bit makes no sense for data. */ - if (info->lm) - return false; -#endif - return true; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a9ae20579895..88900e288021 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -331,7 +331,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) break; /* Success, it was handled */ case 1: /* Bound violation. */ info = mpx_generate_siginfo(regs, xsave_buf); - if (PTR_ERR(info)) { + if (IS_ERR(info)) { /* * We failed to decode the MPX instruction. Act as if * the exception was not caused by MPX. 
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 4c540c4719d8..0de1fae2bdf0 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -738,3 +738,4 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate) return (void *)xsave + xstate_comp_offsets[feature]; } +EXPORT_SYMBOL_GPL(get_xsave_addr); diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 25d22b2d6509..08f790dfadc9 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,14 +7,13 @@ CFLAGS_vmx.o := -I. KVM := ../../../virt/kvm -kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ - $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ +kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o -kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ - i8254.o cpuid.o pmu.o + i8254.o ioapic.o irq_comm.o cpuid.o pmu.o +kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o kvm-intel-y += vmx.o kvm-amd-y += svm.o diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c new file mode 100644 index 000000000000..6eb5c20ee373 --- /dev/null +++ b/arch/x86/kvm/assigned-dev.c @@ -0,0 +1,1052 @@ +/* + * Kernel-based Virtual Machine - device assignment support + * + * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/namei.h> +#include <linux/fs.h> +#include "irq.h" +#include "assigned-dev.h" + +struct kvm_assigned_dev_kernel { + struct kvm_irq_ack_notifier ack_notifier; + struct list_head list; + int assigned_dev_id; + int host_segnr; + int host_busnr; + int host_devfn; + unsigned int entries_nr; + int host_irq; + bool host_irq_disabled; + bool pci_2_3; + struct msix_entry *host_msix_entries; + int guest_irq; + struct msix_entry *guest_msix_entries; + unsigned long irq_requested_type; + int irq_source_id; + int flags; + struct pci_dev *dev; + struct kvm *kvm; + spinlock_t intx_lock; + spinlock_t intx_mask_lock; + char irq_name[32]; + struct pci_saved_state *pci_saved_state; +}; + +static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, + int assigned_dev_id) +{ + struct list_head *ptr; + struct kvm_assigned_dev_kernel *match; + + list_for_each(ptr, head) { + match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + if (match->assigned_dev_id == assigned_dev_id) + return match; + } + return NULL; +} + +static int find_index_from_host_irq(struct kvm_assigned_dev_kernel + *assigned_dev, int irq) +{ + int i, index; + struct msix_entry *host_msix_entries; + + host_msix_entries = assigned_dev->host_msix_entries; + + index = -1; + for (i = 0; i < assigned_dev->entries_nr; i++) + if (irq == host_msix_entries[i].vector) { + index = i; + break; + } + if (index < 0) + printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); + + return index; +} + +static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int ret; + + spin_lock(&assigned_dev->intx_lock); + if (pci_check_and_mask_intx(assigned_dev->dev)) { + 
assigned_dev->host_irq_disabled = true; + ret = IRQ_WAKE_THREAD; + } else + ret = IRQ_NONE; + spin_unlock(&assigned_dev->intx_lock); + + return ret; +} + +static void +kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, + int vector) +{ + if (unlikely(assigned_dev->irq_requested_type & + KVM_DEV_IRQ_GUEST_INTX)) { + spin_lock(&assigned_dev->intx_mask_lock); + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, vector, 1, + false); + spin_unlock(&assigned_dev->intx_mask_lock); + } else + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + vector, 1, false); +} + +static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { + spin_lock_irq(&assigned_dev->intx_lock); + disable_irq_nosync(irq); + assigned_dev->host_irq_disabled = true; + spin_unlock_irq(&assigned_dev->intx_lock); + } + + kvm_assigned_dev_raise_guest_irq(assigned_dev, + assigned_dev->guest_irq); + + return IRQ_HANDLED; +} + +#ifdef __KVM_HAVE_MSI +static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int ret = kvm_set_irq_inatomic(assigned_dev->kvm, + assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + return unlikely(ret == -EWOULDBLOCK) ? 
IRQ_WAKE_THREAD : IRQ_HANDLED; +} + +static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + + kvm_assigned_dev_raise_guest_irq(assigned_dev, + assigned_dev->guest_irq); + + return IRQ_HANDLED; +} +#endif + +#ifdef __KVM_HAVE_MSIX +static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int index = find_index_from_host_irq(assigned_dev, irq); + u32 vector; + int ret = 0; + + if (index >= 0) { + vector = assigned_dev->guest_msix_entries[index].vector; + ret = kvm_set_irq_inatomic(assigned_dev->kvm, + assigned_dev->irq_source_id, + vector, 1); + } + + return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; +} + +static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int index = find_index_from_host_irq(assigned_dev, irq); + u32 vector; + + if (index >= 0) { + vector = assigned_dev->guest_msix_entries[index].vector; + kvm_assigned_dev_raise_guest_irq(assigned_dev, vector); + } + + return IRQ_HANDLED; +} +#endif + +/* Ack the irq line for an assigned device */ +static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) +{ + struct kvm_assigned_dev_kernel *dev = + container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); + + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false); + + spin_lock(&dev->intx_mask_lock); + + if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { + bool reassert = false; + + spin_lock_irq(&dev->intx_lock); + /* + * The guest IRQ may be shared so this ack can come from an + * IRQ for another guest device. 
+ */ + if (dev->host_irq_disabled) { + if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) + enable_irq(dev->host_irq); + else if (!pci_check_and_unmask_intx(dev->dev)) + reassert = true; + dev->host_irq_disabled = reassert; + } + spin_unlock_irq(&dev->intx_lock); + + if (reassert) + kvm_set_irq(dev->kvm, dev->irq_source_id, + dev->guest_irq, 1, false); + } + + spin_unlock(&dev->intx_mask_lock); +} + +static void deassign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + if (assigned_dev->ack_notifier.gsi != -1) + kvm_unregister_irq_ack_notifier(kvm, + &assigned_dev->ack_notifier); + + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 0, false); + + if (assigned_dev->irq_source_id != -1) + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); + assigned_dev->irq_source_id = -1; + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); +} + +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ +static void deassign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + /* + * We disable irq here to prevent further events. + * + * Notice this maybe result in nested disable if the interrupt type is + * INTx, but it's OK for we are going to free it. + * + * If this function is a part of VM destroy, please ensure that till + * now, the kvm state is still legal for probably we also have to wait + * on a currently running IRQ handler. 
+ */ + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + int i; + for (i = 0; i < assigned_dev->entries_nr; i++) + disable_irq(assigned_dev->host_msix_entries[i].vector); + + for (i = 0; i < assigned_dev->entries_nr; i++) + free_irq(assigned_dev->host_msix_entries[i].vector, + assigned_dev); + + assigned_dev->entries_nr = 0; + kfree(assigned_dev->host_msix_entries); + kfree(assigned_dev->guest_msix_entries); + pci_disable_msix(assigned_dev->dev); + } else { + /* Deal with MSI and INTx */ + if ((assigned_dev->irq_requested_type & + KVM_DEV_IRQ_HOST_INTX) && + (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { + spin_lock_irq(&assigned_dev->intx_lock); + pci_intx(assigned_dev->dev, false); + spin_unlock_irq(&assigned_dev->intx_lock); + synchronize_irq(assigned_dev->host_irq); + } else + disable_irq(assigned_dev->host_irq); + + free_irq(assigned_dev->host_irq, assigned_dev); + + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) + pci_disable_msi(assigned_dev->dev); + } + + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); +} + +static int kvm_deassign_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev, + unsigned long irq_requested_type) +{ + unsigned long guest_irq_type, host_irq_type; + + if (!irqchip_in_kernel(kvm)) + return -EINVAL; + /* no irq assignment to deassign */ + if (!assigned_dev->irq_requested_type) + return -ENXIO; + + host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; + guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; + + if (host_irq_type) + deassign_host_irq(kvm, assigned_dev); + if (guest_irq_type) + deassign_guest_irq(kvm, assigned_dev); + + return 0; +} + +static void kvm_free_assigned_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); +} + +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + kvm_free_assigned_irq(kvm, 
assigned_dev); + + pci_reset_function(assigned_dev->dev); + if (pci_load_and_free_saved_state(assigned_dev->dev, + &assigned_dev->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&assigned_dev->dev->dev)); + else + pci_restore_state(assigned_dev->dev); + + pci_clear_dev_assigned(assigned_dev->dev); + + pci_release_regions(assigned_dev->dev); + pci_disable_device(assigned_dev->dev); + pci_dev_put(assigned_dev->dev); + + list_del(&assigned_dev->list); + kfree(assigned_dev); +} + +void kvm_free_all_assigned_devices(struct kvm *kvm) +{ + struct list_head *ptr, *ptr2; + struct kvm_assigned_dev_kernel *assigned_dev; + + list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { + assigned_dev = list_entry(ptr, + struct kvm_assigned_dev_kernel, + list); + + kvm_free_assigned_device(kvm, assigned_dev); + } +} + +static int assigned_device_enable_host_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + irq_handler_t irq_handler; + unsigned long flags; + + dev->host_irq = dev->dev->irq; + + /* + * We can only share the IRQ line with other host devices if we are + * able to disable the IRQ source at device-level - independently of + * the guest driver. Otherwise host devices may suffer from unbounded + * IRQ latencies when the guest keeps the line asserted. 
+ */ + if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { + irq_handler = kvm_assigned_dev_intx; + flags = IRQF_SHARED; + } else { + irq_handler = NULL; + flags = IRQF_ONESHOT; + } + if (request_threaded_irq(dev->host_irq, irq_handler, + kvm_assigned_dev_thread_intx, flags, + dev->irq_name, dev)) + return -EIO; + + if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { + spin_lock_irq(&dev->intx_lock); + pci_intx(dev->dev, true); + spin_unlock_irq(&dev->intx_lock); + } + return 0; +} + +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_host_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int r; + + if (!dev->dev->msi_enabled) { + r = pci_enable_msi(dev->dev); + if (r) + return r; + } + + dev->host_irq = dev->dev->irq; + if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi, + kvm_assigned_dev_thread_msi, 0, + dev->irq_name, dev)) { + pci_disable_msi(dev->dev); + return -EIO; + } + + return 0; +} +#endif + +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_host_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int i, r = -EINVAL; + + /* host_msix_entries and guest_msix_entries should have been + * initialized */ + if (dev->entries_nr == 0) + return r; + + r = pci_enable_msix_exact(dev->dev, + dev->host_msix_entries, dev->entries_nr); + if (r) + return r; + + for (i = 0; i < dev->entries_nr; i++) { + r = request_threaded_irq(dev->host_msix_entries[i].vector, + kvm_assigned_dev_msix, + kvm_assigned_dev_thread_msix, + 0, dev->irq_name, dev); + if (r) + goto err; + } + + return 0; +err: + for (i -= 1; i >= 0; i--) + free_irq(dev->host_msix_entries[i].vector, dev); + pci_disable_msix(dev->dev); + return r; +} + +#endif + +static int assigned_device_enable_guest_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = irq->guest_irq; + return 0; +} + +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_guest_msi(struct kvm *kvm, + 
struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + return 0; +} +#endif + +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_guest_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + return 0; +} +#endif + +static int assign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + __u32 host_irq_type) +{ + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) + return r; + + snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s", + pci_name(dev->dev)); + + switch (host_irq_type) { + case KVM_DEV_IRQ_HOST_INTX: + r = assigned_device_enable_host_intx(kvm, dev); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_HOST_MSI: + r = assigned_device_enable_host_msi(kvm, dev); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_HOST_MSIX: + r = assigned_device_enable_host_msix(kvm, dev); + break; +#endif + default: + r = -EINVAL; + } + dev->host_irq_disabled = false; + + if (!r) + dev->irq_requested_type |= host_irq_type; + + return r; +} + +static int assign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq, + unsigned long guest_irq_type) +{ + int id; + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) + return r; + + id = kvm_request_irq_source_id(kvm); + if (id < 0) + return id; + + dev->irq_source_id = id; + + switch (guest_irq_type) { + case KVM_DEV_IRQ_GUEST_INTX: + r = assigned_device_enable_guest_intx(kvm, dev, irq); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_GUEST_MSI: + r = assigned_device_enable_guest_msi(kvm, dev, irq); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_GUEST_MSIX: + r = assigned_device_enable_guest_msix(kvm, dev, irq); + break; +#endif + default: + r = -EINVAL; + } + + if (!r) { + dev->irq_requested_type |= 
guest_irq_type; + if (dev->ack_notifier.gsi != -1) + kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); + } else { + kvm_free_irq_source_id(kvm, dev->irq_source_id); + dev->irq_source_id = -1; + } + + return r; +} + +/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ +static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int r = -EINVAL; + struct kvm_assigned_dev_kernel *match; + unsigned long host_irq_type, guest_irq_type; + + if (!irqchip_in_kernel(kvm)) + return r; + + mutex_lock(&kvm->lock); + r = -ENODEV; + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); + guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); + + r = -EINVAL; + /* can only assign one type at a time */ + if (hweight_long(host_irq_type) > 1) + goto out; + if (hweight_long(guest_irq_type) > 1) + goto out; + if (host_irq_type == 0 && guest_irq_type == 0) + goto out; + + r = 0; + if (host_irq_type) + r = assign_host_irq(kvm, match, host_irq_type); + if (r) + goto out; + + if (guest_irq_type) + r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = -ENODEV; + struct kvm_assigned_dev_kernel *match; + unsigned long irq_type; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK | + KVM_DEV_IRQ_GUEST_MASK); + r = kvm_deassign_irq(kvm, match, irq_type); +out: + mutex_unlock(&kvm->lock); + return r; +} + +/* + * We want to test whether the caller has been granted permissions to + * use this device. 
To be able to configure and control the device, + * the user needs access to PCI configuration space and BAR resources. + * These are accessed through PCI sysfs. PCI config space is often + * passed to the process calling this ioctl via file descriptor, so we + * can't rely on access to that file. We can check for permissions + * on each of the BAR resource files, which is a pretty clear + * indicator that the user has been granted access to the device. + */ +static int probe_sysfs_permissions(struct pci_dev *dev) +{ +#ifdef CONFIG_SYSFS + int i; + bool bar_found = false; + + for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { + char *kpath, *syspath; + struct path path; + struct inode *inode; + int r; + + if (!pci_resource_len(dev, i)) + continue; + + kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); + if (!kpath) + return -ENOMEM; + + /* Per sysfs-rules, sysfs is always at /sys */ + syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); + kfree(kpath); + if (!syspath) + return -ENOMEM; + + r = kern_path(syspath, LOOKUP_FOLLOW, &path); + kfree(syspath); + if (r) + return r; + + inode = path.dentry->d_inode; + + r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); + path_put(&path); + if (r) + return r; + + bar_found = true; + } + + /* If no resources, probably something special */ + if (!bar_found) + return -EPERM; + + return 0; +#else + return -EINVAL; /* No way to control the device without sysfs */ +#endif +} + +static int kvm_vm_ioctl_assign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0, idx; + struct kvm_assigned_dev_kernel *match; + struct pci_dev *dev; + + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) + return -EINVAL; + + mutex_lock(&kvm->lock); + idx = srcu_read_lock(&kvm->srcu); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (match) { + /* device already assigned */ + r = -EEXIST; + goto out; + } + + match = 
kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); + if (match == NULL) { + printk(KERN_INFO "%s: Couldn't allocate memory\n", + __func__); + r = -ENOMEM; + goto out; + } + dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, + assigned_dev->busnr, + assigned_dev->devfn); + if (!dev) { + printk(KERN_INFO "%s: host device not found\n", __func__); + r = -EINVAL; + goto out_free; + } + + /* Don't allow bridges to be assigned */ + if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) { + r = -EPERM; + goto out_put; + } + + r = probe_sysfs_permissions(dev); + if (r) + goto out_put; + + if (pci_enable_device(dev)) { + printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); + r = -EBUSY; + goto out_put; + } + r = pci_request_regions(dev, "kvm_assigned_device"); + if (r) { + printk(KERN_INFO "%s: Could not get access to device regions\n", + __func__); + goto out_disable; + } + + pci_reset_function(dev); + pci_save_state(dev); + match->pci_saved_state = pci_store_saved_state(dev); + if (!match->pci_saved_state) + printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", + __func__, dev_name(&dev->dev)); + + if (!pci_intx_mask_supported(dev)) + assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3; + + match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_segnr = assigned_dev->segnr; + match->host_busnr = assigned_dev->busnr; + match->host_devfn = assigned_dev->devfn; + match->flags = assigned_dev->flags; + match->dev = dev; + spin_lock_init(&match->intx_lock); + spin_lock_init(&match->intx_mask_lock); + match->irq_source_id = -1; + match->kvm = kvm; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + + list_add(&match->list, &kvm->arch.assigned_dev_head); + + if (!kvm->arch.iommu_domain) { + r = kvm_iommu_map_guest(kvm); + if (r) + goto out_list_del; + } + r = kvm_assign_device(kvm, match->dev); + if (r) + goto out_list_del; + +out: + srcu_read_unlock(&kvm->srcu, idx); + mutex_unlock(&kvm->lock); + return r; +out_list_del: + if 
(pci_load_and_free_saved_state(dev, &match->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&dev->dev)); + list_del(&match->list); + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_put: + pci_dev_put(dev); +out_free: + kfree(match); + srcu_read_unlock(&kvm->srcu, idx); + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (!match) { + printk(KERN_INFO "%s: device hasn't been assigned before, " + "so cannot be deassigned\n", __func__); + r = -EINVAL; + goto out; + } + + kvm_deassign_device(kvm, match->dev); + + kvm_free_assigned_device(kvm, match); + +out: + mutex_unlock(&kvm->lock); + return r; +} + + +#ifdef __KVM_HAVE_MSIX +static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, + struct kvm_assigned_msix_nr *entry_nr) +{ + int r = 0; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry_nr->assigned_dev_id); + if (!adev) { + r = -EINVAL; + goto msix_nr_out; + } + + if (adev->entries_nr == 0) { + adev->entries_nr = entry_nr->entry_nr; + if (adev->entries_nr == 0 || + adev->entries_nr > KVM_MAX_MSIX_PER_DEV) { + r = -EINVAL; + goto msix_nr_out; + } + + adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * + entry_nr->entry_nr, + GFP_KERNEL); + if (!adev->host_msix_entries) { + r = -ENOMEM; + goto msix_nr_out; + } + adev->guest_msix_entries = + kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr, + GFP_KERNEL); + if (!adev->guest_msix_entries) { + kfree(adev->host_msix_entries); + r = -ENOMEM; + goto msix_nr_out; + } + } else /* Not allowed set MSI-X number twice */ + r = -EINVAL; +msix_nr_out: + mutex_unlock(&kvm->lock); + 
return r; +} + +static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, + struct kvm_assigned_msix_entry *entry) +{ + int r = 0, i; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry->assigned_dev_id); + + if (!adev) { + r = -EINVAL; + goto msix_entry_out; + } + + for (i = 0; i < adev->entries_nr; i++) + if (adev->guest_msix_entries[i].vector == 0 || + adev->guest_msix_entries[i].entry == entry->entry) { + adev->guest_msix_entries[i].entry = entry->entry; + adev->guest_msix_entries[i].vector = entry->gsi; + adev->host_msix_entries[i].entry = entry->entry; + break; + } + if (i == adev->entries_nr) { + r = -ENOSPC; + goto msix_entry_out; + } + +msix_entry_out: + mutex_unlock(&kvm->lock); + + return r; +} +#endif + +static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (!match) { + r = -ENODEV; + goto out; + } + + spin_lock(&match->intx_mask_lock); + + match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; + match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; + + if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { + if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { + kvm_set_irq(match->kvm, match->irq_source_id, + match->guest_irq, 0, false); + /* + * Masking at hardware-level is performed on demand, + * i.e. when an IRQ actually arrives at the host. + */ + } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { + /* + * Unmask the IRQ line if required. Unmasking at + * device level will be performed by user space. 
+ */ + spin_lock_irq(&match->intx_lock); + if (match->host_irq_disabled) { + enable_irq(match->host_irq); + match->host_irq_disabled = false; + } + spin_unlock_irq(&match->intx_lock); + } + } + + spin_unlock(&match->intx_mask_lock); + +out: + mutex_unlock(&kvm->lock); + return r; +} + +long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + int r; + + switch (ioctl) { + case KVM_ASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } + case KVM_ASSIGN_IRQ: { + r = -EOPNOTSUPP; + break; + } + case KVM_ASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } + case KVM_DEASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } + case KVM_DEASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } +#ifdef __KVM_HAVE_MSIX + case KVM_ASSIGN_SET_MSIX_NR: { + struct kvm_assigned_msix_nr entry_nr; + r = -EFAULT; + if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) + goto out; + r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); + if (r) + goto out; + break; + } + case KVM_ASSIGN_SET_MSIX_ENTRY: { + struct kvm_assigned_msix_entry entry; + r = -EFAULT; + if (copy_from_user(&entry, argp, sizeof entry)) + goto out; + r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); + if (r) 
+ goto out; + break; + } +#endif + case KVM_ASSIGN_SET_INTX_MASK: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev); + break; + } + default: + r = -ENOTTY; + break; + } +out: + return r; +} diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h new file mode 100644 index 000000000000..a428c1a211b2 --- /dev/null +++ b/arch/x86/kvm/assigned-dev.h @@ -0,0 +1,32 @@ +#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H +#define ARCH_X86_KVM_ASSIGNED_DEV_H + +#include <linux/kvm_host.h> + +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT +int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev); +int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev); + +int kvm_iommu_map_guest(struct kvm *kvm); +int kvm_iommu_unmap_guest(struct kvm *kvm); + +long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg); + +void kvm_free_all_assigned_devices(struct kvm *kvm); +#else +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + return 0; +} + +static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + return -ENOTTY; +} + +static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} +#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ + +#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 976e3a57f9ea..8a80737ee6e6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -23,7 +23,7 @@ #include "mmu.h" #include "trace.h" -static u32 xstate_required_size(u64 xstate_bv) +static u32 xstate_required_size(u64 xstate_bv, bool compacted) { int feature_bit = 0; u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; @@ -31,9 +31,10 @@ static u32 xstate_required_size(u64 xstate_bv) xstate_bv &= XSTATE_EXTEND_MASK; while (xstate_bv) { if (xstate_bv & 0x1) { - u32 eax, ebx, ecx, edx; + u32 eax, ebx, ecx, edx, offset; 
cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); - ret = max(ret, eax + ebx); + offset = compacted ? ret : ebx; + ret = max(ret, offset + eax); } xstate_bv >>= 1; @@ -53,6 +54,8 @@ u64 kvm_supported_xcr0(void) return xcr0; } +#define F(x) bit(X86_FEATURE_##x) + int kvm_update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -64,13 +67,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) /* Update OSXSAVE bit */ if (cpu_has_xsave && best->function == 0x1) { - best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); + best->ecx &= ~F(OSXSAVE); if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) - best->ecx |= bit(X86_FEATURE_OSXSAVE); + best->ecx |= F(OSXSAVE); } if (apic) { - if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) + if (best->ecx & F(TSC_DEADLINE_TIMER)) apic->lapic_timer.timer_mode_mask = 3 << 17; else apic->lapic_timer.timer_mode_mask = 1 << 17; @@ -85,9 +88,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) (best->eax | ((u64)best->edx << 32)) & kvm_supported_xcr0(); vcpu->arch.guest_xstate_size = best->ebx = - xstate_required_size(vcpu->arch.xcr0); + xstate_required_size(vcpu->arch.xcr0, false); } + best = kvm_find_cpuid_entry(vcpu, 0xD, 1); + if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) + best->ebx = xstate_required_size(vcpu->arch.xcr0, true); + /* * The existing code assumes virtual address is 48-bit in the canonical * address checks; exit if it is ever changed. 
@@ -122,8 +129,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) break; } } - if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) { - entry->edx &= ~bit(X86_FEATURE_NX); + if (entry && (entry->edx & F(NX)) && !is_efer_nx()) { + entry->edx &= ~F(NX); printk(KERN_INFO "kvm: guest NX capability removed\n"); } } @@ -227,8 +234,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags = 0; } -#define F(x) bit(X86_FEATURE_##x) - static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, u32 func, u32 index, int *nent, int maxnent) { @@ -267,6 +272,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; + unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; /* cpuid 1.edx */ const u32 kvm_supported_word0_x86_features = @@ -317,7 +323,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word9_x86_features = F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | - F(ADX) | F(SMAP); + F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | + F(AVX512CD); + + /* cpuid 0xD.1.eax */ + const u32 kvm_supported_word10_x86_features = + F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -453,16 +464,34 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, u64 supported = kvm_supported_xcr0(); entry->eax &= supported; + entry->ebx = xstate_required_size(supported, false); + entry->ecx = entry->ebx; entry->edx &= supported >> 32; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + if (!supported) + break; + for (idx = 1, i = 1; idx < 64; ++idx) { u64 mask = ((u64)1 
<< idx); if (*nent >= maxnent) goto out; do_cpuid_1_ent(&entry[i], function, idx); - if (entry[i].eax == 0 || !(supported & mask)) - continue; + if (idx == 1) { + entry[i].eax &= kvm_supported_word10_x86_features; + entry[i].ebx = 0; + if (entry[i].eax & (F(XSAVES)|F(XSAVEC))) + entry[i].ebx = + xstate_required_size(supported, + true); + } else { + if (entry[i].eax == 0 || !(supported & mask)) + continue; + if (WARN_ON_ONCE(entry[i].ecx & 1)) + continue; + } + entry[i].ecx = 0; + entry[i].edx = 0; entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9f8a2faf5040..169b09d76ddd 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -123,6 +123,7 @@ #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ #define Escape (5<<15) /* Escape to coprocessor instruction */ +#define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */ #define Sse (1<<18) /* SSE Vector instruction */ /* Generic ModRM decode. */ #define ModRM (1<<19) @@ -166,6 +167,8 @@ #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ #define NoBigReal ((u64)1 << 50) /* No big real mode */ #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ +#define NearBranch ((u64)1 << 52) /* Near branches */ +#define No16 ((u64)1 << 53) /* No 16 bit operand */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -209,6 +212,7 @@ struct opcode { const struct group_dual *gdual; const struct gprefix *gprefix; const struct escape *esc; + const struct instr_dual *idual; void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); @@ -231,6 +235,11 @@ struct escape { struct opcode high[64]; }; +struct instr_dual { + struct opcode mod012; + struct opcode mod3; +}; + /* EFLAGS bit definitions. 
*/ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -379,6 +388,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); ON64(FOP2E(op##q, rax, cl)) \ FOP_END +/* 2 operand, src and dest are reversed */ +#define FASTOP2R(op, name) \ + FOP_START(name) \ + FOP2E(op##b, dl, al) \ + FOP2E(op##w, dx, ax) \ + FOP2E(op##l, edx, eax) \ + ON64(FOP2E(op##q, rdx, rax)) \ + FOP_END + #define FOP3E(op, dst, src, src2) \ FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET @@ -477,9 +495,9 @@ address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg) } static inline unsigned long -register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg) +register_address(struct x86_emulate_ctxt *ctxt, int reg) { - return address_mask(ctxt, reg); + return address_mask(ctxt, reg_read(ctxt, reg)); } static void masked_increment(ulong *reg, ulong mask, int inc) @@ -488,7 +506,7 @@ static void masked_increment(ulong *reg, ulong mask, int inc) } static inline void -register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc) +register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc) { ulong mask; @@ -496,7 +514,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in mask = ~0UL; else mask = ad_mask(ctxt); - masked_increment(reg, mask, inc); + masked_increment(reg_rmw(ctxt, reg), mask, inc); } static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) @@ -564,40 +582,6 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } -static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, - int cs_l) -{ - switch (ctxt->op_bytes) { - case 2: - ctxt->_eip = (u16)dst; - break; - case 4: - ctxt->_eip = (u32)dst; - break; -#ifdef CONFIG_X86_64 - case 8: - if ((cs_l && is_noncanonical_address(dst)) || - (!cs_l && (dst >> 32) != 0)) - return emulate_gp(ctxt, 0); - ctxt->_eip = dst; - break; -#endif - default: - 
WARN(1, "unsupported eip assignment size\n"); - } - return X86EMUL_CONTINUE; -} - -static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) -{ - return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64); -} - -static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) -{ - return assign_eip_near(ctxt, ctxt->_eip + rel); -} - static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) { u16 selector; @@ -641,25 +625,24 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size) return true; } -static int __linearize(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr, - unsigned *max_size, unsigned size, - bool write, bool fetch, - ulong *linear) +static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned *max_size, unsigned size, + bool write, bool fetch, + enum x86emul_mode mode, ulong *linear) { struct desc_struct desc; bool usable; ulong la; u32 lim; u16 sel; - unsigned cpl; la = seg_base(ctxt, addr.seg) + addr.ea; *max_size = 0; - switch (ctxt->mode) { + switch (mode) { case X86EMUL_MODE_PROT64: - if (((signed long)la << 16) >> 16 != la) - return emulate_gp(ctxt, 0); + if (is_noncanonical_address(la)) + goto bad; *max_size = min_t(u64, ~0u, (1ull << 48) - la); if (size > *max_size) @@ -678,46 +661,20 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, if (!fetch && (desc.type & 8) && !(desc.type & 2)) goto bad; lim = desc_limit_scaled(&desc); - if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch && - (ctxt->d & NoBigReal)) { - /* la is between zero and 0xffff */ - if (la > 0xffff) - goto bad; - *max_size = 0x10000 - la; - } else if ((desc.type & 8) || !(desc.type & 4)) { - /* expand-up segment */ - if (addr.ea > lim) - goto bad; - *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); - } else { + if (!(desc.type & 8) && (desc.type & 4)) { /* expand-down segment */ if (addr.ea <= lim) goto bad; lim = desc.d ? 
0xffffffff : 0xffff; - if (addr.ea > lim) - goto bad; - *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); } + if (addr.ea > lim) + goto bad; + *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); if (size > *max_size) goto bad; - cpl = ctxt->ops->cpl(ctxt); - if (!(desc.type & 8)) { - /* data segment */ - if (cpl > desc.dpl) - goto bad; - } else if ((desc.type & 8) && !(desc.type & 4)) { - /* nonconforming code segment */ - if (cpl != desc.dpl) - goto bad; - } else if ((desc.type & 8) && (desc.type & 4)) { - /* conforming code segment */ - if (cpl < desc.dpl) - goto bad; - } + la &= (u32)-1; break; } - if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8) - la &= (u32)-1; if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) return emulate_gp(ctxt, 0); *linear = la; @@ -735,9 +692,55 @@ static int linearize(struct x86_emulate_ctxt *ctxt, ulong *linear) { unsigned max_size; - return __linearize(ctxt, addr, &max_size, size, write, false, linear); + return __linearize(ctxt, addr, &max_size, size, write, false, + ctxt->mode, linear); +} + +static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, + enum x86emul_mode mode) +{ + ulong linear; + int rc; + unsigned max_size; + struct segmented_address addr = { .seg = VCPU_SREG_CS, + .ea = dst }; + + if (ctxt->op_bytes != sizeof(unsigned long)) + addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); + rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear); + if (rc == X86EMUL_CONTINUE) + ctxt->_eip = addr.ea; + return rc; +} + +static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) +{ + return assign_eip(ctxt, dst, ctxt->mode); } +static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, + const struct desc_struct *cs_desc) +{ + enum x86emul_mode mode = ctxt->mode; + +#ifdef CONFIG_X86_64 + if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) { + u64 efer = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + mode = 
X86EMUL_MODE_PROT64; + } +#endif + if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) + mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + return assign_eip(ctxt, dst, mode); +} + +static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) +{ + return assign_eip_near(ctxt, ctxt->_eip + rel); +} static int segmented_read_std(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, @@ -776,7 +779,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) * boundary check itself. Instead, we use max_size to check * against op_size. */ - rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear); + rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode, + &linear); if (unlikely(rc != X86EMUL_CONTINUE)) return rc; @@ -911,6 +915,8 @@ FASTOP2W(btc); FASTOP2(xadd); +FASTOP2R(cmp, cmp_r); + static u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; @@ -1221,6 +1227,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if (index_reg != 4) modrm_ea += reg_read(ctxt, index_reg) << scale; } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { + modrm_ea += insn_fetch(s32, ctxt); if (ctxt->mode == X86EMUL_MODE_PROT64) ctxt->rip_relative = 1; } else { @@ -1229,10 +1236,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, adjust_modrm_seg(ctxt, base_reg); } switch (ctxt->modrm_mod) { - case 0: - if (ctxt->modrm_rm == 5) - modrm_ea += insn_fetch(s32, ctxt); - break; case 1: modrm_ea += insn_fetch(s8, ctxt); break; @@ -1284,7 +1287,8 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt) else sv = (s64)ctxt->src.val & (s64)mask; - ctxt->dst.addr.mem.ea += (sv >> 3); + ctxt->dst.addr.mem.ea = address_mask(ctxt, + ctxt->dst.addr.mem.ea + (sv >> 3)); } /* only subword offset */ @@ -1610,6 +1614,9 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, sizeof(base3), &ctxt->exception); if (ret != X86EMUL_CONTINUE) return ret; + if 
(is_noncanonical_address(get_desc_base(&seg_desc) | + ((u64)base3 << 32))) + return emulate_gp(ctxt, 0); } load: ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); @@ -1807,6 +1814,10 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt) int seg = ctxt->src2.val; ctxt->src.val = get_segment_selector(ctxt, seg); + if (ctxt->op_bytes == 4) { + rsp_increment(ctxt, -2); + ctxt->op_bytes = 2; + } return em_push(ctxt); } @@ -1850,7 +1861,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) static int em_pushf(struct x86_emulate_ctxt *ctxt) { - ctxt->src.val = (unsigned long)ctxt->eflags; + ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM; return em_push(ctxt); } @@ -2035,7 +2046,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); + rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); if (rc != X86EMUL_CONTINUE) { WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); /* assigning eip failed; restore the old cs */ @@ -2045,31 +2056,22 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) return rc; } -static int em_grp45(struct x86_emulate_ctxt *ctxt) +static int em_jmp_abs(struct x86_emulate_ctxt *ctxt) { - int rc = X86EMUL_CONTINUE; + return assign_eip_near(ctxt, ctxt->src.val); +} - switch (ctxt->modrm_reg) { - case 2: /* call near abs */ { - long int old_eip; - old_eip = ctxt->_eip; - rc = assign_eip_near(ctxt, ctxt->src.val); - if (rc != X86EMUL_CONTINUE) - break; - ctxt->src.val = old_eip; - rc = em_push(ctxt); - break; - } - case 4: /* jmp abs */ - rc = assign_eip_near(ctxt, ctxt->src.val); - break; - case 5: /* jmp far */ - rc = em_jmp_far(ctxt); - break; - case 6: /* push */ - rc = em_push(ctxt); - break; - } +static int em_call_near_abs(struct x86_emulate_ctxt *ctxt) +{ + int rc; + long int old_eip; + + old_eip = ctxt->_eip; + rc = assign_eip_near(ctxt, ctxt->src.val); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->src.val = old_eip; + rc = 
em_push(ctxt); return rc; } @@ -2128,11 +2130,11 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) /* Outer-privilege level return is not implemented */ if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) return X86EMUL_UNHANDLEABLE; - rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false, + rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false, &new_desc); if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, eip, new_desc.l); + rc = assign_eip_far(ctxt, eip, &new_desc); if (rc != X86EMUL_CONTINUE) { WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); @@ -2316,6 +2318,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~msr_data; + ctxt->eflags |= EFLG_RESERVED_ONE_MASK; #endif } else { /* legacy mode */ @@ -2349,11 +2352,9 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) && !vendor_intel(ctxt)) return emulate_ud(ctxt); - /* XXX sysenter/sysexit have not been tested in 64bit mode. - * Therefore, we inject an #UD. - */ + /* sysenter/sysexit have not been tested in 64bit mode. 
*/ if (ctxt->mode == X86EMUL_MODE_PROT64) - return emulate_ud(ctxt); + return X86EMUL_UNHANDLEABLE; setup_syscalls_segments(ctxt, &cs, &ss); @@ -2425,6 +2426,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) if ((msr_data & 0xfffc) == 0x0) return emulate_gp(ctxt, 0); ss_sel = (u16)(msr_data + 24); + rcx = (u32)rcx; + rdx = (u32)rdx; break; case X86EMUL_MODE_PROT64: cs_sel = (u16)(msr_data + 32); @@ -2599,7 +2602,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; save_state_to_tss16(ctxt, &tss_seg); @@ -2607,13 +2609,11 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; if (old_tss_sel != 0xffff) { @@ -2624,7 +2624,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, sizeof tss_seg.prev_task_link, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; } @@ -2813,7 +2812,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, * * 1. jmp/call/int to task gate: Check against DPL of the task gate * 2. Exception/IRQ/iret: No check is performed - * 3. jmp/call to TSS: Check against DPL of the TSS + * 3. jmp/call to TSS/task-gate: No check is performed since the + * hardware checks it before exiting. 
*/ if (reason == TASK_SWITCH_GATE) { if (idt_index != -1) { @@ -2830,13 +2830,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) return emulate_gp(ctxt, (idt_index << 3) | 0x2); } - } else if (reason != TASK_SWITCH_IRET) { - int dpl = next_tss_desc.dpl; - if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) - return emulate_gp(ctxt, tss_selector); } - desc_limit = desc_limit_scaled(&next_tss_desc); if (!next_tss_desc.p || ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || @@ -2913,8 +2908,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, { int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count; - register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); - op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); + register_address_increment(ctxt, reg, df * op->bytes); + op->addr.mem.ea = register_address(ctxt, reg); } static int em_das(struct x86_emulate_ctxt *ctxt) @@ -3025,7 +3020,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return X86EMUL_CONTINUE; - rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); + rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); if (rc != X86EMUL_CONTINUE) goto fail; @@ -3215,6 +3210,8 @@ static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) return emulate_ud(ctxt); ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg); + if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM) + ctxt->dst.bytes = 2; return X86EMUL_CONTINUE; } @@ -3317,7 +3314,7 @@ static int em_sidt(struct x86_emulate_ctxt *ctxt) return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt); } -static int em_lgdt(struct x86_emulate_ctxt *ctxt) +static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) { struct desc_ptr desc_ptr; int rc; @@ -3329,12 +3326,23 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - ctxt->ops->set_gdt(ctxt, 
&desc_ptr); + if (ctxt->mode == X86EMUL_MODE_PROT64 && + is_noncanonical_address(desc_ptr.address)) + return emulate_gp(ctxt, 0); + if (lgdt) + ctxt->ops->set_gdt(ctxt, &desc_ptr); + else + ctxt->ops->set_idt(ctxt, &desc_ptr); /* Disable writeback. */ ctxt->dst.type = OP_NONE; return X86EMUL_CONTINUE; } +static int em_lgdt(struct x86_emulate_ctxt *ctxt) +{ + return em_lgdt_lidt(ctxt, true); +} + static int em_vmmcall(struct x86_emulate_ctxt *ctxt) { int rc; @@ -3348,20 +3356,7 @@ static int em_vmmcall(struct x86_emulate_ctxt *ctxt) static int em_lidt(struct x86_emulate_ctxt *ctxt) { - struct desc_ptr desc_ptr; - int rc; - - if (ctxt->mode == X86EMUL_MODE_PROT64) - ctxt->op_bytes = 8; - rc = read_descriptor(ctxt, ctxt->src.addr.mem, - &desc_ptr.size, &desc_ptr.address, - ctxt->op_bytes); - if (rc != X86EMUL_CONTINUE) - return rc; - ctxt->ops->set_idt(ctxt, &desc_ptr); - /* Disable writeback. */ - ctxt->dst.type = OP_NONE; - return X86EMUL_CONTINUE; + return em_lgdt_lidt(ctxt, false); } static int em_smsw(struct x86_emulate_ctxt *ctxt) @@ -3384,7 +3379,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt) { int rc = X86EMUL_CONTINUE; - register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); + register_address_increment(ctxt, VCPU_REGS_RCX, -1); if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) rc = jmp_rel(ctxt, ctxt->src.val); @@ -3554,7 +3549,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if (efer & EFER_LMA) - rsvd = CR3_L_MODE_RESERVED_BITS; + rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD; if (new_val & rsvd) return emulate_gp(ctxt, 0); @@ -3596,8 +3591,15 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) return emulate_ud(ctxt); - if (check_dr7_gd(ctxt)) + if (check_dr7_gd(ctxt)) { + ulong dr6; + + ctxt->ops->get_dr(ctxt, 6, &dr6); + dr6 &= ~15; + dr6 |= DR6_BD 
| DR6_RTM; + ctxt->ops->set_dr(ctxt, 6, dr6); return emulate_db(ctxt); + } return X86EMUL_CONTINUE; } @@ -3684,6 +3686,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } +#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } @@ -3780,11 +3783,11 @@ static const struct opcode group4[] = { static const struct opcode group5[] = { F(DstMem | SrcNone | Lock, em_inc), F(DstMem | SrcNone | Lock, em_dec), - I(SrcMem | Stack, em_grp45), + I(SrcMem | NearBranch, em_call_near_abs), I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), - I(SrcMem | Stack, em_grp45), - I(SrcMemFAddr | ImplicitOps, em_grp45), - I(SrcMem | Stack, em_grp45), D(Undefined), + I(SrcMem | NearBranch, em_jmp_abs), + I(SrcMemFAddr | ImplicitOps, em_jmp_far), + I(SrcMem | Stack, em_push), D(Undefined), }; static const struct opcode group6[] = { @@ -3845,8 +3848,12 @@ static const struct gprefix pfx_0f_6f_0f_7f = { I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; +static const struct instr_dual instr_dual_0f_2b = { + I(0, em_mov), N +}; + static const struct gprefix pfx_0f_2b = { - I(0, em_mov), I(0, em_mov), N, N, + ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N, }; static const struct gprefix pfx_0f_28_0f_29 = { @@ -3920,6 +3927,10 @@ static const struct escape escape_dd = { { N, N, N, N, N, N, N, N, } }; +static const struct instr_dual instr_dual_0f_c3 = { + I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N +}; + static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ F6ALU(Lock, em_add), @@ -3964,7 +3975,7 @@ static const struct 
opcode opcode_table[256] = { I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */ I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ - X16(D(SrcImmByte)), + X16(D(SrcImmByte | NearBranch)), /* 0x80 - 0x87 */ G(ByteOp | DstMem | SrcImm, group1), G(DstMem | SrcImm, group1), @@ -3991,20 +4002,20 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - F2bv(SrcSI | DstDI | String | NoWrite, em_cmp), + F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r), /* 0xA8 - 0xAF */ F2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), + F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), - I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), - I(ImplicitOps | Stack, em_ret), + I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm), + I(ImplicitOps | NearBranch, em_ret), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), G(ByteOp, group11), G(0, group11), @@ -4024,13 +4035,14 @@ static const struct opcode opcode_table[256] = { /* 0xD8 - 0xDF */ N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, /* 0xE0 - 0xE7 */ - X3(I(SrcImmByte, em_loop)), - I(SrcImmByte, em_jcxz), + X3(I(SrcImmByte | NearBranch, em_loop)), + I(SrcImmByte | NearBranch, em_jcxz), I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), /* 0xE8 - 0xEF */ - I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps), - 
I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), + I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch), + I(SrcImmFAddr | No64, em_jmp_far), + D(SrcImmByte | ImplicitOps | NearBranch), I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out), /* 0xF0 - 0xF7 */ @@ -4090,7 +4102,7 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x80 - 0x8F */ - X16(D(SrcImm)), + X16(D(SrcImm | NearBranch)), /* 0x90 - 0x9F */ X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ @@ -4121,7 +4133,7 @@ static const struct opcode twobyte_table[256] = { D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), - N, D(DstMem | SrcReg | ModRM | Mov), + N, ID(0, &instr_dual_0f_c3), N, N, N, GD(0, &group9), /* 0xC8 - 0xCF */ X8(I(DstReg, em_bswap)), @@ -4134,12 +4146,20 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N }; +static const struct instr_dual instr_dual_0f_38_f0 = { + I(DstReg | SrcMem | Mov, em_movbe), N +}; + +static const struct instr_dual instr_dual_0f_38_f1 = { + I(DstMem | SrcReg | Mov, em_movbe), N +}; + static const struct gprefix three_byte_0f_38_f0 = { - I(DstReg | SrcMem | Mov, em_movbe), N, N, N + ID(0, &instr_dual_0f_38_f0), N, N, N }; static const struct gprefix three_byte_0f_38_f1 = { - I(DstMem | SrcReg | Mov, em_movbe), N, N, N + ID(0, &instr_dual_0f_38_f1), N, N, N }; /* @@ -4152,8 +4172,8 @@ static const struct opcode opcode_map_0f_38[256] = { /* 0x80 - 0xef */ X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), /* 0xf0 - 0xf1 */ - GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0), - GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1), + GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0), + GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1), /* 0xf2 - 0xff 
*/ N, N, X4(N), X8(N) }; @@ -4275,7 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); + register_address(ctxt, VCPU_REGS_RDI); op->addr.mem.seg = VCPU_SREG_ES; op->val = 0; op->count = 1; @@ -4329,7 +4349,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); + register_address(ctxt, VCPU_REGS_RSI); op->addr.mem.seg = ctxt->seg_override; op->val = 0; op->count = 1; @@ -4338,7 +4358,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, + address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RBX) + (reg_read(ctxt, VCPU_REGS_RAX) & 0xff)); op->addr.mem.seg = ctxt->seg_override; @@ -4510,8 +4530,7 @@ done_prefixes: /* vex-prefix instructions are not implemented */ if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) && - (mode == X86EMUL_MODE_PROT64 || - (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) { + (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) { ctxt->d = NotImpl; } @@ -4549,6 +4568,12 @@ done_prefixes: else opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; break; + case InstrDual: + if ((ctxt->modrm >> 6) == 3) + opcode = opcode.u.idual->mod3; + else + opcode = opcode.u.idual->mod012; + break; default: return EMULATION_FAILED; } @@ -4567,7 +4592,8 @@ done_prefixes: return EMULATION_FAILED; if (unlikely(ctxt->d & - (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) { + (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch| + No16))) { /* * These are copied unconditionally here, and checked unconditionally * in x86_emulate_insn. 
@@ -4578,8 +4604,12 @@ done_prefixes: if (ctxt->d & NotImpl) return EMULATION_FAILED; - if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) - ctxt->op_bytes = 8; + if (mode == X86EMUL_MODE_PROT64) { + if (ctxt->op_bytes == 4 && (ctxt->d & Stack)) + ctxt->op_bytes = 8; + else if (ctxt->d & NearBranch) + ctxt->op_bytes = 8; + } if (ctxt->d & Op3264) { if (mode == X86EMUL_MODE_PROT64) @@ -4588,6 +4618,9 @@ done_prefixes: ctxt->op_bytes = 4; } + if ((ctxt->d & No16) && ctxt->op_bytes == 2) + ctxt->op_bytes = 4; + if (ctxt->d & Sse) ctxt->op_bytes = 16; else if (ctxt->d & Mmx) @@ -4631,7 +4664,8 @@ done_prefixes: rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); if (ctxt->rip_relative) - ctxt->memopp->addr.mem.ea += ctxt->_eip; + ctxt->memopp->addr.mem.ea = address_mask(ctxt, + ctxt->memopp->addr.mem.ea + ctxt->_eip); done: return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; @@ -4775,6 +4809,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + /* Instruction can only be executed in protected mode */ + if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { + rc = emulate_ud(ctxt); + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ if ((ctxt->d & Priv) && ops->cpl(ctxt)) { if (ctxt->d & PrivUD) @@ -4784,12 +4824,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - /* Instruction can only be executed in protected mode */ - if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { - rc = emulate_ud(ctxt); - goto done; - } - /* Do instruction specific permission checks */ if (ctxt->d & CheckPerm) { rc = ctxt->check_perm(ctxt); @@ -4974,8 +5008,7 @@ writeback: count = ctxt->src.count; else count = ctxt->dst.count; - register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), - -count); + register_address_increment(ctxt, VCPU_REGS_RCX, -count); if (!string_insn_completed(ctxt)) { /* @@ -5053,11 +5086,6 @@ twobyte_insn: ctxt->dst.val = (ctxt->src.bytes == 1) ? 
(s8) ctxt->src.val : (s16) ctxt->src.val; break; - case 0xc3: /* movnti */ - ctxt->dst.bytes = ctxt->op_bytes; - ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val : - (u32) ctxt->src.val; - break; default: goto cannot_emulate; } diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c new file mode 100644 index 000000000000..b1947e0f3e10 --- /dev/null +++ b/arch/x86/kvm/ioapic.c @@ -0,0 +1,675 @@ +/* + * Copyright (C) 2001 MandrakeSoft S.A. + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * + * MandrakeSoft S.A. + * 43, rue d'Aboukir + * 75002 Paris - France + * http://www.linux-mandrake.com/ + * http://www.mandrakesoft.com/ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Yunhong Jiang <yunhong.jiang@intel.com> + * Yaozu (Eddie) Dong <eddie.dong@intel.com> + * Based on Xen 3.1 code. + */ + +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/smp.h> +#include <linux/hrtimer.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/current.h> +#include <trace/events/kvm.h> + +#include "ioapic.h" +#include "lapic.h" +#include "irq.h" + +#if 0 +#define ioapic_debug(fmt,arg...) 
printk(KERN_WARNING fmt,##arg) +#else +#define ioapic_debug(fmt, arg...) +#endif +static int ioapic_service(struct kvm_ioapic *vioapic, int irq, + bool line_status); + +static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, + unsigned long addr, + unsigned long length) +{ + unsigned long result = 0; + + switch (ioapic->ioregsel) { + case IOAPIC_REG_VERSION: + result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16) + | (IOAPIC_VERSION_ID & 0xff)); + break; + + case IOAPIC_REG_APIC_ID: + case IOAPIC_REG_ARB_ID: + result = ((ioapic->id & 0xf) << 24); + break; + + default: + { + u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; + u64 redir_content; + + if (redir_index < IOAPIC_NUM_PINS) + redir_content = + ioapic->redirtbl[redir_index].bits; + else + redir_content = ~0ULL; + + result = (ioapic->ioregsel & 0x1) ? + (redir_content >> 32) & 0xffffffff : + redir_content & 0xffffffff; + break; + } + } + + return result; +} + +static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) +{ + ioapic->rtc_status.pending_eoi = 0; + bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); +} + +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); + +static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic) +{ + if (WARN_ON(ioapic->rtc_status.pending_eoi < 0)) + kvm_rtc_eoi_tracking_restore_all(ioapic); +} + +static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + bool new_val, old_val; + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + union kvm_ioapic_redirect_entry *e; + + e = &ioapic->redirtbl[RTC_GSI]; + if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, + e->fields.dest_mode)) + return; + + new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); + old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + + if (new_val == old_val) + return; + + if (new_val) { + __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi++; + } else { + 
__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi--; + rtc_status_pending_eoi_check_valid(ioapic); + } +} + +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + + spin_lock(&ioapic->lock); + __rtc_irq_eoi_tracking_restore_one(vcpu); + spin_unlock(&ioapic->lock); +} + +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) +{ + struct kvm_vcpu *vcpu; + int i; + + if (RTC_GSI >= IOAPIC_NUM_PINS) + return; + + rtc_irq_eoi_tracking_reset(ioapic); + kvm_for_each_vcpu(i, vcpu, ioapic->kvm) + __rtc_irq_eoi_tracking_restore_one(vcpu); +} + +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) +{ + if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) { + --ioapic->rtc_status.pending_eoi; + rtc_status_pending_eoi_check_valid(ioapic); + } +} + +static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) +{ + if (ioapic->rtc_status.pending_eoi > 0) + return true; /* coalesced */ + + return false; +} + +static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, + int irq_level, bool line_status) +{ + union kvm_ioapic_redirect_entry entry; + u32 mask = 1 << irq; + u32 old_irr; + int edge, ret; + + entry = ioapic->redirtbl[irq]; + edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + + if (!irq_level) { + ioapic->irr &= ~mask; + ret = 1; + goto out; + } + + /* + * Return 0 for coalesced interrupts; for edge-triggered interrupts, + * this only happens if a previous edge has not been delivered due + * to masking. For level interrupts, the remote_irr field tells + * us if the interrupt is waiting for an EOI. + * + * RTC is special: it is edge-triggered, but userspace likes to know + * if it has been already ack-ed via EOI because coalesced RTC + * interrupts lead to time drift in Windows guests. So we track + * EOI manually for the RTC interrupt.
+ */ + if (irq == RTC_GSI && line_status && + rtc_irq_check_coalesced(ioapic)) { + ret = 0; + goto out; + } + + old_irr = ioapic->irr; + ioapic->irr |= mask; + if ((edge && old_irr == ioapic->irr) || + (!edge && entry.fields.remote_irr)) { + ret = 0; + goto out; + } + + ret = ioapic_service(ioapic, irq, line_status); + +out: + trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); + return ret; +} + +static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) +{ + u32 idx; + + rtc_irq_eoi_tracking_reset(ioapic); + for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS) + ioapic_set_irq(ioapic, idx, 1, true); + + kvm_rtc_eoi_tracking_restore_all(ioapic); +} + + +static void update_handled_vectors(struct kvm_ioapic *ioapic) +{ + DECLARE_BITMAP(handled_vectors, 256); + int i; + + memset(handled_vectors, 0, sizeof(handled_vectors)); + for (i = 0; i < IOAPIC_NUM_PINS; ++i) + __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); + memcpy(ioapic->handled_vectors, handled_vectors, + sizeof(handled_vectors)); + smp_wmb(); +} + +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + union kvm_ioapic_redirect_entry *e; + int index; + + spin_lock(&ioapic->lock); + for (index = 0; index < IOAPIC_NUM_PINS; index++) { + e = &ioapic->redirtbl[index]; + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || + kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || + index == RTC_GSI) { + if (kvm_apic_match_dest(vcpu, NULL, 0, + e->fields.dest_id, e->fields.dest_mode)) { + __set_bit(e->fields.vector, + (unsigned long *)eoi_exit_bitmap); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) + __set_bit(e->fields.vector, + (unsigned long *)tmr); + } + } + } + spin_unlock(&ioapic->lock); +} + +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + + if (!ioapic) + return; + kvm_make_scan_ioapic_request(kvm); +} + +static void 
ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) +{ + unsigned index; + bool mask_before, mask_after; + union kvm_ioapic_redirect_entry *e; + + switch (ioapic->ioregsel) { + case IOAPIC_REG_VERSION: + /* Writes are ignored. */ + break; + + case IOAPIC_REG_APIC_ID: + ioapic->id = (val >> 24) & 0xf; + break; + + case IOAPIC_REG_ARB_ID: + break; + + default: + index = (ioapic->ioregsel - 0x10) >> 1; + + ioapic_debug("change redir index %x val %x\n", index, val); + if (index >= IOAPIC_NUM_PINS) + return; + e = &ioapic->redirtbl[index]; + mask_before = e->fields.mask; + if (ioapic->ioregsel & 1) { + e->bits &= 0xffffffff; + e->bits |= (u64) val << 32; + } else { + e->bits &= ~0xffffffffULL; + e->bits |= (u32) val; + e->fields.remote_irr = 0; + } + update_handled_vectors(ioapic); + mask_after = e->fields.mask; + if (mask_before != mask_after) + kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG + && ioapic->irr & (1 << index)) + ioapic_service(ioapic, index, false); + kvm_vcpu_request_scan_ioapic(ioapic->kvm); + break; + } +} + +static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) +{ + union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; + struct kvm_lapic_irq irqe; + int ret; + + if (entry->fields.mask) + return -1; + + ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " + "vector=%x trig_mode=%x\n", + entry->fields.dest_id, entry->fields.dest_mode, + entry->fields.delivery_mode, entry->fields.vector, + entry->fields.trig_mode); + + irqe.dest_id = entry->fields.dest_id; + irqe.vector = entry->fields.vector; + irqe.dest_mode = entry->fields.dest_mode; + irqe.trig_mode = entry->fields.trig_mode; + irqe.delivery_mode = entry->fields.delivery_mode << 8; + irqe.level = 1; + irqe.shorthand = 0; + + if (irqe.trig_mode == IOAPIC_EDGE_TRIG) + ioapic->irr &= ~(1 << irq); + + if (irq == RTC_GSI && line_status) { + /* + * pending_eoi cannot ever become negative 
(see + * rtc_status_pending_eoi_check_valid) and the caller + * ensures that it is only called if it is >= zero, namely + * if rtc_irq_check_coalesced returns false). + */ + BUG_ON(ioapic->rtc_status.pending_eoi != 0); + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, + ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); + } else + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); + + if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) + entry->fields.remote_irr = 1; + + return ret; +} + +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, + int level, bool line_status) +{ + int ret, irq_level; + + BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); + + spin_lock(&ioapic->lock); + irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], + irq_source_id, level); + ret = ioapic_set_irq(ioapic, irq, irq_level, line_status); + + spin_unlock(&ioapic->lock); + + return ret; +} + +void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) +{ + int i; + + spin_lock(&ioapic->lock); + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) + __clear_bit(irq_source_id, &ioapic->irq_states[i]); + spin_unlock(&ioapic->lock); +} + +static void kvm_ioapic_eoi_inject_work(struct work_struct *work) +{ + int i; + struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic, + eoi_inject.work); + spin_lock(&ioapic->lock); + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; + + if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG) + continue; + + if (ioapic->irr & (1 << i) && !ent->fields.remote_irr) + ioapic_service(ioapic, i, false); + } + spin_unlock(&ioapic->lock); +} + +#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 + +static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, + struct kvm_ioapic *ioapic, int vector, int trigger_mode) +{ + int i; + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; + + if 
(ent->fields.vector != vector) + continue; + + if (i == RTC_GSI) + rtc_irq_eoi(ioapic, vcpu); + /* + * We are dropping lock while calling ack notifiers because ack + * notifier callbacks for assigned devices call into IOAPIC + * recursively. Since remote_irr is cleared only after call + * to notifiers if the same vector will be delivered while lock + * is dropped it will be put into irr and will be delivered + * after ack notifier returns. + */ + spin_unlock(&ioapic->lock); + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); + spin_lock(&ioapic->lock); + + if (trigger_mode != IOAPIC_LEVEL_TRIG) + continue; + + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); + ent->fields.remote_irr = 0; + if (!ent->fields.mask && (ioapic->irr & (1 << i))) { + ++ioapic->irq_eoi[i]; + if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { + /* + * Real hardware does not deliver the interrupt + * immediately during eoi broadcast, and this + * lets a buggy guest make slow progress + * even if it does not correctly handle a + * level-triggered interrupt. Emulate this + * behavior if we detect an interrupt storm. 
+ */ + schedule_delayed_work(&ioapic->eoi_inject, HZ / 100); + ioapic->irq_eoi[i] = 0; + trace_kvm_ioapic_delayed_eoi_inj(ent->bits); + } else { + ioapic_service(ioapic, i, false); + } + } else { + ioapic->irq_eoi[i] = 0; + } + } +} + +bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + smp_rmb(); + return test_bit(vector, ioapic->handled_vectors); +} + +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + + spin_lock(&ioapic->lock); + __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode); + spin_unlock(&ioapic->lock); +} + +static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) +{ + return container_of(dev, struct kvm_ioapic, dev); +} + +static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) +{ + return ((addr >= ioapic->base_address && + (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); +} + +static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, + void *val) +{ + struct kvm_ioapic *ioapic = to_ioapic(this); + u32 result; + if (!ioapic_in_range(ioapic, addr)) + return -EOPNOTSUPP; + + ioapic_debug("addr %lx\n", (unsigned long)addr); + ASSERT(!(addr & 0xf)); /* check alignment */ + + addr &= 0xff; + spin_lock(&ioapic->lock); + switch (addr) { + case IOAPIC_REG_SELECT: + result = ioapic->ioregsel; + break; + + case IOAPIC_REG_WINDOW: + result = ioapic_read_indirect(ioapic, addr, len); + break; + + default: + result = 0; + break; + } + spin_unlock(&ioapic->lock); + + switch (len) { + case 8: + *(u64 *) val = result; + break; + case 1: + case 2: + case 4: + memcpy(val, (char *)&result, len); + break; + default: + printk(KERN_WARNING "ioapic: wrong length %d\n", len); + } + return 0; +} + +static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, + const void *val) +{ + struct kvm_ioapic *ioapic = to_ioapic(this); + u32 data; + if 
(!ioapic_in_range(ioapic, addr)) + return -EOPNOTSUPP; + + ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", + (void*)addr, len, val); + ASSERT(!(addr & 0xf)); /* check alignment */ + + switch (len) { + case 8: + case 4: + data = *(u32 *) val; + break; + case 2: + data = *(u16 *) val; + break; + case 1: + data = *(u8 *) val; + break; + default: + printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); + return 0; + } + + addr &= 0xff; + spin_lock(&ioapic->lock); + switch (addr) { + case IOAPIC_REG_SELECT: + ioapic->ioregsel = data & 0xFF; /* 8-bit register */ + break; + + case IOAPIC_REG_WINDOW: + ioapic_write_indirect(ioapic, data); + break; + + default: + break; + } + spin_unlock(&ioapic->lock); + return 0; +} + +static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) +{ + int i; + + cancel_delayed_work_sync(&ioapic->eoi_inject); + for (i = 0; i < IOAPIC_NUM_PINS; i++) + ioapic->redirtbl[i].fields.mask = 1; + ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; + ioapic->ioregsel = 0; + ioapic->irr = 0; + ioapic->id = 0; + memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi)); /* irq_eoi is a u32 array: clear all of it, not just IOAPIC_NUM_PINS bytes */ + rtc_irq_eoi_tracking_reset(ioapic); + update_handled_vectors(ioapic); +} + +static const struct kvm_io_device_ops ioapic_mmio_ops = { + .read = ioapic_mmio_read, + .write = ioapic_mmio_write, +}; + +int kvm_ioapic_init(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic; + int ret; + + ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); + if (!ioapic) + return -ENOMEM; + spin_lock_init(&ioapic->lock); + INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work); + kvm->arch.vioapic = ioapic; + kvm_ioapic_reset(ioapic); + kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); + ioapic->kvm = kvm; + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, + IOAPIC_MEM_LENGTH, &ioapic->dev); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kvm->arch.vioapic = NULL; + kfree(ioapic); + } + + return ret; +} + +void
kvm_ioapic_destroy(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + + if (ioapic) { /* cancel the delayed EOI work only when an ioapic exists; the old order dereferenced a NULL ioapic */ + cancel_delayed_work_sync(&ioapic->eoi_inject); + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + kvm->arch.vioapic = NULL; + kfree(ioapic); + } +} + +int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +{ + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + if (!ioapic) + return -EINVAL; + + spin_lock(&ioapic->lock); + memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); + spin_unlock(&ioapic->lock); + return 0; +} + +int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +{ + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + if (!ioapic) + return -EINVAL; + + spin_lock(&ioapic->lock); + memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); + ioapic->irr = 0; + update_handled_vectors(ioapic); + kvm_vcpu_request_scan_ioapic(kvm); + kvm_ioapic_inject_all(ioapic, state->irr); + spin_unlock(&ioapic->lock); + return 0; +} diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h new file mode 100644 index 000000000000..3c9195535ffc --- /dev/null +++ b/arch/x86/kvm/ioapic.h @@ -0,0 +1,119 @@ +#ifndef __KVM_IO_APIC_H +#define __KVM_IO_APIC_H + +#include <linux/kvm_host.h> + +#include "iodev.h" + +struct kvm; +struct kvm_vcpu; + +#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS +#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ +#define IOAPIC_EDGE_TRIG 0 +#define IOAPIC_LEVEL_TRIG 1 + +#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000 +#define IOAPIC_MEM_LENGTH 0x100 + +/* Direct registers. */ +#define IOAPIC_REG_SELECT 0x00 +#define IOAPIC_REG_WINDOW 0x10 + +/* Indirect registers.
*/ +#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ +#define IOAPIC_REG_VERSION 0x01 +#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ + +/*ioapic delivery mode*/ +#define IOAPIC_FIXED 0x0 +#define IOAPIC_LOWEST_PRIORITY 0x1 +#define IOAPIC_PMI 0x2 +#define IOAPIC_NMI 0x4 +#define IOAPIC_INIT 0x5 +#define IOAPIC_EXTINT 0x7 + +#ifdef CONFIG_X86 +#define RTC_GSI 8 +#else +#define RTC_GSI -1U +#endif + +struct rtc_status { + int pending_eoi; + DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); +}; + +union kvm_ioapic_redirect_entry { + u64 bits; + struct { + u8 vector; + u8 delivery_mode:3; + u8 dest_mode:1; + u8 delivery_status:1; + u8 polarity:1; + u8 remote_irr:1; + u8 trig_mode:1; + u8 mask:1; + u8 reserve:7; + u8 reserved[4]; + u8 dest_id; + } fields; +}; + +struct kvm_ioapic { + u64 base_address; + u32 ioregsel; + u32 id; + u32 irr; + u32 pad; + union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; + unsigned long irq_states[IOAPIC_NUM_PINS]; + struct kvm_io_device dev; + struct kvm *kvm; + void (*ack_notifier)(void *opaque, int irq); + spinlock_t lock; + DECLARE_BITMAP(handled_vectors, 256); + struct rtc_status rtc_status; + struct delayed_work eoi_inject; + u32 irq_eoi[IOAPIC_NUM_PINS]; +}; + +#ifdef DEBUG +#define ASSERT(x) \ +do { \ + if (!(x)) { \ + printk(KERN_EMERG "assertion failed %s: %d: %s\n", \ + __FILE__, __LINE__, #x); \ + BUG(); \ + } \ +} while (0) +#else +#define ASSERT(x) do { } while (0) +#endif + +static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) +{ + return kvm->arch.vioapic; +} + +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, unsigned int dest, int dest_mode); +int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, + int trigger_mode); +bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); +int kvm_ioapic_init(struct kvm 
*kvm); +void kvm_ioapic_destroy(struct kvm *kvm); +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, + int level, bool line_status); +void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, unsigned long *dest_map); +int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); +int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr); + +#endif diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c new file mode 100644 index 000000000000..17b73eeac8a4 --- /dev/null +++ b/arch/x86/kvm/iommu.c @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Copyright IBM Corporation, 2008 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * + * Author: Allen M. 
Kay <allen.m.kay@intel.com> + * Author: Weidong Han <weidong.han@intel.com> + * Author: Ben-Ami Yassour <benami@il.ibm.com> + */ + +#include <linux/list.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/stat.h> +#include <linux/dmar.h> +#include <linux/iommu.h> +#include <linux/intel-iommu.h> +#include "assigned-dev.h" + +static bool allow_unsafe_assigned_interrupts; +module_param_named(allow_unsafe_assigned_interrupts, + allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(allow_unsafe_assigned_interrupts, + "Enable device assignment on platforms without interrupt remapping support."); + +static int kvm_iommu_unmap_memslots(struct kvm *kvm); +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages); + +static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, + unsigned long npages) +{ + gfn_t end_gfn; + pfn_t pfn; + + pfn = gfn_to_pfn_memslot(slot, gfn); + end_gfn = gfn + npages; + gfn += 1; + + if (is_error_noslot_pfn(pfn)) + return pfn; + + while (gfn < end_gfn) + gfn_to_pfn_memslot(slot, gfn++); + + return pfn; +} + +static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) +{ + unsigned long i; + + for (i = 0; i < npages; ++i) + kvm_release_pfn_clean(pfn + i); +} + +int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) +{ + gfn_t gfn, end_gfn; + pfn_t pfn; + int r = 0; + struct iommu_domain *domain = kvm->arch.iommu_domain; + int flags; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + gfn = slot->base_gfn; + end_gfn = gfn + slot->npages; + + flags = IOMMU_READ; + if (!(slot->flags & KVM_MEM_READONLY)) + flags |= IOMMU_WRITE; + if (!kvm->arch.iommu_noncoherent) + flags |= IOMMU_CACHE; + + + while (gfn < end_gfn) { + unsigned long page_size; + + /* Check if already mapped */ + if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) { + gfn += 1; + continue; + } + + /* Get the page size we 
could use to map */ + page_size = kvm_host_page_size(kvm, gfn); + + /* Make sure the page_size does not exceed the memslot */ + while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn) + page_size >>= 1; + + /* Make sure gfn is aligned to the page size we want to map */ + while ((gfn << PAGE_SHIFT) & (page_size - 1)) + page_size >>= 1; + + /* Make sure hva is aligned to the page size we want to map */ + while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) + page_size >>= 1; + + /* + * Pin all pages we are about to map in memory. This is + * important because we unmap and unpin in 4kb steps later. + */ + pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT); + if (is_error_noslot_pfn(pfn)) { + gfn += 1; + continue; + } + + /* Map into IO address space */ + r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), + page_size, flags); + if (r) { + printk(KERN_ERR "kvm_iommu_map_address:" + "iommu failed to map pfn=%llx\n", pfn); + kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT); + goto unmap_pages; + } + + gfn += page_size >> PAGE_SHIFT; + + + } + + return 0; + +unmap_pages: + kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn); + return r; +} + +static int kvm_iommu_map_memslots(struct kvm *kvm) +{ + int idx, r = 0; + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + + if (kvm->arch.iommu_noncoherent) + kvm_arch_register_noncoherent_dma(kvm); + + idx = srcu_read_lock(&kvm->srcu); + slots = kvm_memslots(kvm); + + kvm_for_each_memslot(memslot, slots) { + r = kvm_iommu_map_pages(kvm, memslot); + if (r) + break; + } + srcu_read_unlock(&kvm->srcu, idx); + + return r; +} + +int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) +{ + struct iommu_domain *domain = kvm->arch.iommu_domain; + int r; + bool noncoherent; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + if (pdev == NULL) + return -ENODEV; + + r = iommu_attach_device(domain, &pdev->dev); + if (r) { + dev_err(&pdev->dev, "kvm assign device failed ret %d", 
r); + return r; + } + + noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY); + + /* Check if need to update IOMMU page table for guest memory */ + if (noncoherent != kvm->arch.iommu_noncoherent) { + kvm_iommu_unmap_memslots(kvm); + kvm->arch.iommu_noncoherent = noncoherent; + r = kvm_iommu_map_memslots(kvm); + if (r) + goto out_unmap; + } + + pci_set_dev_assigned(pdev); + + dev_info(&pdev->dev, "kvm assign device\n"); + + return 0; +out_unmap: + kvm_iommu_unmap_memslots(kvm); + return r; +} + +int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) +{ + struct iommu_domain *domain = kvm->arch.iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + if (pdev == NULL) + return -ENODEV; + + iommu_detach_device(domain, &pdev->dev); + + pci_clear_dev_assigned(pdev); + + dev_info(&pdev->dev, "kvm deassign device\n"); + + return 0; +} + +int kvm_iommu_map_guest(struct kvm *kvm) +{ + int r; + + if (!iommu_present(&pci_bus_type)) { + printk(KERN_ERR "%s: iommu not found\n", __func__); + return -ENODEV; + } + + mutex_lock(&kvm->slots_lock); + + kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); + if (!kvm->arch.iommu_domain) { + r = -ENOMEM; + goto out_unlock; + } + + if (!allow_unsafe_assigned_interrupts && + !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) { + printk(KERN_WARNING "%s: No interrupt remapping support," + " disallowing device assignment." 
+ " Re-enble with \"allow_unsafe_assigned_interrupts=1\"" + " module option.\n", __func__); + iommu_domain_free(kvm->arch.iommu_domain); + kvm->arch.iommu_domain = NULL; + r = -EPERM; + goto out_unlock; + } + + r = kvm_iommu_map_memslots(kvm); + if (r) + kvm_iommu_unmap_memslots(kvm); + +out_unlock: + mutex_unlock(&kvm->slots_lock); + return r; +} + +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + struct iommu_domain *domain; + gfn_t end_gfn, gfn; + pfn_t pfn; + u64 phys; + + domain = kvm->arch.iommu_domain; + end_gfn = base_gfn + npages; + gfn = base_gfn; + + /* check if iommu exists and in use */ + if (!domain) + return; + + while (gfn < end_gfn) { + unsigned long unmap_pages; + size_t size; + + /* Get physical address */ + phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); + + if (!phys) { + gfn++; + continue; + } + + pfn = phys >> PAGE_SHIFT; + + /* Unmap address from IO address space */ + size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE); + unmap_pages = 1ULL << get_order(size); + + /* Unpin all pages we just unmapped to not leak any memory */ + kvm_unpin_pages(kvm, pfn, unmap_pages); + + gfn += unmap_pages; + } +} + +void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot) +{ + kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages); +} + +static int kvm_iommu_unmap_memslots(struct kvm *kvm) +{ + int idx; + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + + idx = srcu_read_lock(&kvm->srcu); + slots = kvm_memslots(kvm); + + kvm_for_each_memslot(memslot, slots) + kvm_iommu_unmap_pages(kvm, memslot); + + srcu_read_unlock(&kvm->srcu, idx); + + if (kvm->arch.iommu_noncoherent) + kvm_arch_unregister_noncoherent_dma(kvm); + + return 0; +} + +int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + struct iommu_domain *domain = kvm->arch.iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + mutex_lock(&kvm->slots_lock); + kvm_iommu_unmap_memslots(kvm); + 
kvm->arch.iommu_domain = NULL; + kvm->arch.iommu_noncoherent = false; + mutex_unlock(&kvm->slots_lock); + + iommu_domain_free(domain); + return 0; +} diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c new file mode 100644 index 000000000000..72298b3ac025 --- /dev/null +++ b/arch/x86/kvm/irq_comm.c @@ -0,0 +1,332 @@ +/* + * irq_comm.c: Common API for in kernel interrupt controller + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * + * Copyright 2010 Red Hat, Inc. and/or its affiliates. 
+ */ + +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <trace/events/kvm.h> + +#include <asm/msidef.h> + +#include "irq.h" + +#include "ioapic.h" + +static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + struct kvm_pic *pic = pic_irqchip(kvm); + return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); +} + +static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, + line_status); +} + +inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) +{ + return irq->delivery_mode == APIC_DM_LOWEST; +} + +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, unsigned long *dest_map) +{ + int i, r = -1; + struct kvm_vcpu *vcpu, *lowest = NULL; + + if (irq->dest_mode == 0 && irq->dest_id == 0xff && + kvm_is_dm_lowest_prio(irq)) { + printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); + irq->delivery_mode = APIC_DM_FIXED; + } + + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) + return r; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) + continue; + + if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, + irq->dest_id, irq->dest_mode)) + continue; + + if (!kvm_is_dm_lowest_prio(irq)) { + if (r < 0) + r = 0; + r += kvm_apic_set_irq(vcpu, irq, dest_map); + } else if (kvm_lapic_enabled(vcpu)) { + if (!lowest) + lowest = vcpu; + else if (kvm_apic_compare_prio(vcpu, lowest) < 0) + lowest = vcpu; + } + } + + if (lowest) + r = kvm_apic_set_irq(lowest, irq, dest_map); + + return r; +} + +static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm_lapic_irq *irq) +{ + trace_kvm_msi_set_irq(e->msi.address_lo, 
e->msi.data); + + irq->dest_id = (e->msi.address_lo & + MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + irq->vector = (e->msi.data & + MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; + irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; + irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; + irq->delivery_mode = e->msi.data & 0x700; + irq->level = 1; + irq->shorthand = 0; + /* TODO Deal with RH bit of MSI message address */ +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + struct kvm_lapic_irq irq; + + if (!level) + return -1; + + kvm_set_msi_irq(e, &irq); + + return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); +} + + +static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm) +{ + struct kvm_lapic_irq irq; + int r; + + kvm_set_msi_irq(e, &irq); + + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) + return r; + else + return -EWOULDBLOCK; +} + +/* + * Deliver an IRQ in an atomic context if we can, or return a failure, + * user can retry in a process context. + * Return value: + * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. + * Other values - No need to retry. + */ +int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) +{ + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + struct kvm_kernel_irq_routing_entry *e; + int ret = -EINVAL; + int idx; + + trace_kvm_set_irq(irq, level, irq_source_id); + + /* + * Injection into either PIC or IOAPIC might need to scan all CPUs, + * which would need to be retried from thread context; when same GSI + * is connected to both PIC and IOAPIC, we'd have to report a + * partial failure here. + * Since there's no easy way to do this, we only support injecting MSI + * which is limited to 1:1 GSI mapping. 
+ */ + idx = srcu_read_lock(&kvm->irq_srcu); + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { + e = &entries[0]; + if (likely(e->type == KVM_IRQ_ROUTING_MSI)) + ret = kvm_set_msi_inatomic(e, kvm); + else + ret = -EWOULDBLOCK; + } + srcu_read_unlock(&kvm->irq_srcu, idx); + return ret; +} + +int kvm_request_irq_source_id(struct kvm *kvm) +{ + unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; + int irq_source_id; + + mutex_lock(&kvm->irq_lock); + irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG); + + if (irq_source_id >= BITS_PER_LONG) { + printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); + irq_source_id = -EFAULT; + goto unlock; + } + + ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); + ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); + set_bit(irq_source_id, bitmap); +unlock: + mutex_unlock(&kvm->irq_lock); + + return irq_source_id; +} + +void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) +{ + ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); + ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); + + mutex_lock(&kvm->irq_lock); + if (irq_source_id < 0 || + irq_source_id >= BITS_PER_LONG) { + printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); + goto unlock; + } + clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); + if (!irqchip_in_kernel(kvm)) + goto unlock; + + kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); + kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); +unlock: + mutex_unlock(&kvm->irq_lock); +} + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn) +{ + mutex_lock(&kvm->irq_lock); + kimn->irq = irq; + hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list); + mutex_unlock(&kvm->irq_lock); +} + +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_rcu(&kimn->link); + mutex_unlock(&kvm->irq_lock); + 
synchronize_srcu(&kvm->irq_srcu); +} + +void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, + bool mask) +{ + struct kvm_irq_mask_notifier *kimn; + int idx, gsi; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link) + if (kimn->irq == gsi) + kimn->func(kimn, mask); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + int delta; + unsigned max_pin; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + delta = 0; + switch (ue->u.irqchip.irqchip) { + case KVM_IRQCHIP_PIC_MASTER: + e->set = kvm_set_pic_irq; + max_pin = PIC_NUM_PINS; + break; + case KVM_IRQCHIP_PIC_SLAVE: + e->set = kvm_set_pic_irq; + max_pin = PIC_NUM_PINS; + delta = 8; + break; + case KVM_IRQCHIP_IOAPIC: + max_pin = KVM_IOAPIC_NUM_PINS; + e->set = kvm_set_ioapic_irq; + break; + default: + goto out; + } + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin + delta; + if (e->irqchip.pin >= max_pin) + goto out; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + break; + default: + goto out; + } + + r = 0; +out: + return r; +} + +#define IOAPIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } +#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) + +#define PIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } +#define ROUTING_ENTRY2(irq) \ + IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) + +static const struct kvm_irq_routing_entry default_routing[] = { + ROUTING_ENTRY2(0), ROUTING_ENTRY2(1), + 
ROUTING_ENTRY2(2), ROUTING_ENTRY2(3), + ROUTING_ENTRY2(4), ROUTING_ENTRY2(5), + ROUTING_ENTRY2(6), ROUTING_ENTRY2(7), + ROUTING_ENTRY2(8), ROUTING_ENTRY2(9), + ROUTING_ENTRY2(10), ROUTING_ENTRY2(11), + ROUTING_ENTRY2(12), ROUTING_ENTRY2(13), + ROUTING_ENTRY2(14), ROUTING_ENTRY2(15), + ROUTING_ENTRY1(16), ROUTING_ENTRY1(17), + ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), + ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), + ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), +}; + +int kvm_setup_default_irq_routing(struct kvm *kvm) +{ + return kvm_set_irq_routing(kvm, default_routing, + ARRAY_SIZE(default_routing), 0); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b8345dd41b25..4f0c0b954686 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -68,6 +68,9 @@ #define MAX_APIC_VECTOR 256 #define APIC_VECTORS_PER_REG 32 +#define APIC_BROADCAST 0xFF +#define X2APIC_BROADCAST 0xFFFFFFFFul + #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) @@ -129,8 +132,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } -#define KVM_X2APIC_CID_BITS 0 - static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -149,42 +150,56 @@ static void recalculate_apic_map(struct kvm *kvm) new->cid_shift = 8; new->cid_mask = 0; new->lid_mask = 0xff; + new->broadcast = APIC_BROADCAST; kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_lapic *apic = vcpu->arch.apic; - u16 cid, lid; - u32 ldr; if (!kvm_apic_present(vcpu)) continue; + if (apic_x2apic_mode(apic)) { + new->ldr_bits = 32; + new->cid_shift = 16; + new->cid_mask = new->lid_mask = 0xffff; + new->broadcast = X2APIC_BROADCAST; + } else if (kvm_apic_get_reg(apic, APIC_LDR)) { + if (kvm_apic_get_reg(apic, APIC_DFR) == + APIC_DFR_CLUSTER) { + new->cid_shift = 4; + new->cid_mask = 0xf; + new->lid_mask = 0xf; + } else { + new->cid_shift = 8; + new->cid_mask = 0; + new->lid_mask = 0xff; + } + } + /* * All APICs have to be 
configured in the same mode by an OS. * We take advatage of this while building logical id loockup - * table. After reset APICs are in xapic/flat mode, so if we - * find apic with different setting we assume this is the mode + * table. After reset APICs are in software disabled mode, so if + * we find apic with different setting we assume this is the mode * OS wants all apics to be in; build lookup table accordingly. */ - if (apic_x2apic_mode(apic)) { - new->ldr_bits = 32; - new->cid_shift = 16; - new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1; - new->lid_mask = 0xffff; - } else if (kvm_apic_sw_enabled(apic) && - !new->cid_mask /* flat mode */ && - kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { - new->cid_shift = 4; - new->cid_mask = 0xf; - new->lid_mask = 0xf; - } + if (kvm_apic_sw_enabled(apic)) + break; + } - new->phys_map[kvm_apic_id(apic)] = apic; + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvm_lapic *apic = vcpu->arch.apic; + u16 cid, lid; + u32 ldr, aid; + aid = kvm_apic_id(apic); ldr = kvm_apic_get_reg(apic, APIC_LDR); cid = apic_cluster_id(new, ldr); lid = apic_logical_id(new, ldr); - if (lid) + if (aid < ARRAY_SIZE(new->phys_map)) + new->phys_map[aid] = apic; + if (lid && cid < ARRAY_SIZE(new->logical_map)) new->logical_map[cid][ffs(lid) - 1] = apic; } out: @@ -201,11 +216,13 @@ out: static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) { - u32 prev = kvm_apic_get_reg(apic, APIC_SPIV); + bool enabled = val & APIC_SPIV_APIC_ENABLED; apic_set_reg(apic, APIC_SPIV, val); - if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) { - if (val & APIC_SPIV_APIC_ENABLED) { + + if (enabled != apic->sw_enabled) { + apic->sw_enabled = enabled; + if (enabled) { static_key_slow_dec_deferred(&apic_sw_disabled); recalculate_apic_map(apic->vcpu->kvm); } else @@ -237,21 +254,17 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) { - return ((kvm_apic_get_reg(apic, APIC_LVTT) & - 
apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); + return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT; } static inline int apic_lvtt_period(struct kvm_lapic *apic) { - return ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); + return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC; } static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) { - return ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) == - APIC_LVT_TIMER_TSCDEADLINE); + return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE; } static inline int apic_lvt_nmi_mode(u32 lvt_val) @@ -326,8 +339,12 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr); static inline void apic_set_irr(int vec, struct kvm_lapic *apic) { - apic->irr_pending = true; apic_set_vector(vec, apic->regs + APIC_IRR); + /* + * irr_pending must be true if any interrupt is pending; set it after + * APIC_IRR to avoid race with apic_clear_irr + */ + apic->irr_pending = true; } static inline int apic_search_irr(struct kvm_lapic *apic) @@ -359,13 +376,15 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) vcpu = apic->vcpu; - apic_clear_vector(vec, apic->regs + APIC_IRR); - if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) { /* try to update RVI */ + apic_clear_vector(vec, apic->regs + APIC_IRR); kvm_make_request(KVM_REQ_EVENT, vcpu); - else { - vec = apic_search_irr(apic); - apic->irr_pending = (vec != -1); + } else { + apic->irr_pending = false; + apic_clear_vector(vec, apic->regs + APIC_IRR); + if (apic_search_irr(apic) != -1) + apic->irr_pending = true; } } @@ -558,16 +577,25 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) apic_update_ppr(apic); } -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) +static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest) +{ + return dest == (apic_x2apic_mode(apic) ? 
+ X2APIC_BROADCAST : APIC_BROADCAST); +} + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest) { - return dest == 0xff || kvm_apic_id(apic) == dest; + return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest); } -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) { int result = 0; u32 logical_id; + if (kvm_apic_broadcast(apic, mda)) + return 1; + if (apic_x2apic_mode(apic)) { logical_id = kvm_apic_get_reg(apic, APIC_LDR); return logical_id & mda; @@ -595,7 +623,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) } int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode) + int short_hand, unsigned int dest, int dest_mode) { int result = 0; struct kvm_lapic *target = vcpu->arch.apic; @@ -657,15 +685,24 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, if (!map) goto out; + if (irq->dest_id == map->broadcast) + goto out; + + ret = true; + if (irq->dest_mode == 0) { /* physical mode */ - if (irq->delivery_mode == APIC_DM_LOWEST || - irq->dest_id == 0xff) + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) goto out; - dst = &map->phys_map[irq->dest_id & 0xff]; + + dst = &map->phys_map[irq->dest_id]; } else { u32 mda = irq->dest_id << (32 - map->ldr_bits); + u16 cid = apic_cluster_id(map, mda); + + if (cid >= ARRAY_SIZE(map->logical_map)) + goto out; - dst = map->logical_map[apic_cluster_id(map, mda)]; + dst = map->logical_map[cid]; bitmap = apic_logical_id(map, mda); @@ -691,8 +728,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = 0; *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } - - ret = true; out: rcu_read_unlock(); return ret; @@ -1034,6 +1069,26 @@ static void update_divide_count(struct kvm_lapic *apic) apic->divide_count); } +static void apic_timer_expired(struct kvm_lapic *apic) +{ + struct kvm_vcpu *vcpu = apic->vcpu; 
+ wait_queue_head_t *q = &vcpu->wq; + + /* + * Note: KVM_REQ_PENDING_TIMER is implicitly checked in + * vcpu_enter_guest. + */ + if (atomic_read(&apic->lapic_timer.pending)) + return; + + atomic_inc(&apic->lapic_timer.pending); + /* FIXME: this code should not know anything about vcpus */ + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + + if (waitqueue_active(q)) + wake_up_interruptible(q); +} + static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now; @@ -1096,9 +1151,10 @@ static void start_apic_timer(struct kvm_lapic *apic) if (likely(tscdeadline > guest_tsc)) { ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); - } - hrtimer_start(&apic->lapic_timer.timer, - ktime_add_ns(now, ns), HRTIMER_MODE_ABS); + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, ns), HRTIMER_MODE_ABS); + } else + apic_timer_expired(apic); local_irq_restore(flags); } @@ -1203,17 +1259,20 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; - case APIC_LVTT: - if ((kvm_apic_get_reg(apic, APIC_LVTT) & - apic->lapic_timer.timer_mode_mask) != - (val & apic->lapic_timer.timer_mode_mask)) + case APIC_LVTT: { + u32 timer_mode = val & apic->lapic_timer.timer_mode_mask; + + if (apic->lapic_timer.timer_mode != timer_mode) { + apic->lapic_timer.timer_mode = timer_mode; hrtimer_cancel(&apic->lapic_timer.timer); + } if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); apic_set_reg(apic, APIC_LVTT, val); break; + } case APIC_TMICT: if (apic_lvtt_tscdeadline(apic)) @@ -1320,7 +1379,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) static_key_slow_dec_deferred(&apic_hw_disabled); - if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) + if (!apic->sw_enabled) static_key_slow_dec_deferred(&apic_sw_disabled); if (apic->regs) @@ -1355,9 +1414,6 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 
data) return; hrtimer_cancel(&apic->lapic_timer.timer); - /* Inject here so clearing tscdeadline won't override new value */ - if (apic_has_pending_timer(vcpu)) - kvm_inject_apic_timer_irqs(vcpu); apic->lapic_timer.tscdeadline = data; start_apic_timer(apic); } @@ -1422,6 +1478,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; + if ((value & MSR_IA32_APICBASE_ENABLE) && + apic->base_address != APIC_DEFAULT_PHYS_BASE) + pr_warn_once("APIC base relocation is unsupported by KVM"); + /* with FSB delivery interrupt, we can restart APIC functionality */ apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address); @@ -1447,6 +1507,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) for (i = 0; i < APIC_LVT_NUM; i++) apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); + apic->lapic_timer.timer_mode = 0; apic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); @@ -1538,23 +1599,8 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) { struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); - struct kvm_vcpu *vcpu = apic->vcpu; - wait_queue_head_t *q = &vcpu->wq; - - /* - * There is a race window between reading and incrementing, but we do - * not care about potentially losing timer events in the !reinject - * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked - * in vcpu_enter_guest. 
- */ - if (!atomic_read(&ktimer->pending)) { - atomic_inc(&ktimer->pending); - /* FIXME: this code should not know anything about vcpus */ - kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); - } - if (waitqueue_active(q)) - wake_up_interruptible(q); + apic_timer_expired(apic); if (lapic_is_periodic(apic)) { hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); @@ -1693,6 +1739,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; + if (kvm_x86_ops->hwapic_irr_update) + kvm_x86_ops->hwapic_irr_update(vcpu, + apic_find_highest_irr(apic)); kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_rtc_eoi_tracking_restore_one(vcpu); @@ -1837,8 +1886,11 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) return 1; + if (reg == APIC_ICR2) + return 1; + /* if this is ICR write vector before command */ - if (msr == 0x830) + if (reg == APIC_ICR) apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); return apic_reg_write(apic, reg, (u32)data); } @@ -1851,9 +1903,15 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) return 1; + if (reg == APIC_DFR || reg == APIC_ICR2) { + apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n", + reg); + return 1; + } + if (apic_reg_read(apic, reg, 4, &low)) return 1; - if (msr == 0x830) + if (reg == APIC_ICR) apic_reg_read(apic, APIC_ICR2, 4, &high); *data = (((u64)high) << 32) | low; @@ -1908,7 +1966,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) void kvm_apic_accept_events(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - unsigned int sipi_vector; + u8 sipi_vector; unsigned long pe; if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) diff --git 
a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 6a11845fd8b9..c674fce53cf9 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -11,6 +11,7 @@ struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ + u32 timer_mode; u32 timer_mode_mask; u64 tscdeadline; atomic_t pending; /* accumulated triggered timers */ @@ -22,6 +23,7 @@ struct kvm_lapic { struct kvm_timer lapic_timer; u32 divide_count; struct kvm_vcpu *vcpu; + bool sw_enabled; bool irr_pending; /* Number of bits set in ISR. */ s16 isr_count; @@ -55,8 +57,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest); +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); @@ -119,11 +121,11 @@ static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic) extern struct static_key_deferred apic_sw_disabled; -static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic) +static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic) { if (static_key_false(&apic_sw_disabled.key)) - return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; - return APIC_SPIV_APIC_ENABLED; + return apic->sw_enabled; + return true; } static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) @@ -152,8 +154,6 @@ static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) ldr >>= 32 - map->ldr_bits; cid = (ldr >> map->cid_shift) & map->cid_mask; - BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); - return cid; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 978f402006ee..f83fc6c5e0ba 100644 --- 
a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -214,13 +214,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); #define MMIO_GEN_LOW_SHIFT 10 #define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) -#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) static u64 generation_mmio_spte_mask(unsigned int gen) { u64 mask; - WARN_ON(gen > MMIO_MAX_GEN); + WARN_ON(gen & ~MMIO_GEN_MASK); mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT; mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT; @@ -263,13 +262,13 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; + u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; return (spte & ~mask) >> PAGE_SHIFT; } static unsigned get_mmio_spte_access(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; + u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; return (spte & ~mask) & ~PAGE_MASK; } @@ -4449,7 +4448,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) * zap all shadow pages. 
*/ if (unlikely(kvm_current_mmio_generation(kvm) == 0)) { - printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); + printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7527cefc5a43..41dd0387cccb 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1056,9 +1056,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho { struct vcpu_svm *svm = to_svm(vcpu); - WARN_ON(adjustment < 0); - if (host) - adjustment = svm_scale_tsc(vcpu, adjustment); + if (host) { + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) + WARN_ON(adjustment < 0); + adjustment = svm_scale_tsc(vcpu, (u64)adjustment); + } svm->vmcb->control.tsc_offset += adjustment; if (is_guest_mode(vcpu)) @@ -2999,7 +3001,6 @@ static int dr_interception(struct vcpu_svm *svm) { int reg, dr; unsigned long val; - int err; if (svm->vcpu.guest_debug == 0) { /* @@ -3019,12 +3020,15 @@ static int dr_interception(struct vcpu_svm *svm) dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; if (dr >= 16) { /* mov to DRn */ + if (!kvm_require_dr(&svm->vcpu, dr - 16)) + return 1; val = kvm_register_read(&svm->vcpu, reg); kvm_set_dr(&svm->vcpu, dr - 16, val); } else { - err = kvm_get_dr(&svm->vcpu, dr, &val); - if (!err) - kvm_register_write(&svm->vcpu, reg, val); + if (!kvm_require_dr(&svm->vcpu, dr)) + return 1; + kvm_get_dr(&svm->vcpu, dr, &val); + kvm_register_write(&svm->vcpu, reg, val); } skip_emulated_instruction(&svm->vcpu); @@ -4123,6 +4127,11 @@ static bool svm_mpx_supported(void) return false; } +static bool svm_xsaves_supported(void) +{ + return false; +} + static bool svm_has_wbinvd_exit(void) { return true; @@ -4410,6 +4419,7 @@ static struct kvm_x86_ops svm_x86_ops = { .rdtscp_supported = svm_rdtscp_supported, .invpcid_supported = svm_invpcid_supported, .mpx_supported = svm_mpx_supported, + 
.xsaves_supported = svm_xsaves_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6b06ab8748dd..c2a34bb5ad93 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -5,6 +5,7 @@ #include <asm/vmx.h> #include <asm/svm.h> #include <asm/clocksource.h> +#include <asm/pvclock-abi.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm @@ -877,6 +878,42 @@ TRACE_EVENT(kvm_ple_window, #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ trace_kvm_ple_window(false, vcpu_id, new, old) +TRACE_EVENT(kvm_pvclock_update, + TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock), + TP_ARGS(vcpu_id, pvclock), + + TP_STRUCT__entry( + __field( unsigned int, vcpu_id ) + __field( __u32, version ) + __field( __u64, tsc_timestamp ) + __field( __u64, system_time ) + __field( __u32, tsc_to_system_mul ) + __field( __s8, tsc_shift ) + __field( __u8, flags ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->version = pvclock->version; + __entry->tsc_timestamp = pvclock->tsc_timestamp; + __entry->system_time = pvclock->system_time; + __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul; + __entry->tsc_shift = pvclock->tsc_shift; + __entry->flags = pvclock->flags; + ), + + TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, " + "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, " + "flags 0x%x }", + __entry->vcpu_id, + __entry->version, + __entry->tsc_timestamp, + __entry->system_time, + __entry->tsc_to_system_mul, + __entry->tsc_shift, + __entry->flags) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3e556c68351b..d4c58d884838 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); static bool __read_mostly nested = 0; module_param(nested, bool, S_IRUGO); +static u64 __read_mostly host_xss; + #define 
KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT) + | X86_CR4_OSXMMEXCPT | X86_CR4_TSD) #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) @@ -214,6 +216,7 @@ struct __packed vmcs12 { u64 virtual_apic_page_addr; u64 apic_access_addr; u64 ept_pointer; + u64 xss_exit_bitmap; u64 guest_physical_address; u64 vmcs_link_pointer; u64 guest_ia32_debugctl; @@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), FIELD64(APIC_ACCESS_ADDR, apic_access_addr), FIELD64(EPT_POINTER, ept_pointer), + FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), @@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD(HOST_RSP, host_rsp), FIELD(HOST_RIP, host_rip), }; -static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table); static inline short vmcs_field_to_offset(unsigned long field) { - if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0) - return -1; + BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); + + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || + vmcs_field_to_offset_table[field] == 0) + return -ENOENT; + return vmcs_field_to_offset_table[field]; } @@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); static bool vmx_mpx_supported(void); +static bool vmx_xsaves_supported(void); static int vmx_set_tss_addr(struct 
kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); } +static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) && + vmx_xsaves_supported(); +} + static inline bool is_exception(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) @@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) vmx->guest_msrs[efer_offset].mask = ~ignore_bits; clear_atomic_switch_msr(vmx, MSR_EFER); - /* On ept, can't emulate nx, and must switch nx atomically */ - if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { + + /* + * On EPT, we can't emulate NX, so we must switch EFER atomically. + * On CPUs that support "load IA32_EFER", always switch EFER + * atomically, since it's faster than switching it manually. 
+ */ + if (cpu_has_load_ia32_efer || + (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { guest_efer = vmx->vcpu.arch.efer; if (!(guest_efer & EFER_LMA)) guest_efer &= ~EFER_LME; - add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); + if (guest_efer != host_efer) + add_atomic_switch_msr(vmx, MSR_EFER, + guest_efer, host_efer); return false; } @@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_low = 0; nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_UNRESTRICTED_GUEST | - SECONDARY_EXEC_WBINVD_EXITING; + SECONDARY_EXEC_WBINVD_EXITING | + SECONDARY_EXEC_XSAVES; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ - nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; + nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT | + SECONDARY_EXEC_UNRESTRICTED_GUEST; nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_INVEPT_BIT; @@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) if (!nested_vmx_allowed(vcpu)) return 1; return vmx_get_vmx_msr(vcpu, msr_index, pdata); + case MSR_IA32_XSS: + if (!vmx_xsaves_supported()) + return 1; + data = vcpu->arch.ia32_xss; + break; case MSR_TSC_AUX: if (!to_vmx(vcpu)->rdtscp_enabled) return 1; @@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: return 1; /* they are read-only */ + case MSR_IA32_XSS: + if (!vmx_xsaves_supported()) + return 1; + /* + * The only supported bit as of Skylake is bit 8, but + * it is not supported on KVM. 
+ */ + if (data != 0) + return 1; + vcpu->arch.ia32_xss = data; + if (vcpu->arch.ia32_xss != host_xss) + add_atomic_switch_msr(vmx, MSR_IA32_XSS, + vcpu->arch.ia32_xss, host_xss); + else + clear_atomic_switch_msr(vmx, MSR_IA32_XSS); + break; case MSR_TSC_AUX: if (!vmx->rdtscp_enabled) return 1; @@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_SHADOW_VMCS; + SECONDARY_EXEC_SHADOW_VMCS | + SECONDARY_EXEC_XSAVES; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) } } + if (cpu_has_xsaves) + rdmsrl(MSR_IA32_XSS, host_xss); + return 0; } @@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void) return 0; } -static __init int hardware_setup(void) -{ - if (setup_vmcs_config(&vmcs_config) < 0) - return -EIO; - - if (boot_cpu_has(X86_FEATURE_NX)) - kvm_enable_efer_bits(EFER_NX); - - if (!cpu_has_vmx_vpid()) - enable_vpid = 0; - if (!cpu_has_vmx_shadow_vmcs()) - enable_shadow_vmcs = 0; - if (enable_shadow_vmcs) - init_vmcs_shadow_fields(); - - if (!cpu_has_vmx_ept() || - !cpu_has_vmx_ept_4levels()) { - enable_ept = 0; - enable_unrestricted_guest = 0; - enable_ept_ad_bits = 0; - } - - if (!cpu_has_vmx_ept_ad_bits()) - enable_ept_ad_bits = 0; - - if (!cpu_has_vmx_unrestricted_guest()) - enable_unrestricted_guest = 0; - - if (!cpu_has_vmx_flexpriority()) { - flexpriority_enabled = 0; - - /* - * set_apic_access_page_addr() is used to reload apic access - * page upon invalidation. No need to do anything if the - * processor does not have the APIC_ACCESS_ADDR VMCS field. 
- */ - kvm_x86_ops->set_apic_access_page_addr = NULL; - } - - if (!cpu_has_vmx_tpr_shadow()) - kvm_x86_ops->update_cr8_intercept = NULL; - - if (enable_ept && !cpu_has_vmx_ept_2m_page()) - kvm_disable_largepages(); - - if (!cpu_has_vmx_ple()) - ple_gap = 0; - - if (!cpu_has_vmx_apicv()) - enable_apicv = 0; - - if (enable_apicv) - kvm_x86_ops->update_cr8_intercept = NULL; - else { - kvm_x86_ops->hwapic_irr_update = NULL; - kvm_x86_ops->deliver_posted_interrupt = NULL; - kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; - } - - if (nested) - nested_vmx_setup_ctls_msrs(); - - return alloc_kvm_area(); -} - -static __exit void hardware_unsetup(void) -{ - free_kvm_area(); -} - static bool emulation_required(struct kvm_vcpu *vcpu) { return emulate_invalid_guest_state && !guest_state_valid(vcpu); @@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void) kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull); } +#define VMX_XSS_EXIT_BITMAP 0 /* * Sets up the vmcs for emulated real mode. 
*/ @@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); set_cr4_guest_host_mask(vmx); + if (vmx_xsaves_supported()) + vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); + return 0; } @@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu) static int handle_dr(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; - int dr, reg; + int dr, dr7, reg; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + dr = exit_qualification & DEBUG_REG_ACCESS_NUM; + + /* First, if DR does not exist, trigger UD */ + if (!kvm_require_dr(vcpu, dr)) + return 1; /* Do not handle if the CPL > 0, will trigger GP on re-entry */ if (!kvm_require_cpl(vcpu, 0)) return 1; - dr = vmcs_readl(GUEST_DR7); - if (dr & DR7_GD) { + dr7 = vmcs_readl(GUEST_DR7); + if (dr7 & DR7_GD) { /* * As the vm-exit takes precedence over the debug trap, we * need to emulate the latter, either for the host or the @@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu) */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; - vcpu->run->debug.arch.dr7 = dr; - vcpu->run->debug.arch.pc = - vmcs_readl(GUEST_CS_BASE) + - vmcs_readl(GUEST_RIP); + vcpu->run->debug.arch.dr7 = dr7; + vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); vcpu->run->debug.arch.exception = DB_VECTOR; vcpu->run->exit_reason = KVM_EXIT_DEBUG; return 0; } else { - vcpu->arch.dr7 &= ~DR7_GD; + vcpu->arch.dr6 &= ~15; vcpu->arch.dr6 |= DR6_BD | DR6_RTM; - vmcs_writel(GUEST_DR7, vcpu->arch.dr7); kvm_queue_exception(vcpu, DB_VECTOR); return 1; } @@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 1; } - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - dr = exit_qualification & DEBUG_REG_ACCESS_NUM; reg = DEBUG_REG_ACCESS_REG(exit_qualification); if (exit_qualification & TYPE_MOV_FROM_DR) { unsigned long val; @@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) return 1; } +static 
int handle_xsaves(struct kvm_vcpu *vcpu) +{ + skip_emulated_instruction(vcpu); + WARN(1, "this should never happen\n"); + return 1; +} + +static int handle_xrstors(struct kvm_vcpu *vcpu) +{ + skip_emulated_instruction(vcpu); + WARN(1, "this should never happen\n"); + return 1; +} + static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { @@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) } /* clear all local breakpoint enable flags */ - vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55); + vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155); /* * TODO: What about debug traps on tss switch? @@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) trace_kvm_page_fault(gpa, exit_qualification); /* It is a write fault? */ - error_code = exit_qualification & (1U << 1); + error_code = exit_qualification & PFERR_WRITE_MASK; /* It is a fetch fault? */ - error_code |= (exit_qualification & (1U << 2)) << 2; + error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK; /* ept page table is present? 
*/ - error_code |= (exit_qualification >> 3) & 0x1; + error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK; vcpu->arch.exit_qualification = exit_qualification; @@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void) ple_window_grow, INT_MIN); } +static __init int hardware_setup(void) +{ + int r = -ENOMEM, i, msr; + + rdmsrl_safe(MSR_EFER, &host_efer); + + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) + kvm_define_shared_msr(i, vmx_msr_index[i]); + + vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_io_bitmap_a) + return r; + + vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_io_bitmap_b) + goto out; + + vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_legacy) + goto out1; + + vmx_msr_bitmap_legacy_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_legacy_x2apic) + goto out2; + + vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode) + goto out3; + + vmx_msr_bitmap_longmode_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode_x2apic) + goto out4; + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmread_bitmap) + goto out5; + + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmwrite_bitmap) + goto out6; + + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); + memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); + + /* + * Allow direct access to the PC debug port (it is often used for I/O + * delays, but the vmexits simply slow things down). 
+ */ + memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); + clear_bit(0x80, vmx_io_bitmap_a); + + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); + + memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); + memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); + + if (setup_vmcs_config(&vmcs_config) < 0) { + r = -EIO; + goto out7; + } + + if (boot_cpu_has(X86_FEATURE_NX)) + kvm_enable_efer_bits(EFER_NX); + + if (!cpu_has_vmx_vpid()) + enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) + enable_shadow_vmcs = 0; + if (enable_shadow_vmcs) + init_vmcs_shadow_fields(); + + if (!cpu_has_vmx_ept() || + !cpu_has_vmx_ept_4levels()) { + enable_ept = 0; + enable_unrestricted_guest = 0; + enable_ept_ad_bits = 0; + } + + if (!cpu_has_vmx_ept_ad_bits()) + enable_ept_ad_bits = 0; + + if (!cpu_has_vmx_unrestricted_guest()) + enable_unrestricted_guest = 0; + + if (!cpu_has_vmx_flexpriority()) { + flexpriority_enabled = 0; + + /* + * set_apic_access_page_addr() is used to reload apic access + * page upon invalidation. No need to do anything if the + * processor does not have the APIC_ACCESS_ADDR VMCS field. 
+ */ + kvm_x86_ops->set_apic_access_page_addr = NULL; + } + + if (!cpu_has_vmx_tpr_shadow()) + kvm_x86_ops->update_cr8_intercept = NULL; + + if (enable_ept && !cpu_has_vmx_ept_2m_page()) + kvm_disable_largepages(); + + if (!cpu_has_vmx_ple()) + ple_gap = 0; + + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; + + if (enable_apicv) + kvm_x86_ops->update_cr8_intercept = NULL; + else { + kvm_x86_ops->hwapic_irr_update = NULL; + kvm_x86_ops->deliver_posted_interrupt = NULL; + kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; + } + + if (nested) + nested_vmx_setup_ctls_msrs(); + + vmx_disable_intercept_for_msr(MSR_FS_BASE, false); + vmx_disable_intercept_for_msr(MSR_GS_BASE, false); + vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); + vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); + + memcpy(vmx_msr_bitmap_legacy_x2apic, + vmx_msr_bitmap_legacy, PAGE_SIZE); + memcpy(vmx_msr_bitmap_longmode_x2apic, + vmx_msr_bitmap_longmode, PAGE_SIZE); + + if (enable_apicv) { + for (msr = 0x800; msr <= 0x8ff; msr++) + vmx_disable_intercept_msr_read_x2apic(msr); + + /* According SDM, in x2apic mode, the whole id reg is used. + * But in KVM, it only use the highest eight bits. Need to + * intercept it */ + vmx_enable_intercept_msr_read_x2apic(0x802); + /* TMCCT */ + vmx_enable_intercept_msr_read_x2apic(0x839); + /* TPR */ + vmx_disable_intercept_msr_write_x2apic(0x808); + /* EOI */ + vmx_disable_intercept_msr_write_x2apic(0x80b); + /* SELF-IPI */ + vmx_disable_intercept_msr_write_x2apic(0x83f); + } + + if (enable_ept) { + kvm_mmu_set_mask_ptes(0ull, + (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, + (enable_ept_ad_bits) ? 
VMX_EPT_DIRTY_BIT : 0ull, + 0ull, VMX_EPT_EXECUTABLE_MASK); + ept_set_mmio_spte_mask(); + kvm_enable_tdp(); + } else + kvm_disable_tdp(); + + update_ple_window_actual_max(); + + return alloc_kvm_area(); + +out7: + free_page((unsigned long)vmx_vmwrite_bitmap); +out6: + free_page((unsigned long)vmx_vmread_bitmap); +out5: + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); +out4: + free_page((unsigned long)vmx_msr_bitmap_longmode); +out3: + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); +out2: + free_page((unsigned long)vmx_msr_bitmap_legacy); +out1: + free_page((unsigned long)vmx_io_bitmap_b); +out: + free_page((unsigned long)vmx_io_bitmap_a); + + return r; +} + +static __exit void hardware_unsetup(void) +{ + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); + free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_msr_bitmap_longmode); + free_page((unsigned long)vmx_io_bitmap_b); + free_page((unsigned long)vmx_io_bitmap_a); + free_page((unsigned long)vmx_vmwrite_bitmap); + free_page((unsigned long)vmx_vmread_bitmap); + + free_kvm_area(); +} + /* * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE * exiting, so only get here on cpu with PAUSE-Loop-Exiting. @@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field) * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of * 64-bit fields are to be returned). 
*/ -static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu, - unsigned long field, u64 *ret) +static inline int vmcs12_read_any(struct kvm_vcpu *vcpu, + unsigned long field, u64 *ret) { short offset = vmcs_field_to_offset(field); char *p; if (offset < 0) - return 0; + return offset; p = ((char *)(get_vmcs12(vcpu))) + offset; switch (vmcs_field_type(field)) { case VMCS_FIELD_TYPE_NATURAL_WIDTH: *ret = *((natural_width *)p); - return 1; + return 0; case VMCS_FIELD_TYPE_U16: *ret = *((u16 *)p); - return 1; + return 0; case VMCS_FIELD_TYPE_U32: *ret = *((u32 *)p); - return 1; + return 0; case VMCS_FIELD_TYPE_U64: *ret = *((u64 *)p); - return 1; + return 0; default: - return 0; /* can never happen. */ + WARN_ON(1); + return -ENOENT; } } -static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu, - unsigned long field, u64 field_value){ +static inline int vmcs12_write_any(struct kvm_vcpu *vcpu, + unsigned long field, u64 field_value){ short offset = vmcs_field_to_offset(field); char *p = ((char *) get_vmcs12(vcpu)) + offset; if (offset < 0) - return false; + return offset; switch (vmcs_field_type(field)) { case VMCS_FIELD_TYPE_U16: *(u16 *)p = field_value; - return true; + return 0; case VMCS_FIELD_TYPE_U32: *(u32 *)p = field_value; - return true; + return 0; case VMCS_FIELD_TYPE_U64: *(u64 *)p = field_value; - return true; + return 0; case VMCS_FIELD_TYPE_NATURAL_WIDTH: *(natural_width *)p = field_value; - return true; + return 0; default: - return false; /* can never happen. 
*/ + WARN_ON(1); + return -ENOENT; } } @@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) case VMCS_FIELD_TYPE_NATURAL_WIDTH: field_value = vmcs_readl(field); break; + default: + WARN_ON(1); + continue; } vmcs12_write_any(&vmx->vcpu, field, field_value); } @@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) case VMCS_FIELD_TYPE_NATURAL_WIDTH: vmcs_writel(field, (long)field_value); break; + default: + WARN_ON(1); + break; } } } @@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) /* Decode instruction info and find the field to read */ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); /* Read the field, zero-extended to a u64 field_value */ - if (!vmcs12_read_any(vcpu, field, &field_value)) { + if (vmcs12_read_any(vcpu, field, &field_value) < 0) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); skip_emulated_instruction(vcpu); return 1; @@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return 1; } - if (!vmcs12_write_any(vcpu, field, field_value)) { + if (vmcs12_write_any(vcpu, field, field_value) < 0) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); skip_emulated_instruction(vcpu); return 1; @@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, [EXIT_REASON_INVEPT] = handle_invept, [EXIT_REASON_INVVPID] = handle_invvpid, + [EXIT_REASON_XSAVES] = handle_xsaves, + [EXIT_REASON_XRSTORS] = handle_xrstors, }; static const int kvm_vmx_max_exit_handlers = @@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: return 1; + case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: + /* + * This should never happen, since it is not possible to + * set XSS to a non-zero value---neither in L1 nor in L2. 
+ * If it were, XSS would have to be checked against + * the XSS exit bitmap in vmcs12. + */ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); default: return 1; } @@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector) u16 status; u8 old; + if (vector == -1) + vector = 0; + status = vmcs_read16(GUEST_INTR_STATUS); old = (u8)status & 0xff; if ((u8)vector != old) { @@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector) static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) { + if (!is_guest_mode(vcpu)) { + vmx_set_rvi(max_irr); + return; + } + if (max_irr == -1) return; /* - * If a vmexit is needed, vmx_check_nested_events handles it. + * In guest mode. If a vmexit is needed, vmx_check_nested_events + * handles it. */ - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + if (nested_exit_on_intr(vcpu)) return; - if (!is_guest_mode(vcpu)) { - vmx_set_rvi(max_irr); - return; - } - /* - * Fall back to pre-APICv interrupt injection since L2 + * Else, fall back to pre-APICv interrupt injection since L2 * is run without virtual interrupt delivery.
*/ if (!kvm_event_needs_reinjection(vcpu) && @@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void) (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS); } +static bool vmx_xsaves_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_XSAVES; +} + static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); + if (nested_cpu_has_xsaves(vmcs12)) + vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); vmcs_write64(VMCS_LINK_POINTER, -1ull); exec_control = vmcs12->pin_based_vm_exec_control; @@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); if (vmx_mpx_supported()) vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); + if (nested_cpu_has_xsaves(vmcs12)) + vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); /* update exit information fields: */ @@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .check_intercept = vmx_check_intercept, .handle_external_intr = vmx_handle_external_intr, .mpx_supported = vmx_mpx_supported, + .xsaves_supported = vmx_xsaves_supported, .check_nested_events = vmx_check_nested_events, @@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = { static int __init vmx_init(void) { - int r, i, msr; - - rdmsrl_safe(MSR_EFER, &host_efer); - - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) - kvm_define_shared_msr(i, vmx_msr_index[i]); - - vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_io_bitmap_a) - return -ENOMEM; - - r = -ENOMEM; - - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_io_bitmap_b) - goto out; - - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy) 
- goto out1; - - vmx_msr_bitmap_legacy_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic) - goto out2; - - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode) - goto out3; - - vmx_msr_bitmap_longmode_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic) - goto out4; - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_vmread_bitmap) - goto out5; - - vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_vmwrite_bitmap) - goto out6; - - memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); - memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ - memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); - - memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); + int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), + __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) - goto out7; + return r; #ifdef CONFIG_KEXEC rcu_assign_pointer(crash_vmclear_loaded_vmcss, crash_vmclear_local_loaded_vmcss); #endif - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); - - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - 
memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - - if (enable_apicv) { - for (msr = 0x800; msr <= 0x8ff; msr++) - vmx_disable_intercept_msr_read_x2apic(msr); - - /* According SDM, in x2apic mode, the whole id reg is used. - * But in KVM, it only use the highest eight bits. Need to - * intercept it */ - vmx_enable_intercept_msr_read_x2apic(0x802); - /* TMCCT */ - vmx_enable_intercept_msr_read_x2apic(0x839); - /* TPR */ - vmx_disable_intercept_msr_write_x2apic(0x808); - /* EOI */ - vmx_disable_intercept_msr_write_x2apic(0x80b); - /* SELF-IPI */ - vmx_disable_intercept_msr_write_x2apic(0x83f); - } - - if (enable_ept) { - kvm_mmu_set_mask_ptes(0ull, - (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, - (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, - 0ull, VMX_EPT_EXECUTABLE_MASK); - ept_set_mmio_spte_mask(); - kvm_enable_tdp(); - } else - kvm_disable_tdp(); - - update_ple_window_actual_max(); - return 0; - -out7: - free_page((unsigned long)vmx_vmwrite_bitmap); -out6: - free_page((unsigned long)vmx_vmread_bitmap); -out5: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); -out4: - free_page((unsigned long)vmx_msr_bitmap_longmode); -out3: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); -out2: - free_page((unsigned long)vmx_msr_bitmap_legacy); -out1: - free_page((unsigned long)vmx_io_bitmap_b); -out: - free_page((unsigned long)vmx_io_bitmap_a); - return r; } static void __exit vmx_exit(void) { - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); - free_page((unsigned long)vmx_msr_bitmap_legacy); - free_page((unsigned long)vmx_msr_bitmap_longmode); - free_page((unsigned long)vmx_io_bitmap_b); - free_page((unsigned long)vmx_io_bitmap_a); - free_page((unsigned long)vmx_vmwrite_bitmap); - free_page((unsigned long)vmx_vmread_bitmap); - #ifdef CONFIG_KEXEC RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); synchronize_rcu(); diff --git a/arch/x86/kvm/x86.c 
b/arch/x86/kvm/x86.c index 0033df32a745..c259814200bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -27,6 +27,7 @@ #include "kvm_cache_regs.h" #include "x86.h" #include "cpuid.h" +#include "assigned-dev.h" #include <linux/clocksource.h> #include <linux/interrupt.h> @@ -353,6 +354,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, if (!vcpu->arch.exception.pending) { queue: + if (has_error && !is_protmode(vcpu)) + has_error = false; vcpu->arch.exception.pending = true; vcpu->arch.exception.has_error_code = has_error; vcpu->arch.exception.nr = nr; @@ -455,6 +458,16 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) } EXPORT_SYMBOL_GPL(kvm_require_cpl); +bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) +{ + if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE)) + return true; + + kvm_queue_exception(vcpu, UD_VECTOR); + return false; +} +EXPORT_SYMBOL_GPL(kvm_require_dr); + /* * This function will be used to read from the physical memory of the currently * running guest. 
The difference to kvm_read_guest_page is that this function @@ -656,6 +669,12 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR))) return 1; + if (xcr0 & XSTATE_AVX512) { + if (!(xcr0 & XSTATE_YMM)) + return 1; + if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512) + return 1; + } kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; @@ -732,6 +751,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { +#ifdef CONFIG_X86_64 + cr3 &= ~CR3_PCID_INVD; +#endif + if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); @@ -811,8 +834,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) vcpu->arch.eff_db[dr] = val; break; case 4: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* #UD */ /* fall through */ case 6: if (val & 0xffffffff00000000ULL) @@ -821,8 +842,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) kvm_update_dr6(vcpu); break; case 5: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* #UD */ /* fall through */ default: /* 7 */ if (val & 0xffffffff00000000ULL) @@ -837,27 +856,21 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { - int res; - - res = __kvm_set_dr(vcpu, dr, val); - if (res > 0) - kvm_queue_exception(vcpu, UD_VECTOR); - else if (res < 0) + if (__kvm_set_dr(vcpu, dr, val)) { kvm_inject_gp(vcpu, 0); - - return res; + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(kvm_set_dr); -static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) +int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { switch (dr) { case 0 ... 
3: *val = vcpu->arch.db[dr]; break; case 4: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* fall through */ case 6: if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) @@ -866,23 +879,11 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) *val = kvm_x86_ops->get_dr6(vcpu); break; case 5: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return 1; /* fall through */ default: /* 7 */ *val = vcpu->arch.dr7; break; } - - return 0; -} - -int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) -{ - if (_kvm_get_dr(vcpu, dr, val)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } return 0; } EXPORT_SYMBOL_GPL(kvm_get_dr); @@ -1237,21 +1238,22 @@ void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 bool vcpus_matched; - bool do_request = false; struct kvm_arch *ka = &vcpu->kvm->arch; struct pvclock_gtod_data *gtod = &pvclock_gtod_data; vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == atomic_read(&vcpu->kvm->online_vcpus)); - if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC) - if (!ka->use_master_clock) - do_request = 1; - - if (!vcpus_matched && ka->use_master_clock) - do_request = 1; - - if (do_request) + /* + * Once the masterclock is enabled, always perform request in + * order to update it. + * + * In order to enable masterclock, the host clocksource must be TSC + * and the vcpus need to have matched TSCs. When that happens, + * perform request to enable masterclock. 
+ */ + if (ka->use_master_clock || + (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched)) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, @@ -1637,16 +1639,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_guest_tsc = tsc_timestamp; + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, + &guest_hv_clock, sizeof(guest_hv_clock)))) + return 0; + /* * The interface expects us to write an even number signaling that the * update is finished. Since the guest won't see the intermediate * state, we just increase by 2 at the end. */ - vcpu->hv_clock.version += 2; - - if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, - &guest_hv_clock, sizeof(guest_hv_clock)))) - return 0; + vcpu->hv_clock.version = guest_hv_clock.version + 2; /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); @@ -1662,6 +1664,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.flags = pvclock_flags; + trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, sizeof(vcpu->hv_clock)); @@ -2140,7 +2144,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC_ADJUST: if (guest_cpuid_has_tsc_adjust(vcpu)) { if (!msr_info->host_initiated) { - u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; + s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true); } vcpu->arch.ia32_tsc_adjust_msr = data; @@ -3106,7 +3110,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, unsigned long val; memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); - _kvm_get_dr(vcpu, 6, &val); + kvm_get_dr(vcpu, 6, &val); dbgregs->dr6 = val; dbgregs->dr7 = vcpu->arch.dr7; dbgregs->flags = 0; @@ -3128,15 +3132,89 @@ static 
int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } +#define XSTATE_COMPACTION_ENABLED (1ULL << 63) + +static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) +{ + struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + u64 xstate_bv = xsave->xsave_hdr.xstate_bv; + u64 valid; + + /* + * Copy legacy XSAVE area, to avoid complications with CPUID + * leaves 0 and 1 in the loop below. + */ + memcpy(dest, xsave, XSAVE_HDR_OFFSET); + + /* Set XSTATE_BV */ + *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; + + /* + * Copy each region from the possibly compacted offset to the + * non-compacted offset. + */ + valid = xstate_bv & ~XSTATE_FPSSE; + while (valid) { + u64 feature = valid & -valid; + int index = fls64(feature) - 1; + void *src = get_xsave_addr(xsave, feature); + + if (src) { + u32 size, offset, ecx, edx; + cpuid_count(XSTATE_CPUID, index, + &size, &offset, &ecx, &edx); + memcpy(dest + offset, src, size); + } + + valid -= feature; + } +} + +static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) +{ + struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); + u64 valid; + + /* + * Copy legacy XSAVE area, to avoid complications with CPUID + * leaves 0 and 1 in the loop below. + */ + memcpy(xsave, src, XSAVE_HDR_OFFSET); + + /* Set XSTATE_BV and possibly XCOMP_BV. */ + xsave->xsave_hdr.xstate_bv = xstate_bv; + if (cpu_has_xsaves) + xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; + + /* + * Copy each region from the non-compacted offset to the + * possibly compacted offset. 
+ */ + valid = xstate_bv & ~XSTATE_FPSSE; + while (valid) { + u64 feature = valid & -valid; + int index = fls64(feature) - 1; + void *dest = get_xsave_addr(xsave, feature); + + if (dest) { + u32 size, offset, ecx, edx; + cpuid_count(XSTATE_CPUID, index, + &size, &offset, &ecx, &edx); + memcpy(dest, src + offset, size); + } else + WARN_ON_ONCE(1); + + valid -= feature; + } +} + static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { if (cpu_has_xsave) { - memcpy(guest_xsave->region, - &vcpu->arch.guest_fpu.state->xsave, - vcpu->arch.guest_xstate_size); - *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &= - vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE; + memset(guest_xsave, 0, sizeof(struct kvm_xsave)); + fill_xsave((u8 *) guest_xsave->region, vcpu); } else { memcpy(guest_xsave->region, &vcpu->arch.guest_fpu.state->fxsave, @@ -3160,8 +3238,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, */ if (xstate_bv & ~kvm_supported_xcr0()) return -EINVAL; - memcpy(&vcpu->arch.guest_fpu.state->xsave, - guest_xsave->region, vcpu->arch.guest_xstate_size); + load_xsave(vcpu, (u8 *)guest_xsave->region); } else { if (xstate_bv & ~XSTATE_FPSSE) return -EINVAL; @@ -4004,7 +4081,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } default: - ; + r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } out: return r; @@ -4667,7 +4744,7 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { - return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); + return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) @@ -5211,21 +5288,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) { - struct kvm_run *kvm_run = vcpu->run; - unsigned long eip = vcpu->arch.emulate_ctxt.eip; - 
u32 dr6 = 0; - if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { - dr6 = kvm_vcpu_check_hw_bp(eip, 0, + struct kvm_run *kvm_run = vcpu->run; + unsigned long eip = kvm_get_linear_rip(vcpu); + u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, vcpu->arch.guest_debug_dr7, vcpu->arch.eff_db); if (dr6 != 0) { kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; - kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); - + kvm_run->debug.arch.pc = eip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; *r = EMULATE_USER_EXIT; @@ -5235,7 +5308,8 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { - dr6 = kvm_vcpu_check_hw_bp(eip, 0, + unsigned long eip = kvm_get_linear_rip(vcpu); + u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, vcpu->arch.dr7, vcpu->arch.db); @@ -5365,7 +5439,9 @@ restart: kvm_rip_write(vcpu, ctxt->eip); if (r == EMULATE_DONE) kvm_vcpu_check_singlestep(vcpu, rflags, &r); - __kvm_set_rflags(vcpu, ctxt->eflags); + if (!ctxt->have_exception || + exception_type(ctxt->exception.vector) == EXCPT_TRAP) + __kvm_set_rflags(vcpu, ctxt->eflags); /* * For STI, interrupts are shadowed; so KVM_REQ_EVENT will @@ -5965,6 +6041,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); + if (vcpu->arch.exception.nr == DB_VECTOR && + (vcpu->arch.dr7 & DR7_GD)) { + vcpu->arch.dr7 &= ~DR7_GD; + kvm_update_dr7(vcpu); + } + kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, @@ -6873,6 +6955,9 @@ int fx_init(struct kvm_vcpu *vcpu) return err; fpu_finit(&vcpu->arch.guest_fpu); + if (cpu_has_xsaves) + vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = + host_xcr0 | XSTATE_COMPACTION_ENABLED; /* * Ensure 
guest xcr0 is valid for loading @@ -7024,7 +7109,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_reset(vcpu); } -void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) { struct kvm_segment cs; @@ -7256,6 +7341,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type) return -EINVAL; + INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); @@ -7536,12 +7622,18 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) return kvm_x86_ops->interrupt_allowed(vcpu); } -bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) +unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) { - unsigned long current_rip = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); + if (is_64_bit_mode(vcpu)) + return kvm_rip_read(vcpu); + return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + + kvm_rip_read(vcpu)); +} +EXPORT_SYMBOL_GPL(kvm_get_linear_rip); - return current_rip == linear_rip; +bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) +{ + return kvm_get_linear_rip(vcpu) == linear_rip; } EXPORT_SYMBOL_GPL(kvm_is_linear_rip); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7cb9c45a5fe0..cc1d61af6140 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -162,7 +162,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_BNDREGS | XSTATE_BNDCSR) + | XSTATE_BNDREGS | XSTATE_BNDCSR \ + | XSTATE_AVX512) extern u64 host_xcr0; extern u64 kvm_supported_xcr0(void); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index aae94132bc24..c1c1544b8485 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ 
-841,7 +841,7 @@ static void __init lguest_init_IRQ(void) { unsigned int i; - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) { /* Some systems map "vectors" to interrupts weirdly. Not us! */ __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); if (i != SYSCALL_VECTOR) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index d973e61e450d..38dcec403b46 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -844,11 +844,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) { struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; int code = BUS_ADRERR; - up_read(&mm->mmap_sem); - /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) { no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); @@ -879,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) { if (fatal_signal_pending(current) && !(error_code & PF_USER)) { - up_read(&current->mm->mmap_sem); no_context(regs, error_code, address, 0, 0); return; } @@ -887,14 +883,11 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & VM_FAULT_OOM) { /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) { - up_read(&current->mm->mmap_sem); no_context(regs, error_code, address, SIGSEGV, SEGV_MAPERR); return; } - up_read(&current->mm->mmap_sem); - /* * We ran out of memory, call the OOM killer, and return the * userspace (which will retry the fault, or kill us if we got @@ -1062,7 +1055,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, struct vm_area_struct *vma; struct task_struct *tsk; struct mm_struct *mm; - int fault; + int fault, major = 0; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; @@ -1237,47 +1230,50 @@ good_area: * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. 
*/ fault = handle_mm_fault(mm, vma, address, flags); + major |= fault & VM_FAULT_MAJOR; /* - * If we need to retry but a fatal signal is pending, handle the - * signal first. We do not need to release the mmap_sem because it - * would already be released in __lock_page_or_retry in mm/filemap.c. + * If we need to retry the mmap_sem has already been released, + * and if there is a fatal signal pending there is no guarantee + * that we made any progress. Handle this case first. */ - if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))) + if (unlikely(fault & VM_FAULT_RETRY)) { + /* Retry at most once */ + if (flags & FAULT_FLAG_ALLOW_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + if (!fatal_signal_pending(tsk)) + goto retry; + } + + /* User mode? Just return to handle the fatal exception */ + if (flags & FAULT_FLAG_USER) + return; + + /* Not returning to user mode? Handle exceptions or die: */ + no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); return; + } + up_read(&mm->mmap_sem); if (unlikely(fault & VM_FAULT_ERROR)) { mm_fault_error(regs, error_code, address, fault); return; } /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. + * Major/minor page fault accounting. If any of the events + * returned VM_FAULT_MAJOR, we account it as a major fault. */ - if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } - if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. 
*/ - flags &= ~FAULT_FLAG_ALLOW_RETRY; - flags |= FAULT_FLAG_TRIED; - goto retry; - } + if (major) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } check_v8086_mode(regs, address, tsk); - - up_read(&mm->mmap_sem); } NOKPROBE_SYMBOL(__do_page_fault); diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 207d9aef662d..d7547824e763 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -15,7 +15,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) { #ifndef CONFIG_X86_PAE - return ACCESS_ONCE(*ptep); + return READ_ONCE(*ptep); #else /* * With get_user_pages_fast, we walk down the pagetables without taking diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dfaf2e0f5f8f..536ea2fb6e33 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -384,6 +384,26 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, } /* + * Lookup the PMD entry for a virtual address. Return a pointer to the entry + * or NULL if not present. + */ +pmd_t *lookup_pmd_address(unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + + pgd = pgd_offset_k(address); + if (pgd_none(*pgd)) + return NULL; + + pud = pud_offset(pgd, address); + if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud)) + return NULL; + + return pmd_offset(pud, address); +} + +/* * This is necessary because __pa() does not work on some * kinds of memory, like vmalloc() or the alloc_remap() * areas on 32-bit NUMA systems. 
The percpu areas can diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index b9958c364075..44b9271580b5 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -210,6 +210,9 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) { int polarity; + if (dev->irq_managed && dev->irq > 0) + return 0; + if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) polarity = 0; /* active high */ else @@ -224,13 +227,18 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0) return -EBUSY; + dev->irq_managed = 1; + return 0; } static void intel_mid_pci_irq_disable(struct pci_dev *dev) { - if (!mp_should_keep_irq(&dev->dev) && dev->irq > 0) + if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed && + dev->irq > 0) { mp_unmap_irq(dev->irq); + dev->irq_managed = 0; + } } struct pci_ops intel_mid_pci_ops = { diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index eb500c2592ad..5dc6ca5e1741 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1200,11 +1200,12 @@ static int pirq_enable_irq(struct pci_dev *dev) #ifdef CONFIG_X86_IO_APIC struct pci_dev *temp_dev; int irq; - struct io_apic_irq_attr irq_attr; + + if (dev->irq_managed && dev->irq > 0) + return 0; irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, - PCI_SLOT(dev->devfn), - pin - 1, &irq_attr); + PCI_SLOT(dev->devfn), pin - 1); /* * Busses behind bridges are typically not listed in the MP-table. 
* In this case we have to look up the IRQ based on the parent bus, @@ -1218,7 +1219,7 @@ static int pirq_enable_irq(struct pci_dev *dev) pin = pci_swizzle_interrupt_pin(dev, pin); irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), - pin - 1, &irq_attr); + pin - 1); if (irq >= 0) dev_warn(&dev->dev, "using bridge %s " "INT %c to get IRQ %d\n", @@ -1228,6 +1229,7 @@ static int pirq_enable_irq(struct pci_dev *dev) } dev = temp_dev; if (irq >= 0) { + dev->irq_managed = 1; dev->irq = irq; dev_info(&dev->dev, "PCI->APIC IRQ transform: " "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); @@ -1254,11 +1256,24 @@ static int pirq_enable_irq(struct pci_dev *dev) return 0; } +bool mp_should_keep_irq(struct device *dev) +{ + if (dev->power.is_prepared) + return true; +#ifdef CONFIG_PM + if (dev->power.runtime_status == RPM_SUSPENDING) + return true; +#endif + + return false; +} + static void pirq_disable_irq(struct pci_dev *dev) { if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) && - dev->irq) { + dev->irq_managed && dev->irq) { mp_unmap_irq(dev->irq); dev->irq = 0; + dev->irq_managed = 0; } } diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index b233681af4de..0ce673645432 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; int mmr_pnode, err; @@ -198,13 +198,13 @@ static int uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_cfg *cfg = data->chip_data; + struct irq_cfg *cfg = irqd_cfg(data); unsigned int dest; unsigned long mmr_value, mmr_offset; struct uv_IO_APIC_route_entry *entry; int mmr_pnode; - if 
(__ioapic_set_affinity(data, mask, &dest)) + if (apic_set_affinity(data, mask, &dest)) return -1; mmr_value = 0; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 8c8298d78185..5c1f9ace7ae7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -387,7 +387,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) unsigned long mfn; if (!xen_feature(XENFEAT_auto_translated_physmap)) - mfn = get_phys_to_machine(pfn); + mfn = __pfn_to_mfn(pfn); else mfn = pfn; /* @@ -1113,20 +1113,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr, * instead of somewhere later and be confusing. */ xen_mc_flush(); } -static void __init xen_pagetable_p2m_copy(void) + +static void __init xen_pagetable_p2m_free(void) { unsigned long size; unsigned long addr; - unsigned long new_mfn_list; - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return; size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); - new_mfn_list = xen_revector_p2m_tree(); /* No memory or already called. */ - if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) + if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list) return; /* using __ka address and sticking INVALID_P2M_ENTRY! */ @@ -1144,8 +1140,6 @@ static void __init xen_pagetable_p2m_copy(void) size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); memblock_free(__pa(xen_start_info->mfn_list), size); - /* And revector! Bye bye old array */ - xen_start_info->mfn_list = new_mfn_list; /* At this stage, cleanup_highmap has already cleaned __ka space * from _brk_limit way up to the max_pfn_mapped (which is the end of @@ -1169,17 +1163,35 @@ static void __init xen_pagetable_p2m_copy(void) } #endif -static void __init xen_pagetable_init(void) +static void __init xen_pagetable_p2m_setup(void) { - paging_init(); + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + + xen_vmalloc_p2m_tree(); + #ifdef CONFIG_X86_64 - xen_pagetable_p2m_copy(); + xen_pagetable_p2m_free(); #endif + /* And revector! 
Bye bye old array */ + xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; +} + +static void __init xen_pagetable_init(void) +{ + paging_init(); + xen_post_allocator_init(); + + xen_pagetable_p2m_setup(); + /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); + /* Remap memory freed due to conflicts with E820 map */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) + xen_remap_memory(); + xen_setup_shared_info(); - xen_post_allocator_init(); } static void xen_write_cr2(unsigned long cr2) { diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b456b048eca9..edbc7a63fd73 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -3,21 +3,22 @@ * guests themselves, but it must also access and update the p2m array * during suspend/resume when all the pages are reallocated. * - * The p2m table is logically a flat array, but we implement it as a - * three-level tree to allow the address space to be sparse. + * The logical flat p2m table is mapped to a linear kernel memory area. + * For accesses by Xen a three-level tree linked via mfns only is set up to + * allow the address space to be sparse. * - * Xen - * | - * p2m_top p2m_top_mfn - * / \ / \ - * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn - * / \ / \ / / - * p2m p2m p2m p2m p2m p2m p2m ... + * Xen + * | + * p2m_top_mfn + * / \ + * p2m_mid_mfn p2m_mid_mfn + * / / + * p2m p2m p2m ... * * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. * - * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the - * maximum representable pseudo-physical address space is: + * The p2m_top_mfn level is limited to 1 page, so the maximum representable + * pseudo-physical address space is: * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages * * P2M_PER_PAGE depends on the architecture, as a mfn is always @@ -30,6 +31,9 @@ * leaf entries, or for the top root, or middle one, for which there is a void * entry, we assume it is "missing". 
So (for example) * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. + * We have a dedicated page p2m_missing with all entries being + * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m + * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. * * We also have the possibility of setting 1-1 mappings on certain regions, so * that: @@ -39,122 +43,20 @@ * PCI BARs, or ACPI spaces), we can create mappings easily because we * get the PFN value to match the MFN. * - * For this to work efficiently we have one new page p2m_identity and - * allocate (via reserved_brk) any other pages we need to cover the sides - * (1GB or 4MB boundary violations). All entries in p2m_identity are set to - * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, - * no other fancy value). + * For this to work efficiently we have one new page p2m_identity. All entries + * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only + * recognizes that and MFNs, no other fancy value). * * On lookup we spot that the entry points to p2m_identity and return the * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. * If the entry points to an allocated page, we just proceed as before and - * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in + * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in * appropriate functions (pfn_to_mfn). * * The reason for having the IDENTITY_FRAME_BIT instead of just returning the * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a * non-identity pfn. To protect ourselves against we elect to set (and get) the * IDENTITY_FRAME_BIT on all identity mapped PFNs. - * - * This simplistic diagram is used to explain the more subtle piece of code. - * There is also a digram of the P2M at the end that can help. 
- * Imagine your E820 looking as so: - * - * 1GB 2GB 4GB - * /-------------------+---------\/----\ /----------\ /---+-----\ - * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | - * \-------------------+---------/\----/ \----------/ \---+-----/ - * ^- 1029MB ^- 2001MB - * - * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), - * 2048MB = 524288 (0x80000)] - * - * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB - * is actually not present (would have to kick the balloon driver to put it in). - * - * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: - * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start - * of the PFN and the end PFN (263424 and 512256 respectively). The first step - * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page - * covers 512^2 of page estate (1GB) and in case the start or end PFN is not - * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as - * required to split any existing p2m_mid_missing middle pages. - * - * With the E820 example above, 263424 is not 1GB aligned so we allocate a - * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. - * Each entry in the allocate page is "missing" (points to p2m_missing). - * - * Next stage is to determine if we need to do a more granular boundary check - * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. - * We check if the start pfn and end pfn violate that boundary check, and if - * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer - * granularity of setting which PFNs are missing and which ones are identity. - * In our example 263424 and 512256 both fail the check so we reserve_brk two - * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" - * values) and assign them to p2m[1][2] and p2m[1][488] respectively. 
- * - * At this point we would at minimum reserve_brk one page, but could be up to - * three. Each call to set_phys_range_identity has at maximum a three page - * cost. If we were to query the P2M at this stage, all those entries from - * start PFN through end PFN (so 1029MB -> 2001MB) would return - * INVALID_P2M_ENTRY ("missing"). - * - * The next step is to walk from the start pfn to the end pfn setting - * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. - * If we find that the middle entry is pointing to p2m_missing we can swap it - * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and - * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions). - * At this point we do not need to worry about boundary aligment (so no need to - * reserve_brk a middle page, figure out which PFNs are "missing" and which - * ones are identity), as that has been done earlier. If we find that the - * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference - * that page (which covers 512 PFNs) and set the appropriate PFN with - * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we - * set from p2m[1][2][256->511] and p2m[1][488][0->256] with - * IDENTITY_FRAME_BIT set. - * - * All other regions that are void (or not filled) either point to p2m_missing - * (considered missing) or have the default value of INVALID_P2M_ENTRY (also - * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] - * contain the INVALID_P2M_ENTRY value and are considered "missing." - * - * Finally, the region beyond the end of of the E820 (4 GB in this example) - * is set to be identity (in case there are MMIO regions placed here). - * - * This is what the p2m ends up looking (for the E820 above) with this - * fabulous drawing: - * - * p2m /--------------\ - * /-----\ | &mfn_list[0],| /-----------------\ - * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. 
| - * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | - * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | - * |-----| \ | [p2m_identity]+\\ | .... | - * | 2 |--\ \-------------------->| ... | \\ \----------------/ - * |-----| \ \---------------/ \\ - * | 3 |-\ \ \\ p2m_identity [1] - * |-----| \ \-------------------->/---------------\ /-----------------\ - * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... | - * \-----/ | | | [p2m_identity]+-->| ..., ~0 | - * | | | .... | \-----------------/ - * | | +-[x], ~0, ~0.. +\ - * | | \---------------/ \ - * | | \-> /---------------\ - * | V p2m_mid_missing p2m_missing | IDENTITY[@0] | - * | /-----------------\ /------------\ | IDENTITY[@256]| - * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... | - * | | [p2m_missing] +---->| ..., ~0 | \---------------/ - * | | ... | \------------/ - * | \-----------------/ - * | - * | p2m_mid_identity - * | /-----------------\ - * \-->| [p2m_identity] +---->[1] - * | [p2m_identity] +---->[1] - * | ... | - * \-----------------/ - * - * where ~0 is INVALID_P2M_ENTRY. 
IDENTITY is (PFN | IDENTITY_BIT) */ #include <linux/init.h> @@ -164,9 +66,11 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/bootmem.h> +#include <linux/slab.h> #include <asm/cache.h> #include <asm/setup.h> +#include <asm/uaccess.h> #include <asm/xen/page.h> #include <asm/xen/hypercall.h> @@ -178,31 +82,26 @@ #include "multicalls.h" #include "xen-ops.h" +#define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) + static void __init m2p_override_init(void); +unsigned long *xen_p2m_addr __read_mostly; +EXPORT_SYMBOL_GPL(xen_p2m_addr); +unsigned long xen_p2m_size __read_mostly; +EXPORT_SYMBOL_GPL(xen_p2m_size); unsigned long xen_max_p2m_pfn __read_mostly; +EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); + +static DEFINE_SPINLOCK(p2m_update_lock); static unsigned long *p2m_mid_missing_mfn; static unsigned long *p2m_top_mfn; static unsigned long **p2m_top_mfn_p; - -/* Placeholders for holes in the address space */ -static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); - -static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); - -static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); - -RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); - -/* For each I/O range remapped we may lose up to two leaf pages for the boundary - * violations and three mid pages to cover up to 3GB. With - * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the - * remapped region. 
- */ -RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES); +static unsigned long *p2m_missing; +static unsigned long *p2m_identity; +static pte_t *p2m_missing_pte; +static pte_t *p2m_identity_pte; static inline unsigned p2m_top_index(unsigned long pfn) { @@ -220,14 +119,6 @@ static inline unsigned p2m_index(unsigned long pfn) return pfn % P2M_PER_PAGE; } -static void p2m_top_init(unsigned long ***top) -{ - unsigned i; - - for (i = 0; i < P2M_TOP_PER_PAGE; i++) - top[i] = p2m_mid_missing; -} - static void p2m_top_mfn_init(unsigned long *top) { unsigned i; @@ -244,28 +135,43 @@ static void p2m_top_mfn_p_init(unsigned long **top) top[i] = p2m_mid_missing_mfn; } -static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) +static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) { unsigned i; for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = leaf; + mid[i] = virt_to_mfn(leaf); } -static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) +static void p2m_init(unsigned long *p2m) { unsigned i; - for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = virt_to_mfn(leaf); + for (i = 0; i < P2M_PER_PAGE; i++) + p2m[i] = INVALID_P2M_ENTRY; } -static void p2m_init(unsigned long *p2m) +static void p2m_init_identity(unsigned long *p2m, unsigned long pfn) { unsigned i; - for (i = 0; i < P2M_MID_PER_PAGE; i++) - p2m[i] = INVALID_P2M_ENTRY; + for (i = 0; i < P2M_PER_PAGE; i++) + p2m[i] = IDENTITY_FRAME(pfn + i); +} + +static void * __ref alloc_p2m_page(void) +{ + if (unlikely(!slab_is_available())) + return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + + return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); +} + +/* Only to be called in case of a race for a page just allocated! 
*/ +static void free_p2m_page(void *p) +{ + BUG_ON(!slab_is_available()); + free_page((unsigned long)p); } /* @@ -280,40 +186,46 @@ static void p2m_init(unsigned long *p2m) */ void __ref xen_build_mfn_list_list(void) { - unsigned long pfn; + unsigned long pfn, mfn; + pte_t *ptep; + unsigned int level, topidx, mididx; + unsigned long *mid_mfn_p; if (xen_feature(XENFEAT_auto_translated_physmap)) return; /* Pre-initialize p2m_top_mfn to be completely missing */ if (p2m_top_mfn == NULL) { - p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + p2m_mid_missing_mfn = alloc_p2m_page(); p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); - p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + p2m_top_mfn_p = alloc_p2m_page(); p2m_top_mfn_p_init(p2m_top_mfn_p); - p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + p2m_top_mfn = alloc_p2m_page(); p2m_top_mfn_init(p2m_top_mfn); } else { /* Reinitialise, mfn's all change after migration */ p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); } - for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - unsigned long **mid; - unsigned long *mid_mfn_p; + for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN; + pfn += P2M_PER_PAGE) { + topidx = p2m_top_index(pfn); + mididx = p2m_mid_index(pfn); - mid = p2m_top[topidx]; mid_mfn_p = p2m_top_mfn_p[topidx]; + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), + &level); + BUG_ON(!ptep || level != PG_LEVEL_4K); + mfn = pte_mfn(*ptep); + ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); /* Don't bother allocating any mfn mid levels if * they're just missing, just update the stored mfn, * since all could have changed over a migrate. 
*/ - if (mid == p2m_mid_missing) { + if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) { BUG_ON(mididx); BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); @@ -322,19 +234,14 @@ void __ref xen_build_mfn_list_list(void) } if (mid_mfn_p == p2m_mid_missing_mfn) { - /* - * XXX boot-time only! We should never find - * missing parts of the mfn tree after - * runtime. - */ - mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + mid_mfn_p = alloc_p2m_page(); p2m_mid_mfn_init(mid_mfn_p, p2m_missing); p2m_top_mfn_p[topidx] = mid_mfn_p; } p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); - mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); + mid_mfn_p[mididx] = mfn; } } @@ -353,171 +260,235 @@ void xen_setup_mfn_list_list(void) /* Set up p2m_top to point to the domain-builder provided p2m pages */ void __init xen_build_dynamic_phys_to_machine(void) { - unsigned long *mfn_list; - unsigned long max_pfn; unsigned long pfn; if (xen_feature(XENFEAT_auto_translated_physmap)) return; - mfn_list = (unsigned long *)xen_start_info->mfn_list; - max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); - xen_max_p2m_pfn = max_pfn; + xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; + xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE); - p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_init(p2m_missing); - p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_init(p2m_identity); + for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++) + xen_p2m_addr[pfn] = INVALID_P2M_ENTRY; - p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(p2m_mid_missing, p2m_missing); - p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(p2m_mid_identity, p2m_identity); + xen_max_p2m_pfn = xen_p2m_size; +} - p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_top_init(p2m_top); +#define P2M_TYPE_IDENTITY 0 +#define P2M_TYPE_MISSING 1 +#define P2M_TYPE_PFN 2 +#define P2M_TYPE_UNKNOWN 3 - /* - * The domain 
builder gives us a pre-constructed p2m array in - * mfn_list for all the pages initially given to us, so we just - * need to graft that into our tree structure. - */ - for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); +static int xen_p2m_elem_type(unsigned long pfn) +{ + unsigned long mfn; - if (p2m_top[topidx] == p2m_mid_missing) { - unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(mid, p2m_missing); + if (pfn >= xen_p2m_size) + return P2M_TYPE_IDENTITY; - p2m_top[topidx] = mid; - } + mfn = xen_p2m_addr[pfn]; - /* - * As long as the mfn_list has enough entries to completely - * fill a p2m page, pointing into the array is ok. But if - * not the entries beyond the last pfn will be undefined. - */ - if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) { - unsigned long p2midx; + if (mfn == INVALID_P2M_ENTRY) + return P2M_TYPE_MISSING; - p2midx = max_pfn % P2M_PER_PAGE; - for ( ; p2midx < P2M_PER_PAGE; p2midx++) - mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY; - } - p2m_top[topidx][mididx] = &mfn_list[pfn]; - } + if (mfn & IDENTITY_FRAME_BIT) + return P2M_TYPE_IDENTITY; - m2p_override_init(); + return P2M_TYPE_PFN; } -#ifdef CONFIG_X86_64 -unsigned long __init xen_revector_p2m_tree(void) + +static void __init xen_rebuild_p2m_list(unsigned long *p2m) { - unsigned long va_start; - unsigned long va_end; + unsigned int i, chunk; unsigned long pfn; - unsigned long pfn_free = 0; - unsigned long *mfn_list = NULL; - unsigned long size; - - va_start = xen_start_info->mfn_list; - /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), - * so make sure it is rounded up to that */ - size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); - va_end = va_start + size; - - /* If we were revectored already, don't do it again. 
*/ - if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) - return 0; + unsigned long *mfns; + pte_t *ptep; + pmd_t *pmdp; + int type; - mfn_list = alloc_bootmem_align(size, PAGE_SIZE); - if (!mfn_list) { - pr_warn("Could not allocate space for a new P2M tree!\n"); - return xen_start_info->mfn_list; - } - /* Fill it out with INVALID_P2M_ENTRY value */ - memset(mfn_list, 0xFF, size); + p2m_missing = alloc_p2m_page(); + p2m_init(p2m_missing); + p2m_identity = alloc_p2m_page(); + p2m_init(p2m_identity); - for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx; - unsigned long *mid_p; + p2m_missing_pte = alloc_p2m_page(); + paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT); + p2m_identity_pte = alloc_p2m_page(); + paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT); + for (i = 0; i < PTRS_PER_PTE; i++) { + set_pte(p2m_missing_pte + i, + pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO)); + set_pte(p2m_identity_pte + i, + pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO)); + } - if (!p2m_top[topidx]) + for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) { + /* + * Try to map missing/identity PMDs or p2m-pages if possible. + * We have to respect the structure of the mfn_list_list + * which will be built just afterwards. + * Chunk size to test is one p2m page if we are in the middle + * of a mfn_list_list mid page and the complete mid page area + * if we are at index 0 of the mid page. Please note that a + * mid page might cover more than one PMD, e.g. on 32 bit PAE + * kernels. + */ + chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ? + P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE; + + type = xen_p2m_elem_type(pfn); + i = 0; + if (type != P2M_TYPE_PFN) + for (i = 1; i < chunk; i++) + if (xen_p2m_elem_type(pfn + i) != type) + break; + if (i < chunk) + /* Reset to minimal chunk size. 
*/ + chunk = P2M_PER_PAGE; + + if (type == P2M_TYPE_PFN || i < chunk) { + /* Use initial p2m page contents. */ +#ifdef CONFIG_X86_64 + mfns = alloc_p2m_page(); + copy_page(mfns, xen_p2m_addr + pfn); +#else + mfns = xen_p2m_addr + pfn; +#endif + ptep = populate_extra_pte((unsigned long)(p2m + pfn)); + set_pte(ptep, + pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); continue; + } - if (p2m_top[topidx] == p2m_mid_missing) + if (chunk == P2M_PER_PAGE) { + /* Map complete missing or identity p2m-page. */ + mfns = (type == P2M_TYPE_MISSING) ? + p2m_missing : p2m_identity; + ptep = populate_extra_pte((unsigned long)(p2m + pfn)); + set_pte(ptep, + pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO)); continue; + } - mididx = p2m_mid_index(pfn); - mid_p = p2m_top[topidx][mididx]; - if (!mid_p) - continue; - if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) - continue; + /* Complete missing or identity PMD(s) can be mapped. */ + ptep = (type == P2M_TYPE_MISSING) ? + p2m_missing_pte : p2m_identity_pte; + for (i = 0; i < PMDS_PER_MID_PAGE; i++) { + pmdp = populate_extra_pmd( + (unsigned long)(p2m + pfn + i * PTRS_PER_PTE)); + set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); + } + } +} - if ((unsigned long)mid_p == INVALID_P2M_ENTRY) - continue; +void __init xen_vmalloc_p2m_tree(void) +{ + static struct vm_struct vm; - /* The old va. 
Rebase it on mfn_list */ - if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { - unsigned long *new; + vm.flags = VM_ALLOC; + vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn, + PMD_SIZE * PMDS_PER_MID_PAGE); + vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); + pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); - if (pfn_free > (size / sizeof(unsigned long))) { - WARN(1, "Only allocated for %ld pages, but we want %ld!\n", - size / sizeof(unsigned long), pfn_free); - return 0; - } - new = &mfn_list[pfn_free]; + xen_max_p2m_pfn = vm.size / sizeof(unsigned long); - copy_page(new, mid_p); - p2m_top[topidx][mididx] = &mfn_list[pfn_free]; + xen_rebuild_p2m_list(vm.addr); - pfn_free += P2M_PER_PAGE; + xen_p2m_addr = vm.addr; + xen_p2m_size = xen_max_p2m_pfn; - } - /* This should be the leafs allocated for identity from _brk. */ - } - return (unsigned long)mfn_list; + xen_inv_extra_mem(); + m2p_override_init(); } -#else -unsigned long __init xen_revector_p2m_tree(void) -{ - return 0; -} -#endif + unsigned long get_phys_to_machine(unsigned long pfn) { - unsigned topidx, mididx, idx; + pte_t *ptep; + unsigned int level; + + if (unlikely(pfn >= xen_p2m_size)) { + if (pfn < xen_max_p2m_pfn) + return xen_chk_extra_mem(pfn); - if (unlikely(pfn >= MAX_P2M_PFN)) return IDENTITY_FRAME(pfn); + } - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); + BUG_ON(!ptep || level != PG_LEVEL_4K); /* * The INVALID_P2M_ENTRY is filled in both p2m_*identity * and in p2m_*missing, so returning the INVALID_P2M_ENTRY * would be wrong. */ - if (p2m_top[topidx][mididx] == p2m_identity) + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) return IDENTITY_FRAME(pfn); - return p2m_top[topidx][mididx][idx]; + return xen_p2m_addr[pfn]; } EXPORT_SYMBOL_GPL(get_phys_to_machine); -static void *alloc_p2m_page(void) +/* + * Allocate new pmd(s). 
It is checked whether the old pmd is still in place. + * If not, nothing is changed. This is okay as the only reason for allocating + * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual + * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! + */ +static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) { - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); -} + pte_t *ptechk; + pte_t *pteret = ptep; + pte_t *pte_newpg[PMDS_PER_MID_PAGE]; + pmd_t *pmdp; + unsigned int level; + unsigned long flags; + unsigned long vaddr; + int i; -static void free_p2m_page(void *p) -{ - free_page((unsigned long)p); + /* Do all allocations first to bail out in error case. */ + for (i = 0; i < PMDS_PER_MID_PAGE; i++) { + pte_newpg[i] = alloc_p2m_page(); + if (!pte_newpg[i]) { + for (i--; i >= 0; i--) + free_p2m_page(pte_newpg[i]); + + return NULL; + } + } + + vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1); + + for (i = 0; i < PMDS_PER_MID_PAGE; i++) { + copy_page(pte_newpg[i], pte_pg); + paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT); + + pmdp = lookup_pmd_address(vaddr); + BUG_ON(!pmdp); + + spin_lock_irqsave(&p2m_update_lock, flags); + + ptechk = lookup_address(vaddr, &level); + if (ptechk == pte_pg) { + set_pmd(pmdp, + __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); + if (vaddr == (addr & ~(PMD_SIZE - 1))) + pteret = pte_offset_kernel(pmdp, addr); + pte_newpg[i] = NULL; + } + + spin_unlock_irqrestore(&p2m_update_lock, flags); + + if (pte_newpg[i]) { + paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT); + free_p2m_page(pte_newpg[i]); + } + + vaddr += PMD_SIZE; + } + + return pteret; } /* @@ -530,58 +501,62 @@ static void free_p2m_page(void *p) static bool alloc_p2m(unsigned long pfn) { unsigned topidx, mididx; - unsigned long ***top_p, **mid; unsigned long *top_mfn_p, *mid_mfn; - unsigned long *p2m_orig; + pte_t *ptep, *pte_pg; + unsigned int level; + unsigned long flags; + unsigned long addr = 
(unsigned long)(xen_p2m_addr + pfn); + unsigned long p2m_pfn; topidx = p2m_top_index(pfn); mididx = p2m_mid_index(pfn); - top_p = &p2m_top[topidx]; - mid = ACCESS_ONCE(*top_p); + ptep = lookup_address(addr, &level); + BUG_ON(!ptep || level != PG_LEVEL_4K); + pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); - if (mid == p2m_mid_missing) { - /* Mid level is missing, allocate a new one */ - mid = alloc_p2m_page(); - if (!mid) + if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { + /* PMD level is missing, allocate a new one */ + ptep = alloc_p2m_pmd(addr, ptep, pte_pg); + if (!ptep) return false; - - p2m_mid_init(mid, p2m_missing); - - if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) - free_p2m_page(mid); } - top_mfn_p = &p2m_top_mfn[topidx]; - mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); + if (p2m_top_mfn) { + top_mfn_p = &p2m_top_mfn[topidx]; + mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); - BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); + BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); - if (mid_mfn == p2m_mid_missing_mfn) { - /* Separately check the mid mfn level */ - unsigned long missing_mfn; - unsigned long mid_mfn_mfn; - unsigned long old_mfn; + if (mid_mfn == p2m_mid_missing_mfn) { + /* Separately check the mid mfn level */ + unsigned long missing_mfn; + unsigned long mid_mfn_mfn; + unsigned long old_mfn; - mid_mfn = alloc_p2m_page(); - if (!mid_mfn) - return false; + mid_mfn = alloc_p2m_page(); + if (!mid_mfn) + return false; - p2m_mid_mfn_init(mid_mfn, p2m_missing); + p2m_mid_mfn_init(mid_mfn, p2m_missing); - missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); - mid_mfn_mfn = virt_to_mfn(mid_mfn); - old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); - if (old_mfn != missing_mfn) { - free_p2m_page(mid_mfn); - mid_mfn = mfn_to_virt(old_mfn); - } else { - p2m_top_mfn_p[topidx] = mid_mfn; + missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); + mid_mfn_mfn = virt_to_mfn(mid_mfn); + old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); 
+ if (old_mfn != missing_mfn) { + free_p2m_page(mid_mfn); + mid_mfn = mfn_to_virt(old_mfn); + } else { + p2m_top_mfn_p[topidx] = mid_mfn; + } } + } else { + mid_mfn = NULL; } - p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); - if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { + p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep)); + if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) || + p2m_pfn == PFN_DOWN(__pa(p2m_missing))) { /* p2m leaf page is missing */ unsigned long *p2m; @@ -589,183 +564,36 @@ static bool alloc_p2m(unsigned long pfn) if (!p2m) return false; - p2m_init(p2m); - - if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) - free_p2m_page(p2m); + if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) + p2m_init(p2m); else - mid_mfn[mididx] = virt_to_mfn(p2m); - } - - return true; -} - -static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) -{ - unsigned topidx, mididx, idx; - unsigned long *p2m; - - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - /* Pfff.. No boundary cross-over, lets get out. */ - if (!idx && check_boundary) - return false; - - WARN(p2m_top[topidx][mididx] == p2m_identity, - "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", - topidx, mididx); - - /* - * Could be done by xen_build_dynamic_phys_to_machine.. 
- */ - if (p2m_top[topidx][mididx] != p2m_missing) - return false; - - /* Boundary cross-over for the edges: */ - p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); - - p2m_init(p2m); + p2m_init_identity(p2m, pfn); - p2m_top[topidx][mididx] = p2m; + spin_lock_irqsave(&p2m_update_lock, flags); - return true; -} - -static bool __init early_alloc_p2m_middle(unsigned long pfn) -{ - unsigned topidx = p2m_top_index(pfn); - unsigned long **mid; - - mid = p2m_top[topidx]; - if (mid == p2m_mid_missing) { - mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - - p2m_mid_init(mid, p2m_missing); - - p2m_top[topidx] = mid; - } - return true; -} - -/* - * Skim over the P2M tree looking at pages that are either filled with - * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and - * replace the P2M leaf with a p2m_missing or p2m_identity. - * Stick the old page in the new P2M tree location. - */ -static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn) -{ - unsigned topidx; - unsigned mididx; - unsigned ident_pfns; - unsigned inv_pfns; - unsigned long *p2m; - unsigned idx; - unsigned long pfn; - - /* We only look when this entails a P2M middle layer */ - if (p2m_index(set_pfn)) - return false; - - for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { - topidx = p2m_top_index(pfn); - - if (!p2m_top[topidx]) - continue; - - if (p2m_top[topidx] == p2m_mid_missing) - continue; - - mididx = p2m_mid_index(pfn); - p2m = p2m_top[topidx][mididx]; - if (!p2m) - continue; - - if ((p2m == p2m_missing) || (p2m == p2m_identity)) - continue; - - if ((unsigned long)p2m == INVALID_P2M_ENTRY) - continue; - - ident_pfns = 0; - inv_pfns = 0; - for (idx = 0; idx < P2M_PER_PAGE; idx++) { - /* IDENTITY_PFNs are 1:1 */ - if (p2m[idx] == IDENTITY_FRAME(pfn + idx)) - ident_pfns++; - else if (p2m[idx] == INVALID_P2M_ENTRY) - inv_pfns++; - else - break; + if (pte_pfn(*ptep) == p2m_pfn) { + set_pte(ptep, + pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); + if (mid_mfn) + mid_mfn[mididx] = 
virt_to_mfn(p2m); + p2m = NULL; } - if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE)) - goto found; - } - return false; -found: - /* Found one, replace old with p2m_identity or p2m_missing */ - p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); - - /* Reset where we want to stick the old page in. */ - topidx = p2m_top_index(set_pfn); - mididx = p2m_mid_index(set_pfn); - - /* This shouldn't happen */ - if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) - early_alloc_p2m_middle(set_pfn); - - if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) - return false; - - p2m_init(p2m); - p2m_top[topidx][mididx] = p2m; - return true; -} -bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { - if (!early_alloc_p2m_middle(pfn)) - return false; - - if (early_can_reuse_p2m_middle(pfn)) - return __set_phys_to_machine(pfn, mfn); - - if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) - return false; + spin_unlock_irqrestore(&p2m_update_lock, flags); - if (!__set_phys_to_machine(pfn, mfn)) - return false; + if (p2m) + free_p2m_page(p2m); } return true; } -static void __init early_split_p2m(unsigned long pfn) -{ - unsigned long mididx, idx; - - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - /* - * Allocate new middle and leaf pages if this pfn lies in the - * middle of one. 
- */ - if (mididx || idx) - early_alloc_p2m_middle(pfn); - if (idx) - early_alloc_p2m(pfn, false); -} - unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e) { unsigned long pfn; - if (unlikely(pfn_s >= MAX_P2M_PFN)) + if (unlikely(pfn_s >= xen_p2m_size)) return 0; if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) @@ -774,101 +602,51 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, if (pfn_s > pfn_e) return 0; - if (pfn_e > MAX_P2M_PFN) - pfn_e = MAX_P2M_PFN; - - early_split_p2m(pfn_s); - early_split_p2m(pfn_e); - - for (pfn = pfn_s; pfn < pfn_e;) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - - if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) - break; - pfn++; - - /* - * If the PFN was set to a middle or leaf identity - * page the remainder must also be identity, so skip - * ahead to the next middle or leaf entry. - */ - if (p2m_top[topidx] == p2m_mid_identity) - pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE); - else if (p2m_top[topidx][mididx] == p2m_identity) - pfn = ALIGN(pfn, P2M_PER_PAGE); - } + if (pfn_e > xen_p2m_size) + pfn_e = xen_p2m_size; - WARN((pfn - pfn_s) != (pfn_e - pfn_s), - "Identity mapping failed. 
We are %ld short of 1-1 mappings!\n", - (pfn_e - pfn_s) - (pfn - pfn_s)); + for (pfn = pfn_s; pfn < pfn_e; pfn++) + xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn); return pfn - pfn_s; } -/* Try to install p2m mapping; fail if intermediate bits missing */ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { - unsigned topidx, mididx, idx; + pte_t *ptep; + unsigned int level; /* don't track P2M changes in autotranslate guests */ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) return true; - if (unlikely(pfn >= MAX_P2M_PFN)) { + if (unlikely(pfn >= xen_p2m_size)) { BUG_ON(mfn != INVALID_P2M_ENTRY); return true; } - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - /* For sparse holes were the p2m leaf has real PFN along with - * PCI holes, stick in the PFN as the MFN value. - * - * set_phys_range_identity() will have allocated new middle - * and leaf pages as required so an existing p2m_mid_missing - * or p2m_missing mean that whole range will be identity so - * these can be switched to p2m_mid_identity or p2m_identity. - */ - if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { - if (p2m_top[topidx] == p2m_mid_identity) - return true; - - if (p2m_top[topidx] == p2m_mid_missing) { - WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing, - p2m_mid_identity) != p2m_mid_missing); - return true; - } - - if (p2m_top[topidx][mididx] == p2m_identity) - return true; + if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) + return true; - /* Swap over from MISSING to IDENTITY if needed. 
*/ - if (p2m_top[topidx][mididx] == p2m_missing) { - WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, - p2m_identity) != p2m_missing); - return true; - } - } + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); + BUG_ON(!ptep || level != PG_LEVEL_4K); - if (p2m_top[topidx][mididx] == p2m_missing) + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing))) return mfn == INVALID_P2M_ENTRY; - p2m_top[topidx][mididx][idx] = mfn; + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) + return mfn == IDENTITY_FRAME(pfn); - return true; + return false; } bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) { - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { + if (unlikely(!__set_phys_to_machine(pfn, mfn))) { if (!alloc_p2m(pfn)) return false; - if (!__set_phys_to_machine(pfn, mfn)) - return false; + return __set_phys_to_machine(pfn, mfn); } return true; @@ -877,15 +655,16 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) #define M2P_OVERRIDE_HASH_SHIFT 10 #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) -static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH); +static struct list_head *m2p_overrides; static DEFINE_SPINLOCK(m2p_override_lock); static void __init m2p_override_init(void) { unsigned i; - m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, - sizeof(unsigned long)); + m2p_overrides = alloc_bootmem_align( + sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, + sizeof(unsigned long)); for (i = 0; i < M2P_OVERRIDE_HASH; i++) INIT_LIST_HEAD(&m2p_overrides[i]); @@ -896,68 +675,9 @@ static unsigned long mfn_hash(unsigned long mfn) return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); } -int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) -{ - int i, ret = 0; - bool lazy = false; - pte_t *pte; - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return 0; - - if (kmap_ops && - !in_interrupt() 
&& - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { - arch_enter_lazy_mmu_mode(); - lazy = true; - } - - for (i = 0; i < count; i++) { - unsigned long mfn, pfn; - - /* Do not add to override if the map failed. */ - if (map_ops[i].status) - continue; - - if (map_ops[i].flags & GNTMAP_contains_pte) { - pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + - (map_ops[i].host_addr & ~PAGE_MASK)); - mfn = pte_mfn(*pte); - } else { - mfn = PFN_DOWN(map_ops[i].dev_bus_addr); - } - pfn = page_to_pfn(pages[i]); - - WARN_ON(PagePrivate(pages[i])); - SetPagePrivate(pages[i]); - set_page_private(pages[i], mfn); - pages[i]->index = pfn_to_mfn(pfn); - - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { - ret = -ENOMEM; - goto out; - } - - if (kmap_ops) { - ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); - if (ret) - goto out; - } - } - -out: - if (lazy) - arch_leave_lazy_mmu_mode(); - - return ret; -} -EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); - /* Add an MFN override for a particular page */ -int m2p_add_override(unsigned long mfn, struct page *page, - struct gnttab_map_grant_ref *kmap_op) +static int m2p_add_override(unsigned long mfn, struct page *page, + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long pfn; @@ -970,7 +690,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, address = (unsigned long)__va(pfn << PAGE_SHIFT); ptep = lookup_address(address, &level); if (WARN(ptep == NULL || level != PG_LEVEL_4K, - "m2p_add_override: pfn %lx not mapped", pfn)) + "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; } @@ -1004,19 +724,19 @@ int m2p_add_override(unsigned long mfn, struct page *page, * because mfn_to_pfn (that ends up being called by GUPF) will * return the backend pfn rather than the frontend pfn. 
*/ pfn = mfn_to_pfn_no_overrides(mfn); - if (get_phys_to_machine(pfn) == mfn) + if (__pfn_to_mfn(pfn) == mfn) set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); return 0; } -EXPORT_SYMBOL_GPL(m2p_add_override); -int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) +int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { int i, ret = 0; bool lazy = false; + pte_t *pte; if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; @@ -1029,35 +749,75 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, } for (i = 0; i < count; i++) { - unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i])); - unsigned long pfn = page_to_pfn(pages[i]); + unsigned long mfn, pfn; - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { - ret = -EINVAL; - goto out; + /* Do not add to override if the map failed. 
*/ + if (map_ops[i].status) + continue; + + if (map_ops[i].flags & GNTMAP_contains_pte) { + pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + + (map_ops[i].host_addr & ~PAGE_MASK)); + mfn = pte_mfn(*pte); + } else { + mfn = PFN_DOWN(map_ops[i].dev_bus_addr); } + pfn = page_to_pfn(pages[i]); - set_page_private(pages[i], INVALID_P2M_ENTRY); - WARN_ON(!PagePrivate(pages[i])); - ClearPagePrivate(pages[i]); - set_phys_to_machine(pfn, pages[i]->index); + WARN_ON(PagePrivate(pages[i])); + SetPagePrivate(pages[i]); + set_page_private(pages[i], mfn); + pages[i]->index = pfn_to_mfn(pfn); - if (kmap_ops) - ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); - if (ret) + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { + ret = -ENOMEM; goto out; + } + + if (kmap_ops) { + ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); + if (ret) + goto out; + } } out: if (lazy) arch_leave_lazy_mmu_mode(); + return ret; } -EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); +EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); -int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op, - unsigned long mfn) +static struct page *m2p_find_override(unsigned long mfn) +{ + unsigned long flags; + struct list_head *bucket; + struct page *p, *ret; + + if (unlikely(!m2p_overrides)) + return NULL; + + ret = NULL; + bucket = &m2p_overrides[mfn_hash(mfn)]; + + spin_lock_irqsave(&m2p_override_lock, flags); + + list_for_each_entry(p, bucket, lru) { + if (page_private(p) == mfn) { + ret = p; + break; + } + } + + spin_unlock_irqrestore(&m2p_override_lock, flags); + + return ret; +} + +static int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op, + unsigned long mfn) { unsigned long flags; unsigned long pfn; @@ -1072,7 +832,7 @@ int m2p_remove_override(struct page *page, ptep = lookup_address(address, &level); if (WARN(ptep == NULL || level != PG_LEVEL_4K, - "m2p_remove_override: pfn %lx not mapped", pfn)) + "m2p_remove_override: pfn %lx 
not mapped", pfn)) return -EINVAL; } @@ -1102,9 +862,8 @@ int m2p_remove_override(struct page *page, * hypercall actually returned an error. */ if (kmap_op->handle == GNTST_general_error) { - printk(KERN_WARNING "m2p_remove_override: " - "pfn %lx mfn %lx, failed to modify kernel mappings", - pfn, mfn); + pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings", + pfn, mfn); put_balloon_scratch_page(); return -1; } @@ -1112,14 +871,14 @@ int m2p_remove_override(struct page *page, xen_mc_batch(); mcs = __xen_mc_entry( - sizeof(struct gnttab_unmap_and_replace)); + sizeof(struct gnttab_unmap_and_replace)); unmap_op = mcs.args; unmap_op->host_addr = kmap_op->host_addr; unmap_op->new_addr = scratch_page_address; unmap_op->handle = kmap_op->handle; MULTI_grant_table_op(mcs.mc, - GNTTABOP_unmap_and_replace, unmap_op, 1); + GNTTABOP_unmap_and_replace, unmap_op, 1); mcs = __xen_mc_entry(0); MULTI_update_va_mapping(mcs.mc, scratch_page_address, @@ -1145,35 +904,56 @@ int m2p_remove_override(struct page *page, * pfn again. 
*/ mfn &= ~FOREIGN_FRAME_BIT; pfn = mfn_to_pfn_no_overrides(mfn); - if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) && m2p_find_override(mfn) == NULL) set_phys_to_machine(pfn, mfn); return 0; } -EXPORT_SYMBOL_GPL(m2p_remove_override); -struct page *m2p_find_override(unsigned long mfn) +int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { - unsigned long flags; - struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; - struct page *p, *ret; + int i, ret = 0; + bool lazy = false; - ret = NULL; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; - spin_lock_irqsave(&m2p_override_lock, flags); + if (kmap_ops && + !in_interrupt() && + paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + arch_enter_lazy_mmu_mode(); + lazy = true; + } - list_for_each_entry(p, bucket, lru) { - if (page_private(p) == mfn) { - ret = p; - break; + for (i = 0; i < count; i++) { + unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); + unsigned long pfn = page_to_pfn(pages[i]); + + if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { + ret = -EINVAL; + goto out; } - } - spin_unlock_irqrestore(&m2p_override_lock, flags); + set_page_private(pages[i], INVALID_P2M_ENTRY); + WARN_ON(!PagePrivate(pages[i])); + ClearPagePrivate(pages[i]); + set_phys_to_machine(pfn, pages[i]->index); + + if (kmap_ops) + ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); + if (ret) + goto out; + } +out: + if (lazy) + arch_leave_lazy_mmu_mode(); return ret; } +EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) { @@ -1192,79 +972,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn); #include "debugfs.h" static int p2m_dump_show(struct seq_file *m, void *v) { - static const char * const level_name[] = { "top", "middle", - "entry", "abnormal", "error"}; -#define TYPE_IDENTITY 0 
-#define TYPE_MISSING 1 -#define TYPE_PFN 2 -#define TYPE_UNKNOWN 3 static const char * const type_name[] = { - [TYPE_IDENTITY] = "identity", - [TYPE_MISSING] = "missing", - [TYPE_PFN] = "pfn", - [TYPE_UNKNOWN] = "abnormal"}; - unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; - unsigned int uninitialized_var(prev_level); - unsigned int uninitialized_var(prev_type); - - if (!p2m_top) - return 0; - - for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - unsigned idx = p2m_index(pfn); - unsigned lvl, type; - - lvl = 4; - type = TYPE_UNKNOWN; - if (p2m_top[topidx] == p2m_mid_missing) { - lvl = 0; type = TYPE_MISSING; - } else if (p2m_top[topidx] == NULL) { - lvl = 0; type = TYPE_UNKNOWN; - } else if (p2m_top[topidx][mididx] == NULL) { - lvl = 1; type = TYPE_UNKNOWN; - } else if (p2m_top[topidx][mididx] == p2m_identity) { - lvl = 1; type = TYPE_IDENTITY; - } else if (p2m_top[topidx][mididx] == p2m_missing) { - lvl = 1; type = TYPE_MISSING; - } else if (p2m_top[topidx][mididx][idx] == 0) { - lvl = 2; type = TYPE_UNKNOWN; - } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { - lvl = 2; type = TYPE_IDENTITY; - } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { - lvl = 2; type = TYPE_MISSING; - } else if (p2m_top[topidx][mididx][idx] == pfn) { - lvl = 2; type = TYPE_PFN; - } else if (p2m_top[topidx][mididx][idx] != pfn) { - lvl = 2; type = TYPE_PFN; - } - if (pfn == 0) { - prev_level = lvl; - prev_type = type; - } - if (pfn == MAX_DOMAIN_PAGES-1) { - lvl = 3; - type = TYPE_UNKNOWN; - } - if (prev_type != type) { - seq_printf(m, " [0x%lx->0x%lx] %s\n", - prev_pfn_type, pfn, type_name[prev_type]); - prev_pfn_type = pfn; + [P2M_TYPE_IDENTITY] = "identity", + [P2M_TYPE_MISSING] = "missing", + [P2M_TYPE_PFN] = "pfn", + [P2M_TYPE_UNKNOWN] = "abnormal"}; + unsigned long pfn, first_pfn; + int type, prev_type; + + prev_type = xen_p2m_elem_type(0); + first_pfn = 0; + + for 
(pfn = 0; pfn < xen_p2m_size; pfn++) { + type = xen_p2m_elem_type(pfn); + if (type != prev_type) { + seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, + type_name[prev_type]); prev_type = type; - } - if (prev_level != lvl) { - seq_printf(m, " [0x%lx->0x%lx] level %s\n", - prev_pfn_level, pfn, level_name[prev_level]); - prev_pfn_level = pfn; - prev_level = lvl; + first_pfn = pfn; } } + seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, + type_name[prev_type]); return 0; -#undef TYPE_IDENTITY -#undef TYPE_MISSING -#undef TYPE_PFN -#undef TYPE_UNKNOWN } static int p2m_dump_open(struct inode *inode, struct file *filp) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 29834b3fd87f..dfd77dec8e2b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -30,6 +30,7 @@ #include "xen-ops.h" #include "vdso.h" #include "p2m.h" +#include "mmu.h" /* These are code, but not functions. Defined in entry.S */ extern const char xen_hypervisor_callback[]; @@ -47,8 +48,19 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; /* Number of pages released from the initial allocation. */ unsigned long xen_released_pages; -/* Buffer used to remap identity mapped pages */ -unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata; +/* + * Buffer used to remap identity mapped pages. We only need the virtual space. + * The physical page behind this address is remapped as needed to different + * buffer pages. + */ +#define REMAP_SIZE (P2M_PER_PAGE - 3) +static struct { + unsigned long next_area_mfn; + unsigned long target_pfn; + unsigned long size; + unsigned long mfns[REMAP_SIZE]; +} xen_remap_buf __initdata __aligned(PAGE_SIZE); +static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; /* * The maximum amount of extra memory compared to the base size. 
The @@ -64,7 +76,6 @@ unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata; static void __init xen_add_extra_mem(u64 start, u64 size) { - unsigned long pfn; int i; for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { @@ -84,75 +95,76 @@ static void __init xen_add_extra_mem(u64 start, u64 size) printk(KERN_WARNING "Warning: not enough extra memory regions\n"); memblock_reserve(start, size); +} - xen_max_p2m_pfn = PFN_DOWN(start + size); - for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { - unsigned long mfn = pfn_to_mfn(pfn); - - if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) - continue; - WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", - pfn, mfn); +static void __init xen_del_extra_mem(u64 start, u64 size) +{ + int i; + u64 start_r, size_r; - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + start_r = xen_extra_mem[i].start; + size_r = xen_extra_mem[i].size; + + /* Start of region. */ + if (start_r == start) { + BUG_ON(size > size_r); + xen_extra_mem[i].start += size; + xen_extra_mem[i].size -= size; + break; + } + /* End of region. */ + if (start_r + size_r == start + size) { + BUG_ON(size > size_r); + xen_extra_mem[i].size -= size; + break; + } + /* Mid of region. */ + if (start > start_r && start < start_r + size_r) { + BUG_ON(start + size > start_r + size_r); + xen_extra_mem[i].size = start - start_r; + /* Calling memblock_reserve() again is okay. */ + xen_add_extra_mem(start + size, start_r + size_r - + (start + size)); + break; + } } + memblock_free(start, size); } -static unsigned long __init xen_do_chunk(unsigned long start, - unsigned long end, bool release) +/* + * Called during boot before the p2m list can take entries beyond the + * hypervisor supplied p2m list. Entries in extra mem are to be regarded as + * invalid. 
+ */ +unsigned long __ref xen_chk_extra_mem(unsigned long pfn) { - struct xen_memory_reservation reservation = { - .address_bits = 0, - .extent_order = 0, - .domid = DOMID_SELF - }; - unsigned long len = 0; - unsigned long pfn; - int ret; + int i; + unsigned long addr = PFN_PHYS(pfn); - for (pfn = start; pfn < end; pfn++) { - unsigned long frame; - unsigned long mfn = pfn_to_mfn(pfn); + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + if (addr >= xen_extra_mem[i].start && + addr < xen_extra_mem[i].start + xen_extra_mem[i].size) + return INVALID_P2M_ENTRY; + } - if (release) { - /* Make sure pfn exists to start with */ - if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) - continue; - frame = mfn; - } else { - if (mfn != INVALID_P2M_ENTRY) - continue; - frame = pfn; - } - set_xen_guest_handle(reservation.extent_start, &frame); - reservation.nr_extents = 1; + return IDENTITY_FRAME(pfn); +} - ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap, - &reservation); - WARN(ret != 1, "Failed to %s pfn %lx err=%d\n", - release ? "release" : "populate", pfn, ret); +/* + * Mark all pfns of extra mem as invalid in p2m list. + */ +void __init xen_inv_extra_mem(void) +{ + unsigned long pfn, pfn_s, pfn_e; + int i; - if (ret == 1) { - if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) { - if (release) - break; - set_xen_guest_handle(reservation.extent_start, &frame); - reservation.nr_extents = 1; - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); - break; - } - len++; - } else - break; + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + pfn_s = PFN_DOWN(xen_extra_mem[i].start); + pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size); + for (pfn = pfn_s; pfn < pfn_e; pfn++) + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } - if (len) - printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n", - release ? "Freeing" : "Populating", - start, end, len, - release ? 
"freed" : "added"); - - return len; } /* @@ -198,26 +210,62 @@ static unsigned long __init xen_find_pfn_range( return done; } +static int __init xen_free_mfn(unsigned long mfn) +{ + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + set_xen_guest_handle(reservation.extent_start, &mfn); + reservation.nr_extents = 1; + + return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); +} + /* - * This releases a chunk of memory and then does the identity map. It's used as + * This releases a chunk of memory and then does the identity map. It's used * as a fallback if the remapping fails. */ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, unsigned long *released) { + unsigned long len = 0; + unsigned long pfn, end; + int ret; + WARN_ON(start_pfn > end_pfn); + end = min(end_pfn, nr_pages); + for (pfn = start_pfn; pfn < end; pfn++) { + unsigned long mfn = pfn_to_mfn(pfn); + + /* Make sure pfn exists to start with */ + if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) + continue; + + ret = xen_free_mfn(mfn); + WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); + + if (ret == 1) { + if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY)) + break; + len++; + } else + break; + } + /* Need to release pages first */ - *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true); + *released += len; *identity += set_phys_range_identity(start_pfn, end_pfn); } /* - * Helper function to update both the p2m and m2p tables. + * Helper function to update the p2m and m2p tables and kernel mapping. 
*/ -static unsigned long __init xen_update_mem_tables(unsigned long pfn, - unsigned long mfn) +static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) { struct mmu_update update = { .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, @@ -225,161 +273,88 @@ static unsigned long __init xen_update_mem_tables(unsigned long pfn, }; /* Update p2m */ - if (!early_set_phys_to_machine(pfn, mfn)) { + if (!set_phys_to_machine(pfn, mfn)) { WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n", pfn, mfn); - return false; + BUG(); } /* Update m2p */ if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) { WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n", mfn, pfn); - return false; + BUG(); } - return true; + /* Update kernel mapping, but not for highmem. */ + if ((pfn << PAGE_SHIFT) >= __pa(high_memory)) + return; + + if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT), + mfn_pte(mfn, PAGE_KERNEL), 0)) { + WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n", + mfn, pfn); + BUG(); + } } /* * This function updates the p2m and m2p tables with an identity map from - * start_pfn to start_pfn+size and remaps the underlying RAM of the original - * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks - * to not exhaust the reserved brk space. Doing it in properly aligned blocks - * ensures we only allocate the minimum required leaf pages in the p2m table. It - * copies the existing mfns from the p2m table under the 1:1 map, overwrites - * them with the identity map and then updates the p2m and m2p tables with the - * remapped memory. + * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the + * original allocation at remap_pfn. The information needed for remapping is + * saved in the memory itself to avoid the need for allocating buffers. 
The + * complete remap information is contained in a list of MFNs each containing + * up to REMAP_SIZE MFNs and the start target PFN for doing the remap. + * This enables us to preserve the original mfn sequence while doing the + * remapping at a time when the memory management is capable of allocating + * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and + * its callers. */ -static unsigned long __init xen_do_set_identity_and_remap_chunk( +static void __init xen_do_set_identity_and_remap_chunk( unsigned long start_pfn, unsigned long size, unsigned long remap_pfn) { + unsigned long buf = (unsigned long)&xen_remap_buf; + unsigned long mfn_save, mfn; unsigned long ident_pfn_iter, remap_pfn_iter; - unsigned long ident_start_pfn_align, remap_start_pfn_align; - unsigned long ident_end_pfn_align, remap_end_pfn_align; - unsigned long ident_boundary_pfn, remap_boundary_pfn; - unsigned long ident_cnt = 0; - unsigned long remap_cnt = 0; + unsigned long ident_end_pfn = start_pfn + size; unsigned long left = size; - unsigned long mod; - int i; + unsigned long ident_cnt = 0; + unsigned int i, chunk; WARN_ON(size == 0); BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); - /* - * Determine the proper alignment to remap memory in P2M_PER_PAGE sized - * blocks. We need to keep track of both the existing pfn mapping and - * the new pfn remapping. - */ - mod = start_pfn % P2M_PER_PAGE; - ident_start_pfn_align = - mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn; - mod = remap_pfn % P2M_PER_PAGE; - remap_start_pfn_align = - mod ? 
(remap_pfn - mod + P2M_PER_PAGE) : remap_pfn; - mod = (start_pfn + size) % P2M_PER_PAGE; - ident_end_pfn_align = start_pfn + size - mod; - mod = (remap_pfn + size) % P2M_PER_PAGE; - remap_end_pfn_align = remap_pfn + size - mod; - - /* Iterate over each p2m leaf node in each range */ - for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align; - ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align; - ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) { - /* Check we aren't past the end */ - BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size); - BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size); - - /* Save p2m mappings */ - for (i = 0; i < P2M_PER_PAGE; i++) - xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i); - - /* Set identity map which will free a p2m leaf */ - ident_cnt += set_phys_range_identity(ident_pfn_iter, - ident_pfn_iter + P2M_PER_PAGE); + mfn_save = virt_to_mfn(buf); -#ifdef DEBUG - /* Helps verify a p2m leaf has been freed */ - for (i = 0; i < P2M_PER_PAGE; i++) { - unsigned int pfn = ident_pfn_iter + i; - BUG_ON(pfn_to_mfn(pfn) != pfn); - } -#endif - /* Now remap memory */ - for (i = 0; i < P2M_PER_PAGE; i++) { - unsigned long mfn = xen_remap_buf[i]; - - /* This will use the p2m leaf freed above */ - if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) { - WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n", - remap_pfn_iter + i, mfn); - return 0; - } - - remap_cnt++; - } - - left -= P2M_PER_PAGE; - } - - /* Max boundary space possible */ - BUG_ON(left > (P2M_PER_PAGE - 1) * 2); + for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn; + ident_pfn_iter < ident_end_pfn; + ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) { + chunk = (left < REMAP_SIZE) ? 
left : REMAP_SIZE; - /* Now handle the boundary conditions */ - ident_boundary_pfn = start_pfn; - remap_boundary_pfn = remap_pfn; - for (i = 0; i < left; i++) { - unsigned long mfn; + /* Map first pfn to xen_remap_buf */ + mfn = pfn_to_mfn(ident_pfn_iter); + set_pte_mfn(buf, mfn, PAGE_KERNEL); - /* These two checks move from the start to end boundaries */ - if (ident_boundary_pfn == ident_start_pfn_align) - ident_boundary_pfn = ident_pfn_iter; - if (remap_boundary_pfn == remap_start_pfn_align) - remap_boundary_pfn = remap_pfn_iter; + /* Save mapping information in page */ + xen_remap_buf.next_area_mfn = xen_remap_mfn; + xen_remap_buf.target_pfn = remap_pfn_iter; + xen_remap_buf.size = chunk; + for (i = 0; i < chunk; i++) + xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i); - /* Check we aren't past the end */ - BUG_ON(ident_boundary_pfn >= start_pfn + size); - BUG_ON(remap_boundary_pfn >= remap_pfn + size); - - mfn = pfn_to_mfn(ident_boundary_pfn); - - if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) { - WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n", - remap_pfn_iter + i, mfn); - return 0; - } - remap_cnt++; + /* Put remap buf into list. */ + xen_remap_mfn = mfn; - ident_boundary_pfn++; - remap_boundary_pfn++; - } + /* Set identity map */ + ident_cnt += set_phys_range_identity(ident_pfn_iter, + ident_pfn_iter + chunk); - /* Finish up the identity map */ - if (ident_start_pfn_align >= ident_end_pfn_align) { - /* - * In this case we have an identity range which does not span an - * aligned block so everything needs to be identity mapped here. - * If we didn't check this we might remap too many pages since - * the align boundaries are not meaningful in this case. 
- */ - ident_cnt += set_phys_range_identity(start_pfn, - start_pfn + size); - } else { - /* Remapped above so check each end of the chunk */ - if (start_pfn < ident_start_pfn_align) - ident_cnt += set_phys_range_identity(start_pfn, - ident_start_pfn_align); - if (start_pfn + size > ident_pfn_iter) - ident_cnt += set_phys_range_identity(ident_pfn_iter, - start_pfn + size); + left -= chunk; } - BUG_ON(ident_cnt != size); - BUG_ON(remap_cnt != size); - - return size; + /* Restore old xen_remap_buf mapping */ + set_pte_mfn(buf, mfn_save, PAGE_KERNEL); } /* @@ -396,8 +371,7 @@ static unsigned long __init xen_do_set_identity_and_remap_chunk( static unsigned long __init xen_set_identity_and_remap_chunk( const struct e820entry *list, size_t map_size, unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, - unsigned long *identity, unsigned long *remapped, - unsigned long *released) + unsigned long *identity, unsigned long *released) { unsigned long pfn; unsigned long i = 0; @@ -431,19 +405,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk( if (size > remap_range_size) size = remap_range_size; - if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) { - WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n", - cur_pfn, size, remap_pfn); - xen_set_identity_and_release_chunk(cur_pfn, - cur_pfn + left, nr_pages, identity, released); - break; - } + xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn); /* Update variables to reflect new mappings. 
*/ i += size; remap_pfn += size; *identity += size; - *remapped += size; } /* @@ -458,13 +425,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk( return remap_pfn; } -static unsigned long __init xen_set_identity_and_remap( +static void __init xen_set_identity_and_remap( const struct e820entry *list, size_t map_size, unsigned long nr_pages, unsigned long *released) { phys_addr_t start = 0; unsigned long identity = 0; - unsigned long remapped = 0; unsigned long last_pfn = nr_pages; const struct e820entry *entry; unsigned long num_released = 0; @@ -494,8 +460,7 @@ static unsigned long __init xen_set_identity_and_remap( last_pfn = xen_set_identity_and_remap_chunk( list, map_size, start_pfn, end_pfn, nr_pages, last_pfn, - &identity, &remapped, - &num_released); + &identity, &num_released); start = end; } } @@ -503,12 +468,63 @@ static unsigned long __init xen_set_identity_and_remap( *released = num_released; pr_info("Set %ld page(s) to 1-1 mapping\n", identity); - pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped, - last_pfn); pr_info("Released %ld page(s)\n", num_released); +} + +/* + * Remap the memory prepared in xen_do_set_identity_and_remap_chunk(). + * The remap information (which mfn remap to which pfn) is contained in the + * to be remapped memory itself in a linked list anchored at xen_remap_mfn. + * This scheme allows to remap the different chunks in arbitrary order while + * the resulting mapping will be independant from the order. 
+ */ +void __init xen_remap_memory(void) +{ + unsigned long buf = (unsigned long)&xen_remap_buf; + unsigned long mfn_save, mfn, pfn; + unsigned long remapped = 0; + unsigned int i; + unsigned long pfn_s = ~0UL; + unsigned long len = 0; + + mfn_save = virt_to_mfn(buf); + + while (xen_remap_mfn != INVALID_P2M_ENTRY) { + /* Map the remap information */ + set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL); - return last_pfn; + BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]); + + pfn = xen_remap_buf.target_pfn; + for (i = 0; i < xen_remap_buf.size; i++) { + mfn = xen_remap_buf.mfns[i]; + xen_update_mem_tables(pfn, mfn); + remapped++; + pfn++; + } + if (pfn_s == ~0UL || pfn == pfn_s) { + pfn_s = xen_remap_buf.target_pfn; + len += xen_remap_buf.size; + } else if (pfn_s + len == xen_remap_buf.target_pfn) { + len += xen_remap_buf.size; + } else { + xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len)); + pfn_s = xen_remap_buf.target_pfn; + len = xen_remap_buf.size; + } + + mfn = xen_remap_mfn; + xen_remap_mfn = xen_remap_buf.next_area_mfn; + } + + if (pfn_s != ~0UL && len) + xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len)); + + set_pte_mfn(buf, mfn_save, PAGE_KERNEL); + + pr_info("Remapped %ld page(s)\n", remapped); } + static unsigned long __init xen_get_max_pages(void) { unsigned long max_pages = MAX_DOMAIN_PAGES; @@ -569,7 +585,6 @@ char * __init xen_memory_setup(void) int rc; struct xen_memory_map memmap; unsigned long max_pages; - unsigned long last_pfn = 0; unsigned long extra_pages = 0; int i; int op; @@ -616,17 +631,14 @@ char * __init xen_memory_setup(void) extra_pages += max_pages - max_pfn; /* - * Set identity map on non-RAM pages and remap the underlying RAM. + * Set identity map on non-RAM pages and prepare remapping the + * underlying RAM. 
*/ - last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, - &xen_released_pages); + xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, + &xen_released_pages); extra_pages += xen_released_pages; - if (last_pfn > max_pfn) { - max_pfn = min(MAX_DOMAIN_PAGES, last_pfn); - mem_end = PFN_PHYS(max_pfn); - } /* * Clamp the amount of extra memory to a EXTRA_MEM_RATIO * factor the base size. On non-highmem systems, the base @@ -653,6 +665,7 @@ char * __init xen_memory_setup(void) size = min(size, (u64)extra_pages * PAGE_SIZE); extra_pages -= size / PAGE_SIZE; xen_add_extra_mem(addr, size); + xen_max_p2m_pfn = PFN_DOWN(addr + size); } else type = E820_UNUSABLE; } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 4ab9298c5e17..5686bd9d58cc 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -29,11 +29,13 @@ void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_reserve_top(void); -extern unsigned long xen_max_p2m_pfn; void xen_mm_pin_all(void); void xen_mm_unpin_all(void); +unsigned long __ref xen_chk_extra_mem(unsigned long pfn); +void __init xen_inv_extra_mem(void); +void __init xen_remap_memory(void); char * __init xen_memory_setup(void); char * xen_auto_xlated_memory_setup(void); void __init xen_arch_setup(void); @@ -46,7 +48,7 @@ void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); -unsigned long __init xen_revector_p2m_tree(void); +void __init xen_vmalloc_p2m_tree(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu); diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 81f57e8c8f1b..e31d4949124a 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -98,12 +98,6 @@ config XTENSA_VARIANT_DC233C help This variant refers to Tensilica's Diamond 233L Standard core Rev.C (LE). 
-config XTENSA_VARIANT_S6000 - bool "s6000 - Stretch software configurable processor" - select VARIANT_IRQ_SWITCH - select ARCH_REQUIRE_GPIOLIB - select XTENSA_CALIBRATE_CCOUNT - config XTENSA_VARIANT_CUSTOM bool "Custom Xtensa processor configuration" select MAY_HAVE_SMP @@ -126,7 +120,6 @@ config XTENSA_VARIANT_NAME default "dc232b" if XTENSA_VARIANT_DC232B default "dc233c" if XTENSA_VARIANT_DC233C default "fsf" if XTENSA_VARIANT_FSF - default "s6000" if XTENSA_VARIANT_S6000 default XTENSA_VARIANT_CUSTOM_NAME if XTENSA_VARIANT_CUSTOM config XTENSA_VARIANT_MMU @@ -191,7 +184,6 @@ config HOTPLUG_CPU config INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX bool "Initialize Xtensa MMU inside the Linux kernel code" - depends on MMU default y help Earlier version initialized the MMU in the exception vector @@ -311,15 +303,10 @@ config XTENSA_PLATFORM_XT2000 XT2000 is the name of Tensilica's feature-rich emulation platform. This hardware is capable of running a full Linux distribution. -config XTENSA_PLATFORM_S6105 - bool "S6105" - select HAVE_IDE - select SERIAL_CONSOLE - select NO_IOPORT_MAP - config XTENSA_PLATFORM_XTFPGA bool "XTFPGA" select ETHOC if ETHERNET + select PLATFORM_WANT_DEFAULT_MEM select SERIAL_CONSOLE select XTENSA_CALIBRATE_CCOUNT help @@ -406,6 +393,41 @@ source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" +config PLATFORM_WANT_DEFAULT_MEM + def_bool n + +config DEFAULT_MEM_START + hex "Physical address of the default memory area start" + depends on PLATFORM_WANT_DEFAULT_MEM + default 0x00000000 if MMU + default 0x40000000 if !MMU + help + This is a fallback start address of the default memory area, it is + used when no physical memory size is passed through DTB or through + boot parameter from bootloader. + + In noMMU configuration the following parameters are derived from it: + - kernel load address; + - kernel entry point address; + - relocatable vectors base address; + - uBoot load address; + - TASK_SIZE. 
+ + If unsure, leave the default value here. + +config DEFAULT_MEM_SIZE + hex "Maximal size of the default memory area" + depends on PLATFORM_WANT_DEFAULT_MEM + default 0x04000000 + help + This is a fallback size of the default memory area, it is used when + no physical memory size is passed through DTB or through boot + parameter from bootloader. + + It's also used for TASK_SIZE calculation in noMMU configuration. + + If unsure, leave the default value here. + endmenu menu "Executable file formats" @@ -414,6 +436,12 @@ source "fs/Kconfig.binfmt" endmenu +menu "Power management options" + +source "kernel/power/Kconfig" + +endmenu + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/xtensa/Kconfig.debug b/arch/xtensa/Kconfig.debug index af7da74d535f..8430af27de0a 100644 --- a/arch/xtensa/Kconfig.debug +++ b/arch/xtensa/Kconfig.debug @@ -4,7 +4,7 @@ source "lib/Kconfig.debug" config DEBUG_TLB_SANITY bool "Debug TLB sanity" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && MMU help Enable this to turn on TLB sanity check on each entry to userspace. This check can spot missing TLB invalidation/wrong PTE permissions/ @@ -14,7 +14,7 @@ config DEBUG_TLB_SANITY config LD_NO_RELAX bool "Disable linker relaxation" - default n + default y help Enable this function to disable link-time optimizations. 
The default linker behavior is to combine identical literal diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 472533064b46..f9e6a068aafd 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -35,7 +35,6 @@ endif platform-$(CONFIG_XTENSA_PLATFORM_XT2000) := xt2000 platform-$(CONFIG_XTENSA_PLATFORM_ISS) := iss -platform-$(CONFIG_XTENSA_PLATFORM_S6105) := s6105 platform-$(CONFIG_XTENSA_PLATFORM_XTFPGA) := xtfpga PLATFORM = $(platform-y) diff --git a/arch/xtensa/boot/boot-elf/boot.lds.S b/arch/xtensa/boot/boot-elf/boot.lds.S index 932b58ef33d4..958b33af96b7 100644 --- a/arch/xtensa/boot/boot-elf/boot.lds.S +++ b/arch/xtensa/boot/boot-elf/boot.lds.S @@ -41,6 +41,7 @@ SECTIONS __bss_end = .; } +#ifdef CONFIG_MMU /* * This is a remapped copy of the Reset Vector Code. * It keeps gdb in sync with the PC after switching @@ -51,4 +52,5 @@ SECTIONS { *(.ResetVector.remapped_text) } +#endif } diff --git a/arch/xtensa/boot/boot-elf/bootstrap.S b/arch/xtensa/boot/boot-elf/bootstrap.S index 1388a499753b..9341a5750694 100644 --- a/arch/xtensa/boot/boot-elf/bootstrap.S +++ b/arch/xtensa/boot/boot-elf/bootstrap.S @@ -20,6 +20,7 @@ #include <asm/page.h> #include <asm/cacheasm.h> #include <asm/initialize_mmu.h> +#include <asm/vectors.h> #include <linux/linkage.h> .section .ResetVector.text, "ax" @@ -34,12 +35,7 @@ _ResetVector: .align 4 RomInitAddr: -#if defined(CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX) && \ - XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY - .word 0x00003000 -#else - .word 0xd0003000 -#endif + .word LOAD_MEMORY_ADDRESS RomBootParam: .word _bootparam _bootparam: @@ -79,6 +75,7 @@ reset: movi a4, 0 jx a0 +#ifdef CONFIG_MMU .align 4 .section .ResetVector.remapped_text, "x" @@ -102,3 +99,4 @@ _RemappedSetupMMU: #endif .end no-absolute-literals +#endif diff --git a/arch/xtensa/boot/boot-uboot/Makefile b/arch/xtensa/boot/boot-uboot/Makefile index 545759819ef9..403fcf23405c 100644 --- a/arch/xtensa/boot/boot-uboot/Makefile +++ 
b/arch/xtensa/boot/boot-uboot/Makefile @@ -4,11 +4,15 @@ # for more details. # +ifdef CONFIG_MMU ifdef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX UIMAGE_LOADADDR = 0x00003000 else UIMAGE_LOADADDR = 0xd0003000 endif +else +UIMAGE_LOADADDR = $(shell printf "0x%x" $$(( ${CONFIG_DEFAULT_MEM_START} + 0x3000 )) ) +endif UIMAGE_COMPRESSION = gzip $(obj)/../uImage: vmlinux.bin.gz FORCE diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index b966baf82cae..e4d193e7a300 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -143,7 +143,6 @@ CONFIG_MMU=y # CONFIG_XTENSA_VARIANT_FSF=y # CONFIG_XTENSA_VARIANT_DC232B is not set -# CONFIG_XTENSA_VARIANT_S6000 is not set # CONFIG_XTENSA_UNALIGNED_USER is not set # CONFIG_PREEMPT is not set CONFIG_XTENSA_CALIBRATE_CCOUNT=y @@ -161,7 +160,6 @@ CONFIG_XTENSA_ISS_NETWORK=y # CONFIG_XTENSA_PLATFORM_ISS=y # CONFIG_XTENSA_PLATFORM_XT2000 is not set -# CONFIG_XTENSA_PLATFORM_S6105 is not set # CONFIG_GENERIC_CALIBRATE_DELAY is not set CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,38400 eth0=tuntap,,tap0 ip=192.168.168.5:192.168.168.1 root=nfs nfsroot=192.168.168.1:/opt/montavista/pro/devkit/xtensa/linux_be/target" @@ -759,3 +757,4 @@ CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_HAS_IOMEM=y CONFIG_HAS_DMA=y CONFIG_NLATTR=y +CONFIG_LD_NO_RELAX=y diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig deleted file mode 100644 index 9471265b8ca6..000000000000 --- a/arch/xtensa/configs/s6105_defconfig +++ /dev/null @@ -1,615 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.29-rc7-s6 -# Tue Mar 10 11:09:26 2009 -# -# CONFIG_FRAME_POINTER is not set -CONFIG_ZONE_DMA=y -CONFIG_XTENSA=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_FIND_NEXT_BIT=y -CONFIG_GENERIC_HWEIGHT=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_NO_IOPORT_MAP=y -CONFIG_HZ=100 
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" - -# -# General setup -# -CONFIG_EXPERIMENTAL=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_AUDIT is not set - -# -# RCU Subsystem -# -# CONFIG_CLASSIC_RCU is not set -# CONFIG_TREE_RCU is not set -CONFIG_PREEMPT_RCU=y -# CONFIG_RCU_TRACE is not set -# CONFIG_TREE_RCU_TRACE is not set -# CONFIG_PREEMPT_RCU_TRACE is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=16 -# CONFIG_GROUP_SCHED is not set -# CONFIG_CGROUPS is not set -# CONFIG_SYSFS_DEPRECATED_V2 is not set -# CONFIG_RELAY is not set -# CONFIG_NAMESPACES is not set -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL=y -CONFIG_EXPERT=y -CONFIG_SYSCTL_SYSCALL=y -CONFIG_KALLSYMS=y -# CONFIG_KALLSYMS_ALL is not set -# CONFIG_KALLSYMS_EXTRA_PASS is not set -# CONFIG_HOTPLUG is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -# CONFIG_COMPAT_BRK is not set -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_ANON_INODES=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_AIO=y -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLAB=y -# CONFIG_SLUB is not set -# CONFIG_SLOB is not set -# CONFIG_PROFILING is not set -# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set -CONFIG_SLABINFO=y -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0 -# CONFIG_MODULES is not set -CONFIG_BLOCK=y -# CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_BLK_DEV_INTEGRITY is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -# CONFIG_IOSCHED_AS is not set -# CONFIG_IOSCHED_DEADLINE is not set -CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y -# CONFIG_DEFAULT_NOOP is 
not set -CONFIG_DEFAULT_IOSCHED="cfq" -# CONFIG_FREEZER is not set -# CONFIG_MMU is not set -CONFIG_VARIANT_IRQ_SWITCH=y - -# -# Processor type and features -# -# CONFIG_XTENSA_VARIANT_FSF is not set -# CONFIG_XTENSA_VARIANT_DC232B is not set -CONFIG_XTENSA_VARIANT_S6000=y -# CONFIG_XTENSA_UNALIGNED_USER is not set -CONFIG_PREEMPT=y -# CONFIG_HIGHMEM is not set -CONFIG_XTENSA_CALIBRATE_CCOUNT=y -CONFIG_SERIAL_CONSOLE=y -# CONFIG_XTENSA_ISS_NETWORK is not set - -# -# Bus options -# -# CONFIG_PCI is not set -# CONFIG_ARCH_SUPPORTS_MSI is not set - -# -# Platform options -# -# CONFIG_XTENSA_PLATFORM_ISS is not set -# CONFIG_XTENSA_PLATFORM_XT2000 is not set -CONFIG_XTENSA_PLATFORM_S6105=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS1,38400 debug bootmem_debug loglevel=7" -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y -# CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -CONFIG_PAGEFLAGS_EXTENDED=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -# CONFIG_PHYS_ADDR_T_64BIT is not set -CONFIG_ZONE_DMA_FLAG=1 -CONFIG_VIRT_TO_BUS=y - -# -# Executable file formats -# -CONFIG_KCORE_ELF=y -CONFIG_BINFMT_FLAT=y -# CONFIG_BINFMT_ZFLAT is not set -# CONFIG_BINFMT_SHARED_FLAT is not set -# CONFIG_HAVE_AOUT is not set -# CONFIG_BINFMT_MISC is not set -CONFIG_NET=y - -# -# Networking options -# -CONFIG_COMPAT_NET_DEV_OPS=y -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_FIB_HASH=y -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_XFRM_TUNNEL is not set -# CONFIG_INET_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# 
CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_CUBIC=y -CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set -# CONFIG_IPV6 is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set -# CONFIG_IP_DCCP is not set -# CONFIG_IP_SCTP is not set -# CONFIG_TIPC is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_NET_DSA is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_SCHED is not set -# CONFIG_DCB is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_HAMRADIO is not set -# CONFIG_CAN is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -# CONFIG_AF_RXRPC is not set -# CONFIG_PHONET is not set -# CONFIG_WIRELESS is not set -# CONFIG_WIMAX is not set -# CONFIG_RFKILL is not set -# CONFIG_NET_9P is not set - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_SYS_HYPERVISOR is not set -# CONFIG_CONNECTOR is not set -# CONFIG_MTD is not set -# CONFIG_PARPORT is not set -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_COW_COMMON is not set -# CONFIG_BLK_DEV_LOOP is not set -# CONFIG_BLK_DEV_NBD is not set -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -# CONFIG_BLK_DEV_XIP is not set -# CONFIG_CDROM_PKTCDVD is not set -# CONFIG_ATA_OVER_ETH is not set -# CONFIG_BLK_DEV_HD is not set -# CONFIG_MISC_DEVICES is not set -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_RAID_ATTRS is not set -# CONFIG_SCSI is not set -# CONFIG_SCSI_DMA 
is not set -# CONFIG_SCSI_NETLINK is not set -# CONFIG_ATA is not set -# CONFIG_MD is not set -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_MACVLAN is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_VETH is not set -CONFIG_PHYLIB=y - -# -# MII PHY device drivers -# -# CONFIG_MARVELL_PHY is not set -# CONFIG_DAVICOM_PHY is not set -# CONFIG_QSEMI_PHY is not set -# CONFIG_LXT_PHY is not set -# CONFIG_CICADA_PHY is not set -# CONFIG_VITESSE_PHY is not set -CONFIG_SMSC_PHY=y -# CONFIG_BROADCOM_PHY is not set -# CONFIG_ICPLUS_PHY is not set -# CONFIG_REALTEK_PHY is not set -# CONFIG_NATIONAL_PHY is not set -# CONFIG_STE10XP is not set -# CONFIG_LSI_ET1011C_PHY is not set -# CONFIG_FIXED_PHY is not set -# CONFIG_MDIO_BITBANG is not set -# CONFIG_NET_ETHERNET is not set -CONFIG_NETDEV_1000=y -CONFIG_S6GMAC=y -# CONFIG_NETDEV_10000 is not set - -# -# Wireless LAN -# -# CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set -# CONFIG_IWLWIFI_LEDS is not set - -# -# Enable WiMAX (Networking options) to see the WiMAX drivers -# -# CONFIG_WAN is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_ISDN is not set -# CONFIG_PHONE is not set - -# -# Input device support -# -# CONFIG_INPUT is not set - -# -# Hardware I/O ports -# -# CONFIG_SERIO is not set -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -# CONFIG_VT is not set -# CONFIG_DEVKMEM is not set -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=2 -CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -# CONFIG_SERIAL_8250_EXTENDED is not set - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_UNIX98_PTYS=y -# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set -# CONFIG_LEGACY_PTYS is not set -# 
CONFIG_IPMI_HANDLER is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_R3964 is not set -# CONFIG_RAW_DRIVER is not set -# CONFIG_TCG_TPM is not set -# CONFIG_I2C is not set -# CONFIG_SPI is not set -CONFIG_ARCH_REQUIRE_GPIOLIB=y -CONFIG_GPIOLIB=y -# CONFIG_DEBUG_GPIO is not set -# CONFIG_GPIO_SYSFS is not set - -# -# Memory mapped GPIO expanders: -# - -# -# I2C GPIO expanders: -# - -# -# PCI GPIO expanders: -# - -# -# SPI GPIO expanders: -# -# CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set -# CONFIG_HWMON is not set -# CONFIG_THERMAL is not set -# CONFIG_THERMAL_HWMON is not set -# CONFIG_WATCHDOG is not set -CONFIG_SSB_POSSIBLE=y - -# -# Sonics Silicon Backplane -# -# CONFIG_SSB is not set - -# -# Multifunction device drivers -# -# CONFIG_MFD_CORE is not set -# CONFIG_MFD_SM501 is not set -# CONFIG_HTC_PASIC3 is not set -# CONFIG_MFD_TMIO is not set -# CONFIG_REGULATOR is not set - -# -# Multimedia devices -# - -# -# Multimedia core support -# -# CONFIG_VIDEO_DEV is not set -# CONFIG_DVB_CORE is not set -# CONFIG_VIDEO_MEDIA is not set - -# -# Multimedia drivers -# -# CONFIG_DAB is not set - -# -# Graphics support -# -# CONFIG_VGASTATE is not set -# CONFIG_VIDEO_OUTPUT_CONTROL is not set -# CONFIG_FB is not set -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set - -# -# Display device support -# -# CONFIG_DISPLAY_SUPPORT is not set -# CONFIG_SOUND is not set -# CONFIG_USB_SUPPORT is not set -# CONFIG_MMC is not set -# CONFIG_MEMSTICK is not set -# CONFIG_NEW_LEDS is not set -# CONFIG_ACCESSIBILITY is not set -CONFIG_RTC_LIB=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y -CONFIG_RTC_HCTOSYS_DEVICE="rtc0" -# CONFIG_RTC_DEBUG is not set - -# -# RTC interfaces -# -# CONFIG_RTC_INTF_SYSFS is not set -# CONFIG_RTC_INTF_PROC is not set -# CONFIG_RTC_INTF_DEV is not set -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -# CONFIG_RTC_DRV_DS1307 is not set -# CONFIG_RTC_DRV_DS1374 is not set -# CONFIG_RTC_DRV_DS1672 is not set -# CONFIG_RTC_DRV_MAX6900 is not set 
-# CONFIG_RTC_DRV_RS5C372 is not set -# CONFIG_RTC_DRV_ISL1208 is not set -# CONFIG_RTC_DRV_X1205 is not set -# CONFIG_RTC_DRV_PCF8563 is not set -# CONFIG_RTC_DRV_PCF8583 is not set -CONFIG_RTC_DRV_M41T80=y -# CONFIG_RTC_DRV_M41T80_WDT is not set -# CONFIG_RTC_DRV_S35390A is not set -# CONFIG_RTC_DRV_FM3130 is not set -# CONFIG_RTC_DRV_RX8581 is not set - -# -# SPI RTC drivers -# - -# -# Platform RTC drivers -# -# CONFIG_RTC_DRV_DS1286 is not set -# CONFIG_RTC_DRV_DS1511 is not set -# CONFIG_RTC_DRV_DS1553 is not set -# CONFIG_RTC_DRV_DS1742 is not set -# CONFIG_RTC_DRV_STK17TA8 is not set -# CONFIG_RTC_DRV_M48T86 is not set -# CONFIG_RTC_DRV_M48T35 is not set -# CONFIG_RTC_DRV_M48T59 is not set -# CONFIG_RTC_DRV_BQ4802 is not set -# CONFIG_RTC_DRV_V3020 is not set - -# -# on-CPU RTC drivers -# -# CONFIG_DMADEVICES is not set -# CONFIG_UIO is not set -# CONFIG_STAGING is not set - -# -# File systems -# -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set -# CONFIG_EXT4_FS is not set -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -# CONFIG_FS_POSIX_ACL is not set -CONFIG_FILE_LOCKING=y -# CONFIG_XFS_FS is not set -# CONFIG_OCFS2_FS is not set -# CONFIG_BTRFS_FS is not set -# CONFIG_DNOTIFY is not set -# CONFIG_INOTIFY is not set -# CONFIG_QUOTA is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set -# CONFIG_FUSE_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_SYSCTL=y -CONFIG_SYSFS=y -# CONFIG_TMPFS is not set -# CONFIG_HUGETLB_PAGE is not set -# CONFIG_CONFIGFS_FS is not set -# CONFIG_MISC_FILESYSTEMS is not set -# CONFIG_NETWORK_FILESYSTEMS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_NLS is not set -# CONFIG_DLM is 
not set - -# -# Kernel hacking -# -CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=1024 -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_FS is not set -# CONFIG_HEADERS_CHECK is not set -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_SHIRQ=y -CONFIG_DETECT_SOFTLOCKUP=y -# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 -# CONFIG_SCHED_DEBUG is not set -# CONFIG_SCHEDSTATS is not set -# CONFIG_TIMER_STATS is not set -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_RT_MUTEX_TESTER is not set -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_DEBUG_KOBJECT is not set -# CONFIG_DEBUG_INFO is not set -# CONFIG_DEBUG_VM is not set -CONFIG_DEBUG_NOMMU_REGIONS=y -# CONFIG_DEBUG_MEMORY_INIT is not set -# CONFIG_DEBUG_LIST is not set -# CONFIG_DEBUG_SG is not set -# CONFIG_DEBUG_NOTIFIERS is not set -# CONFIG_BOOT_PRINTK_DELAY is not set -# CONFIG_RCU_TORTURE_TEST is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set -# CONFIG_FAULT_INJECTION is not set -# CONFIG_SYSCTL_SYSCALL_CHECK is not set - -# -# Tracers -# -# CONFIG_PREEMPT_TRACER is not set -# CONFIG_SCHED_TRACER is not set -# CONFIG_CONTEXT_SWITCH_TRACER is not set -# CONFIG_BOOT_TRACER is not set -# CONFIG_TRACE_BRANCH_PROFILING is not set -# CONFIG_DYNAMIC_DEBUG is not set -# CONFIG_SAMPLES is not set - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set -# CONFIG_SECURITYFS is not set -# CONFIG_SECURITY_FILE_CAPABILITIES is not set -# CONFIG_CRYPTO is not set - -# -# Library routines -# -CONFIG_GENERIC_FIND_LAST_BIT=y -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC16 is not set -# CONFIG_CRC_T10DIF is not set -# CONFIG_CRC_ITU_T is not set -# CONFIG_CRC32 is 
not set -# CONFIG_CRC7 is not set -# CONFIG_LIBCRC32C is not set -CONFIG_PLIST=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_DMA=y diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index e72aaca7a77f..5f67ace97b32 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -67,6 +67,8 @@ extern void __invalidate_dcache_page_alias(unsigned long, unsigned long); #else static inline void __flush_invalidate_dcache_page_alias(unsigned long virt, unsigned long phys) { } +static inline void __invalidate_dcache_page_alias(unsigned long virt, + unsigned long phys) { } #endif #if defined(CONFIG_MMU) && (ICACHE_WAY_SIZE > PAGE_SIZE) extern void __invalidate_icache_page_alias(unsigned long, unsigned long); @@ -84,7 +86,8 @@ static inline void __invalidate_icache_page_alias(unsigned long virt, * (see also Documentation/cachetlb.txt) */ -#if (DCACHE_WAY_SIZE > PAGE_SIZE) || defined(CONFIG_SMP) +#if defined(CONFIG_MMU) && \ + ((DCACHE_WAY_SIZE > PAGE_SIZE) || defined(CONFIG_SMP)) #ifdef CONFIG_SMP void flush_cache_all(void); @@ -150,7 +153,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) -#if (DCACHE_WAY_SIZE > PAGE_SIZE) +#if defined(CONFIG_MMU) && (DCACHE_WAY_SIZE > PAGE_SIZE) extern void copy_to_user_page(struct vm_area_struct*, struct page*, unsigned long, void*, const void*, unsigned long); diff --git a/arch/xtensa/include/asm/highmem.h b/arch/xtensa/include/asm/highmem.h index 2c7901edffaf..01cef6b40829 100644 --- a/arch/xtensa/include/asm/highmem.h +++ b/arch/xtensa/include/asm/highmem.h @@ -25,7 +25,7 @@ #define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -#define kmap_prot PAGE_KERNEL +#define kmap_prot PAGE_KERNEL_EXEC #if DCACHE_WAY_SIZE > PAGE_SIZE #define get_pkmap_color get_pkmap_color diff --git 
a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h index 600781edc8a3..e256f2270ec9 100644 --- a/arch/xtensa/include/asm/initialize_mmu.h +++ b/arch/xtensa/include/asm/initialize_mmu.h @@ -26,8 +26,16 @@ #include <asm/pgtable.h> #include <asm/vectors.h> +#if XCHAL_HAVE_PTP_MMU #define CA_BYPASS (_PAGE_CA_BYPASS | _PAGE_HW_WRITE | _PAGE_HW_EXEC) #define CA_WRITEBACK (_PAGE_CA_WB | _PAGE_HW_WRITE | _PAGE_HW_EXEC) +#else +#define CA_WRITEBACK (0x4) +#endif + +#ifndef XCHAL_SPANNING_WAY +#define XCHAL_SPANNING_WAY 0 +#endif #ifdef __ASSEMBLY__ @@ -75,7 +83,7 @@ /* Step 1: invalidate mapping at 0x40000000..0x5FFFFFFF. */ - movi a2, 0x40000006 + movi a2, 0x40000000 | XCHAL_SPANNING_WAY idtlb a2 iitlb a2 isync @@ -141,9 +149,6 @@ jx a4 1: - movi a2, VECBASE_RESET_VADDR - wsr a2, vecbase - /* Step 5: remove temporary mapping. */ idtlb a7 iitlb a7 @@ -156,6 +161,33 @@ #endif /* defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY */ +#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS + /* Enable data and instruction cache in the DEFAULT_MEMORY region + * if the processor has DTLB and ITLB. 
+ */ + + movi a5, PLATFORM_DEFAULT_MEM_START | XCHAL_SPANNING_WAY + movi a6, ~_PAGE_ATTRIB_MASK + movi a7, CA_WRITEBACK + movi a8, 0x20000000 + movi a9, PLATFORM_DEFAULT_MEM_SIZE + j 2f +1: + sub a9, a9, a8 +2: + rdtlb1 a3, a5 + ritlb1 a4, a5 + and a3, a3, a6 + and a4, a4, a6 + or a3, a3, a7 + or a4, a4, a7 + wdtlb a3, a5 + witlb a4, a5 + add a5, a5, a8 + bltu a8, a9, 1b + +#endif + .endm #endif /*__ASSEMBLY__*/ diff --git a/arch/xtensa/include/asm/mmu_context.h b/arch/xtensa/include/asm/mmu_context.h index d33c71a8c9ec..04c8ebdc4517 100644 --- a/arch/xtensa/include/asm/mmu_context.h +++ b/arch/xtensa/include/asm/mmu_context.h @@ -50,11 +50,7 @@ DECLARE_PER_CPU(unsigned long, asid_cache); #define ASID_MASK ((1 << XCHAL_MMU_ASID_BITS) - 1) #define ASID_INSERT(x) (0x03020001 | (((x) & ASID_MASK) << 8)) -#ifdef CONFIG_MMU void init_mmu(void); -#else -static inline void init_mmu(void) { } -#endif static inline void set_rasid_register (unsigned long val) { diff --git a/arch/xtensa/include/asm/nommu_context.h b/arch/xtensa/include/asm/nommu_context.h index 3407cf7989b7..22984fd1d846 100644 --- a/arch/xtensa/include/asm/nommu_context.h +++ b/arch/xtensa/include/asm/nommu_context.h @@ -1,3 +1,7 @@ +static inline void init_mmu(void) +{ +} + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index abe24c6f8b2f..ad38500471fa 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -20,10 +20,10 @@ * Fixed TLB translations in the processor. 
*/ -#define XCHAL_KSEG_CACHED_VADDR 0xd0000000 -#define XCHAL_KSEG_BYPASS_VADDR 0xd8000000 -#define XCHAL_KSEG_PADDR 0x00000000 -#define XCHAL_KSEG_SIZE 0x08000000 +#define XCHAL_KSEG_CACHED_VADDR __XTENSA_UL_CONST(0xd0000000) +#define XCHAL_KSEG_BYPASS_VADDR __XTENSA_UL_CONST(0xd8000000) +#define XCHAL_KSEG_PADDR __XTENSA_UL_CONST(0x00000000) +#define XCHAL_KSEG_SIZE __XTENSA_UL_CONST(0x08000000) /* * PAGE_SHIFT determines the page size @@ -37,7 +37,7 @@ #define PAGE_OFFSET XCHAL_KSEG_CACHED_VADDR #define MAX_MEM_PFN XCHAL_KSEG_SIZE #else -#define PAGE_OFFSET 0 +#define PAGE_OFFSET __XTENSA_UL_CONST(0) #define MAX_MEM_PFN (PLATFORM_DEFAULT_MEM_START + PLATFORM_DEFAULT_MEM_SIZE) #endif @@ -145,7 +145,7 @@ extern void copy_page(void *to, void *from); * some extra work */ -#if DCACHE_WAY_SIZE > PAGE_SIZE +#if defined(CONFIG_MMU) && DCACHE_WAY_SIZE > PAGE_SIZE extern void clear_page_alias(void *vaddr, unsigned long paddr); extern void copy_page_alias(void *to, void *from, unsigned long to_paddr, unsigned long from_paddr); diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 0383aed59121..872bf0194e6d 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -178,6 +178,7 @@ #else /* no mmu */ +# define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) # define PAGE_NONE __pgprot(0) # define PAGE_SHARED __pgprot(0) # define PAGE_COPY __pgprot(0) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index c7211e7e182d..876eb380aa26 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -320,7 +320,7 @@ __asm__ __volatile__( \ ({ \ long __gu_err, __gu_val; \ __get_user_size(__gu_val,(ptr),(size),__gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) @@ -330,7 +330,7 @@ __asm__ __volatile__( \ const __typeof__(*(ptr)) *__gu_addr = (ptr); \ if 
(access_ok(VERIFY_READ,__gu_addr,size)) \ __get_user_size(__gu_val,__gu_addr,(size),__gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index f74ddfbb92ef..a46c53f36113 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -19,6 +19,7 @@ #define _XTENSA_VECTORS_H #include <variant/core.h> +#include <platform/hardware.h> #define XCHAL_KIO_CACHED_VADDR 0xe0000000 #define XCHAL_KIO_BYPASS_VADDR 0xf0000000 @@ -51,13 +52,13 @@ /* MMU Not being used - Virtual == Physical */ /* VECBASE */ - #define VIRTUAL_MEMORY_ADDRESS 0x00002000 + #define VIRTUAL_MEMORY_ADDRESS (PLATFORM_DEFAULT_MEM_START + 0x2000) /* Location of the start of the kernel text, _start */ - #define KERNELOFFSET 0x00003000 + #define KERNELOFFSET (PLATFORM_DEFAULT_MEM_START + 0x3000) /* Loaded just above possibly live vectors */ - #define LOAD_MEMORY_ADDRESS 0x00003000 + #define LOAD_MEMORY_ADDRESS (PLATFORM_DEFAULT_MEM_START + 0x3000) #endif /* CONFIG_MMU */ diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 00eed6786d7e..201aec0e0446 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -55,6 +55,12 @@ #define MAP_NONBLOCK 0x20000 /* do not block on IO */ #define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */ +#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED +# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be + * uninitialized */ +#else +# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ +#endif /* * Flags for msync diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index aeeb3cc8a410..15a461e2a0ed 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -112,6 +112,11 @@ 
ENTRY(_startup) movi a0, 0 +#if XCHAL_HAVE_VECBASE + movi a2, VECBASE_RESET_VADDR + wsr a2, vecbase +#endif + /* Clear debugging registers. */ #if XCHAL_HAVE_DEBUG diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index 5d3f7a119ed1..83cf49685373 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -57,6 +57,7 @@ asmlinkage long xtensa_fadvise64_64(int fd, int advice, return sys_fadvise64_64(fd, offset, len, advice); } +#ifdef CONFIG_MMU unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { @@ -93,3 +94,4 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = COLOUR_ALIGN(addr, pgoff); } } +#endif diff --git a/arch/xtensa/mm/Makefile b/arch/xtensa/mm/Makefile index f54f78e24d7b..e601e2fbe8e6 100644 --- a/arch/xtensa/mm/Makefile +++ b/arch/xtensa/mm/Makefile @@ -2,6 +2,6 @@ # Makefile for the Linux/Xtensa-specific parts of the memory manager. # -obj-y := init.o cache.o misc.o -obj-$(CONFIG_MMU) += fault.o mmu.o tlb.o +obj-y := init.o misc.o +obj-$(CONFIG_MMU) += cache.o fault.o mmu.o tlb.o obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 77ed20209ca5..9a9a5935bd36 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -239,6 +239,17 @@ void __init bootmem_init(void) unsigned long bootmap_start, bootmap_size; int i; + /* Reserve all memory below PLATFORM_DEFAULT_MEM_START, as memory + * accounting doesn't work for pages below that address. + * + * If PLATFORM_DEFAULT_MEM_START is zero reserve page at address 0: + * successfull allocations should never return NULL. 
+ */ + if (PLATFORM_DEFAULT_MEM_START) + mem_reserve(0, PLATFORM_DEFAULT_MEM_START, 0); + else + mem_reserve(0, 1, 0); + sysmem_dump(); max_low_pfn = max_pfn = 0; min_low_pfn = ~0; @@ -332,18 +343,24 @@ void __init mem_init(void) " pkmap : 0x%08lx - 0x%08lx (%5lu kB)\n" " fixmap : 0x%08lx - 0x%08lx (%5lu kB)\n" #endif +#ifdef CONFIG_MMU " vmalloc : 0x%08x - 0x%08x (%5u MB)\n" - " lowmem : 0x%08x - 0x%08lx (%5lu MB)\n", +#endif + " lowmem : 0x%08lx - 0x%08lx (%5lu MB)\n", #ifdef CONFIG_HIGHMEM PKMAP_BASE, PKMAP_BASE + LAST_PKMAP * PAGE_SIZE, (LAST_PKMAP*PAGE_SIZE) >> 10, FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, #endif +#ifdef CONFIG_MMU VMALLOC_START, VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20, PAGE_OFFSET, PAGE_OFFSET + (max_low_pfn - min_low_pfn) * PAGE_SIZE, +#else + min_low_pfn * PAGE_SIZE, max_low_pfn * PAGE_SIZE, +#endif ((max_low_pfn - min_low_pfn) * PAGE_SIZE) >> 20); } diff --git a/arch/xtensa/platforms/s6105/Makefile b/arch/xtensa/platforms/s6105/Makefile deleted file mode 100644 index 0be6194bcb72..000000000000 --- a/arch/xtensa/platforms/s6105/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -# Makefile for the Stretch S6105 eval board - -obj-y := setup.o device.o diff --git a/arch/xtensa/platforms/s6105/device.c b/arch/xtensa/platforms/s6105/device.c deleted file mode 100644 index 4f4fc971042f..000000000000 --- a/arch/xtensa/platforms/s6105/device.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * s6105 platform devices - * - * Copyright (c) 2009 emlix GmbH - */ - -#include <linux/kernel.h> -#include <linux/gpio.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/phy.h> -#include <linux/platform_device.h> -#include <linux/serial.h> -#include <linux/serial_8250.h> - -#include <variant/hardware.h> -#include <variant/dmac.h> - -#include <platform/gpio.h> - -#define GPIO3_INTNUM 3 -#define UART_INTNUM 4 -#define GMAC_INTNUM 5 - -static const signed char gpio3_irq_mappings[] = { - S6_INTC_GPIO(3), - -1 -}; - -static const signed 
char uart_irq_mappings[] = { - S6_INTC_UART(0), - S6_INTC_UART(1), - -1, -}; - -static const signed char gmac_irq_mappings[] = { - S6_INTC_GMAC_STAT, - S6_INTC_GMAC_ERR, - S6_INTC_DMA_HOSTTERMCNT(0), - S6_INTC_DMA_HOSTTERMCNT(1), - -1 -}; - -const signed char *platform_irq_mappings[NR_IRQS] = { - [GPIO3_INTNUM] = gpio3_irq_mappings, - [UART_INTNUM] = uart_irq_mappings, - [GMAC_INTNUM] = gmac_irq_mappings, -}; - -static struct plat_serial8250_port serial_platform_data[] = { - { - .membase = (void *)S6_REG_UART + 0x0000, - .mapbase = S6_REG_UART + 0x0000, - .irq = UART_INTNUM, - .uartclk = S6_SCLK, - .regshift = 2, - .iotype = SERIAL_IO_MEM, - .flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST, - }, - { - .membase = (void *)S6_REG_UART + 0x1000, - .mapbase = S6_REG_UART + 0x1000, - .irq = UART_INTNUM, - .uartclk = S6_SCLK, - .regshift = 2, - .iotype = SERIAL_IO_MEM, - .flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST, - }, - { }, -}; - -static struct resource s6_gmac_resource[] = { - { - .name = "mem", - .start = (resource_size_t)S6_REG_GMAC, - .end = (resource_size_t)S6_REG_GMAC + 0x10000 - 1, - .flags = IORESOURCE_MEM, - }, - { - .name = "dma", - .start = (resource_size_t) - DMA_CHNL(S6_REG_HIFDMA, S6_HIFDMA_GMACTX), - .end = (resource_size_t) - DMA_CHNL(S6_REG_HIFDMA, S6_HIFDMA_GMACTX) + 0x100 - 1, - .flags = IORESOURCE_DMA, - }, - { - .name = "dma", - .start = (resource_size_t) - DMA_CHNL(S6_REG_HIFDMA, S6_HIFDMA_GMACRX), - .end = (resource_size_t) - DMA_CHNL(S6_REG_HIFDMA, S6_HIFDMA_GMACRX) + 0x100 - 1, - .flags = IORESOURCE_DMA, - }, - { - .name = "io", - .start = (resource_size_t)S6_MEM_GMAC, - .end = (resource_size_t)S6_MEM_GMAC + 0x2000000 - 1, - .flags = IORESOURCE_IO, - }, - { - .name = "irq", - .start = (resource_size_t)GMAC_INTNUM, - .flags = IORESOURCE_IRQ, - }, - { - .name = "irq", - .start = (resource_size_t)PHY_POLL, - .flags = IORESOURCE_IRQ, - }, -}; - -static int __init prepare_phy_irq(int pin) -{ - int irq; - if (gpio_request(pin, "s6gmac_phy") < 0) - 
goto fail; - if (gpio_direction_input(pin) < 0) - goto free; - irq = gpio_to_irq(pin); - if (irq < 0) - goto free; - if (irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW) < 0) - goto free; - return irq; -free: - gpio_free(pin); -fail: - return PHY_POLL; -} - -static struct platform_device platform_devices[] = { - { - .name = "serial8250", - .id = PLAT8250_DEV_PLATFORM, - .dev = { - .platform_data = serial_platform_data, - }, - }, - { - .name = "s6gmac", - .id = 0, - .resource = s6_gmac_resource, - .num_resources = ARRAY_SIZE(s6_gmac_resource), - }, - { - I2C_BOARD_INFO("m41t62", S6I2C_ADDR_M41T62), - }, -}; - -static int __init device_init(void) -{ - int i; - - s6_gmac_resource[5].start = prepare_phy_irq(GPIO_PHY_IRQ); - - for (i = 0; i < ARRAY_SIZE(platform_devices); i++) - platform_device_register(&platform_devices[i]); - return 0; -} -arch_initcall_sync(device_init); diff --git a/arch/xtensa/platforms/s6105/include/platform/gpio.h b/arch/xtensa/platforms/s6105/include/platform/gpio.h deleted file mode 100644 index fa11aa4b61e9..000000000000 --- a/arch/xtensa/platforms/s6105/include/platform/gpio.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __ASM_XTENSA_S6105_GPIO_H -#define __ASM_XTENSA_S6105_GPIO_H - -#define GPIO_BP_TEMP_ALARM 0 -#define GPIO_PB_RESET_IN 1 -#define GPIO_EXP_IRQ 2 -#define GPIO_TRIGGER_IRQ 3 -#define GPIO_RTC_IRQ 4 -#define GPIO_PHY_IRQ 5 -#define GPIO_IMAGER_RESET 6 -#define GPIO_SD_IRQ 7 -#define GPIO_MINI_BOOT_INH 8 -#define GPIO_BOARD_RESET 9 -#define GPIO_EXP_PRESENT 10 -#define GPIO_LED1_NGREEN 12 -#define GPIO_LED1_RED 13 -#define GPIO_LED0_NGREEN 14 -#define GPIO_LED0_NRED 15 -#define GPIO_SPI_CS0 16 -#define GPIO_SPI_CS1 17 -#define GPIO_SPI_CS3 19 -#define GPIO_SPI_CS4 20 -#define GPIO_SD_WP 21 -#define GPIO_BP_RESET 22 -#define GPIO_ALARM_OUT 23 - -#endif /* __ASM_XTENSA_S6105_GPIO_H */ diff --git a/arch/xtensa/platforms/s6105/include/platform/hardware.h b/arch/xtensa/platforms/s6105/include/platform/hardware.h deleted file mode 100644 index 
d628efac7089..000000000000 --- a/arch/xtensa/platforms/s6105/include/platform/hardware.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __XTENSA_S6105_HARDWARE_H -#define __XTENSA_S6105_HARDWARE_H - -#define PLATFORM_DEFAULT_MEM_START 0x40000000 -#define PLATFORM_DEFAULT_MEM_SIZE 0x08000000 - -#define MAX_DMA_ADDRESS 0 - -#define KERNELOFFSET (PLATFORM_DEFAULT_MEM_START + 0x1000) - -#endif /* __XTENSA_S6105_HARDWARE_H */ diff --git a/arch/xtensa/platforms/s6105/include/platform/serial.h b/arch/xtensa/platforms/s6105/include/platform/serial.h deleted file mode 100644 index c8a771e5981b..000000000000 --- a/arch/xtensa/platforms/s6105/include/platform/serial.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __ASM_XTENSA_S6105_SERIAL_H -#define __ASM_XTENSA_S6105_SERIAL_H - -#include <variant/hardware.h> - -#define BASE_BAUD (S6_SCLK / 16) - -#endif /* __ASM_XTENSA_S6105_SERIAL_H */ diff --git a/arch/xtensa/platforms/s6105/setup.c b/arch/xtensa/platforms/s6105/setup.c deleted file mode 100644 index 86ce730f7913..000000000000 --- a/arch/xtensa/platforms/s6105/setup.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * s6105 control routines - * - * Copyright (c) 2009 emlix GmbH - */ -#include <linux/irq.h> -#include <linux/io.h> -#include <linux/gpio.h> - -#include <asm/bootparam.h> - -#include <variant/hardware.h> -#include <variant/gpio.h> - -#include <platform/gpio.h> - -void platform_halt(void) -{ - local_irq_disable(); - while (1) - ; -} - -void platform_power_off(void) -{ - platform_halt(); -} - -void platform_restart(void) -{ - platform_halt(); -} - -void __init platform_setup(char **cmdline) -{ - unsigned long reg; - - reg = readl(S6_REG_GREG1 + S6_GREG1_PLLSEL); - reg &= ~(S6_GREG1_PLLSEL_GMAC_MASK << S6_GREG1_PLLSEL_GMAC | - S6_GREG1_PLLSEL_GMII_MASK << S6_GREG1_PLLSEL_GMII); - reg |= S6_GREG1_PLLSEL_GMAC_125MHZ << S6_GREG1_PLLSEL_GMAC | - S6_GREG1_PLLSEL_GMII_125MHZ << S6_GREG1_PLLSEL_GMII; - writel(reg, S6_REG_GREG1 + S6_GREG1_PLLSEL); - - reg = readl(S6_REG_GREG1 + S6_GREG1_CLKGATE); - 
reg &= ~(1 << S6_GREG1_BLOCK_SB); - reg &= ~(1 << S6_GREG1_BLOCK_GMAC); - writel(reg, S6_REG_GREG1 + S6_GREG1_CLKGATE); - - reg = readl(S6_REG_GREG1 + S6_GREG1_BLOCKENA); - reg |= 1 << S6_GREG1_BLOCK_SB; - reg |= 1 << S6_GREG1_BLOCK_GMAC; - writel(reg, S6_REG_GREG1 + S6_GREG1_BLOCKENA); - - printk(KERN_NOTICE "S6105 on Stretch S6000 - " - "Copyright (C) 2009 emlix GmbH <info@emlix.com>\n"); -} - -void __init platform_init(bp_tag_t *first) -{ - s6_gpio_init(0); - gpio_request(GPIO_LED1_NGREEN, "led1_green"); - gpio_request(GPIO_LED1_RED, "led1_red"); - gpio_direction_output(GPIO_LED1_NGREEN, 1); -} - -void platform_heartbeat(void) -{ - static unsigned int c; - - if (!(++c & 0x4F)) - gpio_direction_output(GPIO_LED1_RED, !(c & 0x10)); -} diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h index aeb316b7ff88..6edd20bb4565 100644 --- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h +++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h @@ -17,8 +17,8 @@ /* Memory configuration. */ -#define PLATFORM_DEFAULT_MEM_START 0x00000000 -#define PLATFORM_DEFAULT_MEM_SIZE 0x04000000 +#define PLATFORM_DEFAULT_MEM_START CONFIG_DEFAULT_MEM_START +#define PLATFORM_DEFAULT_MEM_SIZE CONFIG_DEFAULT_MEM_SIZE /* Interrupt configuration. 
*/ diff --git a/arch/xtensa/variants/s6000/Makefile b/arch/xtensa/variants/s6000/Makefile deleted file mode 100644 index 3e7ef0a0c498..000000000000 --- a/arch/xtensa/variants/s6000/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# s6000 Makefile - -obj-y += irq.o gpio.o dmac.o -obj-$(CONFIG_XTENSA_CALIBRATE_CCOUNT) += delay.o diff --git a/arch/xtensa/variants/s6000/delay.c b/arch/xtensa/variants/s6000/delay.c deleted file mode 100644 index 39154563ee17..000000000000 --- a/arch/xtensa/variants/s6000/delay.c +++ /dev/null @@ -1,25 +0,0 @@ -#include <asm/timex.h> -#include <asm/io.h> -#include <variant/hardware.h> - -#define LOOPS 10 -void platform_calibrate_ccount(void) -{ - u32 uninitialized_var(a); - u32 uninitialized_var(u); - u32 b; - u32 tstamp = S6_REG_GREG1 + S6_GREG1_GLOBAL_TIMER; - int i = LOOPS+1; - do { - u32 t = u; - asm volatile( - "1: l32i %0, %2, 0 ;" - " beq %0, %1, 1b ;" - : "=&a"(u) : "a"(t), "a"(tstamp)); - b = get_ccount(); - if (i == LOOPS) - a = b; - } while (--i >= 0); - b -= a; - ccount_freq = b * (100000UL / LOOPS); -} diff --git a/arch/xtensa/variants/s6000/dmac.c b/arch/xtensa/variants/s6000/dmac.c deleted file mode 100644 index 340f5bb0b5ef..000000000000 --- a/arch/xtensa/variants/s6000/dmac.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Authors: Oskar Schirmer <oskar@scara.com> - * Daniel Gloeckner <dg@emlix.com> - * (c) 2008 emlix GmbH http://www.emlix.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ - -#include <linux/kernel.h> -#include <linux/io.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/spinlock.h> -#include <asm/cacheflush.h> -#include <variant/dmac.h> - -/* DMA engine lookup */ - -struct s6dmac_ctrl s6dmac_ctrl[S6_DMAC_NB]; - - -/* DMA control, per engine */ - -void s6dmac_put_fifo_cache(u32 dmac, int chan, u32 src, u32 dst, u32 size) -{ - if (xtensa_need_flush_dma_source(src)) { - u32 base = src; - u32 span = size; - u32 chunk = readl(DMA_CHNL(dmac, chan) + S6_DMA_CMONCHUNK); - if (chunk && (size > chunk)) { - s32 skip = - readl(DMA_CHNL(dmac, chan) + S6_DMA_SRCSKIP); - u32 gaps = (size+chunk-1)/chunk - 1; - if (skip >= 0) { - span += gaps * skip; - } else if (-skip > chunk) { - s32 decr = gaps * (chunk + skip); - base += decr; - span = chunk - decr; - } else { - span = max(span + gaps * skip, - (chunk + skip) * gaps - skip); - } - } - flush_dcache_unaligned(base, span); - } - if (xtensa_need_invalidate_dma_destination(dst)) { - u32 base = dst; - u32 span = size; - u32 chunk = readl(DMA_CHNL(dmac, chan) + S6_DMA_CMONCHUNK); - if (chunk && (size > chunk)) { - s32 skip = - readl(DMA_CHNL(dmac, chan) + S6_DMA_DSTSKIP); - u32 gaps = (size+chunk-1)/chunk - 1; - if (skip >= 0) { - span += gaps * skip; - } else if (-skip > chunk) { - s32 decr = gaps * (chunk + skip); - base += decr; - span = chunk - decr; - } else { - span = max(span + gaps * skip, - (chunk + skip) * gaps - skip); - } - } - invalidate_dcache_unaligned(base, span); - } - s6dmac_put_fifo(dmac, chan, src, dst, size); -} - -void s6dmac_disable_error_irqs(u32 dmac, u32 mask) -{ - unsigned long flags; - spinlock_t *spinl = &s6dmac_ctrl[_dmac_addr_index(dmac)].lock; - spin_lock_irqsave(spinl, flags); - _s6dmac_disable_error_irqs(dmac, mask); - spin_unlock_irqrestore(spinl, flags); -} - -u32 s6dmac_int_sources(u32 dmac, u32 channel) -{ - u32 mask, ret, tmp; - mask = 1 << channel; - - tmp = readl(dmac + S6_DMA_TERMCNTIRQSTAT); - tmp &= mask; - writel(tmp, dmac + 
S6_DMA_TERMCNTIRQCLR); - ret = tmp >> channel; - - tmp = readl(dmac + S6_DMA_PENDCNTIRQSTAT); - tmp &= mask; - writel(tmp, dmac + S6_DMA_PENDCNTIRQCLR); - ret |= (tmp >> channel) << 1; - - tmp = readl(dmac + S6_DMA_LOWWMRKIRQSTAT); - tmp &= mask; - writel(tmp, dmac + S6_DMA_LOWWMRKIRQCLR); - ret |= (tmp >> channel) << 2; - - tmp = readl(dmac + S6_DMA_INTRAW0); - tmp &= (mask << S6_DMA_INT0_OVER) | (mask << S6_DMA_INT0_UNDER); - writel(tmp, dmac + S6_DMA_INTCLEAR0); - - if (tmp & (mask << S6_DMA_INT0_UNDER)) - ret |= 1 << 3; - if (tmp & (mask << S6_DMA_INT0_OVER)) - ret |= 1 << 4; - - tmp = readl(dmac + S6_DMA_MASTERERRINFO); - mask <<= S6_DMA_INT1_CHANNEL; - if (((tmp >> S6_DMA_MASTERERR_CHAN(0)) & S6_DMA_MASTERERR_CHAN_MASK) - == channel) - mask |= 1 << S6_DMA_INT1_MASTER; - if (((tmp >> S6_DMA_MASTERERR_CHAN(1)) & S6_DMA_MASTERERR_CHAN_MASK) - == channel) - mask |= 1 << (S6_DMA_INT1_MASTER + 1); - if (((tmp >> S6_DMA_MASTERERR_CHAN(2)) & S6_DMA_MASTERERR_CHAN_MASK) - == channel) - mask |= 1 << (S6_DMA_INT1_MASTER + 2); - - tmp = readl(dmac + S6_DMA_INTRAW1) & mask; - writel(tmp, dmac + S6_DMA_INTCLEAR1); - ret |= ((tmp >> channel) & 1) << 5; - ret |= ((tmp >> S6_DMA_INT1_MASTER) & S6_DMA_INT1_MASTER_MASK) << 6; - - return ret; -} - -void s6dmac_release_chan(u32 dmac, int chan) -{ - if (chan >= 0) - s6dmac_disable_chan(dmac, chan); -} - - -/* global init */ - -static inline void __init dmac_init(u32 dmac, u8 chan_nb) -{ - s6dmac_ctrl[S6_DMAC_INDEX(dmac)].dmac = dmac; - spin_lock_init(&s6dmac_ctrl[S6_DMAC_INDEX(dmac)].lock); - s6dmac_ctrl[S6_DMAC_INDEX(dmac)].chan_nb = chan_nb; - writel(S6_DMA_INT1_MASTER_MASK << S6_DMA_INT1_MASTER, - dmac + S6_DMA_INTCLEAR1); -} - -static inline void __init dmac_master(u32 dmac, - u32 m0start, u32 m0end, u32 m1start, u32 m1end) -{ - writel(m0start, dmac + S6_DMA_MASTER0START); - writel(m0end - 1, dmac + S6_DMA_MASTER0END); - writel(m1start, dmac + S6_DMA_MASTER1START); - writel(m1end - 1, dmac + S6_DMA_MASTER1END); -} - -static 
void __init s6_dmac_init(void) -{ - dmac_init(S6_REG_LMSDMA, S6_LMSDMA_NB); - dmac_master(S6_REG_LMSDMA, - S6_MEM_DDR, S6_MEM_PCIE_APER, S6_MEM_EFI, S6_MEM_GMAC); - dmac_init(S6_REG_NIDMA, S6_NIDMA_NB); - dmac_init(S6_REG_DPDMA, S6_DPDMA_NB); - dmac_master(S6_REG_DPDMA, - S6_MEM_DDR, S6_MEM_PCIE_APER, S6_REG_DP, S6_REG_DPDMA); - dmac_init(S6_REG_HIFDMA, S6_HIFDMA_NB); - dmac_master(S6_REG_HIFDMA, - S6_MEM_GMAC, S6_MEM_PCIE_CFG, S6_MEM_PCIE_APER, S6_MEM_AUX); -} - -arch_initcall(s6_dmac_init); diff --git a/arch/xtensa/variants/s6000/gpio.c b/arch/xtensa/variants/s6000/gpio.c deleted file mode 100644 index da9e85c13b08..000000000000 --- a/arch/xtensa/variants/s6000/gpio.c +++ /dev/null @@ -1,230 +0,0 @@ -/* - * s6000 gpio driver - * - * Copyright (c) 2009 emlix GmbH - * Authors: Oskar Schirmer <oskar@scara.com> - * Johannes Weiner <hannes@cmpxchg.org> - * Daniel Gloeckner <dg@emlix.com> - */ -#include <linux/bitops.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/gpio.h> - -#include <variant/hardware.h> - -#define IRQ_BASE XTENSA_NR_IRQS - -#define S6_GPIO_DATA 0x000 -#define S6_GPIO_IS 0x404 -#define S6_GPIO_IBE 0x408 -#define S6_GPIO_IEV 0x40C -#define S6_GPIO_IE 0x410 -#define S6_GPIO_RIS 0x414 -#define S6_GPIO_MIS 0x418 -#define S6_GPIO_IC 0x41C -#define S6_GPIO_AFSEL 0x420 -#define S6_GPIO_DIR 0x800 -#define S6_GPIO_BANK(nr) ((nr) * 0x1000) -#define S6_GPIO_MASK(nr) (4 << (nr)) -#define S6_GPIO_OFFSET(nr) \ - (S6_GPIO_BANK((nr) >> 3) + S6_GPIO_MASK((nr) & 7)) - -static int direction_input(struct gpio_chip *chip, unsigned int off) -{ - writeb(0, S6_REG_GPIO + S6_GPIO_DIR + S6_GPIO_OFFSET(off)); - return 0; -} - -static int get(struct gpio_chip *chip, unsigned int off) -{ - return readb(S6_REG_GPIO + S6_GPIO_DATA + S6_GPIO_OFFSET(off)); -} - -static int direction_output(struct gpio_chip *chip, unsigned int off, int val) -{ - unsigned rel = S6_GPIO_OFFSET(off); - 
writeb(~0, S6_REG_GPIO + S6_GPIO_DIR + rel); - writeb(val ? ~0 : 0, S6_REG_GPIO + S6_GPIO_DATA + rel); - return 0; -} - -static void set(struct gpio_chip *chip, unsigned int off, int val) -{ - writeb(val ? ~0 : 0, S6_REG_GPIO + S6_GPIO_DATA + S6_GPIO_OFFSET(off)); -} - -static int to_irq(struct gpio_chip *chip, unsigned offset) -{ - if (offset < 8) - return offset + IRQ_BASE; - return -EINVAL; -} - -static struct gpio_chip gpiochip = { - .owner = THIS_MODULE, - .direction_input = direction_input, - .get = get, - .direction_output = direction_output, - .set = set, - .to_irq = to_irq, - .base = 0, - .ngpio = 24, - .can_sleep = 0, /* no blocking io needed */ - .exported = 0, /* no exporting to userspace */ -}; - -int s6_gpio_init(u32 afsel) -{ - writeb(afsel, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_AFSEL); - writeb(afsel >> 8, S6_REG_GPIO + S6_GPIO_BANK(1) + S6_GPIO_AFSEL); - writeb(afsel >> 16, S6_REG_GPIO + S6_GPIO_BANK(2) + S6_GPIO_AFSEL); - return gpiochip_add(&gpiochip); -} - -static void ack(struct irq_data *d) -{ - writeb(1 << (d->irq - IRQ_BASE), S6_REG_GPIO + S6_GPIO_IC); -} - -static void mask(struct irq_data *d) -{ - u8 r = readb(S6_REG_GPIO + S6_GPIO_IE); - r &= ~(1 << (d->irq - IRQ_BASE)); - writeb(r, S6_REG_GPIO + S6_GPIO_IE); -} - -static void unmask(struct irq_data *d) -{ - u8 m = readb(S6_REG_GPIO + S6_GPIO_IE); - m |= 1 << (d->irq - IRQ_BASE); - writeb(m, S6_REG_GPIO + S6_GPIO_IE); -} - -static int set_type(struct irq_data *d, unsigned int type) -{ - const u8 m = 1 << (d->irq - IRQ_BASE); - irq_flow_handler_t handler; - u8 reg; - - if (type == IRQ_TYPE_PROBE) { - if ((readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_AFSEL) & m) - || (readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IE) & m) - || readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_DIR - + S6_GPIO_MASK(irq - IRQ_BASE))) - return 0; - type = IRQ_TYPE_EDGE_BOTH; - } - - reg = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IS); - if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) { - reg |= m; - 
handler = handle_level_irq; - } else { - reg &= ~m; - handler = handle_edge_irq; - } - writeb(reg, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IS); - __irq_set_handler_locked(irq, handler); - - reg = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IEV); - if (type & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_EDGE_RISING)) - reg |= m; - else - reg &= ~m; - writeb(reg, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IEV); - - reg = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IBE); - if ((type & IRQ_TYPE_EDGE_BOTH) == IRQ_TYPE_EDGE_BOTH) - reg |= m; - else - reg &= ~m; - writeb(reg, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IBE); - return 0; -} - -static struct irq_chip gpioirqs = { - .name = "GPIO", - .irq_ack = ack, - .irq_mask = mask, - .irq_unmask = unmask, - .irq_set_type = set_type, -}; - -static u8 demux_masks[4]; - -static void demux_irqs(unsigned int irq, struct irq_desc *desc) -{ - struct irq_chip *chip = irq_desc_get_chip(desc); - u8 *mask = irq_desc_get_handler_data(desc); - u8 pending; - int cirq; - - chip->irq_mask(&desc->irq_data); - chip->irq_ack(&desc->irq_data); - pending = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_MIS) & *mask; - cirq = IRQ_BASE - 1; - while (pending) { - int n = ffs(pending); - cirq += n; - pending >>= n; - generic_handle_irq(cirq); - } - chip->irq_unmask(&desc->irq_data); -} - -extern const signed char *platform_irq_mappings[XTENSA_NR_IRQS]; - -void __init variant_init_irq(void) -{ - int irq, n; - writeb(0, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IE); - for (irq = n = 0; irq < XTENSA_NR_IRQS; irq++) { - const signed char *mapping = platform_irq_mappings[irq]; - int alone = 1; - u8 mask; - if (!mapping) - continue; - for(mask = 0; *mapping != -1; mapping++) - switch (*mapping) { - case S6_INTC_GPIO(0): - mask |= 1 << 0; - break; - case S6_INTC_GPIO(1): - mask |= 1 << 1; - break; - case S6_INTC_GPIO(2): - mask |= 1 << 2; - break; - case S6_INTC_GPIO(3): - mask |= 0x1f << 3; - break; - default: - alone = 0; - } - if (mask) { - int cirq, i; - if (!alone) 
{ - printk(KERN_ERR "chained irq chips can't share" - " parent irq %i\n", irq); - continue; - } - demux_masks[n] = mask; - cirq = IRQ_BASE - 1; - do { - i = ffs(mask); - cirq += i; - mask >>= i; - irq_set_chip(cirq, &gpioirqs); - irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); - } while (mask); - irq_set_handler_data(irq, demux_masks + n); - irq_set_chained_handler(irq, demux_irqs); - if (++n == ARRAY_SIZE(demux_masks)) - break; - } - } -} diff --git a/arch/xtensa/variants/s6000/include/variant/core.h b/arch/xtensa/variants/s6000/include/variant/core.h deleted file mode 100644 index af007953027e..000000000000 --- a/arch/xtensa/variants/s6000/include/variant/core.h +++ /dev/null @@ -1,431 +0,0 @@ -/* - * Xtensa processor core configuration information. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 1999-2008 Tensilica Inc. - */ - -#ifndef _XTENSA_CORE_CONFIGURATION_H -#define _XTENSA_CORE_CONFIGURATION_H - - -/**************************************************************************** - Parameters Useful for Any Code, USER or PRIVILEGED - ****************************************************************************/ - -/* - * Note: Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is - * configured, and a value of 0 otherwise. These macros are always defined. 
- */ - - -/*---------------------------------------------------------------------- - ISA - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_BE 0 /* big-endian byte ordering */ -#define XCHAL_HAVE_WINDOWED 1 /* windowed registers option */ -#define XCHAL_NUM_AREGS 64 /* num of physical addr regs */ -#define XCHAL_NUM_AREGS_LOG2 6 /* log2(XCHAL_NUM_AREGS) */ -#define XCHAL_MAX_INSTRUCTION_SIZE 8 /* max instr bytes (3..8) */ -#define XCHAL_HAVE_DEBUG 1 /* debug option */ -#define XCHAL_HAVE_DENSITY 1 /* 16-bit instructions */ -#define XCHAL_HAVE_LOOPS 1 /* zero-overhead loops */ -#define XCHAL_HAVE_NSA 1 /* NSA/NSAU instructions */ -#define XCHAL_HAVE_MINMAX 1 /* MIN/MAX instructions */ -#define XCHAL_HAVE_SEXT 1 /* SEXT instruction */ -#define XCHAL_HAVE_CLAMPS 1 /* CLAMPS instruction */ -#define XCHAL_HAVE_MUL16 1 /* MUL16S/MUL16U instructions */ -#define XCHAL_HAVE_MUL32 1 /* MULL instruction */ -#define XCHAL_HAVE_MUL32_HIGH 1 /* MULUH/MULSH instructions */ -#define XCHAL_HAVE_DIV32 0 /* QUOS/QUOU/REMS/REMU instructions */ -#define XCHAL_HAVE_L32R 1 /* L32R instruction */ -#define XCHAL_HAVE_ABSOLUTE_LITERALS 1 /* non-PC-rel (extended) L32R */ -#define XCHAL_HAVE_CONST16 0 /* CONST16 instruction */ -#define XCHAL_HAVE_ADDX 1 /* ADDX#/SUBX# instructions */ -#define XCHAL_HAVE_WIDE_BRANCHES 0 /* B*.W18 or B*.W15 instr's */ -#define XCHAL_HAVE_PREDICTED_BRANCHES 0 /* B[EQ/EQZ/NE/NEZ]T instr's */ -#define XCHAL_HAVE_CALL4AND12 1 /* (obsolete option) */ -#define XCHAL_HAVE_ABS 1 /* ABS instruction */ -/*#define XCHAL_HAVE_POPC 0*/ /* POPC instruction */ -/*#define XCHAL_HAVE_CRC 0*/ /* CRC instruction */ -#define XCHAL_HAVE_RELEASE_SYNC 0 /* L32AI/S32RI instructions */ -#define XCHAL_HAVE_S32C1I 0 /* S32C1I instruction */ -#define XCHAL_HAVE_SPECULATION 0 /* speculation */ -#define XCHAL_HAVE_FULL_RESET 0 /* all regs/state reset */ -#define XCHAL_NUM_CONTEXTS 1 /* */ -#define XCHAL_NUM_MISC_REGS 4 /* num of scratch regs 
(0..4) */ -#define XCHAL_HAVE_TAP_MASTER 0 /* JTAG TAP control instr's */ -#define XCHAL_HAVE_PRID 0 /* processor ID register */ -#define XCHAL_HAVE_THREADPTR 0 /* THREADPTR register */ -#define XCHAL_HAVE_BOOLEANS 1 /* boolean registers */ -#define XCHAL_HAVE_CP 1 /* CPENABLE reg (coprocessor) */ -#define XCHAL_CP_MAXCFG 8 /* max allowed cp id plus one */ -#define XCHAL_HAVE_MAC16 0 /* MAC16 package */ -#define XCHAL_HAVE_VECTORFPU2005 0 /* vector floating-point pkg */ -#define XCHAL_HAVE_FP 1 /* floating point pkg */ -#define XCHAL_HAVE_VECTRA1 0 /* Vectra I pkg */ -#define XCHAL_HAVE_VECTRALX 0 /* Vectra LX pkg */ -#define XCHAL_HAVE_HIFI2 0 /* HiFi2 Audio Engine pkg */ - - -/*---------------------------------------------------------------------- - MISC - ----------------------------------------------------------------------*/ - -#define XCHAL_NUM_WRITEBUFFER_ENTRIES 8 /* size of write buffer */ -#define XCHAL_INST_FETCH_WIDTH 8 /* instr-fetch width in bytes */ -#define XCHAL_DATA_WIDTH 16 /* data width in bytes */ -/* In T1050, applies to selected core load and store instructions (see ISA): */ -#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1 /* unaligned loads cause exc. */ -#define XCHAL_UNALIGNED_STORE_EXCEPTION 1 /* unaligned stores cause exc.*/ - -#define XCHAL_SW_VERSION 701001 /* sw version of this header */ - -#define XCHAL_CORE_ID "stretch_bali" /* alphanum core name - (CoreID) set in the Xtensa - Processor Generator */ - -#define XCHAL_BUILD_UNIQUE_ID 0x000104B9 /* 22-bit sw build ID */ - -/* - * These definitions describe the hardware targeted by this software. 
- */ -#define XCHAL_HW_CONFIGID0 0xC2F3F9FE /* ConfigID hi 32 bits*/ -#define XCHAL_HW_CONFIGID1 0x054104B9 /* ConfigID lo 32 bits*/ -#define XCHAL_HW_VERSION_NAME "LX1.0.2" /* full version name */ -#define XCHAL_HW_VERSION_MAJOR 2100 /* major ver# of targeted hw */ -#define XCHAL_HW_VERSION_MINOR 2 /* minor ver# of targeted hw */ -#define XCHAL_HW_VERSION 210002 /* major*100+minor */ -#define XCHAL_HW_REL_LX1 1 -#define XCHAL_HW_REL_LX1_0 1 -#define XCHAL_HW_REL_LX1_0_2 1 -#define XCHAL_HW_CONFIGID_RELIABLE 1 -/* If software targets a *range* of hardware versions, these are the bounds: */ -#define XCHAL_HW_MIN_VERSION_MAJOR 2100 /* major v of earliest tgt hw */ -#define XCHAL_HW_MIN_VERSION_MINOR 2 /* minor v of earliest tgt hw */ -#define XCHAL_HW_MIN_VERSION 210002 /* earliest targeted hw */ -#define XCHAL_HW_MAX_VERSION_MAJOR 2100 /* major v of latest tgt hw */ -#define XCHAL_HW_MAX_VERSION_MINOR 2 /* minor v of latest tgt hw */ -#define XCHAL_HW_MAX_VERSION 210002 /* latest targeted hw */ - - -/*---------------------------------------------------------------------- - CACHE - ----------------------------------------------------------------------*/ - -#define XCHAL_ICACHE_LINESIZE 16 /* I-cache line size in bytes */ -#define XCHAL_DCACHE_LINESIZE 16 /* D-cache line size in bytes */ -#define XCHAL_ICACHE_LINEWIDTH 4 /* log2(I line size in bytes) */ -#define XCHAL_DCACHE_LINEWIDTH 4 /* log2(D line size in bytes) */ - -#define XCHAL_ICACHE_SIZE 32768 /* I-cache size in bytes or 0 */ -#define XCHAL_DCACHE_SIZE 32768 /* D-cache size in bytes or 0 */ - -#define XCHAL_DCACHE_IS_WRITEBACK 1 /* writeback feature */ - - - - -/**************************************************************************** - Parameters Useful for PRIVILEGED (Supervisory or Non-Virtualized) Code - ****************************************************************************/ - - -#ifndef XTENSA_HAL_NON_PRIVILEGED_ONLY - -/*---------------------------------------------------------------------- - 
CACHE - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_PIF 1 /* any outbound PIF present */ - -/* If present, cache size in bytes == (ways * 2^(linewidth + setwidth)). */ - -/* Number of cache sets in log2(lines per way): */ -#define XCHAL_ICACHE_SETWIDTH 9 -#define XCHAL_DCACHE_SETWIDTH 10 - -/* Cache set associativity (number of ways): */ -#define XCHAL_ICACHE_WAYS 4 -#define XCHAL_DCACHE_WAYS 2 - -/* Cache features: */ -#define XCHAL_ICACHE_LINE_LOCKABLE 1 -#define XCHAL_DCACHE_LINE_LOCKABLE 0 -#define XCHAL_ICACHE_ECC_PARITY 0 -#define XCHAL_DCACHE_ECC_PARITY 0 - -/* Number of encoded cache attr bits (see <xtensa/hal.h> for decoded bits): */ -#define XCHAL_CA_BITS 4 - - -/*---------------------------------------------------------------------- - INTERNAL I/D RAM/ROMs and XLMI - ----------------------------------------------------------------------*/ - -#define XCHAL_NUM_INSTROM 0 /* number of core instr. ROMs */ -#define XCHAL_NUM_INSTRAM 0 /* number of core instr. RAMs */ -#define XCHAL_NUM_DATAROM 0 /* number of core data ROMs */ -#define XCHAL_NUM_DATARAM 1 /* number of core data RAMs */ -#define XCHAL_NUM_URAM 0 /* number of core unified RAMs*/ -#define XCHAL_NUM_XLMI 1 /* number of core XLMI ports */ - -/* Data RAM 0: */ -#define XCHAL_DATARAM0_VADDR 0x3FFF0000 -#define XCHAL_DATARAM0_PADDR 0x3FFF0000 -#define XCHAL_DATARAM0_SIZE 65536 -#define XCHAL_DATARAM0_ECC_PARITY 0 - -/* XLMI Port 0: */ -#define XCHAL_XLMI0_VADDR 0x37F80000 -#define XCHAL_XLMI0_PADDR 0x37F80000 -#define XCHAL_XLMI0_SIZE 262144 -#define XCHAL_XLMI0_ECC_PARITY 0 - - -/*---------------------------------------------------------------------- - INTERRUPTS and TIMERS - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_INTERRUPTS 1 /* interrupt option */ -#define XCHAL_HAVE_HIGHPRI_INTERRUPTS 1 /* med/high-pri. 
interrupts */ -#define XCHAL_HAVE_NMI 1 /* non-maskable interrupt */ -#define XCHAL_HAVE_CCOUNT 1 /* CCOUNT reg. (timer option) */ -#define XCHAL_NUM_TIMERS 3 /* number of CCOMPAREn regs */ -#define XCHAL_NUM_INTERRUPTS 27 /* number of interrupts */ -#define XCHAL_NUM_INTERRUPTS_LOG2 5 /* ceil(log2(NUM_INTERRUPTS)) */ -#define XCHAL_NUM_EXTINTERRUPTS 20 /* num of external interrupts */ -#define XCHAL_NUM_INTLEVELS 4 /* number of interrupt levels - (not including level zero) */ -#define XCHAL_EXCM_LEVEL 1 /* level masked by PS.EXCM */ - /* (always 1 in XEA1; levels 2 .. EXCM_LEVEL are "medium priority") */ - -/* Masks of interrupts at each interrupt level: */ -#define XCHAL_INTLEVEL1_MASK 0x01F07FFF -#define XCHAL_INTLEVEL2_MASK 0x02018000 -#define XCHAL_INTLEVEL3_MASK 0x04060000 -#define XCHAL_INTLEVEL4_MASK 0x00000000 -#define XCHAL_INTLEVEL5_MASK 0x00080000 -#define XCHAL_INTLEVEL6_MASK 0x00000000 -#define XCHAL_INTLEVEL7_MASK 0x00000000 - -/* Masks of interrupts at each range 1..n of interrupt levels: */ -#define XCHAL_INTLEVEL1_ANDBELOW_MASK 0x01F07FFF -#define XCHAL_INTLEVEL2_ANDBELOW_MASK 0x03F1FFFF -#define XCHAL_INTLEVEL3_ANDBELOW_MASK 0x07F7FFFF -#define XCHAL_INTLEVEL4_ANDBELOW_MASK 0x07F7FFFF -#define XCHAL_INTLEVEL5_ANDBELOW_MASK 0x07FFFFFF -#define XCHAL_INTLEVEL6_ANDBELOW_MASK 0x07FFFFFF -#define XCHAL_INTLEVEL7_ANDBELOW_MASK 0x07FFFFFF - -/* Level of each interrupt: */ -#define XCHAL_INT0_LEVEL 1 -#define XCHAL_INT1_LEVEL 1 -#define XCHAL_INT2_LEVEL 1 -#define XCHAL_INT3_LEVEL 1 -#define XCHAL_INT4_LEVEL 1 -#define XCHAL_INT5_LEVEL 1 -#define XCHAL_INT6_LEVEL 1 -#define XCHAL_INT7_LEVEL 1 -#define XCHAL_INT8_LEVEL 1 -#define XCHAL_INT9_LEVEL 1 -#define XCHAL_INT10_LEVEL 1 -#define XCHAL_INT11_LEVEL 1 -#define XCHAL_INT12_LEVEL 1 -#define XCHAL_INT13_LEVEL 1 -#define XCHAL_INT14_LEVEL 1 -#define XCHAL_INT15_LEVEL 2 -#define XCHAL_INT16_LEVEL 2 -#define XCHAL_INT17_LEVEL 3 -#define XCHAL_INT18_LEVEL 3 -#define XCHAL_INT19_LEVEL 5 -#define 
XCHAL_INT20_LEVEL 1 -#define XCHAL_INT21_LEVEL 1 -#define XCHAL_INT22_LEVEL 1 -#define XCHAL_INT23_LEVEL 1 -#define XCHAL_INT24_LEVEL 1 -#define XCHAL_INT25_LEVEL 2 -#define XCHAL_INT26_LEVEL 3 -#define XCHAL_DEBUGLEVEL 4 /* debug interrupt level */ -#define XCHAL_HAVE_DEBUG_EXTERN_INT 1 /* OCD external db interrupt */ -#define XCHAL_NMILEVEL 5 /* NMI "level" (for use with - EXCSAVE/EPS/EPC_n, RFI n) */ - -/* Type of each interrupt: */ -#define XCHAL_INT0_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT1_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT2_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT3_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT4_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT5_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT6_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT7_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT8_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT9_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT10_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT11_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT12_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT13_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT14_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT15_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT16_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT17_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT18_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT19_TYPE XTHAL_INTTYPE_NMI -#define XCHAL_INT20_TYPE XTHAL_INTTYPE_SOFTWARE -#define XCHAL_INT21_TYPE XTHAL_INTTYPE_SOFTWARE -#define XCHAL_INT22_TYPE XTHAL_INTTYPE_SOFTWARE -#define XCHAL_INT23_TYPE XTHAL_INTTYPE_SOFTWARE -#define XCHAL_INT24_TYPE XTHAL_INTTYPE_TIMER -#define XCHAL_INT25_TYPE XTHAL_INTTYPE_TIMER -#define XCHAL_INT26_TYPE XTHAL_INTTYPE_TIMER - -/* Masks of interrupts for each type of interrupt: */ -#define XCHAL_INTTYPE_MASK_UNCONFIGURED 0xF8000000 -#define XCHAL_INTTYPE_MASK_SOFTWARE 0x00F00000 -#define 
XCHAL_INTTYPE_MASK_EXTERN_EDGE 0x00000000 -#define XCHAL_INTTYPE_MASK_EXTERN_LEVEL 0x0007FFFF -#define XCHAL_INTTYPE_MASK_TIMER 0x07000000 -#define XCHAL_INTTYPE_MASK_NMI 0x00080000 -#define XCHAL_INTTYPE_MASK_WRITE_ERROR 0x00000000 - -/* Interrupt numbers assigned to specific interrupt sources: */ -#define XCHAL_TIMER0_INTERRUPT 24 /* CCOMPARE0 */ -#define XCHAL_TIMER1_INTERRUPT 25 /* CCOMPARE1 */ -#define XCHAL_TIMER2_INTERRUPT 26 /* CCOMPARE2 */ -#define XCHAL_TIMER3_INTERRUPT XTHAL_TIMER_UNCONFIGURED -#define XCHAL_NMI_INTERRUPT 19 /* non-maskable interrupt */ - -/* Interrupt numbers for levels at which only one interrupt is configured: */ -#define XCHAL_INTLEVEL5_NUM 19 -/* (There are many interrupts each at level(s) 1, 2, 3.) */ - - -/* - * External interrupt vectors/levels. - * These macros describe how Xtensa processor interrupt numbers - * (as numbered internally, eg. in INTERRUPT and INTENABLE registers) - * map to external BInterrupt<n> pins, for those interrupts - * configured as external (level-triggered, edge-triggered, or NMI). - * See the Xtensa processor databook for more details. 
- */ - -/* Core interrupt numbers mapped to each EXTERNAL interrupt number: */ -#define XCHAL_EXTINT0_NUM 0 /* (intlevel 1) */ -#define XCHAL_EXTINT1_NUM 1 /* (intlevel 1) */ -#define XCHAL_EXTINT2_NUM 2 /* (intlevel 1) */ -#define XCHAL_EXTINT3_NUM 3 /* (intlevel 1) */ -#define XCHAL_EXTINT4_NUM 4 /* (intlevel 1) */ -#define XCHAL_EXTINT5_NUM 5 /* (intlevel 1) */ -#define XCHAL_EXTINT6_NUM 6 /* (intlevel 1) */ -#define XCHAL_EXTINT7_NUM 7 /* (intlevel 1) */ -#define XCHAL_EXTINT8_NUM 8 /* (intlevel 1) */ -#define XCHAL_EXTINT9_NUM 9 /* (intlevel 1) */ -#define XCHAL_EXTINT10_NUM 10 /* (intlevel 1) */ -#define XCHAL_EXTINT11_NUM 11 /* (intlevel 1) */ -#define XCHAL_EXTINT12_NUM 12 /* (intlevel 1) */ -#define XCHAL_EXTINT13_NUM 13 /* (intlevel 1) */ -#define XCHAL_EXTINT14_NUM 14 /* (intlevel 1) */ -#define XCHAL_EXTINT15_NUM 15 /* (intlevel 2) */ -#define XCHAL_EXTINT16_NUM 16 /* (intlevel 2) */ -#define XCHAL_EXTINT17_NUM 17 /* (intlevel 3) */ -#define XCHAL_EXTINT18_NUM 18 /* (intlevel 3) */ -#define XCHAL_EXTINT19_NUM 19 /* (intlevel 5) */ - - -/*---------------------------------------------------------------------- - EXCEPTIONS and VECTORS - ----------------------------------------------------------------------*/ - -#define XCHAL_XEA_VERSION 2 /* Xtensa Exception Architecture - number: 1 == XEA1 (old) - 2 == XEA2 (new) - 0 == XEAX (extern) */ -#define XCHAL_HAVE_XEA1 0 /* Exception Architecture 1 */ -#define XCHAL_HAVE_XEA2 1 /* Exception Architecture 2 */ -#define XCHAL_HAVE_XEAX 0 /* External Exception Arch. 
*/ -#define XCHAL_HAVE_EXCEPTIONS 1 /* exception option */ -#define XCHAL_HAVE_MEM_ECC_PARITY 0 /* local memory ECC/parity */ -#define XCHAL_HAVE_VECTOR_SELECT 0 /* relocatable vectors */ -#define XCHAL_HAVE_VECBASE 0 /* relocatable vectors */ - -#define XCHAL_RESET_VECOFS 0x00000000 -#define XCHAL_RESET_VECTOR_VADDR 0x3FFE03D0 -#define XCHAL_RESET_VECTOR_PADDR 0x3FFE03D0 -#define XCHAL_USER_VECOFS 0x00000000 -#define XCHAL_USER_VECTOR_VADDR 0x40000220 -#define XCHAL_USER_VECTOR_PADDR 0x40000220 -#define XCHAL_KERNEL_VECOFS 0x00000000 -#define XCHAL_KERNEL_VECTOR_VADDR 0x40000200 -#define XCHAL_KERNEL_VECTOR_PADDR 0x40000200 -#define XCHAL_DOUBLEEXC_VECOFS 0x00000000 -#define XCHAL_DOUBLEEXC_VECTOR_VADDR 0x400002A0 -#define XCHAL_DOUBLEEXC_VECTOR_PADDR 0x400002A0 -#define XCHAL_WINDOW_OF4_VECOFS 0x00000000 -#define XCHAL_WINDOW_UF4_VECOFS 0x00000040 -#define XCHAL_WINDOW_OF8_VECOFS 0x00000080 -#define XCHAL_WINDOW_UF8_VECOFS 0x000000C0 -#define XCHAL_WINDOW_OF12_VECOFS 0x00000100 -#define XCHAL_WINDOW_UF12_VECOFS 0x00000140 -#define XCHAL_WINDOW_VECTORS_VADDR 0x40000000 -#define XCHAL_WINDOW_VECTORS_PADDR 0x40000000 -#define XCHAL_INTLEVEL2_VECOFS 0x00000000 -#define XCHAL_INTLEVEL2_VECTOR_VADDR 0x40000240 -#define XCHAL_INTLEVEL2_VECTOR_PADDR 0x40000240 -#define XCHAL_INTLEVEL3_VECOFS 0x00000000 -#define XCHAL_INTLEVEL3_VECTOR_VADDR 0x40000260 -#define XCHAL_INTLEVEL3_VECTOR_PADDR 0x40000260 -#define XCHAL_INTLEVEL4_VECOFS 0x00000000 -#define XCHAL_INTLEVEL4_VECTOR_VADDR 0x40000390 -#define XCHAL_INTLEVEL4_VECTOR_PADDR 0x40000390 -#define XCHAL_DEBUG_VECOFS XCHAL_INTLEVEL4_VECOFS -#define XCHAL_DEBUG_VECTOR_VADDR XCHAL_INTLEVEL4_VECTOR_VADDR -#define XCHAL_DEBUG_VECTOR_PADDR XCHAL_INTLEVEL4_VECTOR_PADDR -#define XCHAL_NMI_VECOFS 0x00000000 -#define XCHAL_NMI_VECTOR_VADDR 0x400003B0 -#define XCHAL_NMI_VECTOR_PADDR 0x400003B0 -#define XCHAL_INTLEVEL5_VECOFS XCHAL_NMI_VECOFS -#define XCHAL_INTLEVEL5_VECTOR_VADDR XCHAL_NMI_VECTOR_VADDR -#define 
XCHAL_INTLEVEL5_VECTOR_PADDR XCHAL_NMI_VECTOR_PADDR - - -/*---------------------------------------------------------------------- - DEBUG - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_OCD 1 /* OnChipDebug option */ -#define XCHAL_NUM_IBREAK 2 /* number of IBREAKn regs */ -#define XCHAL_NUM_DBREAK 2 /* number of DBREAKn regs */ -#define XCHAL_HAVE_OCD_DIR_ARRAY 1 /* faster OCD option */ - - -/*---------------------------------------------------------------------- - MMU - ----------------------------------------------------------------------*/ - -/* See core-matmap.h header file for more details. */ - -#define XCHAL_HAVE_TLBS 1 /* inverse of HAVE_CACHEATTR */ -#define XCHAL_HAVE_SPANNING_WAY 1 /* one way maps I+D 4GB vaddr */ -#define XCHAL_HAVE_IDENTITY_MAP 1 /* vaddr == paddr always */ -#define XCHAL_HAVE_CACHEATTR 0 /* CACHEATTR register present */ -#define XCHAL_HAVE_MIMIC_CACHEATTR 1 /* region protection */ -#define XCHAL_HAVE_XLT_CACHEATTR 0 /* region prot. w/translation */ -#define XCHAL_HAVE_PTP_MMU 0 /* full MMU (with page table - [autorefill] and protection) - usable for an MMU-based OS */ -/* If none of the above last 4 are set, it's a custom TLB configuration. */ - -#define XCHAL_MMU_ASID_BITS 0 /* number of bits in ASIDs */ -#define XCHAL_MMU_RINGS 1 /* number of rings (1..4) */ -#define XCHAL_MMU_RING_BITS 0 /* num of bits in RING field */ - -#endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ - - -#endif /* _XTENSA_CORE_CONFIGURATION_H */ - diff --git a/arch/xtensa/variants/s6000/include/variant/dmac.h b/arch/xtensa/variants/s6000/include/variant/dmac.h deleted file mode 100644 index 3f88d9fc6897..000000000000 --- a/arch/xtensa/variants/s6000/include/variant/dmac.h +++ /dev/null @@ -1,387 +0,0 @@ -/* - * include/asm-xtensa/variant-s6000/dmac.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. 
See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2006 Tensilica Inc. - * Copyright (C) 2008 Emlix GmbH <info@emlix.com> - * Authors: Fabian Godehardt <fg@emlix.com> - * Oskar Schirmer <oskar@scara.com> - * Daniel Gloeckner <dg@emlix.com> - */ - -#ifndef __ASM_XTENSA_S6000_DMAC_H -#define __ASM_XTENSA_S6000_DMAC_H -#include <linux/io.h> -#include <variant/hardware.h> - -/* DMA global */ - -#define S6_DMA_INTSTAT0 0x000 -#define S6_DMA_INTSTAT1 0x004 -#define S6_DMA_INTENABLE0 0x008 -#define S6_DMA_INTENABLE1 0x00C -#define S6_DMA_INTRAW0 0x010 -#define S6_DMA_INTRAW1 0x014 -#define S6_DMA_INTCLEAR0 0x018 -#define S6_DMA_INTCLEAR1 0x01C -#define S6_DMA_INTSET0 0x020 -#define S6_DMA_INTSET1 0x024 -#define S6_DMA_INT0_UNDER 0 -#define S6_DMA_INT0_OVER 16 -#define S6_DMA_INT1_CHANNEL 0 -#define S6_DMA_INT1_MASTER 16 -#define S6_DMA_INT1_MASTER_MASK 7 -#define S6_DMA_TERMCNTIRQSTAT 0x028 -#define S6_DMA_TERMCNTIRQCLR 0x02C -#define S6_DMA_TERMCNTIRQSET 0x030 -#define S6_DMA_PENDCNTIRQSTAT 0x034 -#define S6_DMA_PENDCNTIRQCLR 0x038 -#define S6_DMA_PENDCNTIRQSET 0x03C -#define S6_DMA_LOWWMRKIRQSTAT 0x040 -#define S6_DMA_LOWWMRKIRQCLR 0x044 -#define S6_DMA_LOWWMRKIRQSET 0x048 -#define S6_DMA_MASTERERRINFO 0x04C -#define S6_DMA_MASTERERR_CHAN(n) (4*(n)) -#define S6_DMA_MASTERERR_CHAN_MASK 0xF -#define S6_DMA_DESCRFIFO0 0x050 -#define S6_DMA_DESCRFIFO1 0x054 -#define S6_DMA_DESCRFIFO2 0x058 -#define S6_DMA_DESCRFIFO2_AUTODISABLE 24 -#define S6_DMA_DESCRFIFO3 0x05C -#define S6_DMA_MASTER0START 0x060 -#define S6_DMA_MASTER0END 0x064 -#define S6_DMA_MASTER1START 0x068 -#define S6_DMA_MASTER1END 0x06C -#define S6_DMA_NEXTFREE 0x070 -#define S6_DMA_NEXTFREE_CHAN 0 -#define S6_DMA_NEXTFREE_CHAN_MASK 0x1F -#define S6_DMA_NEXTFREE_ENA 16 -#define S6_DMA_NEXTFREE_ENA_MASK ((1 << 16) - 1) -#define S6_DMA_DPORTCTRLGRP(p) ((p) * 4 + 0x074) -#define S6_DMA_DPORTCTRLGRP_FRAMEREP 0 -#define S6_DMA_DPORTCTRLGRP_NRCHANS 1 -#define 
S6_DMA_DPORTCTRLGRP_NRCHANS_1 0 -#define S6_DMA_DPORTCTRLGRP_NRCHANS_3 1 -#define S6_DMA_DPORTCTRLGRP_NRCHANS_4 2 -#define S6_DMA_DPORTCTRLGRP_NRCHANS_2 3 -#define S6_DMA_DPORTCTRLGRP_ENA 31 - - -/* DMA per channel */ - -#define DMA_CHNL(dmac, n) ((dmac) + 0x1000 + (n) * 0x100) -#define DMA_INDEX_CHNL(addr) (((addr) >> 8) & 0xF) -#define DMA_MASK_DMAC(addr) ((addr) & 0xFFFF0000) -#define S6_DMA_CHNCTRL 0x000 -#define S6_DMA_CHNCTRL_ENABLE 0 -#define S6_DMA_CHNCTRL_PAUSE 1 -#define S6_DMA_CHNCTRL_PRIO 2 -#define S6_DMA_CHNCTRL_PRIO_MASK 3 -#define S6_DMA_CHNCTRL_PERIPHXFER 4 -#define S6_DMA_CHNCTRL_PERIPHENA 5 -#define S6_DMA_CHNCTRL_SRCINC 6 -#define S6_DMA_CHNCTRL_DSTINC 7 -#define S6_DMA_CHNCTRL_BURSTLOG 8 -#define S6_DMA_CHNCTRL_BURSTLOG_MASK 7 -#define S6_DMA_CHNCTRL_DESCFIFODEPTH 12 -#define S6_DMA_CHNCTRL_DESCFIFODEPTH_MASK 0x1F -#define S6_DMA_CHNCTRL_DESCFIFOFULL 17 -#define S6_DMA_CHNCTRL_BWCONSEL 18 -#define S6_DMA_CHNCTRL_BWCONENA 19 -#define S6_DMA_CHNCTRL_PENDGCNTSTAT 20 -#define S6_DMA_CHNCTRL_PENDGCNTSTAT_MASK 0x3F -#define S6_DMA_CHNCTRL_LOWWMARK 26 -#define S6_DMA_CHNCTRL_LOWWMARK_MASK 0xF -#define S6_DMA_CHNCTRL_TSTAMP 30 -#define S6_DMA_TERMCNTNB 0x004 -#define S6_DMA_TERMCNTNB_MASK 0xFFFF -#define S6_DMA_TERMCNTTMO 0x008 -#define S6_DMA_TERMCNTSTAT 0x00C -#define S6_DMA_TERMCNTSTAT_MASK 0xFF -#define S6_DMA_CMONCHUNK 0x010 -#define S6_DMA_SRCSKIP 0x014 -#define S6_DMA_DSTSKIP 0x018 -#define S6_DMA_CUR_SRC 0x024 -#define S6_DMA_CUR_DST 0x028 -#define S6_DMA_TIMESTAMP 0x030 - -/* DMA channel lists */ - -#define S6_DPDMA_CHAN(stream, channel) (4 * (stream) + (channel)) -#define S6_DPDMA_NB 16 - -#define S6_HIFDMA_GMACTX 0 -#define S6_HIFDMA_GMACRX 1 -#define S6_HIFDMA_I2S0 2 -#define S6_HIFDMA_I2S1 3 -#define S6_HIFDMA_EGIB 4 -#define S6_HIFDMA_PCITX 5 -#define S6_HIFDMA_PCIRX 6 -#define S6_HIFDMA_NB 7 - -#define S6_NIDMA_NB 4 - -#define S6_LMSDMA_NB 12 - -/* controller access */ - -#define S6_DMAC_NB 4 -#define S6_DMAC_INDEX(dmac) 
(((unsigned)(dmac) >> 18) % S6_DMAC_NB) - -struct s6dmac_ctrl { - u32 dmac; - spinlock_t lock; - u8 chan_nb; -}; - -extern struct s6dmac_ctrl s6dmac_ctrl[S6_DMAC_NB]; - - -/* DMA control, per channel */ - -static inline int s6dmac_fifo_full(u32 dmac, int chan) -{ - return (readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL) - & (1 << S6_DMA_CHNCTRL_DESCFIFOFULL)) && 1; -} - -static inline int s6dmac_termcnt_irq(u32 dmac, int chan) -{ - u32 m = 1 << chan; - int r = (readl(dmac + S6_DMA_TERMCNTIRQSTAT) & m) && 1; - if (r) - writel(m, dmac + S6_DMA_TERMCNTIRQCLR); - return r; -} - -static inline int s6dmac_pendcnt_irq(u32 dmac, int chan) -{ - u32 m = 1 << chan; - int r = (readl(dmac + S6_DMA_PENDCNTIRQSTAT) & m) && 1; - if (r) - writel(m, dmac + S6_DMA_PENDCNTIRQCLR); - return r; -} - -static inline int s6dmac_lowwmark_irq(u32 dmac, int chan) -{ - int r = (readl(dmac + S6_DMA_LOWWMRKIRQSTAT) & (1 << chan)) ? 1 : 0; - if (r) - writel(1 << chan, dmac + S6_DMA_LOWWMRKIRQCLR); - return r; -} - -static inline u32 s6dmac_pending_count(u32 dmac, int chan) -{ - return (readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL) - >> S6_DMA_CHNCTRL_PENDGCNTSTAT) - & S6_DMA_CHNCTRL_PENDGCNTSTAT_MASK; -} - -static inline void s6dmac_set_terminal_count(u32 dmac, int chan, u32 n) -{ - n &= S6_DMA_TERMCNTNB_MASK; - n |= readl(DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTNB) - & ~S6_DMA_TERMCNTNB_MASK; - writel(n, DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTNB); -} - -static inline u32 s6dmac_get_terminal_count(u32 dmac, int chan) -{ - return (readl(DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTNB)) - & S6_DMA_TERMCNTNB_MASK; -} - -static inline u32 s6dmac_timestamp(u32 dmac, int chan) -{ - return readl(DMA_CHNL(dmac, chan) + S6_DMA_TIMESTAMP); -} - -static inline u32 s6dmac_cur_src(u32 dmac, int chan) -{ - return readl(DMA_CHNL(dmac, chan) + S6_DMA_CUR_SRC); -} - -static inline u32 s6dmac_cur_dst(u32 dmac, int chan) -{ - return readl(DMA_CHNL(dmac, chan) + S6_DMA_CUR_DST); -} - -static inline void s6dmac_disable_chan(u32 dmac, 
int chan) -{ - u32 ctrl; - writel(readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL) - & ~(1 << S6_DMA_CHNCTRL_ENABLE), - DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL); - do - ctrl = readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL); - while (ctrl & (1 << S6_DMA_CHNCTRL_ENABLE)); -} - -static inline void s6dmac_set_stride_skip(u32 dmac, int chan, - int comchunk, /* 0: disable scatter/gather */ - int srcskip, int dstskip) -{ - writel(comchunk, DMA_CHNL(dmac, chan) + S6_DMA_CMONCHUNK); - writel(srcskip, DMA_CHNL(dmac, chan) + S6_DMA_SRCSKIP); - writel(dstskip, DMA_CHNL(dmac, chan) + S6_DMA_DSTSKIP); -} - -static inline void s6dmac_enable_chan(u32 dmac, int chan, - int prio, /* 0 (highest) .. 3 (lowest) */ - int periphxfer, /* <0: disable p.req.line, 0..1: mode */ - int srcinc, int dstinc, /* 0: dont increment src/dst address */ - int comchunk, /* 0: disable scatter/gather */ - int srcskip, int dstskip, - int burstsize, /* 4 for I2S, 7 for everything else */ - int bandwidthconserve, /* <0: disable, 0..1: select */ - int lowwmark, /* 0..15 */ - int timestamp, /* 0: disable timestamp */ - int enable) /* 0: disable for now */ -{ - writel(1, DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTNB); - writel(0, DMA_CHNL(dmac, chan) + S6_DMA_TERMCNTTMO); - writel(lowwmark << S6_DMA_CHNCTRL_LOWWMARK, - DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL); - s6dmac_set_stride_skip(dmac, chan, comchunk, srcskip, dstskip); - writel(((enable ? 1 : 0) << S6_DMA_CHNCTRL_ENABLE) | - (prio << S6_DMA_CHNCTRL_PRIO) | - (((periphxfer > 0) ? 1 : 0) << S6_DMA_CHNCTRL_PERIPHXFER) | - (((periphxfer < 0) ? 0 : 1) << S6_DMA_CHNCTRL_PERIPHENA) | - ((srcinc ? 1 : 0) << S6_DMA_CHNCTRL_SRCINC) | - ((dstinc ? 1 : 0) << S6_DMA_CHNCTRL_DSTINC) | - (burstsize << S6_DMA_CHNCTRL_BURSTLOG) | - (((bandwidthconserve > 0) ? 1 : 0) << S6_DMA_CHNCTRL_BWCONSEL) | - (((bandwidthconserve < 0) ? 0 : 1) << S6_DMA_CHNCTRL_BWCONENA) | - (lowwmark << S6_DMA_CHNCTRL_LOWWMARK) | - ((timestamp ? 
1 : 0) << S6_DMA_CHNCTRL_TSTAMP), - DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL); -} - - -/* DMA control, per engine */ - -static inline unsigned _dmac_addr_index(u32 dmac) -{ - unsigned i = S6_DMAC_INDEX(dmac); - if (s6dmac_ctrl[i].dmac != dmac) - BUG(); - return i; -} - -static inline void _s6dmac_disable_error_irqs(u32 dmac, u32 mask) -{ - writel(mask, dmac + S6_DMA_TERMCNTIRQCLR); - writel(mask, dmac + S6_DMA_PENDCNTIRQCLR); - writel(mask, dmac + S6_DMA_LOWWMRKIRQCLR); - writel(readl(dmac + S6_DMA_INTENABLE0) - & ~((mask << S6_DMA_INT0_UNDER) | (mask << S6_DMA_INT0_OVER)), - dmac + S6_DMA_INTENABLE0); - writel(readl(dmac + S6_DMA_INTENABLE1) & ~(mask << S6_DMA_INT1_CHANNEL), - dmac + S6_DMA_INTENABLE1); - writel((mask << S6_DMA_INT0_UNDER) | (mask << S6_DMA_INT0_OVER), - dmac + S6_DMA_INTCLEAR0); - writel(mask << S6_DMA_INT1_CHANNEL, dmac + S6_DMA_INTCLEAR1); -} - -/* - * request channel from specified engine - * with chan<0, accept any channel - * further parameters see s6dmac_enable_chan - * returns < 0 upon error, channel nb otherwise - */ -static inline int s6dmac_request_chan(u32 dmac, int chan, - int prio, - int periphxfer, - int srcinc, int dstinc, - int comchunk, - int srcskip, int dstskip, - int burstsize, - int bandwidthconserve, - int lowwmark, - int timestamp, - int enable) -{ - int r = chan; - unsigned long flags; - spinlock_t *spinl = &s6dmac_ctrl[_dmac_addr_index(dmac)].lock; - spin_lock_irqsave(spinl, flags); - if (r < 0) { - r = (readl(dmac + S6_DMA_NEXTFREE) >> S6_DMA_NEXTFREE_CHAN) - & S6_DMA_NEXTFREE_CHAN_MASK; - } - if (r >= s6dmac_ctrl[_dmac_addr_index(dmac)].chan_nb) { - if (chan < 0) - r = -EBUSY; - else - r = -ENXIO; - } else if (((readl(dmac + S6_DMA_NEXTFREE) >> S6_DMA_NEXTFREE_ENA) - >> r) & 1) { - r = -EBUSY; - } else { - s6dmac_enable_chan(dmac, r, prio, periphxfer, - srcinc, dstinc, comchunk, srcskip, dstskip, burstsize, - bandwidthconserve, lowwmark, timestamp, enable); - } - spin_unlock_irqrestore(spinl, flags); - return r; -} - 
-static inline void s6dmac_put_fifo(u32 dmac, int chan, - u32 src, u32 dst, u32 size) -{ - unsigned long flags; - spinlock_t *spinl = &s6dmac_ctrl[_dmac_addr_index(dmac)].lock; - spin_lock_irqsave(spinl, flags); - writel(src, dmac + S6_DMA_DESCRFIFO0); - writel(dst, dmac + S6_DMA_DESCRFIFO1); - writel(size, dmac + S6_DMA_DESCRFIFO2); - writel(chan, dmac + S6_DMA_DESCRFIFO3); - spin_unlock_irqrestore(spinl, flags); -} - -static inline u32 s6dmac_channel_enabled(u32 dmac, int chan) -{ - return readl(DMA_CHNL(dmac, chan) + S6_DMA_CHNCTRL) & - (1 << S6_DMA_CHNCTRL_ENABLE); -} - -/* - * group 1-4 data port channels - * with port=0..3, nrch=1-4 channels, - * frrep=0/1 (dis- or enable frame repeat) - */ -static inline void s6dmac_dp_setup_group(u32 dmac, int port, - int nrch, int frrep) -{ - static const u8 mask[4] = {0, 3, 1, 2}; - BUG_ON(dmac != S6_REG_DPDMA); - if ((port < 0) || (port > 3) || (nrch < 1) || (nrch > 4)) - return; - writel((mask[nrch - 1] << S6_DMA_DPORTCTRLGRP_NRCHANS) - | ((frrep ? 
1 : 0) << S6_DMA_DPORTCTRLGRP_FRAMEREP), - dmac + S6_DMA_DPORTCTRLGRP(port)); -} - -static inline void s6dmac_dp_switch_group(u32 dmac, int port, int enable) -{ - u32 tmp; - BUG_ON(dmac != S6_REG_DPDMA); - tmp = readl(dmac + S6_DMA_DPORTCTRLGRP(port)); - if (enable) - tmp |= (1 << S6_DMA_DPORTCTRLGRP_ENA); - else - tmp &= ~(1 << S6_DMA_DPORTCTRLGRP_ENA); - writel(tmp, dmac + S6_DMA_DPORTCTRLGRP(port)); -} - -extern void s6dmac_put_fifo_cache(u32 dmac, int chan, - u32 src, u32 dst, u32 size); -extern void s6dmac_disable_error_irqs(u32 dmac, u32 mask); -extern u32 s6dmac_int_sources(u32 dmac, u32 channel); -extern void s6dmac_release_chan(u32 dmac, int chan); - -#endif /* __ASM_XTENSA_S6000_DMAC_H */ diff --git a/arch/xtensa/variants/s6000/include/variant/gpio.h b/arch/xtensa/variants/s6000/include/variant/gpio.h deleted file mode 100644 index 8484ab0df461..000000000000 --- a/arch/xtensa/variants/s6000/include/variant/gpio.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _XTENSA_VARIANT_S6000_GPIO_H -#define _XTENSA_VARIANT_S6000_GPIO_H - -extern int s6_gpio_init(u32 afsel); - -#endif /* _XTENSA_VARIANT_S6000_GPIO_H */ diff --git a/arch/xtensa/variants/s6000/include/variant/hardware.h b/arch/xtensa/variants/s6000/include/variant/hardware.h deleted file mode 100644 index 5d9ba098d84a..000000000000 --- a/arch/xtensa/variants/s6000/include/variant/hardware.h +++ /dev/null @@ -1,259 +0,0 @@ -#ifndef __XTENSA_S6000_HARDWARE_H -#define __XTENSA_S6000_HARDWARE_H - -#define S6_SCLK 1843200 - -#define S6_MEM_REG 0x20000000 -#define S6_MEM_EFI 0x33F00000 -#define S6_MEM_PCIE_DATARAM1 0x34000000 -#define S6_MEM_XLMI 0x37F80000 -#define S6_MEM_PIF_DATARAM1 0x37FFC000 -#define S6_MEM_GMAC 0x38000000 -#define S6_MEM_I2S 0x3A000000 -#define S6_MEM_EGIB 0x3C000000 -#define S6_MEM_PCIE_CFG 0x3E000000 -#define S6_MEM_PIF_DATARAM 0x3FFE0000 -#define S6_MEM_XLMI_DATARAM 0x3FFF0000 -#define S6_MEM_DDR 0x40000000 -#define S6_MEM_PCIE_APER 0xC0000000 -#define S6_MEM_AUX 0xF0000000 - -/* Device 
addresses */ - -#define S6_REG_SCB S6_MEM_REG -#define S6_REG_NB (S6_REG_SCB + 0x10000) -#define S6_REG_LMSDMA (S6_REG_SCB + 0x20000) -#define S6_REG_NI (S6_REG_SCB + 0x30000) -#define S6_REG_NIDMA (S6_REG_SCB + 0x40000) -#define S6_REG_NS (S6_REG_SCB + 0x50000) -#define S6_REG_DDR (S6_REG_SCB + 0x60000) -#define S6_REG_GREG1 (S6_REG_SCB + 0x70000) -#define S6_REG_DP (S6_REG_SCB + 0x80000) -#define S6_REG_DPDMA (S6_REG_SCB + 0x90000) -#define S6_REG_EGIB (S6_REG_SCB + 0xA0000) -#define S6_REG_PCIE (S6_REG_SCB + 0xB0000) -#define S6_REG_I2S (S6_REG_SCB + 0xC0000) -#define S6_REG_GMAC (S6_REG_SCB + 0xD0000) -#define S6_REG_HIFDMA (S6_REG_SCB + 0xE0000) -#define S6_REG_GREG2 (S6_REG_SCB + 0xF0000) - -#define S6_REG_APB S6_REG_SCB -#define S6_REG_UART (S6_REG_APB + 0x0000) -#define S6_REG_INTC (S6_REG_APB + 0x2000) -#define S6_REG_SPI (S6_REG_APB + 0x3000) -#define S6_REG_I2C (S6_REG_APB + 0x4000) -#define S6_REG_GPIO (S6_REG_APB + 0x8000) - -/* Global register block */ - -#define S6_GREG1_PLL_LOCKCLEAR 0x000 -#define S6_GREG1_PLL_LOCK_SYS 0 -#define S6_GREG1_PLL_LOCK_IO 1 -#define S6_GREG1_PLL_LOCK_AIM 2 -#define S6_GREG1_PLL_LOCK_DP0 3 -#define S6_GREG1_PLL_LOCK_DP2 4 -#define S6_GREG1_PLL_LOCK_DDR 5 -#define S6_GREG1_PLL_LOCKSTAT 0x004 -#define S6_GREG1_PLL_LOCKSTAT_CURLOCK 0 -#define S6_GREG1_PLL_LOCKSTAT_EVERUNLCK 8 -#define S6_GREG1_PLLSEL 0x010 -#define S6_GREG1_PLLSEL_AIM 0 -#define S6_GREG1_PLLSEL_AIM_DDR2 0 -#define S6_GREG1_PLLSEL_AIM_300MHZ 1 -#define S6_GREG1_PLLSEL_AIM_240MHZ 2 -#define S6_GREG1_PLLSEL_AIM_200MHZ 3 -#define S6_GREG1_PLLSEL_AIM_150MHZ 4 -#define S6_GREG1_PLLSEL_AIM_120MHZ 5 -#define S6_GREG1_PLLSEL_AIM_40MHZ 6 -#define S6_GREG1_PLLSEL_AIM_PLLAIMREF 7 -#define S6_GREG1_PLLSEL_AIM_MASK 7 -#define S6_GREG1_PLLSEL_DDR 8 -#define S6_GREG1_PLLSEL_DDR_HS 0 -#define S6_GREG1_PLLSEL_DDR_333MHZ 1 -#define S6_GREG1_PLLSEL_DDR_250MHZ 2 -#define S6_GREG1_PLLSEL_DDR_200MHZ 3 -#define S6_GREG1_PLLSEL_DDR_167MHZ 4 -#define S6_GREG1_PLLSEL_DDR_100MHZ 5 
-#define S6_GREG1_PLLSEL_DDR_33MHZ 6 -#define S6_GREG1_PLLSEL_DDR_PLLIOREF 7 -#define S6_GREG1_PLLSEL_DDR_MASK 7 -#define S6_GREG1_PLLSEL_GMAC 16 -#define S6_GREG1_PLLSEL_GMAC_125MHZ 0 -#define S6_GREG1_PLLSEL_GMAC_25MHZ 1 -#define S6_GREG1_PLLSEL_GMAC_2500KHZ 2 -#define S6_GREG1_PLLSEL_GMAC_EXTERN 3 -#define S6_GREG1_PLLSEL_GMAC_MASK 3 -#define S6_GREG1_PLLSEL_GMII 18 -#define S6_GREG1_PLLSEL_GMII_111MHZ 0 -#define S6_GREG1_PLLSEL_GMII_IOREF 1 -#define S6_GREG1_PLLSEL_GMII_NONE 2 -#define S6_GREG1_PLLSEL_GMII_125MHZ 3 -#define S6_GREG1_PLLSEL_GMII_MASK 3 -#define S6_GREG1_SYSUNLOCKCNT 0x020 -#define S6_GREG1_IOUNLOCKCNT 0x024 -#define S6_GREG1_AIMUNLOCKCNT 0x028 -#define S6_GREG1_DP0UNLOCKCNT 0x02C -#define S6_GREG1_DP2UNLOCKCNT 0x030 -#define S6_GREG1_DDRUNLOCKCNT 0x034 -#define S6_GREG1_CLKBAL0 0x040 -#define S6_GREG1_CLKBAL0_LSGB 0 -#define S6_GREG1_CLKBAL0_LSPX 8 -#define S6_GREG1_CLKBAL0_MEMDO 16 -#define S6_GREG1_CLKBAL0_HSXT1 24 -#define S6_GREG1_CLKBAL1 0x044 -#define S6_GREG1_CLKBAL1_HSISEF 0 -#define S6_GREG1_CLKBAL1_HSNI 8 -#define S6_GREG1_CLKBAL1_HSNS 16 -#define S6_GREG1_CLKBAL1_HSISEFCFG 24 -#define S6_GREG1_CLKBAL2 0x048 -#define S6_GREG1_CLKBAL2_LSNB 0 -#define S6_GREG1_CLKBAL2_LSSB 8 -#define S6_GREG1_CLKBAL2_LSREST 24 -#define S6_GREG1_CLKBAL3 0x04C -#define S6_GREG1_CLKBAL3_ISEFXAD 0 -#define S6_GREG1_CLKBAL3_ISEFLMS 8 -#define S6_GREG1_CLKBAL3_ISEFISEF 16 -#define S6_GREG1_CLKBAL3_DDRDD 24 -#define S6_GREG1_CLKBAL4 0x050 -#define S6_GREG1_CLKBAL4_DDRDP 0 -#define S6_GREG1_CLKBAL4_DDRDO 8 -#define S6_GREG1_CLKBAL4_DDRNB 16 -#define S6_GREG1_CLKBAL4_DDRLMS 24 -#define S6_GREG1_BLOCKENA 0x100 -#define S6_GREG1_BLOCK_DDR 0 -#define S6_GREG1_BLOCK_DP 1 -#define S6_GREG1_BLOCK_NSNI 2 -#define S6_GREG1_BLOCK_PCIE 3 -#define S6_GREG1_BLOCK_GMAC 4 -#define S6_GREG1_BLOCK_I2S 5 -#define S6_GREG1_BLOCK_EGIB 6 -#define S6_GREG1_BLOCK_SB 7 -#define S6_GREG1_BLOCK_XT1 8 -#define S6_GREG1_CLKGATE 0x104 -#define S6_GREG1_BGATE_AIMNORTH 9 -#define 
S6_GREG1_BGATE_AIMEAST 10 -#define S6_GREG1_BGATE_AIMWEST 11 -#define S6_GREG1_BGATE_AIMSOUTH 12 -#define S6_GREG1_CHIPRES 0x108 -#define S6_GREG1_CHIPRES_SOFTRES 0 -#define S6_GREG1_CHIPRES_LOSTLOCK 1 -#define S6_GREG1_RESETCAUSE 0x10C -#define S6_GREG1_RESETCAUSE_RESETN 0 -#define S6_GREG1_RESETCAUSE_GLOBAL 1 -#define S6_GREG1_RESETCAUSE_WDOGTIMER 2 -#define S6_GREG1_RESETCAUSE_SWCHIP 3 -#define S6_GREG1_RESETCAUSE_PLLSYSLOSS 4 -#define S6_GREG1_RESETCAUSE_PCIE 5 -#define S6_GREG1_RESETCAUSE_CREATEDGLOB 6 -#define S6_GREG1_REFCLOCKCNT 0x110 -#define S6_GREG1_RESETTIMER 0x114 -#define S6_GREG1_NMITIMER 0x118 -#define S6_GREG1_GLOBAL_TIMER 0x11C -#define S6_GREG1_TIMER0 0x180 -#define S6_GREG1_TIMER1 0x184 -#define S6_GREG1_UARTCLOCKSEL 0x204 -#define S6_GREG1_CHIPVERSPACKG 0x208 -#define S6_GREG1_CHIPVERSPACKG_CHIPVID 0 -#define S6_GREG1_CHIPVERSPACKG_PACKSEL 8 -#define S6_GREG1_ONDIETERMCTRL 0x20C -#define S6_GREG1_ONDIETERMCTRL_WEST 0 -#define S6_GREG1_ONDIETERMCTRL_NORTH 2 -#define S6_GREG1_ONDIETERMCTRL_EAST 4 -#define S6_GREG1_ONDIETERMCTRL_SOUTH 6 -#define S6_GREG1_ONDIETERMCTRL_NONE 0 -#define S6_GREG1_ONDIETERMCTRL_75OHM 2 -#define S6_GREG1_ONDIETERMCTRL_MASK 3 -#define S6_GREG1_BOOT_CFG0 0x210 -#define S6_GREG1_BOOT_CFG0_AIMSTRONG 1 -#define S6_GREG1_BOOT_CFG0_MINIBOOTDL 2 -#define S6_GREG1_BOOT_CFG0_OCDGPIO8SET 5 -#define S6_GREG1_BOOT_CFG0_OCDGPIOENA 6 -#define S6_GREG1_BOOT_CFG0_DOWNSTREAM 7 -#define S6_GREG1_BOOT_CFG0_PLLSYSDIV 8 -#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_300MHZ 1 -#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_240MHZ 2 -#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_200MHZ 3 -#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_150MHZ 4 -#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_120MHZ 5 -#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_40MHZ 6 -#define S6_GREG1_BOOT_CFG0_PLLSYSDIV_MASK 7 -#define S6_GREG1_BOOT_CFG0_BALHSLMS 12 -#define S6_GREG1_BOOT_CFG0_BALHSNB 18 -#define S6_GREG1_BOOT_CFG0_BALHSXAD 24 -#define S6_GREG1_BOOT_CFG1 0x214 -#define S6_GREG1_BOOT_CFG1_PCIE1LANE 1 -#define 
S6_GREG1_BOOT_CFG1_MPLLPRESCALE 2 -#define S6_GREG1_BOOT_CFG1_MPLLNCY 4 -#define S6_GREG1_BOOT_CFG1_MPLLNCY5 9 -#define S6_GREG1_BOOT_CFG1_BALHSREST 14 -#define S6_GREG1_BOOT_CFG1_BALHSPSMEMS 20 -#define S6_GREG1_BOOT_CFG1_BALLSGI 26 -#define S6_GREG1_BOOT_CFG2 0x218 -#define S6_GREG1_BOOT_CFG2_PEID 0 -#define S6_GREG1_BOOT_CFG3 0x21C -#define S6_GREG1_DRAMBUSYHOLDOF 0x220 -#define S6_GREG1_DRAMBUSYHOLDOF_XT0 0 -#define S6_GREG1_DRAMBUSYHOLDOF_XT1 4 -#define S6_GREG1_DRAMBUSYHOLDOF_XT_MASK 7 -#define S6_GREG1_PCIEBAR1SIZE 0x224 -#define S6_GREG1_PCIEBAR2SIZE 0x228 -#define S6_GREG1_PCIEVENDOR 0x22C -#define S6_GREG1_PCIEDEVICE 0x230 -#define S6_GREG1_PCIEREV 0x234 -#define S6_GREG1_PCIECLASS 0x238 -#define S6_GREG1_XT1DCACHEMISS 0x240 -#define S6_GREG1_XT1ICACHEMISS 0x244 -#define S6_GREG1_HWSEMAPHORE(n) (0x400 + 4 * (n)) -#define S6_GREG1_HWSEMAPHORE_NB 16 - -/* peripheral interrupt numbers */ - -#define S6_INTC_GPIO(n) (n) /* 0..3 */ -#define S6_INTC_I2C 4 -#define S6_INTC_SPI 5 -#define S6_INTC_NB_ERR 6 -#define S6_INTC_DMA_LMSERR 7 -#define S6_INTC_DMA_LMSLOWWMRK(n) (8 + (n)) /* 0..11 */ -#define S6_INTC_DMA_LMSPENDCNT(n) (20 + (n)) /* 0..11 */ -#define S6_INTC_DMA HOSTLOWWMRK(n) (32 + (n)) /* 0..6 */ -#define S6_INTC_DMA_HOSTPENDCNT(n) (39 + (n)) /* 0..6 */ -#define S6_INTC_DMA_HOSTERR 46 -#define S6_INTC_UART(n) (47 + (n)) /* 0..1 */ -#define S6_INTC_XAD 49 -#define S6_INTC_NI_ERR 50 -#define S6_INTC_NI_INFIFOFULL 51 -#define S6_INTC_DMA_NIERR 52 -#define S6_INTC_DMA_NILOWWMRK(n) (53 + (n)) /* 0..3 */ -#define S6_INTC_DMA_NIPENDCNT(n) (57 + (n)) /* 0..3 */ -#define S6_INTC_DDR 61 -#define S6_INTC_NS_ERR 62 -#define S6_INTC_EFI_CFGERR 63 -#define S6_INTC_EFI_ISEFTEST 64 -#define S6_INTC_EFI_WRITEERR 65 -#define S6_INTC_NMI_TIMER 66 -#define S6_INTC_PLLLOCK_SYS 67 -#define S6_INTC_PLLLOCK_IO 68 -#define S6_INTC_PLLLOCK_AIM 69 -#define S6_INTC_PLLLOCK_DP0 70 -#define S6_INTC_PLLLOCK_DP2 71 -#define S6_INTC_I2S_ERR 72 -#define S6_INTC_GMAC_STAT 73 -#define 
S6_INTC_GMAC_ERR 74 -#define S6_INTC_GIB_ERR 75 -#define S6_INTC_PCIE_ERR 76 -#define S6_INTC_PCIE_MSI(n) (77 + (n)) /* 0..3 */ -#define S6_INTC_PCIE_INTA 81 -#define S6_INTC_PCIE_INTB 82 -#define S6_INTC_PCIE_INTC 83 -#define S6_INTC_PCIE_INTD 84 -#define S6_INTC_SW(n) (85 + (n)) /* 0..9 */ -#define S6_INTC_SW_ENABLE(n) (85 + 256 + (n)) -#define S6_INTC_DMA_DP_ERR 95 -#define S6_INTC_DMA_DPLOWWMRK(n) (96 + (n)) /* 0..3 */ -#define S6_INTC_DMA_DPPENDCNT(n) (100 + (n)) /* 0..3 */ -#define S6_INTC_DMA_DPTERMCNT(n) (104 + (n)) /* 0..3 */ -#define S6_INTC_TIMER0 108 -#define S6_INTC_TIMER1 109 -#define S6_INTC_DMA_HOSTTERMCNT(n) (110 + (n)) /* 0..6 */ - -#endif /* __XTENSA_S6000_HARDWARE_H */ diff --git a/arch/xtensa/variants/s6000/include/variant/irq.h b/arch/xtensa/variants/s6000/include/variant/irq.h deleted file mode 100644 index 39ca751a6255..000000000000 --- a/arch/xtensa/variants/s6000/include/variant/irq.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _XTENSA_S6000_IRQ_H -#define _XTENSA_S6000_IRQ_H - -#define VARIANT_NR_IRQS 8 /* GPIO interrupts */ - -extern void variant_irq_enable(unsigned int irq); - -#endif /* __XTENSA_S6000_IRQ_H */ diff --git a/arch/xtensa/variants/s6000/include/variant/tie-asm.h b/arch/xtensa/variants/s6000/include/variant/tie-asm.h deleted file mode 100644 index f02d0a3a2e20..000000000000 --- a/arch/xtensa/variants/s6000/include/variant/tie-asm.h +++ /dev/null @@ -1,304 +0,0 @@ -/* - * This header file contains assembly-language definitions (assembly - * macros, etc.) for this specific Xtensa processor's TIE extensions - * and options. It is customized to this Xtensa processor configuration. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1999-2008 Tensilica Inc. 
- */ - -#ifndef _XTENSA_CORE_TIE_ASM_H -#define _XTENSA_CORE_TIE_ASM_H - -/* Selection parameter values for save-area save/restore macros: */ -/* Option vs. TIE: */ -#define XTHAL_SAS_TIE 0x0001 /* custom extension or coprocessor */ -#define XTHAL_SAS_OPT 0x0002 /* optional (and not a coprocessor) */ -/* Whether used automatically by compiler: */ -#define XTHAL_SAS_NOCC 0x0004 /* not used by compiler w/o special opts/code */ -#define XTHAL_SAS_CC 0x0008 /* used by compiler without special opts/code */ -/* ABI handling across function calls: */ -#define XTHAL_SAS_CALR 0x0010 /* caller-saved */ -#define XTHAL_SAS_CALE 0x0020 /* callee-saved */ -#define XTHAL_SAS_GLOB 0x0040 /* global across function calls (in thread) */ -/* Misc */ -#define XTHAL_SAS_ALL 0xFFFF /* include all default NCP contents */ - - - -/* Macro to save all non-coprocessor (extra) custom TIE and optional state - * (not including zero-overhead loop registers). - * Save area ptr (clobbered): ptr (16 byte aligned) - * Scratch regs (clobbered): at1..at4 (only first XCHAL_NCP_NUM_ATMPS needed) - */ - .macro xchal_ncp_store ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL - xchal_sa_start \continue, \ofs - .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select - xchal_sa_align \ptr, 0, 1024-4, 4, 4 - rsr \at1, BR // boolean option - s32i \at1, \ptr, .Lxchal_ofs_ + 0 - .set .Lxchal_ofs_, .Lxchal_ofs_ + 4 - .endif - .endm // xchal_ncp_store - -/* Macro to save all non-coprocessor (extra) custom TIE and optional state - * (not including zero-overhead loop registers). 
- * Save area ptr (clobbered): ptr (16 byte aligned) - * Scratch regs (clobbered): at1..at4 (only first XCHAL_NCP_NUM_ATMPS needed) - */ - .macro xchal_ncp_load ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL - xchal_sa_start \continue, \ofs - .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select - xchal_sa_align \ptr, 0, 1024-4, 4, 4 - l32i \at1, \ptr, .Lxchal_ofs_ + 0 - wsr \at1, BR // boolean option - .set .Lxchal_ofs_, .Lxchal_ofs_ + 4 - .endif - .endm // xchal_ncp_load - - - -#define XCHAL_NCP_NUM_ATMPS 1 - - - -/* Macro to save the state of TIE coprocessor FPU. - * Save area ptr (clobbered): ptr (16 byte aligned) - * Scratch regs (clobbered): at1..at4 (only first XCHAL_CP0_NUM_ATMPS needed) - */ -#define xchal_cp_FPU_store xchal_cp0_store -/* #define xchal_cp_FPU_store_a2 xchal_cp0_store a2 a3 a4 a5 a6 */ - .macro xchal_cp0_store ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL - xchal_sa_start \continue, \ofs - .ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select - xchal_sa_align \ptr, 0, 0, 1, 16 - rur232 \at1 // FCR - s32i \at1, \ptr, 0 - rur233 \at1 // FSR - s32i \at1, \ptr, 4 - SSI f0, \ptr, 8 - SSI f1, \ptr, 12 - SSI f2, \ptr, 16 - SSI f3, \ptr, 20 - SSI f4, \ptr, 24 - SSI f5, \ptr, 28 - SSI f6, \ptr, 32 - SSI f7, \ptr, 36 - SSI f8, \ptr, 40 - SSI f9, \ptr, 44 - SSI f10, \ptr, 48 - SSI f11, \ptr, 52 - SSI f12, \ptr, 56 - SSI f13, \ptr, 60 - SSI f14, \ptr, 64 - SSI f15, \ptr, 68 - .set .Lxchal_ofs_, .Lxchal_ofs_ + 72 - .endif - .endm // xchal_cp0_store - -/* Macro to restore the state of TIE coprocessor FPU. 
- * Save area ptr (clobbered): ptr (16 byte aligned) - * Scratch regs (clobbered): at1..at4 (only first XCHAL_CP0_NUM_ATMPS needed) - */ -#define xchal_cp_FPU_load xchal_cp0_load -/* #define xchal_cp_FPU_load_a2 xchal_cp0_load a2 a3 a4 a5 a6 */ - .macro xchal_cp0_load ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL - xchal_sa_start \continue, \ofs - .ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select - xchal_sa_align \ptr, 0, 0, 1, 16 - l32i \at1, \ptr, 0 - wur232 \at1 // FCR - l32i \at1, \ptr, 4 - wur233 \at1 // FSR - LSI f0, \ptr, 8 - LSI f1, \ptr, 12 - LSI f2, \ptr, 16 - LSI f3, \ptr, 20 - LSI f4, \ptr, 24 - LSI f5, \ptr, 28 - LSI f6, \ptr, 32 - LSI f7, \ptr, 36 - LSI f8, \ptr, 40 - LSI f9, \ptr, 44 - LSI f10, \ptr, 48 - LSI f11, \ptr, 52 - LSI f12, \ptr, 56 - LSI f13, \ptr, 60 - LSI f14, \ptr, 64 - LSI f15, \ptr, 68 - .set .Lxchal_ofs_, .Lxchal_ofs_ + 72 - .endif - .endm // xchal_cp0_load - -#define XCHAL_CP0_NUM_ATMPS 1 - -/* Macro to save the state of TIE coprocessor XAD. 
- * Save area ptr (clobbered): ptr (16 byte aligned) - * Scratch regs (clobbered): at1..at4 (only first XCHAL_CP6_NUM_ATMPS needed) - */ -#define xchal_cp_XAD_store xchal_cp6_store -/* #define xchal_cp_XAD_store_a2 xchal_cp6_store a2 a3 a4 a5 a6 */ - .macro xchal_cp6_store ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL - xchal_sa_start \continue, \ofs - .ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select - xchal_sa_align \ptr, 0, 0, 1, 16 - rur0 \at1 // LDCBHI - s32i \at1, \ptr, 0 - rur1 \at1 // LDCBLO - s32i \at1, \ptr, 4 - rur2 \at1 // STCBHI - s32i \at1, \ptr, 8 - rur3 \at1 // STCBLO - s32i \at1, \ptr, 12 - rur8 \at1 // LDBRBASE - s32i \at1, \ptr, 16 - rur9 \at1 // LDBROFF - s32i \at1, \ptr, 20 - rur10 \at1 // LDBRINC - s32i \at1, \ptr, 24 - rur11 \at1 // STBRBASE - s32i \at1, \ptr, 28 - rur12 \at1 // STBROFF - s32i \at1, \ptr, 32 - rur13 \at1 // STBRINC - s32i \at1, \ptr, 36 - rur24 \at1 // SCRATCH0 - s32i \at1, \ptr, 40 - rur25 \at1 // SCRATCH1 - s32i \at1, \ptr, 44 - rur26 \at1 // SCRATCH2 - s32i \at1, \ptr, 48 - rur27 \at1 // SCRATCH3 - s32i \at1, \ptr, 52 - WRAS128I wra0, \ptr, 64 - WRAS128I wra1, \ptr, 80 - WRAS128I wra2, \ptr, 96 - WRAS128I wra3, \ptr, 112 - WRAS128I wra4, \ptr, 128 - WRAS128I wra5, \ptr, 144 - WRAS128I wra6, \ptr, 160 - WRAS128I wra7, \ptr, 176 - WRAS128I wra8, \ptr, 192 - WRAS128I wra9, \ptr, 208 - WRAS128I wra10, \ptr, 224 - WRAS128I wra11, \ptr, 240 - WRAS128I wra12, \ptr, 256 - WRAS128I wra13, \ptr, 272 - WRAS128I wra14, \ptr, 288 - WRAS128I wra15, \ptr, 304 - WRBS128I wrb0, \ptr, 320 - WRBS128I wrb1, \ptr, 336 - WRBS128I wrb2, \ptr, 352 - WRBS128I wrb3, \ptr, 368 - WRBS128I wrb4, \ptr, 384 - WRBS128I wrb5, \ptr, 400 - WRBS128I wrb6, \ptr, 416 - WRBS128I wrb7, \ptr, 432 - WRBS128I wrb8, \ptr, 448 - WRBS128I wrb9, \ptr, 464 - WRBS128I wrb10, \ptr, 480 - WRBS128I wrb11, \ptr, 496 - WRBS128I wrb12, \ptr, 512 - WRBS128I wrb13, \ptr, 528 - WRBS128I wrb14, \ptr, 544 - WRBS128I wrb15, \ptr, 560 - .set 
.Lxchal_ofs_, .Lxchal_ofs_ + 576 - .endif - .endm // xchal_cp6_store - -/* Macro to restore the state of TIE coprocessor XAD. - * Save area ptr (clobbered): ptr (16 byte aligned) - * Scratch regs (clobbered): at1..at4 (only first XCHAL_CP6_NUM_ATMPS needed) - */ -#define xchal_cp_XAD_load xchal_cp6_load -/* #define xchal_cp_XAD_load_a2 xchal_cp6_load a2 a3 a4 a5 a6 */ - .macro xchal_cp6_load ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL - xchal_sa_start \continue, \ofs - .ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~\select - xchal_sa_align \ptr, 0, 0, 1, 16 - l32i \at1, \ptr, 0 - wur0 \at1 // LDCBHI - l32i \at1, \ptr, 4 - wur1 \at1 // LDCBLO - l32i \at1, \ptr, 8 - wur2 \at1 // STCBHI - l32i \at1, \ptr, 12 - wur3 \at1 // STCBLO - l32i \at1, \ptr, 16 - wur8 \at1 // LDBRBASE - l32i \at1, \ptr, 20 - wur9 \at1 // LDBROFF - l32i \at1, \ptr, 24 - wur10 \at1 // LDBRINC - l32i \at1, \ptr, 28 - wur11 \at1 // STBRBASE - l32i \at1, \ptr, 32 - wur12 \at1 // STBROFF - l32i \at1, \ptr, 36 - wur13 \at1 // STBRINC - l32i \at1, \ptr, 40 - wur24 \at1 // SCRATCH0 - l32i \at1, \ptr, 44 - wur25 \at1 // SCRATCH1 - l32i \at1, \ptr, 48 - wur26 \at1 // SCRATCH2 - l32i \at1, \ptr, 52 - wur27 \at1 // SCRATCH3 - WRBL128I wrb0, \ptr, 320 - WRBL128I wrb1, \ptr, 336 - WRBL128I wrb2, \ptr, 352 - WRBL128I wrb3, \ptr, 368 - WRBL128I wrb4, \ptr, 384 - WRBL128I wrb5, \ptr, 400 - WRBL128I wrb6, \ptr, 416 - WRBL128I wrb7, \ptr, 432 - WRBL128I wrb8, \ptr, 448 - WRBL128I wrb9, \ptr, 464 - WRBL128I wrb10, \ptr, 480 - WRBL128I wrb11, \ptr, 496 - WRBL128I wrb12, \ptr, 512 - WRBL128I wrb13, \ptr, 528 - WRBL128I wrb14, \ptr, 544 - WRBL128I wrb15, \ptr, 560 - WRAL128I wra0, \ptr, 64 - WRAL128I wra1, \ptr, 80 - WRAL128I wra2, \ptr, 96 - WRAL128I wra3, \ptr, 112 - WRAL128I wra4, \ptr, 128 - WRAL128I wra5, \ptr, 144 - WRAL128I wra6, \ptr, 160 - WRAL128I wra7, \ptr, 176 - WRAL128I wra8, \ptr, 192 - WRAL128I wra9, \ptr, 208 - WRAL128I wra10, \ptr, 224 - WRAL128I wra11, \ptr, 240 - WRAL128I 
wra12, \ptr, 256 - WRAL128I wra13, \ptr, 272 - WRAL128I wra14, \ptr, 288 - WRAL128I wra15, \ptr, 304 - .set .Lxchal_ofs_, .Lxchal_ofs_ + 576 - .endif - .endm // xchal_cp6_load - -#define XCHAL_CP6_NUM_ATMPS 1 -#define XCHAL_SA_NUM_ATMPS 1 - - /* Empty macros for unconfigured coprocessors: */ - .macro xchal_cp1_store p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp1_load p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp2_store p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp2_load p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp3_store p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp3_load p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp4_store p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp4_load p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp5_store p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp5_load p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp7_store p a b c d continue=0 ofs=-1 select=-1 ; .endm - .macro xchal_cp7_load p a b c d continue=0 ofs=-1 select=-1 ; .endm - -#endif /*_XTENSA_CORE_TIE_ASM_H*/ - diff --git a/arch/xtensa/variants/s6000/include/variant/tie.h b/arch/xtensa/variants/s6000/include/variant/tie.h deleted file mode 100644 index be7ea843d5df..000000000000 --- a/arch/xtensa/variants/s6000/include/variant/tie.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * This header file describes this specific Xtensa processor's TIE extensions - * that extend basic Xtensa core functionality. It is customized to this - * Xtensa processor configuration. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1999-2008 Tensilica Inc. 
- */ - -#ifndef _XTENSA_CORE_TIE_H -#define _XTENSA_CORE_TIE_H - -#define XCHAL_CP_NUM 2 /* number of coprocessors */ -#define XCHAL_CP_MAX 7 /* max CP ID + 1 (0 if none) */ -#define XCHAL_CP_MASK 0x41 /* bitmask of all CPs by ID */ -#define XCHAL_CP_PORT_MASK 0x00 /* bitmask of only port CPs */ - -/* Basic parameters of each coprocessor: */ -#define XCHAL_CP0_NAME "FPU" -#define XCHAL_CP0_IDENT FPU -#define XCHAL_CP0_SA_SIZE 72 /* size of state save area */ -#define XCHAL_CP0_SA_ALIGN 4 /* min alignment of save area */ -#define XCHAL_CP_ID_FPU 0 /* coprocessor ID (0..7) */ -#define XCHAL_CP6_NAME "XAD" -#define XCHAL_CP6_IDENT XAD -#define XCHAL_CP6_SA_SIZE 576 /* size of state save area */ -#define XCHAL_CP6_SA_ALIGN 16 /* min alignment of save area */ -#define XCHAL_CP_ID_XAD 6 /* coprocessor ID (0..7) */ - -/* Filler info for unassigned coprocessors, to simplify arrays etc: */ -#define XCHAL_CP1_SA_SIZE 0 -#define XCHAL_CP1_SA_ALIGN 1 -#define XCHAL_CP2_SA_SIZE 0 -#define XCHAL_CP2_SA_ALIGN 1 -#define XCHAL_CP3_SA_SIZE 0 -#define XCHAL_CP3_SA_ALIGN 1 -#define XCHAL_CP4_SA_SIZE 0 -#define XCHAL_CP4_SA_ALIGN 1 -#define XCHAL_CP5_SA_SIZE 0 -#define XCHAL_CP5_SA_ALIGN 1 -#define XCHAL_CP7_SA_SIZE 0 -#define XCHAL_CP7_SA_ALIGN 1 - -/* Save area for non-coprocessor optional and custom (TIE) state: */ -#define XCHAL_NCP_SA_SIZE 4 -#define XCHAL_NCP_SA_ALIGN 4 - -/* Total save area for optional and custom state (NCP + CPn): */ -#define XCHAL_TOTAL_SA_SIZE 672 /* with 16-byte align padding */ -#define XCHAL_TOTAL_SA_ALIGN 16 /* actual minimum alignment */ - -/* - * Detailed contents of save areas. - * NOTE: caller must define the XCHAL_SA_REG macro (not defined here) - * before expanding the XCHAL_xxx_SA_LIST() macros. - * - * XCHAL_SA_REG(s,ccused,abikind,kind,opt,name,galign,align,asize, - * dbnum,base,regnum,bitsz,gapsz,reset,x...) - * - * s = passed from XCHAL_*_LIST(s), eg. 
to select how to expand - * ccused = set if used by compiler without special options or code - * abikind = 0 (caller-saved), 1 (callee-saved), or 2 (thread-global) - * kind = 0 (special reg), 1 (TIE user reg), or 2 (TIE regfile reg) - * opt = 0 (custom TIE extension or coprocessor), or 1 (optional reg) - * name = lowercase reg name (no quotes) - * galign = group byte alignment (power of 2) (galign >= align) - * align = register byte alignment (power of 2) - * asize = allocated size in bytes (asize*8 == bitsz + gapsz + padsz) - * (not including any pad bytes required to galign this or next reg) - * dbnum = unique target number f/debug (see <xtensa-libdb-macros.h>) - * base = reg shortname w/o index (or sr=special, ur=TIE user reg) - * regnum = reg index in regfile, or special/TIE-user reg number - * bitsz = number of significant bits (regfile width, or ur/sr mask bits) - * gapsz = intervening bits, if bitsz bits not stored contiguously - * (padsz = pad bits at end [TIE regfile] or at msbits [ur,sr] of asize) - * reset = register reset value (or 0 if undefined at reset) - * x = reserved for future use (0 until then) - * - * To filter out certain registers, e.g. to expand only the non-global - * registers used by the compiler, you can do something like this: - * - * #define XCHAL_SA_REG(s,ccused,p...) SELCC##ccused(p) - * #define SELCC0(p...) - * #define SELCC1(abikind,p...) SELAK##abikind(p) - * #define SELAK0(p...) REG(p) - * #define SELAK1(p...) REG(p) - * #define SELAK2(p...) - * #define REG(kind,tie,name,galn,aln,asz,csz,dbnum,base,rnum,bsz,rst,x...) \ - * ...what you want to expand... 
- */ - -#define XCHAL_NCP_SA_NUM 1 -#define XCHAL_NCP_SA_LIST(s) \ - XCHAL_SA_REG(s,0,0,0,1, br, 4, 4, 4,0x0204, sr,4 , 16,0,0,0) - -#define XCHAL_CP0_SA_NUM 18 -#define XCHAL_CP0_SA_LIST(s) \ - XCHAL_SA_REG(s,0,0,1,0, fcr, 4, 4, 4,0x03E8, ur,232, 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, fsr, 4, 4, 4,0x03E9, ur,233, 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f0, 4, 4, 4,0x0030, f,0 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f1, 4, 4, 4,0x0031, f,1 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f2, 4, 4, 4,0x0032, f,2 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f3, 4, 4, 4,0x0033, f,3 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f4, 4, 4, 4,0x0034, f,4 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f5, 4, 4, 4,0x0035, f,5 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f6, 4, 4, 4,0x0036, f,6 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f7, 4, 4, 4,0x0037, f,7 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f8, 4, 4, 4,0x0038, f,8 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f9, 4, 4, 4,0x0039, f,9 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f10, 4, 4, 4,0x003A, f,10 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f11, 4, 4, 4,0x003B, f,11 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f12, 4, 4, 4,0x003C, f,12 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f13, 4, 4, 4,0x003D, f,13 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f14, 4, 4, 4,0x003E, f,14 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, f15, 4, 4, 4,0x003F, f,15 , 32,0,0,0) - -#define XCHAL_CP1_SA_NUM 0 -#define XCHAL_CP1_SA_LIST(s) /* empty */ - -#define XCHAL_CP2_SA_NUM 0 -#define XCHAL_CP2_SA_LIST(s) /* empty */ - -#define XCHAL_CP3_SA_NUM 0 -#define XCHAL_CP3_SA_LIST(s) /* empty */ - -#define XCHAL_CP4_SA_NUM 0 -#define XCHAL_CP4_SA_LIST(s) /* empty */ - -#define XCHAL_CP5_SA_NUM 0 -#define XCHAL_CP5_SA_LIST(s) /* empty */ - -#define XCHAL_CP6_SA_NUM 46 -#define XCHAL_CP6_SA_LIST(s) \ - XCHAL_SA_REG(s,0,0,1,0, ldcbhi,16, 4, 4,0x0300, ur,0 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, ldcblo, 4, 4, 4,0x0301, ur,1 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, stcbhi, 4, 4, 4,0x0302, ur,2 , 32,0,0,0) \ - 
XCHAL_SA_REG(s,0,0,1,0, stcblo, 4, 4, 4,0x0303, ur,3 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, ldbrbase, 4, 4, 4,0x0308, ur,8 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, ldbroff, 4, 4, 4,0x0309, ur,9 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, ldbrinc, 4, 4, 4,0x030A, ur,10 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, stbrbase, 4, 4, 4,0x030B, ur,11 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, stbroff, 4, 4, 4,0x030C, ur,12 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, stbrinc, 4, 4, 4,0x030D, ur,13 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, scratch0, 4, 4, 4,0x0318, ur,24 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, scratch1, 4, 4, 4,0x0319, ur,25 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, scratch2, 4, 4, 4,0x031A, ur,26 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,1,0, scratch3, 4, 4, 4,0x031B, ur,27 , 32,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra0,16,16,16,0x1010, wra,0 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra1,16,16,16,0x1011, wra,1 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra2,16,16,16,0x1012, wra,2 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra3,16,16,16,0x1013, wra,3 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra4,16,16,16,0x1014, wra,4 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra5,16,16,16,0x1015, wra,5 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra6,16,16,16,0x1016, wra,6 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra7,16,16,16,0x1017, wra,7 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra8,16,16,16,0x1018, wra,8 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra9,16,16,16,0x1019, wra,9 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra10,16,16,16,0x101A, wra,10 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra11,16,16,16,0x101B, wra,11 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra12,16,16,16,0x101C, wra,12 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra13,16,16,16,0x101D, wra,13 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra14,16,16,16,0x101E, wra,14 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wra15,16,16,16,0x101F, wra,15 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb0,16,16,16,0x1020, wrb,0 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb1,16,16,16,0x1021, 
wrb,1 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb2,16,16,16,0x1022, wrb,2 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb3,16,16,16,0x1023, wrb,3 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb4,16,16,16,0x1024, wrb,4 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb5,16,16,16,0x1025, wrb,5 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb6,16,16,16,0x1026, wrb,6 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb7,16,16,16,0x1027, wrb,7 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb8,16,16,16,0x1028, wrb,8 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb9,16,16,16,0x1029, wrb,9 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb10,16,16,16,0x102A, wrb,10 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb11,16,16,16,0x102B, wrb,11 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb12,16,16,16,0x102C, wrb,12 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb13,16,16,16,0x102D, wrb,13 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb14,16,16,16,0x102E, wrb,14 ,128,0,0,0) \ - XCHAL_SA_REG(s,0,0,2,0, wrb15,16,16,16,0x102F, wrb,15 ,128,0,0,0) - -#define XCHAL_CP7_SA_NUM 0 -#define XCHAL_CP7_SA_LIST(s) /* empty */ - -/* Byte length of instruction from its first nibble (op0 field), per FLIX. */ -#define XCHAL_OP0_FORMAT_LENGTHS 3,3,3,3,3,3,3,3,2,2,2,2,2,2,8,8 - -#endif /*_XTENSA_CORE_TIE_H*/ - diff --git a/arch/xtensa/variants/s6000/irq.c b/arch/xtensa/variants/s6000/irq.c deleted file mode 100644 index 81a241e79075..000000000000 --- a/arch/xtensa/variants/s6000/irq.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * s6000 irq crossbar - * - * Copyright (c) 2009 emlix GmbH - * Authors: Johannes Weiner <hannes@cmpxchg.org> - * Oskar Schirmer <oskar@scara.com> - */ -#include <linux/io.h> -#include <asm/irq.h> -#include <variant/hardware.h> - -/* S6_REG_INTC */ -#define INTC_STATUS 0x000 -#define INTC_RAW 0x010 -#define INTC_STATUS_AG 0x100 -#define INTC_CFG(n) (0x200 + 4 * (n)) - -/* - * The s6000 has a crossbar that multiplexes interrupt output lines - * from the peripherals to input lines on the xtensa core. 
- * - * We leave the mapping decisions to the platform as it depends on the - * actually connected peripherals which distribution makes sense. - */ -extern const signed char *platform_irq_mappings[NR_IRQS]; - -static unsigned long scp_to_intc_enable[] = { -#define TO_INTC_ENABLE(n) (((n) << 1) + 1) - TO_INTC_ENABLE(0), - TO_INTC_ENABLE(1), - TO_INTC_ENABLE(2), - TO_INTC_ENABLE(3), - TO_INTC_ENABLE(4), - TO_INTC_ENABLE(5), - TO_INTC_ENABLE(6), - TO_INTC_ENABLE(7), - TO_INTC_ENABLE(8), - TO_INTC_ENABLE(9), - TO_INTC_ENABLE(10), - TO_INTC_ENABLE(11), - TO_INTC_ENABLE(12), - -1, - -1, - TO_INTC_ENABLE(13), - -1, - TO_INTC_ENABLE(14), - -1, - TO_INTC_ENABLE(15), -#undef TO_INTC_ENABLE -}; - -static void irq_set(unsigned int irq, int enable) -{ - unsigned long en; - const signed char *m = platform_irq_mappings[irq]; - - if (!m) - return; - en = enable ? scp_to_intc_enable[irq] : 0; - while (*m >= 0) { - writel(en, S6_REG_INTC + INTC_CFG(*m)); - m++; - } -} - -void variant_irq_enable(unsigned int irq) -{ - irq_set(irq, 1); -} - -void variant_irq_disable(unsigned int irq) -{ - irq_set(irq, 0); -} |