From 4dba4185e3d984a7a856dfaeb7e8aec26633e845 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Thu, 18 Dec 2014 19:10:35 +0800
Subject: ARM: dts: sunxi: Fix usb-phy support for sun4i/sun5i

usbphy0 support in the sunxi usb-phy driver has been merged, but the
dtsi's for sun4i/sun5i haven't been updated. This results in the phy
driver failing to load, breaking usb support.

Fixes: 6827a46f5994 ('phy: sun4i: add support for USB phy0')
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun4i-a10.dtsi  | 4 ++--
 arch/arm/boot/dts/sun5i-a10s.dtsi | 4 ++--
 arch/arm/boot/dts/sun5i-a13.dtsi  | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index 7b4099fcf817..e3422309e54b 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -438,8 +438,8 @@
 			reg-names = "phy_ctrl", "pmu1", "pmu2";
 			clocks = <&usb_clk 8>;
 			clock-names = "usb_phy";
-			resets = <&usb_clk 1>, <&usb_clk 2>;
-			reset-names = "usb1_reset", "usb2_reset";
+			resets = <&usb_clk 0>, <&usb_clk 1>, <&usb_clk 2>;
+			reset-names = "usb0_reset", "usb1_reset", "usb2_reset";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index 1b76667f3182..3c3c1920788a 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -390,8 +390,8 @@
 			reg-names = "phy_ctrl", "pmu1";
 			clocks = <&usb_clk 8>;
 			clock-names = "usb_phy";
-			resets = <&usb_clk 1>;
-			reset-names = "usb1_reset";
+			resets = <&usb_clk 0>, <&usb_clk 1>;
+			reset-names = "usb0_reset", "usb1_reset";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index c35217ea1f64..2448d7f38d6b 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -349,8 +349,8 @@
 			reg-names = "phy_ctrl", "pmu1";
 			clocks = <&usb_clk 8>;
 			clock-names = "usb_phy";
-			resets = <&usb_clk 1>;
-			reset-names = "usb1_reset";
+			resets = <&usb_clk 0>, <&usb_clk 1>;
+			reset-names = "usb0_reset", "usb1_reset";
 			status = "disabled";
 		};
 
-- 
cgit v1.2.3


From 6ba8bbe8e283674bd9737bcb31a9c55ba4f8fbee Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 25 Dec 2014 13:25:19 +0100
Subject: ARM: dts: sun6i: ippo-q8h-v5: Fix serial0 alias

The Ippo q8h has its serial console connected to the r-uart. Adjust the
serial0 alias to match.

This fixes the kernel serial console no longer working since 3.19-rc1, because
8250_dw.c now honors dt aliases, causing the serial console to be ttyS5 rather
then being ttyS0, as it was in 3.18 and before.

Note that adjusting bootargs instead is not an acceptable fix, because
console=ttyS0,115200 is used by a lot of bootscripts, etc. and this should
continue to work.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts
index 7f2117ce6985..32ad80804dbb 100644
--- a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts
+++ b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts
@@ -55,6 +55,10 @@
 	model = "Ippo Q8H Dual Core Tablet (v5)";
 	compatible = "ippo,q8h-v5", "allwinner,sun8i-a23";
 
+	aliases {
+		serial0 = &r_uart;
+	};
+
 	chosen {
 		bootargs = "earlyprintk console=ttyS0,115200";
 	};
-- 
cgit v1.2.3


From 8cedd6628472aa2d8f2f54dfcc36633e46a59db8 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 19 Jan 2015 14:01:17 +0100
Subject: ARM: dts: sun4i: Add simplefb node with de_fe0-de_be0-lcd0-hdmi
 pipeline

Testing has shown that on sun4i the display backend engine does not have
deep enough fifo-s causing flickering / tearing in full-hd mode due to
fifo underruns. This can be avoided by letting the display frontend engine
do the dma from memory, and then letting it feed the data directly into
the backend unmodified, as the frontend does have deep enough fifo-s.

Note since u-boot-v2015.01 has been released using the de_be0-lcd0-hdmi
pipeline on sun4i, we need to keep that one around too (unfortunately).

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun4i-a10.dtsi | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index e3422309e54b..3b25de8f5fce 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -39,6 +39,14 @@
 				 <&ahb_gates 44>;
 			status = "disabled";
 		};
+
+		framebuffer@1 {
+			compatible = "allwinner,simple-framebuffer", "simple-framebuffer";
+			allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi";
+			clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>,
+				 <&ahb_gates 44>, <&ahb_gates 46>;
+			status = "disabled";
+		};
 	};
 
 	cpus {
-- 
cgit v1.2.3


From 117a2cc38ffe0b46bcc6f991a3534b345477439c Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 22 Jan 2015 22:10:54 +0100
Subject: ARM: sunxi: dt: Fix aliases

Commit f77d55a3b56a ("serial: 8250_dw: get index of serial line from DT
aliases") made the serial driver now use the serial aliases to get the tty
number, pointing out that our aliases have been wrong all along.

Remove them from the DTSI and add custom ones in the relevant boards.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun4i-a10.dtsi                 |  8 --------
 arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts |  6 ++++++
 arch/arm/boot/dts/sun5i-a10s.dtsi                |  4 ----
 arch/arm/boot/dts/sun5i-a13-hsg-h702.dts         |  4 ++++
 arch/arm/boot/dts/sun5i-a13-olinuxino-micro.dts  |  4 ++++
 arch/arm/boot/dts/sun5i-a13-olinuxino.dts        |  4 ++++
 arch/arm/boot/dts/sun5i-a13.dtsi                 |  5 -----
 arch/arm/boot/dts/sun6i-a31.dtsi                 |  6 ------
 arch/arm/boot/dts/sun7i-a20-bananapi.dts         |  6 ++++++
 arch/arm/boot/dts/sun7i-a20-hummingbird.dts      |  8 ++++++++
 arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts  |  3 +++
 arch/arm/boot/dts/sun7i-a20.dtsi                 |  8 --------
 arch/arm/boot/dts/sun8i-a23.dtsi                 |  9 ---------
 arch/arm/boot/dts/sun9i-a80-optimus.dts          |  5 +++++
 arch/arm/boot/dts/sun9i-a80.dtsi                 | 10 ----------
 15 files changed, 40 insertions(+), 50 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index 3b25de8f5fce..d5c4669224b1 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -17,14 +17,6 @@
 
 	aliases {
 		ethernet0 = &emac;
-		serial0 = &uart0;
-		serial1 = &uart1;
-		serial2 = &uart2;
-		serial3 = &uart3;
-		serial4 = &uart4;
-		serial5 = &uart5;
-		serial6 = &uart6;
-		serial7 = &uart7;
 	};
 
 	chosen {
diff --git a/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts b/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts
index fe3c559ca6a8..bfa742817690 100644
--- a/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts
+++ b/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts
@@ -55,6 +55,12 @@
 	model = "Olimex A10s-Olinuxino Micro";
 	compatible = "olimex,a10s-olinuxino-micro", "allwinner,sun5i-a10s";
 
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart2;
+		serial2 = &uart3;
+	};
+
 	soc@01c00000 {
 		emac: ethernet@01c0b000 {
 			pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index 3c3c1920788a..2e7d8263799d 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -18,10 +18,6 @@
 
 	aliases {
 		ethernet0 = &emac;
-		serial0 = &uart0;
-		serial1 = &uart1;
-		serial2 = &uart2;
-		serial3 = &uart3;
 	};
 
 	chosen {
diff --git a/arch/arm/boot/dts/sun5i-a13-hsg-h702.dts b/arch/arm/boot/dts/sun5i-a13-hsg-h702.dts
index eeed1f236ee8..c7be3abd9fcc 100644
--- a/arch/arm/boot/dts/sun5i-a13-hsg-h702.dts
+++ b/arch/arm/boot/dts/sun5i-a13-hsg-h702.dts
@@ -53,6 +53,10 @@
 	model = "HSG H702";
 	compatible = "hsg,h702", "allwinner,sun5i-a13";
 
+	aliases {
+		serial0 = &uart1;
+	};
+
 	soc@01c00000 {
 		mmc0: mmc@01c0f000 {
 			pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/sun5i-a13-olinuxino-micro.dts b/arch/arm/boot/dts/sun5i-a13-olinuxino-micro.dts
index 916ee8bb826f..3decefb3c37a 100644
--- a/arch/arm/boot/dts/sun5i-a13-olinuxino-micro.dts
+++ b/arch/arm/boot/dts/sun5i-a13-olinuxino-micro.dts
@@ -54,6 +54,10 @@
 	model = "Olimex A13-Olinuxino Micro";
 	compatible = "olimex,a13-olinuxino-micro", "allwinner,sun5i-a13";
 
+	aliases {
+		serial0 = &uart1;
+	};
+
 	soc@01c00000 {
 		mmc0: mmc@01c0f000 {
 			pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/sun5i-a13-olinuxino.dts b/arch/arm/boot/dts/sun5i-a13-olinuxino.dts
index e31d291d14cb..b421f7fa197b 100644
--- a/arch/arm/boot/dts/sun5i-a13-olinuxino.dts
+++ b/arch/arm/boot/dts/sun5i-a13-olinuxino.dts
@@ -55,6 +55,10 @@
 	model = "Olimex A13-Olinuxino";
 	compatible = "olimex,a13-olinuxino", "allwinner,sun5i-a13";
 
+	aliases {
+		serial0 = &uart1;
+	};
+
 	soc@01c00000 {
 		mmc0: mmc@01c0f000 {
 			pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index 2448d7f38d6b..c556688f8b8b 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -16,11 +16,6 @@
 / {
 	interrupt-parent = <&intc>;
 
-	aliases {
-		serial0 = &uart1;
-		serial1 = &uart3;
-	};
-
 	cpus {
 		#address-cells = <1>;
 		#size-cells = <0>;
diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index f47156b6572b..1e7e7bcf8307 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi
@@ -53,12 +53,6 @@
 	interrupt-parent = <&gic>;
 
 	aliases {
-		serial0 = &uart0;
-		serial1 = &uart1;
-		serial2 = &uart2;
-		serial3 = &uart3;
-		serial4 = &uart4;
-		serial5 = &uart5;
 		ethernet0 = &gmac;
 	};
 
diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi.dts b/arch/arm/boot/dts/sun7i-a20-bananapi.dts
index 1cf1214cc068..bd7b15add697 100644
--- a/arch/arm/boot/dts/sun7i-a20-bananapi.dts
+++ b/arch/arm/boot/dts/sun7i-a20-bananapi.dts
@@ -55,6 +55,12 @@
 	model = "LeMaker Banana Pi";
 	compatible = "lemaker,bananapi", "allwinner,sun7i-a20";
 
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart3;
+		serial2 = &uart7;
+	};
+
 	soc@01c00000 {
 		spi0: spi@01c05000 {
 			pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/sun7i-a20-hummingbird.dts b/arch/arm/boot/dts/sun7i-a20-hummingbird.dts
index 0e4bfa3b2b85..0bcefcbbb756 100644
--- a/arch/arm/boot/dts/sun7i-a20-hummingbird.dts
+++ b/arch/arm/boot/dts/sun7i-a20-hummingbird.dts
@@ -19,6 +19,14 @@
 	model = "Merrii A20 Hummingbird";
 	compatible = "merrii,a20-hummingbird", "allwinner,sun7i-a20";
 
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart2;
+		serial2 = &uart3;
+		serial3 = &uart4;
+		serial4 = &uart5;
+	};
+
 	soc@01c00000 {
 		mmc0: mmc@01c0f000 {
 			pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
index 9d669cdf031d..66cc77707198 100644
--- a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
+++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
@@ -20,6 +20,9 @@
 	compatible = "olimex,a20-olinuxino-micro", "allwinner,sun7i-a20";
 
 	aliases {
+		serial0 = &uart0;
+		serial1 = &uart6;
+		serial2 = &uart7;
 		spi0 = &spi1;
 		spi1 = &spi2;
 	};
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index e21ce5992d56..89749ce34a84 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -54,14 +54,6 @@
 
 	aliases {
 		ethernet0 = &gmac;
-		serial0 = &uart0;
-		serial1 = &uart1;
-		serial2 = &uart2;
-		serial3 = &uart3;
-		serial4 = &uart4;
-		serial5 = &uart5;
-		serial6 = &uart6;
-		serial7 = &uart7;
 	};
 
 	chosen {
diff --git a/arch/arm/boot/dts/sun8i-a23.dtsi b/arch/arm/boot/dts/sun8i-a23.dtsi
index 0746cd1024d7..86584fcf5e32 100644
--- a/arch/arm/boot/dts/sun8i-a23.dtsi
+++ b/arch/arm/boot/dts/sun8i-a23.dtsi
@@ -52,15 +52,6 @@
 / {
 	interrupt-parent = <&gic>;
 
-	aliases {
-		serial0 = &uart0;
-		serial1 = &uart1;
-		serial2 = &uart2;
-		serial3 = &uart3;
-		serial4 = &uart4;
-		serial5 = &r_uart;
-	};
-
 	cpus {
 		#address-cells = <1>;
 		#size-cells = <0>;
diff --git a/arch/arm/boot/dts/sun9i-a80-optimus.dts b/arch/arm/boot/dts/sun9i-a80-optimus.dts
index 506948f582ee..11ec71072e81 100644
--- a/arch/arm/boot/dts/sun9i-a80-optimus.dts
+++ b/arch/arm/boot/dts/sun9i-a80-optimus.dts
@@ -54,6 +54,11 @@
 	model = "Merrii A80 Optimus Board";
 	compatible = "merrii,a80-optimus", "allwinner,sun9i-a80";
 
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart4;
+	};
+
 	chosen {
 		bootargs = "earlyprintk console=ttyS0,115200";
 	};
diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index 494714f67b57..9ef4438206a9 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -52,16 +52,6 @@
 / {
 	interrupt-parent = <&gic>;
 
-	aliases {
-		serial0 = &uart0;
-		serial1 = &uart1;
-		serial2 = &uart2;
-		serial3 = &uart3;
-		serial4 = &uart4;
-		serial5 = &uart5;
-		serial6 = &r_uart;
-	};
-
 	cpus {
 		#address-cells = <1>;
 		#size-cells = <0>;
-- 
cgit v1.2.3


From dcad68876c21bac709b01eda24e39d4410dc36a8 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Wed, 28 Jan 2015 12:55:45 +0100
Subject: ARM: mvebu: don't set the PL310 in I/O coherency mode when I/O
 coherency is disabled

Since commit f2c3c67f00 (merge commit that adds commit "ARM: mvebu:
completely disable hardware I/O coherency"), we disable I/O coherency
on Armada EBU platforms.

However, we continue to initialize the coherency fabric, because this
coherency fabric is needed on Armada XP for inter-CPU
coherency. Unfortunately, due to this, we also continued to execute
the coherency fabric initialization code for Armada 375/38x, which
switched the PL310 into I/O coherent mode. This has the effect of
disabling the outer cache sync operation: this is needed when I/O
coherency is enabled to work around a PCIe/L2 deadlock. But obviously,
when I/O coherency is disabled, having the outer cache sync operation
is crucial.

Therefore, this commit fixes the armada_375_380_coherency_init() so
that the PL310 is switched to I/O coherent mode only if I/O coherency
is enabled.

Without this fix, all devices using DMA are broken on Armada 375/38x.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Acked-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Cc: <stable@vger.kernel.org> # v3.8+
---
 arch/arm/mach-mvebu/coherency.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c
index caa21e9b8cd9..ccef8806bb58 100644
--- a/arch/arm/mach-mvebu/coherency.c
+++ b/arch/arm/mach-mvebu/coherency.c
@@ -189,6 +189,13 @@ static void __init armada_375_380_coherency_init(struct device_node *np)
 	coherency_cpu_base = of_iomap(np, 0);
 	arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
 
+	/*
+	 * We should switch the PL310 to I/O coherency mode only if
+	 * I/O coherency is actually enabled.
+	 */
+	if (!coherency_available())
+		return;
+
 	/*
 	 * Add the PL310 property "arm,io-coherent". This makes sure the
 	 * outer sync operation is not used, which allows to
-- 
cgit v1.2.3


From 974b072f71e350bf2c5fb9cfd5c6c9d341a53300 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 28 Jan 2015 21:46:45 +0900
Subject: ARM: shmobile: r8a73a4: Instantiate GIC from C board code in legacy
 builds

As of commit 9a1091ef0017c40a ("irqchip: gic: Support hierarchy irq
domain."), the APE6EVM legacy board support is known to be broken.

The IRQ numbers of the GIC are now virtual, and no longer match the
hardcoded hardware IRQ numbers in the legacy platform board code.

To fix this issue specific to non-muliplatform r8a73a4 and APE6EVM:
 1) Instantiate the GIC from platform board code and also
 2) Skip over the DT arch timer as well as
 3) Force delay setup based on DT CPU frequency

With these 3 fixes in place interrupts on APE6EVM are now unbroken.

Partially based on legacy GIC fix by Geert Uytterhoeven, thanks to
him for the initial work.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 arch/arm/mach-shmobile/board-ape6evm.c | 20 ++++++++++++++++++++
 arch/arm/mach-shmobile/timer.c         |  7 +++++++
 2 files changed, 27 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/mach-shmobile/board-ape6evm.c b/arch/arm/mach-shmobile/board-ape6evm.c
index b222f68d55b7..2cca5ef86ba0 100644
--- a/arch/arm/mach-shmobile/board-ape6evm.c
+++ b/arch/arm/mach-shmobile/board-ape6evm.c
@@ -22,6 +22,8 @@
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/kernel.h>
 #include <linux/mfd/tmio.h>
 #include <linux/mmc/host.h>
@@ -277,6 +279,22 @@ static void __init ape6evm_add_standard_devices(void)
 				      sizeof(ape6evm_leds_pdata));
 }
 
+static void __init ape6evm_legacy_init_time(void)
+{
+	/* Do not invoke DT-based timers via clocksource_of_init() */
+}
+
+static void __init ape6evm_legacy_init_irq(void)
+{
+	void __iomem *gic_dist_base = ioremap_nocache(0xf1001000, 0x1000);
+	void __iomem *gic_cpu_base = ioremap_nocache(0xf1002000, 0x1000);
+
+	gic_init(0, 29, gic_dist_base, gic_cpu_base);
+
+	/* Do not invoke DT-based interrupt code via irqchip_init() */
+}
+
+
 static const char *ape6evm_boards_compat_dt[] __initdata = {
 	"renesas,ape6evm",
 	NULL,
@@ -284,7 +302,9 @@ static const char *ape6evm_boards_compat_dt[] __initdata = {
 
 DT_MACHINE_START(APE6EVM_DT, "ape6evm")
 	.init_early	= shmobile_init_delay,
+	.init_irq       = ape6evm_legacy_init_irq,
 	.init_machine	= ape6evm_add_standard_devices,
 	.init_late	= shmobile_init_late,
 	.dt_compat	= ape6evm_boards_compat_dt,
+	.init_time	= ape6evm_legacy_init_time,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c
index 32ee335d2632..44d32a84b001 100644
--- a/arch/arm/mach-shmobile/timer.c
+++ b/arch/arm/mach-shmobile/timer.c
@@ -75,6 +75,13 @@ void __init shmobile_init_delay(void)
 	if (!max_freq)
 		return;
 
+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	/* Non-multiplatform r8a73a4 SoC cannot use arch timer due
+	 * to GIC being initialized from C and arch timer via DT */
+	if (of_machine_is_compatible("renesas,r8a73a4"))
+		has_arch_timer = false;
+#endif
+
 	if (!has_arch_timer || !IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) {
 		if (is_a7_a8_a9)
 			shmobile_setup_delay_hz(max_freq, 1, 3);
-- 
cgit v1.2.3


From 77cf5166f21c3b73b04571311dc89a327424f594 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Thu, 29 Jan 2015 16:25:32 +0900
Subject: ARM: shmobile: r8a7790: Instantiate GIC from C board code in legacy
 builds

As of commit 9a1091ef0017c40a ("irqchip: gic: Support hierarchy irq
domain."), the Lager legacy board support is known to be broken.

The IRQ numbers of the GIC are now virtual, and no longer match the
hardcoded hardware IRQ numbers in the legacy platform board code.

To fix this issue specific to non-multiplatform r8a7790 and Lager:
 1) Instantiate the GIC from platform board code and also
 2) Skip over the DT arch timer as well as
 3) Force delay setup based on DT CPU frequency

With these 3 fixes in place interrupts on Lager are now unbroken.

Partially based on legacy GIC fix by Geert Uytterhoeven, thanks to
him for the initial work.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 arch/arm/mach-shmobile/board-lager.c     | 13 +++++++++++++
 arch/arm/mach-shmobile/setup-rcar-gen2.c |  2 ++
 arch/arm/mach-shmobile/timer.c           |  5 +++++
 3 files changed, 20 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c
index 571327b1c942..3f513270fc60 100644
--- a/arch/arm/mach-shmobile/board-lager.c
+++ b/arch/arm/mach-shmobile/board-lager.c
@@ -25,6 +25,8 @@
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/kernel.h>
 #include <linux/leds.h>
 #include <linux/mfd/tmio.h>
@@ -873,6 +875,16 @@ static void __init lager_init(void)
 					  lager_ksz8041_fixup);
 }
 
+static void __init lager_legacy_init_irq(void)
+{
+	void __iomem *gic_dist_base = ioremap_nocache(0xf1001000, 0x1000);
+	void __iomem *gic_cpu_base = ioremap_nocache(0xf1002000, 0x1000);
+
+	gic_init(0, 29, gic_dist_base, gic_cpu_base);
+
+	/* Do not invoke DT-based interrupt code via irqchip_init() */
+}
+
 static const char * const lager_boards_compat_dt[] __initconst = {
 	"renesas,lager",
 	NULL,
@@ -881,6 +893,7 @@ static const char * const lager_boards_compat_dt[] __initconst = {
 DT_MACHINE_START(LAGER_DT, "lager")
 	.smp		= smp_ops(r8a7790_smp_ops),
 	.init_early	= shmobile_init_delay,
+	.init_irq	= lager_legacy_init_irq,
 	.init_time	= rcar_gen2_timer_init,
 	.init_machine	= lager_init,
 	.init_late	= shmobile_init_late,
diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c
index 7ed92790d13f..101b3e430a0d 100644
--- a/arch/arm/mach-shmobile/setup-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c
@@ -137,7 +137,9 @@ void __init rcar_gen2_timer_init(void)
 #ifdef CONFIG_COMMON_CLK
 	rcar_gen2_clocks_init(mode);
 #endif
+#ifdef CONFIG_ARCH_SHMOBILE_MULTI
 	clocksource_of_init();
+#endif
 }
 
 struct memory_reserve_config {
diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c
index 44d32a84b001..88f067bcec94 100644
--- a/arch/arm/mach-shmobile/timer.c
+++ b/arch/arm/mach-shmobile/timer.c
@@ -80,6 +80,11 @@ void __init shmobile_init_delay(void)
 	 * to GIC being initialized from C and arch timer via DT */
 	if (of_machine_is_compatible("renesas,r8a73a4"))
 		has_arch_timer = false;
+
+	/* Non-multiplatform r8a7790 SoC cannot use arch timer due
+	 * to GIC being initialized from C and arch timer via DT */
+	if (of_machine_is_compatible("renesas,r8a7790"))
+		has_arch_timer = false;
 #endif
 
 	if (!has_arch_timer || !IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) {
-- 
cgit v1.2.3


From c2273a185354fe9420fb342b1ca09a6fed857fb3 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 16 Jan 2015 18:01:43 +0100
Subject: ARM: 8288/1: dma-mapping: don't detach devices without an IOMMU
 during teardown

When tearing down the DMA ops for a device via of_dma_deconfigure, we
unconditionally detach the device from its IOMMU domain. For devices
that aren't actually behind an IOMMU, this produces a "Not attached"
warning message on the console.

This patch changes the teardown code so that we don't detach from the
IOMMU domain when there isn't an IOMMU dma mapping to start with.

Reported-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/dma-mapping.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7864797609b3..f142ddd6c40a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2025,6 +2025,9 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
 {
 	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
 
+	if (!mapping)
+		return;
+
 	arm_iommu_detach_device(dev);
 	arm_iommu_release_mapping(mapping);
 }
-- 
cgit v1.2.3


From c2607f74aad96d18316a6e709b40e0ffe9def148 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 27 Jan 2015 16:10:42 +0100
Subject: ARM: 8294/1: ATAG_DTB_COMPAT: remove the DT workspace's hardcoded
 64KB size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is currently a hardcoded limit of 64KB for the DTB to live in and
be extended with ATAG info.  Some DTBs have outgrown that limit:

$ du -b arch/arm/boot/dts/omap3-n900.dtb
70212   arch/arm/boot/dts/omap3-n900.dtb

Furthermore, the actual size passed to atags_to_fdt() included the stack
size which is obviously wrong.

The initial DTB size is known, so use it to size the allocated workspace
with a 50% growth assumption and relocate the temporary stack above that.
This is also clamped to 32KB min / 1MB max for robustness against bad
DTB data.

Reported-by: Pali Rohár <pali.rohar@gmail.com>
Tested-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/boot/compressed/head.S | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 68be9017593d..132c70e2d2f1 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -263,16 +263,37 @@ restart:	adr	r0, LC0
 		 * OK... Let's do some funky business here.
 		 * If we do have a DTB appended to zImage, and we do have
 		 * an ATAG list around, we want the later to be translated
-		 * and folded into the former here.  To be on the safe side,
-		 * let's temporarily move  the stack away into the malloc
-		 * area.  No GOT fixup has occurred yet, but none of the
-		 * code we're about to call uses any global variable.
+		 * and folded into the former here. No GOT fixup has occurred
+		 * yet, but none of the code we're about to call uses any
+		 * global variable.
 		*/
-		add	sp, sp, #0x10000
+
+		/* Get the initial DTB size */
+		ldr	r5, [r6, #4]
+#ifndef __ARMEB__
+		/* convert to little endian */
+		eor	r1, r5, r5, ror #16
+		bic	r1, r1, #0x00ff0000
+		mov	r5, r5, ror #8
+		eor	r5, r5, r1, lsr #8
+#endif
+		/* 50% DTB growth should be good enough */
+		add	r5, r5, r5, lsr #1
+		/* preserve 64-bit alignment */
+		add	r5, r5, #7
+		bic	r5, r5, #7
+		/* clamp to 32KB min and 1MB max */
+		cmp	r5, #(1 << 15)
+		movlo	r5, #(1 << 15)
+		cmp	r5, #(1 << 20)
+		movhi	r5, #(1 << 20)
+		/* temporarily relocate the stack past the DTB work space */
+		add	sp, sp, r5
+
 		stmfd	sp!, {r0-r3, ip, lr}
 		mov	r0, r8
 		mov	r1, r6
-		sub	r2, sp, r6
+		mov	r2, r5
 		bl	atags_to_fdt
 
 		/*
@@ -285,11 +306,11 @@ restart:	adr	r0, LC0
 		bic	r0, r0, #1
 		add	r0, r0, #0x100
 		mov	r1, r6
-		sub	r2, sp, r6
+		mov	r2, r5
 		bleq	atags_to_fdt
 
 		ldmfd	sp!, {r0-r3, ip, lr}
-		sub	sp, sp, #0x10000
+		sub	sp, sp, r5
 #endif
 
 		mov	r8, r6			@ use the appended device tree
@@ -306,7 +327,7 @@ restart:	adr	r0, LC0
 		subs	r1, r5, r1
 		addhi	r9, r9, r1
 
-		/* Get the dtb's size */
+		/* Get the current DTB size */
 		ldr	r5, [r6, #4]
 #ifndef __ARMEB__
 		/* convert r5 (dtb size) to little endian */
-- 
cgit v1.2.3


From ed46092518aaed9e5266f8dd87ac12bf18cfc8e8 Mon Sep 17 00:00:00 2001
From: Rob Herring <r.herring@freescale.com>
Date: Wed, 28 Jan 2015 16:05:04 +0100
Subject: ARM: 8295/1: fix v7M build for !CONFIG_PRINTK

Minimal builds for v7M are broken when printk is disabled. The caller is
assembly so add the necessary ifdef around the call.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-v7m.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
index 2260f1855820..8944f4991c3c 100644
--- a/arch/arm/kernel/entry-v7m.S
+++ b/arch/arm/kernel/entry-v7m.S
@@ -22,10 +22,12 @@
 
 __invalid_entry:
 	v7m_exception_entry
+#ifdef CONFIG_PRINTK
 	adr	r0, strerr
 	mrs	r1, ipsr
 	mov	r2, lr
 	bl	printk
+#endif
 	mov	r0, sp
 	bl	show_regs
 1:	b	1b
-- 
cgit v1.2.3


From fba289054f24d2550f47a1413e1ccc24f4165560 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 28 Jan 2015 17:58:33 +0100
Subject: ARM: 8298/1: ARM_KERNMEM_PERMS only works with MMU enabled

The recently added ARM_KERNMEM_PERMS feature works by manipulating
the kernel page tables, which obviously requires an MMU. Trying
to enable this feature when the MMU is disabled results in a lot
of compile errors in mm/init.c, so let's add a Kconfig dependency
to avoid that case.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm')

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 03823e784f63..c43c71455566 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -1012,6 +1012,7 @@ config ARCH_SUPPORTS_BIG_ENDIAN
 
 config ARM_KERNMEM_PERMS
 	bool "Restrict kernel memory permissions"
+	depends on MMU
 	help
 	  If this is set, kernel memory other than kernel text (and rodata)
 	  will be made non-executable. The tradeoff is that each region is
-- 
cgit v1.2.3


From eab8d6530cc00744087bb5566e350cf37fcdfc7e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Fri, 23 Jan 2015 16:21:49 +0200
Subject: arm: dma-mapping: Set DMA IOMMU ops in arm_iommu_attach_device()

Commit 4bb25789ed28228a ("arm: dma-mapping: plumb our iommu mapping ops
into arch_setup_dma_ops") moved the setting of the DMA operations from
arm_iommu_attach_device() to arch_setup_dma_ops() where the DMA
operations to be used are selected based on whether the device is
connected to an IOMMU. However, the IOMMU detection scheme requires the
IOMMU driver to be ported to the new IOMMU of_xlate API. As no driver
has been ported yet, this effectively breaks all IOMMU ARM users that
depend on the IOMMU being handled transparently by the DMA mapping API.

Fix this by restoring the setting of DMA IOMMU ops in
arm_iommu_attach_device() and splitting the rest of the function into a
new internal __arm_iommu_attach_device() function, called by
arch_setup_dma_ops().

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mm/dma-mapping.c | 53 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 15 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7864797609b3..a673c7f7e208 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1940,13 +1940,32 @@ void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
 }
 EXPORT_SYMBOL_GPL(arm_iommu_release_mapping);
 
+static int __arm_iommu_attach_device(struct device *dev,
+				     struct dma_iommu_mapping *mapping)
+{
+	int err;
+
+	err = iommu_attach_device(mapping->domain, dev);
+	if (err)
+		return err;
+
+	kref_get(&mapping->kref);
+	dev->archdata.mapping = mapping;
+
+	pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
+	return 0;
+}
+
 /**
  * arm_iommu_attach_device
  * @dev: valid struct device pointer
  * @mapping: io address space mapping structure (returned from
  *	arm_iommu_create_mapping)
  *
- * Attaches specified io address space mapping to the provided device,
+ * Attaches specified io address space mapping to the provided device.
+ * This replaces the dma operations (dma_map_ops pointer) with the
+ * IOMMU aware version.
+ *
  * More than one client might be attached to the same io address space
  * mapping.
  */
@@ -1955,25 +1974,16 @@ int arm_iommu_attach_device(struct device *dev,
 {
 	int err;
 
-	err = iommu_attach_device(mapping->domain, dev);
+	err = __arm_iommu_attach_device(dev, mapping);
 	if (err)
 		return err;
 
-	kref_get(&mapping->kref);
-	dev->archdata.mapping = mapping;
-
-	pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
+	set_dma_ops(dev, &iommu_ops);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(arm_iommu_attach_device);
 
-/**
- * arm_iommu_detach_device
- * @dev: valid struct device pointer
- *
- * Detaches the provided device from a previously attached map.
- */
-void arm_iommu_detach_device(struct device *dev)
+static void __arm_iommu_detach_device(struct device *dev)
 {
 	struct dma_iommu_mapping *mapping;
 
@@ -1989,6 +1999,19 @@ void arm_iommu_detach_device(struct device *dev)
 
 	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
 }
+
+/**
+ * arm_iommu_detach_device
+ * @dev: valid struct device pointer
+ *
+ * Detaches the provided device from a previously attached map.
+ * This voids the dma operations (dma_map_ops pointer)
+ */
+void arm_iommu_detach_device(struct device *dev)
+{
+	__arm_iommu_detach_device(dev);
+	set_dma_ops(dev, NULL);
+}
 EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
 
 static struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
@@ -2011,7 +2034,7 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
 		return false;
 	}
 
-	if (arm_iommu_attach_device(dev, mapping)) {
+	if (__arm_iommu_attach_device(dev, mapping)) {
 		pr_warn("Failed to attached device %s to IOMMU_mapping\n",
 				dev_name(dev));
 		arm_iommu_release_mapping(mapping);
@@ -2025,7 +2048,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
 {
 	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
 
-	arm_iommu_detach_device(dev);
+	__arm_iommu_detach_device(dev);
 	arm_iommu_release_mapping(mapping);
 }
 
-- 
cgit v1.2.3


From 3c1e716508335eb132c9349cb1a1716c8f7e3d2e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 19 Dec 2014 16:05:31 +0000
Subject: arm/arm64: KVM: Use set/way op trapping to track the state of the
 caches

Trying to emulate the behaviour of set/way cache ops is fairly
pointless, as there are too many ways we can end-up missing stuff.
Also, there is some system caches out there that simply ignore
set/way operations.

So instead of trying to implement them, let's convert it to VA ops,
and use them as a way to re-enable the trapping of VM ops. That way,
we can detect the point when the MMU/caches are turned off, and do
a full VM flush (which is what the guest was trying to do anyway).

This allows a 32bit zImage to boot on the APM thingy, and will
probably help bootloaders in general.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h | 10 ++++++
 arch/arm/include/asm/kvm_host.h    |  3 --
 arch/arm/include/asm/kvm_mmu.h     |  3 +-
 arch/arm/kvm/arm.c                 | 10 ------
 arch/arm/kvm/coproc.c              | 70 ++++++++------------------------------
 arch/arm/kvm/coproc.h              |  6 ++--
 arch/arm/kvm/coproc_a15.c          |  2 +-
 arch/arm/kvm/coproc_a7.c           |  2 +-
 arch/arm/kvm/mmu.c                 | 70 +++++++++++++++++++++++++++++++++++++-
 arch/arm/kvm/trace.h               | 39 +++++++++++++++++++++
 10 files changed, 139 insertions(+), 76 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 66ce17655bb9..7b0152321b20 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -38,6 +38,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 	vcpu->arch.hcr = HCR_GUEST_MASK;
 }
 
+static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hcr;
+}
+
+static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+{
+	vcpu->arch.hcr = hcr;
+}
+
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
 	return 1;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 254e0650e48b..04b4ea0b550a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -125,9 +125,6 @@ struct kvm_vcpu_arch {
 	 * Anything that is not used directly from assembly code goes
 	 * here.
 	 */
-	/* dcache set/way operation pending */
-	int last_pcpu;
-	cpumask_t require_dcache_flush;
 
 	/* Don't run the guest on this vcpu */
 	bool pause;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 63e0ecc04901..286644c729ba 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -190,7 +190,8 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 
 #define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
-void stage2_flush_vm(struct kvm *kvm);
+void kvm_set_way_flush(struct kvm_vcpu *vcpu);
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2d6d91001062..0b0d58a905c4 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -281,15 +281,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	vcpu->cpu = cpu;
 	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
 
-	/*
-	 * Check whether this vcpu requires the cache to be flushed on
-	 * this physical CPU. This is a consequence of doing dcache
-	 * operations by set/way on this vcpu. We do it here to be in
-	 * a non-preemptible section.
-	 */
-	if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
-		flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
-
 	kvm_arm_set_running_vcpu(vcpu);
 }
 
@@ -541,7 +532,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
 
 		vcpu->mode = OUTSIDE_GUEST_MODE;
-		vcpu->arch.last_pcpu = smp_processor_id();
 		kvm_guest_exit();
 		trace_kvm_exit(*vcpu_pc(vcpu));
 		/*
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 7928dbdf2102..f3d88dc388bc 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -189,82 +189,40 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-/* See note at ARM ARM B1.14.4 */
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
 			const struct coproc_params *p,
 			const struct coproc_reg *r)
 {
-	unsigned long val;
-	int cpu;
-
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p);
 
-	cpu = get_cpu();
-
-	cpumask_setall(&vcpu->arch.require_dcache_flush);
-	cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
-
-	/* If we were already preempted, take the long way around */
-	if (cpu != vcpu->arch.last_pcpu) {
-		flush_cache_all();
-		goto done;
-	}
-
-	val = *vcpu_reg(vcpu, p->Rt1);
-
-	switch (p->CRm) {
-	case 6:			/* Upgrade DCISW to DCCISW, as per HCR.SWIO */
-	case 14:		/* DCCISW */
-		asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
-		break;
-
-	case 10:		/* DCCSW */
-		asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
-		break;
-	}
-
-done:
-	put_cpu();
-
+	kvm_set_way_flush(vcpu);
 	return true;
 }
 
 /*
  * Generic accessor for VM registers. Only called as long as HCR_TVM
- * is set.
+ * is set.  If the guest enables the MMU, we stop trapping the VM
+ * sys_regs and leave it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
  */
-static bool access_vm_reg(struct kvm_vcpu *vcpu,
-			  const struct coproc_params *p,
-			  const struct coproc_reg *r)
+bool access_vm_reg(struct kvm_vcpu *vcpu,
+		   const struct coproc_params *p,
+		   const struct coproc_reg *r)
 {
+	bool was_enabled = vcpu_has_cache_enabled(vcpu);
+
 	BUG_ON(!p->is_write);
 
 	vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
 	if (p->is_64bit)
 		vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
 
-	return true;
-}
-
-/*
- * SCTLR accessor. Only called as long as HCR_TVM is set.  If the
- * guest enables the MMU, we stop trapping the VM sys_regs and leave
- * it in complete control of the caches.
- *
- * Used by the cpu-specific code.
- */
-bool access_sctlr(struct kvm_vcpu *vcpu,
-		  const struct coproc_params *p,
-		  const struct coproc_reg *r)
-{
-	access_vm_reg(vcpu, p, r);
-
-	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
-		vcpu->arch.hcr &= ~HCR_TVM;
-		stage2_flush_vm(vcpu->kvm);
-	}
-
+	kvm_toggle_cache(vcpu, was_enabled);
 	return true;
 }
 
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 1a44bbe39643..88d24a3a9778 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -153,8 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 #define is64		.is_64 = true
 #define is32		.is_64 = false
 
-bool access_sctlr(struct kvm_vcpu *vcpu,
-		  const struct coproc_params *p,
-		  const struct coproc_reg *r);
+bool access_vm_reg(struct kvm_vcpu *vcpu,
+		   const struct coproc_params *p,
+		   const struct coproc_reg *r);
 
 #endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index e6f4ae48bda9..a7136757d373 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -34,7 +34,7 @@
 static const struct coproc_reg a15_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
+			access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 },
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
index 17fc7cd479d3..b19e46d1b2c0 100644
--- a/arch/arm/kvm/coproc_a7.c
+++ b/arch/arm/kvm/coproc_a7.c
@@ -37,7 +37,7 @@
 static const struct coproc_reg a7_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
+			access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 },
 };
 
 static struct kvm_coproc_target_table a7_target_table = {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1dc9778a00af..106737e309b1 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -278,7 +278,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
  * Go through the stage 2 page tables and invalidate any cache lines
  * backing memory already mapped to the VM.
  */
-void stage2_flush_vm(struct kvm *kvm)
+static void stage2_flush_vm(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
@@ -1411,3 +1411,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 	unmap_stage2_range(kvm, gpa, size);
 	spin_unlock(&kvm->mmu_lock);
 }
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ *
+ * Main problems:
+ * - S/W ops are local to a CPU (not broadcast)
+ * - We have line migration behind our back (speculation)
+ * - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the
+ * S/W to PA mapping, it can only use S/W to nuke the whole cache,
+ * which is a rather good thing for us.
+ *
+ * Also, it is only used when turning caches on/off ("The expected
+ * usage of the cache maintenance instructions that operate by set/way
+ * is associated with the cache maintenance instructions associated
+ * with the powerdown and powerup of caches, if this is required by
+ * the implementation.").
+ *
+ * We use the following policy:
+ *
+ * - If we trap a S/W operation, we enable VM trapping to detect
+ *   caches being turned on/off, and do a full clean.
+ *
+ * - We flush the caches on both caches being turned on and off.
+ *
+ * - Once the caches are enabled, we stop trapping VM ops.
+ */
+void kvm_set_way_flush(struct kvm_vcpu *vcpu)
+{
+	unsigned long hcr = vcpu_get_hcr(vcpu);
+
+	/*
+	 * If this is the first time we do a S/W operation
+	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
+	 * VM trapping.
+	 *
+	 * Otherwise, rely on the VM trapping to wait for the MMU +
+	 * Caches to be turned off. At that point, we'll be able to
+	 * clean the caches again.
+	 */
+	if (!(hcr & HCR_TVM)) {
+		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
+					vcpu_has_cache_enabled(vcpu));
+		stage2_flush_vm(vcpu->kvm);
+		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+	}
+}
+
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
+{
+	bool now_enabled = vcpu_has_cache_enabled(vcpu);
+
+	/*
+	 * If switching the MMU+caches on, need to invalidate the caches.
+	 * If switching it off, need to clean the caches.
+	 * Clean + invalidate does the trick always.
+	 */
+	if (now_enabled != was_enabled)
+		stage2_flush_vm(vcpu->kvm);
+
+	/* Caches are now on, stop trapping VM ops (until a S/W op) */
+	if (now_enabled)
+		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+
+	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
+}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index b1d640f78623..b6a6e7102201 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -223,6 +223,45 @@ TRACE_EVENT(kvm_hvc,
 		  __entry->vcpu_pc, __entry->r0, __entry->imm)
 );
 
+TRACE_EVENT(kvm_set_way_flush,
+	    TP_PROTO(unsigned long vcpu_pc, bool cache),
+	    TP_ARGS(vcpu_pc, cache),
+
+	    TP_STRUCT__entry(
+		    __field(	unsigned long,	vcpu_pc		)
+		    __field(	bool,		cache		)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->vcpu_pc		= vcpu_pc;
+		    __entry->cache		= cache;
+	    ),
+
+	    TP_printk("S/W flush at 0x%016lx (cache %s)",
+		      __entry->vcpu_pc, __entry->cache ? "on" : "off")
+);
+
+TRACE_EVENT(kvm_toggle_cache,
+	    TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
+	    TP_ARGS(vcpu_pc, was, now),
+
+	    TP_STRUCT__entry(
+		    __field(	unsigned long,	vcpu_pc		)
+		    __field(	bool,		was		)
+		    __field(	bool,		now		)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->vcpu_pc		= vcpu_pc;
+		    __entry->was		= was;
+		    __entry->now		= now;
+	    ),
+
+	    TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
+		      __entry->vcpu_pc, __entry->was ? "on" : "off",
+		      __entry->now ? "on" : "off")
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3


From 363ef89f8e9bcedc28b976d0fe2d858fe139c122 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 19 Dec 2014 16:48:06 +0000
Subject: arm/arm64: KVM: Invalidate data cache on unmap

Let's assume a guest has created an uncached mapping, and written
to that page. Let's also assume that the host uses a cache-coherent
IO subsystem. Let's finally assume that the host is under memory
pressure and starts to swap things out.

Before this "uncached" page is evicted, we need to make sure
we invalidate potential speculated, clean cache lines that are
sitting there, or the IO subsystem is going to swap out the
cached view, loosing the data that has been written directly
into memory.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h | 31 ++++++++++++++++
 arch/arm/kvm/mmu.c             | 82 ++++++++++++++++++++++++++++++++++--------
 2 files changed, 98 insertions(+), 15 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 286644c729ba..552c31f5a3f7 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -44,6 +44,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
 
@@ -188,6 +189,36 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 	}
 }
 
+static inline void __kvm_flush_dcache_pte(pte_t pte)
+{
+	void *va = kmap_atomic(pte_page(pte));
+
+	kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+	kunmap_atomic(va);
+}
+
+static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	unsigned long size = PMD_SIZE;
+	pfn_t pfn = pmd_pfn(pmd);
+
+	while (size) {
+		void *va = kmap_atomic_pfn(pfn);
+
+		kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+		pfn++;
+		size -= PAGE_SIZE;
+
+		kunmap_atomic(va);
+	}
+}
+
+static inline void __kvm_flush_dcache_pud(pud_t pud)
+{
+}
+
 #define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 106737e309b1..78e68abcb01f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+	__kvm_flush_dcache_pte(pte);
+}
+
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	__kvm_flush_dcache_pmd(pmd);
+}
+
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+	__kvm_flush_dcache_pud(pud);
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  int min, int max)
 {
@@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 	put_page(virt_to_page(pmd));
 }
 
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM.  However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
 static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
@@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 	start_pte = pte = pte_offset_kernel(pmd, addr);
 	do {
 		if (!pte_none(*pte)) {
+			pte_t old_pte = *pte;
+
 			kvm_set_pte(pte, __pte(0));
-			put_page(virt_to_page(pte));
 			kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+			/* No need to invalidate the cache for device mappings */
+			if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+				kvm_flush_dcache_pte(old_pte);
+
+			put_page(virt_to_page(pte));
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
@@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
 			if (kvm_pmd_huge(*pmd)) {
+				pmd_t old_pmd = *pmd;
+
 				pmd_clear(pmd);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pmd(old_pmd);
+
 				put_page(virt_to_page(pmd));
 			} else {
 				unmap_ptes(kvm, pmd, addr, next);
@@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
 			if (pud_huge(*pud)) {
+				pud_t old_pud = *pud;
+
 				pud_clear(pud);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pud(old_pud);
+
 				put_page(virt_to_page(pud));
 			} else {
 				unmap_pmds(kvm, pud, addr, next);
@@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		if (!pte_none(*pte)) {
-			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
-		}
+		if (!pte_none(*pte) &&
+		    (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+			kvm_flush_dcache_pte(*pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
@@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
 	do {
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
-			} else {
+			if (kvm_pmd_huge(*pmd))
+				kvm_flush_dcache_pmd(*pmd);
+			else
 				stage2_flush_ptes(kvm, pmd, addr, next);
-			}
 		}
 	} while (pmd++, addr = next, addr != end);
 }
@@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
 	do {
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
-			if (pud_huge(*pud)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
-			} else {
+			if (pud_huge(*pud))
+				kvm_flush_dcache_pud(*pud);
+			else
 				stage2_flush_pmds(kvm, pud, addr, next);
-			}
 		}
 	} while (pud++, addr = next, addr != end);
 }
-- 
cgit v1.2.3


From 0d3e4d4fade6b04e933b11e69e80044f35e9cd60 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 5 Jan 2015 21:13:24 +0000
Subject: arm/arm64: KVM: Use kernel mapping to perform invalidation on page
 fault

When handling a fault in stage-2, we need to resync I$ and D$, just
to be sure we don't leave any old cache line behind.

That's very good, except that we do so using the *user* address.
Under heavy load (swapping like crazy), we may end up in a situation
where the page gets mapped in stage-2 while being unmapped from
userspace by another CPU.

At that point, the DC/IC instructions can generate a fault, which
we handle with kvm->mmu_lock held. The box quickly deadlocks, user
is unhappy.

Instead, perform this invalidation through the kernel mapping,
which is guaranteed to be present. The box is much happier, and so
am I.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h | 43 +++++++++++++++++++++++++++++++++---------
 arch/arm/kvm/mmu.c             | 12 ++++++++----
 2 files changed, 42 insertions(+), 13 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 552c31f5a3f7..1bca8f8af442 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -162,13 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
 }
 
-static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size,
-					     bool ipa_uncached)
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+					       unsigned long size,
+					       bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
-		kvm_flush_dcache_to_poc((void *)hva, size);
-	
 	/*
 	 * If we are going to insert an instruction page and the icache is
 	 * either VIPT or PIPT, there is a potential problem where the host
@@ -180,10 +177,38 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 	 *
 	 * VIVT caches are tagged using both the ASID and the VMID and doesn't
 	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
+	 *
+	 * We need to do this through a kernel mapping (using the
+	 * user-space mapping has proved to be the wrong
+	 * solution). For that, we need to kmap one page at a time,
+	 * and iterate over the range.
 	 */
-	if (icache_is_pipt()) {
-		__cpuc_coherent_user_range(hva, hva + size);
-	} else if (!icache_is_vivt_asid_tagged()) {
+
+	bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
+
+	VM_BUG_ON(size & PAGE_MASK);
+
+	if (!need_flush && !icache_is_pipt())
+		goto vipt_cache;
+
+	while (size) {
+		void *va = kmap_atomic_pfn(pfn);
+
+		if (need_flush)
+			kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+		if (icache_is_pipt())
+			__cpuc_coherent_user_range((unsigned long)va,
+						   (unsigned long)va + PAGE_SIZE);
+
+		size -= PAGE_SIZE;
+		pfn++;
+
+		kunmap_atomic(va);
+	}
+
+vipt_cache:
+	if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
 	}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 78e68abcb01f..136662547ca6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -957,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn)
 	return !pfn_valid(pfn);
 }
 
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+				      unsigned long size, bool uncached)
+{
+	__coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -1046,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
-					  fault_ipa_uncached);
+		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1055,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
-					  fault_ipa_uncached);
+		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
-- 
cgit v1.2.3


From 8e64806672466392acf19e14427d1c29df3e58b9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 29 Jan 2015 16:41:46 +0100
Subject: ARM: 8299/1: mm: ensure local active ASID is marked as allocated on
 rollover

Commit e1a5848e3398 ("ARM: 7924/1: mm: don't bother with reserved ttbr0
when running with LPAE") removed the use of the reserved TTBR0 value
for LPAE systems, since the ASID is held in the TTBR and can be updated
atomicly with the pgd of the next mm.

Unfortunately, this patch forgot to update flush_context, which
deliberately avoids marking the local active ASID as allocated, since we
used to switch via ASID zero and didn't need to allocate the ASID of
the previous mm. The side-effect of this is that we can allocate the
same ASID to the next mm and, between flushing the local TLB and updating
TTBR0, we can perform speculative TLB fills for userspace nG mappings
using the page table of the previous mm.

The consequence of this is that the next mm can erroneously hit some
mappings of the previous mm. Note that this was made significantly
harder to hit by a391263cd84e ("ARM: 8203/1: mm: try to re-use old ASID
assignments following a rollover") but is still theoretically possible.

This patch fixes the problem by removing the code from flush_context
that forces the allocated ASID to zero for the local CPU. Many thanks
to the Broadcom guys for tracking this one down.

Fixes: e1a5848e3398 ("ARM: 7924/1: mm: don't bother with reserved ttbr0 when running with LPAE")

Cc: <stable@vger.kernel.org> # v3.14+
Reported-by: Raymond Ngun <rngun@broadcom.com>
Tested-by: Raymond Ngun <rngun@broadcom.com>
Reviewed-by: Gregory Fong <gregory.0xf0@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/context.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 91892569710f..845769e41332 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -144,21 +144,17 @@ static void flush_context(unsigned int cpu)
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
 	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 	for_each_possible_cpu(i) {
-		if (i == cpu) {
-			asid = 0;
-		} else {
-			asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
-			/*
-			 * If this CPU has already been through a
-			 * rollover, but hasn't run another task in
-			 * the meantime, we must preserve its reserved
-			 * ASID, as this is the only trace we have of
-			 * the process it is still running.
-			 */
-			if (asid == 0)
-				asid = per_cpu(reserved_asids, i);
-			__set_bit(asid & ~ASID_MASK, asid_map);
-		}
+		asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
+		/*
+		 * If this CPU has already been through a
+		 * rollover, but hasn't run another task in
+		 * the meantime, we must preserve its reserved
+		 * ASID, as this is the only trace we have of
+		 * the process it is still running.
+		 */
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, i);
+		__set_bit(asid & ~ASID_MASK, asid_map);
 		per_cpu(reserved_asids, i) = asid;
 	}
 
-- 
cgit v1.2.3