From 2f2724630f7a8d582470f03ee56b96746767d270 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Mon, 22 May 2017 16:25:14 +1000
Subject: KVM: PPC: Book3S HV: Cope with host using large decrementer mode

POWER9 introduces a new mode for the decrementer register, called
large decrementer mode, in which the decrementer counter is 56 bits
wide rather than 32, and reads are sign-extended rather than
zero-extended.  For the decrementer, this new mode is optional and
controlled by a bit in the LPCR.  The hypervisor decrementer (HDEC)
is 56 bits wide on POWER9 and has no mode control.

Since KVM code reads and writes the decrementer and hypervisor
decrementer registers in a few places, it needs to be aware of the
need to treat the decrementer value as a 64-bit quantity, and only do
a 32-bit sign extension when large decrementer mode is not in effect.
Similarly, the HDEC should always be treated as a 64-bit quantity on
POWER9.  We define a new EXTEND_HDEC macro to encapsulate the feature
test for POWER9 and the sign extension.

To enable the sign extension to be removed in large decrementer mode,
we test the LPCR_LD bit in the host LPCR image stored in the struct
kvm for the guest.  If is set then large decrementer mode is enabled
and the sign extension should be skipped.

This is partly based on an earlier patch by Oliver O'Halloran.

Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv_interrupts.S | 12 +++++++++++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 23 +++++++++++++++++------
 2 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..404deb512844 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
 	 */
+BEGIN_FTR_SECTION
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_KVM(r5)
+	ld	r9, KVM_HOST_LPCR(r6)
+	andis.	r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mfspr	r8,SPRN_DEC
 	mftb	r7
-	mtspr	SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+	/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+	bne	32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r8,r8
+32:	mtspr	SPRN_HDEC,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bdb3f76ceb6b..e390b383b4d6 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -32,6 +32,12 @@
 #include <asm/opal.h>
 #include <asm/xive-regs.h>
 
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg)			\
+BEGIN_FTR_SECTION;				\
+	extsw	reg, reg;			\
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
 /* Values in HSTATE_NAPPING(r13) */
@@ -214,6 +220,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+	/* HDEC may be larger than DEC for arch >= v3.00, but since the */
+	/* HDEC value came from DEC in the first place, it will fit */
 	mfspr	r3, SPRN_HDEC
 	mtspr	SPRN_DEC, r3
 	/*
@@ -295,8 +303,9 @@ kvm_novcpu_wakeup:
 
 	/* See if our timeslice has expired (HDEC is negative) */
 	mfspr	r0, SPRN_HDEC
+	EXTEND_HDEC(r0)
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	cmpwi	r0, 0
+	cmpdi	r0, 0
 	blt	kvm_novcpu_exit
 
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -390,8 +399,8 @@ kvm_secondary_got_guest:
 	lbz	r4, HSTATE_PTID(r13)
 	cmpwi	r4, 0
 	bne	63f
-	lis	r6, 0x7fff
-	ori	r6, r6, 0xffff
+	LOAD_REG_ADDR(r6, decrementer_max)
+	ld	r6, 0(r6)
 	mtspr	SPRN_HDEC, r6
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
@@ -968,7 +977,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 
 	/* Check if HDEC expires soon */
 	mfspr	r3, SPRN_HDEC
-	cmpwi	r3, 512		/* 1 microsecond */
+	EXTEND_HDEC(r3)
+	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
 #ifdef CONFIG_KVM_XICS
@@ -2366,12 +2376,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 	mfspr	r3, SPRN_DEC
 	mfspr	r4, SPRN_HDEC
 	mftb	r5
-	cmpw	r3, r4
+	extsw	r3, r3
+	EXTEND_HDEC(r4)
+	cmpd	r3, r4
 	ble	67f
 	mtspr	SPRN_DEC, r4
 67:
 	/* save expiry time of guest decrementer */
-	extsw	r3, r3
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
-- 
cgit v1.2.3


From 60ce2858514ed9ccaf00dc7e9f4dc219537e9855 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 9 Jun 2017 10:14:53 +0100
Subject: ARM: 8680/1: boot/compressed: fix inappropriate Thumb2 mnemonic for
 __nop

Commit 06a4b6d009a1 ("ARM: 8677/1: boot/compressed: fix decompressor
header layout for v7-M") fixed an issue in the layout of the header
of the compressed kernel image that was caused by the assembler
emitting narrow opcodes for 'mov r0, r0', and for this reason, the
mnemonic was updated to use the W() macro, which will append the .w
suffix (which forces a wide encoding) if required, i.e., when building
the kernel in Thumb2 mode.

However, this failed to take into account that on Thumb2 kernels built
for CPUs that are also ARM capable, the entry point is entered in ARM
mode, and so the instructions emitted here will be ARM instructions
that only exist in a wide encoding to begin with, which is why the
assembler rejects the .w suffix here and aborts the build with the
following message:

  head.S: Assembler messages:
  head.S:132: Error: width suffixes are invalid in ARM mode -- `mov.w r0,r0'

So replace the W(mov) with separate ARM and Thumb2 instructions, where
the latter will only be used for THUMB2_ONLY builds.

Fixes: 06a4b6d009a1 ("ARM: 8677/1: boot/compressed: fix decompressor ...")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/boot/compressed/efi-header.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
index 3f7d1b74c5e0..a17ca8d78656 100644
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -17,7 +17,8 @@
 		@ there.
 		.inst	'M' | ('Z' << 8) | (0x1310 << 16)   @ tstne r0, #0x4d000
 #else
-		W(mov)	r0, r0
+ AR_CLASS(	mov	r0, r0		)
+  M_CLASS(	nop.w			)
 #endif
 		.endm
 
-- 
cgit v1.2.3


From bbeedfda8eee0b17ea16e4e157c596095458676a Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Fri, 9 Jun 2017 15:28:18 +0100
Subject: ARM: 8681/1: make VMSPLIT_3G_OPT depends on !ARM_LPAE

When both enable CONFIG_ARM_LPAE=y and CONFIG_VMSPLIT_3G_OPT=y, which
means use PAGE_OFFSET=0xB0000000 with ARM_LPAE, the kernel will boot
fail and stop after uncompressed:

   Starting kernel ...

   Uart base = 0x20001000
   watchdog reg = 0x20013000
   dtb addr = 0x80840308
   Uncompressing Linux... done, booting the kernel.

For ARM_LPAE only support 3:1, 2:2, 1:3 split of TTBR1, which mention in:
   http://elinux.org/images/6/6a/Elce11_marinas.pdf - p16

So we should make VMSPLIT_3G_OPT depends on !ARM_LPAE to avoid trigger
this bug.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c1a35f15838..c0fcab6a5504 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1416,6 +1416,7 @@ choice
 	config VMSPLIT_3G
 		bool "3G/1G user/kernel split"
 	config VMSPLIT_3G_OPT
+		depends on !ARM_LPAE
 		bool "3G/1G user/kernel split (for full 1G low memory)"
 	config VMSPLIT_2G
 		bool "2G/2G user/kernel split"
-- 
cgit v1.2.3


From d360a687d99577110c181e67ebfb9a1b6fed63a2 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Mon, 12 Jun 2017 13:35:52 +0100
Subject: ARM: 8682/1: V7M: Set cacheid iff DminLine or IminLine is nonzero

Cache support is optional feature in M-class cores, thus DminLine or
IminLine of Cache Type Register is zero if caches are not implemented,
but we check the whole CTR which has other features encoded there.
Let's be more precise and check for DminLine and IminLine of CTR
before we set cacheid.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 32e1a9513dc7..4e80bf7420d4 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -315,7 +315,7 @@ static void __init cacheid_init(void)
 	if (arch >= CPU_ARCH_ARMv6) {
 		unsigned int cachetype = read_cpuid_cachetype();
 
-		if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+		if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) {
 			cacheid = 0;
 		} else if ((cachetype & (7 << 29)) == 4 << 29) {
 			/* ARMv7 register format */
-- 
cgit v1.2.3


From ca8efa1df1d15a1795a2da57f9f6aada6ed6b946 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Tue, 6 Jun 2017 16:47:22 +1000
Subject: KVM: PPC: Book3S HV: Context-switch EBB registers properly

This adds code to save the values of three SPRs (special-purpose
registers) used by userspace to control event-based branches (EBBs),
which are essentially interrupts that get delivered directly to
userspace.  These registers are loaded up with guest values when
entering the guest, and their values are saved when exiting the
guest, but we were not saving the host values and restoring them
before going back to userspace.

On POWER8 this would only affect userspace programs which explicitly
request the use of EBBs and also use the KVM_RUN ioctl, since the
only source of EBBs on POWER8 is the PMU, and there is an explicit
enable bit in the PMU registers (and those PMU registers do get
properly context-switched between host and guest).  On POWER9 there
is provision for externally-generated EBBs, and these are not subject
to the control in the PMU registers.

Since these registers only affect userspace, we can save them when
we first come in from userspace and restore them before returning to
userspace, rather than saving/restoring the host values on every
guest entry/exit.  Similarly, we don't need to worry about their
values on offline secondary threads since they execute in the context
of the idle task, which never executes in userspace.

Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08)
Cc: stable@vger.kernel.org # v3.14+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 42b7a4fd57d9..400a5992b121 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2907,6 +2907,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
 	int srcu_idx;
+	unsigned long ebb_regs[3] = {};	/* shut up GCC */
 
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -2934,6 +2935,13 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	flush_all_to_thread(current);
 
+	/* Save userspace EBB register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		ebb_regs[0] = mfspr(SPRN_EBBHR);
+		ebb_regs[1] = mfspr(SPRN_EBBRR);
+		ebb_regs[2] = mfspr(SPRN_BESCR);
+	}
+
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
 	vcpu->arch.pgdir = current->mm->pgd;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +2968,13 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	} while (is_kvmppc_resume_guest(r));
 
+	/* Restore userspace EBB register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		mtspr(SPRN_EBBHR, ebb_regs[0]);
+		mtspr(SPRN_EBBRR, ebb_regs[1]);
+		mtspr(SPRN_BESCR, ebb_regs[2]);
+	}
+
  out:
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
 	atomic_dec(&vcpu->kvm->arch.vcpus_running);
-- 
cgit v1.2.3


From 4130b28f568688f79539b732797a1dc04b048442 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 6 Jun 2017 13:55:42 +0200
Subject: s390/ipl: revert Load Normal semantics for LPAR CCW-type re-IPL

This reverts the two commits

7afbeb6df2aa ("s390/ipl: always use load normal for CCW-type re-IPL")
0f7451ff3ab8 ("s390/ipl: use load normal for LPAR re-ipl")

The two commits did not take into account that behavior of standby
memory changes fundamentally if the re-IPL method is changed from
Load Clear to Load Normal.

In case of the old re-IPL clear method all memory that was initially
in standby state will be put into standby state again within the
re-IPL process. Or in other words: memory that was brought online
before a re-IPL will be offline again after a reboot.

Given that we use different re-IPL methods depending on the hypervisor
and CCW-type vs SCSI re-IPL it is not easy to tell in advance when and
why memory will stay online or will be offline after a re-IPL.
This does also have other side effects, since memory that is online
from the beginning will be in ZONE_NORMAL by default vs ZONE_MOVABLE
for memory that is offline.

Therefore, before the change, a user could online and offline memory
easily since standby memory was always in ZONE_NORMAL.  After the
change, and a re-IPL, this depended on which memory parts were online
before the re-IPL.

From a usability point of view the current behavior is more than
suboptimal. Therefore revert these changes until we have a better
solution and get back to a consistent behavior. The bad thing about
this is that the time required for a re-IPL will be significantly
increased for configurations with several 100GB or 1TB of memory.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ipl.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index e545ffe5155a..8e622bb52f7a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,8 +564,6 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
-	if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
-		diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
 	diag308(DIAG308_LOAD_CLEAR, NULL);
 	if (MACHINE_IS_VM)
 		__cpcmd("IPL", NULL, 0, NULL);
@@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused)
 		break;
 	case REIPL_METHOD_CCW_DIAG:
 		diag308(DIAG308_SET, reipl_block_ccw);
-		if (MACHINE_IS_LPAR)
-			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
-		else
-			diag308(DIAG308_LOAD_CLEAR, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RW_DIAG:
 		diag308(DIAG308_SET, reipl_block_fcp);
-- 
cgit v1.2.3


From 4c3bb4ccd074e1a0552078c0bf94c662367a1658 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 15 Jun 2017 15:43:17 +1000
Subject: KVM: PPC: Book3S HV: Restore critical SPRs to host values on guest
 exit

This restores several special-purpose registers (SPRs) to sane values
on guest exit that were missed before.

TAR and VRSAVE are readable and writable by userspace, and we need to
save and restore them to prevent the guest from potentially affecting
userspace execution (not that TAR or VRSAVE are used by any known
program that run uses the KVM_RUN ioctl).  We save/restore these
in kvmppc_vcpu_run_hv() rather than on every guest entry/exit.

FSCR affects userspace execution in that it can prohibit access to
certain facilities by userspace.  We restore it to the normal value
for the task on exit from the KVM_RUN ioctl.

IAMR is normally 0, and is restored to 0 on guest exit.  However,
with a radix host on POWER9, it is set to a value that prevents the
kernel from executing user-accessible memory.  On POWER9, we save
IAMR on guest entry and restore it on guest exit to the saved value
rather than 0.  On POWER8 we continue to set it to 0 on guest exit.

PSPB is normally 0.  We restore it to 0 on guest exit to prevent
userspace taking advantage of the guest having set it non-zero
(which would allow userspace to set its SMT priority to high).

UAMOR is normally 0.  We restore it to 0 on guest exit to prevent
the AMR from being used as a covert channel between userspace
processes, since the AMR is not context-switched at present.

Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08)
Cc: stable@vger.kernel.org # v3.14+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv.c            | 11 +++++++++--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  9 ++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 400a5992b121..a963762a031f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2908,6 +2908,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	int r;
 	int srcu_idx;
 	unsigned long ebb_regs[3] = {};	/* shut up GCC */
+	unsigned long user_tar = 0;
+	unsigned int user_vrsave;
 
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -2935,12 +2937,14 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	flush_all_to_thread(current);
 
-	/* Save userspace EBB register values */
+	/* Save userspace EBB and other register values */
 	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 		ebb_regs[0] = mfspr(SPRN_EBBHR);
 		ebb_regs[1] = mfspr(SPRN_EBBRR);
 		ebb_regs[2] = mfspr(SPRN_BESCR);
+		user_tar = mfspr(SPRN_TAR);
 	}
+	user_vrsave = mfspr(SPRN_VRSAVE);
 
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
 	vcpu->arch.pgdir = current->mm->pgd;
@@ -2968,12 +2972,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	} while (is_kvmppc_resume_guest(r));
 
-	/* Restore userspace EBB register values */
+	/* Restore userspace EBB and other register values */
 	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 		mtspr(SPRN_EBBHR, ebb_regs[0]);
 		mtspr(SPRN_EBBRR, ebb_regs[1]);
 		mtspr(SPRN_BESCR, ebb_regs[2]);
+		mtspr(SPRN_TAR, user_tar);
+		mtspr(SPRN_FSCR, current->thread.fscr);
 	}
+	mtspr(SPRN_VRSAVE, user_vrsave);
 
  out:
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e390b383b4d6..4e4390564276 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -558,6 +558,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 #define STACK_SLOT_TID		(112-16)
 #define STACK_SLOT_PSSCR	(112-24)
 #define STACK_SLOT_PID		(112-32)
+#define STACK_SLOT_IAMR		(112-40)
 
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
@@ -758,9 +759,11 @@ BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_TIDR
 	mfspr	r6, SPRN_PSSCR
 	mfspr	r7, SPRN_PID
+	mfspr	r8, SPRN_IAMR
 	std	r5, STACK_SLOT_TID(r1)
 	std	r6, STACK_SLOT_PSSCR(r1)
 	std	r7, STACK_SLOT_PID(r1)
+	std	r8, STACK_SLOT_IAMR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 BEGIN_FTR_SECTION
@@ -1515,11 +1518,12 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	 * set by the guest could disrupt the host.
 	 */
 	li	r0, 0
-	mtspr	SPRN_IAMR, r0
 	mtspr	SPRN_CIABR, r0
 	mtspr	SPRN_DAWRX, r0
+	mtspr	SPRN_PSPB, r0
 	mtspr	SPRN_WORT, r0
 BEGIN_FTR_SECTION
+	mtspr	SPRN_IAMR, r0
 	mtspr	SPRN_TCSCR, r0
 	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
 	li	r0, 1
@@ -1535,6 +1539,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	std	r6,VCPU_UAMOR(r9)
 	li	r6,0
 	mtspr	SPRN_AMR,r6
+	mtspr	SPRN_UAMOR, r6
 
 	/* Switch DSCR back to host value */
 	mfspr	r8, SPRN_DSCR
@@ -1683,9 +1688,11 @@ BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_TID(r1)
 	ld	r6, STACK_SLOT_PSSCR(r1)
 	ld	r7, STACK_SLOT_PID(r1)
+	ld	r8, STACK_SLOT_IAMR(r1)
 	mtspr	SPRN_TIDR, r5
 	mtspr	SPRN_PSSCR, r6
 	mtspr	SPRN_PID, r7
+	mtspr	SPRN_IAMR, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
 	PPC_INVALIDATE_ERAT
-- 
cgit v1.2.3


From 46a704f8409f79fd66567ad3f8a7304830a84293 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 15 Jun 2017 16:10:27 +1000
Subject: KVM: PPC: Book3S HV: Preserve userspace HTM state properly

If userspace attempts to call the KVM_RUN ioctl when it has hardware
transactional memory (HTM) enabled, the values that it has put in the
HTM-related SPRs TFHAR, TFIAR and TEXASR will get overwritten by
guest values.  To fix this, we detect this condition and save those
SPR values in the thread struct, and disable HTM for the task.  If
userspace goes to access those SPRs or the HTM facility in future,
a TM-unavailable interrupt will occur and the handler will reload
those SPRs and re-enable HTM.

If userspace has started a transaction and suspended it, we would
currently lose the transactional state in the guest entry path and
would almost certainly get a "TM Bad Thing" interrupt, which would
cause the host to crash.  To avoid this, we detect this case and
return from the KVM_RUN ioctl with an EINVAL error, with the KVM
exit reason set to KVM_EXIT_FAIL_ENTRY.

Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08)
Cc: stable@vger.kernel.org # v3.14+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a963762a031f..fd4d978d5257 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2916,6 +2916,27 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		return -EINVAL;
 	}
 
+	/*
+	 * Don't allow entry with a suspended transaction, because
+	 * the guest entry/exit code will lose it.
+	 * If the guest has TM enabled, save away their TM-related SPRs
+	 * (they will get restored by the TM unavailable interrupt).
+	 */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+	    (current->thread.regs->msr & MSR_TM)) {
+		if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason = 0;
+			return -EINVAL;
+		}
+		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+		current->thread.regs->msr &= ~MSR_TM;
+	}
+#endif
+
 	kvmppc_core_prepare_to_enter(vcpu);
 
 	/* No need to go into the guest when all we'll do is come back out */
-- 
cgit v1.2.3


From 7ceaa6dcd8c6f59588428cec37f3c8093dd1011f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Fri, 16 Jun 2017 11:53:19 +1000
Subject: KVM: PPC: Book3S HV: Save/restore host values of debug registers

At present, HV KVM on POWER8 and POWER9 machines loses any instruction
or data breakpoint set in the host whenever a guest is run.
Instruction breakpoints are currently only used by xmon, but ptrace
and the perf_event subsystem can set data breakpoints as well as xmon.

To fix this, we save the host values of the debug registers (CIABR,
DAWR and DAWRX) before entering the guest and restore them on exit.
To provide space to save them in the stack frame, we expand the stack
frame allocated by kvmppc_hv_entry() from 112 to 144 bytes.

Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08)
Cc: stable@vger.kernel.org # v3.14+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 45 +++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4e4390564276..4888dd494604 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -44,6 +44,17 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define NAPPING_CEDE	1
 #define NAPPING_NOVCPU	2
 
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS			144
+#define STACK_SLOT_TRAP		(SFS-4)
+#define STACK_SLOT_TID		(SFS-16)
+#define STACK_SLOT_PSSCR	(SFS-24)
+#define STACK_SLOT_PID		(SFS-32)
+#define STACK_SLOT_IAMR		(SFS-40)
+#define STACK_SLOT_CIABR	(SFS-48)
+#define STACK_SLOT_DAWR		(SFS-56)
+#define STACK_SLOT_DAWRX	(SFS-64)
+
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
@@ -328,10 +339,10 @@ kvm_novcpu_exit:
 	bl	kvmhv_accumulate_time
 #endif
 13:	mr	r3, r12
-	stw	r12, 112-4(r1)
+	stw	r12, STACK_SLOT_TRAP(r1)
 	bl	kvmhv_commence_exit
 	nop
-	lwz	r12, 112-4(r1)
+	lwz	r12, STACK_SLOT_TRAP(r1)
 	b	kvmhv_switch_to_host
 
 /*
@@ -554,12 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  *                                                                            *
  *****************************************************************************/
 
-/* Stack frame offsets */
-#define STACK_SLOT_TID		(112-16)
-#define STACK_SLOT_PSSCR	(112-24)
-#define STACK_SLOT_PID		(112-32)
-#define STACK_SLOT_IAMR		(112-40)
-
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
 
@@ -575,7 +580,7 @@ kvmppc_hv_entry:
 	 */
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
-	stdu	r1, -112(r1)
+	stdu	r1, -SFS(r1)
 
 	/* Save R1 in the PACA */
 	std	r1, HSTATE_HOST_R1(r13)
@@ -765,6 +770,14 @@ BEGIN_FTR_SECTION
 	std	r7, STACK_SLOT_PID(r1)
 	std	r8, STACK_SLOT_IAMR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_CIABR
+	mfspr	r6, SPRN_DAWR
+	mfspr	r7, SPRN_DAWRX
+	std	r5, STACK_SLOT_CIABR(r1)
+	std	r6, STACK_SLOT_DAWR(r1)
+	std	r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 BEGIN_FTR_SECTION
 	/* Set partition DABR */
@@ -1518,8 +1531,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	 * set by the guest could disrupt the host.
 	 */
 	li	r0, 0
-	mtspr	SPRN_CIABR, r0
-	mtspr	SPRN_DAWRX, r0
 	mtspr	SPRN_PSPB, r0
 	mtspr	SPRN_WORT, r0
 BEGIN_FTR_SECTION
@@ -1684,6 +1695,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	ptesync
 
 	/* Restore host values of some registers */
+BEGIN_FTR_SECTION
+	ld	r5, STACK_SLOT_CIABR(r1)
+	ld	r6, STACK_SLOT_DAWR(r1)
+	ld	r7, STACK_SLOT_DAWRX(r1)
+	mtspr	SPRN_CIABR, r5
+	mtspr	SPRN_DAWR, r6
+	mtspr	SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_TID(r1)
 	ld	r6, STACK_SLOT_PSSCR(r1)
@@ -1836,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
 
-	ld	r0, 112+PPC_LR_STKOFF(r1)
-	addi	r1, r1, 112
+	ld	r0, SFS+PPC_LR_STKOFF(r1)
+	addi	r1, r1, SFS
 	mtlr	r0
 	blr
 
-- 
cgit v1.2.3


From 3d3efb68c19e539f0535c93a5258c1299270215f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Tue, 6 Jun 2017 14:35:30 +1000
Subject: KVM: PPC: Book3S HV: Ignore timebase offset on POWER9 DD1

POWER9 DD1 has an erratum where writing to the TBU40 register, which
is used to apply an offset to the timebase, can cause the timebase to
lose counts.  This results in the timebase on some CPUs getting out of
sync with other CPUs, which then results in misbehaviour of the
timekeeping code.

To work around the problem, we make KVM ignore the timebase offset for
all guests on POWER9 DD1 machines.  This means that live migration
cannot be supported on POWER9 DD1 machines.

Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index fd4d978d5257..8d1a365b8edc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
 		break;
 	case KVM_REG_PPC_TB_OFFSET:
+		/*
+		 * POWER9 DD1 has an erratum where writing TBU40 causes
+		 * the timebase to lose ticks.  So we don't let the
+		 * timebase offset be changed on P9 DD1.  (It is
+		 * initialized to zero.)
+		 */
+		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+			break;
 		/* round up to multiple of 2^24 */
 		vcpu->arch.vcore->tb_offset =
 			ALIGN(set_reg_val(id, *val), 1UL << 24);
-- 
cgit v1.2.3


From a9f8553e935f26cb5447f67e280946b0923cd2dc Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 1 Jun 2017 16:18:15 +0530
Subject: powerpc/kprobes: Pause function_graph tracing during jprobes handling

This fixes a crash when function_graph and jprobes are used together.
This is essentially commit 237d28db036e ("ftrace/jprobes/x86: Fix
conflict between jprobes and function graph tracing"), but for powerpc.

Jprobes breaks function_graph tracing since the jprobe hook needs to use
jprobe_return(), which never returns back to the hook, but instead to
the original jprobe'd function. The solution is to momentarily pause
function_graph tracing before invoking the jprobe hook and re-enable it
when returning back to the original jprobe'd function.

Fixes: 6794c78243bf ("powerpc64: port of the function graph tracer")
Cc: stable@vger.kernel.org # v2.6.30+
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/kprobes.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc4343514bed..5075a4d6f1d7 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -617,6 +617,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
 #endif
 
+	/*
+	 * jprobes use jprobe_return() which skips the normal return
+	 * path of the function, and this messes up the accounting of the
+	 * function graph tracer.
+	 *
+	 * Pause function graph tracing while performing the jprobe function.
+	 */
+	pause_graph_tracing();
+
 	return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +651,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	 * saved regs...
 	 */
 	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* It's OK to start function graph tracing again */
+	unpause_graph_tracing();
 	preempt_enable_no_resched();
 	return 1;
 }
-- 
cgit v1.2.3


From a4979a7e71eb8da976cbe4a0a1fa50636e76b04f Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 1 Jun 2017 16:18:16 +0530
Subject: powerpc/ftrace: Pass the correct stack pointer for
 DYNAMIC_FTRACE_WITH_REGS

For DYNAMIC_FTRACE_WITH_REGS, we should be passing-in the original set
of registers in pt_regs, to capture the state _before_ ftrace_caller.
However, we are instead passing the stack pointer *after* allocating a
stack frame in ftrace_caller. Fix this by saving the proper value of r1
in pt_regs. Also, use SAVE_10GPRS() to simplify the code.

Fixes: 153086644fd1 ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI")
Cc: stable@vger.kernel.org # v4.6+
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..fa0921410fa4 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
 	stdu	r1,-SWITCH_FRAME_SIZE(r1)
 
 	/* Save all gprs to pt_regs */
-	SAVE_8GPRS(0,r1)
-	SAVE_8GPRS(8,r1)
-	SAVE_8GPRS(16,r1)
-	SAVE_8GPRS(24,r1)
+	SAVE_GPR(0, r1)
+	SAVE_10GPRS(2, r1)
+	SAVE_10GPRS(12, r1)
+	SAVE_10GPRS(22, r1)
+
+	/* Save previous stack pointer (r1) */
+	addi	r8, r1, SWITCH_FRAME_SIZE
+	std	r8, GPR1(r1)
 
 	/* Load special regs for save below */
 	mfmsr   r8
@@ -103,10 +107,10 @@ ftrace_call:
 #endif
 
 	/* Restore gprs */
-	REST_8GPRS(0,r1)
-	REST_8GPRS(8,r1)
-	REST_8GPRS(16,r1)
-	REST_8GPRS(24,r1)
+	REST_GPR(0,r1)
+	REST_10GPRS(2,r1)
+	REST_10GPRS(12,r1)
+	REST_10GPRS(22,r1)
 
 	/* Restore possibly modified LR */
 	ld	r0, _LINK(r1)
-- 
cgit v1.2.3


From c05b8c4474c03026aaa7f8872e78369f69f1bb08 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 1 Jun 2017 16:18:17 +0530
Subject: powerpc/kprobes: Skip livepatch_handler() for jprobes

ftrace_caller() depends on a modified regs->nip to detect if a certain
function has been livepatched. However, with KPROBES_ON_FTRACE, it is
possible for regs->nip to have been modified by the kprobes pre_handler
(jprobes, for instance). In this case, we do not want to invoke the
livepatch_handler so as not to consume the livepatch stack.

To distinguish between the two (kprobes and livepatch), we check if
there is an active kprobe on the current function. If there is, then we
know for sure that it must have modified the NIP as we don't support
livepatching a kprobe'd function. In this case, we simply skip the
livepatch_handler and branch to the new NIP. Otherwise, the
livepatch_handler is invoked.

Fixes: ead514d5fb30 ("powerpc/kprobes: Add support for KPROBES_ON_FTRACE")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/kprobes.h             |  1 +
 arch/powerpc/kernel/kprobes.c                  |  6 ++++
 arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 39 ++++++++++++++++++++++----
 3 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_handler(struct pt_regs *regs);
 extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
 			   struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 5075a4d6f1d7..01addfb0ed0a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
 
+int is_current_kprobe_addr(unsigned long addr)
+{
+	struct kprobe *p = kprobe_running();
+	return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
 	return  (addr >= (unsigned long)__kprobes_text_start &&
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index fa0921410fa4..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -99,13 +99,39 @@ ftrace_call:
 	bl	ftrace_stub
 	nop
 
-	/* Load ctr with the possibly modified NIP */
-	ld	r3, _NIP(r1)
-	mtctr	r3
+	/* Load the possibly modified NIP */
+	ld	r15, _NIP(r1)
+
 #ifdef CONFIG_LIVEPATCH
-	cmpd	r14,r3		/* has NIP been altered? */
+	cmpd	r14, r15	/* has NIP been altered? */
 #endif
 
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+	/* NIP has not been altered, skip over further checks */
+	beq	1f
+
+	/* Check if there is an active kprobe on us */
+	subi	r3, r14, 4
+	bl	is_current_kprobe_addr
+	nop
+
+	/*
+	 * If r3 == 1, then this is a kprobe/jprobe.
+	 * else, this is livepatched function.
+	 *
+	 * The conditional branch for livepatch_handler below will use the
+	 * result of this comparison. For kprobe/jprobe, we just need to branch to
+	 * the new NIP, not call livepatch_handler. The branch below is bne, so we
+	 * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+	 * CR0[EQ] = (r3 == 1).
+	 */
+	cmpdi	r3, 1
+1:
+#endif
+
+	/* Load CTR with the possibly modified NIP */
+	mtctr	r15
+
 	/* Restore gprs */
 	REST_GPR(0,r1)
 	REST_10GPRS(2,r1)
@@ -123,7 +149,10 @@ ftrace_call:
 	addi r1, r1, SWITCH_FRAME_SIZE
 
 #ifdef CONFIG_LIVEPATCH
-        /* Based on the cmpd above, if the NIP was altered handle livepatch */
+        /*
+	 * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+	 * not on a kprobe/jprobe, then handle livepatch.
+	 */
 	bne-	livepatch_handler
 #endif
 
-- 
cgit v1.2.3


From d89ba5353f301971dd7d2f9fdf25c4432728f38e Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Wed, 14 Jun 2017 00:12:00 +0530
Subject: powerpc/64s: Handle data breakpoints in Radix mode

On Power9, trying to use data breakpoints throws the splat shown
below. This is because the check for a data breakpoint in DSISR is in
do_hash_page(), which is not called when in Radix mode.

  Unable to handle kernel paging request for data at address 0xc000000000e19218
  Faulting instruction address: 0xc0000000001155e8
  cpu 0x0: Vector: 300 (Data Access) at [c0000000ef1e7b20]
  pc: c0000000001155e8: find_pid_ns+0x48/0xe0
  lr: c000000000116ac4: find_task_by_vpid+0x44/0x90
  sp: c0000000ef1e7da0
  msr: 9000000000009033
  dar: c000000000e19218
  dsisr: 400000

Move the check to handle_page_fault() so as to catch data breakpoints
in both Hash and Radix MMU modes.

We have to change the check in do_hash_page() against 0xa410 to use
0xa450, so as to include the value of (DSISR_DABRMATCH << 16).

There are two sites that call handle_page_fault() when in Radix, both
already pass DSISR in r4.

Fixes: caca285e5ab4 ("powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code")
Cc: stable@vger.kernel.org # v4.7+
Reported-by: Shriya R. Kulkarni <shriykul@in.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
[mpe: Fix the fall-through case on hash, we need to reload DSISR]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/exceptions-64s.S | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..b886795060fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
 	.balign	IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_STD_MMU_64
-	andis.	r0,r4,0xa410		/* weird error? */
+	andis.	r0,r4,0xa450		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
-	andis.  r0,r4,DSISR_DABRMATCH@h
-	bne-    handle_dabr_fault
 	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
 
 	/* Error */
 	blt-	13f
+
+	/* Reload DSISR into r4 for the DABR check below */
+	ld      r4,_DSISR(r1)
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 /* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
-11:	ld	r4,_DAR(r1)
+11:	andis.  r0,r4,DSISR_DABRMATCH@h
+	bne-    handle_dabr_fault
+	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_page_fault
-- 
cgit v1.2.3


From bf05fc25f268cd62f147f368fe65ad3e5b04fe9f Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Thu, 15 Jun 2017 19:16:48 +0530
Subject: powerpc/perf: Fix oops when kthread execs user process

When a kthread calls call_usermodehelper() the steps are:
  1. allocate current->mm
  2. load_elf_binary()
  3. populate current->thread.regs

While doing this, interrupts are not disabled. If there is a perf
interrupt in the middle of this process (i.e. step 1 has completed
but not yet reached to step 3) and if perf tries to read userspace
regs, kernel oops with following log:

  Unable to handle kernel paging request for data at address 0x00000000
  Faulting instruction address: 0xc0000000000da0fc
  ...
  Call Trace:
  perf_output_sample_regs+0x6c/0xd0
  perf_output_sample+0x4e4/0x830
  perf_event_output_forward+0x64/0x90
  __perf_event_overflow+0x8c/0x1e0
  record_and_restart+0x220/0x5c0
  perf_event_interrupt+0x2d8/0x4d0
  performance_monitor_exception+0x54/0x70
  performance_monitor_common+0x158/0x160
  --- interrupt: f01 at avtab_search_node+0x150/0x1a0
      LR = avtab_search_node+0x100/0x1a0
  ...
  load_elf_binary+0x6e8/0x15a0
  search_binary_handler+0xe8/0x290
  do_execveat_common.isra.14+0x5f4/0x840
  call_usermodehelper_exec_async+0x170/0x210
  ret_from_kernel_thread+0x5c/0x7c

Fix it by setting abi to PERF_SAMPLE_REGS_ABI_NONE when userspace
pt_regs are not set.

Fixes: ed4a4ef85cf5 ("powerpc/perf: Add support for sampling interrupt register state")
Cc: stable@vger.kernel.org # v4.7+
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/perf_regs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 			struct pt_regs *regs_user_copy)
 {
 	regs_user->regs = task_pt_regs(current);
-	regs_user->abi  = perf_reg_abi(current);
+	regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+			 PERF_SAMPLE_REGS_ABI_NONE;
 }
-- 
cgit v1.2.3


From dbb236c1ceb697a559e0694ac4c9e7b9131d0b16 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 8 Jun 2017 16:44:22 -0700
Subject: arm64/vdso: Fix nsec handling for CLOCK_MONOTONIC_RAW

Recently vDSO support for CLOCK_MONOTONIC_RAW was added in
49eea433b326 ("arm64: Add support for CLOCK_MONOTONIC_RAW in
clock_gettime() vDSO"). Noticing that the core timekeeping code
never set tkr_raw.xtime_nsec, the vDSO implementation didn't
bother exposing it via the data page and instead took the
unshifted tk->raw_time.tv_nsec value which was then immediately
shifted left in the vDSO code.

Unfortunately, by accellerating the MONOTONIC_RAW clockid, it
uncovered potential 1ns time inconsistencies caused by the
timekeeping core not handing sub-ns resolution.

Now that the core code has been fixed and is actually setting
tkr_raw.xtime_nsec, we need to take that into account in the
vDSO by adding it to the shifted raw_time value, in order to
fix the user-visible inconsistency. Rather than do that at each
use (and expand the data page in the process), instead perform
the shift/addition operation when populating the data page and
remove the shift from the vDSO code entirely.

[jstultz: minor whitespace tweak, tried to improve commit
 message to make it more clear this fixes a regression]
Reported-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Tested-by: Daniel Mentz <danielmentz@google.com>
Acked-by: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: "stable #4 . 8+" <stable@vger.kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Link: http://lkml.kernel.org/r/1496965462-20003-4-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm64/kernel/vdso.c              | 5 +++--
 arch/arm64/kernel/vdso/gettimeofday.S | 1 -
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 41b6e31f8f55..d0cb007fa482 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk)
 		/* tkr_mono.cycle_last == tkr_raw.cycle_last */
 		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
 		vdso_data->raw_time_sec		= tk->raw_time.tv_sec;
-		vdso_data->raw_time_nsec	= tk->raw_time.tv_nsec;
+		vdso_data->raw_time_nsec	= (tk->raw_time.tv_nsec <<
+						   tk->tkr_raw.shift) +
+						  tk->tkr_raw.xtime_nsec;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
 		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
-		/* tkr_raw.xtime_nsec == 0 */
 		vdso_data->cs_mono_mult		= tk->tkr_mono.mult;
 		vdso_data->cs_raw_mult		= tk->tkr_raw.mult;
 		/* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index e00b4671bd7c..76320e920965 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -256,7 +256,6 @@ monotonic_raw:
 	seqcnt_check fail=monotonic_raw
 
 	/* All computations are done with left-shifted nsecs. */
-	lsl	x14, x14, x12
 	get_nsec_per_sec res=x9
 	lsl	x9, x9, x12
 
-- 
cgit v1.2.3


From e27a9eca5d4a392b96ce5d5238c8d637bcb0a52c Mon Sep 17 00:00:00 2001
From: James Cowgill <James.Cowgill@imgtec.com>
Date: Tue, 20 Jun 2017 10:57:51 +0100
Subject: KVM: MIPS: Fix maybe-uninitialized build failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit fixes a "maybe-uninitialized" build failure in
arch/mips/kvm/tlb.c when KVM, DYNAMIC_DEBUG and JUMP_LABEL are all
enabled. The failure is:

In file included from ./include/linux/printk.h:329:0,
                 from ./include/linux/kernel.h:13,
                 from ./include/asm-generic/bug.h:15,
                 from ./arch/mips/include/asm/bug.h:41,
                 from ./include/linux/bug.h:4,
                 from ./include/linux/thread_info.h:11,
                 from ./include/asm-generic/current.h:4,
                 from ./arch/mips/include/generated/asm/current.h:1,
                 from ./include/linux/sched.h:11,
                 from arch/mips/kvm/tlb.c:13:
arch/mips/kvm/tlb.c: In function ‘kvm_mips_host_tlb_inv’:
./include/linux/dynamic_debug.h:126:3: error: ‘idx_kernel’ may be used uninitialized in this function [-Werror=maybe-uninitialized]
   __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \
   ^~~~~~~~~~~~~~~~~~
arch/mips/kvm/tlb.c:169:16: note: ‘idx_kernel’ was declared here
  int idx_user, idx_kernel;
                ^~~~~~~~~~

There is a similar error relating to "idx_user". Both errors were
observed with GCC 6.

As far as I can tell, it is impossible for either idx_user or idx_kernel
to be uninitialized when they are later read in the calls to kvm_debug,
but to satisfy the compiler, add zero initializers to both variables.

Signed-off-by: James Cowgill <James.Cowgill@imgtec.com>
Fixes: 57e3869cfaae ("KVM: MIPS/TLB: Generalise host TLB invalidate to kernel ASID")
Cc: <stable@vger.kernel.org> # 4.11+
Acked-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/mips/kvm/tlb.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 7c6336dd2638..7cd92166a0b9 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
 int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
 			  bool user, bool kernel)
 {
-	int idx_user, idx_kernel;
+	/*
+	 * Initialize idx_user and idx_kernel to workaround bogus
+	 * maybe-initialized warning when using GCC 6.
+	 */
+	int idx_user = 0, idx_kernel = 0;
 	unsigned long flags, old_entryhi;
 
 	local_irq_save(flags);
-- 
cgit v1.2.3


From fb3a5055cd7098f8d1dd0cd38d7172211113255f Mon Sep 17 00:00:00 2001
From: Kan Liang <Kan.liang@intel.com>
Date: Mon, 19 Jun 2017 07:26:09 -0700
Subject: perf/x86/intel: Add 1G DTLB load/store miss support for SKL

Current DTLB load/store miss events (0x608/0x649) only counts 4K,2M and
4M page size.
Need to extend the events to support any page size (4K/2M/4M/1G).

The complete DTLB load/store miss events are:

  DTLB_LOAD_MISSES.WALK_COMPLETED		0xe08
  DTLB_STORE_MISSES.WALK_COMPLETED		0xe49

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Cc: <stable@vger.kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/20170619142609.11058-1-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6d91d4e37a1..110ce8238466 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids
  [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
-		[ C(RESULT_MISS)   ] = 0x608,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+		[ C(RESULT_MISS)   ] = 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
-		[ C(RESULT_MISS)   ] = 0x649,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+		[ C(RESULT_MISS)   ] = 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
 	},
 	[ C(OP_PREFETCH) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0,
-- 
cgit v1.2.3


From addb63c18a0d52a9ce2611d039f981f7b6148d2b Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 19 Jun 2017 08:02:28 +0200
Subject: KVM: s390: gaccess: fix real-space designation asce handling for gmap
 shadows

For real-space designation asces the asce origin part is only a token.
The asce token origin must not be used to generate an effective
address for storage references. This however is erroneously done
within kvm_s390_shadow_tables().

Furthermore within the same function the wrong parts of virtual
addresses are used to generate a corresponding real address
(e.g. the region second index is used as region first index).

Both of the above can result in incorrect address translations. Only
for real space designations with a token origin of zero and addresses
below one megabyte the translation was correct.

Furthermore replace a "!asce.r" statement with a "!*fake" statement to
make it more obvious that a specific condition has nothing to do with
the architecture, but with the fake handling of real space designations.

Fixes: 3218f7094b6b ("s390/mm: support real-space for gmap shadows")
Cc: David Hildenbrand <david@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/gaccess.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9da243d94cc3..3b297fa3aa67 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 	ptr = asce.origin * 4096;
 	if (asce.r) {
 		*fake = 1;
+		ptr = 0;
 		asce.dt = ASCE_TYPE_REGION1;
 	}
 	switch (asce.dt) {
 	case ASCE_TYPE_REGION1:
-		if (vaddr.rfx01 > asce.tl && !asce.r)
+		if (vaddr.rfx01 > asce.tl && !*fake)
 			return PGM_REGION_FIRST_TRANS;
 		break;
 	case ASCE_TYPE_REGION2:
@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 		union region1_table_entry rfte;
 
 		if (*fake) {
-			/* offset in 16EB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+			ptr += (unsigned long) vaddr.rfx << 53;
 			rfte.val = ptr;
 			goto shadow_r2t;
 		}
@@ -1036,8 +1036,7 @@ shadow_r2t:
 		union region2_table_entry rste;
 
 		if (*fake) {
-			/* offset in 8PB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+			ptr += (unsigned long) vaddr.rsx << 42;
 			rste.val = ptr;
 			goto shadow_r3t;
 		}
@@ -1064,8 +1063,7 @@ shadow_r3t:
 		union region3_table_entry rtte;
 
 		if (*fake) {
-			/* offset in 4TB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+			ptr += (unsigned long) vaddr.rtx << 31;
 			rtte.val = ptr;
 			goto shadow_sgt;
 		}
@@ -1101,8 +1099,7 @@ shadow_sgt:
 		union segment_table_entry ste;
 
 		if (*fake) {
-			/* offset in 2G guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+			ptr += (unsigned long) vaddr.sx << 20;
 			ste.val = ptr;
 			goto shadow_pgt;
 		}
-- 
cgit v1.2.3


From bbd5ff50afffcf4a01d05367524736c57607a478 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Tue, 20 Jun 2017 18:37:28 +1000
Subject: powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD

NPU2 requires an extra explicit flush to an active GPU PID when
sending address translation shoot downs (ATSDs) to reliably flush the
GPU TLB. This patch adds just such a flush at the end of each sequence
of ATSDs.

We can safely use PID 0 which is always reserved and active on the
GPU. PID 0 is only used for init_mm which will never be a user mm on
the GPU. To enforce this we add a check in pnv_npu2_init_context()
just in case someone tries to use PID 0 on the GPU.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
[mpe: Use true/false for bool literals]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/npu-dma.c | 94 ++++++++++++++++++++++----------
 1 file changed, 65 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b46207..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
 	return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	/* Invalidating the entire process doesn't use a va */
 	return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid)
+			unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
+static void mmio_invalidate_wait(
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+	struct npu *npu;
+	int i, reg;
+
+	/* Wait for all invalidations to complete */
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* Wait for completion */
+		npu = mmio_atsd_reg[i].npu;
+		reg = mmio_atsd_reg[i].reg;
+		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+			cpu_relax();
+
+		put_mmio_atsd_reg(npu, reg);
+
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		if (flush)
+			mmio_invalidate_pid(npu, 0, true);
+	}
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-			unsigned long address)
+			unsigned long address, bool flush)
 {
-	int i, j, reg;
+	int i, j;
 	struct npu *npu;
 	struct pnv_phb *nphb;
 	struct pci_dev *npdev;
-	struct {
-		struct npu *npu;
-		int reg;
-	} mmio_atsd_reg[NV_MAX_NPUS];
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
 	/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
 			if (va)
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_va(npu, address, pid);
+					mmio_invalidate_va(npu, address, pid,
+							flush);
 			else
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_pid(npu, pid);
+					mmio_invalidate_pid(npu, pid, flush);
 
 			/*
 			 * The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 */
 	flush_tlb_mm(npu_context->mm);
 
-	/* Wait for all invalidations to complete */
-	for (i = 0; i <= max_npu2_index; i++) {
-		if (mmio_atsd_reg[i].reg < 0)
-			continue;
-
-		/* Wait for completion */
-		npu = mmio_atsd_reg[i].npu;
-		reg = mmio_atsd_reg[i].reg;
-		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-			cpu_relax();
-		put_mmio_atsd_reg(npu, reg);
-	}
+	mmio_invalidate_wait(mmio_atsd_reg, flush);
+	if (flush)
+		/* Wait for the flush to complete */
+		mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
 	 * There should be no more translation requests for this PID, but we
 	 * need to ensure any entries for it are removed from the TLB.
 	 */
-	mmio_invalidate(npu_context, 0, 0);
+	mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address <= end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address);
+	for (address = start; address < end; address += PAGE_SIZE)
+		mmio_invalidate(npu_context, 1, address, false);
+
+	/* Do the flush only on the final addess == end */
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		/* No nvlink associated with this GPU device */
 		return ERR_PTR(-ENODEV);
 
-	if (!mm) {
-		/* kernel thread contexts are not supported */
+	if (!mm || mm->context.id == 0) {
+		/*
+		 * Kernel thread contexts are not supported and context id 0 is
+		 * reserved on the GPU.
+		 */
 		return ERR_PTR(-EINVAL);
 	}
 
-- 
cgit v1.2.3


From c8401dda2f0a00cd25c0af6a95ed50e478d25de4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 7 Jun 2017 15:13:14 +0200
Subject: KVM: x86: fix singlestepping over syscall
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TF is handled a bit differently for syscall and sysret, compared
to the other instructions: TF is checked after the instruction completes,
so that the OS can disable #DB at a syscall by adding TF to FMASK.
When the sysret is executed the #DB is taken "as if" the syscall insn
just completed.

KVM emulates syscall so that it can trap 32-bit syscall on Intel processors.
Fix the behavior, otherwise you could get #DB on a user stack which is not
nice.  This does not affect Linux guests, as they use an IST or task gate
for #DB.

This fixes CVE-2017-7518.

Cc: stable@vger.kernel.org
Reported-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  1 +
 arch/x86/kvm/emulate.c             |  1 +
 arch/x86/kvm/x86.c                 | 62 ++++++++++++++++++++------------------
 3 files changed, 34 insertions(+), 30 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 055962615779..722d0e568863 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
 
 	bool perm_ok; /* do not check permissions if true */
 	bool ud;	/* inject an #UD if host doesn't support insn */
+	bool tf;	/* TF value before instruction (after for syscall/sysret) */
 
 	bool have_exception;
 	struct x86_exception exception;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0816ab2e8adc..80890dee66ce 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
 	}
 
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
 	return X86EMUL_CONTINUE;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 87d3cb901935..0e846f0cb83b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
 	ctxt->eip = kvm_rip_read(vcpu);
 	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
 		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
 	return dr6;
 }
 
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 {
 	struct kvm_run *kvm_run = vcpu->run;
 
-	/*
-	 * rflags is the old, "raw" value of the flags.  The new value has
-	 * not been saved yet.
-	 *
-	 * This is correct even for TF set by the guest, because "the
-	 * processor will not generate this exception after the instruction
-	 * that sets the TF flag".
-	 */
-	if (unlikely(rflags & X86_EFLAGS_TF)) {
-		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-			kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
-						  DR6_RTM;
-			kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
-			kvm_run->debug.arch.exception = DB_VECTOR;
-			kvm_run->exit_reason = KVM_EXIT_DEBUG;
-			*r = EMULATE_USER_EXIT;
-		} else {
-			/*
-			 * "Certain debug exceptions may clear bit 0-3.  The
-			 * remaining contents of the DR6 register are never
-			 * cleared by the processor".
-			 */
-			vcpu->arch.dr6 &= ~15;
-			vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-			kvm_queue_exception(vcpu, DB_VECTOR);
-		}
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+		kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		*r = EMULATE_USER_EXIT;
+	} else {
+		/*
+		 * "Certain debug exceptions may clear bit 0-3.  The
+		 * remaining contents of the DR6 register are never
+		 * cleared by the processor".
+		 */
+		vcpu->arch.dr6 &= ~15;
+		vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+		kvm_queue_exception(vcpu, DB_VECTOR);
 	}
 }
 
@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	int r = EMULATE_DONE;
 
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+	/*
+	 * rflags is the old, "raw" value of the flags.  The new value has
+	 * not been saved yet.
+	 *
+	 * This is correct even for TF set by the guest, because "the
+	 * processor will not generate this exception after the instruction
+	 * that sets the TF flag".
+	 */
+	if (unlikely(rflags & X86_EFLAGS_TF))
+		kvm_vcpu_do_singlestep(vcpu, &r);
 	return r == EMULATE_DONE;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5726,8 +5727,9 @@ restart:
 		toggle_interruptibility(vcpu, ctxt->interruptibility);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 		kvm_rip_write(vcpu, ctxt->eip);
-		if (r == EMULATE_DONE)
-			kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+		if (r == EMULATE_DONE &&
+		    (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+			kvm_vcpu_do_singlestep(vcpu, &r);
 		if (!ctxt->have_exception ||
 		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
 			__kvm_set_rflags(vcpu, ctxt->eflags);
-- 
cgit v1.2.3


From 34f19ff1b5a0d11e46df479623d6936460105c9f Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Jun 2017 15:58:29 +1000
Subject: powerpc/64: Initialise thread_info for emergency stacks

Emergency stacks have their thread_info mostly uninitialised, which in
particular means garbage preempt_count values.

Emergency stack code runs with interrupts disabled entirely, and is
used very rarely, so this has been unnoticed so far. It was found by a
proposed new powerpc watchdog that takes a soft-NMI directly from the
masked_interrupt handler and using the emergency stack. That crashed
at BUG_ON(in_nmi()) in nmi_enter(). preempt_count()s were found to be
garbage.

To fix this, zero the entire THREAD_SIZE allocation, and initialize
the thread_info.

Cc: stable@vger.kernel.org
Reported-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Move it all into setup_64.c, use a function not a macro. Fix
      crashes on Cell by setting preempt_count to 0 not HARDIRQ_OFFSET]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup_64.c | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a8c1f99e9607..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -615,6 +615,24 @@ void __init exc_lvl_early_init(void)
 }
 #endif
 
+/*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+	ti->task = NULL;
+	ti->cpu = cpu;
+	ti->preempt_count = 0;
+	ti->local_flags = 0;
+	ti->flags = 0;
+	klp_init_thread_info(ti);
+}
+
 /*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled. Exclusive emergency
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
 	 * Since we use these as temporary stacks during secondary CPU
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
+	 *
+	 * The IRQ stacks allocated elsewhere in this file are zeroed and
+	 * initialized in kernel/irq.c. These are initialized here in order
+	 * to have emergency stacks available as early as possible.
 	 */
 	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		struct thread_info *ti;
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
 	}
-- 
cgit v1.2.3


From 26fcd952d5c977a94ac64bb44ed409e37607b2c9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 23 Jun 2017 10:50:38 +0200
Subject: x86/mshyperv: Remove excess #includes from mshyperv.h

A recent commit included linux/slab.h in linux/irq.h. This breaks the build
of vdso32 on a 64-bit kernel.

The reason is that linux/irq.h gets included into the vdso code via
linux/interrupt.h which is included from asm/mshyperv.h. That makes the
32-bit vdso compile fail, because slab.h includes the pgtable headers for
64-bit on a 64-bit build.

Neither linux/clocksource.h nor linux/interrupt.h are needed in the
mshyperv.h header file itself - it has a dependency on <linux/atomic.h>.

Remove the includes and unbreak the build.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: devel@linuxdriverproject.org
Fixes: dee863b571b0 ("hv: export current Hyper-V clocksource")
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1706231038460.2647@nanos
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mshyperv.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fba100713924..d5acc27ed1cc 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -2,8 +2,7 @@
 #define _ASM_X86_MSHYPER_H
 
 #include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
 #include <asm/hyperv.h>
 
 /*
-- 
cgit v1.2.3


From 6474924e2b5ddb0030c355558966adcbe3b49022 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 28 Jun 2017 15:30:02 +0200
Subject: arch: remove unused macro/function thread_saved_pc()

The only user of thread_saved_pc() in non-arch-specific code was removed
in commit 8243d5597793 ("sched/core: Remove pointless printout in
sched_show_task()").  Remove the implementations as well.

Some architectures use thread_saved_pc() in their arch-specific code.
Leave their thread_saved_pc() intact.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/include/asm/processor.h        |  2 --
 arch/blackfin/include/asm/processor.h   |  5 -----
 arch/c6x/include/asm/processor.h        |  5 -----
 arch/cris/arch-v10/kernel/process.c     |  8 --------
 arch/cris/arch-v32/kernel/process.c     |  8 --------
 arch/cris/include/asm/processor.h       |  2 --
 arch/frv/include/asm/processor.h        |  5 -----
 arch/frv/kernel/process.c               |  9 ---------
 arch/h8300/include/asm/processor.h      |  4 ----
 arch/h8300/kernel/process.c             |  5 -----
 arch/hexagon/include/asm/processor.h    |  3 ---
 arch/hexagon/kernel/process.c           |  8 --------
 arch/ia64/include/asm/processor.h       | 17 -----------------
 arch/m32r/include/asm/processor.h       |  2 --
 arch/m32r/kernel/process.c              |  8 --------
 arch/m68k/include/asm/processor.h       |  2 --
 arch/m68k/kernel/process.c              | 14 --------------
 arch/microblaze/include/asm/processor.h |  6 ------
 arch/microblaze/kernel/process.c        | 17 -----------------
 arch/mn10300/include/asm/processor.h    |  5 -----
 arch/mn10300/kernel/process.c           |  8 --------
 arch/nios2/include/asm/processor.h      |  3 ---
 arch/openrisc/include/asm/processor.h   |  5 -----
 arch/openrisc/kernel/process.c          |  5 -----
 arch/parisc/include/asm/processor.h     |  5 -----
 arch/parisc/kernel/process.c            |  5 -----
 arch/powerpc/include/asm/processor.h    |  6 ------
 arch/s390/include/asm/processor.h       |  5 -----
 arch/s390/kernel/process.c              | 25 -------------------------
 arch/score/include/asm/processor.h      |  1 -
 arch/score/kernel/process.c             |  5 -----
 arch/sparc/include/asm/processor_32.h   |  3 ---
 arch/sparc/include/asm/processor_64.h   |  2 --
 arch/sparc/kernel/process_32.c          |  8 --------
 arch/sparc/kernel/process_64.c          | 19 -------------------
 arch/tile/include/asm/processor.h       |  7 -------
 arch/um/include/asm/processor-generic.h |  2 --
 arch/um/kernel/um_arch.c                |  6 ------
 arch/x86/include/asm/processor.h        |  2 --
 arch/x86/kernel/process.c               | 11 -----------
 arch/xtensa/include/asm/processor.h     |  2 --
 41 files changed, 270 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 6e1242da0159..4104a0839214 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -86,8 +86,6 @@ struct task_struct;
 #define TSK_K_BLINK(tsk)	TSK_K_REG(tsk, 4)
 #define TSK_K_FP(tsk)		TSK_K_REG(tsk, 0)
 
-#define thread_saved_pc(tsk)	TSK_K_BLINK(tsk)
-
 extern void start_thread(struct pt_regs * regs, unsigned long pc,
 			 unsigned long usp);
 
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 85d4af97c986..dbdbb8a558df 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk)	(tsk->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)							\
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
index b9eb3da7f278..7c87b5be53b5 100644
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -95,11 +95,6 @@ static inline void release_thread(struct task_struct *dead_task)
 #define copy_segments(tsk, mm)		do { } while (0)
 #define release_segments(mm)		do { } while (0)
 
-/*
- * saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
 /*
  * saved kernel SP and DP of a blocked thread.
  */
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index e299d30105b5..a2cdb1521aca 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -69,14 +69,6 @@ void hard_reset_now (void)
 	while(1) /* waiting for RETRIBUTION! */ ;
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return task_pt_regs(t)->irp;
-}
-
 /* setup the child's kernel stack with a pt_regs and switch_stack on it.
  * it will be un-nested during _resume and _ret_from_sys_call when the
  * new thread is scheduled.
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index c530a8fa87ce..fe87b383fbf3 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -84,14 +84,6 @@ hard_reset_now(void)
 		; /* Wait for reset. */
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return task_pt_regs(t)->erp;
-}
-
 /*
  * Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
  * It will be unnested during _resume and _ret_from_sys_call when the new thread
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h
index 15b815df29c1..bc2729e4b2c9 100644
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_ESP(tsk)   ((tsk) == current ? rdusp() : (tsk)->thread.usp)
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 /* Free all resources held by a thread. */
 static inline void release_thread(struct task_struct *dead_task)
 {
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index ddaeb9cc9143..e4d08d74ed9f 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ...
 #define release_segments(mm)		do { } while (0)
 #define forget_segments()		do { } while (0)
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	((tsk)->thread.frame0->pc)
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 5a4c92abc99e..a957b374e3a6 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p)
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(tsk->thread.pc))
-		return ((unsigned long *)tsk->thread.fp)[2];
-	else
-		return tsk->thread.pc;
-}
-
 int elf_check_arch(const struct elf32_hdr *hdr)
 {
 	unsigned long hsr0 = __get_HSR(0);
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index 65132d7ae9e5..afa53147e66a 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 0f5db5bb561b..d1ddcabbbe83 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags,
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long fp, pc;
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 45a825402f63..ce67940860a5 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -33,9 +33,6 @@
 /*  task_struct, defined elsewhere, is the "process descriptor" */
 struct task_struct;
 
-/*  this is defined in arch/process.c  */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
 
 /*
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index de715bab7956..656050c2e6a0 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -60,14 +60,6 @@ void arch_cpu_idle(void)
 	local_irq_enable();
 }
 
-/*
- *  Return saved PC of a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return 0;
-}
-
 /*
  * Copy architecture-specific thread state
  */
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 26a63d69c599..ab982f07ea68 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -601,23 +601,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
 	*unat = (*unat & ~mask) | (nat << bit);
 }
 
-/*
- * Return saved PC of a blocked thread.
- * Note that the only way T can block is through a call to schedule() -> switch_to().
- */
-static inline unsigned long
-thread_saved_pc (struct task_struct *t)
-{
-	struct unw_frame_info info;
-	unsigned long ip;
-
-	unw_init_from_blocked_task(&info, t);
-	if (unw_unwind(&info) < 0)
-		return 0;
-	unw_get_ip(&info, &ip);
-	return ip;
-}
-
 /*
  * Get the current instruction/program counter value.
  */
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 5767367550c6..657874eeeccc 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *);
 extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
 extern void release_segments(struct mm_struct * mm);
 
-extern unsigned long thread_saved_pc(struct task_struct *);
-
 /* Copy and release all segment info associated with a VM */
 #define copy_segments(p, mm)  do { } while (0)
 #define release_segments(mm)  do { } while (0)
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index d8ffcfec599c..8cd7e03f4370 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -39,14 +39,6 @@
 
 #include <linux/err.h>
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return tsk->thread.lr;
-}
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index 77239e81379b..94c36030440c 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index e475c945c8b2..7df92f8b0781 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,20 +40,6 @@
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(sw->retpc))
-		return ((unsigned long *)sw->a6)[1];
-	else
-		return sw->retpc;
-}
-
 void arch_cpu_idle(void)
 {
 #if defined(MACH_ATARI_ONLY)
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 37ef196e4519..330d556860ba 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 # define KSTK_EIP(tsk)	(0)
@@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved (kernel) PC of a blocked thread.  */
-#  define thread_saved_pc(tsk)	\
-	((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* The size allocated for kernel stacks. This _must_ be a power of two! */
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index e92a817e645f..6527ec22f158 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-#ifndef CONFIG_MMU
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct cpu_context *ctx =
-		&(((struct thread_info *)(tsk->stack))->cpu_context);
-
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(ctx->r15))
-		return (unsigned long)ctx->r15;
-	else
-		return ctx->r14;
-}
-#endif
-
 unsigned long get_wchan(struct task_struct *p)
 {
 /* TBD (used by procfs) */
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 18e17abf7664..3ae479117b42 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs,
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(task) ((task)->thread.uregs)
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index c9fa42619c6a..89e8027e07fb 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -39,14 +39,6 @@
 #include <asm/gdb-stub.h>
 #include "internal.h"
 
-/*
- * return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((unsigned long *) tsk->thread.sp)[3];
-}
-
 /*
  * power off function, if any
  */
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index 3bbbc3d798e5..4944e2e1d8b0 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved PC of a blocked thread. */
-#define thread_saved_pc(tsk)	((tsk)->thread.kregs->ea)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(p) \
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index a908e6c30a00..396d8f306c21 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 void release_thread(struct task_struct *);
 unsigned long get_wchan(struct task_struct *p);
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 #define init_stack      (init_thread_union.stack)
 
 #define cpu_relax()     barrier()
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 106859ae27ff..f9b77003f113 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs)
 	show_registers(regs);
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return (unsigned long)user_regs(t->stack)->pc;
-}
-
 void release_thread(struct task_struct *dead_task)
 {
 }
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index a3661ee6b060..4c6694b4e77e 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -163,12 +163,7 @@ struct thread_struct {
 	.flags		= 0 \
 	}
 
-/*
- * Return saved PC of a blocked thread.  This is used by ps mostly.
- */
-
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *t);
 void show_trace(struct task_struct *task, unsigned long *stack);
 
 /*
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 4516a5b53f38..b64d7d21646e 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return t->thread.regs.kpc;
-}
-
 unsigned long
 get_wchan(struct task_struct *p)
 {
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index bb99b651085a..1189d04f3bd1 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -378,12 +378,6 @@ struct thread_struct {
 }
 #endif
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk)    \
-        ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.regs)
 
 unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 60d395fdc864..aeac013968f2 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -221,11 +221,6 @@ extern void release_thread(struct task_struct *);
 /* Free guarded storage control block for current */
 void exit_thread_gs(void);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *) \
         (task_stack_page(tsk) + THREAD_SIZE) - 1)
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 999d7154bbdc..bb32b8618bf6 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -41,31 +41,6 @@
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct stack_frame *sf, *low, *high;
-
-	if (!tsk || !task_stack_page(tsk))
-		return 0;
-	low = task_stack_page(tsk);
-	high = (struct stack_frame *) task_pt_regs(tsk);
-	sf = (struct stack_frame *) tsk->thread.ksp;
-	if (sf <= low || sf > high)
-		return 0;
-	sf = (struct stack_frame *) sf->back_chain;
-	if (sf <= low || sf > high)
-		return 0;
-	return sf->gprs[8];
-}
-
 extern void kernel_thread_starter(void);
 
 /*
diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h
index d9a922d8711b..299274581968 100644
--- a/arch/score/include/asm/processor.h
+++ b/arch/score/include/asm/processor.h
@@ -13,7 +13,6 @@ struct task_struct;
  */
 extern void (*cpu_wait)(void);
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
 extern void start_thread(struct pt_regs *regs,
 			unsigned long pc, unsigned long sp);
 extern unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index eb64d7a677cb..6e20241a1ed4 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
 	return 1;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return task_pt_regs(tsk)->cp0_epc;
-}
-
 unsigned long get_wchan(struct task_struct *task)
 {
 	if (!task || task == current || task->state == TASK_RUNNING)
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index dd27159819eb..b395e5620c0b 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -67,9 +67,6 @@ struct thread_struct {
 	.current_ds = KERNEL_DS, \
 }
 
-/* Return saved PC of a blocked thread. */
-unsigned long thread_saved_pc(struct task_struct *t);
-
 /* Do necessary setup to start up a newly executed thread. */
 static inline void start_thread(struct pt_regs * regs, unsigned long pc,
 				    unsigned long sp)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index b58ee9018433..f04dc5a43062 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -89,9 +89,7 @@ struct thread_struct {
 #include <linux/types.h>
 #include <asm/fpumacro.h>
 
-/* Return saved PC of a blocked thread. */
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *);
 
 /* On Uniprocessor, even in RMO processes see TSO semantics */
 #ifdef CONFIG_SMP
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index b6dac8e980f0..9245f93398c7 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -176,14 +176,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 	printk("\n");
 }
 
-/*
- * Note: sparc64 has a pretty intricated thread_saved_pc, check it out.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return task_thread_info(tsk)->kpc;
-}
-
 /*
  * Free current thread data structures etc..
  */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 1badc493e62e..b96104da5bd6 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init);
 
 #endif
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct thread_info *ti = task_thread_info(tsk);
-	unsigned long ret = 0xdeadbeefUL;
-	
-	if (ti && ti->ksp) {
-		unsigned long *sp;
-		sp = (unsigned long *)(ti->ksp + STACK_BIAS);
-		if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
-		    sp[14]) {
-			unsigned long *fp;
-			fp = (unsigned long *)(sp[14] + STACK_BIAS);
-			if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
-				ret = fp[15];
-		}
-	}
-	return ret;
-}
-
 /* Free current thread data structures etc.. */
 void exit_thread(struct task_struct *tsk)
 {
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 0bc9968b97a1..f71e5206650b 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task)
 
 extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags);
 
-
-/*
- * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
- */
-#define thread_saved_pc(t)   ((t)->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* Return initial ksp value for given task. */
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 2d1e0dd5bb0b..f6d1a3f747a9 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 static inline void mm_copy_segments(struct mm_struct *from_mm,
 				    struct mm_struct *new_mm)
 {
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 64a1fd06f3fd..7b5640117325 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -56,12 +56,6 @@ union thread_union cpu0_irqstack
 	__attribute__((__section__(".data..init_irqstack"))) =
 		{ INIT_THREAD_INFO(init_task) };
 
-unsigned long thread_saved_pc(struct task_struct *task)
-{
-	/* FIXME: Need to look up userspace_pid by cpu */
-	return os_process_pc(userspace_pid[0]);
-}
-
 /* Changed in setup_arch, which is called in early boot */
 static char host_info[(__NEW_UTS_LEN + 1) * 5];
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..a28b671f1549 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -860,8 +860,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 					       unsigned long new_sp);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0bb88428cbf2..3ca198080ea9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -544,17 +544,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 	return randomize_page(mm->brk, 0x02000000);
 }
 
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct inactive_task_frame *frame =
-		(struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
-	return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
 /*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 003eeee3fbc6..30ee8c608853 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -213,8 +213,6 @@ struct mm_struct;
 #define release_segments(mm)	do { } while(0)
 #define forget_segments()	do { } while (0)
 
-#define thread_saved_pc(tsk)	(task_pt_regs(tsk)->pc)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->pc)
-- 
cgit v1.2.3