summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-07-25 10:33:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-07-25 10:33:48 -0700
commit3c0ce1497a449b0d150b455628947152c5f6216a (patch)
tree883f3140140d67cc68900956be9dca4280ae1591
parent12e9bd168c85b1e8a8ef2dd2cb34250f29656c71 (diff)
parentd9c57d3ed52a92536f5fa59dc5ccdd58b4875076 (diff)
Merge tag 'powerpc-5.14-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc fixes from Michael Ellerman: - Fix guest to host memory corruption in H_RTAS due to missing nargs check. - Fix guest triggerable host crashes due to bad handling of nested guest TM state. - Fix possible crashes due to incorrect reference counting in kvm_arch_vcpu_ioctl(). - Two commits fixing some regressions in KVM transactional memory handling introduced by the recent rework of the KVM code. Thanks to Nicholas Piggin, Alexey Kardashevskiy, and Michael Neuling. * tag 'powerpc-5.14-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: KVM: PPC: Book3S HV Nested: Sanitise H_ENTER_NESTED TM state KVM: PPC: Book3S: Fix H_RTAS rets buffer overflow KVM: PPC: Fix kvm_arch_vcpu_ioctl vcpu_load leak KVM: PPC: Book3S: Fix CONFIG_TRANSACTIONAL_MEM=n crash KVM: PPC: Book3S HV P9: Fix guest TM support
-rw-r--r--arch/powerpc/kvm/book3s_hv.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c20
-rw-r--r--arch/powerpc/kvm/book3s_hv_p9_entry.c25
-rw-r--r--arch/powerpc/kvm/book3s_rtas.c25
-rw-r--r--arch/powerpc/kvm/powerpc.c4
5 files changed, 68 insertions, 8 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1d1fcc290fca..085fb8ecbf68 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2697,8 +2697,10 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
if (cpu_has_feature(CPU_FTR_HVMODE)) {
vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
vcpu->arch.hfscr |= HFSCR_TM;
+#endif
}
if (cpu_has_feature(CPU_FTR_TM_COMP))
vcpu->arch.hfscr |= HFSCR_TM;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 8543ad538b0c..898f942eb198 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -302,6 +302,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.l1_ptcr == 0)
return H_NOT_AVAILABLE;
+ if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+ return H_BAD_MODE;
+
/* copy parameters in */
hv_ptr = kvmppc_get_gpr(vcpu, 4);
regs_ptr = kvmppc_get_gpr(vcpu, 5);
@@ -322,6 +325,23 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
if (l2_hv.vcpu_token >= NR_CPUS)
return H_PARAMETER;
+ /*
+ * L1 must have set up a suspended state to enter the L2 in a
+ * transactional state, and only in that case. These have to be
+ * filtered out here to prevent causing a TM Bad Thing in the
+ * host HRFID. We could synthesize a TM Bad Thing back to the L1
+ * here but there doesn't seem like much point.
+ */
+ if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
+ if (!MSR_TM_ACTIVE(l2_regs.msr))
+ return H_BAD_MODE;
+ } else {
+ if (l2_regs.msr & MSR_TS_MASK)
+ return H_BAD_MODE;
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
+ return H_BAD_MODE;
+ }
+
/* translate lpid */
l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
if (!l2)
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 83f592eadcd2..961b3d70483c 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -317,6 +317,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDEC, hdec);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tm_return_to_guest:
+#endif
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
@@ -415,11 +418,23 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
* is in real suspend mode and is trying to transition to
* transactional mode.
*/
- if (local_paca->kvm_hstate.fake_suspend &&
+ if (!local_paca->kvm_hstate.fake_suspend &&
(vcpu->arch.shregs.msr & MSR_TS_S)) {
if (kvmhv_p9_tm_emulation_early(vcpu)) {
- /* Prevent it being handled again. */
- trap = 0;
+ /*
+ * Go straight back into the guest with the
+ * new NIP/MSR as set by TM emulation.
+ */
+ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+ mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
+
+ /*
+ * tm_return_to_guest re-loads SRR0/1, DAR,
+ * DSISR after RI is cleared, in case they had
+ * been clobbered by a MCE.
+ */
+ __mtmsrd(0, 1); /* clear RI */
+ goto tm_return_to_guest;
}
}
#endif
@@ -499,6 +514,10 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
* If we are in real mode, only switch MMU on after the MMU is
* switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
*/
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ vcpu->arch.shregs.msr & MSR_TS_MASK)
+ msr |= MSR_TS_S;
+
__mtmsrd(msr, 0);
end_timing(vcpu);
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index c5e677508d3b..0f847f1e5ddd 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -242,6 +242,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
* value so we can restore it on the way out.
*/
orig_rets = args.rets;
+ if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) {
+ /*
+ * Don't overflow our args array: ensure there is room for
+ * at least rets[0] (even if the call specifies 0 nret).
+ *
+ * Each handler must then check for the correct nargs and nret
+ * values, but they may always return failure in rets[0].
+ */
+ rc = -EINVAL;
+ goto fail;
+ }
args.rets = &args.args[be32_to_cpu(args.nargs)];
mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
@@ -269,9 +280,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
fail:
/*
* We only get here if the guest has called RTAS with a bogus
- * args pointer. That means we can't get to the args, and so we
- * can't fail the RTAS call. So fail right out to userspace,
- * which should kill the guest.
+ * args pointer or nargs/nret values that would overflow the
+ * array. That means we can't get to the args, and so we can't
+ * fail the RTAS call. So fail right out to userspace, which
+ * should kill the guest.
+ *
+ * SLOF should actually pass the hcall return value from the
+ * rtas handler call in r3, so enter_rtas could be modified to
+ * return a failure indication in r3 and we could return such
+ * errors to the guest rather than failing to host userspace.
+ * However old guests that don't test for failure could then
+ * continue silently after errors, so for now we won't do this.
*/
return rc;
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index be33b5321a76..b4e6f70b97b9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -2048,9 +2048,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
{
struct kvm_enable_cap cap;
r = -EFAULT;
- vcpu_load(vcpu);
if (copy_from_user(&cap, argp, sizeof(cap)))
goto out;
+ vcpu_load(vcpu);
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
vcpu_put(vcpu);
break;
@@ -2074,9 +2074,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_DIRTY_TLB: {
struct kvm_dirty_tlb dirty;
r = -EFAULT;
- vcpu_load(vcpu);
if (copy_from_user(&dirty, argp, sizeof(dirty)))
goto out;
+ vcpu_load(vcpu);
r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
vcpu_put(vcpu);
break;