author     Linus Torvalds <torvalds@linux-foundation.org>   2020-06-05 12:39:30 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2020-06-05 12:39:30 -0700
commit     7ae77150d94d3b535c7b85e6b3647113095e79bf (patch)
tree       90fe894e7efd92898e813d88acfd4611d79be969 /arch/powerpc/kvm
parent     084623e468d535d98f883cc2ccf2c4fdf2108556 (diff)
parent     1395375c592770fe5158a592944aaeed67fa94ff (diff)
Merge tag 'powerpc-5.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- Support for userspace to send requests directly to the on-chip GZIP
accelerator on Power9.
- Rework of our lockless page table walking (__find_linux_pte()) to
  make it safe against parallel page table manipulations without
  relying on an IPI for serialisation (a sketch of the new walk
  pattern follows this list).
- A series of fixes & enhancements to make our machine check handling
more robust.
- Lots of plumbing to add support for "prefixed" (64-bit) instructions
on Power10.
- Support for using huge pages for the linear mapping on 8xx (32-bit).
- Remove obsolete Xilinx PPC405/PPC440 support, and an associated sound
driver.
- Removal of some obsolete 40x platforms and associated cruft.
- Initial support for booting on Power10.
- Lots of other small features, cleanups & fixes.
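
The lockless page table walk rework mentioned in the list above is what most
of the arch/powerpc/kvm diff further down implements: fault paths stop
walking the process page tables under local_irq_disable() and instead
snapshot kvm->mmu_notifier_seq and walk under kvm->mmu_lock via new helpers
such as find_kvm_host_pte(). The sketch below is illustrative only: the
helper signature is taken from the diff, but the wrapper function, its name
and its return convention are mine, not code from this series.

    /*
     * Minimal sketch of the new walk pattern (kernel context assumed,
     * e.g. #include <linux/kvm_host.h>). Not a drop-in function.
     */
    static unsigned long sketch_lookup_host_pfn(struct kvm *kvm, unsigned long hva)
    {
            unsigned long mmu_seq, pfn = 0;
            unsigned int shift = 0;
            pte_t *ptep, pte;

            /* Snapshot used by mmu_notifier_retry() to detect a parallel invalidate */
            mmu_seq = kvm->mmu_notifier_seq;
            smp_rmb();

            spin_lock(&kvm->mmu_lock);
            ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);  /* helper added by this series */
            if (ptep)
                    pte = READ_ONCE(*ptep);  /* single, torn-free read of the PTE */
            else
                    pte = __pte(0);
            if (pte_present(pte))
                    pfn = pte_pfn(pte);
            spin_unlock(&kvm->mmu_lock);

            return pfn;  /* 0 means "retry the access", as the real callers do */
    }

Because kvm->mmu_lock is the same lock the MMU notifier invalidation path
takes, the walk no longer depends on an IPI (or on interrupts being
disabled) to keep the page tables from being freed underneath it.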
Thanks to: Alexey Kardashevskiy, Alistair Popple, Andrew Donnellan,
Andrey Abramov, Aneesh Kumar K.V, Balamuruhan S, Bharata B Rao, Bulent
Abali, Cédric Le Goater, Chen Zhou, Christian Zigotzky, Christophe
JAILLET, Christophe Leroy, Dmitry Torokhov, Emmanuel Nicolet, Erhard F.,
Gautham R. Shenoy, Geoff Levand, George Spelvin, Greg Kurz, Gustavo A.
R. Silva, Gustavo Walbon, Haren Myneni, Hari Bathini, Joel Stanley,
Jordan Niethe, Kajol Jain, Kees Cook, Leonardo Bras, Madhavan
Srinivasan, Mahesh Salgaonkar, Markus Elfring, Michael Neuling, Michal
Simek, Nathan Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas Piggin,
Oliver O'Halloran, Paul Mackerras, Pingfan Liu, Qian Cai, Ram Pai,
Raphael Moreira Zinsly, Ravi Bangoria, Sam Bobroff, Sandipan Das, Segher
Boessenkool, Stephen Rothwell, Sukadev Bhattiprolu, Tyrel Datwyler,
Wolfram Sang, Xiongfeng Wang.
* tag 'powerpc-5.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (299 commits)
powerpc/pseries: Make vio and ibmebus initcalls pseries specific
cxl: Remove dead Kconfig options
powerpc: Add POWER10 architected mode
powerpc/dt_cpu_ftrs: Add MMA feature
powerpc/dt_cpu_ftrs: Enable Prefixed Instructions
powerpc/dt_cpu_ftrs: Advertise support for ISA v3.1 if selected
powerpc: Add support for ISA v3.1
powerpc: Add new HWCAP bits
powerpc/64s: Don't set FSCR bits in INIT_THREAD
powerpc/64s: Save FSCR to init_task.thread.fscr after feature init
powerpc/64s: Don't let DT CPU features set FSCR_DSCR
powerpc/64s: Don't init FSCR_DSCR in __init_FSCR()
powerpc/32s: Fix another build failure with CONFIG_PPC_KUAP_DEBUG
powerpc/module_64: Use special stub for _mcount() with -mprofile-kernel
powerpc/module_64: Simplify check for -mprofile-kernel ftrace relocations
powerpc/module_64: Consolidate ftrace code
powerpc/32: Disable KASAN with pages bigger than 16k
powerpc/uaccess: Don't set KUEP by default on book3s/32
powerpc/uaccess: Don't set KUAP by default on book3s/32
powerpc/8xx: Reduce time spent in allow_user_access() and friends
...
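
Several of the commits above ("powerpc: Add support for ISA v3.1",
"powerpc: Add new HWCAP bits") expose the new Power10 capabilities to
userspace via AT_HWCAP2. A small self-contained check could look like the
following; the macro values are my reading of the new uapi additions and
should be treated as assumptions, so prefer the definitions shipped by your
kernel/libc headers where available (the #ifndef guards keep them).

    /* build: gcc -o p10check p10check.c ; run on a powerpc64 host */
    #include <stdio.h>
    #include <elf.h>
    #include <sys/auxv.h>

    #ifndef PPC_FEATURE2_ARCH_3_1
    #define PPC_FEATURE2_ARCH_3_1  0x00040000  /* assumed value: ISA v3.1 (Power10) */
    #endif
    #ifndef PPC_FEATURE2_MMA
    #define PPC_FEATURE2_MMA       0x00020000  /* assumed value: Matrix Multiply Assist */
    #endif

    int main(void)
    {
            unsigned long hwcap2 = getauxval(AT_HWCAP2);

            printf("ISA v3.1 (Power10): %s\n",
                   (hwcap2 & PPC_FEATURE2_ARCH_3_1) ? "yes" : "no");
            printf("MMA               : %s\n",
                   (hwcap2 & PPC_FEATURE2_MMA) ? "yes" : "no");
            return 0;
    }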
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c      |  13
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c   |  71
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c      |  66
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c             |  15
-rw-r--r--  arch/powerpc/kvm/book3s_hv_nested.c      |  39
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c      |  60
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  23
-rw-r--r--  arch/powerpc/kvm/book3s_xive_native.c    |   6
-rw-r--r--  arch/powerpc/kvm/book3s_xive_template.c  |   3
-rw-r--r--  arch/powerpc/kvm/emulate_loadstore.c     |   2
10 files changed, 161 insertions, 137 deletions
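
Besides the find_kvm_host_pte() conversion sketched earlier, one more locking
pattern in the diff below is worth calling out: kvm_radix_test_clear_dirty()
now walks the partition-scoped table without kvm->mmu_lock and only takes the
lock to re-validate the PTE once a dirty entry is actually found. The
condensed sketch below uses the helper names visible in the diff
(find_kvm_secondary_pte_unlocked(), kvmppc_radix_update_pte(),
kvmppc_radix_tlbie_page()); the wrapper function itself, and the omission of
the dirty-map bookkeeping, are illustrative simplifications.

    /* Kernel-context sketch of the check-then-recheck pattern; not a drop-in. */
    static int sketch_test_clear_dirty(struct kvm *kvm, unsigned long gpa)
    {
            unsigned int shift;
            pte_t *ptep, pte;

            /* Fast path: no lock while looking for a dirty PTE */
            ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift);
            if (!ptep)
                    return 0;

            pte = READ_ONCE(*ptep);
            if (!pte_present(pte) || !pte_dirty(pte))
                    return 0;

            spin_lock(&kvm->mmu_lock);
            /* The PTE may have changed while the lock was not held: recheck */
            if (pte_val(pte) != pte_val(*ptep) &&
                (!pte_present(*ptep) || !pte_dirty(*ptep))) {
                    spin_unlock(&kvm->mmu_lock);
                    return 0;
            }

            kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, gpa, shift);
            kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
            spin_unlock(&kvm->mmu_lock);
            return 1;
    }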
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2b35f9bcf892..18aed9775a3c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -281,11 +281,10 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 {
 	long ret;
 
-	/* Protect linux PTE lookup from page table destruction */
-	rcu_read_lock_sched();	/* this disables preemption too */
+	preempt_disable();
 	ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
 				kvm->mm->pgd, false, pte_idx_ret);
-	rcu_read_unlock_sched();
+	preempt_enable();
 	if (ret == H_TOO_HARD) {
 		/* this can't happen */
 		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
@@ -602,12 +601,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * Read the PTE from the process' radix tree and use that
 	 * so we get the shift and attribute bits.
 	 */
-	local_irq_disable();
-	ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+	spin_lock(&kvm->mmu_lock);
+	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
 	pte = __pte(0);
 	if (ptep)
-		pte = *ptep;
-	local_irq_enable();
+		pte = READ_ONCE(*ptep);
+	spin_unlock(&kvm->mmu_lock);
 	/*
 	 * If the PTE disappeared temporarily due to a THP
 	 * collapse, just return and let the guest try again.
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index d605ed0bb2e7..02219e28b1e4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -739,7 +739,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
 	return ret;
 }
 
-bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
+bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing,
 			     unsigned long gpa, unsigned int lpid)
 {
 	unsigned long pgflags;
@@ -754,12 +754,12 @@ bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
 	pgflags = _PAGE_ACCESSED;
 	if (writing)
 		pgflags |= _PAGE_DIRTY;
-	/*
-	 * We are walking the secondary (partition-scoped) page table here.
-	 * We can do this without disabling irq because the Linux MM
-	 * subsystem doesn't do THP splits and collapses on this tree.
-	 */
-	ptep = __find_linux_pte(pgtable, gpa, NULL, &shift);
+
+	if (nested)
+		ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
+	else
+		ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+
 	if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
 		kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
 		return true;
@@ -817,12 +817,12 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 	 * Read the PTE from the process' radix tree and use that
 	 * so we get the shift and attribute bits.
 	 */
-	local_irq_disable();
-	ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+	spin_lock(&kvm->mmu_lock);
+	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
 	pte = __pte(0);
 	if (ptep)
-		pte = *ptep;
-	local_irq_enable();
+		pte = READ_ONCE(*ptep);
+	spin_unlock(&kvm->mmu_lock);
 	/*
 	 * If the PTE disappeared temporarily due to a THP
 	 * collapse, just return and let the guest try again.
@@ -953,8 +953,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Failed to set the reference/change bits */ if (dsisr & DSISR_SET_RC) { spin_lock(&kvm->mmu_lock); - if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, - writing, gpa, kvm->arch.lpid)) + if (kvmppc_hv_handle_set_rc(kvm, false, writing, + gpa, kvm->arch.lpid)) dsisr &= ~DSISR_SET_RC; spin_unlock(&kvm->mmu_lock); @@ -985,11 +985,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, return 0; } - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); - return 0; + return 0; } /* Called with kvm->mmu_lock held */ @@ -1005,7 +1005,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ref; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) { old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift); @@ -1032,7 +1032,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ref; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) ref = 1; return ref; @@ -1044,7 +1044,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, { unsigned long gfn = memslot->base_gfn + pagenum; unsigned long gpa = gfn << PAGE_SHIFT; - pte_t *ptep; + pte_t *ptep, pte; unsigned int shift; int ret = 0; unsigned long old, *rmapp; @@ -1052,12 +1052,35 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ret; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); - if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { - ret = 1; - if (shift) - ret = 1 << (shift - PAGE_SHIFT); + /* + * For performance reasons we don't hold kvm->mmu_lock while walking the + * partition scoped table. + */ + ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift); + if (!ptep) + return 0; + + pte = READ_ONCE(*ptep); + if (pte_present(pte) && pte_dirty(pte)) { spin_lock(&kvm->mmu_lock); + /* + * Recheck the pte again + */ + if (pte_val(pte) != pte_val(*ptep)) { + /* + * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can + * only find PAGE_SIZE pte entries here. We can continue + * to use the pte addr returned by above page table + * walk. 
+ */ + if (!pte_present(*ptep) || !pte_dirty(*ptep)) { + spin_unlock(&kvm->mmu_lock); + return 0; + } + } + + ret = 1; + VM_BUG_ON(shift); old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid); @@ -1113,7 +1136,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, gpa = memslot->base_gfn << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); for (n = memslot->npages; n; --n) { - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 6fcaf1fa8e02..ac6ac192b8bb 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -74,8 +74,8 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, EXPORT_SYMBOL_GPL(kvmppc_find_table); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, - unsigned long *ua, unsigned long **prmap) +static long kvmppc_rm_tce_to_ua(struct kvm *kvm, + unsigned long tce, unsigned long *ua) { unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; @@ -87,9 +87,6 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, *ua = __gfn_to_hva_memslot(memslot, gfn) | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); - if (prmap) - *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; - return 0; } @@ -116,7 +113,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, if (iommu_tce_check_gpa(stt->page_shift, gpa)) return H_PARAMETER; - if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua)) return H_TOO_HARD; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -208,7 +205,7 @@ static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, idx = (ioba >> stt->page_shift) - stt->offset; sttpage = idx / TCES_PER_PAGE; - sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / + sttpages = ALIGN(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / TCES_PER_PAGE; for (i = sttpage; i < sttpage + sttpages; ++i) if (!stt->pages[i]) @@ -411,7 +408,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; dir = iommu_tce_direction(tce); - if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) + if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) return H_PARAMETER; entry = ioba >> stt->page_shift; @@ -437,8 +434,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return H_SUCCESS; } -static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, - unsigned long ua, unsigned long *phpa) +static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long ua, unsigned long *phpa) { pte_t *ptep, pte; unsigned shift = 0; @@ -452,10 +449,17 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, * to exit which will agains result in the below page table walk * to finish. */ - ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); - if (!ptep || !pte_present(*ptep)) + /* an rmap lock won't make it safe. because that just ensure hash + * page table entries are removed with rmap lock held. After that + * mmu notifier returns and we go ahead and removing ptes from Qemu page table. 
+ */ + ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift); + if (!ptep) + return -ENXIO; + + pte = READ_ONCE(*ptep); + if (!pte_present(pte)) return -ENXIO; - pte = *ptep; if (!shift) shift = PAGE_SHIFT; @@ -477,10 +481,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce_list, unsigned long npages) { + struct kvm *kvm = vcpu->kvm; struct kvmppc_spapr_tce_table *stt; long i, ret = H_SUCCESS; unsigned long tces, entry, ua = 0; - unsigned long *rmap = NULL; + unsigned long mmu_seq; bool prereg = false; struct kvmppc_spapr_tce_iommu_table *stit; @@ -488,6 +493,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (kvm_is_radix(vcpu->kvm)) return H_TOO_HARD; + /* + * used to check for invalidations in progress + */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -515,7 +526,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ struct mm_iommu_table_group_mem_t *mem; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); @@ -531,23 +542,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, * We do not require memory to be preregistered in this case * so lock rmap and do __find_linux_pte_or_hugepte(). */ - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) - return H_TOO_HARD; - - rmap = (void *) vmalloc_to_phys(rmap); - if (WARN_ON_ONCE_RM(!rmap)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; - /* - * Synchronize with the MMU notifier callbacks in - * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.). - * While we have the rmap lock, code running on other CPUs - * cannot finish unmapping the host real page that backs - * this guest real page, so we are OK to access the host - * real page. 
- */ - lock_rmap(rmap); - if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) { ret = H_TOO_HARD; goto unlock_exit; } @@ -565,7 +564,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; goto invalidate_exit; } @@ -590,9 +589,8 @@ invalidate_exit: iommu_tce_kill_rm(stit->tbl, entry, npages); unlock_exit: - if (rmap) - unlock_rmap(rmap); - + if (!prereg) + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7f59c47a5b9d..a07e12ed9f5a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -342,9 +342,6 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) vcpu->arch.pvr = pvr; } -/* Dummy value used in computing PCR value below */ -#define PCR_ARCH_300 (PCR_ARCH_207 << 1) - static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; @@ -3390,8 +3387,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, int trap; unsigned long host_hfscr = mfspr(SPRN_HFSCR); unsigned long host_ciabr = mfspr(SPRN_CIABR); - unsigned long host_dawr = mfspr(SPRN_DAWR); - unsigned long host_dawrx = mfspr(SPRN_DAWRX); + unsigned long host_dawr = mfspr(SPRN_DAWR0); + unsigned long host_dawrx = mfspr(SPRN_DAWRX0); unsigned long host_psscr = mfspr(SPRN_PSSCR); unsigned long host_pidr = mfspr(SPRN_PID); @@ -3420,8 +3417,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_SPURR, vcpu->arch.spurr); if (dawr_enabled()) { - mtspr(SPRN_DAWR, vcpu->arch.dawr); - mtspr(SPRN_DAWRX, vcpu->arch.dawrx); + mtspr(SPRN_DAWR0, vcpu->arch.dawr); + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx); } mtspr(SPRN_CIABR, vcpu->arch.ciabr); mtspr(SPRN_IC, vcpu->arch.ic); @@ -3473,8 +3470,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); mtspr(SPRN_HFSCR, host_hfscr); mtspr(SPRN_CIABR, host_ciabr); - mtspr(SPRN_DAWR, host_dawr); - mtspr(SPRN_DAWRX, host_dawrx); + mtspr(SPRN_DAWR0, host_dawr); + mtspr(SPRN_DAWRX0, host_dawrx); mtspr(SPRN_PID, host_pidr); /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index dc97e5be76f6..66c38ee37fd5 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -750,6 +750,23 @@ static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid) return kvm->arch.nested_guests[lpid]; } +pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid, + unsigned long ea, unsigned *hshift) +{ + struct kvm_nested_guest *gp; + pte_t *pte; + + gp = kvmhv_find_nested(kvm, lpid); + if (!gp) + return NULL; + + VM_WARN(!spin_is_locked(&kvm->mmu_lock), + "%s called with kvm mmu_lock not held \n", __func__); + pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift); + + return pte; +} + static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2) { return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK | @@ -792,19 +809,15 @@ static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap, unsigned long clr, unsigned long set, unsigned long hpa, unsigned long mask) { - struct kvm_nested_guest *gp; unsigned 
long gpa; unsigned int shift, lpid; pte_t *ptep; gpa = n_rmap & RMAP_NESTED_GPA_MASK; lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT; - gp = kvmhv_find_nested(kvm, lpid); - if (!gp) - return; /* Find the pte */ - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); /* * If the pte is present and the pfn is still the same, update the pte. * If the pfn has changed then this is a stale rmap entry, the nested @@ -854,7 +867,7 @@ static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap, return; /* Find and invalidate the pte */ - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); /* Don't spuriously invalidate ptes if the pfn has changed */ if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid); @@ -921,7 +934,7 @@ static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu, int shift; spin_lock(&kvm->mmu_lock); - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift); if (!shift) shift = PAGE_SHIFT; if (ptep && pte_present(*ptep)) { @@ -1169,7 +1182,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { /* Can we execute? */ if (!gpte_p->may_execute) { - flags |= SRR1_ISI_N_OR_G; + flags |= SRR1_ISI_N_G_OR_CIP; goto forward_to_l1; } } else { @@ -1212,16 +1225,16 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu, spin_lock(&kvm->mmu_lock); /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */ - ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing, - gpte.raddr, kvm->arch.lpid); + ret = kvmppc_hv_handle_set_rc(kvm, false, writing, + gpte.raddr, kvm->arch.lpid); if (!ret) { ret = -EINVAL; goto out_unlock; } /* Set the rc bit in the pte of the shadow_pgtable for the nest guest */ - ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa, - gp->shadow_lpid); + ret = kvmppc_hv_handle_set_rc(kvm, true, writing, + n_gpa, gp->shadow_lpid); if (!ret) ret = -EINVAL; else @@ -1362,7 +1375,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, /* See if can find translation in our partition scoped tables for L1 */ pte = __pte(0); spin_lock(&kvm->mmu_lock); - pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + pte_p = find_kvm_secondary_pte(kvm, gpa, &shift); if (!shift) shift = PAGE_SHIFT; if (pte_p) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 220305454c23..88da2764c1bb 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -210,7 +210,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, pte_t *ptep; unsigned int writing; unsigned long mmu_seq; - unsigned long rcbits, irq_flags = 0; + unsigned long rcbits; if (kvm_is_radix(kvm)) return H_FUNCTION; @@ -248,17 +248,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* Translate to host virtual address */ hva = __gfn_to_hva_memslot(memslot, gfn); - /* - * If we had a page table table change after lookup, we would - * retry via mmu_notifier_retry. - */ - if (!realmode) - local_irq_save(irq_flags); - /* - * If called in real mode we have MSR_EE = 0. Otherwise - * we disable irq above. 
- */ - ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); + + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift); if (ptep) { pte_t pte; unsigned int host_pte_size; @@ -272,8 +264,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, * to <= host page size, if host is using hugepage */ if (host_pte_size < psize) { - if (!realmode) - local_irq_restore(flags); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return H_PARAMETER; } pte = kvmppc_read_update_linux_pte(ptep, writing); @@ -287,8 +278,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, pa |= gpa & ~PAGE_MASK; } } - if (!realmode) - local_irq_restore(irq_flags); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); ptel |= pa; @@ -888,8 +878,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, return ret; } -static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, - int writing, unsigned long *hpa, +static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long gpa, int writing, unsigned long *hpa, struct kvm_memory_slot **memslot_p) { struct kvm *kvm = vcpu->kvm; @@ -908,7 +898,7 @@ static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, hva = __gfn_to_hva_memslot(memslot, gfn); /* Try to find the host pte for that virtual address */ - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); if (!ptep) return H_TOO_HARD; pte = kvmppc_read_update_linux_pte(ptep, writing); @@ -943,16 +933,11 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); - ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot); - if (ret != H_SUCCESS) - return ret; + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); - /* Check if we've been invalidated */ - raw_spin_lock(&kvm->mmu_lock.rlock); - if (mmu_notifier_retry(kvm, mmu_seq)) { - ret = H_TOO_HARD; + ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot); + if (ret != H_SUCCESS) goto out_unlock; - } /* Zero the page */ for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) @@ -960,7 +945,7 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); out_unlock: - raw_spin_unlock(&kvm->mmu_lock.rlock); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } @@ -976,19 +961,14 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); - ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot); - if (ret != H_SUCCESS) - return ret; - ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL); + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot); if (ret != H_SUCCESS) - return ret; + goto out_unlock; - /* Check if we've been invalidated */ - raw_spin_lock(&kvm->mmu_lock.rlock); - if (mmu_notifier_retry(kvm, mmu_seq)) { - ret = H_TOO_HARD; + ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL); + if (ret != H_SUCCESS) goto out_unlock; - } /* Copy the page */ memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); @@ -996,7 +976,7 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); out_unlock: - raw_spin_unlock(&kvm->mmu_lock.rlock); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } @@ -1260,7 +1240,7 @@ long 
kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ if (!data) { if (gr & (HPTE_R_N | HPTE_R_G)) - return status | SRR1_ISI_N_OR_G; + return status | SRR1_ISI_N_G_OR_CIP; if (!hpte_read_permission(pp, slb_v & key)) return status | SRR1_ISI_PROT; } else if (status & DSISR_ISSTORE) { diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 780a499c7114..71943892c81c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -707,8 +707,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION mfspr r5, SPRN_CIABR - mfspr r6, SPRN_DAWR - mfspr r7, SPRN_DAWRX + mfspr r6, SPRN_DAWR0 + mfspr r7, SPRN_DAWRX0 mfspr r8, SPRN_IAMR std r5, STACK_SLOT_CIABR(r1) std r6, STACK_SLOT_DAWR(r1) @@ -803,8 +803,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) beq 1f ld r5, VCPU_DAWR(r4) ld r6, VCPU_DAWRX(r4) - mtspr SPRN_DAWR, r5 - mtspr SPRN_DAWRX, r6 + mtspr SPRN_DAWR0, r5 + mtspr SPRN_DAWRX0, r6 1: ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) @@ -1766,8 +1766,8 @@ BEGIN_FTR_SECTION * If the DAWR doesn't work, it's ok to write these here as * this value should always be zero */ - mtspr SPRN_DAWR, r6 - mtspr SPRN_DAWRX, r7 + mtspr SPRN_DAWR0, r6 + mtspr SPRN_DAWRX0, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) @@ -2577,8 +2577,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfmsr r6 andi. r6, r6, MSR_DR /* in real mode? */ bne 4f - mtspr SPRN_DAWR, r4 - mtspr SPRN_DAWRX, r5 + mtspr SPRN_DAWR0, r4 + mtspr SPRN_DAWRX0, r5 4: li r3, 0 blr @@ -2907,6 +2907,11 @@ kvm_cede_exit: beq 4f li r0, 0 stb r0, VCPU_CEDED(r9) + /* + * The escalation interrupts are special as we don't EOI them. + * There is no need to use the load-after-store ordering offset + * to set PQ to 10 as we won't use StoreEOI. + */ li r6, XIVE_ESB_SET_PQ_10 b 5f 4: li r0, 1 @@ -3329,7 +3334,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_AMR, r0 mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 + mtspr SPRN_DAWRX0, r0 BEGIN_MMU_FTR_SECTION b 4f diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 6ef0151ff70a..bdea91df1497 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -31,6 +31,12 @@ static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) { u64 val; + /* + * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10 + * load operation, so there is no need to enforce load-after-store + * ordering. 
+ */ + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index a8a900ace1e6..4ad3c0279458 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -58,6 +58,9 @@ static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset) { u64 val; + if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + offset |= XIVE_ESB_LD_ST_MO; + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 1139bc56e004..135d0e686622 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -95,7 +95,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) emulated = EMULATE_FAIL; vcpu->arch.regs.msr = vcpu->arch.shared->msr; - if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { + if (analyse_instr(&op, &vcpu->arch.regs, ppc_inst(inst)) == 0) { int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); |
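
The emulate_loadstore.c hunk that closes the diff is a small piece of the
prefixed-instruction plumbing mentioned in the merge description: raw 32-bit
instruction words are wrapped in ppc_inst() before being handed to common
code such as analyse_instr(), so the same call sites can later carry 64-bit
prefixed instructions. A minimal caller following that convention is
sketched below; the function itself is illustrative and not part of the
series.

    /*
     * Kernel-context sketch; uses asm/sstep.h's analyse_instr() and the
     * ppc_inst() wrapper introduced by this series.
     */
    static int sketch_classify_insn(struct kvm_vcpu *vcpu, u32 inst)
    {
            struct instruction_op op;

            if (analyse_instr(&op, &vcpu->arch.regs, ppc_inst(inst)) != 0)
                    return -1;  /* not a load/store we can break down */

            /* Same decode result the emulation code switches on */
            return op.type & INSTR_TYPE_MASK;
    }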