diff options
author | Radim Krčmář <rkrcmar@redhat.com> | 2018-12-20 14:54:09 +0100 |
---|---|---|
committer | Radim Krčmář <rkrcmar@redhat.com> | 2018-12-20 14:54:09 +0100 |
commit | cfdfaf4a86406f8eff7901de09f4910341eb9e31 (patch) | |
tree | 6634ec832615d2cbc8c1b730199bb7178a73051d /arch/powerpc | |
parent | e9f2e05a5f19ce2a21d79c765fd6bb7c03d4aaeb (diff) | |
parent | 95d386c2d2e7660a6447df1507a9845665dab7d8 (diff) |
Merge tag 'kvm-ppc-next-4.21-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
PPC KVM update for 4.21 from Paul Mackerras
The main new feature this time is support in HV nested KVM for passing
a device that is emulated by a level 0 hypervisor and presented to
level 1 as a PCI device through to a level 2 guest using VFIO.
Apart from that there are improvements for migration of radix guests
under HV KVM and some other fixes and cleanups.
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/include/asm/hvcall.h | 1 | ||||
-rw-r--r-- | arch/powerpc/include/asm/kvm_book3s.h | 21 | ||||
-rw-r--r-- | arch/powerpc/include/asm/kvm_book3s_64.h | 15 | ||||
-rw-r--r-- | arch/powerpc/include/asm/kvm_host.h | 3 | ||||
-rw-r--r-- | arch/powerpc/include/asm/kvm_ppc.h | 10 | ||||
-rw-r--r-- | arch/powerpc/kernel/exceptions-64s.S | 9 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s.c | 5 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 12 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 134 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 95 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_nested.c | 116 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_pr.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xics.c | 12 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.c | 12 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 33 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 1 |
18 files changed, 418 insertions, 70 deletions
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 33a4fc891947..463c63a9fcf1 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -335,6 +335,7 @@ #define H_SET_PARTITION_TABLE 0xF800 #define H_ENTER_NESTED 0xF804 #define H_TLB_INVALIDATE 0xF808 +#define H_COPY_TOFROM_GUEST 0xF80C /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 09f8e9ba69bc..82a0a9f3c39c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -188,6 +188,13 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr); +extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, + gva_t eaddr, void *to, void *from, + unsigned long n); +extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, + void *to, unsigned long n); +extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, + void *from, unsigned long n); extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, u64 root, u64 *pte_ret_p); @@ -197,7 +204,8 @@ extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, bool data, bool iswrite); extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, - unsigned int shift, struct kvm_memory_slot *memslot, + unsigned int shift, + const struct kvm_memory_slot *memslot, unsigned int lpid); extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, unsigned long gpa, @@ -215,16 +223,14 @@ extern int kvmppc_radix_init(void); extern void kvmppc_radix_exit(void); extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); -extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, - unsigned long gpa, unsigned int shift, - struct kvm_memory_slot *memslot, - unsigned int lpid); extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); +extern void kvmppc_radix_flush_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); /* XXX remove this export when load_last_inst() is generic */ @@ -242,7 +248,7 @@ extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); -extern void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot, +extern void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, unsigned long gfn, unsigned long psize); extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); @@ -298,6 +304,7 @@ long kvmhv_nested_init(void); void kvmhv_nested_exit(void); void kvmhv_vm_nested_init(struct kvm *kvm); long kvmhv_set_partition_table(struct kvm_vcpu *vcpu); +long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu); void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); @@ -307,7 +314,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu, void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); -long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu); +long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu); void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 6d298145d564..34968fd2de24 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -55,6 +55,7 @@ struct kvm_nested_guest { cpumask_t need_tlb_flush; cpumask_t cpu_in_guest; short prev_cpu[NR_CPUS]; + u8 radix; /* is this nested guest radix */ }; /* @@ -150,6 +151,18 @@ static inline bool kvm_is_radix(struct kvm *kvm) return kvm->arch.radix; } +static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu) +{ + bool radix; + + if (vcpu->arch.nested) + radix = vcpu->arch.nested->radix; + else + radix = kvm_is_radix(vcpu->kvm); + + return radix; +} + #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #endif @@ -625,7 +638,7 @@ extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp, struct rmap_nested **n_rmap); extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, unsigned long gpa, unsigned long hpa, unsigned long nbytes); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index fac6f631ed29..7a2483a139cf 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -793,6 +793,7 @@ struct kvm_vcpu_arch { /* For support of nested guests */ struct kvm_nested_guest *nested; u32 nested_vcpu_id; + gpa_t nested_io_gpr; #endif #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING @@ -827,6 +828,8 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_FQPR 0x00c0 #define KVM_MMIO_REG_VSX 0x0100 #define KVM_MMIO_REG_VMX 0x0180 +#define KVM_MMIO_REG_NESTED_GPR 0xffc0 + #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9b89b1918dfc..eb0d79f0ca45 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -224,7 +224,8 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, extern void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new); + const struct kvm_memory_slot *new, + enum kvm_mr_change change); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, @@ -294,7 +295,8 @@ struct kvmppc_ops { void (*commit_memory_region)(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new); + const struct kvm_memory_slot *new, + enum kvm_mr_change change); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); @@ -326,6 +328,10 @@ struct kvmppc_ops { unsigned long flags); void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr); int (*enable_nested)(struct kvm *kvm); + int (*load_from_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size); + int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 89d32bb79d5e..db2691ff4c0b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -995,7 +995,16 @@ EXC_COMMON_BEGIN(h_data_storage_common) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD +BEGIN_MMU_FTR_SECTION + ld r4,PACA_EXGEN+EX_DAR(r13) + lwz r5,PACA_EXGEN+EX_DSISR(r13) + std r4,_DAR(r1) + std r5,_DSISR(r1) + li r5,SIGSEGV + bl bad_page_fault +MMU_FTR_SECTION_ELSE bl unknown_exception +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b ret_from_except diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index fd9893bc7aa1..a35fb4099094 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -830,9 +830,10 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { - kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new); + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change); } int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index c615617e78ac..6f2d2fb4e098 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -743,12 +743,15 @@ void kvmppc_rmap_reset(struct kvm *kvm) srcu_idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { + /* Mutual exclusion with kvm_unmap_hva_range etc. */ + spin_lock(&kvm->mmu_lock); /* * This assumes it is acceptable to lose reference and * change bits across a reset. */ memset(memslot->arch.rmap, 0, memslot->npages * sizeof(*memslot->arch.rmap)); + spin_unlock(&kvm->mmu_lock); } srcu_read_unlock(&kvm->srcu, srcu_idx); } @@ -896,11 +899,12 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm, gfn = memslot->base_gfn; rmapp = memslot->arch.rmap; + if (kvm_is_radix(kvm)) { + kvmppc_radix_flush_memslot(kvm, memslot); + return; + } + for (n = memslot->npages; n; --n, ++gfn) { - if (kvm_is_radix(kvm)) { - kvm_unmap_radix(kvm, memslot, gfn); - continue; - } /* * Testing the present bit without locking is OK because * the memslot has been marked invalid already, and hence diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index d68162ee159b..870ef9d5eee6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -29,6 +29,103 @@ */ static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 }; +unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, + gva_t eaddr, void *to, void *from, + unsigned long n) +{ + unsigned long quadrant, ret = n; + int old_pid, old_lpid; + bool is_load = !!to; + + /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ + if (kvmhv_on_pseries()) + return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, + __pa(to), __pa(from), n); + + quadrant = 1; + if (!pid) + quadrant = 2; + if (is_load) + from = (void *) (eaddr | (quadrant << 62)); + else + to = (void *) (eaddr | (quadrant << 62)); + + preempt_disable(); + + /* switch the lpid first to avoid running host with unallocated pid */ + old_lpid = mfspr(SPRN_LPID); + if (old_lpid != lpid) + mtspr(SPRN_LPID, lpid); + if (quadrant == 1) { + old_pid = mfspr(SPRN_PID); + if (old_pid != pid) + mtspr(SPRN_PID, pid); + } + isync(); + + pagefault_disable(); + if (is_load) + ret = raw_copy_from_user(to, from, n); + else + ret = raw_copy_to_user(to, from, n); + pagefault_enable(); + + /* switch the pid first to avoid running host with unallocated pid */ + if (quadrant == 1 && pid != old_pid) + mtspr(SPRN_PID, old_pid); + if (lpid != old_lpid) + mtspr(SPRN_LPID, old_lpid); + isync(); + + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix); + +static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, + void *to, void *from, unsigned long n) +{ + int lpid = vcpu->kvm->arch.lpid; + int pid = vcpu->arch.pid; + + /* This would cause a data segment intr so don't allow the access */ + if (eaddr & (0x3FFUL << 52)) + return -EINVAL; + + /* Should we be using the nested lpid */ + if (vcpu->arch.nested) + lpid = vcpu->arch.nested->shadow_lpid; + + /* If accessing quadrant 3 then pid is expected to be 0 */ + if (((eaddr >> 62) & 0x3) == 0x3) + pid = 0; + + eaddr &= ~(0xFFFUL << 52); + + return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n); +} + +long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, + unsigned long n) +{ + long ret; + + ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n); + if (ret > 0) + memset(to + (n - ret), 0, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix); + +long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from, + unsigned long n) +{ + return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n); +} +EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix); + int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, u64 root, u64 *pte_ret_p) @@ -284,7 +381,8 @@ static void kvmppc_pmd_free(pmd_t *pmdp) /* Called with kvm->mmu_lock held */ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, - unsigned int shift, struct kvm_memory_slot *memslot, + unsigned int shift, + const struct kvm_memory_slot *memslot, unsigned int lpid) { @@ -683,6 +781,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, pte_t pte, *ptep; unsigned int shift, level; int ret; + bool large_enable; /* used to check for invalidations in progress */ mmu_seq = kvm->mmu_notifier_seq; @@ -732,12 +831,15 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, pte = *ptep; local_irq_enable(); + /* If we're logging dirty pages, always map single pages */ + large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES); + /* Get pte level from shift/size */ - if (shift == PUD_SHIFT && + if (large_enable && shift == PUD_SHIFT && (gpa & (PUD_SIZE - PAGE_SIZE)) == (hva & (PUD_SIZE - PAGE_SIZE))) { level = 2; - } else if (shift == PMD_SHIFT && + } else if (large_enable && shift == PMD_SHIFT && (gpa & (PMD_SIZE - PAGE_SIZE)) == (hva & (PMD_SIZE - PAGE_SIZE))) { level = 1; @@ -857,7 +959,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return ret; } -/* Called with kvm->lock held */ +/* Called with kvm->mmu_lock held */ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { @@ -872,7 +974,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, return 0; } -/* Called with kvm->lock held */ +/* Called with kvm->mmu_lock held */ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { @@ -891,7 +993,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, return ref; } -/* Called with kvm->lock held */ +/* Called with kvm->mmu_lock held */ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { @@ -953,6 +1055,26 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, return 0; } +void kvmppc_radix_flush_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + unsigned long n; + pte_t *ptep; + unsigned long gpa; + unsigned int shift; + + gpa = memslot->base_gfn << PAGE_SHIFT; + spin_lock(&kvm->mmu_lock); + for (n = memslot->npages; n; --n) { + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + if (ptep && pte_present(*ptep)) + kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, + kvm->arch.lpid); + gpa += PAGE_SIZE; + } + spin_unlock(&kvm->mmu_lock); +} + static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info, int psize, int *indexp) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a56f8413758a..5a066fc299e1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -985,6 +985,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, 3, 0); vcpu->arch.hcall_needed = 0; return -EINTR; + } else if (ret == H_TOO_HARD) { + kvmppc_set_gpr(vcpu, 3, 0); + vcpu->arch.hcall_needed = 0; + return RESUME_HOST; } break; case H_TLB_INVALIDATE: @@ -992,7 +996,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (nesting_enabled(vcpu->kvm)) ret = kvmhv_do_nested_tlbie(vcpu); break; - + case H_COPY_TOFROM_GUEST: + ret = H_FUNCTION; + if (nesting_enabled(vcpu->kvm)) + ret = kvmhv_copy_tofrom_guest_nested(vcpu); + break; default: return RESUME_HOST; } @@ -1336,7 +1344,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) +static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; @@ -1394,7 +1402,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(vcpu); + r = kvmhv_nested_page_fault(run, vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; case BOOK3S_INTERRUPT_H_INST_STORAGE: @@ -1404,7 +1412,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE) vcpu->arch.fault_dsisr |= DSISR_ISSTORE; srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(vcpu); + r = kvmhv_nested_page_fault(run, vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; @@ -4059,7 +4067,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, if (!nested) r = kvmppc_handle_exit_hv(kvm_run, vcpu, current); else - r = kvmppc_handle_nested_exit(vcpu); + r = kvmppc_handle_nested_exit(kvm_run, vcpu); } vcpu->arch.ret = r; @@ -4371,7 +4379,8 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; @@ -4383,6 +4392,23 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, */ if (npages) atomic64_inc(&kvm->arch.mmio_update); + + /* + * For change == KVM_MR_MOVE or KVM_MR_DELETE, higher levels + * have already called kvm_arch_flush_shadow_memslot() to + * flush shadow mappings. For KVM_MR_CREATE we have no + * previous mappings. So the only case to handle is + * KVM_MR_FLAGS_ONLY when the KVM_MEM_LOG_DIRTY_PAGES bit + * has been changed. + * For radix guests, we flush on setting KVM_MEM_LOG_DIRTY_PAGES + * to get rid of any THP PTEs in the partition-scoped page tables + * so we can track dirtiness at the page level; we flush when + * clearing KVM_MEM_LOG_DIRTY_PAGES so that we can go back to + * using THP PTEs. + */ + if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) && + ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES)) + kvmppc_radix_flush_memslot(kvm, old); } /* @@ -4532,12 +4558,15 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) { if (nesting_enabled(kvm)) kvmhv_release_all_nested(kvm); + kvmppc_rmap_reset(kvm); + kvm->arch.process_table = 0; + /* Mutual exclusion with kvm_unmap_hva_range etc. */ + spin_lock(&kvm->mmu_lock); + kvm->arch.radix = 0; + spin_unlock(&kvm->mmu_lock); kvmppc_free_radix(kvm); kvmppc_update_lpcr(kvm, LPCR_VPM1, LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); - kvmppc_rmap_reset(kvm); - kvm->arch.radix = 0; - kvm->arch.process_table = 0; return 0; } @@ -4549,12 +4578,14 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) err = kvmppc_init_vm_radix(kvm); if (err) return err; - + kvmppc_rmap_reset(kvm); + /* Mutual exclusion with kvm_unmap_hva_range etc. */ + spin_lock(&kvm->mmu_lock); + kvm->arch.radix = 1; + spin_unlock(&kvm->mmu_lock); kvmppc_free_hpt(&kvm->arch.hpt); kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR, LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); - kvmppc_rmap_reset(kvm); - kvm->arch.radix = 1; return 0; } @@ -5214,6 +5245,44 @@ static int kvmhv_enable_nested(struct kvm *kvm) return 0; } +static int kvmhv_load_from_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size) +{ + int rc = -EINVAL; + + if (kvmhv_vcpu_is_radix(vcpu)) { + rc = kvmhv_copy_from_guest_radix(vcpu, *eaddr, ptr, size); + + if (rc > 0) + rc = -EINVAL; + } + + /* For now quadrants are the only way to access nested guest memory */ + if (rc && vcpu->arch.nested) + rc = -EAGAIN; + + return rc; +} + +static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size) +{ + int rc = -EINVAL; + + if (kvmhv_vcpu_is_radix(vcpu)) { + rc = kvmhv_copy_to_guest_radix(vcpu, *eaddr, ptr, size); + + if (rc > 0) + rc = -EINVAL; + } + + /* For now quadrants are the only way to access nested guest memory */ + if (rc && vcpu->arch.nested) + rc = -EAGAIN; + + return rc; +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -5254,6 +5323,8 @@ static struct kvmppc_ops kvm_ops_hv = { .get_rmmu_info = kvmhv_get_rmmu_info, .set_smt_mode = kvmhv_set_smt_mode, .enable_nested = kvmhv_enable_nested, + .load_from_eaddr = kvmhv_load_from_eaddr, + .store_to_eaddr = kvmhv_store_to_eaddr, }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 401d2ecbebc5..9dce4b9c1d9c 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -195,6 +195,26 @@ void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, vcpu->arch.ppr = hr->ppr; } +static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr) +{ + /* No need to reflect the page fault to L1, we've handled it */ + vcpu->arch.trap = 0; + + /* + * Since the L2 gprs have already been written back into L1 memory when + * we complete the mmio, store the L1 memory location of the L2 gpr + * being loaded into by the mmio so that the loaded value can be + * written there in kvmppc_complete_mmio_load() + */ + if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR) + && (vcpu->mmio_is_write == 0)) { + vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr + + offsetof(struct pt_regs, + gpr[vcpu->arch.io_gpr]); + vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR; + } +} + long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) { long int err, r; @@ -316,6 +336,11 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (r == -EINTR) return H_INTERRUPT; + if (vcpu->mmio_needed) { + kvmhv_nested_mmio_needed(vcpu, regs_ptr); + return H_TOO_HARD; + } + return vcpu->arch.trap; } @@ -437,6 +462,81 @@ long kvmhv_set_partition_table(struct kvm_vcpu *vcpu) } /* + * Handle the H_COPY_TOFROM_GUEST hcall. + * r4 = L1 lpid of nested guest + * r5 = pid + * r6 = eaddr to access + * r7 = to buffer (L1 gpa) + * r8 = from buffer (L1 gpa) + * r9 = n bytes to copy + */ +long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) +{ + struct kvm_nested_guest *gp; + int l1_lpid = kvmppc_get_gpr(vcpu, 4); + int pid = kvmppc_get_gpr(vcpu, 5); + gva_t eaddr = kvmppc_get_gpr(vcpu, 6); + gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7); + gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8); + void *buf; + unsigned long n = kvmppc_get_gpr(vcpu, 9); + bool is_load = !!gp_to; + long rc; + + if (gp_to && gp_from) /* One must be NULL to determine the direction */ + return H_PARAMETER; + + if (eaddr & (0xFFFUL << 52)) + return H_PARAMETER; + + buf = kzalloc(n, GFP_KERNEL); + if (!buf) + return H_NO_MEM; + + gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false); + if (!gp) { + rc = H_PARAMETER; + goto out_free; + } + + mutex_lock(&gp->tlb_lock); + + if (is_load) { + /* Load from the nested guest into our buffer */ + rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid, + eaddr, buf, NULL, n); + if (rc) + goto not_found; + + /* Write what was loaded into our buffer back to the L1 guest */ + rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n); + if (rc) + goto not_found; + } else { + /* Load the data to be stored from the L1 guest into our buf */ + rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n); + if (rc) + goto not_found; + + /* Store from our buffer into the nested guest */ + rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid, + eaddr, NULL, buf, n); + if (rc) + goto not_found; + } + +out_unlock: + mutex_unlock(&gp->tlb_lock); + kvmhv_put_nested(gp); +out_free: + kfree(buf); + return rc; +not_found: + rc = H_NOT_FOUND; + goto out_unlock; +} + +/* * Reload the partition table entry for a guest. * Caller must hold gp->tlb_lock. */ @@ -480,6 +580,7 @@ struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid) if (shadow_lpid < 0) goto out_free2; gp->shadow_lpid = shadow_lpid; + gp->radix = 1; memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu)); @@ -723,7 +824,7 @@ static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp, /* called with kvm->mmu_lock held */ void kvmhv_remove_nest_rmap_range(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, unsigned long gpa, unsigned long hpa, unsigned long nbytes) { @@ -1099,7 +1200,8 @@ static inline int kvmppc_radix_shift_to_level(int shift) } /* called with gp->tlb_lock held */ -static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, +static long int __kvmhv_nested_page_fault(struct kvm_run *run, + struct kvm_vcpu *vcpu, struct kvm_nested_guest *gp) { struct kvm *kvm = vcpu->kvm; @@ -1180,9 +1282,9 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, kvmppc_core_queue_data_storage(vcpu, ea, dsisr); return RESUME_GUEST; } - /* passthrough of emulated MMIO case... */ - pr_err("emulated MMIO passthrough?\n"); - return -EINVAL; + + /* passthrough of emulated MMIO case */ + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing); } if (memslot->flags & KVM_MEM_READONLY) { if (writing) { @@ -1264,13 +1366,13 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, return RESUME_GUEST; } -long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu) +long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu) { struct kvm_nested_guest *gp = vcpu->arch.nested; long int ret; mutex_lock(&gp->tlb_lock); - ret = __kvmhv_nested_page_fault(vcpu, gp); + ret = __kvmhv_nested_page_fault(run, vcpu, gp); mutex_unlock(&gp->tlb_lock); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index a67cf1cdeda4..3b3791ed74a6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -107,7 +107,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); /* Update the dirty bitmap of a memslot */ -void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot, +void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, unsigned long gfn, unsigned long psize) { unsigned long npages; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 4efd65d9e828..811a3c2fb0e9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -587,6 +587,7 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) case PVR_POWER8: case PVR_POWER8E: case PVR_POWER8NVL: + case PVR_POWER9: vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | BOOK3S_HFLAG_NEW_TLBIE; break; @@ -1913,7 +1914,8 @@ static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { return; } diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index b0b2bfc2ff51..f27ee57ab46e 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1015,17 +1015,7 @@ static int xics_debug_show(struct seq_file *m, void *private) return 0; } -static int xics_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, xics_debug_show, inode->i_private); -} - -static const struct file_operations xics_debug_fops = { - .open = xics_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(xics_debug); static void xics_debugfs_init(struct kvmppc_xics *xics) { diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index ad4a370703d3..f78d002f0fe0 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1968,17 +1968,7 @@ static int xive_debug_show(struct seq_file *m, void *private) return 0; } -static int xive_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, xive_debug_show, inode->i_private); -} - -static const struct file_operations xive_debug_fops = { - .open = xive_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(xive_debug); static void xive_debugfs_init(struct kvmppc_xive *xive) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a9ca016da670..dbec4128bb51 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1833,7 +1833,8 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b1ed31a17a8c..b90a7d154180 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -331,10 +331,17 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, { ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; - int r; + int r = -EINVAL; vcpu->stat.st++; + if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->store_to_eaddr) + r = vcpu->kvm->arch.kvm_ops->store_to_eaddr(vcpu, eaddr, ptr, + size); + + if ((!r) || (r == -EAGAIN)) + return r; + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, XLATE_WRITE, &pte); if (r < 0) @@ -367,10 +374,17 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, { ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; - int rc; + int rc = -EINVAL; vcpu->stat.ld++; + if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->load_from_eaddr) + rc = vcpu->kvm->arch.kvm_ops->load_from_eaddr(vcpu, eaddr, ptr, + size); + + if ((!rc) || (rc == -EAGAIN)) + return rc; + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, XLATE_READ, &pte); if (rc) @@ -542,8 +556,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE_64: - /* fallthrough */ + r = 1; + break; case KVM_CAP_SPAPR_TCE_VFIO: + r = !!cpu_has_feature(CPU_FTR_HVMODE); + break; case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: @@ -695,7 +712,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - kvmppc_core_commit_memory_region(kvm, mem, old, new); + kvmppc_core_commit_memory_region(kvm, mem, old, new, change); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, @@ -1191,6 +1208,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, kvmppc_set_vmx_byte(vcpu, gpr); break; #endif +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + case KVM_MMIO_REG_NESTED_GPR: + if (kvmppc_need_byteswap(vcpu)) + gpr = swab64(gpr); + kvm_vcpu_write_guest(vcpu, vcpu->arch.nested_io_gpr, &gpr, + sizeof(gpr)); + break; +#endif default: BUG(); } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 1697e903bbf2..2e6fb1d758c3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -636,6 +636,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) switch (TRAP(regs)) { case 0x300: case 0x380: + case 0xe00: printk(KERN_ALERT "Unable to handle kernel paging request for " "data at address 0x%08lx\n", regs->dar); break; |