diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2019-11-01 00:35:55 +0100 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2019-11-01 00:35:55 +0100 |
commit | e7011c5d17a74e9aa58f014799e044f342582284 (patch) | |
tree | 7f1595fff5a9b488f9d0c3b644de1030eed28404 /arch/powerpc/kvm | |
parent | 19308a412ec52c0de92d296842be237778753d9b (diff) | |
parent | 55d7004299eb917767761f01a208d50afad4f535 (diff) |
Merge tag 'kvm-ppc-next-5.5-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD
KVM PPC update for 5.5
* Add capability to tell userspace whether we can single-step the guest.
* Improve the allocation of XIVE virtual processor IDs, to reduce the
risk of running out of IDs when running many VMs on POWER9.
* Rewrite interrupt synthesis code to deliver interrupts in virtual
mode when appropriate.
* Minor cleanups and improvements.
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/book3s.c | 27 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s.h | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_32_mmu.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu.c | 15 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 24 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 28 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_builtin.c | 82 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_nested.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_pr.c | 40 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.c | 128 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.h | 5 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive_native.c | 38 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500_mmu_host.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 2 |
14 files changed, 248 insertions, 158 deletions
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index ec2547cc5ecb..58a59ee998e2 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -74,27 +74,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { - ulong pc = kvmppc_get_pc(vcpu); - ulong lr = kvmppc_get_lr(vcpu); - if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) - kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); - if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) - kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); - vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; - } -} -EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); - -static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) -{ - if (!is_kvmppc_hv_enabled(vcpu->kvm)) - return to_book3s(vcpu)->hior; - return 0; -} - static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, unsigned long pending_now, unsigned long old_pending) { @@ -134,11 +113,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { - kvmppc_unfixup_split_real(vcpu); - kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags); - kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); - vcpu->arch.mmu.reset_msr(vcpu); + vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags); } static int kvmppc_book3s_vec2irqprio(unsigned int vec) diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 2ef1311a2a13..3a4613985949 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -32,4 +32,7 @@ extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val); static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {} #endif +extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); +extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); + #endif diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 18f244aad7aa..f21e73492ce3 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -90,11 +90,6 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); } -static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) -{ - kvmppc_set_msr(vcpu, 0); -} - static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu, u32 sre, gva_t eaddr, bool primary) @@ -406,7 +401,6 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin; mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin; mmu->xlate = kvmppc_mmu_book3s_32_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr; mmu->tlbie = kvmppc_mmu_book3s_32_tlbie; mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid; mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp; diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 5f63a5f7f24f..599133256a95 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -24,20 +24,6 @@ #define dprintk(X...) do { } while(0) #endif -static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) -{ - unsigned long msr = vcpu->arch.intr_msr; - unsigned long cur_msr = kvmppc_get_msr(vcpu); - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(cur_msr)) - msr |= MSR_TS_S; - else - msr |= cur_msr & MSR_TS_MASK; - - kvmppc_set_msr(vcpu, msr); -} - static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( struct kvm_vcpu *vcpu, gva_t eaddr) @@ -676,7 +662,6 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) mmu->slbie = kvmppc_mmu_book3s_64_slbie; mmu->slbia = kvmppc_mmu_book3s_64_slbia; mmu->xlate = kvmppc_mmu_book3s_64_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr; mmu->tlbie = kvmppc_mmu_book3s_64_tlbie; mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid; mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 68678e31c84c..d381526c5c9b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -275,18 +275,6 @@ int kvmppc_mmu_hv_init(void) return 0; } -static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) -{ - unsigned long msr = vcpu->arch.intr_msr; - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) - msr |= MSR_TS_S; - else - msr |= vcpu->arch.shregs.msr & MSR_TS_MASK; - kvmppc_set_msr(vcpu, msr); -} - static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) @@ -508,6 +496,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct vm_area_struct *vma; unsigned long rcbits; long mmio_update; + struct mm_struct *mm; if (kvm_is_radix(kvm)) return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); @@ -584,6 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = false; pfn = 0; page = NULL; + mm = current->mm; pte_size = PAGE_SIZE; writing = (dsisr & DSISR_ISSTORE) != 0; /* If writing != 0, then the HPTE must allow writing, if we get here */ @@ -592,8 +582,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages); if (npages < 1) { /* Check if it's an I/O mapping */ - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, hva); + down_read(&mm->mmap_sem); + vma = find_vma(mm, hva); if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && (vma->vm_flags & VM_PFNMAP)) { pfn = vma->vm_pgoff + @@ -602,7 +592,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot)))); write_ok = vma->vm_flags & VM_WRITE; } - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); if (!pfn) goto out_put; } else { @@ -621,8 +611,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * hugepage split and collapse. */ local_irq_save(flags); - ptep = find_current_mm_pte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); if (__pte_write(pte)) @@ -2161,7 +2150,6 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 709cf1fd4cf4..ec5c0379296a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -133,7 +133,6 @@ static inline bool nesting_enabled(struct kvm *kvm) /* If set, the threads on each CPU core have to be in the same MMU mode */ static bool no_mixing_hpt_and_radix; -static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); /* @@ -338,18 +337,6 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } -static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) -{ - /* - * Check for illegal transactional state bit combination - * and if we find it, force the TS field to a safe state. - */ - if ((msr & MSR_TS_MASK) == MSR_TS_MASK) - msr &= ~MSR_TS_MASK; - vcpu->arch.shregs.msr = msr; - kvmppc_end_cede(vcpu); -} - static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) { vcpu->arch.pvr = pvr; @@ -792,6 +779,11 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, vcpu->arch.dawr = value1; vcpu->arch.dawrx = value2; return H_SUCCESS; + case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: + /* KVM does not support mflags=2 (AIL=2) */ + if (mflags != 0 && mflags != 3) + return H_UNSUPPORTED_FLAG_START; + return H_TOO_HARD; default: return H_TOO_HARD; } @@ -2454,15 +2446,6 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) vcpu->arch.timer_running = 1; } -static void kvmppc_end_cede(struct kvm_vcpu *vcpu) -{ - vcpu->arch.ceded = 0; - if (vcpu->arch.timer_running) { - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); - vcpu->arch.timer_running = 0; - } -} - extern int __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, @@ -5401,6 +5384,7 @@ static struct kvmppc_ops kvm_ops_hv = { .set_one_reg = kvmppc_set_one_reg_hv, .vcpu_load = kvmppc_core_vcpu_load_hv, .vcpu_put = kvmppc_core_vcpu_put_hv, + .inject_interrupt = kvmppc_inject_interrupt_hv, .set_msr = kvmppc_set_msr_hv, .vcpu_run = kvmppc_vcpu_run_hv, .vcpu_create = kvmppc_core_vcpu_create_hv, diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7c1909657b55..7cd3cf3d366b 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -755,6 +755,71 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) local_paca->kvm_hstate.kvm_split_mode = NULL; } +static void kvmppc_end_cede(struct kvm_vcpu *vcpu) +{ + vcpu->arch.ceded = 0; + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } +} + +void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) +{ + /* + * Check for illegal transactional state bit combination + * and if we find it, force the TS field to a safe state. + */ + if ((msr & MSR_TS_MASK) == MSR_TS_MASK) + msr &= ~MSR_TS_MASK; + vcpu->arch.shregs.msr = msr; + kvmppc_end_cede(vcpu); +} +EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv); + +static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = vec; + + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; + + /* + * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and + * applicable. AIL=2 is not supported. + * + * AIL does not apply to SRESET, MCE, or HMI (which is never + * delivered to the guest), and does not apply if IR=0 or DR=0. + */ + if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET && + vec != BOOK3S_INTERRUPT_MACHINE_CHECK && + (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 && + (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) { + new_msr |= MSR_IR | MSR_DR; + new_pc += 0xC000000000004000ULL; + } + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + vcpu->arch.shregs.msr = new_msr; +} + +void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + inject_interrupt(vcpu, vec, srr1_flags); + kvmppc_end_cede(vcpu); +} +EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv); + /* * Is there a PRIV_DOORBELL pending for the guest (on POWER9)? * Can we inject a Decrementer or a External interrupt? @@ -762,7 +827,6 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) { int ext; - unsigned long vec = 0; unsigned long lpcr; /* Insert EXTERNAL bit into LPCR at the MER bit position */ @@ -774,26 +838,16 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) if (vcpu->arch.shregs.msr & MSR_EE) { if (ext) { - vec = BOOK3S_INTERRUPT_EXTERNAL; + inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0); } else { long int dec = mfspr(SPRN_DEC); if (!(lpcr & LPCR_LD)) dec = (int) dec; if (dec < 0) - vec = BOOK3S_INTERRUPT_DECREMENTER; + inject_interrupt(vcpu, + BOOK3S_INTERRUPT_DECREMENTER, 0); } } - if (vec) { - unsigned long msr, old_msr = vcpu->arch.shregs.msr; - - kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, old_msr); - kvmppc_set_pc(vcpu, vec); - msr = vcpu->arch.intr_msr; - if (MSR_TM_ACTIVE(old_msr)) - msr |= MSR_TS_S; - vcpu->arch.shregs.msr = msr; - } if (vcpu->arch.doorbell_request) { mtspr(SPRN_DPDES, 1); diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index cdf30c6eaf54..dc97e5be76f6 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1186,7 +1186,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, forward_to_l1: vcpu->arch.fault_dsisr = flags; if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { - vcpu->arch.shregs.msr &= ~0x783f0000ul; + vcpu->arch.shregs.msr &= SRR1_MSR_BITS; vcpu->arch.shregs.msr |= flags; } return RESUME_HOST; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cc65af8fe6f7..ce4fcf76e53e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -90,7 +90,43 @@ static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); } -void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); +static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + ulong lr = kvmppc_get_lr(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} + +static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + kvmppc_unfixup_split_real(vcpu); + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = to_book3s(vcpu)->hior + vec; + +#ifdef CONFIG_PPC_BOOK3S_64 + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; +#endif + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + kvmppc_set_msr(vcpu, new_msr); +} static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { @@ -1761,6 +1797,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, #else /* default to book3s_32 (750) */ vcpu->arch.pvr = 0x84202; + vcpu->arch.intr_msr = 0; #endif kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); vcpu->arch.slb_nr = 64; @@ -2058,6 +2095,7 @@ static struct kvmppc_ops kvm_ops_pr = { .set_one_reg = kvmppc_set_one_reg_pr, .vcpu_load = kvmppc_core_vcpu_load_pr, .vcpu_put = kvmppc_core_vcpu_put_pr, + .inject_interrupt = kvmppc_inject_interrupt_pr, .set_msr = kvmppc_set_msr_pr, .vcpu_run = kvmppc_vcpu_run_pr, .vcpu_create = kvmppc_core_vcpu_create_pr, diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index a3f9c665bb5b..66858b7d3c6b 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1211,6 +1211,45 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.xive_vcpu = NULL; } +static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) +{ + /* We have a block of xive->nr_servers VPs. We just need to check + * raw vCPU ids are below the expected limit for this guest's + * core stride ; kvmppc_pack_vcpu_id() will pack them down to an + * index that can be safely used to compute a VP id that belongs + * to the VP block. + */ + return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode; +} + +int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) +{ + u32 vp_id; + + if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) { + pr_devel("Out of bounds !\n"); + return -EINVAL; + } + + if (xive->vp_base == XIVE_INVALID_VP) { + xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers); + pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers); + + if (xive->vp_base == XIVE_INVALID_VP) + return -ENOSPC; + } + + vp_id = kvmppc_xive_vp(xive, cpu); + if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { + pr_devel("Duplicate !\n"); + return -EEXIST; + } + + *vp = vp_id; + + return 0; +} + int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu) { @@ -1229,20 +1268,13 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, return -EPERM; if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; - if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { - pr_devel("Out of bounds !\n"); - return -EINVAL; - } /* We need to synchronize with queue provisioning */ mutex_lock(&xive->lock); - vp_id = kvmppc_xive_vp(xive, cpu); - if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { - pr_devel("Duplicate !\n"); - r = -EEXIST; + r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id); + if (r) goto bail; - } xc = kzalloc(sizeof(*xc), GFP_KERNEL); if (!xc) { @@ -1834,6 +1866,43 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, return 0; } +int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr) +{ + u32 __user *ubufp = (u32 __user *) addr; + u32 nr_servers; + int rc = 0; + + if (get_user(nr_servers, ubufp)) + return -EFAULT; + + pr_devel("%s nr_servers=%u\n", __func__, nr_servers); + + if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID) + return -EINVAL; + + mutex_lock(&xive->lock); + if (xive->vp_base != XIVE_INVALID_VP) + /* The VP block is allocated once and freed when the device + * is released. Better not allow to change its size since its + * used by connect_vcpu to validate vCPU ids are valid (eg, + * setting it back to a higher value could allow connect_vcpu + * to come up with a VP id that goes beyond the VP block, which + * is likely to cause a crash in OPAL). + */ + rc = -EBUSY; + else if (nr_servers > KVM_MAX_VCPUS) + /* We don't need more servers. Higher vCPU ids get packed + * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id(). + */ + xive->nr_servers = KVM_MAX_VCPUS; + else + xive->nr_servers = nr_servers; + + mutex_unlock(&xive->lock); + + return rc; +} + static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { struct kvmppc_xive *xive = dev->private; @@ -1842,6 +1911,11 @@ static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) switch (attr->group) { case KVM_DEV_XICS_GRP_SOURCES: return xive_set_source(xive, attr->attr, attr->addr); + case KVM_DEV_XICS_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XICS_NR_SERVERS: + return kvmppc_xive_set_nr_servers(xive, attr->addr); + } } return -ENXIO; } @@ -1867,6 +1941,11 @@ static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) attr->attr < KVMPPC_XICS_NR_IRQS) return 0; break; + case KVM_DEV_XICS_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XICS_NR_SERVERS: + return 0; + } } return -ENXIO; } @@ -2001,10 +2080,13 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) { struct kvmppc_xive *xive; struct kvm *kvm = dev->kvm; - int ret = 0; pr_devel("Creating xive for partition\n"); + /* Already there ? */ + if (kvm->arch.xive) + return -EEXIST; + xive = kvmppc_xive_get_device(kvm, type); if (!xive) return -ENOMEM; @@ -2014,12 +2096,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) xive->kvm = kvm; mutex_init(&xive->lock); - /* Already there ? */ - if (kvm->arch.xive) - ret = -EEXIST; - else - kvm->arch.xive = xive; - /* We use the default queue size set by the host */ xive->q_order = xive_native_default_eq_shift(); if (xive->q_order < PAGE_SHIFT) @@ -2027,18 +2103,16 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) else xive->q_page_order = xive->q_order - PAGE_SHIFT; - /* Allocate a bunch of VPs */ - xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); - pr_devel("VP_Base=%x\n", xive->vp_base); - - if (xive->vp_base == XIVE_INVALID_VP) - ret = -ENOMEM; + /* VP allocation is delayed to the first call to connect_vcpu */ + xive->vp_base = XIVE_INVALID_VP; + /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets + * on a POWER9 system. + */ + xive->nr_servers = KVM_MAX_VCPUS; xive->single_escalation = xive_native_has_single_escalation(); - if (ret) - return ret; - + kvm->arch.xive = xive; return 0; } @@ -2108,9 +2182,9 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x" + seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x" " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", - xc->server_num, xc->cppr, xc->hw_cppr, + xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr, xc->mfrr, xc->pending, xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index fe3ed50e0818..382e3a56e789 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -135,6 +135,9 @@ struct kvmppc_xive { /* Flags */ u8 single_escalation; + /* Number of entries in the VP block */ + u32 nr_servers; + struct kvmppc_xive_ops *ops; struct address_space *mapping; struct mutex mapping_lock; @@ -296,6 +299,8 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, struct kvmppc_xive_vcpu *xc, int irq); +int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); +int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 78b906ffa0d2..34bd123fa024 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -118,19 +118,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, return -EPERM; if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; - if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { - pr_devel("Out of bounds !\n"); - return -EINVAL; - } mutex_lock(&xive->lock); - vp_id = kvmppc_xive_vp(xive, server_num); - if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { - pr_devel("Duplicate !\n"); - rc = -EEXIST; + rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id); + if (rc) goto bail; - } xc = kzalloc(sizeof(*xc), GFP_KERNEL); if (!xc) { @@ -928,6 +921,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, return kvmppc_xive_reset(xive); case KVM_DEV_XIVE_EQ_SYNC: return kvmppc_xive_native_eq_sync(xive); + case KVM_DEV_XIVE_NR_SERVERS: + return kvmppc_xive_set_nr_servers(xive, attr->addr); } break; case KVM_DEV_XIVE_GRP_SOURCE: @@ -967,6 +962,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_XIVE_RESET: case KVM_DEV_XIVE_EQ_SYNC: + case KVM_DEV_XIVE_NR_SERVERS: return 0; } break; @@ -1067,7 +1063,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) { struct kvmppc_xive *xive; struct kvm *kvm = dev->kvm; - int ret = 0; pr_devel("Creating xive native device\n"); @@ -1081,27 +1076,20 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) dev->private = xive; xive->dev = dev; xive->kvm = kvm; - kvm->arch.xive = xive; mutex_init(&xive->mapping_lock); mutex_init(&xive->lock); - /* - * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for - * a default. Getting the max number of CPUs the VM was - * configured with would improve our usage of the XIVE VP space. + /* VP allocation is delayed to the first call to connect_vcpu */ + xive->vp_base = XIVE_INVALID_VP; + /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets + * on a POWER9 system. */ - xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); - pr_devel("VP_Base=%x\n", xive->vp_base); - - if (xive->vp_base == XIVE_INVALID_VP) - ret = -ENXIO; + xive->nr_servers = KVM_MAX_VCPUS; xive->single_escalation = xive_native_has_single_escalation(); xive->ops = &kvmppc_xive_native_ops; - if (ret) - return ret; - + kvm->arch.xive = xive; return 0; } @@ -1204,8 +1192,8 @@ static int xive_native_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", - xc->server_num, + seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", + xc->server_num, xc->vp_id, vcpu->arch.xive_saved_state.nsr, vcpu->arch.xive_saved_state.cppr, vcpu->arch.xive_saved_state.ipb, diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 321db0fdb9db..425d13806645 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -355,9 +355,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (tlbsel == 1) { struct vm_area_struct *vma; - down_read(¤t->mm->mmap_sem); + down_read(&kvm->mm->mmap_sem); - vma = find_vma(current->mm, hva); + vma = find_vma(kvm->mm, hva); if (vma && hva >= vma->vm_start && (vma->vm_flags & VM_PFNMAP)) { /* @@ -441,7 +441,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); } - up_read(¤t->mm->mmap_sem); + up_read(&kvm->mm->mmap_sem); } if (likely(!pfnmap)) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3a77bb643452..9e085e931d74 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -522,6 +522,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; + case KVM_CAP_PPC_GUEST_DEBUG_SSTEP: + /* fall through */ case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: |