From c05c4186bbe4e99d64e8a36f7ca7f480da5d109f Mon Sep 17 00:00:00 2001
From: Jens Freimann
Date: Mon, 7 Oct 2013 16:13:45 +0200
Subject: KVM: s390: add floating irq controller

This patch adds a floating irq controller as a kvm_device.
It will be necessary for migration of floating interrupts as well
as for hardening the reset code by allowing user space to explicitly
remove all pending floating interrupts.

Signed-off-by: Jens Freimann
Reviewed-by: Cornelia Huck
Signed-off-by: Christian Borntraeger
---
 virt/kvm/kvm_main.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'virt')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 03a0381b1cb7..a9e999a48e43 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2283,6 +2283,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	case KVM_DEV_TYPE_ARM_VGIC_V2:
 		ops = &kvm_arm_vgic_v2_ops;
 		break;
+#endif
+#ifdef CONFIG_S390
+	case KVM_DEV_TYPE_FLIC:
+		ops = &kvm_flic_ops;
+		break;
 #endif
 	default:
 		return -ENODEV;
--
cgit v1.2.3


From e0ead41a6dac09f86675ce07a66e4b253a9b7bd5 Mon Sep 17 00:00:00 2001
From: Dominik Dingel
Date: Thu, 6 Jun 2013 15:32:37 +0200
Subject: KVM: async_pf: Provide additional direct page notification

By setting a Kconfig option, the architecture can control when guest
notifications will be presented by the apf backend. There is the
default batch mechanism, working as before, where the vcpu thread
should pull in this information. In contrast, there is now a direct
mechanism that will push the information to the guest. This way s390
can use an already existing architecture interface. The vcpu thread
should still call check_completion to clean up leftovers.

Signed-off-by: Dominik Dingel
Signed-off-by: Christian Borntraeger
---
 virt/kvm/Kconfig    |  4 ++++
 virt/kvm/async_pf.c | 20 ++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fbe1a48bd629..13f2d19793e3 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -22,6 +22,10 @@ config KVM_MMIO
 config KVM_ASYNC_PF
 	bool
 
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+	bool
+
 config HAVE_KVM_MSI
 	bool
 
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8631d9c14320..00980ab02c45 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
 #include "async_pf.h"
 #include
 
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+					       struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+	kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+						struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+	kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
 static struct kmem_cache *async_pf_cache;
 
 int kvm_async_pf_init(void)
@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work)
 	down_read(&mm->mmap_sem);
 	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
+	kvm_async_page_present_sync(vcpu, apf);
 	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
@@ -138,7 +154,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 	}
 }
 
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 		       struct kvm_arch_async_pf *arch)
 {
 	struct kvm_async_pf *work;
@@ -159,7 +175,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 	work->wakeup_all = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
-	work->addr = gfn_to_hva(vcpu->kvm, gfn);
+	work->addr = hva;
 	work->arch = *arch;
 	work->mm = current->mm;
 	atomic_inc(&work->mm->mm_count);
--
cgit v1.2.3


From 9f2ceda49c6b8827c795731c204f6c2587886e2c Mon Sep 17 00:00:00 2001
From: Dominik Dingel
Date: Tue, 3 Sep 2013 12:31:16 +0200
Subject: KVM: async_pf: Allow to wait for outstanding work

On s390 we are not able to cancel work. Instead we will flush the work
and wait for completion.

Signed-off-by: Dominik Dingel
Signed-off-by: Christian Borntraeger
---
 virt/kvm/async_pf.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'virt')

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 00980ab02c45..889aad022014 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -113,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_entry(vcpu->async_pf.queue.next,
 				   typeof(*work), queue);
 		list_del(&work->queue);
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+		flush_work(&work->work);
+#else
 		if (cancel_work_sync(&work->work)) {
 			mmdrop(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
 		}
+#endif
 	}
 
 	spin_lock(&vcpu->async_pf.lock);
--
cgit v1.2.3


From 1179ba539541347d5427cde8bcfdaa5ead14f3aa Mon Sep 17 00:00:00 2001
From: Dominik Dingel
Date: Fri, 31 Jan 2014 14:32:46 +0100
Subject: KVM: async_pf: Add missing call for async page present

Commit "KVM: async_pf: Provide additional direct page notification"
missed the call from kvm_check_async_pf_completion to the newly
introduced function.

Reported-by: Paolo Bonzini
Signed-off-by: Dominik Dingel
Signed-off-by: Paolo Bonzini
---
 virt/kvm/async_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'virt')

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 889aad022014..10df100c4514 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -151,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		spin_unlock(&vcpu->async_pf.lock);
 
 		kvm_arch_async_page_ready(vcpu, work);
-		kvm_arch_async_page_present(vcpu, work);
+		kvm_async_page_present_async(vcpu, work);
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;
--
cgit v1.2.3


From 52480137d82062bb8d0fb778cb9934667958e367 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Sat, 8 Feb 2014 08:51:57 +0100
Subject: asmlinkage, kvm: Make kvm_rebooting visible

kvm_rebooting is referenced from assembler code and thus needs to be
visible.

Cc: Gleb Natapov
Cc: Paolo Bonzini
Signed-off-by: Andi Kleen
Link: http://lkml.kernel.org/r/1391845930-28580-1-git-send-email-ak@linux.intel.com
Signed-off-by: H. Peter Anvin
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'virt')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 03a0381b1cb7..b5ec7fb986f6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,7 +102,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm *kvm,
 				    struct kvm_memory_slot *memslot, gfn_t gfn);
 
-bool kvm_rebooting;
+__visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
--
cgit v1.2.3


From 5befdc385ddb2d5ae8995ad89004529a3acf58fc Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Tue, 18 Feb 2014 17:22:47 +0900
Subject: KVM: Simplify kvm->tlbs_dirty handling

When this was introduced, kvm_flush_remote_tlbs() could be called
without holding mmu_lock.
It is now acknowledged that the function must be called before
releasing mmu_lock, and all callers have already been changed to do
so. There is no need to use smp_mb() and cmpxchg() any more.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Paolo Bonzini
---
 virt/kvm/kvm_main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a9e999a48e43..f5668a431d54 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -186,12 +186,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	long dirty_count = kvm->tlbs_dirty;
-
-	smp_mb();
 	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
-	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+	kvm->tlbs_dirty = false;
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
--
cgit v1.2.3


From 98f4a14676127397c54cab7d6119537ed4d113a2 Mon Sep 17 00:00:00 2001
From: Michael Mueller
Date: Wed, 26 Feb 2014 16:14:18 +0100
Subject: KVM: add kvm_arch_vcpu_runnable() test to kvm_vcpu_on_spin() loop

Use the arch-specific function kvm_arch_vcpu_runnable() to add a
further criterion to identify a suitable vcpu to yield to during
undirected yield processing.

Signed-off-by: Michael Mueller
Reviewed-by: Christian Borntraeger
Signed-off-by: Christian Borntraeger
Signed-off-by: Paolo Bonzini
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'virt')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f5668a431d54..5fd4cf8e8888 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1801,7 +1801,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (vcpu == me)
 				continue;
-			if (waitqueue_active(&vcpu->wq))
+			if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;
--
cgit v1.2.3


From 9d830d47c7a756da6f0b55fa25d51eec0739eb02 Mon Sep 17 00:00:00 2001
From: Alex Williamson
Date: Wed, 26 Feb 2014 11:38:40 -0700
Subject: kvm/vfio: Support for DMA coherent IOMMUs

VFIO now has support for using the IOMMU_CACHE flag and a mechanism
for an external user to test the current operating mode of the IOMMU.
Add support for this to the kvm-vfio pseudo device so that we only
register noncoherent DMA when necessary.

Signed-off-by: Alex Williamson
Cc: Gleb Natapov
Cc: Paolo Bonzini
Acked-by: Paolo Bonzini
---
 virt/kvm/vfio.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index b4f9507ae650..ba1a93f935c7 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -59,6 +59,22 @@ static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
 	symbol_put(vfio_group_put_external_user);
 }
 
+static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
+{
+	long (*fn)(struct vfio_group *, unsigned long);
+	long ret;
+
+	fn = symbol_get(vfio_external_check_extension);
+	if (!fn)
+		return false;
+
+	ret = fn(vfio_group, VFIO_DMA_CC_IOMMU);
+
+	symbol_put(vfio_external_check_extension);
+
+	return ret > 0;
+}
+
 /*
  * Groups can use the same or different IOMMU domains. If the same then
 * adding a new group may change the coherency of groups we've previously
@@ -75,13 +91,10 @@ static void kvm_vfio_update_coherency(struct kvm_device *dev)
 	mutex_lock(&kv->lock);
 
 	list_for_each_entry(kvg, &kv->group_list, node) {
-		/*
-		 * TODO: We need an interface to check the coherency of
-		 * the IOMMU domain this group is using. For now, assume
-		 * it's always noncoherent.
-		 */
-		noncoherent = true;
-		break;
+		if (!kvm_vfio_group_is_coherent(kvg->vfio_group)) {
+			noncoherent = true;
+			break;
+		}
 	}
 
 	if (noncoherent != kv->noncoherent) {
--
cgit v1.2.3


From 100943c54e0947a07d2c0185368fc2fd848f7f28 Mon Sep 17 00:00:00 2001
From: "Gabriel L. Somlo"
Date: Thu, 27 Feb 2014 23:06:17 -0500
Subject: kvm: x86: ignore ioapic polarity

Both QEMU and KVM have already accumulated a significant number of
optimizations based on the hard-coded assumption that ioapic polarity
will always use the ActiveHigh convention, where the logical and
physical states of level-triggered irq lines always match (i.e.,
active(asserted) == high == 1, inactive == low == 0). QEMU guests are
expected to follow directions given via ACPI and configure the ioapic
with polarity 0 (ActiveHigh). However, even when misbehaving guests
(e.g. OS X <= 10.9) set the ioapic polarity to 1 (ActiveLow), QEMU
will still use the ActiveHigh signaling convention when interfacing
with KVM.

This patch modifies KVM to completely ignore ioapic polarity as set by
the guest OS, enabling misbehaving guests to work alongside those
which comply with the ActiveHigh polarity specified by QEMU's ACPI
tables.

Signed-off-by: Michael S. Tsirkin
Signed-off-by: Gabriel L. Somlo
[Move documentation to KVM_IRQ_LINE, add ia64. - Paolo]
Signed-off-by: Paolo Bonzini
---
 virt/kvm/ioapic.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'virt')

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ce9ed99ad7dc..1539d3757a04 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -328,7 +328,6 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
 					 irq_source_id, level);
 	entry = ioapic->redirtbl[irq];
-	irq_level ^= entry.fields.polarity;
 	if (!irq_level) {
 		ioapic->irr &= ~mask;
 		ret = 1;
--
cgit v1.2.3


From 684a0b719ddbbafe1c7e6646b9bc239453a1773d Mon Sep 17 00:00:00 2001
From: Cornelia Huck
Date: Mon, 17 Mar 2014 19:11:35 +0100
Subject: KVM: eventfd: Fix lock order inversion.

When registering a new irqfd, we call its ->poll method to collect any
event that might have previously been pending so that we can trigger
it. This is done under the kvm->irqfds.lock, which means the eventfd's
ctx lock is taken under it.

However, if we get a POLLHUP in irqfd_wakeup, we will be called with
the ctx lock held before getting the irqfds.lock to deactivate the
irqfd, causing lockdep to complain.

Calling the ->poll method does not really need the irqfds.lock, so
let's just move it after we've given up the irqfds.lock in
kvm_irqfd_assign().
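To see the inversion concretely, here is a minimal standalone sketch
(illustrative only, not kernel code): pthread mutexes stand in for
kvm->irqfds.lock and the eventfd ctx lock, and the function names are
hypothetical.

    #include <pthread.h>

    static pthread_mutex_t irqfds_lock = PTHREAD_MUTEX_INITIALIZER; /* lock A */
    static pthread_mutex_t ctx_lock    = PTHREAD_MUTEX_INITIALIZER; /* lock B */

    /* Old registration order: A is held while ->poll takes B. */
    static void assign_irqfd(void)
    {
        pthread_mutex_lock(&irqfds_lock);
        pthread_mutex_lock(&ctx_lock);      /* B taken under A */
        pthread_mutex_unlock(&ctx_lock);
        pthread_mutex_unlock(&irqfds_lock);
    }

    /* POLLHUP path: entered with B held, then takes A -- the inversion. */
    static void pollhup_wakeup(void)
    {
        pthread_mutex_lock(&ctx_lock);
        pthread_mutex_lock(&irqfds_lock);   /* A taken under B */
        pthread_mutex_unlock(&irqfds_lock);
        pthread_mutex_unlock(&ctx_lock);
    }

    int main(void)
    {
        assign_irqfd();     /* establishes A -> B ordering */
        pollhup_wakeup();   /* establishes B -> A: what lockdep flags */
        return 0;
    }

The patch below removes the A-then-B chain entirely by calling ->poll
only after irqfds.lock has been dropped.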
Signed-off-by: Cornelia Huck
Signed-off-by: Paolo Bonzini
---
 virt/kvm/eventfd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index abe4d6043b36..29c2a04e036e 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -391,19 +391,19 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 				lockdep_is_held(&kvm->irqfds.lock));
 	irqfd_update(kvm, irqfd, irq_rt);
 
-	events = f.file->f_op->poll(f.file, &irqfd->pt);
-
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
 
+	spin_unlock_irq(&kvm->irqfds.lock);
+
 	/*
 	 * Check if there was an event already pending on the eventfd
 	 * before we registered, and trigger it as if we didn't miss it.
 	 */
+	events = f.file->f_op->poll(f.file, &irqfd->pt);
+
 	if (events & POLLIN)
 		schedule_work(&irqfd->inject);
 
-	spin_unlock_irq(&kvm->irqfds.lock);
-
 	/*
 	 * do not drop the file until the irqfd is fully initialized, otherwise
 	 * we might race against the POLLHUP
--
cgit v1.2.3


From 0b10a1c87a2b0fb459baaefba9cb163dbb8d3344 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Tue, 18 Mar 2014 11:51:29 +0100
Subject: KVM: ioapic: merge ioapic_deliver into ioapic_service

Commonize the handling of masking, which was absent for
kvm_ioapic_set_irq. Setting remote_irr does not need a separate
function either, and merging the two functions avoids confusion.

Reviewed-by: Alex Williamson
Signed-off-by: Paolo Bonzini
---
 virt/kvm/ioapic.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1539d3757a04..0b4914147b9d 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,7 @@
 #else
 #define ioapic_debug(fmt, arg...)
 #endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
 		bool line_status);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -163,23 +163,6 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
 	return false;
 }
 
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
-		bool line_status)
-{
-	union kvm_ioapic_redirect_entry *pent;
-	int injected = -1;
-
-	pent = &ioapic->redirtbl[idx];
-
-	if (!pent->fields.mask) {
-		injected = ioapic_deliver(ioapic, idx, line_status);
-		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-			pent->fields.remote_irr = 1;
-	}
-
-	return injected;
-}
-
 static void update_handled_vectors(struct kvm_ioapic *ioapic)
 {
 	DECLARE_BITMAP(handled_vectors, 256);
@@ -282,12 +265,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	}
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 {
 	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
 	struct kvm_lapic_irq irqe;
 	int ret;
 
+	if (entry->fields.mask)
+		return -1;
+
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
 		     entry->fields.dest_id, entry->fields.dest_mode,
@@ -310,6 +296,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
 	} else
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
 
+	if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+		entry->fields.remote_irr = 1;
+
 	return ret;
 }
 
@@ -393,7 +382,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+		if (ioapic->irr & (1 << i))
 			ioapic_service(ioapic, i, false);
 	}
 }
--
cgit v1.2.3


From 0bc830b05c667218d703f2026ec866c49df974fc Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Tue, 18 Mar 2014 10:47:17 +0100
Subject: KVM: ioapic: clear IRR for edge-triggered interrupts at delivery

This ensures that IRR bits are set in the KVM_GET_IRQCHIP result only
if the interrupt is still sitting in the IOAPIC. After the next
patches, it avoids spurious reinjection of the interrupt when
KVM_SET_IRQCHIP is called.

Reviewed-by: Alex Williamson
Signed-off-by: Paolo Bonzini
---
 virt/kvm/ioapic.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'virt')

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 0b4914147b9d..25e16a6898ed 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -288,6 +288,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 	irqe.level = 1;
 	irqe.shorthand = 0;
 
+	if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+		ioapic->irr &= ~(1 << irq);
+
 	if (irq == RTC_GSI && line_status) {
 		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
--
cgit v1.2.3


From 44847dea79751e95665a439f8c63a65e51da8e1f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Tue, 18 Mar 2014 12:00:14 +0100
Subject: KVM: ioapic: extract body of kvm_ioapic_set_irq

We will reuse it to process a nonzero IRR that is passed to
KVM_SET_IRQCHIP.

Reviewed-by: Alex Williamson
Signed-off-by: Paolo Bonzini
---
 virt/kvm/ioapic.c | 74 +++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 50 insertions(+), 24 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 25e16a6898ed..270f7fe73f39 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -163,6 +163,55 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
 	return false;
 }
 
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+		int irq_level, bool line_status)
+{
+	union kvm_ioapic_redirect_entry entry;
+	u32 mask = 1 << irq;
+	u32 old_irr;
+	int edge, ret;
+
+	entry = ioapic->redirtbl[irq];
+	edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+	if (!irq_level) {
+		ioapic->irr &= ~mask;
+		ret = 1;
+		goto out;
+	}
+
+	/*
+	 * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+	 * this only happens if a previous edge has not been delivered due
+	 * to masking. For level interrupts, the remote_irr field tells
+	 * us if the interrupt is waiting for an EOI.
+	 *
+	 * RTC is special: it is edge-triggered, but userspace likes to know
+	 * if it has already been ack-ed via EOI because coalesced RTC
+	 * interrupts lead to time drift in Windows guests. So we track
+	 * EOI manually for the RTC interrupt.
+	 */
+	if (irq == RTC_GSI && line_status &&
+	    rtc_irq_check_coalesced(ioapic)) {
+		ret = 0;
+		goto out;
+	}
+
+	old_irr = ioapic->irr;
+	ioapic->irr |= mask;
+	if ((edge && old_irr == ioapic->irr) ||
+	    (!edge && entry.fields.remote_irr)) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+	return ret;
+}
+
 static void update_handled_vectors(struct kvm_ioapic *ioapic)
 {
 	DECLARE_BITMAP(handled_vectors, 256);
@@ -308,38 +357,15 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		       int level, bool line_status)
 {
-	u32 old_irr;
-	u32 mask = 1 << irq;
-	union kvm_ioapic_redirect_entry entry;
 	int ret, irq_level;
 
 	BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
 
 	spin_lock(&ioapic->lock);
-	old_irr = ioapic->irr;
 	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
 					 irq_source_id, level);
-	entry = ioapic->redirtbl[irq];
-	if (!irq_level) {
-		ioapic->irr &= ~mask;
-		ret = 1;
-	} else {
-		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+	ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
 
-		if (irq == RTC_GSI && line_status &&
-		    rtc_irq_check_coalesced(ioapic)) {
-			ret = 0; /* coalesced */
-			goto out;
-		}
-		ioapic->irr |= mask;
-		if ((edge && old_irr != ioapic->irr) ||
-		    (!edge && !entry.fields.remote_irr))
-			ret = ioapic_service(ioapic, irq, line_status);
-		else
-			ret = 0; /* report coalesced interrupt */
-	}
-out:
-	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	spin_unlock(&ioapic->lock);
 
 	return ret;
--
cgit v1.2.3


From 673f7b4257a1fe7b181e1a1182ecc2b6b2b795f1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Tue, 18 Mar 2014 11:39:23 +0100
Subject: KVM: ioapic: reinject pending interrupts on KVM_SET_IRQCHIP

After the previous patches, an interrupt whose bit is set in the IRR
register will never be in the LAPIC's IRR and has never been injected
on the migration source. So inject it on the destination.

This fixes migration of Windows guests without HPET (they use the RTC
to trigger the scheduler tick, and lose it after migration).
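For context, the userspace half of that flow is the
KVM_GET_IRQCHIP/KVM_SET_IRQCHIP ioctl pair. A hedged sketch of what a
VMM might do (illustrative only, not taken from QEMU; error handling
is omitted and the two vm file descriptors are assumed to be already
set up):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Sketch: save the ioapic on the source, restore it on the
     * destination. After this patch, any bit still set in
     * chip.chip.ioapic.irr is reinjected by kvm_set_ioapic(). */
    static int migrate_ioapic(int src_vm_fd, int dst_vm_fd)
    {
        struct kvm_irqchip chip = { .chip_id = KVM_IRQCHIP_IOAPIC };

        if (ioctl(src_vm_fd, KVM_GET_IRQCHIP, &chip) < 0)
            return -1;
        return ioctl(dst_vm_fd, KVM_SET_IRQCHIP, &chip);
    }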
Reviewed-by: Alex Williamson
Signed-off-by: Paolo Bonzini
---
 virt/kvm/ioapic.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'virt')

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 270f7fe73f39..d4b601547f1f 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -212,6 +212,18 @@ out:
 	return ret;
 }
 
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+	u32 idx;
+
+	rtc_irq_eoi_tracking_reset(ioapic);
+	for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+		ioapic_set_irq(ioapic, idx, 1, true);
+
+	kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
+
 static void update_handled_vectors(struct kvm_ioapic *ioapic)
 {
 	DECLARE_BITMAP(handled_vectors, 256);
@@ -612,9 +624,10 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+	ioapic->irr = 0;
 	update_handled_vectors(ioapic);
 	kvm_vcpu_request_scan_ioapic(kvm);
-	kvm_rtc_eoi_tracking_restore_all(ioapic);
+	kvm_ioapic_inject_all(ioapic, state->irr);
 	spin_unlock(&ioapic->lock);
 	return 0;
 }
--
cgit v1.2.3


From 5678de3f15010b9022ee45673f33bcfc71d47b60 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Fri, 28 Mar 2014 20:41:50 +0100
Subject: KVM: ioapic: fix assignment of ioapic->rtc_status.pending_eoi (CVE-2014-0155)

QE reported that they got the BUG_ON in ioapic_service to trigger. I
cannot reproduce it, but there are two reasons why this could happen.

The less likely but also easiest one is when kvm_irq_delivery_to_apic
does not deliver to any APIC and returns -1. Because irqe.shorthand ==
0, the kvm_for_each_vcpu loop in that function is never reached.
However, you can target the similar loop in
kvm_irq_delivery_to_apic_fast; just program a zero logical destination
address into the IOAPIC, or an out-of-range physical destination
address.

Signed-off-by: Paolo Bonzini
---
 virt/kvm/ioapic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'virt')

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index d4b601547f1f..d98d107efb05 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -356,7 +356,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
 				ioapic->rtc_status.dest_map);
-		ioapic->rtc_status.pending_eoi = ret;
+		ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
 	} else
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
 
--
cgit v1.2.3


From 4009b2499eee26c7f413073300d124a37765dfca Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Fri, 28 Mar 2014 20:41:51 +0100
Subject: KVM: ioapic: try to recover if pending_eoi goes out of range

The RTC tracking code tracks the cardinality of rtc_status.dest_map
into rtc_status.pending_eoi. It has some WARN_ONs that trigger if
pending_eoi ever becomes negative; however, these do not do anything
to recover, and bad things will happen soon after they trigger.

When the next RTC interrupt is triggered, rtc_check_coalesced() will
return false, but ioapic_service will find pending_eoi != 0 and do a
BUG_ON. To avoid this, should pending_eoi ever become negative, call
kvm_rtc_eoi_tracking_restore_all to recompute a correct dest_map and
pending_eoi.
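Put differently, the invariant being repaired is that pending_eoi
always equals the number of bits set in rtc_status.dest_map. A
one-function illustration of that recovery (not the patch itself,
which recomputes per vcpu via kvm_rtc_eoi_tracking_restore_all; the
helper name here is hypothetical and bitmap_weight() is used only for
brevity):

    /* Illustrative only: restore pending_eoi = |dest_map|. */
    static void rtc_recompute_pending_eoi(struct kvm_ioapic *ioapic)
    {
        ioapic->rtc_status.pending_eoi =
            bitmap_weight(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
    }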
Signed-off-by: Paolo Bonzini
---
 virt/kvm/ioapic.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index d98d107efb05..2458a1dc2ba9 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -97,6 +97,14 @@ static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
 	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
 }
 
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
+
+static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
+{
+	if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
+		kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
 static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
 {
 	bool new_val, old_val;
@@ -120,9 +128,8 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
 	} else {
 		__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
 		ioapic->rtc_status.pending_eoi--;
+		rtc_status_pending_eoi_check_valid(ioapic);
 	}
-
-	WARN_ON(ioapic->rtc_status.pending_eoi < 0);
 }
 
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
@@ -149,10 +156,10 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
 
 static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
 {
-	if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map))
+	if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
 		--ioapic->rtc_status.pending_eoi;
-
-	WARN_ON(ioapic->rtc_status.pending_eoi < 0);
+		rtc_status_pending_eoi_check_valid(ioapic);
+	}
 }
 
 static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
@@ -353,6 +360,12 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 		ioapic->irr &= ~(1 << irq);
 
 	if (irq == RTC_GSI && line_status) {
+		/*
+		 * pending_eoi cannot ever become negative (see
+		 * rtc_status_pending_eoi_check_valid) and the caller
+		 * ensures that it is only called if it is >= zero, namely
+		 * if rtc_irq_check_coalesced returns false.
+		 */
 		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
 				ioapic->rtc_status.dest_map);
--
cgit v1.2.3


From 553f809e23f00976caea7a1ebdabaa58a6383e7d Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Mon, 7 Apr 2014 01:36:08 +0800
Subject: arm, kvm: fix double lock on cpu_add_remove_lock

Commit 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration)
holds the lock before calling the two functions:

	kvm_vgic_hyp_init()
	kvm_timer_hyp_init()

and both of these functions call register_cpu_notifier() to register a
cpu notifier, causing a double lock on cpu_add_remove_lock.

Given that both functions are only called inside kvm_arch_init() while
holding cpu_add_remove_lock, simply use __register_cpu_notifier() to
fix the problem.

Fixes: 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration)
Signed-off-by: Ming Lei
Reviewed-by: Srivatsa S. Bhat
Signed-off-by: Rafael J. Wysocki
---
 virt/kvm/arm/arch_timer.c | 2 +-
 virt/kvm/arm/vgic.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 5081e809821f..22fa819a9b6a 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void)
 
 	host_vtimer_irq = ppi;
 
-	err = register_cpu_notifier(&kvm_timer_cpu_nb);
+	err = __register_cpu_notifier(&kvm_timer_cpu_nb);
 	if (err) {
 		kvm_err("Cannot register timer CPU notifier\n");
 		goto out_free;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8ca405cd7c1a..47b29834a6b6 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1496,7 +1496,7 @@ int kvm_vgic_hyp_init(void)
 		goto out;
 	}
 
-	ret = register_cpu_notifier(&vgic_cpu_nb);
+	ret = __register_cpu_notifier(&vgic_cpu_nb);
 	if (ret) {
 		kvm_err("Cannot register vgic CPU notifier\n");
 		goto out_free_irq;
--
cgit v1.2.3


From e8e249d78e0600cb892c87992b6c8c9ea3b301ae Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Fri, 21 Feb 2014 18:05:05 +0100
Subject: kvm: Use pci_enable_msix_exact() instead of pci_enable_msix()

As a result of the deprecation of the MSI-X/MSI enablement functions
pci_enable_msix() and pci_enable_msi_block(), all drivers using these
two interfaces need to be updated to use the new
pci_enable_msi_range() or pci_enable_msi_exact() and
pci_enable_msix_range() or pci_enable_msix_exact() interfaces.

Signed-off-by: Alexander Gordeev
Cc: Gleb Natapov
Cc: Paolo Bonzini
Cc: kvm@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Signed-off-by: Paolo Bonzini
---
 virt/kvm/assigned-dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'virt')

diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 8db43701016f..bf06577fea51 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -395,7 +395,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
 	if (dev->entries_nr == 0)
 		return r;
 
-	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+	r = pci_enable_msix_exact(dev->dev,
+				  dev->host_msix_entries, dev->entries_nr);
 	if (r)
 		return r;
 
--
cgit v1.2.3


From 91021a6c8ffdc55804dab5acdfc7de4f278b9ac3 Mon Sep 17 00:00:00 2001
From: Haibin Wang
Date: Thu, 10 Apr 2014 13:14:32 +0100
Subject: KVM: ARM: vgic: Fix sgi dispatch problem

When dispatching an SGI (mode == 0), the vcpu of the VM should send
the sgi only to the cpus in the target_cpus list. So a "break" must
be added to the case 0 branch.

Cc: # 3.10+
Signed-off-by: Haibin Wang
Acked-by: Marc Zyngier
Signed-off-by: Christoffer Dall
---
 virt/kvm/arm/vgic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'virt')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 47b29834a6b6..7e8b44efb739 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -916,6 +916,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 	case 0:
 		if (!target_cpus)
 			return;
+		break;
 
 	case 1:
 		target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
--
cgit v1.2.3


From 41c22f626254b9dc0376928cae009e73d1b6a49a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 21 Apr 2014 15:26:01 +0200
Subject: KVM: async_pf: mm->mm_users can not pin apf->mm

get_user_pages(mm) is simply wrong if mm->mm_users == 0 and
exit_mmap/etc was already called (or is in progress); mm->mm_count can
only pin mm->pgd and mm_struct itself.

Change kvm_setup_async_pf/async_pf_execute to inc/dec mm->mm_users.
kvm_create_vm/kvm_destroy_vm play with ->mm_count too, but this case
looks fine at first glance; it seems that this ->mm is only used to
verify that current->mm == kvm->mm.

Signed-off-by: Oleg Nesterov
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini
---
 virt/kvm/async_pf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 10df100c4514..06e6401d6ef4 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -101,7 +101,7 @@ static void async_pf_execute(struct work_struct *work)
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
 
-	mmdrop(mm);
+	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
 }
 
@@ -118,7 +118,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 		flush_work(&work->work);
 #else
 		if (cancel_work_sync(&work->work)) {
-			mmdrop(work->mm);
+			mmput(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
 		}
@@ -183,7 +183,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 	work->addr = hva;
 	work->arch = *arch;
 	work->mm = current->mm;
-	atomic_inc(&work->mm->mm_count);
+	atomic_inc(&work->mm->mm_users);
 	kvm_get_kvm(work->vcpu->kvm);
 
 	/* this can't really happen otherwise gfn_to_pfn_async
@@ -201,7 +201,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 	return 1;
 retry_sync:
 	kvm_put_kvm(work->vcpu->kvm);
-	mmdrop(work->mm);
+	mmput(work->mm);
 	kmem_cache_free(async_pf_cache, work);
 	return 0;
 }
--
cgit v1.2.3


From f2ae85b2ab3776b9e4e42e5b6fa090f40d396794 Mon Sep 17 00:00:00 2001
From: Andre Przywara
Date: Fri, 11 Apr 2014 00:07:18 +0200
Subject: KVM: arm/arm64: vgic: fix GICD_ICFGR register accesses

Since KVM internally represents the ICFGR registers by stuffing two of
them into one word, the offset for accessing the internal
representation and the one for the MMIO based access are different.
So keep the original offset around, but adjust the internal array
offset by one bit.

Reported-by: Haibin Wang
Signed-off-by: Andre Przywara
Signed-off-by: Marc Zyngier
Signed-off-by: Christoffer Dall
---
 virt/kvm/arm/vgic.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 7e8b44efb739..f9af48c9eb37 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -548,11 +548,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 	u32 val;
 	u32 *reg;
 
-	offset >>= 1;
 	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
-				  vcpu->vcpu_id, offset);
+				  vcpu->vcpu_id, offset >> 1);
 
-	if (offset & 2)
+	if (offset & 4)
 		val = *reg >> 16;
 	else
 		val = *reg & 0xffff;
@@ -561,13 +560,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 	vgic_reg_access(mmio, &val, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 	if (mmio->is_write) {
-		if (offset < 4) {
+		if (offset < 8) {
 			*reg = ~0U; /* Force PPIs/SGIs to 1 */
 			return false;
 		}
 
 		val = vgic_cfg_compress(val);
-		if (offset & 2) {
+		if (offset & 4) {
 			*reg &= 0xffff;
 			*reg |= val << 16;
 		} else {
--
cgit v1.2.3


From 30c2117085bc4e05d091cee6eba79f069b41a9cd Mon Sep 17 00:00:00 2001
From: Haibin Wang
Date: Tue, 29 Apr 2014 14:49:17 +0800
Subject: KVM: ARM: vgic: Fix the overlap check action about setting the
 GICD & GICC base address.

Currently the check below in vgic_ioaddr_overlap() will always succeed,
because the vgic dist base and vgic cpu base are still kept UNDEF
after initialization. The following code will always return 0:
	if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
		return 0;

So, before invoking vgic_ioaddr_overlap(), the corresponding base
address needs to be set first.

Signed-off-by: Haibin Wang
Acked-by: Marc Zyngier
Signed-off-by: Christoffer Dall
---
 virt/kvm/arm/vgic.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index f9af48c9eb37..56ff9bebb577 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1667,10 +1667,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
 	if (addr + size < addr)
 		return -EINVAL;
 
+	*ioaddr = addr;
 	ret = vgic_ioaddr_overlap(kvm);
 	if (ret)
-		return ret;
-	*ioaddr = addr;
+		*ioaddr = VGIC_ADDR_UNDEF;
+
 	return ret;
}
--
cgit v1.2.3
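For reference, the ioctl path that ends up in vgic_ioaddr_assign() is
KVM_ARM_SET_DEVICE_ADDR. A hedged userspace sketch of how a VMM would
set the two bases that this overlap check compares (illustrative only;
the guest-physical addresses below are placeholders and error handling
is minimal):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Sketch: program the GICD and GICC base addresses so that
     * vgic_ioaddr_overlap() has real values to compare. The addresses
     * are hypothetical, not a recommendation. */
    static int set_vgic_addrs(int vm_fd)
    {
        struct kvm_arm_device_addr dist = {
            .id   = (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT) |
                    KVM_VGIC_V2_ADDR_TYPE_DIST,
            .addr = 0x2c001000,     /* hypothetical GICD base */
        };
        struct kvm_arm_device_addr cpu = {
            .id   = (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT) |
                    KVM_VGIC_V2_ADDR_TYPE_CPU,
            .addr = 0x2c002000,     /* hypothetical GICC base */
        };

        if (ioctl(vm_fd, KVM_ARM_SET_DEVICE_ADDR, &dist) < 0)
            return -1;
        return ioctl(vm_fd, KVM_ARM_SET_DEVICE_ADDR, &cpu);
    }

With the patch above, each call stores the address before the overlap
check and rolls it back to VGIC_ADDR_UNDEF on failure, so the second
call sees the first call's base instead of an UNDEF placeholder.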