From fb341f572d26e0786167cd96b90cc4febed830cf Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Sat, 5 Dec 2009 12:34:11 -0200 Subject: KVM: MMU: remove prefault from invlpg handler The invlpg prefault optimization breaks Windows 2008 R2 occasionally. The visible effect is that the invlpg handler instantiates a pte which is, microseconds later, written with a different gfn by another vcpu. The OS could have other mechanisms to prevent a present translation from being used, which the hypervisor is unaware of. While the documentation states that the cpu is at liberty to prefetch tlb entries, it looks like this is not heeded, so remove tlb prefetch from invlpg. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a6017132fba8..58a0f1e88596 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -455,8 +455,6 @@ out_unlock: static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { struct kvm_shadow_walk_iterator iterator; - pt_element_t gpte; - gpa_t pte_gpa = -1; int level; u64 *sptep; int need_flush = 0; @@ -470,10 +468,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (level == PT_PAGE_TABLE_LEVEL || ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - - pte_gpa = (sp->gfn << PAGE_SHIFT); - pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); if (is_shadow_present_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); @@ -492,18 +486,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (need_flush) kvm_flush_remote_tlbs(vcpu->kvm); spin_unlock(&vcpu->kvm->mmu_lock); - - if (pte_gpa == -1) - return; - if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, - sizeof(pt_element_t))) - return; - if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) { - if (mmu_topup_memory_caches(vcpu)) - return; - kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, - sizeof(pt_element_t), 0); - } } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) -- cgit v1.2.3 From 6e24a6eff4571002cd48b99a2b92dc829ce39cb9 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 14 Dec 2009 17:37:35 -0200 Subject: KVM: LAPIC: make sure IRR bitmap is scanned after vm load The vcpus are initialized with irr_pending set to false, but loading the LAPIC registers with pending IRR fails to reset the irr_pending variable. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cd60c0bd1b32..3063a0c4858b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1150,6 +1150,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) hrtimer_cancel(&apic->lapic_timer.timer); update_divide_count(apic); start_apic_timer(apic); + apic->irr_pending = true; } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From dab4b911a5327859bb8f969249c6978c26cd4853 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 6 Dec 2009 18:24:15 +0100 Subject: KVM: x86: Extend KVM_SET_VCPU_EVENTS with selective updates User space may not want to overwrite asynchronously changing VCPU event states on write-back. So allow to skip nmi.pending and sipi_vector by setting corresponding bits in the flags field of kvm_vcpu_events. [avi: advertise the bits in KVM_GET_VCPU_EVENTS] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 4 ++++ arch/x86/kvm/x86.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 950df434763f..f46b79f6c16c 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -254,6 +254,10 @@ struct kvm_reinject_control { __u8 reserved[31]; }; +/* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ +#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 +#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9d068966fb2a..6651dbf58675 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1913,7 +1913,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->sipi_vector = vcpu->arch.sipi_vector; - events->flags = 0; + events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING + | KVM_VCPUEVENT_VALID_SIPI_VECTOR); vcpu_put(vcpu); } @@ -1921,7 +1922,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { - if (events->flags) + if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING + | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) return -EINVAL; vcpu_load(vcpu); @@ -1938,10 +1940,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_pic_clear_isr_ack(vcpu->kvm); vcpu->arch.nmi_injected = events->nmi.injected; - vcpu->arch.nmi_pending = events->nmi.pending; + if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) + vcpu->arch.nmi_pending = events->nmi.pending; kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); - vcpu->arch.sipi_vector = events->sipi_vector; + if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) + vcpu->arch.sipi_vector = events->sipi_vector; vcpu_put(vcpu); -- cgit v1.2.3 From a662b8135a1f9fee7d3f9129498cb03f3d6ce772 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Thu, 17 Dec 2009 17:05:03 -0800 Subject: KVM: ia64: fix build breakage due to host spinlock change Len Brown pointed out that allmodconfig is broken for ia64 because of: arch/ia64/kvm/vmm.c: In function 'vmm_spin_unlock': arch/ia64/kvm/vmm.c:70: error: 'spinlock_t' has no member named 'raw_lock' KVM has it's own spinlock routines. It should not depend on the base kernel spinlock_t type (which changed when ia64 switched to ticket locks). Define its own vmm_spinlock_t type. Signed-off-by: Tony Luck Signed-off-by: Avi Kivity --- arch/ia64/kvm/vcpu.h | 9 ++++++--- arch/ia64/kvm/vmm.c | 4 ++-- arch/ia64/kvm/vtlb.c | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h index 360724d3ae69..988911b4cc7a 100644 --- a/arch/ia64/kvm/vcpu.h +++ b/arch/ia64/kvm/vcpu.h @@ -388,6 +388,9 @@ static inline u64 __gpfn_is_io(u64 gpfn) #define _vmm_raw_spin_lock(x) do {}while(0) #define _vmm_raw_spin_unlock(x) do {}while(0) #else +typedef struct { + volatile unsigned int lock; +} vmm_spinlock_t; #define _vmm_raw_spin_lock(x) \ do { \ __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ @@ -405,12 +408,12 @@ static inline u64 __gpfn_is_io(u64 gpfn) #define _vmm_raw_spin_unlock(x) \ do { barrier(); \ - ((spinlock_t *)x)->raw_lock.lock = 0; } \ + ((vmm_spinlock_t *)x)->lock = 0; } \ while (0) #endif -void vmm_spin_lock(spinlock_t *lock); -void vmm_spin_unlock(spinlock_t *lock); +void vmm_spin_lock(vmm_spinlock_t *lock); +void vmm_spin_unlock(vmm_spinlock_t *lock); enum { I_TLB = 1, D_TLB = 2 diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c index f4b4c899bb6c..7a62f75778c5 100644 --- a/arch/ia64/kvm/vmm.c +++ b/arch/ia64/kvm/vmm.c @@ -60,12 +60,12 @@ static void __exit kvm_vmm_exit(void) return ; } -void vmm_spin_lock(spinlock_t *lock) +void vmm_spin_lock(vmm_spinlock_t *lock) { _vmm_raw_spin_lock(lock); } -void vmm_spin_unlock(spinlock_t *lock) +void vmm_spin_unlock(vmm_spinlock_t *lock) { _vmm_raw_spin_unlock(lock); } diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index 20b3852f7a6e..4332f7ee5203 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -182,7 +182,7 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) { u64 i, dirty_pages = 1; u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; - spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); + vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; -- cgit v1.2.3 From 5279aeb4b96cbdac22a0c2f8137dd0bbc35f02b8 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sat, 19 Dec 2009 18:07:39 +0100 Subject: KVM: powerpc: Fix mtsrin in book3s_64 mmu We were shifting the Ks/Kp/N bits one bit too far on mtsrin. It took me some time to figure that out, so I also put in some debugging and a comment explaining the conversion. This fixes current OpenBIOS boot on PPC64 KVM. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s_64_mmu.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 5598f88f142e..e4beeb371a73 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -390,6 +390,26 @@ static void kvmppc_mmu_book3s_64_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, { u64 rb = 0, rs = 0; + /* + * According to Book3 2.01 mtsrin is implemented as: + * + * The SLB entry specified by (RB)32:35 is loaded from register + * RS, as follows. + * + * SLBE Bit Source SLB Field + * + * 0:31 0x0000_0000 ESID-0:31 + * 32:35 (RB)32:35 ESID-32:35 + * 36 0b1 V + * 37:61 0x00_0000|| 0b0 VSID-0:24 + * 62:88 (RS)37:63 VSID-25:51 + * 89:91 (RS)33:35 Ks Kp N + * 92 (RS)36 L ((RS)36 must be 0b0) + * 93 0b0 C + */ + + dprintk("KVM MMU: mtsrin(0x%x, 0x%lx)\n", srnum, value); + /* ESID = srnum */ rb |= (srnum & 0xf) << 28; /* Set the valid bit */ @@ -400,7 +420,7 @@ static void kvmppc_mmu_book3s_64_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, /* VSID = VSID */ rs |= (value & 0xfffffff) << 12; /* flags = flags */ - rs |= ((value >> 27) & 0xf) << 9; + rs |= ((value >> 28) & 0x7) << 9; kvmppc_mmu_book3s_64_slbmte(vcpu, rs, rb); } -- cgit v1.2.3