author     Alexander Graf <agraf@suse.de>   2013-04-17 15:20:38 +0200
committer  Alexander Graf <agraf@suse.de>   2013-04-17 15:20:38 +0200
commit     fca7567c30a45962401d8d0707e6b6d7adf90f9a (patch)
tree       8b67f90c33e9e67f7b7c61916796193a057e368c /arch
parent     fbfba342a719b49d9cd0837202cf5365ba46ca9b (diff)
parent     79558f112fc0352e057f7b5e158e3d88b8b62c60 (diff)
Merge commit 'origin/next' into kvm-ppc-next
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/kvm/arm.c                     3
-rw-r--r--  arch/ia64/kvm/lapic.h                  6
-rw-r--r--  arch/s390/kvm/intercept.c             12
-rw-r--r--  arch/s390/kvm/kvm-s390.c              32
-rw-r--r--  arch/s390/kvm/kvm-s390.h              12
-rw-r--r--  arch/s390/kvm/priv.c                 203
-rw-r--r--  arch/x86/include/asm/entry_arch.h      4
-rw-r--r--  arch/x86/include/asm/hardirq.h         3
-rw-r--r--  arch/x86/include/asm/hw_irq.h          1
-rw-r--r--  arch/x86/include/asm/irq_vectors.h     5
-rw-r--r--  arch/x86/include/asm/kvm_host.h       10
-rw-r--r--  arch/x86/include/asm/vmx.h             4
-rw-r--r--  arch/x86/kernel/entry_64.S             5
-rw-r--r--  arch/x86/kernel/irq.c                 22
-rw-r--r--  arch/x86/kernel/irqinit.c              4
-rw-r--r--  arch/x86/kvm/emulate.c                27
-rw-r--r--  arch/x86/kvm/i8254.c                   4
-rw-r--r--  arch/x86/kvm/lapic.c                 141
-rw-r--r--  arch/x86/kvm/lapic.h                  13
-rw-r--r--  arch/x86/kvm/mmu.c                    11
-rw-r--r--  arch/x86/kvm/paging_tmpl.h             1
-rw-r--r--  arch/x86/kvm/pmu.c                    14
-rw-r--r--  arch/x86/kvm/svm.c                    12
-rw-r--r--  arch/x86/kvm/vmx.c                   352
-rw-r--r--  arch/x86/kvm/x86.c                    49
25 files changed, 591 insertions, 359 deletions
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e4ad0bb01843..678596f699f3 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -805,7 +805,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) return 0; } -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + bool line_status) { u32 irq = irq_level->irq; unsigned int irq_type, vcpu_idx, irq_num; diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index c3e2935b6db4..c5f92a926a9a 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h @@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); #define kvm_apic_present(x) (true) #define kvm_lapic_enabled(x) (true) -static inline bool kvm_apic_vid_enabled(void) -{ - /* IA64 has no apicv supporting, do nothing here */ - return false; -} - #endif diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index c6ba4dfd7f1e..b7d1b2edeeb3 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -45,10 +45,8 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) do { rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], (u64 __user *) useraddr); - if (rc) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); useraddr += 8; if (reg == reg3) break; @@ -79,10 +77,8 @@ static int handle_lctl(struct kvm_vcpu *vcpu) reg = reg1; do { rc = get_guest(vcpu, val, (u32 __user *) useraddr); - if (rc) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; vcpu->arch.sie_block->gcr[reg] |= val; useraddr += 4; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 33161b4a8280..c1c7c683fa26 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -149,6 +149,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; @@ -633,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } else { VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); trace_kvm_s390_sie_fault(vcpu); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - rc = 0; + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", @@ -978,18 +980,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { - /* A few sanity checks. We can have exactly one memory slot which has - to start at guest virtual zero and which has to be located at a - page boundary in userland and which has to end at a page boundary. - The memory in userland is ok to be fragmented into various different - vmas. It is okay to mmap() and munmap() stuff in this slot after - doing this call at any time */ - - if (mem->slot) - return -EINVAL; - - if (mem->guest_phys_addr) - return -EINVAL; + /* A few sanity checks. We can have memory slots which have to be + located/ended at a segment boundary (1MB). The memory in userland is + ok to be fragmented into various different vmas. 
It is okay to mmap() + and munmap() stuff in this slot after doing this call at any time */ if (mem->userspace_addr & 0xffffful) return -EINVAL; @@ -1007,6 +1001,16 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, { int rc; + /* If the basics of the memslot do not change, we do not want + * to update the gmap. Every update causes several unnecessary + * segment translation exceptions. This is usually handled just + * fine by the normal fault handler + gmap, but it will also + * cause faults on the prefix page of running guest CPUs. + */ + if (old->userspace_addr == mem->userspace_addr && + old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && + old->npages * PAGE_SIZE == mem->memory_size) + return; rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, mem->guest_phys_addr, mem->memory_size); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 4d89d64a8161..efc14f687265 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); +int __must_check kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); +int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 7db2ad076f31..6bbd7b5a0bbe 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -14,6 +14,7 @@ #include <linux/kvm.h> #include <linux/gfp.h> #include <linux/errno.h> +#include <linux/compat.h> #include <asm/asm-offsets.h> #include <asm/current.h> #include <asm/debug.h> @@ -36,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* get the value */ - if (get_guest(vcpu, address, (u32 __user *) operand2)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (get_guest(vcpu, address, (u32 __user *) operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); address = address & 0x7fffe000u; /* make sure that the new value is valid memory */ if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || - (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); kvm_s390_set_prefix(vcpu, address); VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 1, address); -out: return 0; } @@ -74,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) 
operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); address = vcpu->arch.sie_block->prefix; address = address & 0x7fffe000u; /* get the value */ - if (put_guest(vcpu, address, (u32 __user *)operand2)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, address, (u32 __user *)operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 0, address); -out: return 0; } static int handle_store_cpu_address(struct kvm_vcpu *vcpu) { u64 useraddr; - int rc; vcpu->stat.instruction_stap++; useraddr = kvm_s390_get_base_disp_s(vcpu); - if (useraddr & 1) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (useraddr & 1) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr); - if (rc) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); trace_kvm_s390_handle_stap(vcpu, useraddr); -out: return 0; } @@ -135,10 +117,8 @@ static int handle_tpi(struct kvm_vcpu *vcpu) int cc; addr = kvm_s390_get_base_disp_s(vcpu); - if (addr & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (addr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); cc = 0; inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); if (!inti) @@ -167,7 +147,6 @@ no_interrupt: /* Set condition code and we're done. 
*/ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; -out: return 0; } @@ -237,12 +216,9 @@ static int handle_stfl(struct kvm_vcpu *vcpu) rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); if (rc) - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - else { - VCPU_EVENT(vcpu, 5, "store facility list value %x", - facility_list); - trace_kvm_s390_handle_stfl(vcpu, facility_list); - } + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); + trace_kvm_s390_handle_stfl(vcpu, facility_list); return 0; } @@ -255,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu) #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL -#define PSW_ADDR_24 0x00000000000fffffUL +#define PSW_ADDR_24 0x0000000000ffffffUL #define PSW_ADDR_31 0x000000007fffffffUL +static int is_valid_psw(psw_t *psw) { + if (psw->mask & PSW_MASK_UNASSIGNED) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { + if (psw->addr & ~PSW_ADDR_31) + return 0; + } + if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24)) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) + return 0; + return 1; +} + int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) { - u64 addr; + psw_t *gpsw = &vcpu->arch.sie_block->gpsw; psw_compat_t new_psw; + u64 addr; - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + if (gpsw->mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OPERATION); - addr = kvm_s390_get_base_disp_s(vcpu); - - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } - - if (!(new_psw.mask & PSW32_MASK_BASE)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - vcpu->arch.sie_block->gpsw.mask = - (new_psw.mask & ~PSW32_MASK_BASE) << 32; - vcpu->arch.sie_block->gpsw.addr = new_psw.addr; - - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (!(new_psw.mask & PSW32_MASK_BASE)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; + gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE; + gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; + if (!is_valid_psw(gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } static int handle_lpswe(struct kvm_vcpu *vcpu) { - u64 addr; psw_t new_psw; + u64 addr; addr = kvm_s390_get_base_disp_s(vcpu); - - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } - - vcpu->arch.sie_block->gpsw.mask = new_psw.mask; - vcpu->arch.sie_block->gpsw.addr = new_psw.addr; - - 
if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_BA) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } static int handle_stidp(struct kvm_vcpu *vcpu) { u64 operand2; - int rc; vcpu->stat.instruction_stidp++; operand2 = kvm_s390_get_base_disp_s(vcpu); - if (operand2 & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2); - if (rc) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); -out: return 0; } @@ -400,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu) int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; + unsigned long mem = 0; u64 operand2; - unsigned long mem; + int rc = 0; vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); @@ -420,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu) case 2: mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; if (stsi((void *) mem, fc, sel1, sel2)) - goto out_mem; + goto out_no_data; break; case 3: if (sel1 != 2 || sel2 != 2) - goto out_fail; + goto out_no_data; mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; default: - goto out_fail; + goto out_no_data; } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out_mem; + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out_exception; } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->run->s.regs.gprs[0] = 0; return 0; -out_mem: - free_page(mem); -out_fail: +out_no_data: /* condition code 3 */ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; - return 0; +out_exception: + free_page(mem); + return rc; } static const intercept_handler_t b2_handlers[256] = { diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 40afa0005c69..9bd4ecac72be 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) +#endif + /* * every pentium local APIC has two 'local interrupts', with a * soft-definable vector attached 
to both interrupts, one of diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 81f04cee5f74..ab0ae1aa6d0a 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -12,6 +12,9 @@ typedef struct { unsigned int irq_spurious_count; unsigned int icr_read_retry_count; #endif +#ifdef CONFIG_HAVE_KVM + unsigned int kvm_posted_intr_ipis; +#endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; unsigned int apic_irq_work_irqs; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 10a78c3d3d5a..1da97efad08a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -28,6 +28,7 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern void kvm_posted_intr_ipi(void); extern void error_interrupt(void); extern void irq_work_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index aac5fa62a86c..5702d7e3111d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@ */ #define X86_PLATFORM_IPI_VECTOR 0xf7 +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#endif + /* * IRQ work vector: */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b5a64621d5af..599f98b612d4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -94,9 +94,6 @@ #define ASYNC_PF_PER_VCPU 64 -extern raw_spinlock_t kvm_lock; -extern struct list_head vm_list; - struct kvm_vcpu; struct kvm; struct kvm_async_pf; @@ -704,6 +701,8 @@ struct kvm_x86_ops { void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); @@ -730,6 +729,7 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); + void (*handle_external_intr)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -798,6 +798,7 @@ enum emulation_result { #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_RETRY (1 << 3) +#define EMULTYPE_NO_REEXECUTE (1 << 4) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, void *insn, int insn_len); @@ -975,7 +976,6 @@ enum { * Trap the fault and ignore the instruction if that happens. 
*/ asmlinkage void kvm_spurious_fault(void); -extern bool kvm_rebooting; #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ "666: " insn "\n\t" \ @@ -1030,7 +1030,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); void kvm_deliver_pmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index fc1c3134473b..6f07f1999138 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -71,6 +71,7 @@ #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 #define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 @@ -102,6 +103,7 @@ /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ -136,6 +138,8 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, APIC_ACCESS_ADDR = 0x00002014, APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c1d01e6ca790..727208941030 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ apicinterrupt X86_PLATFORM_IPI_VECTOR \ x86_platform_ipi smp_x86_platform_ipi +#ifdef CONFIG_HAVE_KVM +apicinterrupt POSTED_INTR_VECTOR \ + kvm_posted_intr_ipi smp_kvm_posted_intr_ipi +#endif + apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e4595f105910..6ae6ea1d27d9 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -228,6 +228,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs) set_irq_regs(old_regs); } +#ifdef CONFIG_HAVE_KVM +/* + * Handler for POSTED_INTERRUPT_VECTOR. 
+ */ +void smp_kvm_posted_intr_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + ack_APIC_irq(); + + irq_enter(); + + exit_idle(); + + inc_irq_stat(kvm_posted_intr_ipis); + + irq_exit(); + + set_irq_regs(old_regs); +} +#endif + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7dc4e459c2b3..a2a1fbc594ff 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -172,6 +172,10 @@ static void __init apic_intr_init(void) /* IPI for X86 platform specific use */ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); +#ifdef CONFIG_HAVE_KVM + /* IPI for KVM to deliver posted interrupt */ + alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a335cc6cde72..46f63b8d09f4 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -132,8 +132,9 @@ #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ #define No64 (1<<28) #define PageTable (1 << 29) /* instruction used to write page table */ +#define NotImpl (1 << 30) /* instruction is not implemented */ /* Source 2 operand type */ -#define Src2Shift (30) +#define Src2Shift (31) #define Src2None (OpNone << Src2Shift) #define Src2CL (OpCL << Src2Shift) #define Src2ImmByte (OpImmByte << Src2Shift) @@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, memset(&seg_desc, 0, sizeof seg_desc); - if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) - || ctxt->mode == X86EMUL_MODE_REAL) { - /* set real mode segment descriptor */ + if (ctxt->mode == X86EMUL_MODE_REAL) { + /* set real mode segment descriptor (keep limit etc. for + * unreal mode) */ ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); set_desc_base(&seg_desc, selector << 4); goto load; + } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) { + /* VM86 needs a clean new segment descriptor */ + set_desc_base(&seg_desc, selector << 4); + set_desc_limit(&seg_desc, 0xffff); + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + seg_desc.dpl = 3; + goto load; } rpl = selector & 3; @@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ .check_perm = (_p) } -#define N D(0) +#define N D(NotImpl) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } @@ -3713,7 +3723,7 @@ static const struct opcode group5[] = { I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), I(SrcMem | Stack, em_grp45), I(SrcMemFAddr | ImplicitOps, em_grp45), - I(SrcMem | Stack, em_grp45), N, + I(SrcMem | Stack, em_grp45), D(Undefined), }; static const struct opcode group6[] = { @@ -4373,7 +4383,7 @@ done_prefixes: ctxt->intercept = opcode.intercept; /* Unrecognised? 
*/ - if (ctxt->d == 0 || (ctxt->d & Undefined)) + if (ctxt->d == 0 || (ctxt->d & NotImpl)) return EMULATION_FAILED; if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) @@ -4511,7 +4521,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) ctxt->mem_read.pos = 0; - if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) { + if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || + (ctxt->d & Undefined)) { rc = emulate_ud(ctxt); goto done; } diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c1d30b2fc9bb..412a5aa0ef94 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work) } spin_unlock(&ps->inject_lock); if (inject) { - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false); /* * Provides NMI watchdog support via Virtual Wire mode. diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a8e9369f41c5..e29883c604ff 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap) return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + return apic_test_vector(vector, apic->regs + APIC_ISR) || + apic_test_vector(vector, apic->regs + APIC_IRR); +} + static inline void apic_set_vector(int vec, void *bitmap) { set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } -void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - struct kvm_lapic_irq *irq, - u64 *eoi_exit_bitmap) -{ - struct kvm_lapic **dst; - struct kvm_apic_map *map; - unsigned long bitmap = 1; - int i; - - rcu_read_lock(); - map = rcu_dereference(vcpu->kvm->arch.apic_map); - - if (unlikely(!map)) { - __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap); - goto out; - } - - if (irq->dest_mode == 0) { /* physical mode */ - if (irq->delivery_mode == APIC_DM_LOWEST || - irq->dest_id == 0xff) { - __set_bit(irq->vector, - (unsigned long *)eoi_exit_bitmap); - goto out; - } - dst = &map->phys_map[irq->dest_id & 0xff]; - } else { - u32 mda = irq->dest_id << (32 - map->ldr_bits); - - dst = map->logical_map[apic_cluster_id(map, mda)]; - - bitmap = apic_logical_id(map, mda); - } - - for_each_set_bit(i, &bitmap, 16) { - if (!dst[i]) - continue; - if (dst[i]->vcpu == vcpu) { - __set_bit(irq->vector, - (unsigned long *)eoi_exit_bitmap); - break; - } - } - -out: - rcu_read_unlock(); -} - static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -256,7 +217,7 @@ out: if (old) kfree_rcu(old, rcu); - kvm_ioapic_make_eoibitmap_request(kvm); + kvm_vcpu_request_scan_ioapic(kvm); } static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) @@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap) return count; } +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) +{ + u32 i, pir_val; + struct kvm_lapic *apic = vcpu->arch.apic; + + for (i = 0; i <= 7; i++) { + pir_val = xchg(&pir[i], 0); + if (pir_val) + *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; + } +} +EXPORT_SYMBOL_GPL(kvm_apic_update_irr); + static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) { 
apic->irr_pending = true; @@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result >= 16); @@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) } static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode); + int vector, int level, int trig_mode, + unsigned long *dest_map); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map) { struct kvm_lapic *apic = vcpu->arch.apic; return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, - irq->level, irq->trig_mode); + irq->level, irq->trig_mode, dest_map); } static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) @@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) return result; } +void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + int i; + + for (i = 0; i < 8; i++) + apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]); +} + static void apic_update_ppr(struct kvm_lapic *apic) { u32 tpr, isrv, ppr, old_ppr; @@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, } bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r) + struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) { struct kvm_apic_map *map; unsigned long bitmap = 1; @@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = -1; if (irq->shorthand == APIC_DEST_SELF) { - *r = kvm_apic_set_irq(src->vcpu, irq); + *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); return true; } @@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, continue; if (*r < 0) *r = 0; - *r += kvm_apic_set_irq(dst[i]->vcpu, irq); + *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } ret = true; @@ -681,7 +667,8 @@ out: * Return 1 if successfully added and 0 if discarded. 
*/ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode) + int vector, int level, int trig_mode, + unsigned long *dest_map) { int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; @@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (unlikely(!apic_enabled(apic))) break; - if (trig_mode) { - apic_debug("level trig mode for vector %d", vector); - apic_set_vector(vector, apic->regs + APIC_TMR); - } else - apic_clear_vector(vector, apic->regs + APIC_TMR); + if (dest_map) + __set_bit(vcpu->vcpu_id, dest_map); - result = !apic_test_and_set_irr(vector, apic); - trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector, !result); - if (!result) { - if (trig_mode) - apic_debug("level trig mode repeatedly for " - "vector %d", vector); - break; - } + if (kvm_x86_ops->deliver_posted_interrupt) { + result = 1; + kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); + } else { + result = !apic_test_and_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); + if (!result) { + if (trig_mode) + apic_debug("level trig mode repeatedly " + "for vector %d", vector); + goto out; + } + + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } +out: + trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, + trig_mode, vector, !result); break; case APIC_DM_REMRD: @@ -786,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) trigger_mode = IOAPIC_LEVEL_TRIG; else trigger_mode = IOAPIC_EDGE_TRIG; - kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); + kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); } } @@ -852,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, irq.vector); - kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); + kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); } static u32 apic_get_tmcct(struct kvm_lapic *apic) @@ -1488,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode); + return __apic_accept_irq(apic, mode, vector, 1, trig_mode, + NULL); } return 0; } @@ -1658,6 +1650,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic->highest_isr_cache = -1; kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_rtc_eoi_tracking_restore_one(vcpu); } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 2c721b986eec..c730ac9fe801 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -53,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); void kvm_apic_set_version(struct kvm_vcpu *vcpu); +void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct 
kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r); + struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); @@ -160,13 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) return ldr & map->lid_mask; } -void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - struct kvm_lapic_irq *irq, - u64 *eoi_bitmap); - static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) { return vcpu->arch.apic->pending_events; } +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); + #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 633e30cfbd63..004cc87b781c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1501,15 +1501,11 @@ static void drop_parent_pte(struct kvm_mmu_page *sp, mmu_spte_clear_no_track(parent_pte); } -static void make_mmu_pages_available(struct kvm_vcpu *vcpu); - static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte, int direct) { struct kvm_mmu_page *sp; - make_mmu_pages_available(vcpu); - sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); if (!direct) @@ -2806,6 +2802,7 @@ exit: static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gva_t gva, pfn_t *pfn, bool write, bool *writable); +static void make_mmu_pages_available(struct kvm_vcpu *vcpu); static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn_t gfn, bool prefault) @@ -2847,6 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; + make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, @@ -2924,6 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL, NULL); ++sp->root_count; @@ -2935,6 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL, @@ -2973,6 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); root = __pa(sp->spt); @@ -3006,6 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) return 1; } spin_lock(&vcpu->kvm->mmu_lock); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 0, ACC_ALL, NULL); @@ -3311,6 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; + make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, gpa, write, map_writable, diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index af143f065532..da20860b457a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -627,6 +627,7 
@@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); + make_mmu_pages_available(vcpu); if (!force_pt_level) transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cfc258a6bf97..c53e797e7369 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) return 1; } -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; + u32 index = msr_info->index; + u64 data = msr_info->data; switch (index) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) } break; case MSR_CORE_PERF_GLOBAL_STATUS: + if (msr_info->host_initiated) { + pmu->global_status = data; + return 0; + } break; /* RO MSR */ case MSR_CORE_PERF_GLOBAL_CTRL: if (pmu->global_ctrl == data) @@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { - pmu->global_status &= ~data; + if (!msr_info->host_initiated) + pmu->global_status &= ~data; pmu->global_ovf_ctrl = data; return 0; } @@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) default: if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || (pmc = get_fixed_pmc(pmu, index))) { - data = (s64)(s32)data; + if (!msr_info->host_initiated) + data = (s64)(s32)data; pmc->counter += data - read_pmc(pmc); return 0; } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7a46c1f46861..d6713e18bbc1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3577,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr) return; } +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + return; +} + static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4233,6 +4238,11 @@ out: return ret; } +static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +{ + local_irq_enable(); +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -4300,6 +4310,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vm_has_apicv = svm_vm_has_apicv, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_isr_update = svm_hwapic_isr_update, + .sync_pir_to_irr = svm_sync_pir_to_irr, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, @@ -4328,6 +4339,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tdp_cr3 = set_tdp_cr3, .check_intercept = svm_check_intercept, + .handle_external_intr = svm_handle_external_intr, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 03f574641852..5a87a58af49d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv_reg_vid; +static bool __read_mostly enable_apicv = 1; +module_param(enable_apicv, bool, S_IRUGO); /* * If nested=1, nested virtualization is supported, i.e., guests may use @@ -366,6 +367,31 @@ struct nested_vmx { struct page 
*apic_access_page; }; +#define POSTED_INTR_ON 0 +/* Posted-Interrupt Descriptor */ +struct pi_desc { + u32 pir[8]; /* Posted interrupt requested */ + u32 control; /* bit 0 of control is outstanding notification bit */ + u32 rsvd[7]; +} __aligned(64); + +static bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + +static bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + +static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); +} + struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; @@ -378,6 +404,7 @@ struct vcpu_vmx { struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; + unsigned long host_idt_base; #ifdef CONFIG_X86_64 u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; @@ -429,6 +456,9 @@ struct vcpu_vmx { bool rdtscp_enabled; + /* Posted interrupt descriptor */ + struct pi_desc pi_desc; + /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; }; @@ -626,6 +656,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); +static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -784,6 +815,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; } +static inline bool cpu_has_vmx_posted_intr(void) +{ + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; +} + +static inline bool cpu_has_vmx_apicv(void) +{ + return cpu_has_vmx_apic_register_virt() && + cpu_has_vmx_virtual_intr_delivery() && + cpu_has_vmx_posted_intr(); +} + static inline bool cpu_has_vmx_flexpriority(void) { return cpu_has_vmx_tpr_shadow() && @@ -2551,12 +2594,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmexit_control = 0; u32 _vmentry_control = 0; - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, - &_pin_based_exec_control) < 0) - return -EIO; - min = CPU_BASED_HLT_EXITING | #ifdef CONFIG_X86_64 CPU_BASED_CR8_LOAD_EXITING | @@ -2627,11 +2664,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif - opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; + opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | + VM_EXIT_ACK_INTR_ON_EXIT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, + &_pin_based_exec_control) < 0) + return -EIO; + + if (!(_cpu_based_2nd_exec_control & + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; + min = 0; opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2810,14 +2859,16 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; - if (!cpu_has_vmx_apic_register_virt() || - 
!cpu_has_vmx_virtual_intr_delivery()) - enable_apicv_reg_vid = 0; + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; - if (enable_apicv_reg_vid) + if (enable_apicv) kvm_x86_ops->update_cr8_intercept = NULL; - else + else { kvm_x86_ops->hwapic_irr_update = NULL; + kvm_x86_ops->deliver_posted_interrupt = NULL; + kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; + } if (nested) nested_vmx_setup_ctls_msrs(); @@ -3873,13 +3924,57 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, MSR_TYPE_W); } +static int vmx_vm_has_apicv(struct kvm *kvm) +{ + return enable_apicv && irqchip_in_kernel(kvm); +} + +/* + * Send interrupt to vcpu via posted interrupt way. + * 1. If target vcpu is running(non-root mode), send posted interrupt + * notification to vcpu and hardware will sync PIR to vIRR atomically. + * 2. If target vcpu isn't running(root mode), kick it to pick up the + * interrupt from PIR in next vmentry. + */ +static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int r; + + if (pi_test_and_set_pir(vector, &vmx->pi_desc)) + return; + + r = pi_test_and_set_on(&vmx->pi_desc); + kvm_make_request(KVM_REQ_EVENT, vcpu); + if (!r && (vcpu->mode == IN_GUEST_MODE)) + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), + POSTED_INTR_VECTOR); + else + kvm_vcpu_kick(vcpu); +} + +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!pi_test_and_clear_on(&vmx->pi_desc)) + return; + + kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); +} + +static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) +{ + return; +} + /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. * Note that host-state that does change is set elsewhere. E.g., host-state * that is set differently for each CPU is set in vmx_vcpu_load(), not here. 
*/ -static void vmx_set_constant_host_state(void) +static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) { u32 low32, high32; unsigned long tmpl; @@ -3907,6 +4002,7 @@ static void vmx_set_constant_host_state(void) native_store_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ + vmx->host_idt_base = dt.address; vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ @@ -3932,6 +4028,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); } +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) +{ + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; + + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + return pin_based_exec_ctrl; +} + static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -3949,11 +4054,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static int vmx_vm_has_apicv(struct kvm *kvm) -{ - return enable_apicv_reg_vid && irqchip_in_kernel(kvm); -} - static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4009,8 +4109,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ /* Control */ - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, - vmcs_config.pin_based_exec_ctrl); + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); @@ -4019,13 +4118,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx_secondary_exec_control(vmx)); } - if (enable_apicv_reg_vid) { + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); vmcs_write64(EOI_EXIT_BITMAP3, 0); vmcs_write16(GUEST_INTR_STATUS, 0); + + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } if (ple_gap) { @@ -4039,7 +4141,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ - vmx_set_constant_host_state(); + vmx_set_constant_host_state(vmx); #ifdef CONFIG_X86_64 rdmsrl(MSR_FS_BASE, a); vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ @@ -4167,6 +4269,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + if (vmx_vm_has_apicv(vcpu->kvm)) + memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); + if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); @@ -4325,16 +4430,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { + if (is_guest_mode(vcpu)) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_run_pending || - (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_VALID_MASK)) + + if (to_vmx(vcpu)->nested.nested_run_pending) return 0; - nested_vmx_vmexit(vcpu); - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; - vmcs12->vm_exit_intr_info = 0; - /* fall through to normal code, but now in L1, not L2 */ + if (nested_exit_on_intr(vcpu)) { + nested_vmx_vmexit(vcpu); + vmcs12->vm_exit_reason = + EXIT_REASON_EXTERNAL_INTERRUPT; + vmcs12->vm_exit_intr_info = 0; + /* + * fall through to normal code, but now in L1, not L2 + */ + } } return 
(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && @@ -5189,7 +5298,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) return 1; - err = emulate_instruction(vcpu, 0); + err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); if (err == EMULATE_DO_MMIO) { ret = 0; @@ -6112,14 +6221,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_TRIPLE_FAULT: return 1; case EXIT_REASON_PENDING_INTERRUPT: + return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); case EXIT_REASON_NMI_WINDOW: - /* - * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit - * (aka Interrupt Window Exiting) only when L1 turned it on, - * so if we got a PENDING_INTERRUPT exit, this must be for L1. - * Same for NMI Window Exiting. - */ - return 1; + return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); case EXIT_REASON_TASK_SWITCH: return 1; case EXIT_REASON_CPUID: @@ -6370,6 +6474,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { + if (!vmx_vm_has_apicv(vcpu->kvm)) + return; + vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); @@ -6400,6 +6507,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) } } +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) +{ + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* + * If external interrupt exists, IF bit is set in rflags/eflags on the + * interrupt stack frame, and interrupt will be enabled on a return + * from interrupt handler. + */ + if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) + == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { + unsigned int vector; + unsigned long entry; + gate_desc *desc; + struct vcpu_vmx *vmx = to_vmx(vcpu); +#ifdef CONFIG_X86_64 + unsigned long tmp; +#endif + + vector = exit_intr_info & INTR_INFO_VECTOR_MASK; + desc = (gate_desc *)vmx->host_idt_base + vector; + entry = gate_offset(*desc); + asm volatile( +#ifdef CONFIG_X86_64 + "mov %%" _ASM_SP ", %[sp]\n\t" + "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" + "push $%c[ss]\n\t" + "push %[sp]\n\t" +#endif + "pushf\n\t" + "orl $0x200, (%%" _ASM_SP ")\n\t" + __ASM_SIZE(push) " $%c[cs]\n\t" + "call *%[entry]\n\t" + : +#ifdef CONFIG_X86_64 + [sp]"=&r"(tmp) +#endif + : + [entry]"r"(entry), + [ss]"i"(__KERNEL_DS), + [cs]"i"(__KERNEL_CS) + ); + } else + local_irq_enable(); +} + static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -6498,8 +6651,6 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { - if (is_guest_mode(&vmx->vcpu)) - return; __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, VM_EXIT_INSTRUCTION_LEN, IDT_VECTORING_ERROR_CODE); @@ -6507,8 +6658,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) static void vmx_cancel_injection(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) - return; __vmx_complete_interrupts(vcpu, vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), VM_ENTRY_INSTRUCTION_LEN, @@ -6540,21 +6689,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long debugctlmsr; - if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_VALID_MASK) { - 
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - vmcs12->idt_vectoring_info_field); - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmcs12->vm_exit_instruction_len); - if (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_DELIVER_CODE_MASK) - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, - vmcs12->idt_vectoring_error_code); - } - } - /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) vmx->entry_time = ktime_get(); @@ -6713,17 +6847,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info; - if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { - vmcs12->idt_vectoring_error_code = - vmcs_read32(IDT_VECTORING_ERROR_CODE); - vmcs12->vm_exit_instruction_len = - vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - } - } - vmx->loaded_vmcs->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); @@ -6785,10 +6908,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) put_cpu(); if (err) goto free_vmcs; - if (vm_need_virtualize_apic_accesses(kvm)) + if (vm_need_virtualize_apic_accesses(kvm)) { err = alloc_apic_access_page(kvm); if (err) goto free_vmcs; + } if (enable_ept) { if (!kvm->arch.ept_identity_map_addr) @@ -7071,7 +7195,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * Other fields are different per CPU, and will be set later when * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. */ - vmx_set_constant_host_state(); + vmx_set_constant_host_state(vmx); /* * HOST_RSP is normally set correctly in vmx_vcpu_run() just before @@ -7330,6 +7454,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vcpu->arch.cr4_guest_owned_bits)); } +static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 idt_vectoring; + unsigned int nr; + + if (vcpu->arch.exception.pending) { + nr = vcpu->arch.exception.nr; + idt_vectoring = nr | VECTORING_INFO_VALID_MASK; + + if (kvm_exception_is_soft(nr)) { + vmcs12->vm_exit_instruction_len = + vcpu->arch.event_exit_inst_len; + idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; + } else + idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; + + if (vcpu->arch.exception.has_error_code) { + idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; + vmcs12->idt_vectoring_error_code = + vcpu->arch.exception.error_code; + } + + vmcs12->idt_vectoring_info_field = idt_vectoring; + } else if (vcpu->arch.nmi_pending) { + vmcs12->idt_vectoring_info_field = + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; + } else if (vcpu->arch.interrupt.pending) { + nr = vcpu->arch.interrupt.nr; + idt_vectoring = nr | VECTORING_INFO_VALID_MASK; + + if (vcpu->arch.interrupt.soft) { + idt_vectoring |= INTR_TYPE_SOFT_INTR; + vmcs12->vm_entry_instruction_len = + vcpu->arch.event_exit_inst_len; + } else + idt_vectoring |= INTR_TYPE_EXT_INTR; + + vmcs12->idt_vectoring_info_field = idt_vectoring; + } +} + /* * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits * and we want to prepare to run its L1 parent. 
@@ -7402,7 +7568,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	/* TODO: These cannot have changed unless we have MSR bitmaps and
 	 * the relevant bit asks not to trap the change */
 	vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
-	if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT)
+	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
 		vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
 	vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
 	vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
@@ -7414,16 +7580,34 @@
 	vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-	vmcs12->idt_vectoring_info_field = to_vmx(vcpu)->idt_vectoring_info;
-	vmcs12->idt_vectoring_error_code =
-		vmcs_read32(IDT_VECTORING_ERROR_CODE);
+	if ((vmcs12->vm_exit_intr_info &
+	     (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
+	    (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
+		vmcs12->vm_exit_intr_error_code =
+			vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+	vmcs12->idt_vectoring_info_field = 0;
 	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-	/* clear vm-entry fields which are to be cleared on exit */
-	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+		/* vm_entry_intr_info_field is cleared on exit. Emulate this
+		 * instead of reading the real value. */
 		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+		/*
+		 * Transfer the event that L0 or L1 may wanted to inject into
+		 * L2 to IDT_VECTORING_INFO_FIELD.
+		 */
+		vmcs12_save_pending_event(vcpu, vmcs12);
+	}
+
+	/*
+	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
+	 * preserved above and would only end up incorrectly in L1.
+	 */
+	vcpu->arch.nmi_injected = false;
+	kvm_clear_exception_queue(vcpu);
+	kvm_clear_interrupt_queue(vcpu);
 }
 /*
@@ -7523,6 +7707,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 	int cpu;
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	/* trying to cancel vmlaunch/vmresume is a bug */
+	WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
 	leave_guest_mode(vcpu);
 	prepare_vmcs12(vcpu, vmcs12);
@@ -7657,6 +7844,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,
+	.sync_pir_to_irr = vmx_sync_pir_to_irr,
+	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
@@ -7685,6 +7874,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.set_tdp_cr3 = vmx_set_cr3,
 	.check_intercept = vmx_check_intercept,
+	.handle_external_intr = vmx_handle_external_intr,
 };
 static int __init vmx_init(void)
@@ -7741,7 +7931,7 @@ static int __init vmx_init(void)
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 		     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out3;
+		goto out5;
 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -7759,7 +7949,7 @@ static int __init vmx_init(void)
 	memcpy(vmx_msr_bitmap_longmode_x2apic,
 			vmx_msr_bitmap_longmode, PAGE_SIZE);
-	if (enable_apicv_reg_vid) {
+	if (enable_apicv) {
 		for (msr = 0x800; msr <= 0x8ff; msr++)
 			vmx_disable_intercept_msr_read_x2apic(msr);
@@ -7789,6 +7979,8 @@ static int __init vmx_init(void)
 	return 0;
+out5:
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
 out3:
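The out5 addition above keeps the error unwind in vmx_init() consistent: once vmx_msr_bitmap_longmode_x2apic has been allocated, a later kvm_init() failure must free it before falling through to the older labels. A generic sketch of the same goto-unwind pattern (names purely illustrative):

    #include <stdlib.h>

    /* Allocate three resources; on failure, free in reverse order,
     * exactly like the out5/out4/out3 labels above. */
    static int setup_three(void **a, void **b, void **c)
    {
        *a = malloc(64);
        if (!*a)
            goto err;
        *b = malloc(64);
        if (!*b)
            goto err_free_a;
        *c = malloc(64);
        if (!*c)
            goto err_free_b;
        return 0;

    err_free_b:
        free(*b);
    err_free_a:
        free(*a);
    err:
        return -1;
    }

Each new allocation gets its own label, so a failure jumps to the label that releases everything allocated so far.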
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2aaba814f1c8..50e2e10b8041 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -261,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+asmlinkage void kvm_spurious_fault(void)
+{
+	/* Fault while not rebooting. We want the trace. */
+	BUG();
+}
+EXPORT_SYMBOL_GPL(kvm_spurious_fault);
+
 #define EXCPT_BENIGN		0
 #define EXCPT_CONTRIBUTORY	1
 #define EXCPT_PF		2
@@ -2040,7 +2047,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_P6_EVNTSEL0:
 	case MSR_P6_EVNTSEL1:
 		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_set_msr(vcpu, msr, data);
+			return kvm_pmu_set_msr(vcpu, msr_info);
 		if (pr || data != 0)
 			vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
@@ -2086,7 +2093,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
 			return xen_hvm_config(vcpu, data);
 		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_set_msr(vcpu, msr, data);
+			return kvm_pmu_set_msr(vcpu, msr_info);
 		if (!ignore_msrs) {
 			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
 				    msr, data);
@@ -2685,6 +2692,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
+	kvm_x86_ops->sync_pir_to_irr(vcpu);
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 	return 0;
@@ -3484,13 +3492,15 @@ out:
 	return r;
 }
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+			  bool line_status)
 {
 	if (!irqchip_in_kernel(kvm))
 		return -ENXIO;
 	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event->irq, irq_event->level);
+					irq_event->irq, irq_event->level,
+					line_status);
 	return 0;
 }
@@ -4758,11 +4768,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 }
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
-				  bool write_fault_to_shadow_pgtable)
+				  bool write_fault_to_shadow_pgtable,
+				  int emulation_type)
 {
 	gpa_t gpa = cr2;
 	pfn_t pfn;
+	if (emulation_type & EMULTYPE_NO_REEXECUTE)
+		return false;
+
 	if (!vcpu->arch.mmu.direct_map) {
 		/*
 		 * Write permission should be allowed since only
@@ -4905,8 +4919,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	if (r != EMULATION_OK)  {
 		if (emulation_type & EMULTYPE_TRAP_UD)
 			return EMULATE_FAIL;
-		if (reexecute_instruction(vcpu, cr2,
-					  write_fault_to_spt))
+		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+					  emulation_type))
 			return EMULATE_DONE;
 		if (emulation_type & EMULTYPE_SKIP)
 			return EMULATE_FAIL;
@@ -4936,7 +4950,8 @@ restart:
 		return EMULATE_DONE;
 	if (r == EMULATION_FAILED) {
-		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
+		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+					  emulation_type))
 			return EMULATE_DONE;
 		return handle_emulation_failure(vcpu);
@@ -5647,14 +5662,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
-static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
 	u64 eoi_exit_bitmap[4];
+	u32 tmr[8];
+
+	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+		return;
 	memset(eoi_exit_bitmap, 0, 32);
+	memset(tmr, 0, 32);
-	kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+	kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
 	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+	kvm_apic_update_tmr(vcpu, tmr);
 }
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
@@ -5710,8 +5731,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_handle_pmu_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_deliver_pmi(vcpu);
-		if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
-			update_eoi_exitmap(vcpu);
+		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
+			vcpu_scan_ioapic(vcpu);
 	}
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5806,7 +5827,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
-	local_irq_enable();
+
+	/* Interrupt is enabled by handle_external_intr() */
+	kvm_x86_ops->handle_external_intr(vcpu);
 	++vcpu->stat.exits;
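The request handling above now scans the in-kernel IOAPIC via vcpu_scan_ioapic() and hands the interrupt-enable step of vcpu_enter_guest() to the per-vendor handle_external_intr() hook. The two arrays cleared with memset(..., 0, 32) are both 256-bit vector maps: eoi_exit_bitmap is four 64-bit words and tmr is eight 32-bit words, one bit per interrupt vector. A small sketch of how a vector bit lands in such a map (the helper is illustrative, not a kernel function):

    #include <stdint.h>

    /* 256-bit bitmap laid out as four 64-bit words, one bit per vector. */
    static void set_vector_bit(uint64_t bitmap[4], uint8_t vector)
    {
        bitmap[vector / 64] |= 1ULL << (vector % 64);
    }

Broadly, vectors the IOAPIC routes as level-triggered get their bit set in both maps, so an EOI for them still causes a VM exit and the TMR the guest APIC sees stays accurate even with APIC virtualization enabled.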