summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorAlexander Graf <agraf@suse.de>2013-04-17 15:20:38 +0200
committerAlexander Graf <agraf@suse.de>2013-04-17 15:20:38 +0200
commitfca7567c30a45962401d8d0707e6b6d7adf90f9a (patch)
tree8b67f90c33e9e67f7b7c61916796193a057e368c /arch
parentfbfba342a719b49d9cd0837202cf5365ba46ca9b (diff)
parent79558f112fc0352e057f7b5e158e3d88b8b62c60 (diff)
Merge commit 'origin/next' into kvm-ppc-next
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/kvm/arm.c3
-rw-r--r--arch/ia64/kvm/lapic.h6
-rw-r--r--arch/s390/kvm/intercept.c12
-rw-r--r--arch/s390/kvm/kvm-s390.c32
-rw-r--r--arch/s390/kvm/kvm-s390.h12
-rw-r--r--arch/s390/kvm/priv.c203
-rw-r--r--arch/x86/include/asm/entry_arch.h4
-rw-r--r--arch/x86/include/asm/hardirq.h3
-rw-r--r--arch/x86/include/asm/hw_irq.h1
-rw-r--r--arch/x86/include/asm/irq_vectors.h5
-rw-r--r--arch/x86/include/asm/kvm_host.h10
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/kernel/entry_64.S5
-rw-r--r--arch/x86/kernel/irq.c22
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kvm/emulate.c27
-rw-r--r--arch/x86/kvm/i8254.c4
-rw-r--r--arch/x86/kvm/lapic.c141
-rw-r--r--arch/x86/kvm/lapic.h13
-rw-r--r--arch/x86/kvm/mmu.c11
-rw-r--r--arch/x86/kvm/paging_tmpl.h1
-rw-r--r--arch/x86/kvm/pmu.c14
-rw-r--r--arch/x86/kvm/svm.c12
-rw-r--r--arch/x86/kvm/vmx.c352
-rw-r--r--arch/x86/kvm/x86.c49
25 files changed, 591 insertions, 359 deletions
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e4ad0bb01843..678596f699f3 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -805,7 +805,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
return 0;
}
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+ bool line_status)
{
u32 irq = irq_level->irq;
unsigned int irq_type, vcpu_idx, irq_num;
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index c3e2935b6db4..c5f92a926a9a 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
#define kvm_apic_present(x) (true)
#define kvm_lapic_enabled(x) (true)
-static inline bool kvm_apic_vid_enabled(void)
-{
- /* IA64 has no apicv supporting, do nothing here */
- return false;
-}
-
#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index c6ba4dfd7f1e..b7d1b2edeeb3 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -45,10 +45,8 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
do {
rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
(u64 __user *) useraddr);
- if (rc) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- break;
- }
+ if (rc)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
useraddr += 8;
if (reg == reg3)
break;
@@ -79,10 +77,8 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
reg = reg1;
do {
rc = get_guest(vcpu, val, (u32 __user *) useraddr);
- if (rc) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- break;
- }
+ if (rc)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
vcpu->arch.sie_block->gcr[reg] |= val;
useraddr += 4;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 33161b4a8280..c1c7c683fa26 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -149,6 +149,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
break;
@@ -633,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
} else {
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- rc = 0;
+ rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
}
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
@@ -978,18 +980,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- /* A few sanity checks. We can have exactly one memory slot which has
- to start at guest virtual zero and which has to be located at a
- page boundary in userland and which has to end at a page boundary.
- The memory in userland is ok to be fragmented into various different
- vmas. It is okay to mmap() and munmap() stuff in this slot after
- doing this call at any time */
-
- if (mem->slot)
- return -EINVAL;
-
- if (mem->guest_phys_addr)
- return -EINVAL;
+ /* A few sanity checks. We can have memory slots which have to be
+ located/ended at a segment boundary (1MB). The memory in userland is
+ ok to be fragmented into various different vmas. It is okay to mmap()
+ and munmap() stuff in this slot after doing this call at any time */
if (mem->userspace_addr & 0xffffful)
return -EINVAL;
@@ -1007,6 +1001,16 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int rc;
+ /* If the basics of the memslot do not change, we do not want
+ * to update the gmap. Every update causes several unnecessary
+ * segment translation exceptions. This is usually handled just
+ * fine by the normal fault handler + gmap, but it will also
+ * cause faults on the prefix page of running guest CPUs.
+ */
+ if (old->userspace_addr == mem->userspace_addr &&
+ old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
+ old->npages * PAGE_SIZE == mem->memory_size)
+ return;
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 4d89d64a8161..efc14f687265 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
-int kvm_s390_inject_vm(struct kvm *kvm,
- struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
-int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
+int __must_check kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+ struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 cr6, u64 schid);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 7db2ad076f31..6bbd7b5a0bbe 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -14,6 +14,7 @@
#include <linux/kvm.h>
#include <linux/gfp.h>
#include <linux/errno.h>
+#include <linux/compat.h>
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/debug.h>
@@ -36,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
- if (operand2 & 3) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (operand2 & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* get the value */
- if (get_guest(vcpu, address, (u32 __user *) operand2)) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ if (get_guest(vcpu, address, (u32 __user *) operand2))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
address = address & 0x7fffe000u;
/* make sure that the new value is valid memory */
if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
- (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
kvm_s390_set_prefix(vcpu, address);
VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
trace_kvm_s390_handle_prefix(vcpu, 1, address);
-out:
return 0;
}
@@ -74,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
- if (operand2 & 3) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (operand2 & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
address = vcpu->arch.sie_block->prefix;
address = address & 0x7fffe000u;
/* get the value */
- if (put_guest(vcpu, address, (u32 __user *)operand2)) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ if (put_guest(vcpu, address, (u32 __user *)operand2))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
trace_kvm_s390_handle_prefix(vcpu, 0, address);
-out:
return 0;
}
static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
{
u64 useraddr;
- int rc;
vcpu->stat.instruction_stap++;
useraddr = kvm_s390_get_base_disp_s(vcpu);
- if (useraddr & 1) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (useraddr & 1)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr);
- if (rc) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
trace_kvm_s390_handle_stap(vcpu, useraddr);
-out:
return 0;
}
@@ -135,10 +117,8 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
int cc;
addr = kvm_s390_get_base_disp_s(vcpu);
- if (addr & 3) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (addr & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
cc = 0;
inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
if (!inti)
@@ -167,7 +147,6 @@ no_interrupt:
/* Set condition code and we're done. */
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
-out:
return 0;
}
@@ -237,12 +216,9 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
&facility_list, sizeof(facility_list));
if (rc)
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- else {
- VCPU_EVENT(vcpu, 5, "store facility list value %x",
- facility_list);
- trace_kvm_s390_handle_stfl(vcpu, facility_list);
- }
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list);
+ trace_kvm_s390_handle_stfl(vcpu, facility_list);
return 0;
}
@@ -255,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu)
#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
-#define PSW_ADDR_24 0x00000000000fffffUL
+#define PSW_ADDR_24 0x0000000000ffffffUL
#define PSW_ADDR_31 0x000000007fffffffUL
+static int is_valid_psw(psw_t *psw) {
+ if (psw->mask & PSW_MASK_UNASSIGNED)
+ return 0;
+ if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
+ if (psw->addr & ~PSW_ADDR_31)
+ return 0;
+ }
+ if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
+ return 0;
+ if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA)
+ return 0;
+ return 1;
+}
+
int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
{
- u64 addr;
+ psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
psw_compat_t new_psw;
+ u64 addr;
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ if (gpsw->mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu,
PGM_PRIVILEGED_OPERATION);
-
addr = kvm_s390_get_base_disp_s(vcpu);
-
- if (addr & 7) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
-
- if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
-
- if (!(new_psw.mask & PSW32_MASK_BASE)) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
-
- vcpu->arch.sie_block->gpsw.mask =
- (new_psw.mask & ~PSW32_MASK_BASE) << 32;
- vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
-
- if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
- (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
- (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
- ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
- PSW_MASK_EA)) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
-
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (!(new_psw.mask & PSW32_MASK_BASE))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
+ gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
+ gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
+ if (!is_valid_psw(gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
handle_new_psw(vcpu);
-out:
return 0;
}
static int handle_lpswe(struct kvm_vcpu *vcpu)
{
- u64 addr;
psw_t new_psw;
+ u64 addr;
addr = kvm_s390_get_base_disp_s(vcpu);
-
- if (addr & 7) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
-
- if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
-
- vcpu->arch.sie_block->gpsw.mask = new_psw.mask;
- vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
-
- if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
- (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
- PSW_MASK_BA) &&
- (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) ||
- (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
- (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
- ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
- PSW_MASK_EA)) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
-
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ vcpu->arch.sie_block->gpsw = new_psw;
+ if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
handle_new_psw(vcpu);
-out:
return 0;
}
static int handle_stidp(struct kvm_vcpu *vcpu)
{
u64 operand2;
- int rc;
vcpu->stat.instruction_stidp++;
operand2 = kvm_s390_get_base_disp_s(vcpu);
- if (operand2 & 7) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (operand2 & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2);
- if (rc) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
-out:
return 0;
}
@@ -400,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
+ unsigned long mem = 0;
u64 operand2;
- unsigned long mem;
+ int rc = 0;
vcpu->stat.instruction_stsi++;
VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@@ -420,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
case 2:
mem = get_zeroed_page(GFP_KERNEL);
if (!mem)
- goto out_fail;
+ goto out_no_data;
if (stsi((void *) mem, fc, sel1, sel2))
- goto out_mem;
+ goto out_no_data;
break;
case 3:
if (sel1 != 2 || sel2 != 2)
- goto out_fail;
+ goto out_no_data;
mem = get_zeroed_page(GFP_KERNEL);
if (!mem)
- goto out_fail;
+ goto out_no_data;
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
default:
- goto out_fail;
+ goto out_no_data;
}
if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out_mem;
+ rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out_exception;
}
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
vcpu->run->s.regs.gprs[0] = 0;
return 0;
-out_mem:
- free_page(mem);
-out_fail:
+out_no_data:
/* condition code 3 */
vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
- return 0;
+out_exception:
+ free_page(mem);
+ return rc;
}
static const intercept_handler_t b2_handlers[256] = {
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 40afa0005c69..9bd4ecac72be 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
+#ifdef CONFIG_HAVE_KVM
+BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+#endif
+
/*
* every pentium local APIC has two 'local interrupts', with a
* soft-definable vector attached to both interrupts, one of
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 81f04cee5f74..ab0ae1aa6d0a 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -12,6 +12,9 @@ typedef struct {
unsigned int irq_spurious_count;
unsigned int icr_read_retry_count;
#endif
+#ifdef CONFIG_HAVE_KVM
+ unsigned int kvm_posted_intr_ipis;
+#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
unsigned int apic_irq_work_irqs;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 10a78c3d3d5a..1da97efad08a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -28,6 +28,7 @@
/* Interrupt handlers registered during init_IRQ */
extern void apic_timer_interrupt(void);
extern void x86_platform_ipi(void);
+extern void kvm_posted_intr_ipi(void);
extern void error_interrupt(void);
extern void irq_work_interrupt(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index aac5fa62a86c..5702d7e3111d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,11 @@
*/
#define X86_PLATFORM_IPI_VECTOR 0xf7
+/* Vector for KVM to deliver posted interrupt IPI */
+#ifdef CONFIG_HAVE_KVM
+#define POSTED_INTR_VECTOR 0xf2
+#endif
+
/*
* IRQ work vector:
*/
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b5a64621d5af..599f98b612d4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -94,9 +94,6 @@
#define ASYNC_PF_PER_VCPU 64
-extern raw_spinlock_t kvm_lock;
-extern struct list_head vm_list;
-
struct kvm_vcpu;
struct kvm;
struct kvm_async_pf;
@@ -704,6 +701,8 @@ struct kvm_x86_ops {
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+ void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -730,6 +729,7 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
+ void (*handle_external_intr)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
@@ -798,6 +798,7 @@ enum emulation_result {
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
#define EMULTYPE_RETRY (1 << 3)
+#define EMULTYPE_NO_REEXECUTE (1 << 4)
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
@@ -975,7 +976,6 @@ enum {
* Trap the fault and ignore the instruction if that happens.
*/
asmlinkage void kvm_spurious_fault(void);
-extern bool kvm_rebooting;
#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
"666: " insn "\n\t" \
@@ -1030,7 +1030,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index fc1c3134473b..6f07f1999138 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -71,6 +71,7 @@
#define PIN_BASED_NMI_EXITING 0x00000008
#define PIN_BASED_VIRTUAL_NMIS 0x00000020
#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
+#define PIN_BASED_POSTED_INTR 0x00000080
#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
@@ -102,6 +103,7 @@
/* VMCS Encodings */
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
+ POSTED_INTR_NV = 0x00000002,
GUEST_ES_SELECTOR = 0x00000800,
GUEST_CS_SELECTOR = 0x00000802,
GUEST_SS_SELECTOR = 0x00000804,
@@ -136,6 +138,8 @@ enum vmcs_field {
VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
APIC_ACCESS_ADDR = 0x00002014,
APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ POSTED_INTR_DESC_ADDR = 0x00002016,
+ POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
EOI_EXIT_BITMAP0 = 0x0000201c,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c1d01e6ca790..727208941030 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
+#ifdef CONFIG_HAVE_KVM
+apicinterrupt POSTED_INTR_VECTOR \
+ kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+#endif
+
apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index e4595f105910..6ae6ea1d27d9 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -228,6 +228,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+#ifdef CONFIG_HAVE_KVM
+/*
+ * Handler for POSTED_INTERRUPT_VECTOR.
+ */
+void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ ack_APIC_irq();
+
+ irq_enter();
+
+ exit_idle();
+
+ inc_irq_stat(kvm_posted_intr_ipis);
+
+ irq_exit();
+
+ set_irq_regs(old_regs);
+}
+#endif
+
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7dc4e459c2b3..a2a1fbc594ff 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -172,6 +172,10 @@ static void __init apic_intr_init(void)
/* IPI for X86 platform specific use */
alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
+#ifdef CONFIG_HAVE_KVM
+ /* IPI for KVM to deliver posted interrupt */
+ alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+#endif
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a335cc6cde72..46f63b8d09f4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -132,8 +132,9 @@
#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64 (1<<28)
#define PageTable (1 << 29) /* instruction used to write page table */
+#define NotImpl (1 << 30) /* instruction is not implemented */
/* Source 2 operand type */
-#define Src2Shift (30)
+#define Src2Shift (31)
#define Src2None (OpNone << Src2Shift)
#define Src2CL (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
@@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
memset(&seg_desc, 0, sizeof seg_desc);
- if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
- || ctxt->mode == X86EMUL_MODE_REAL) {
- /* set real mode segment descriptor */
+ if (ctxt->mode == X86EMUL_MODE_REAL) {
+ /* set real mode segment descriptor (keep limit etc. for
+ * unreal mode) */
ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
set_desc_base(&seg_desc, selector << 4);
goto load;
+ } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
+ /* VM86 needs a clean new segment descriptor */
+ set_desc_base(&seg_desc, selector << 4);
+ set_desc_limit(&seg_desc, 0xffff);
+ seg_desc.type = 3;
+ seg_desc.p = 1;
+ seg_desc.s = 1;
+ seg_desc.dpl = 3;
+ goto load;
}
rpl = selector & 3;
@@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
.check_perm = (_p) }
-#define N D(0)
+#define N D(NotImpl)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
@@ -3713,7 +3723,7 @@ static const struct opcode group5[] = {
I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
I(SrcMem | Stack, em_grp45),
I(SrcMemFAddr | ImplicitOps, em_grp45),
- I(SrcMem | Stack, em_grp45), N,
+ I(SrcMem | Stack, em_grp45), D(Undefined),
};
static const struct opcode group6[] = {
@@ -4373,7 +4383,7 @@ done_prefixes:
ctxt->intercept = opcode.intercept;
/* Unrecognised? */
- if (ctxt->d == 0 || (ctxt->d & Undefined))
+ if (ctxt->d == 0 || (ctxt->d & NotImpl))
return EMULATION_FAILED;
if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
@@ -4511,7 +4521,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
ctxt->mem_read.pos = 0;
- if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) {
+ if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
+ (ctxt->d & Undefined)) {
rc = emulate_ud(ctxt);
goto done;
}
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index c1d30b2fc9bb..412a5aa0ef94 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work)
}
spin_unlock(&ps->inject_lock);
if (inject) {
- kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
- kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
+ kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
+ kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
/*
* Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a8e9369f41c5..e29883c604ff 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ return apic_test_vector(vector, apic->regs + APIC_ISR) ||
+ apic_test_vector(vector, apic->regs + APIC_IRR);
+}
+
static inline void apic_set_vector(int vec, void *bitmap)
{
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
-void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
- struct kvm_lapic_irq *irq,
- u64 *eoi_exit_bitmap)
-{
- struct kvm_lapic **dst;
- struct kvm_apic_map *map;
- unsigned long bitmap = 1;
- int i;
-
- rcu_read_lock();
- map = rcu_dereference(vcpu->kvm->arch.apic_map);
-
- if (unlikely(!map)) {
- __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
- goto out;
- }
-
- if (irq->dest_mode == 0) { /* physical mode */
- if (irq->delivery_mode == APIC_DM_LOWEST ||
- irq->dest_id == 0xff) {
- __set_bit(irq->vector,
- (unsigned long *)eoi_exit_bitmap);
- goto out;
- }
- dst = &map->phys_map[irq->dest_id & 0xff];
- } else {
- u32 mda = irq->dest_id << (32 - map->ldr_bits);
-
- dst = map->logical_map[apic_cluster_id(map, mda)];
-
- bitmap = apic_logical_id(map, mda);
- }
-
- for_each_set_bit(i, &bitmap, 16) {
- if (!dst[i])
- continue;
- if (dst[i]->vcpu == vcpu) {
- __set_bit(irq->vector,
- (unsigned long *)eoi_exit_bitmap);
- break;
- }
- }
-
-out:
- rcu_read_unlock();
-}
-
static void recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
@@ -256,7 +217,7 @@ out:
if (old)
kfree_rcu(old, rcu);
- kvm_ioapic_make_eoibitmap_request(kvm);
+ kvm_vcpu_request_scan_ioapic(kvm);
}
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap)
return count;
}
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+{
+ u32 i, pir_val;
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ for (i = 0; i <= 7; i++) {
+ pir_val = xchg(&pir[i], 0);
+ if (pir_val)
+ *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
+
static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
apic->irr_pending = true;
@@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
if (!apic->irr_pending)
return -1;
+ kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
result = apic_search_irr(apic);
ASSERT(result == -1 || result >= 16);
@@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
}
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
- int vector, int level, int trig_mode);
+ int vector, int level, int trig_mode,
+ unsigned long *dest_map);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+ unsigned long *dest_map)
{
struct kvm_lapic *apic = vcpu->arch.apic;
return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
- irq->level, irq->trig_mode);
+ irq->level, irq->trig_mode, dest_map);
}
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
@@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
return result;
}
+void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
+}
+
static void apic_update_ppr(struct kvm_lapic *apic)
{
u32 tpr, isrv, ppr, old_ppr;
@@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
}
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, int *r)
+ struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
{
struct kvm_apic_map *map;
unsigned long bitmap = 1;
@@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
*r = -1;
if (irq->shorthand == APIC_DEST_SELF) {
- *r = kvm_apic_set_irq(src->vcpu, irq);
+ *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
return true;
}
@@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
continue;
if (*r < 0)
*r = 0;
- *r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+ *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
}
ret = true;
@@ -681,7 +667,8 @@ out:
* Return 1 if successfully added and 0 if discarded.
*/
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
- int vector, int level, int trig_mode)
+ int vector, int level, int trig_mode,
+ unsigned long *dest_map)
{
int result = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
@@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
if (unlikely(!apic_enabled(apic)))
break;
- if (trig_mode) {
- apic_debug("level trig mode for vector %d", vector);
- apic_set_vector(vector, apic->regs + APIC_TMR);
- } else
- apic_clear_vector(vector, apic->regs + APIC_TMR);
+ if (dest_map)
+ __set_bit(vcpu->vcpu_id, dest_map);
- result = !apic_test_and_set_irr(vector, apic);
- trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
- trig_mode, vector, !result);
- if (!result) {
- if (trig_mode)
- apic_debug("level trig mode repeatedly for "
- "vector %d", vector);
- break;
- }
+ if (kvm_x86_ops->deliver_posted_interrupt) {
+ result = 1;
+ kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
+ } else {
+ result = !apic_test_and_set_irr(vector, apic);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_vcpu_kick(vcpu);
+ if (!result) {
+ if (trig_mode)
+ apic_debug("level trig mode repeatedly "
+ "for vector %d", vector);
+ goto out;
+ }
+
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+out:
+ trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
+ trig_mode, vector, !result);
break;
case APIC_DM_REMRD:
@@ -786,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
- kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+ kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
}
}
@@ -852,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
irq.vector);
- kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+ kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}
static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -1488,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
vector = reg & APIC_VECTOR_MASK;
mode = reg & APIC_MODE_MASK;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
- return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+ return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
+ NULL);
}
return 0;
}
@@ -1658,6 +1650,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
apic->highest_isr_cache = -1;
kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_rtc_eoi_tracking_restore_one(vcpu);
}
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2c721b986eec..c730ac9fe801 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -53,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
+void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+ unsigned long *dest_map);
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, int *r);
+ struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
@@ -160,13 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
return ldr & map->lid_mask;
}
-void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
- struct kvm_lapic_irq *irq,
- u64 *eoi_bitmap);
-
static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
{
return vcpu->arch.apic->pending_events;
}
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
+
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 633e30cfbd63..004cc87b781c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1501,15 +1501,11 @@ static void drop_parent_pte(struct kvm_mmu_page *sp,
mmu_spte_clear_no_track(parent_pte);
}
-static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
-
static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
u64 *parent_pte, int direct)
{
struct kvm_mmu_page *sp;
- make_mmu_pages_available(vcpu);
-
sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
if (!direct)
@@ -2806,6 +2802,7 @@ exit:
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gva_t gva, pfn_t *pfn, bool write, bool *writable);
+static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
gfn_t gfn, bool prefault)
@@ -2847,6 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
+ make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
@@ -2924,6 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
spin_lock(&vcpu->kvm->mmu_lock);
+ make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
1, ACC_ALL, NULL);
++sp->root_count;
@@ -2935,6 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
spin_lock(&vcpu->kvm->mmu_lock);
+ make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
i << 30,
PT32_ROOT_LEVEL, 1, ACC_ALL,
@@ -2973,6 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
spin_lock(&vcpu->kvm->mmu_lock);
+ make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
0, ACC_ALL, NULL);
root = __pa(sp->spt);
@@ -3006,6 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
return 1;
}
spin_lock(&vcpu->kvm->mmu_lock);
+ make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, 0,
ACC_ALL, NULL);
@@ -3311,6 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
+ make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, gpa, write, map_writable,
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index af143f065532..da20860b457a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -627,6 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
goto out_unlock;
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
+ make_mmu_pages_available(vcpu);
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cfc258a6bf97..c53e797e7369 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
return 1;
}
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
+ u32 index = msr_info->index;
+ u64 data = msr_info->data;
switch (index) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
}
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
+ if (msr_info->host_initiated) {
+ pmu->global_status = data;
+ return 0;
+ }
break; /* RO MSR */
case MSR_CORE_PERF_GLOBAL_CTRL:
if (pmu->global_ctrl == data)
@@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
- pmu->global_status &= ~data;
+ if (!msr_info->host_initiated)
+ pmu->global_status &= ~data;
pmu->global_ovf_ctrl = data;
return 0;
}
@@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
default:
if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
(pmc = get_fixed_pmc(pmu, index))) {
- data = (s64)(s32)data;
+ if (!msr_info->host_initiated)
+ data = (s64)(s32)data;
pmc->counter += data - read_pmc(pmc);
return 0;
} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7a46c1f46861..d6713e18bbc1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3577,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
return;
}
+static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+ return;
+}
+
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4233,6 +4238,11 @@ out:
return ret;
}
+static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+ local_irq_enable();
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -4300,6 +4310,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.vm_has_apicv = svm_vm_has_apicv,
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_isr_update = svm_hwapic_isr_update,
+ .sync_pir_to_irr = svm_sync_pir_to_irr,
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
@@ -4328,6 +4339,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tdp_cr3 = set_tdp_cr3,
.check_intercept = svm_check_intercept,
+ .handle_external_intr = svm_handle_external_intr,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 03f574641852..5a87a58af49d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
-static bool __read_mostly enable_apicv_reg_vid;
+static bool __read_mostly enable_apicv = 1;
+module_param(enable_apicv, bool, S_IRUGO);
/*
* If nested=1, nested virtualization is supported, i.e., guests may use
@@ -366,6 +367,31 @@ struct nested_vmx {
struct page *apic_access_page;
};
+#define POSTED_INTR_ON 0
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+ u32 pir[8]; /* Posted interrupt requested */
+ u32 control; /* bit 0 of control is outstanding notification bit */
+ u32 rsvd[7];
+} __aligned(64);
+
+static bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
+static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
struct vcpu_vmx {
struct kvm_vcpu vcpu;
unsigned long host_rsp;
@@ -378,6 +404,7 @@ struct vcpu_vmx {
struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
+ unsigned long host_idt_base;
#ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
@@ -429,6 +456,9 @@ struct vcpu_vmx {
bool rdtscp_enabled;
+ /* Posted interrupt descriptor */
+ struct pi_desc pi_desc;
+
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;
};
@@ -626,6 +656,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
static bool guest_state_valid(struct kvm_vcpu *vcpu);
static u32 vmx_segment_access_rights(struct kvm_segment *var);
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -784,6 +815,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
}
+static inline bool cpu_has_vmx_posted_intr(void)
+{
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+}
+
+static inline bool cpu_has_vmx_apicv(void)
+{
+ return cpu_has_vmx_apic_register_virt() &&
+ cpu_has_vmx_virtual_intr_delivery() &&
+ cpu_has_vmx_posted_intr();
+}
+
static inline bool cpu_has_vmx_flexpriority(void)
{
return cpu_has_vmx_tpr_shadow() &&
@@ -2551,12 +2594,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
u32 _vmexit_control = 0;
u32 _vmentry_control = 0;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = PIN_BASED_VIRTUAL_NMIS;
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
- &_pin_based_exec_control) < 0)
- return -EIO;
-
min = CPU_BASED_HLT_EXITING |
#ifdef CONFIG_X86_64
CPU_BASED_CR8_LOAD_EXITING |
@@ -2627,11 +2664,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
- opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+ opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
+ VM_EXIT_ACK_INTR_ON_EXIT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
+ &_pin_based_exec_control) < 0)
+ return -EIO;
+
+ if (!(_cpu_based_2nd_exec_control &
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
+ !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+ _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
+
min = 0;
opt = VM_ENTRY_LOAD_IA32_PAT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
@@ -2810,14 +2859,16 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_ple())
ple_gap = 0;
- if (!cpu_has_vmx_apic_register_virt() ||
- !cpu_has_vmx_virtual_intr_delivery())
- enable_apicv_reg_vid = 0;
+ if (!cpu_has_vmx_apicv())
+ enable_apicv = 0;
- if (enable_apicv_reg_vid)
+ if (enable_apicv)
kvm_x86_ops->update_cr8_intercept = NULL;
- else
+ else {
kvm_x86_ops->hwapic_irr_update = NULL;
+ kvm_x86_ops->deliver_posted_interrupt = NULL;
+ kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+ }
if (nested)
nested_vmx_setup_ctls_msrs();
@@ -3873,13 +3924,57 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
msr, MSR_TYPE_W);
}
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+ return enable_apicv && irqchip_in_kernel(kvm);
+}
+
+/*
+ * Send interrupt to vcpu via posted interrupt way.
+ * 1. If target vcpu is running(non-root mode), send posted interrupt
+ * notification to vcpu and hardware will sync PIR to vIRR atomically.
+ * 2. If target vcpu isn't running(root mode), kick it to pick up the
+ * interrupt from PIR in next vmentry.
+ */
+static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int r;
+
+ if (pi_test_and_set_pir(vector, &vmx->pi_desc))
+ return;
+
+ r = pi_test_and_set_on(&vmx->pi_desc);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ if (!r && (vcpu->mode == IN_GUEST_MODE))
+ apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+ POSTED_INTR_VECTOR);
+ else
+ kvm_vcpu_kick(vcpu);
+}
+
+static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!pi_test_and_clear_on(&vmx->pi_desc))
+ return;
+
+ kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+}
+
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
+{
+ return;
+}
+
/*
* Set up the vmcs's constant host-state fields, i.e., host-state fields that
* will not change in the lifetime of the guest.
* Note that host-state that does change is set elsewhere. E.g., host-state
* that is set differently for each CPU is set in vmx_vcpu_load(), not here.
*/
-static void vmx_set_constant_host_state(void)
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
{
u32 low32, high32;
unsigned long tmpl;
@@ -3907,6 +4002,7 @@ static void vmx_set_constant_host_state(void)
native_store_idt(&dt);
vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
+ vmx->host_idt_base = dt.address;
vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
@@ -3932,6 +4028,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
}
+static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+{
+ u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
+
+ if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+ pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+ return pin_based_exec_ctrl;
+}
+
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
{
u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@@ -3949,11 +4054,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
return exec_control;
}
-static int vmx_vm_has_apicv(struct kvm *kvm)
-{
- return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
-}
-
static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
{
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -4009,8 +4109,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
/* Control */
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
- vmcs_config.pin_based_exec_ctrl);
+ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
@@ -4019,13 +4118,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmx_secondary_exec_control(vmx));
}
- if (enable_apicv_reg_vid) {
+ if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
vmcs_write64(EOI_EXIT_BITMAP0, 0);
vmcs_write64(EOI_EXIT_BITMAP1, 0);
vmcs_write64(EOI_EXIT_BITMAP2, 0);
vmcs_write64(EOI_EXIT_BITMAP3, 0);
vmcs_write16(GUEST_INTR_STATUS, 0);
+
+ vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+ vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
}
if (ple_gap) {
@@ -4039,7 +4141,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
- vmx_set_constant_host_state();
+ vmx_set_constant_host_state(vmx);
#ifdef CONFIG_X86_64
rdmsrl(MSR_FS_BASE, a);
vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -4167,6 +4269,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write64(APIC_ACCESS_ADDR,
page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
+ if (vmx_vm_has_apicv(vcpu->kvm))
+ memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
+
if (vmx->vpid != 0)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
@@ -4325,16 +4430,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+ if (is_guest_mode(vcpu)) {
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- if (to_vmx(vcpu)->nested.nested_run_pending ||
- (vmcs12->idt_vectoring_info_field &
- VECTORING_INFO_VALID_MASK))
+
+ if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
- nested_vmx_vmexit(vcpu);
- vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
- vmcs12->vm_exit_intr_info = 0;
- /* fall through to normal code, but now in L1, not L2 */
+ if (nested_exit_on_intr(vcpu)) {
+ nested_vmx_vmexit(vcpu);
+ vmcs12->vm_exit_reason =
+ EXIT_REASON_EXTERNAL_INTERRUPT;
+ vmcs12->vm_exit_intr_info = 0;
+ /*
+ * fall through to normal code, but now in L1, not L2
+ */
+ }
}
return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -5189,7 +5298,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
return 1;
- err = emulate_instruction(vcpu, 0);
+ err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
if (err == EMULATE_DO_MMIO) {
ret = 0;
@@ -6112,14 +6221,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_TRIPLE_FAULT:
return 1;
case EXIT_REASON_PENDING_INTERRUPT:
+ return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
case EXIT_REASON_NMI_WINDOW:
- /*
- * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit
- * (aka Interrupt Window Exiting) only when L1 turned it on,
- * so if we got a PENDING_INTERRUPT exit, this must be for L1.
- * Same for NMI Window Exiting.
- */
- return 1;
+ return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
case EXIT_REASON_TASK_SWITCH:
return 1;
case EXIT_REASON_CPUID:
@@ -6370,6 +6474,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
+ if (!vmx_vm_has_apicv(vcpu->kvm))
+ return;
+
vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
@@ -6400,6 +6507,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
}
}
+static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+ u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+ /*
+ * If external interrupt exists, IF bit is set in rflags/eflags on the
+ * interrupt stack frame, and interrupt will be enabled on a return
+ * from interrupt handler.
+ */
+ if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
+ == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
+ unsigned int vector;
+ unsigned long entry;
+ gate_desc *desc;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+#ifdef CONFIG_X86_64
+ unsigned long tmp;
+#endif
+
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ desc = (gate_desc *)vmx->host_idt_base + vector;
+ entry = gate_offset(*desc);
+ asm volatile(
+#ifdef CONFIG_X86_64
+ "mov %%" _ASM_SP ", %[sp]\n\t"
+ "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
+ "push $%c[ss]\n\t"
+ "push %[sp]\n\t"
+#endif
+ "pushf\n\t"
+ "orl $0x200, (%%" _ASM_SP ")\n\t"
+ __ASM_SIZE(push) " $%c[cs]\n\t"
+ "call *%[entry]\n\t"
+ :
+#ifdef CONFIG_X86_64
+ [sp]"=&r"(tmp)
+#endif
+ :
+ [entry]"r"(entry),
+ [ss]"i"(__KERNEL_DS),
+ [cs]"i"(__KERNEL_CS)
+ );
+ } else
+ local_irq_enable();
+}
+
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -6498,8 +6651,6 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
- if (is_guest_mode(&vmx->vcpu))
- return;
__vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
VM_EXIT_INSTRUCTION_LEN,
IDT_VECTORING_ERROR_CODE);
@@ -6507,8 +6658,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu))
- return;
__vmx_complete_interrupts(vcpu,
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
VM_ENTRY_INSTRUCTION_LEN,
@@ -6540,21 +6689,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr;
- if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- if (vmcs12->idt_vectoring_info_field &
- VECTORING_INFO_VALID_MASK) {
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- vmcs12->idt_vectoring_info_field);
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
- vmcs12->vm_exit_instruction_len);
- if (vmcs12->idt_vectoring_info_field &
- VECTORING_INFO_DELIVER_CODE_MASK)
- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
- vmcs12->idt_vectoring_error_code);
- }
- }
-
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
vmx->entry_time = ktime_get();
@@ -6713,17 +6847,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
- if (is_guest_mode(vcpu)) {
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
- if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
- vmcs12->idt_vectoring_error_code =
- vmcs_read32(IDT_VECTORING_ERROR_CODE);
- vmcs12->vm_exit_instruction_len =
- vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
- }
- }
-
vmx->loaded_vmcs->launched = 1;
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6785,10 +6908,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
put_cpu();
if (err)
goto free_vmcs;
- if (vm_need_virtualize_apic_accesses(kvm))
+ if (vm_need_virtualize_apic_accesses(kvm)) {
err = alloc_apic_access_page(kvm);
if (err)
goto free_vmcs;
+ }
if (enable_ept) {
if (!kvm->arch.ept_identity_map_addr)
@@ -7071,7 +7195,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* Other fields are different per CPU, and will be set later when
* vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
*/
- vmx_set_constant_host_state();
+ vmx_set_constant_host_state(vmx);
/*
* HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -7330,6 +7454,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vcpu->arch.cr4_guest_owned_bits));
}
+static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ u32 idt_vectoring;
+ unsigned int nr;
+
+ if (vcpu->arch.exception.pending) {
+ nr = vcpu->arch.exception.nr;
+ idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+ if (kvm_exception_is_soft(nr)) {
+ vmcs12->vm_exit_instruction_len =
+ vcpu->arch.event_exit_inst_len;
+ idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
+ } else
+ idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
+
+ if (vcpu->arch.exception.has_error_code) {
+ idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
+ vmcs12->idt_vectoring_error_code =
+ vcpu->arch.exception.error_code;
+ }
+
+ vmcs12->idt_vectoring_info_field = idt_vectoring;
+ } else if (vcpu->arch.nmi_pending) {
+ vmcs12->idt_vectoring_info_field =
+ INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
+ } else if (vcpu->arch.interrupt.pending) {
+ nr = vcpu->arch.interrupt.nr;
+ idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+ if (vcpu->arch.interrupt.soft) {
+ idt_vectoring |= INTR_TYPE_SOFT_INTR;
+ vmcs12->vm_entry_instruction_len =
+ vcpu->arch.event_exit_inst_len;
+ } else
+ idt_vectoring |= INTR_TYPE_EXT_INTR;
+
+ vmcs12->idt_vectoring_info_field = idt_vectoring;
+ }
+}
+
/*
* prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
* and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -7402,7 +7568,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
/* TODO: These cannot have changed unless we have MSR bitmaps and
* the relevant bit asks not to trap the change */
vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
- if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT)
+ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
@@ -7414,16 +7580,34 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
- vmcs12->idt_vectoring_info_field = to_vmx(vcpu)->idt_vectoring_info;
- vmcs12->idt_vectoring_error_code =
- vmcs_read32(IDT_VECTORING_ERROR_CODE);
+ if ((vmcs12->vm_exit_intr_info &
+ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
+ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
+ vmcs12->vm_exit_intr_error_code =
+ vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ vmcs12->idt_vectoring_info_field = 0;
vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- /* clear vm-entry fields which are to be cleared on exit */
- if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+ if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+ /* vm_entry_intr_info_field is cleared on exit. Emulate this
+ * instead of reading the real value. */
vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+ /*
+ * Transfer the event that L0 or L1 may wanted to inject into
+ * L2 to IDT_VECTORING_INFO_FIELD.
+ */
+ vmcs12_save_pending_event(vcpu, vmcs12);
+ }
+
+ /*
+ * Drop what we picked up for L2 via vmx_complete_interrupts. It is
+ * preserved above and would only end up incorrectly in L1.
+ */
+ vcpu->arch.nmi_injected = false;
+ kvm_clear_exception_queue(vcpu);
+ kvm_clear_interrupt_queue(vcpu);
}
/*
@@ -7523,6 +7707,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
int cpu;
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ /* trying to cancel vmlaunch/vmresume is a bug */
+ WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
leave_guest_mode(vcpu);
prepare_vmcs12(vcpu, vmcs12);
@@ -7657,6 +7844,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update,
+ .sync_pir_to_irr = vmx_sync_pir_to_irr,
+ .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
@@ -7685,6 +7874,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_tdp_cr3 = vmx_set_cr3,
.check_intercept = vmx_check_intercept,
+ .handle_external_intr = vmx_handle_external_intr,
};
static int __init vmx_init(void)
@@ -7741,7 +7931,7 @@ static int __init vmx_init(void)
r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
- goto out3;
+ goto out5;
#ifdef CONFIG_KEXEC
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -7759,7 +7949,7 @@ static int __init vmx_init(void)
memcpy(vmx_msr_bitmap_longmode_x2apic,
vmx_msr_bitmap_longmode, PAGE_SIZE);
- if (enable_apicv_reg_vid) {
+ if (enable_apicv) {
for (msr = 0x800; msr <= 0x8ff; msr++)
vmx_disable_intercept_msr_read_x2apic(msr);
@@ -7789,6 +7979,8 @@ static int __init vmx_init(void)
return 0;
+out5:
+ free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
out3:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2aaba814f1c8..50e2e10b8041 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -261,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+asmlinkage void kvm_spurious_fault(void)
+{
+ /* Fault while not rebooting. We want the trace. */
+ BUG();
+}
+EXPORT_SYMBOL_GPL(kvm_spurious_fault);
+
#define EXCPT_BENIGN 0
#define EXCPT_CONTRIBUTORY 1
#define EXCPT_PF 2
@@ -2040,7 +2047,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
if (kvm_pmu_msr(vcpu, msr))
- return kvm_pmu_set_msr(vcpu, msr, data);
+ return kvm_pmu_set_msr(vcpu, msr_info);
if (pr || data != 0)
vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
@@ -2086,7 +2093,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
if (kvm_pmu_msr(vcpu, msr))
- return kvm_pmu_set_msr(vcpu, msr, data);
+ return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
msr, data);
@@ -2685,6 +2692,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
+ kvm_x86_ops->sync_pir_to_irr(vcpu);
memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
return 0;
@@ -3484,13 +3492,15 @@ out:
return r;
}
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+ bool line_status)
{
if (!irqchip_in_kernel(kvm))
return -ENXIO;
irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
- irq_event->irq, irq_event->level);
+ irq_event->irq, irq_event->level,
+ line_status);
return 0;
}
@@ -4758,11 +4768,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
}
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
- bool write_fault_to_shadow_pgtable)
+ bool write_fault_to_shadow_pgtable,
+ int emulation_type)
{
gpa_t gpa = cr2;
pfn_t pfn;
+ if (emulation_type & EMULTYPE_NO_REEXECUTE)
+ return false;
+
if (!vcpu->arch.mmu.direct_map) {
/*
* Write permission should be allowed since only
@@ -4905,8 +4919,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
if (r != EMULATION_OK) {
if (emulation_type & EMULTYPE_TRAP_UD)
return EMULATE_FAIL;
- if (reexecute_instruction(vcpu, cr2,
- write_fault_to_spt))
+ if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+ emulation_type))
return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
@@ -4936,7 +4950,8 @@ restart:
return EMULATE_DONE;
if (r == EMULATION_FAILED) {
- if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
+ if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+ emulation_type))
return EMULATE_DONE;
return handle_emulation_failure(vcpu);
@@ -5647,14 +5662,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
#endif
}
-static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
u64 eoi_exit_bitmap[4];
+ u32 tmr[8];
+
+ if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+ return;
memset(eoi_exit_bitmap, 0, 32);
+ memset(tmr, 0, 32);
- kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+ kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+ kvm_apic_update_tmr(vcpu, tmr);
}
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
@@ -5710,8 +5731,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_handle_pmu_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_deliver_pmi(vcpu);
- if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
- update_eoi_exitmap(vcpu);
+ if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
+ vcpu_scan_ioapic(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5806,7 +5827,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- local_irq_enable();
+
+ /* Interrupt is enabled by handle_external_intr() */
+ kvm_x86_ops->handle_external_intr(vcpu);
++vcpu->stat.exits;