summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-03-14 12:35:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-03-14 12:35:02 -0700
commit9d0c8e793f0eb0613efe81d2cdca8c2efa0ad33c (patch)
tree2483edd3c314bb118fb76f1d8c9d9bd73f9c2259 /arch/x86
parent50eb842fe517b2765b7748c3016082b484a6dbb8 (diff)
parent35737d2db2f4567106c90060ad110b27cb354fa4 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini: "More fixes for ARM and x86" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: LAPIC: Advancing the timer expiration on guest initiated write KVM: x86/mmu: Skip !MMU-present SPTEs when removing SP in exclusive mode KVM: kvmclock: Fix vCPUs > 64 can't be online/hotpluged kvm: x86: annotate RCU pointers KVM: arm64: Fix exclusive limit for IPA size KVM: arm64: Reject VM creation when the default IPA size is unsupported KVM: arm64: Ensure I-cache isolation between vcpus of a same VM KVM: arm64: Don't use cbz/adr with external symbols KVM: arm64: Fix range alignment when walking page tables KVM: arm64: Workaround firmware wrongly advertising GICv2-on-v3 compatibility KVM: arm64: Rename __vgic_v3_get_ich_vtr_el2() to __vgic_v3_get_gic_config() KVM: arm64: Don't access PMSELR_EL0/PMUSERENR_EL0 when no PMU is available KVM: arm64: Turn kvm_arm_support_pmu_v3() into a static key KVM: arm64: Fix nVHE hyp panic host context restore KVM: arm64: Avoid corrupting vCPU context register in guest exit KVM: arm64: nvhe: Save the SPE context early kvm: x86: use NULL instead of using plain integer as pointer KVM: SVM: Connect 'npt' module param to KVM's internal 'npt_enabled' KVM: x86: Ensure deadline timer has truly expired before posting its IRQ
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/kernel/kvmclock.c19
-rw-r--r--arch/x86/kvm/lapic.c12
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c11
-rw-r--r--arch/x86/kvm/svm/svm.c25
-rw-r--r--arch/x86/kvm/x86.c2
6 files changed, 47 insertions, 26 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 877a4025d8da..9bc091ecaaeb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -963,7 +963,7 @@ struct kvm_arch {
struct kvm_pit *vpit;
atomic_t vapics_in_nmi_mode;
struct mutex apic_map_lock;
- struct kvm_apic_map *apic_map;
+ struct kvm_apic_map __rcu *apic_map;
atomic_t apic_map_dirty;
bool apic_access_page_done;
@@ -1036,7 +1036,7 @@ struct kvm_arch {
bool bus_lock_detection_enabled;
- struct kvm_pmu_event_filter *pmu_event_filter;
+ struct kvm_pmu_event_filter __rcu *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread;
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index aa593743acf6..1fc0962c89c0 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(void)
static int __init kvm_setup_vsyscall_timeinfo(void)
{
-#ifdef CONFIG_X86_64
- u8 flags;
+ kvmclock_init_mem();
- if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall)
- return 0;
+#ifdef CONFIG_X86_64
+ if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) {
+ u8 flags;
- flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
- if (!(flags & PVCLOCK_TSC_STABLE_BIT))
- return 0;
+ flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
+ if (!(flags & PVCLOCK_TSC_STABLE_BIT))
+ return 0;
- kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+ kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+ }
#endif
- kvmclock_init_mem();
-
return 0;
}
early_initcall(kvm_setup_vsyscall_timeinfo);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 45d40bfacb7c..cc369b9ad8f1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1642,7 +1642,16 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
}
if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
- kvm_wait_lapic_expire(vcpu);
+ /*
+ * Ensure the guest's timer has truly expired before posting an
+ * interrupt. Open code the relevant checks to avoid querying
+ * lapic_timer_int_injected(), which will be false since the
+ * interrupt isn't yet injected. Waiting until after injecting
+ * is not an option since that won't help a posted interrupt.
+ */
+ if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
+ __kvm_wait_lapic_expire(vcpu);
kvm_apic_inject_pending_timer_irqs(apic);
return;
}
@@ -2595,6 +2604,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
apic_update_ppr(apic);
hrtimer_cancel(&apic->lapic_timer.timer);
+ apic->lapic_timer.expired_tscdeadline = 0;
apic_update_lvtt(apic);
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
update_divide_count(apic);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c926c6b899a1..d78915019b08 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -337,7 +337,18 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
cpu_relax();
}
} else {
+ /*
+ * If the SPTE is not MMU-present, there is no backing
+ * page associated with the SPTE and so no side effects
+ * that need to be recorded, and exclusive ownership of
+ * mmu_lock ensures the SPTE can't be made present.
+ * Note, zapping MMIO SPTEs is also unnecessary as they
+ * are guarded by the memslots generation, not by being
+ * unreachable.
+ */
old_child_spte = READ_ONCE(*sptep);
+ if (!is_shadow_present_pte(old_child_spte))
+ continue;
/*
* Marking the SPTE as a removed SPTE is not
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index baee91c1e936..58a45bb139f8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -115,13 +115,6 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_INVALID, .always = false },
};
-/* enable NPT for AMD64 and X86 with PAE */
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-bool npt_enabled = true;
-#else
-bool npt_enabled;
-#endif
-
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* pause_filter_count: On processors that support Pause filtering(indicated
@@ -170,9 +163,12 @@ module_param(pause_filter_count_shrink, ushort, 0444);
static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
module_param(pause_filter_count_max, ushort, 0444);
-/* allow nested paging (virtualized MMU) for all guests */
-static int npt = true;
-module_param(npt, int, S_IRUGO);
+/*
+ * Use nested page tables by default. Note, NPT may get forced off by
+ * svm_hardware_setup() if it's unsupported by hardware or the host kernel.
+ */
+bool npt_enabled = true;
+module_param_named(npt, npt_enabled, bool, 0444);
/* allow nested virtualization in KVM/SVM */
static int nested = true;
@@ -988,10 +984,15 @@ static __init int svm_hardware_setup(void)
goto err;
}
- if (!boot_cpu_has(X86_FEATURE_NPT))
+ /*
+ * KVM's MMU doesn't support using 2-level paging for itself, and thus
+ * NPT isn't supported if the host is using 2-level paging since host
+ * CR4 is unchanged on VMRUN.
+ */
+ if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
npt_enabled = false;
- if (npt_enabled && !npt)
+ if (!boot_cpu_has(X86_FEATURE_NPT))
npt_enabled = false;
kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2a20ce60152e..47e021bdcc94 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10601,7 +10601,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
return (void __user *)hva;
} else {
if (!slot || !slot->npages)
- return 0;
+ return NULL;
old_npages = slot->npages;
hva = slot->userspace_addr;