From 54effa653246c35997f5e990e0134be5be09f9d1 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 3 Feb 2021 14:19:30 +0000 Subject: asm-generic: export: Stub EXPORT_SYMBOL with __DISABLE_EXPORTS It is currently possible to stub EXPORT_SYMBOL() macros in C code using __DISABLE_EXPORTS, which is necessary to run in constrained environments such as the EFI stub or the decompressor. But this currently doesn't apply to exports from assembly, which can lead to somewhat confusing situations. Consolidate the __DISABLE_EXPORTS infrastructure by checking it from asm-generic/export.h as well. Signed-off-by: Quentin Perret Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210203141931.615898-2-qperret@google.com --- include/asm-generic/export.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h index 365345f9a9e3..07a36a874dca 100644 --- a/include/asm-generic/export.h +++ b/include/asm-generic/export.h @@ -33,7 +33,7 @@ */ .macro ___EXPORT_SYMBOL name,val,sec -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) && !defined(__DISABLE_EXPORTS) .section ___ksymtab\sec+\name,"a" .balign KSYM_ALIGN __ksymtab_\name: -- cgit v1.2.3 From 2c07ded06427dd3339278487a1413d5e478f05f9 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 4 Jan 2021 09:17:49 -0600 Subject: KVM/SVM: add support for SEV attestation command The SEV FW version >= 0.23 added a new command that can be used to query the attestation report containing the SHA-256 digest of the guest memory encrypted through the KVM_SEV_LAUNCH_UPDATE_{DATA, VMSA} commands and sign the report with the Platform Endorsement Key (PEK). See the SEV FW API spec section 6.8 for more details. Note that there already exists a command (KVM_SEV_LAUNCH_MEASURE) that can be used to get the SHA-256 digest.
The main difference between the KVM_SEV_LAUNCH_MEASURE and KVM_SEV_ATTESTATION_REPORT is that the latter can be called while the guest is running and the measurement value is signed with PEK. Cc: James Bottomley Cc: Tom Lendacky Cc: David Rientjes Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Borislav Petkov Cc: John Allen Cc: Herbert Xu Cc: linux-crypto@vger.kernel.org Reviewed-by: Tom Lendacky Acked-by: David Rientjes Tested-by: James Bottomley Signed-off-by: Brijesh Singh Message-Id: <20210104151749.30248-1-brijesh.singh@amd.com> Signed-off-by: Paolo Bonzini --- include/linux/psp-sev.h | 17 +++++++++++++++++ include/uapi/linux/kvm.h | 8 ++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 49d155cd2dfe..b801ead1e2bb 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -66,6 +66,7 @@ enum sev_cmd { SEV_CMD_LAUNCH_MEASURE = 0x033, SEV_CMD_LAUNCH_UPDATE_SECRET = 0x034, SEV_CMD_LAUNCH_FINISH = 0x035, + SEV_CMD_ATTESTATION_REPORT = 0x036, /* Guest migration commands (outgoing) */ SEV_CMD_SEND_START = 0x040, @@ -483,6 +484,22 @@ struct sev_data_dbg { u32 len; /* In */ } __packed; +/** + * struct sev_data_attestation_report - SEV_ATTESTATION_REPORT command parameters + * + * @handle: handle of the VM + * @mnonce: a random nonce that will be included in the report. + * @address: physical address where the report will be copied. + * @len: length of the physical buffer. 
+ */ +struct sev_data_attestation_report { + u32 handle; /* In */ + u32 reserved; + u64 address; /* In */ + u8 mnonce[16]; /* In */ + u32 len; /* In/Out */ +} __packed; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 374c67875cdb..07c194e2c302 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1593,6 +1593,8 @@ enum sev_cmd_id { KVM_SEV_DBG_ENCRYPT, /* Guest certificates commands */ KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, KVM_SEV_NR_MAX, }; @@ -1645,6 +1647,12 @@ struct kvm_sev_dbg { __u32 len; }; +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) -- cgit v1.2.3 From fe6b6bc802b40081e8a7a1abe8d32b88d10a03e1 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Fri, 6 Nov 2020 17:03:14 +0800 Subject: KVM: VMX: Enable bus lock VM exit A virtual machine can exploit bus locks to degrade the performance of the system. A bus lock can be caused by a split locked access to writeback (WB) memory or by using locks on uncacheable (UC) memory. A bus lock is typically >1000 cycles slower than an atomic operation within a cache line. It also disrupts performance on other cores (which must wait for the bus lock to be released before their memory operations can complete). To address the threat, bus lock VM exit is introduced to notify the VMM when a bus lock was acquired, allowing it to enforce throttling or other policy based mitigations. A VMM can enable VM exit due to bus locks by setting a new "Bus Lock Detection" VM-execution control (bit 30 of Secondary Processor-based VM execution controls). If delivery of this VM exit was preempted by a higher priority VM exit (e.g.
EPT misconfiguration, EPT violation, APIC access VM exit, APIC write VM exit, exception bitmap exiting), bit 26 of the exit reason in the VMCS field is set to 1. In the current implementation, KVM exposes this capability through KVM_CAP_X86_BUS_LOCK_EXIT. The user can get the supported mode bitmap (i.e. off and exit) and enable it explicitly (disabled by default). If bus locks in the guest are detected by KVM, exit to user space even when the current exit reason is handled by KVM internally. Set a new flag, KVM_RUN_BUS_LOCK, in vcpu->run->flags to inform user space that a bus lock was detected in the guest. Documentation for Bus Lock VM exit is now available in the latest "Intel Architecture Instruction Set Extensions Programming Reference". Document Link: https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Chenyi Qiang Message-Id: <20201106090315.18606-4-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 07c194e2c302..dfe3ba5cf262 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -252,6 +252,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 #define KVM_EXIT_AP_RESET_HOLD 32 +#define KVM_EXIT_X86_BUS_LOCK 33 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed.
*/ @@ -1058,6 +1059,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 #ifdef KVM_CAP_IRQ_ROUTING @@ -1774,4 +1776,7 @@ struct kvm_dirty_gfn { __u64 offset; }; +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From 26128cb6c7e6731fe644c687af97733adfdb5ee9 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 2 Feb 2021 10:57:12 -0800 Subject: locking/rwlocks: Add contention detection for rwlocks rwlocks do not currently have any facility to detect contention like spinlocks do. In order to allow users of rwlocks to better manage latency, add contention detection for queued rwlocks. CC: Ingo Molnar CC: Will Deacon Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Acked-by: Waiman Long Acked-by: Paolo Bonzini Signed-off-by: Ben Gardon Message-Id: <20210202185734.1680553-7-bgardon@google.com> Signed-off-by: Paolo Bonzini --- include/asm-generic/qrwlock.h | 24 ++++++++++++++++++------ include/linux/rwlock.h | 7 +++++++ 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 84ce841ce735..0020d3b820a7 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -14,6 +14,7 @@ #include #include +#include /* * Writer states & reader shift and bias. 
@@ -116,15 +117,26 @@ static inline void queued_write_unlock(struct qrwlock *lock) smp_store_release(&lock->wlocked, 0); } +/** + * queued_rwlock_is_contended - check if the lock is contended + * @lock : Pointer to queue rwlock structure + * Return: 1 if lock contended, 0 otherwise + */ +static inline int queued_rwlock_is_contended(struct qrwlock *lock) +{ + return arch_spin_is_locked(&lock->wait_lock); +} + /* * Remapping rwlock architecture specific functions to the corresponding * queue rwlock functions. */ -#define arch_read_lock(l) queued_read_lock(l) -#define arch_write_lock(l) queued_write_lock(l) -#define arch_read_trylock(l) queued_read_trylock(l) -#define arch_write_trylock(l) queued_write_trylock(l) -#define arch_read_unlock(l) queued_read_unlock(l) -#define arch_write_unlock(l) queued_write_unlock(l) +#define arch_read_lock(l) queued_read_lock(l) +#define arch_write_lock(l) queued_write_lock(l) +#define arch_read_trylock(l) queued_read_trylock(l) +#define arch_write_trylock(l) queued_write_trylock(l) +#define arch_read_unlock(l) queued_read_unlock(l) +#define arch_write_unlock(l) queued_write_unlock(l) +#define arch_rwlock_is_contended(l) queued_rwlock_is_contended(l) #endif /* __ASM_GENERIC_QRWLOCK_H */ diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 3dcd617e65ae..7ce9a51ae5c0 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -128,4 +128,11 @@ do { \ 1 : ({ local_irq_restore(flags); 0; }); \ }) +#ifdef arch_rwlock_is_contended +#define rwlock_is_contended(lock) \ + arch_rwlock_is_contended(&(lock)->raw_lock) +#else +#define rwlock_is_contended(lock) ((void)(lock), 0) +#endif /* arch_rwlock_is_contended */ + #endif /* __LINUX_RWLOCK_H */ -- cgit v1.2.3 From a09a689a534183c48f200bc2de1ae61ae9c462ad Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 2 Feb 2021 10:57:13 -0800 Subject: sched: Add needbreak for rwlocks Contention awareness while holding a spin lock is essential for reducing latency when long running 
kernel operations can hold that lock. Add the same contention detection interface for read/write spin locks. CC: Ingo Molnar CC: Will Deacon Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Acked-by: Waiman Long Acked-by: Paolo Bonzini Signed-off-by: Ben Gardon Message-Id: <20210202185734.1680553-8-bgardon@google.com> Signed-off-by: Paolo Bonzini --- include/linux/sched.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e3a5eeec509..5d1378e5a040 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1912,6 +1912,23 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } +/* + * Check if a rwlock is contended. + * Returns non-zero if there is another task waiting on the rwlock. + * Returns zero if the lock is not contended or the system / underlying + * rwlock implementation does not support contention detection. + * Technically does not depend on CONFIG_PREEMPTION, but a general need + * for low latency. + */ +static inline int rwlock_needbreak(rwlock_t *lock) +{ +#ifdef CONFIG_PREEMPTION + return rwlock_is_contended(lock); +#else + return 0; +#endif +} + static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); -- cgit v1.2.3 From f3d4b4b1dc1c5fb9ea17cac14133463bfe72f170 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 2 Feb 2021 10:57:14 -0800 Subject: sched: Add cond_resched_rwlock Safely rescheduling while holding a spin lock is essential for keeping long running kernel operations running smoothly. Add the facility to cond_resched rwlocks. 
CC: Ingo Molnar CC: Will Deacon Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Acked-by: Waiman Long Acked-by: Paolo Bonzini Signed-off-by: Ben Gardon Message-Id: <20210202185734.1680553-9-bgardon@google.com> Signed-off-by: Paolo Bonzini --- include/linux/sched.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5d1378e5a040..3052d16da3cf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1883,12 +1883,24 @@ static inline int _cond_resched(void) { return 0; } }) extern int __cond_resched_lock(spinlock_t *lock); +extern int __cond_resched_rwlock_read(rwlock_t *lock); +extern int __cond_resched_rwlock_write(rwlock_t *lock); #define cond_resched_lock(lock) ({ \ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ __cond_resched_lock(lock); \ }) +#define cond_resched_rwlock_read(lock) ({ \ + __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ + __cond_resched_rwlock_read(lock); \ +}) + +#define cond_resched_rwlock_write(lock) ({ \ + __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ + __cond_resched_rwlock_write(lock); \ +}) + static inline void cond_resched_rcu(void) { #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) -- cgit v1.2.3 From 531810caa9f4bc99ffbb90e09256792c56a6b07a Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 2 Feb 2021 10:57:24 -0800 Subject: KVM: x86/mmu: Use an rwlock for the x86 MMU Add a read / write lock to be used in place of the MMU spinlock on x86. The rwlock will enable the TDP MMU to handle page faults, and other operations in parallel in future commits. 
Reviewed-by: Peter Feiner Signed-off-by: Ben Gardon Message-Id: <20210202185734.1680553-19-bgardon@google.com> [Introduce virt/kvm/mmu_lock.h - Paolo] Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f3b1013fb22c..f417447129b9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -451,7 +451,12 @@ struct kvm_memslots { }; struct kvm { +#ifdef KVM_HAVE_MMU_RWLOCK + rwlock_t mmu_lock; +#else spinlock_t mmu_lock; +#endif /* KVM_HAVE_MMU_RWLOCK */ + struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; -- cgit v1.2.3 From 23200b7a30de315d0e9a40663c905869d29d833c Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 13 Jun 2018 09:55:44 -0400 Subject: KVM: x86/xen: intercept xen hypercalls if enabled Add a new exit reason for emulator to handle Xen hypercalls. Since this means KVM owns the ABI, dispense with the facility for the VMM to provide its own copy of the hypercall pages; just fill them in directly using VMCALL/VMMCALL as we do for the Hyper-V hypercall page. This behaviour is enabled by a new INTERCEPT_HCALL flag in the KVM_XEN_HVM_CONFIG ioctl structure, and advertised by the same flag being returned from the KVM_CAP_XEN_HVM check. Rename xen_hvm_config() to kvm_xen_write_hypercall_page() and move it to the nascent xen.c while we're at it, and add a test case. 
Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- include/uapi/linux/kvm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dfe3ba5cf262..c87defe5db4f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -216,6 +216,20 @@ struct kvm_hyperv_exit { } u; }; +struct kvm_xen_exit { +#define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -253,6 +267,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_DIRTY_RING_FULL 31 #define KVM_EXIT_AP_RESET_HOLD 32 #define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_XEN 34 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -429,6 +444,8 @@ struct kvm_run { __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; /* Fix the size of the union. */ char padding[256]; }; @@ -1133,6 +1150,9 @@ struct kvm_x86_mce { #endif #ifdef KVM_CAP_XEN_HVM +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; -- cgit v1.2.3 From a76b9641ad1c0b045045727a6cbbeebf80b6b9bb Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 3 Dec 2020 15:52:25 +0000 Subject: KVM: x86/xen: add KVM_XEN_HVM_SET_ATTR/KVM_XEN_HVM_GET_ATTR This will be used to set up shared info pages etc. 
Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- include/uapi/linux/kvm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c87defe5db4f..334796799dbc 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1587,6 +1587,17 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_DIRTY_LOG_RING */ #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) +#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) +#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 pad[8]; + } u; +}; + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ -- cgit v1.2.3 From a3833b81b05d0ae92ae085959dd8da136ec91868 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Dec 2020 16:20:32 +0000 Subject: KVM: x86/xen: latch long_mode when hypercall page is set up Signed-off-by: David Woodhouse --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 334796799dbc..11644954a2e2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1594,10 +1594,13 @@ struct kvm_xen_hvm_attr { __u16 type; __u16 pad[3]; union { + __u8 long_mode; __u64 pad[8]; } u; }; +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ -- cgit v1.2.3 From 13ffb97a3b11998450d51457b6b3617781953f7c Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 15 Jun 2018 21:17:14 -0400 Subject: KVM: x86/xen: register shared_info page Add KVM_XEN_ATTR_TYPE_SHARED_INFO to allow hypervisor to know where the guest's shared info page is. 
Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 11644954a2e2..f57f6e741a28 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1595,11 +1595,15 @@ struct kvm_xen_hvm_attr { __u16 pad[3]; union { __u8 long_mode; + struct { + __u64 gfn; + } shared_info; __u64 pad[8]; } u; }; #define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { -- cgit v1.2.3 From 42387042ba38cca8fb86bb3a7913e44cd3569750 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Dec 2020 21:02:23 +0000 Subject: xen: add wc_sec_hi to struct shared_info Xen added this in 2015 (Xen 4.6). On x86_64 and Arm it fills what was previously a 32-bit hole in the generic shared_info structure; on i386 it had to go at the end of struct arch_shared_info. Signed-off-by: David Woodhouse --- include/xen/interface/xen.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 8bfb242f433e..5ee37a296481 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -598,7 +598,9 @@ struct shared_info { * their gettimeofday() syscall on this wallclock-base value. */ struct pvclock_wall_clock wc; - +#ifndef CONFIG_X86_32 + uint32_t wc_sec_hi; +#endif struct arch_shared_info arch; }; -- cgit v1.2.3 From 3e3246158808d46b81edb8246214c0ab5a852594 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 2 Feb 2021 16:53:25 +0000 Subject: KVM: x86/xen: Add KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR This will be used for per-vCPU setup such as runstate and vcpu_info. 
Signed-off-by: David Woodhouse --- include/uapi/linux/kvm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f57f6e741a28..e2b0cbde1908 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1587,6 +1587,7 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_DIRTY_LOG_RING */ #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) +/* Per-VM Xen attributes */ #define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) #define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) @@ -1605,6 +1606,18 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 #define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +/* Per-vCPU Xen attributes */ +#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) +#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 pad[8]; + } u; +}; + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ -- cgit v1.2.3 From 73e69a86347afe8156aa50c436fc192b280b0cd7 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 29 Jun 2018 10:52:52 -0400 Subject: KVM: x86/xen: register vcpu info The vcpu info supersedes the per vcpu area of the shared info page and the guest vcpus will use this instead. 
Signed-off-by: Joao Martins Signed-off-by: Ankur Arora Signed-off-by: David Woodhouse --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e2b0cbde1908..2db0657b3337 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1614,10 +1614,13 @@ struct kvm_xen_vcpu_attr { __u16 type; __u16 pad[3]; union { + __u64 gpa; __u64 pad[8]; } u; }; +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ -- cgit v1.2.3 From f2340cd9e41dc463cb1189274f3db560c1dfa1f4 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Mon, 23 Jul 2018 11:20:57 -0400 Subject: KVM: x86/xen: register vcpu time info region Allow the Xen emulated guest the ability to register secondary vcpu time information. On Xen guests this is used in order to be mapped to userspace and hence allow vdso gettimeofday to work. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2db0657b3337..0f045ffd9cb6 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1620,6 +1620,7 @@ struct kvm_xen_vcpu_attr { }; #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { -- cgit v1.2.3 From 40da8ccd724f7ca2f08550a46268bc3a91cc8869 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 9 Dec 2020 20:08:30 +0000 Subject: KVM: x86/xen: Add event channel interrupt vector upcall It turns out that we can't handle event channels *entirely* in userspace by delivering them as ExtINT, because KVM is a bit picky about when it accepts ExtINT interrupts from a legacy PIC. 
The in-kernel local APIC has to have LVT0 configured in APIC_MODE_EXTINT and unmasked, which isn't necessarily the case for Xen guests especially on secondary CPUs. To cope with this, add kvm_xen_get_interrupt() which checks the evtchn_pending_upcall field in the Xen vcpu_info, and delivers the Xen upcall vector (configured by KVM_XEN_ATTR_TYPE_UPCALL_VECTOR) if it's set regardless of LAPIC LVT0 configuration. This gives us the minimum support we need for completely userspace-based implementation of event channels. This does mean that vcpu_enter_guest() needs to check for the evtchn_pending_upcall flag being set, because it can't rely on someone having set KVM_REQ_EVENT unless we were to add some way for userspace to do so manually. Signed-off-by: David Woodhouse --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0f045ffd9cb6..4d4cd001c908 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1596,6 +1596,7 @@ struct kvm_xen_hvm_attr { __u16 pad[3]; union { __u8 long_mode; + __u8 vector; struct { __u64 gfn; } shared_info; @@ -1605,6 +1606,7 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 #define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) -- cgit v1.2.3 From 8d4e7e80838f45d3466d36d4fcb890424825faa9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 4 Dec 2020 01:02:04 +0000 Subject: KVM: x86: declare Xen HVM shared info capability and add test case Instead of adding a plethora of new KVM_CAP_XEN_FOO capabilities, just add bits to the return value of KVM_CAP_XEN_HVM. 
Signed-off-by: David Woodhouse --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4d4cd001c908..63f8f6e95648 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1152,6 +1152,7 @@ struct kvm_x86_mce { #ifdef KVM_CAP_XEN_HVM #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) struct kvm_xen_hvm_config { __u32 flags; @@ -1604,6 +1605,7 @@ struct kvm_xen_hvm_attr { } u; }; +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ #define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 #define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 #define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 @@ -1621,6 +1623,7 @@ struct kvm_xen_vcpu_attr { } u; }; +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 -- cgit v1.2.3 From 9fd6dad1261a541b3f5fa7dc5b152222306e6702 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Feb 2021 05:07:11 -0500 Subject: mm: provide a saner PTE walking API for modules Currently, the follow_pfn function is exported for modules but follow_pte is not. However, follow_pfn is very easy to misuse, because it does not provide protections (so most of its callers assume the page is writable!) and because it returns after having already unlocked the page table lock. Provide instead a simplified version of follow_pte that does not have the pmdpp and range arguments. The older version survives as follow_invalidate_pte() for use by fs/dax.c. 
Reviewed-by: Jason Gunthorpe Signed-off-by: Paolo Bonzini --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecdf8a8cd6ae..24b292fce8e5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1658,9 +1658,11 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); +int follow_invalidate_pte(struct mm_struct *mm, unsigned long address, + struct mmu_notifier_range *range, pte_t **ptepp, + pmd_t **pmdpp, spinlock_t **ptlp); int follow_pte(struct mm_struct *mm, unsigned long address, - struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, - spinlock_t **ptlp); + pte_t **ptepp, spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn); int follow_phys(struct vm_area_struct *vma, unsigned long address, -- cgit v1.2.3 From 4fc096a99e01dd06dc55bef76ade7f8d76653245 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 28 Jan 2021 13:01:31 -0500 Subject: KVM: Raise the maximum number of user memslots Current KVM_USER_MEM_SLOTS limits are arch specific (512 on Power, 509 on x86, 32 on s390, 16 on MIPS) but they don't really need to be. Memory slots are allocated dynamically in KVM when added so the only real limitation is 'id_to_index' array which is 'short'. We don't have any other KVM_MEM_SLOTS_NUM/KVM_USER_MEM_SLOTS-sized statically defined structures. Low KVM_USER_MEM_SLOTS can be a limiting factor for some configurations. In particular, when QEMU tries to start a Windows guest with Hyper-V SynIC enabled and e.g. 
256 vCPUs the limit is hit as SynIC requires two pages per vCPU and the guest is free to pick any GFN for each of them, this fragments memslots as QEMU wants to have a separate memslot for each of these pages (which are supposed to act as 'overlay' pages). Signed-off-by: Vitaly Kuznetsov Message-Id: <20210127175731.2020089-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f417447129b9..e126ebda36d0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -425,9 +425,8 @@ struct kvm_irq_routing_table { #define KVM_PRIVATE_MEM_SLOTS 0 #endif -#ifndef KVM_MEM_SLOTS_NUM -#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) -#endif +#define KVM_MEM_SLOTS_NUM SHRT_MAX +#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS) #ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From d9a47edabc4f948102753fa9d41f2dc1dbeb28be Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 16 Dec 2020 16:12:19 +0530 Subject: KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR Introduce KVM_CAP_PPC_DAWR1 which can be used by QEMU to query whether KVM supports 2nd DAWR or not. The capability is by default disabled even when the underlying CPU supports 2nd DAWR. QEMU needs to check and enable it manually to use the feature. 
Signed-off-by: Ravi Bangoria Signed-off-by: Paul Mackerras --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 63f8f6e95648..8b281f722e5b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1077,6 +1077,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 +#define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From d8d0da4eee5c4e86ea08abde6975848376b4ac13 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 10 Feb 2021 13:16:31 -0500 Subject: locking/arch: Move qrwlock.h include after qspinlock.h include/asm-generic/qrwlock.h was trying to get arch_spin_is_locked via asm-generic/qspinlock.h. However, this does not work because architectures might be using queued rwlocks but not queued spinlocks (csky), or because they might be defining their own queued_* macros before including asm/qspinlock.h. To fix this, ensure that asm/spinlock.h always includes qrwlock.h after defining arch_spin_is_locked (either directly for csky, or via asm/qspinlock.h for other architectures). The only inclusion elsewhere is in kernel/locking/qrwlock.c. That one is really unnecessary because the file is only compiled in SMP configurations (config QUEUED_RWLOCKS depends on SMP) and in that case linux/spinlock.h already includes asm/qrwlock.h if needed, via asm/spinlock.h. Reported-by: Guenter Roeck Signed-off-by: Waiman Long Fixes: 26128cb6c7e6 ("locking/rwlocks: Add contention detection for rwlocks") Tested-by: Guenter Roeck Reviewed-by: Ben Gardon [Add arch/sparc and kernel/locking parts per discussion with Waiman. 
- Paolo] Signed-off-by: Paolo Bonzini --- include/asm-generic/qrwlock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 0020d3b820a7..7ae0ece07b4e 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -14,7 +14,8 @@ #include #include -#include + +/* Must be included from asm/spinlock.h after defining arch_spin_is_locked. */ /* * Writer states & reader shift and bias. -- cgit v1.2.3