From 98fda169290b3b28c0f2db2b8f02290c13da50ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 4 Sep 2013 22:32:24 +0200 Subject: kvm: remove .done from struct kvm_async_pf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit '.done' is used to mark the completion of 'async_pf_execute()', but 'cancel_work_sync()' returns true when the work was canceled, so we use it instead. Signed-off-by: Radim Krčmář Reviewed-by: Paolo Bonzini Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0fbbc7aa02cb..749bdb12cd15 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -190,7 +190,6 @@ struct kvm_async_pf { unsigned long addr; struct kvm_arch_async_pf arch; struct page *page; - bool done; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 2f303b74a62fb74983c0a66e2df353be963c527c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 25 Sep 2013 13:53:07 +0200 Subject: KVM: Convert kvm_lock back to non-raw spinlock In commit e935b8372cf8 ("KVM: Convert kvm_lock to raw_spinlock"), the kvm_lock was made a raw lock. However, the kvm mmu_shrink() function tries to grab the (non-raw) mmu_lock within the scope of the raw locked kvm_lock being held. This leads to the following: BUG: sleeping function called from invalid context at kernel/rtmutex.c:659 in_atomic(): 1, irqs_disabled(): 0, pid: 55, name: kswapd0 Preemption disabled at:[] mmu_shrink+0x5c/0x1b0 [kvm] Pid: 55, comm: kswapd0 Not tainted 3.4.34_preempt-rt Call Trace: [] __might_sleep+0xfd/0x160 [] rt_spin_lock+0x24/0x50 [] mmu_shrink+0xec/0x1b0 [kvm] [] shrink_slab+0x17d/0x3a0 [] ? mem_cgroup_iter+0x130/0x260 [] balance_pgdat+0x54a/0x730 [] ? set_pgdat_percpu_threshold+0xa7/0xd0 [] kswapd+0x18f/0x490 [] ? get_parent_ip+0x11/0x50 [] ? __init_waitqueue_head+0x50/0x50 [] ? balance_pgdat+0x730/0x730 [] kthread+0xdb/0xe0 [] ? finish_task_switch+0x52/0x100 [] kernel_thread_helper+0x4/0x10 [] ? __init_kthread_worker+0x After the previous patch, kvm_lock need not be a raw spinlock anymore, so change it back. Reported-by: Paul Gortmaker Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 749bdb12cd15..7c961e1e9270 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -142,7 +142,7 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -extern raw_spinlock_t kvm_lock; +extern spinlock_t kvm_lock; extern struct list_head vm_list; struct kvm_io_range { -- cgit v1.2.3 From 42c4e0c77ac91505ab94284b14025e3a0865c0a5 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:07 +0530 Subject: ARM/ARM64: KVM: Implement KVM_ARM_PREFERRED_TARGET ioctl For implementing CPU=host, we need a mechanism for querying preferred VCPU target type on underlying Host. This patch implements KVM_ARM_PREFERRED_TARGET vm ioctl which returns struct kvm_vcpu_init instance containing information about preferred VCPU target type and target specific features available for it. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 99c25338ede8..e32e776f20c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1012,6 +1012,7 @@ struct kvm_s390_ucas_mapping { /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 6d9d41e57440e32a3400f37aa05ef7a1a09ced64 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 14:22:28 -0700 Subject: KVM: Move gfn_to_index to x86 specific code The gfn_to_index function relies on huge page defines which either may not make sense on systems that don't support huge pages or are defined in an unconvenient way for other architectures. Since this is x86-specific, move the function to arch/x86/include/asm/kvm_host.h. Signed-off-by: Christoffer Dall Signed-off-by: Gleb Natapov --- include/linux/kvm_host.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7c961e1e9270..f6dccde755f6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -841,13 +841,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn) return gfn_to_memslot(kvm, gfn)->id; } -static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) -{ - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ - return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); -} - static inline gfn_t hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) { -- cgit v1.2.3 From f2e106692d5189303997ad7b96de8d8123aa5613 Mon Sep 17 00:00:00 2001 From: chai wen Date: Mon, 14 Oct 2013 22:22:33 +0800 Subject: KVM: Drop FOLL_GET in GUP when doing async page fault Page pinning is not mandatory in kvm async page fault processing since after async page fault event is delivered to a guest it accesses page once again and does its own GUP. Drop the FOLL_GET flag in GUP in async_pf code, and do some simplifying in check/clear processing. Suggested-by: Gleb Natapov Signed-off-by: Gu zheng Signed-off-by: chai wen Signed-off-by: Gleb Natapov --- include/linux/kvm_host.h | 2 +- include/trace/events/kvm.h | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f6dccde755f6..c9d4236ab442 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -189,7 +189,7 @@ struct kvm_async_pf { gva_t gva; unsigned long addr; struct kvm_arch_async_pf arch; - struct page *page; + bool wakeup_all; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 7005d1109ec9..131a0bda7aec 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready, TRACE_EVENT( kvm_async_pf_completed, - TP_PROTO(unsigned long address, struct page *page, u64 gva), - TP_ARGS(address, page, gva), + TP_PROTO(unsigned long address, u64 gva), + TP_ARGS(address, gva), TP_STRUCT__entry( __field(unsigned long, address) - __field(pfn_t, pfn) __field(u64, gva) ), TP_fast_assign( __entry->address = address; - __entry->pfn = page ? page_to_pfn(page) : 0; __entry->gva = gva; ), - TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva, - __entry->address, __entry->pfn) + TP_printk("gva %#llx address %#lx", __entry->gva, + __entry->address) ); #endif -- cgit v1.2.3 From 5587027ce9d59a57aecaa190be1c8e560aaff45d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:18:00 +0530 Subject: kvm: Add struct kvm arg to memslot APIs We will use that in the later patch to find the kvm ops handler Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf --- include/linux/kvm_host.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c9d4236ab442..8b0107dc2067 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -507,9 +507,10 @@ int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages); void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, -- cgit v1.2.3 From cbbc58d4fdfab1a39a6ac1b41fcb17885952157a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:18:01 +0530 Subject: kvm: powerpc: book3s: Allow the HV and PR selection per virtual machine This moves the kvmppc_ops callbacks to be a per VM entity. This enables us to select HV and PR mode when creating a VM. We also allow both kvm-hv and kvm-pr kernel module to be loaded. To achieve this we move /dev/kvm ownership to kvm.ko module. Depending on which KVM mode we select during VM creation we take a reference count on respective module Signed-off-by: Aneesh Kumar K.V [agraf: fix coding style] Signed-off-by: Alexander Graf --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e32e776f20c0..5b5341f78368 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -518,6 +518,10 @@ struct kvm_ppc_smmu_info { /* machine type bits, to be used as argument to KVM_CREATE_VM */ #define KVM_VM_S390_UCONTROL 1 +/* on ppc, 0 indicate default, 1 should force HV and 2 PR */ +#define KVM_VM_PPC_HV 1 +#define KVM_VM_PPC_PR 2 + #define KVM_S390_SIE_PAGE_OFFSET 1 /* -- cgit v1.2.3 From 9c15bb1d0a8411f9bb3395d21d5309bde7da0c1c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 22 Sep 2013 16:44:50 +0200 Subject: kvm: Add KVM_GET_EMULATED_CPUID Add a kvm ioctl which states which system functionality kvm emulates. The format used is that of CPUID and we return the corresponding CPUID bits set for which we do emulate functionality. Make sure ->padding is being passed on clean from userspace so that we can use it for something in the future, after the ioctl gets cast in stone. s/kvm_dev_ioctl_get_supported_cpuid/kvm_dev_ioctl_get_cpuid/ while at it. Signed-off-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e32e776f20c0..32c60b98ddf8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -541,6 +541,7 @@ struct kvm_ppc_smmu_info { #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 +#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) /* * Extension capability list. @@ -668,6 +669,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_SPAPR_MULTITCE 94 +#define KVM_CAP_EXT_EMUL_CPUID 95 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From ec53500fae421e07c5d035918ca454a429732ef4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:17 -0600 Subject: kvm: Add VFIO device So far we've succeeded at making KVM and VFIO mostly unaware of each other, but areas are cropping up where a connection beyond eventfds and irqfds needs to be made. This patch introduces a KVM-VFIO device that is meant to be a gateway for such interaction. The user creates the device and can add and remove VFIO groups to it via file descriptors. When a group is added, KVM verifies the group is valid and gets a reference to it via the VFIO external user interface. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c9d4236ab442..7beddbd38ac7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1058,6 +1058,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; +extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 32c60b98ddf8..509cfbfe9658 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -845,6 +845,10 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 #define KVM_DEV_TYPE_XICS 3 +#define KVM_DEV_TYPE_VFIO 4 +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 /* * ioctls for VM fds -- cgit v1.2.3 From d96eb2c6f480769bff32054e78b964860dae4d56 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:23 -0600 Subject: kvm/x86: Convert iommu_flags to iommu_noncoherent Default to operating in coherent mode. This simplifies the logic when we switch to a model of registering and unregistering noncoherent I/O with KVM. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7beddbd38ac7..ed64880e4915 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -746,9 +746,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -/* For vcpu->arch.iommu_flags */ -#define KVM_IOMMU_CACHE_COHERENCY 0x1 - #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); -- cgit v1.2.3 From e0f0bbc527f6e9c0261f1d16b2a0b47612b7f235 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:30 -0600 Subject: kvm: Create non-coherent DMA registeration We currently use some ad-hoc arch variables tied to legacy KVM device assignment to manage emulation of instructions that depend on whether non-coherent DMA is present. Create an interface for this, adapting legacy KVM device assignment and adding VFIO via the KVM-VFIO device. For now we assume that non-coherent DMA is possible any time we have a VFIO group. Eventually an interface can be developed as part of the VFIO external user interface to query the coherency of a group. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ed64880e4915..92aae88756db 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -670,6 +670,25 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif +#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA +void kvm_arch_register_noncoherent_dma(struct kvm *kvm); +void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); +bool kvm_arch_has_noncoherent_dma(struct kvm *kvm); +#else +static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm) +{ +} + +static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) +{ +} + +static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) +{ + return false; +} +#endif + static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_WQP -- cgit v1.2.3 From 81e87e26796782e014fd1f2bb9cd8fb6ce4021a8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 30 Oct 2013 21:43:01 +0200 Subject: kvm_host: typo fix fix up typo in comment. Signed-off-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 92aae88756db..9bb1048d010e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -804,7 +804,7 @@ static inline void kvm_guest_enter(void) /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode - * is very similar to exiting to userspase from rcu point of view. In + * is very similar to exiting to userspace from rcu point of view. In * addition CPU may stay in a guest mode for quite a long time (up to * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. -- cgit v1.2.3 From ce332f662deb545c8a4f3f58debcca26bb2e44b0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 4 Nov 2013 22:36:17 +0200 Subject: srcu: API for barrier after srcu read unlock srcu read lock/unlock include a full memory barrier but that's an implementation detail. Add an API for make memory fencing explicit for users that need this barrier, to make sure we can change it as needed without breaking all users. Acked-by: "Paul E. McKenney" Reviewed-by: Paul E. McKenney Signed-off-by: Michael S. Tsirkin Signed-off-by: Gleb Natapov --- include/linux/srcu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index c114614ed172..9b058eecd403 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -237,4 +237,18 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __srcu_read_unlock(sp, idx); } +/** + * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock + * + * Converts the preceding srcu_read_unlock into a two-way memory barrier. + * + * Call this after srcu_read_unlock, to guarantee that all memory operations + * that occur after smp_mb__after_srcu_read_unlock will appear to happen after + * the preceding srcu_read_unlock. + */ +static inline void smp_mb__after_srcu_read_unlock(void) +{ + /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ +} + #endif -- cgit v1.2.3 From 8b414521bc5375ae8ba18c083af95d44b8da0d04 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 11 Oct 2013 21:39:26 -0300 Subject: hung_task: add method to reset detector In certain occasions it is possible for a hung task detector positive to be false: continuation from a paused VM, for example. Add a method to reset detection, similar as is done with other kernel watchdogs. Acked-by: Don Zickus Acked-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6682da36b293..7bb4b4a2a101 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -285,6 +285,14 @@ static inline void lockup_detector_init(void) } #endif +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) -- cgit v1.2.3