diff options
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 435 |
1 files changed, 269 insertions, 166 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 00268290dcbd..7e63a3236364 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -104,16 +104,16 @@ static cpumask_var_t cpus_hardware_enabled; static int kvm_usage_count; static atomic_t hardware_enable_failed; -struct kmem_cache *kvm_vcpu_cache; -EXPORT_SYMBOL_GPL(kvm_vcpu_cache); +static struct kmem_cache *kvm_vcpu_cache; static __read_mostly struct preempt_ops kvm_preempt_ops; +static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu); struct dentry *kvm_debugfs_dir; EXPORT_SYMBOL_GPL(kvm_debugfs_dir); static int kvm_debugfs_num_entries; -static const struct file_operations *stat_fops_per_vm[]; +static const struct file_operations stat_fops_per_vm; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); @@ -191,12 +191,24 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn) return true; } +bool kvm_is_transparent_hugepage(kvm_pfn_t pfn) +{ + struct page *page = pfn_to_page(pfn); + + if (!PageTransCompoundMap(page)) + return false; + + return is_transparent_hugepage(compound_head(page)); +} + /* * Switches to specified vcpu, until a matching vcpu_put() */ void vcpu_load(struct kvm_vcpu *vcpu) { int cpu = get_cpu(); + + __this_cpu_write(kvm_running_vcpu, vcpu); preempt_notifier_register(&vcpu->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); @@ -208,6 +220,7 @@ void vcpu_put(struct kvm_vcpu *vcpu) preempt_disable(); kvm_arch_vcpu_put(vcpu); preempt_notifier_unregister(&vcpu->preempt_notifier); + __this_cpu_write(kvm_running_vcpu, NULL); preempt_enable(); } EXPORT_SYMBOL_GPL(vcpu_put); @@ -322,11 +335,8 @@ void kvm_reload_remote_mmus(struct kvm *kvm) kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); } -int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) +static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { - struct page *page; - int r; - mutex_init(&vcpu->mutex); vcpu->cpu = -1; vcpu->kvm = kvm; @@ -338,42 +348,28 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->pre_pcpu = -1; INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) { - r = -ENOMEM; - goto fail; - } - vcpu->run = page_address(page); - kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); vcpu->preempted = false; vcpu->ready = false; - - r = kvm_arch_vcpu_init(vcpu); - if (r < 0) - goto fail_free_run; - return 0; - -fail_free_run: - free_page((unsigned long)vcpu->run); -fail: - return r; + preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); } -EXPORT_SYMBOL_GPL(kvm_vcpu_init); -void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) +void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) { + kvm_arch_vcpu_destroy(vcpu); + /* - * no need for rcu_read_lock as VCPU_RUN is the only place that - * will change the vcpu->pid pointer and on uninit all file - * descriptors are already gone. + * No need for rcu_read_lock as VCPU_RUN is the only place that changes + * the vcpu->pid pointer, and at destruction time all file descriptors + * are already gone. */ put_pid(rcu_dereference_protected(vcpu->pid, 1)); - kvm_arch_vcpu_uninit(vcpu); + free_page((unsigned long)vcpu->run); + kmem_cache_free(kvm_vcpu_cache, vcpu); } -EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); +EXPORT_SYMBOL_GPL(kvm_vcpu_destroy); #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) @@ -650,11 +646,11 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return -ENOMEM; stat_data->kvm = kvm; - stat_data->offset = p->offset; - stat_data->mode = p->mode ? p->mode : 0644; + stat_data->dbgfs_item = p; kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry, - stat_data, stat_fops_per_vm[p->kind]); + debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p), + kvm->debugfs_dentry, stat_data, + &stat_fops_per_vm); } return 0; } @@ -964,7 +960,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, /* * Increment the new memslot generation a second time, dropping the - * update in-progress flag and incrementing then generation based on + * update in-progress flag and incrementing the generation based on * the number of address spaces. This provides a unique and easily * identifiable generation number while the memslots are in flux. */ @@ -1117,7 +1113,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * * validation of sp->gfn happens in: * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) - * - kvm_is_visible_gfn (mmu_check_roots) + * - kvm_is_visible_gfn (mmu_check_root) */ kvm_arch_flush_shadow_memslot(kvm, slot); @@ -1406,14 +1402,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) { struct vm_area_struct *vma; unsigned long addr, size; size = PAGE_SIZE; - addr = gfn_to_hva(kvm, gfn); + addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL); if (kvm_is_error_hva(addr)) return PAGE_SIZE; @@ -1519,7 +1515,7 @@ static inline int check_user_page_hwpoison(unsigned long addr) /* * The fast path to get the writable pfn which will be stored in @pfn, * true indicates success, otherwise false is returned. It's also the - * only part that runs if we can are in atomic context. + * only part that runs if we can in atomic context. */ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, bool *writable, kvm_pfn_t *pfn) @@ -1821,26 +1817,72 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_page); -static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, - struct kvm_host_map *map) +void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache) +{ + if (pfn == 0) + return; + + if (cache) + cache->pfn = cache->gfn = 0; + + if (dirty) + kvm_release_pfn_dirty(pfn); + else + kvm_release_pfn_clean(pfn); +} + +static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn, + struct gfn_to_pfn_cache *cache, u64 gen) +{ + kvm_release_pfn(cache->pfn, cache->dirty, cache); + + cache->pfn = gfn_to_pfn_memslot(slot, gfn); + cache->gfn = gfn; + cache->dirty = false; + cache->generation = gen; +} + +static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn, + struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, + bool atomic) { kvm_pfn_t pfn; void *hva = NULL; struct page *page = KVM_UNMAPPED_PAGE; + struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn); + u64 gen = slots->generation; if (!map) return -EINVAL; - pfn = gfn_to_pfn_memslot(slot, gfn); + if (cache) { + if (!cache->pfn || cache->gfn != gfn || + cache->generation != gen) { + if (atomic) + return -EAGAIN; + kvm_cache_gfn_to_pfn(slot, gfn, cache, gen); + } + pfn = cache->pfn; + } else { + if (atomic) + return -EAGAIN; + pfn = gfn_to_pfn_memslot(slot, gfn); + } if (is_error_noslot_pfn(pfn)) return -EINVAL; if (pfn_valid(pfn)) { page = pfn_to_page(pfn); - hva = kmap(page); + if (atomic) + hva = kmap_atomic(page); + else + hva = kmap(page); #ifdef CONFIG_HAS_IOMEM - } else { + } else if (!atomic) { hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); + } else { + return -EINVAL; #endif } @@ -1855,14 +1897,25 @@ static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, return 0; } +int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool atomic) +{ + return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map, + cache, atomic); +} +EXPORT_SYMBOL_GPL(kvm_map_gfn); + int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) { - return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map); + return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map, + NULL, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_map); -void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, - bool dirty) +static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot, + struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, + bool dirty, bool atomic) { if (!map) return; @@ -1870,23 +1923,45 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, if (!map->hva) return; - if (map->page != KVM_UNMAPPED_PAGE) - kunmap(map->page); + if (map->page != KVM_UNMAPPED_PAGE) { + if (atomic) + kunmap_atomic(map->hva); + else + kunmap(map->page); + } #ifdef CONFIG_HAS_IOMEM - else + else if (!atomic) memunmap(map->hva); + else + WARN_ONCE(1, "Unexpected unmapping in atomic context"); #endif - if (dirty) { - kvm_vcpu_mark_page_dirty(vcpu, map->gfn); - kvm_release_pfn_dirty(map->pfn); - } else { - kvm_release_pfn_clean(map->pfn); - } + if (dirty) + mark_page_dirty_in_slot(memslot, map->gfn); + + if (cache) + cache->dirty |= dirty; + else + kvm_release_pfn(map->pfn, dirty, NULL); map->hva = NULL; map->page = NULL; } + +int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool dirty, bool atomic) +{ + __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map, + cache, dirty, atomic); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_unmap_gfn); + +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty) +{ + __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL, + dirty, false); +} EXPORT_SYMBOL_GPL(kvm_vcpu_unmap); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn) @@ -1931,11 +2006,8 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); void kvm_set_pfn_dirty(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) { - struct page *page = pfn_to_page(pfn); - - SetPageDirty(page); - } + if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) + SetPageDirty(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); @@ -2051,17 +2123,6 @@ static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn, return 0; } -int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, - unsigned long len) -{ - gfn_t gfn = gpa >> PAGE_SHIFT; - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); - int offset = offset_in_page(gpa); - - return __kvm_read_guest_atomic(slot, gfn, data, offset, len); -} -EXPORT_SYMBOL_GPL(kvm_read_guest_atomic); - int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len) { @@ -2158,33 +2219,36 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; gfn_t nr_pages_needed = end_gfn - start_gfn + 1; gfn_t nr_pages_avail; - int r = start_gfn <= end_gfn ? 0 : -EINVAL; - ghc->gpa = gpa; + /* Update ghc->generation before performing any error checks. */ ghc->generation = slots->generation; - ghc->len = len; - ghc->hva = KVM_HVA_ERR_BAD; + + if (start_gfn > end_gfn) { + ghc->hva = KVM_HVA_ERR_BAD; + return -EINVAL; + } /* * If the requested region crosses two memslots, we still * verify that the entire region is valid here. */ - while (!r && start_gfn <= end_gfn) { + for ( ; start_gfn <= end_gfn; start_gfn += nr_pages_avail) { ghc->memslot = __gfn_to_memslot(slots, start_gfn); ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); if (kvm_is_error_hva(ghc->hva)) - r = -EFAULT; - start_gfn += nr_pages_avail; + return -EFAULT; } /* Use the slow path for cross page reads and writes. */ - if (!r && nr_pages_needed == 1) + if (nr_pages_needed == 1) ghc->hva += offset; else ghc->memslot = NULL; - return r; + ghc->gpa = gpa; + ghc->len = len; + return 0; } int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, @@ -2205,15 +2269,17 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, BUG_ON(len + offset > ghc->len); - if (slots->generation != ghc->generation) - __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - - if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, gpa, data, len); + if (slots->generation != ghc->generation) { + if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) + return -EFAULT; + } if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, gpa, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; @@ -2238,15 +2304,17 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, BUG_ON(len > ghc->len); - if (slots->generation != ghc->generation) - __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - - if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); + if (slots->generation != ghc->generation) { + if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) + return -EFAULT; + } if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); + r = __copy_from_user(data, (void __user *)ghc->hva, len); if (r) return -EFAULT; @@ -2718,6 +2786,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) { int r; struct kvm_vcpu *vcpu; + struct page *page; if (id >= KVM_MAX_VCPU_ID) return -EINVAL; @@ -2731,17 +2800,29 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) kvm->created_vcpus++; mutex_unlock(&kvm->lock); - vcpu = kvm_arch_vcpu_create(kvm, id); - if (IS_ERR(vcpu)) { - r = PTR_ERR(vcpu); + r = kvm_arch_vcpu_precreate(kvm, id); + if (r) + goto vcpu_decrement; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) { + r = -ENOMEM; goto vcpu_decrement; } - preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); + BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE); + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto vcpu_free; + } + vcpu->run = page_address(page); + + kvm_vcpu_init(vcpu, kvm, id); - r = kvm_arch_vcpu_setup(vcpu); + r = kvm_arch_vcpu_create(vcpu); if (r) - goto vcpu_destroy; + goto vcpu_free_run_page; kvm_create_vcpu_debugfs(vcpu); @@ -2778,8 +2859,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) unlock_vcpu_destroy: mutex_unlock(&kvm->lock); debugfs_remove_recursive(vcpu->debugfs_dentry); -vcpu_destroy: kvm_arch_vcpu_destroy(vcpu); +vcpu_free_run_page: + free_page((unsigned long)vcpu->run); +vcpu_free: + kmem_cache_free(kvm_vcpu_cache, vcpu); vcpu_decrement: mutex_lock(&kvm->lock); kvm->created_vcpus--; @@ -4013,8 +4097,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, return -ENOENT; if (simple_attr_open(inode, file, get, - stat_data->mode & S_IWUGO ? set : NULL, - fmt)) { + KVM_DBGFS_GET_MODE(stat_data->dbgfs_item) & 0222 + ? set : NULL, + fmt)) { kvm_put_kvm(stat_data->kvm); return -ENOMEM; } @@ -4033,105 +4118,111 @@ static int kvm_debugfs_release(struct inode *inode, struct file *file) return 0; } -static int vm_stat_get_per_vm(void *data, u64 *val) +static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val) { - struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; + *val = *(ulong *)((void *)kvm + offset); + + return 0; +} - *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset); +static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset) +{ + *(ulong *)((void *)kvm + offset) = 0; return 0; } -static int vm_stat_clear_per_vm(void *data, u64 val) +static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val) { - struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; + int i; + struct kvm_vcpu *vcpu; - if (val) - return -EINVAL; + *val = 0; - *(ulong *)((void *)stat_data->kvm + stat_data->offset) = 0; + kvm_for_each_vcpu(i, vcpu, kvm) + *val += *(u64 *)((void *)vcpu + offset); return 0; } -static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file) +static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset) { - __simple_attr_check_format("%llu\n", 0ull); - return kvm_debugfs_open(inode, file, vm_stat_get_per_vm, - vm_stat_clear_per_vm, "%llu\n"); -} + int i; + struct kvm_vcpu *vcpu; -static const struct file_operations vm_stat_get_per_vm_fops = { - .owner = THIS_MODULE, - .open = vm_stat_get_per_vm_open, - .release = kvm_debugfs_release, - .read = simple_attr_read, - .write = simple_attr_write, - .llseek = no_llseek, -}; + kvm_for_each_vcpu(i, vcpu, kvm) + *(u64 *)((void *)vcpu + offset) = 0; + + return 0; +} -static int vcpu_stat_get_per_vm(void *data, u64 *val) +static int kvm_stat_data_get(void *data, u64 *val) { - int i; + int r = -EFAULT; struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; - struct kvm_vcpu *vcpu; - - *val = 0; - kvm_for_each_vcpu(i, vcpu, stat_data->kvm) - *val += *(u64 *)((void *)vcpu + stat_data->offset); + switch (stat_data->dbgfs_item->kind) { + case KVM_STAT_VM: + r = kvm_get_stat_per_vm(stat_data->kvm, + stat_data->dbgfs_item->offset, val); + break; + case KVM_STAT_VCPU: + r = kvm_get_stat_per_vcpu(stat_data->kvm, + stat_data->dbgfs_item->offset, val); + break; + } - return 0; + return r; } -static int vcpu_stat_clear_per_vm(void *data, u64 val) +static int kvm_stat_data_clear(void *data, u64 val) { - int i; + int r = -EFAULT; struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; - struct kvm_vcpu *vcpu; if (val) return -EINVAL; - kvm_for_each_vcpu(i, vcpu, stat_data->kvm) - *(u64 *)((void *)vcpu + stat_data->offset) = 0; + switch (stat_data->dbgfs_item->kind) { + case KVM_STAT_VM: + r = kvm_clear_stat_per_vm(stat_data->kvm, + stat_data->dbgfs_item->offset); + break; + case KVM_STAT_VCPU: + r = kvm_clear_stat_per_vcpu(stat_data->kvm, + stat_data->dbgfs_item->offset); + break; + } - return 0; + return r; } -static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file) +static int kvm_stat_data_open(struct inode *inode, struct file *file) { __simple_attr_check_format("%llu\n", 0ull); - return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm, - vcpu_stat_clear_per_vm, "%llu\n"); + return kvm_debugfs_open(inode, file, kvm_stat_data_get, + kvm_stat_data_clear, "%llu\n"); } -static const struct file_operations vcpu_stat_get_per_vm_fops = { - .owner = THIS_MODULE, - .open = vcpu_stat_get_per_vm_open, +static const struct file_operations stat_fops_per_vm = { + .owner = THIS_MODULE, + .open = kvm_stat_data_open, .release = kvm_debugfs_release, - .read = simple_attr_read, - .write = simple_attr_write, - .llseek = no_llseek, -}; - -static const struct file_operations *stat_fops_per_vm[] = { - [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops, - [KVM_STAT_VM] = &vm_stat_get_per_vm_fops, + .read = simple_attr_read, + .write = simple_attr_write, + .llseek = no_llseek, }; static int vm_stat_get(void *_offset, u64 *val) { unsigned offset = (long)_offset; struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; u64 tmp_val; *val = 0; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); + kvm_get_stat_per_vm(kvm, offset, &tmp_val); *val += tmp_val; } mutex_unlock(&kvm_lock); @@ -4142,15 +4233,13 @@ static int vm_stat_clear(void *_offset, u64 val) { unsigned offset = (long)_offset; struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; if (val) return -EINVAL; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vm_stat_clear_per_vm((void *)&stat_tmp, 0); + kvm_clear_stat_per_vm(kvm, offset); } mutex_unlock(&kvm_lock); @@ -4163,14 +4252,12 @@ static int vcpu_stat_get(void *_offset, u64 *val) { unsigned offset = (long)_offset; struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; u64 tmp_val; *val = 0; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); + kvm_get_stat_per_vcpu(kvm, offset, &tmp_val); *val += tmp_val; } mutex_unlock(&kvm_lock); @@ -4181,15 +4268,13 @@ static int vcpu_stat_clear(void *_offset, u64 val) { unsigned offset = (long)_offset; struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; if (val) return -EINVAL; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); + kvm_clear_stat_per_vcpu(kvm, offset); } mutex_unlock(&kvm_lock); @@ -4262,9 +4347,8 @@ static void kvm_init_debug(void) kvm_debugfs_num_entries = 0; for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - int mode = p->mode ? p->mode : 0644; - debugfs_create_file(p->name, mode, kvm_debugfs_dir, - (void *)(long)p->offset, + debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p), + kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind]); } } @@ -4304,8 +4388,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) WRITE_ONCE(vcpu->preempted, false); WRITE_ONCE(vcpu->ready, false); + __this_cpu_write(kvm_running_vcpu, vcpu); kvm_arch_sched_in(vcpu, cpu); - kvm_arch_vcpu_load(vcpu, cpu); } @@ -4319,6 +4403,25 @@ static void kvm_sched_out(struct preempt_notifier *pn, WRITE_ONCE(vcpu->ready, true); } kvm_arch_vcpu_put(vcpu); + __this_cpu_write(kvm_running_vcpu, NULL); +} + +/** + * kvm_get_running_vcpu - get the vcpu running on the current CPU. + * Thanks to preempt notifiers, this can also be called from + * preemptible context. + */ +struct kvm_vcpu *kvm_get_running_vcpu(void) +{ + return __this_cpu_read(kvm_running_vcpu); +} + +/** + * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus. + */ +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) +{ + return &kvm_running_vcpu; } static void check_processor_compat(void *rtn) |