author    Dmitry Torokhov <dmitry.torokhov@gmail.com>	2021-02-22 21:35:15 -0800
committer Dmitry Torokhov <dmitry.torokhov@gmail.com>	2021-02-22 21:35:15 -0800
commit    cbecf716ca618fd44feda6bd9a64a8179d031fc5 (patch)
tree      186c9f69f0d11f773253c440dac85087f67288b7 /virt
parent    6524d8eac258452e547f8a49c8a965ac6dd8a161 (diff)
parent    4c47097f8514e4b35a31e04e33172d0193cb38ed (diff)
Merge branch 'next' into for-linus
Prepare input updates for 5.12 merge window.
Diffstat (limited to 'virt')
-rw-r--r--	virt/kvm/coalesced_mmio.c	|   2
-rw-r--r--	virt/kvm/dirty_ring.c		| 194
-rw-r--r--	virt/kvm/eventfd.c		|  13
-rw-r--r--	virt/kvm/kvm_main.c		| 190
4 files changed, 362 insertions, 37 deletions
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index e2c197fd4f9d..62bd908ecd58 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -111,7 +111,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
 {
 	struct page *page;
 
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 	if (!page)
 		return -ENOMEM;
 
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
new file mode 100644
index 000000000000..9d01299563ee
--- /dev/null
+++ b/virt/kvm/dirty_ring.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM dirty ring implementation
+ *
+ * Copyright 2019 Red Hat, Inc.
+ */
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/vmalloc.h>
+#include <linux/kvm_dirty_ring.h>
+#include <trace/events/kvm.h>
+
+int __weak kvm_cpu_dirty_log_size(void)
+{
+	return 0;
+}
+
+u32 kvm_dirty_ring_get_rsvd_entries(void)
+{
+	return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
+}
+
+static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
+{
+	return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
+}
+
+bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+{
+	return kvm_dirty_ring_used(ring) >= ring->soft_limit;
+}
+
+static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring)
+{
+	return kvm_dirty_ring_used(ring) >= ring->size;
+}
+
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+
+	WARN_ON_ONCE(vcpu->kvm != kvm);
+
+	return &vcpu->dirty_ring;
+}
+
+static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
+{
+	struct kvm_memory_slot *memslot;
+	int as_id, id;
+
+	as_id = slot >> 16;
+	id = (u16)slot;
+
+	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+		return;
+
+	memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
+
+	if (!memslot || (offset + __fls(mask)) >= memslot->npages)
+		return;
+
+	spin_lock(&kvm->mmu_lock);
+	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
+	spin_unlock(&kvm->mmu_lock);
+}
+
+int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size)
+{
+	ring->dirty_gfns = vmalloc(size);
+	if (!ring->dirty_gfns)
+		return -ENOMEM;
+	memset(ring->dirty_gfns, 0, size);
+
+	ring->size = size / sizeof(struct kvm_dirty_gfn);
+	ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries();
+	ring->dirty_index = 0;
+	ring->reset_index = 0;
+	ring->index = index;
+
+	return 0;
+}
+
+static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn)
+{
+	gfn->flags = 0;
+}
+
+static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn)
+{
+	gfn->flags = KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline bool kvm_dirty_gfn_invalid(struct kvm_dirty_gfn *gfn)
+{
+	return gfn->flags == 0;
+}
+
+static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
+{
+	return gfn->flags & KVM_DIRTY_GFN_F_RESET;
+}
+
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
+{
+	u32 cur_slot, next_slot;
+	u64 cur_offset, next_offset;
+	unsigned long mask;
+	int count = 0;
+	struct kvm_dirty_gfn *entry;
+	bool first_round = true;
+
+	/* This is only needed to make compilers happy */
+	cur_slot = cur_offset = mask = 0;
+
+	while (true) {
+		entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];
+
+		if (!kvm_dirty_gfn_harvested(entry))
+			break;
+
+		next_slot = READ_ONCE(entry->slot);
+		next_offset = READ_ONCE(entry->offset);
+
+		/* Update the flags to reflect that this GFN is reset */
+		kvm_dirty_gfn_set_invalid(entry);
+
+		ring->reset_index++;
+		count++;
+		/*
+		 * Try to coalesce the reset operations when the guest is
+		 * scanning pages in the same slot.
+		 */
+		if (!first_round && next_slot == cur_slot) {
+			s64 delta = next_offset - cur_offset;
+
+			if (delta >= 0 && delta < BITS_PER_LONG) {
+				mask |= 1ull << delta;
+				continue;
+			}
+
+			/* Backwards visit, careful about overflows! */
+			if (delta > -BITS_PER_LONG && delta < 0 &&
+			    (mask << -delta >> -delta) == mask) {
+				cur_offset = next_offset;
+				mask = (mask << -delta) | 1;
+				continue;
+			}
+		}
+		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+		cur_slot = next_slot;
+		cur_offset = next_offset;
+		mask = 1;
+		first_round = false;
+	}
+
+	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+
+	trace_kvm_dirty_ring_reset(ring);
+
+	return count;
+}
+
+void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
+{
+	struct kvm_dirty_gfn *entry;
+
+	/* It should never get full */
+	WARN_ON_ONCE(kvm_dirty_ring_full(ring));
+
+	entry = &ring->dirty_gfns[ring->dirty_index & (ring->size - 1)];
+
+	entry->slot = slot;
+	entry->offset = offset;
+	/*
+	 * Make sure the data is filled in before we publish this to
+	 * the userspace program.  There's no paired kernel-side reader.
+	 */
+	smp_wmb();
+	kvm_dirty_gfn_set_dirtied(entry);
+	ring->dirty_index++;
+	trace_kvm_dirty_ring_push(ring, slot, offset);
+}
+
+struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
+{
+	return vmalloc_to_page((void *)ring->dirty_gfns + offset * PAGE_SIZE);
+}
+
+void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
+{
+	vfree(ring->dirty_gfns);
+	ring->dirty_gfns = NULL;
+}
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index d6408bb497dc..e996989cd580 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -191,8 +191,12 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 	struct kvm *kvm = irqfd->kvm;
 	unsigned seq;
 	int idx;
+	int ret = 0;
 
 	if (flags & EPOLLIN) {
+		u64 cnt;
+		eventfd_ctx_do_read(irqfd->eventfd, &cnt);
+
 		idx = srcu_read_lock(&kvm->irq_srcu);
 		do {
 			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
@@ -204,6 +208,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 					      false) == -EWOULDBLOCK)
 			schedule_work(&irqfd->inject);
 		srcu_read_unlock(&kvm->irq_srcu, idx);
+		ret = 1;
 	}
 
 	if (flags & EPOLLHUP) {
@@ -227,7 +232,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 		spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
 	}
 
-	return 0;
+	return ret;
 }
 
 static void
@@ -236,7 +241,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(pt, struct kvm_kernel_irqfd, pt);
-	add_wait_queue(wqh, &irqfd->wait);
+	add_wait_queue_priority(wqh, &irqfd->wait);
 }
 
 /* Must be called under irqfds.lock */
@@ -853,15 +858,17 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 	struct eventfd_ctx *eventfd;
 	struct kvm_io_bus *bus;
 	int ret = -ENOENT;
+	bool wildcard;
 
 	eventfd = eventfd_ctx_fdget(args->fd);
 	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);
 
+	wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
+
 	mutex_lock(&kvm->slots_lock);
 
 	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
-		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
 
 		if (p->bus_idx != bus_idx ||
 		    p->eventfd != eventfd  ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cf88233b819a..8367d88ce39b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,6 +63,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
 
+#include <linux/kvm_dirty_ring.h>
+
 /* Worst case buffer size needed for holding an integer. */
 #define ITOA_MAX_LEN 12
 
@@ -143,8 +145,6 @@ static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 
-static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
-
 __visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
 
@@ -417,6 +417,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 
 void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	kvm_dirty_ring_free(&vcpu->dirty_ring);
 	kvm_arch_vcpu_destroy(vcpu);
 
 	/*
@@ -484,9 +485,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	kvm->mmu_notifier_count++;
 	need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
 					     range->flags);
-	need_tlb_flush |= kvm->tlbs_dirty;
 	/* we've to flush the tlb before the pages can be freed */
-	if (need_tlb_flush)
+	if (need_tlb_flush || kvm->tlbs_dirty)
 		kvm_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
@@ -1247,6 +1247,11 @@ static int kvm_delete_memslot(struct kvm *kvm,
 
 	memset(&new, 0, sizeof(new));
 	new.id = old->id;
+	/*
+	 * This is only for debugging purpose; it should never be referenced
+	 * for a removed memslot.
+	 */
+	new.as_id = as_id;
 
 	r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE);
 	if (r)
@@ -1287,6 +1292,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		return -EINVAL;
 	/* We can read the guest memory with __xxx_user() later on. */
 	if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
+	    (mem->userspace_addr != untagged_addr(mem->userspace_addr)) ||
 	     !access_ok((void __user *)(unsigned long)mem->userspace_addr,
 			mem->memory_size))
 		return -EINVAL;
@@ -1313,6 +1319,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (!mem->memory_size)
 		return kvm_delete_memslot(kvm, mem, &old, as_id);
 
+	new.as_id = as_id;
 	new.id = id;
 	new.base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	new.npages = mem->memory_size >> PAGE_SHIFT;
@@ -1358,7 +1365,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	/* Allocate/free page dirty bitmap as needed */
 	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
 		new.dirty_bitmap = NULL;
-	else if (!new.dirty_bitmap) {
+	else if (!new.dirty_bitmap && !kvm->dirty_ring_size) {
 		r = kvm_alloc_dirty_bitmap(&new);
 		if (r)
 			return r;
@@ -1419,6 +1426,10 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
 	unsigned long n;
 	unsigned long any = 0;
 
+	/* Dirty ring tracking is exclusive to dirty log tracking */
+	if (kvm->dirty_ring_size)
+		return -ENXIO;
+
 	*memslot = NULL;
 	*is_dirty = 0;
 
@@ -1480,6 +1491,10 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
 	unsigned long *dirty_bitmap_buffer;
 	bool flush;
 
+	/* Dirty ring tracking is exclusive to dirty log tracking */
+	if (kvm->dirty_ring_size)
+		return -ENXIO;
+
 	as_id = log->slot >> 16;
 	id = (u16)log->slot;
 	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
@@ -1588,6 +1603,10 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
 	unsigned long *dirty_bitmap_buffer;
 	bool flush;
 
+	/* Dirty ring tracking is exclusive to dirty log tracking */
+	if (kvm->dirty_ring_size)
+		return -ENXIO;
+
 	as_id = log->slot >> 16;
 	id = (u16)log->slot;
 	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
@@ -2192,7 +2211,8 @@ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_map);
 
-static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
+static void __kvm_unmap_gfn(struct kvm *kvm,
+			struct kvm_memory_slot *memslot,
 			struct kvm_host_map *map,
 			struct gfn_to_pfn_cache *cache,
 			bool dirty, bool atomic)
@@ -2217,7 +2237,7 @@ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
 #endif
 
 	if (dirty)
-		mark_page_dirty_in_slot(memslot, map->gfn);
+		mark_page_dirty_in_slot(kvm, memslot, map->gfn);
 
 	if (cache)
 		cache->dirty |= dirty;
@@ -2231,7 +2251,7 @@ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
 int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
 		  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
 {
-	__kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map,
+	__kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map,
 			cache, dirty, atomic);
 	return 0;
 }
@@ -2239,8 +2259,8 @@ EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
 
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
 {
-	__kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL,
-			dirty, false);
+	__kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn),
+			map, NULL, dirty, false);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
 
@@ -2414,7 +2434,8 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
 
-static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
+static int __kvm_write_guest_page(struct kvm *kvm,
+				  struct kvm_memory_slot *memslot, gfn_t gfn,
 			          const void *data, int offset, int len)
 {
 	int r;
@@ -2426,7 +2447,7 @@ static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
 	r = __copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
-	mark_page_dirty_in_slot(memslot, gfn);
+	mark_page_dirty_in_slot(kvm, memslot, gfn);
 	return 0;
 }
 
@@ -2435,7 +2456,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
 
@@ -2444,7 +2465,7 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
 
@@ -2563,7 +2584,7 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
 	if (r)
 		return -EFAULT;
-	mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
+	mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT);
 
 	return 0;
 }
@@ -2612,23 +2633,16 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
 
-int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
-{
-	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
-
-	return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
-}
-EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
-
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 {
+	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	int seg;
 	int offset = offset_in_page(gpa);
 	int ret;
 
 	while ((seg = next_segment(len, offset)) != 0) {
-		ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
+		ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg);
 		if (ret < 0)
 			return ret;
 		offset = 0;
@@ -2639,22 +2653,29 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
-static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
-				    gfn_t gfn)
+void mark_page_dirty_in_slot(struct kvm *kvm,
+			     struct kvm_memory_slot *memslot,
+			     gfn_t gfn)
 {
-	if (memslot && memslot->dirty_bitmap) {
+	if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
+		u32 slot = (memslot->as_id << 16) | memslot->id;
 
-		set_bit_le(rel_gfn, memslot->dirty_bitmap);
+		if (kvm->dirty_ring_size)
+			kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
+					    slot, rel_gfn);
+		else
+			set_bit_le(rel_gfn, memslot->dirty_bitmap);
 	}
 }
+EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
 
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *memslot;
 
 	memslot = gfn_to_memslot(kvm, gfn);
-	mark_page_dirty_in_slot(memslot, gfn);
+	mark_page_dirty_in_slot(kvm, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 
@@ -2663,7 +2684,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	struct kvm_memory_slot *memslot;
 
 	memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-	mark_page_dirty_in_slot(memslot, gfn);
+	mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
 
@@ -3004,6 +3025,17 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
+static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff)
+{
+#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+	return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) &&
+	    (pgoff < KVM_DIRTY_LOG_PAGE_OFFSET +
+	     kvm->dirty_ring_size / PAGE_SIZE);
+#else
+	return false;
+#endif
+}
+
 static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
 {
 	struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
@@ -3019,6 +3051,10 @@ static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
 	else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
+	else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff))
+		page = kvm_dirty_ring_get_page(
+		    &vcpu->dirty_ring,
+		    vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET);
 	else
 		return kvm_arch_vcpu_fault(vcpu, vmf);
 	get_page(page);
@@ -3032,6 +3068,14 @@ static const struct vm_operations_struct kvm_vcpu_vm_ops = {
 
 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	struct kvm_vcpu *vcpu = file->private_data;
+	unsigned long pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) ||
+	     kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) &&
+	    ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED)))
+		return -EINVAL;
+
 	vma->vm_ops = &kvm_vcpu_vm_ops;
 	return 0;
 }
@@ -3112,7 +3156,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	}
 
 	BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE);
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 	if (!page) {
 		r = -ENOMEM;
 		goto vcpu_free;
@@ -3125,6 +3169,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	if (r)
 		goto vcpu_free_run_page;
 
+	if (kvm->dirty_ring_size) {
+		r = kvm_dirty_ring_alloc(&vcpu->dirty_ring,
+					 id, kvm->dirty_ring_size);
+		if (r)
+			goto arch_vcpu_destroy;
+	}
+
 	mutex_lock(&kvm->lock);
 	if (kvm_get_vcpu_by_id(kvm, id)) {
 		r = -EEXIST;
@@ -3158,6 +3209,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 
 unlock_vcpu_destroy:
 	mutex_unlock(&kvm->lock);
+	kvm_dirty_ring_free(&vcpu->dirty_ring);
+arch_vcpu_destroy:
 	kvm_arch_vcpu_destroy(vcpu);
 vcpu_free_run_page:
 	free_page((unsigned long)vcpu->run);
@@ -3630,12 +3683,78 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #endif
 	case KVM_CAP_NR_MEMSLOTS:
 		return KVM_USER_MEM_SLOTS;
+	case KVM_CAP_DIRTY_LOG_RING:
+#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+		return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
+#else
+		return 0;
+#endif
 	default:
 		break;
 	}
 	return kvm_vm_ioctl_check_extension(kvm, arg);
 }
 
+static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
+{
+	int r;
+
+	if (!KVM_DIRTY_LOG_PAGE_OFFSET)
+		return -EINVAL;
+
+	/* the size should be power of 2 */
+	if (!size || (size & (size - 1)))
+		return -EINVAL;
+
+	/* Should be bigger to keep the reserved entries, or a page */
+	if (size < kvm_dirty_ring_get_rsvd_entries() *
+	    sizeof(struct kvm_dirty_gfn) || size < PAGE_SIZE)
+		return -EINVAL;
+
+	if (size > KVM_DIRTY_RING_MAX_ENTRIES *
+	    sizeof(struct kvm_dirty_gfn))
+		return -E2BIG;
+
+	/* We only allow it to set once */
+	if (kvm->dirty_ring_size)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+
+	if (kvm->created_vcpus) {
+		/* We don't allow to change this value after vcpu created */
+		r = -EINVAL;
+	} else {
+		kvm->dirty_ring_size = size;
+		r = 0;
+	}
+
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+	int cleared = 0;
+
+	if (!kvm->dirty_ring_size)
+		return -EINVAL;
+
+	mutex_lock(&kvm->slots_lock);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring);
+
+	mutex_unlock(&kvm->slots_lock);
+
+	if (cleared)
+		kvm_flush_remote_tlbs(kvm);
+
+	return cleared;
+}
+
 int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 						  struct kvm_enable_cap *cap)
 {
@@ -3666,6 +3785,8 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 		kvm->max_halt_poll_ns = cap->args[0];
 		return 0;
 	}
+	case KVM_CAP_DIRTY_LOG_RING:
+		return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
 	default:
 		return kvm_vm_ioctl_enable_cap(kvm, cap);
 	}
@@ -3850,6 +3971,9 @@ static long kvm_vm_ioctl(struct file *filp,
 	case KVM_CHECK_EXTENSION:
 		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
 		break;
+	case KVM_RESET_DIRTY_RINGS:
+		r = kvm_vm_ioctl_reset_dirty_pages(kvm);
+		break;
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -4350,10 +4474,10 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
 			  GFP_KERNEL_ACCOUNT);
 	if (new_bus) {
-		memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
+		memcpy(new_bus, bus, struct_size(bus, range, i));
 		new_bus->dev_count--;
 		memcpy(new_bus->range + i, bus->range + i + 1,
-		       (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
+		       flex_array_size(new_bus, range, new_bus->dev_count - i));
 	} else {
 		pr_err("kvm: failed to shrink bus, removing it completely\n");
 		for (j = 0; j < bus->dev_count; j++) {
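
For reference, the ring introduced above is a per-vCPU, mmap()-able producer/consumer structure: the kernel publishes dirty GFNs with KVM_DIRTY_GFN_F_DIRTY, userspace harvests them, flags them KVM_DIRTY_GFN_F_RESET, and asks KVM to re-protect them via KVM_RESET_DIRTY_RINGS. The following is a minimal userspace sketch of that flow, not code from this commit: it assumes the uapi this series adds to <linux/kvm.h> (KVM_CAP_DIRTY_LOG_RING, KVM_DIRTY_LOG_PAGE_OFFSET, struct kvm_dirty_gfn, KVM_RESET_DIRTY_RINGS) is installed, and vm_fd, vcpu_fd and the helper names are purely illustrative.

/* Hedged sketch of dirty-ring consumption from userspace; not part of this commit. */
#include <linux/kvm.h>        /* assumes the uapi added by this series */
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

static struct kvm_dirty_gfn *ring;   /* per-vCPU ring mapped from vcpu_fd */
static uint32_t ring_entries;        /* ring_bytes / sizeof(struct kvm_dirty_gfn) */
static uint32_t next_harvest;        /* userspace read cursor */

/* Enable the ring on the VM; must run before any vCPU is created. */
static int dirty_ring_enable(int vm_fd, uint32_t ring_bytes)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_DIRTY_LOG_RING,
		.args = { ring_bytes },   /* power of two, at least PAGE_SIZE */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

/* Map the ring; the kernel only accepts shared, non-executable mappings. */
static int dirty_ring_map(int vcpu_fd, uint32_t ring_bytes)
{
	long psz = sysconf(_SC_PAGESIZE);

	ring = mmap(NULL, ring_bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
		    vcpu_fd, KVM_DIRTY_LOG_PAGE_OFFSET * psz);
	if (ring == MAP_FAILED)
		return -1;
	ring_entries = ring_bytes / sizeof(struct kvm_dirty_gfn);
	next_harvest = 0;
	return 0;
}

/* Harvest published GFNs, mark them for reset, then let KVM re-protect them. */
static int dirty_ring_harvest(int vm_fd)
{
	int count = 0;

	for (;;) {
		struct kvm_dirty_gfn *e = &ring[next_harvest & (ring_entries - 1)];

		if (!(__atomic_load_n(&e->flags, __ATOMIC_ACQUIRE) &
		      KVM_DIRTY_GFN_F_DIRTY))
			break;
		/* e->slot is (as_id << 16) | slot id; e->offset is the GFN in the slot. */
		/* ... record the dirty page here, e.g. in a migration bitmap ... */
		__atomic_store_n(&e->flags, KVM_DIRTY_GFN_F_RESET, __ATOMIC_RELEASE);
		next_harvest++;
		count++;
	}
	if (count)
		ioctl(vm_fd, KVM_RESET_DIRTY_RINGS);
	return count;
}

The ordering mirrors the kernel side in the diff: kvm_dirty_ring_push() fills slot/offset before smp_wmb() and the DIRTY flag, so the consumer reads the flag with acquire semantics before trusting the payload, and KVM_RESET_DIRTY_RINGS lands in kvm_vm_ioctl_reset_dirty_pages() above, which walks every vCPU ring and flushes TLBs if anything was cleared.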