diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-25 17:57:35 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-25 17:57:35 -0700 |
commit | 0d1e8b8d2bcd3150d51754d8d0fdbf44dc88b0d3 (patch) | |
tree | 2794cb2347daa76b00160a6ffb68663f4138dcc7 /virt/kvm | |
parent | 83c4087ce468601501ecde4d0ec5b2abd5f57c31 (diff) | |
parent | 22a7cdcae6a4a3c8974899e62851d270956f58ce (diff) |
Merge tag 'kvm-4.20-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář:
"ARM:
- Improved guest IPA space support (32 to 52 bits)
- RAS event delivery for 32bit
- PMU fixes
- Guest entry hardening
- Various cleanups
- Port of dirty_log_test selftest
PPC:
- Nested HV KVM support for radix guests on POWER9. The performance
is much better than with PR KVM. Migration and arbitrary level of
nesting is supported.
- Disable nested HV-KVM on early POWER9 chips that need a particular
hardware bug workaround
- One VM per core mode to prevent potential data leaks
- PCI pass-through optimization
- merge ppc-kvm topic branch and kvm-ppc-fixes to get a better base
s390:
- Initial version of AP crypto virtualization via vfio-mdev
- Improvement for vfio-ap
- Set the host program identifier
- Optimize page table locking
x86:
- Enable nested virtualization by default
- Implement Hyper-V IPI hypercalls
- Improve #PF and #DB handling
- Allow guests to use Enlightened VMCS
- Add migration selftests for VMCS and Enlightened VMCS
- Allow coalesced PIO accesses
- Add an option to perform nested VMCS host state consistency check
through hardware
- Automatic tuning of lapic_timer_advance_ns
- Many fixes, minor improvements, and cleanups"
* tag 'kvm-4.20-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
KVM/nVMX: Do not validate that posted_intr_desc_addr is page aligned
Revert "kvm: x86: optimize dr6 restore"
KVM: PPC: Optimize clearing TCEs for sparse tables
x86/kvm/nVMX: tweak shadow fields
selftests/kvm: add missing executables to .gitignore
KVM: arm64: Safety check PSTATE when entering guest and handle IL
KVM: PPC: Book3S HV: Don't use streamlined entry path on early POWER9 chips
arm/arm64: KVM: Enable 32 bits kvm vcpu events support
arm/arm64: KVM: Rename function kvm_arch_dev_ioctl_check_extension()
KVM: arm64: Fix caching of host MDCR_EL2 value
KVM: VMX: enable nested virtualization by default
KVM/x86: Use 32bit xor to clear registers in svm.c
kvm: x86: Introduce KVM_CAP_EXCEPTION_PAYLOAD
kvm: vmx: Defer setting of DR6 until #DB delivery
kvm: x86: Defer setting of CR2 until #PF delivery
kvm: x86: Add payload operands to kvm_multiple_exception
kvm: x86: Add exception payload fields to kvm_vcpu_events
kvm: x86: Add has_payload and payload to kvm_queued_exception
KVM: Documentation: Fix omission in struct kvm_vcpu_events
KVM: selftests: add Enlightened VMCS test
...
Diffstat (limited to 'virt/kvm')
-rw-r--r-- | virt/kvm/arm/arm.c | 26 | ||||
-rw-r--r-- | virt/kvm/arm/mmu.c | 128 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c | 36 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-kvm-device.c | 2 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c | 2 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.c | 12 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 39 |
7 files changed, 125 insertions, 120 deletions
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 150c8a69cdaf..23774970c9df 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -120,8 +120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret, cpu; - if (type) - return -EINVAL; + ret = kvm_arm_setup_stage2(kvm, type); + if (ret) + return ret; kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); if (!kvm->arch.last_vcpu_ran) @@ -212,6 +213,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: @@ -240,7 +242,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; default: - r = kvm_arch_dev_ioctl_check_extension(kvm, ext); + r = kvm_arch_vm_ioctl_check_extension(kvm, ext); break; } return r; @@ -544,7 +546,7 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm->arch.pgd); - BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); + BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp; @@ -1295,8 +1297,6 @@ static void cpu_init_hyp_mode(void *dummy) __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); - - kvm_arm_init_debug(); } static void cpu_hyp_reset(void) @@ -1309,16 +1309,12 @@ static void cpu_hyp_reinit(void) { cpu_hyp_reset(); - if (is_kernel_in_hyp_mode()) { - /* - * __cpu_init_stage2() is safe to call even if the PM - * event was cancelled before the CPU was reset. - */ - __cpu_init_stage2(); + if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); - } else { + else cpu_init_hyp_mode(NULL); - } + + kvm_arm_init_debug(); if (vgic_present) kvm_vgic_init_cpu_hardware(); @@ -1412,6 +1408,8 @@ static int init_common_resources(void) kvm_vmid_bits = kvm_get_vmid_bits(); kvm_info("%d-bit VMID\n", kvm_vmid_bits); + kvm_set_ipa_limit(); + return 0; } diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 1a2c3a1c56ce..5eca48bdb1a6 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -45,7 +45,6 @@ static phys_addr_t hyp_idmap_vector; static unsigned long io_map_base; -#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) @@ -150,20 +149,20 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { - pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); - stage2_pgd_clear(pgd); + pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL); + stage2_pgd_clear(kvm, pgd); kvm_tlb_flush_vmid_ipa(kvm, addr); - stage2_pud_free(pud_table); + stage2_pud_free(kvm, pud_table); put_page(virt_to_page(pgd)); } static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); - VM_BUG_ON(stage2_pud_huge(*pud)); - stage2_pud_clear(pud); + pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); + VM_BUG_ON(stage2_pud_huge(kvm, *pud)); + stage2_pud_clear(kvm, pud); kvm_tlb_flush_vmid_ipa(kvm, addr); - stage2_pmd_free(pmd_table); + stage2_pmd_free(kvm, pmd_table); put_page(virt_to_page(pud)); } @@ -252,7 +251,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (stage2_pte_table_empty(start_pte)) + if (stage2_pte_table_empty(kvm, start_pte)) clear_stage2_pmd_entry(kvm, pmd, start_addr); } @@ -262,9 +261,9 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, phys_addr_t next, start_addr = addr; pmd_t *pmd, *start_pmd; - start_pmd = pmd = stage2_pmd_offset(pud, addr); + start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr); do { - next = stage2_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(kvm, addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { pmd_t old_pmd = *pmd; @@ -281,7 +280,7 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, } } while (pmd++, addr = next, addr != end); - if (stage2_pmd_table_empty(start_pmd)) + if (stage2_pmd_table_empty(kvm, start_pmd)) clear_stage2_pud_entry(kvm, pud, start_addr); } @@ -291,14 +290,14 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, phys_addr_t next, start_addr = addr; pud_t *pud, *start_pud; - start_pud = pud = stage2_pud_offset(pgd, addr); + start_pud = pud = stage2_pud_offset(kvm, pgd, addr); do { - next = stage2_pud_addr_end(addr, end); - if (!stage2_pud_none(*pud)) { - if (stage2_pud_huge(*pud)) { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) { pud_t old_pud = *pud; - stage2_pud_clear(pud); + stage2_pud_clear(kvm, pud); kvm_tlb_flush_vmid_ipa(kvm, addr); kvm_flush_dcache_pud(old_pud); put_page(virt_to_page(pud)); @@ -308,7 +307,7 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, } } while (pud++, addr = next, addr != end); - if (stage2_pud_table_empty(start_pud)) + if (stage2_pud_table_empty(kvm, start_pud)) clear_stage2_pgd_entry(kvm, pgd, start_addr); } @@ -332,7 +331,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) assert_spin_locked(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - pgd = kvm->arch.pgd + stage2_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); do { /* * Make sure the page table is still active, as another thread @@ -341,8 +340,8 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) */ if (!READ_ONCE(kvm->arch.pgd)) break; - next = stage2_pgd_addr_end(addr, end); - if (!stage2_pgd_none(*pgd)) + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) unmap_stage2_puds(kvm, pgd, addr, next); /* * If the range is too large, release the kvm->mmu_lock @@ -371,9 +370,9 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, pmd_t *pmd; phys_addr_t next; - pmd = stage2_pmd_offset(pud, addr); + pmd = stage2_pmd_offset(kvm, pud, addr); do { - next = stage2_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(kvm, addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); @@ -389,11 +388,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, pud_t *pud; phys_addr_t next; - pud = stage2_pud_offset(pgd, addr); + pud = stage2_pud_offset(kvm, pgd, addr); do { - next = stage2_pud_addr_end(addr, end); - if (!stage2_pud_none(*pud)) { - if (stage2_pud_huge(*pud)) + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) kvm_flush_dcache_pud(*pud); else stage2_flush_pmds(kvm, pud, addr, next); @@ -409,10 +408,11 @@ static void stage2_flush_memslot(struct kvm *kvm, phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + stage2_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); do { - next = stage2_pgd_addr_end(addr, end); - stage2_flush_puds(kvm, pgd, addr, next); + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) + stage2_flush_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -897,7 +897,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) } /* Allocate the HW PGD, making sure that each page gets its own refcount */ - pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); + pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO); if (!pgd) return -ENOMEM; @@ -986,7 +986,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { - unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; } @@ -994,7 +994,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) /* Free the HW pgd, one page at a time */ if (pgd) - free_pages_exact(pgd, S2_PGD_SIZE); + free_pages_exact(pgd, stage2_pgd_size(kvm)); } static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, @@ -1003,16 +1003,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pgd_t *pgd; pud_t *pud; - pgd = kvm->arch.pgd + stage2_pgd_index(addr); - if (WARN_ON(stage2_pgd_none(*pgd))) { + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + if (stage2_pgd_none(kvm, *pgd)) { if (!cache) return NULL; pud = mmu_memory_cache_alloc(cache); - stage2_pgd_populate(pgd, pud); + stage2_pgd_populate(kvm, pgd, pud); get_page(virt_to_page(pgd)); } - return stage2_pud_offset(pgd, addr); + return stage2_pud_offset(kvm, pgd, addr); } static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, @@ -1025,15 +1025,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache if (!pud) return NULL; - if (stage2_pud_none(*pud)) { + if (stage2_pud_none(kvm, *pud)) { if (!cache) return NULL; pmd = mmu_memory_cache_alloc(cache); - stage2_pud_populate(pud, pmd); + stage2_pud_populate(kvm, pud, pmd); get_page(virt_to_page(pud)); } - return stage2_pmd_offset(pud, addr); + return stage2_pmd_offset(kvm, pud, addr); } static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache @@ -1207,8 +1207,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (writable) pte = kvm_s2pte_mkwrite(pte); - ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, - KVM_NR_MEM_OBJS); + ret = mmu_topup_memory_cache(&cache, + kvm_mmu_cache_min_pages(kvm), + KVM_NR_MEM_OBJS); if (ret) goto out; spin_lock(&kvm->mmu_lock); @@ -1230,8 +1231,14 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) { kvm_pfn_t pfn = *pfnp; gfn_t gfn = *ipap >> PAGE_SHIFT; + struct page *page = pfn_to_page(pfn); - if (PageTransCompoundMap(pfn_to_page(pfn))) { + /* + * PageTransCompoungMap() returns true for THP and + * hugetlbfs. Make sure the adjustment is done only for THP + * pages. + */ + if (!PageHuge(page) && PageTransCompoundMap(page)) { unsigned long mask; /* * The address we faulted on is backed by a transparent huge @@ -1296,19 +1303,21 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) /** * stage2_wp_pmds - write protect PUD range + * kvm: kvm instance for the VM * @pud: pointer to pud entry * @addr: range start address * @end: range end address */ -static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) { pmd_t *pmd; phys_addr_t next; - pmd = stage2_pmd_offset(pud, addr); + pmd = stage2_pmd_offset(kvm, pud, addr); do { - next = stage2_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(kvm, addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { if (!kvm_s2pmd_readonly(pmd)) @@ -1328,18 +1337,19 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) * * Process PUD entries, for a huge PUD we cause a panic. */ -static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) { pud_t *pud; phys_addr_t next; - pud = stage2_pud_offset(pgd, addr); + pud = stage2_pud_offset(kvm, pgd, addr); do { - next = stage2_pud_addr_end(addr, end); - if (!stage2_pud_none(*pud)) { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { /* TODO:PUD not supported, revisit later if supported */ - BUG_ON(stage2_pud_huge(*pud)); - stage2_wp_pmds(pud, addr, next); + BUG_ON(stage2_pud_huge(kvm, *pud)); + stage2_wp_pmds(kvm, pud, addr, next); } } while (pud++, addr = next, addr != end); } @@ -1355,7 +1365,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + stage2_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1369,9 +1379,9 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) cond_resched_lock(&kvm->mmu_lock); if (!READ_ONCE(kvm->arch.pgd)) break; - next = stage2_pgd_addr_end(addr, end); - if (stage2_pgd_present(*pgd)) - stage2_wp_puds(pgd, addr, next); + next = stage2_pgd_addr_end(kvm, addr, end); + if (stage2_pgd_present(kvm, *pgd)) + stage2_wp_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -1514,7 +1524,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, up_read(¤t->mm->mmap_sem); /* We need minimum second+third level pages */ - ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, + ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm), KVM_NR_MEM_OBJS); if (ret) return ret; @@ -1757,7 +1767,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) } /* Userspace should not be able to register out-of-bounds IPAs */ - VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); + VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); if (fault_status == FSC_ACCESS) { handle_access_fault(vcpu, fault_ipa); @@ -2056,7 +2066,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * space addressable by the KVM guest IPA space. */ if (memslot->base_gfn + memslot->npages >= - (KVM_PHYS_SIZE >> PAGE_SHIFT)) + (kvm_phys_size(kvm) >> PAGE_SHIFT)) return -EFAULT; down_read(¤t->mm->mmap_sem); diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 12502251727e..eb2a390a6c86 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -241,13 +241,6 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id, list_for_each_entry(dev, &(its)->device_list, dev_list) \ list_for_each_entry(ite, &(dev)->itt_head, ite_list) -/* - * We only implement 48 bits of PA at the moment, although the ITS - * supports more. Let's be restrictive here. - */ -#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) -#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) - #define GIC_LPI_OFFSET 8192 #define VITS_TYPER_IDBITS 16 @@ -759,6 +752,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, { int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; u64 indirect_ptr, type = GITS_BASER_TYPE(baser); + phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); int esz = GITS_BASER_ENTRY_SIZE(baser); int index; gfn_t gfn; @@ -783,7 +777,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (id >= (l1_tbl_size / esz)) return false; - addr = BASER_ADDRESS(baser) + id * esz; + addr = base + id * esz; gfn = addr >> PAGE_SHIFT; if (eaddr) @@ -798,7 +792,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, /* Each 1st level entry is represented by a 64-bit value. */ if (kvm_read_guest_lock(its->dev->kvm, - BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), + base + index * sizeof(indirect_ptr), &indirect_ptr, sizeof(indirect_ptr))) return false; @@ -808,11 +802,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (!(indirect_ptr & BIT_ULL(63))) return false; - /* - * Mask the guest physical address and calculate the frame number. - * Any address beyond our supported 48 bits of PA will be caught - * by the actual check in the final step. - */ + /* Mask the guest physical address and calculate the frame number. */ indirect_ptr &= GENMASK_ULL(51, 16); /* Find the address of the actual entry */ @@ -1304,9 +1294,6 @@ static u64 vgic_sanitise_its_baser(u64 reg) GITS_BASER_OUTER_CACHEABILITY_SHIFT, vgic_sanitise_outer_cacheability); - /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */ - reg &= ~GENMASK_ULL(15, 12); - /* We support only one (ITS) page size: 64K */ reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; @@ -1325,11 +1312,8 @@ static u64 vgic_sanitise_its_cbaser(u64 reg) GITS_CBASER_OUTER_CACHEABILITY_SHIFT, vgic_sanitise_outer_cacheability); - /* - * Sanitise the physical address to be 64k aligned. - * Also limit the physical addresses to 48 bits. - */ - reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12)); + /* Sanitise the physical address to be 64k aligned. */ + reg &= ~GENMASK_ULL(15, 12); return reg; } @@ -1375,7 +1359,7 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) if (!its->enabled) return; - cbaser = CBASER_ADDRESS(its->cbaser); + cbaser = GITS_CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, @@ -2233,7 +2217,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its) if (!(baser & GITS_BASER_VALID)) return 0; - l1_gpa = BASER_ADDRESS(baser); + l1_gpa = GITS_BASER_ADDR_48_to_52(baser); if (baser & GITS_BASER_INDIRECT) { l1_esz = GITS_LVL1_ENTRY_SIZE; @@ -2305,7 +2289,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); u64 baser = its->baser_coll_table; - gpa_t gpa = BASER_ADDRESS(baser); + gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser); struct its_collection *collection; u64 val; size_t max_size, filled = 0; @@ -2354,7 +2338,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) if (!(baser & GITS_BASER_VALID)) return 0; - gpa = BASER_ADDRESS(baser); + gpa = GITS_BASER_ADDR_48_to_52(baser); max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 6ada2432e37c..114dce9f4bf5 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -25,7 +25,7 @@ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, phys_addr_t addr, phys_addr_t alignment) { - if (addr & ~KVM_PHYS_MASK) + if (addr & ~kvm_phys_mask(kvm)) return -E2BIG; if (!IS_ALIGNED(addr, alignment)) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index a2a175b08b17..b3d1f0985117 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -364,7 +364,6 @@ static u64 vgic_sanitise_pendbaser(u64 reg) vgic_sanitise_outer_cacheability); reg &= ~PENDBASER_RES0_MASK; - reg &= ~GENMASK_ULL(51, 48); return reg; } @@ -382,7 +381,6 @@ static u64 vgic_sanitise_propbaser(u64 reg) vgic_sanitise_outer_cacheability); reg &= ~PROPBASER_RES0_MASK; - reg &= ~GENMASK_ULL(51, 48); return reg; } diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 9e65feb6fa58..3710342cf6ad 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -83,6 +83,7 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu, ring->coalesced_mmio[ring->last].phys_addr = addr; ring->coalesced_mmio[ring->last].len = len; memcpy(ring->coalesced_mmio[ring->last].data, val, len); + ring->coalesced_mmio[ring->last].pio = dev->zone.pio; smp_wmb(); ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; spin_unlock(&dev->kvm->ring_lock); @@ -140,6 +141,9 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, int ret; struct kvm_coalesced_mmio_dev *dev; + if (zone->pio != 1 && zone->pio != 0) + return -EINVAL; + dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); if (!dev) return -ENOMEM; @@ -149,8 +153,9 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, dev->zone = *zone; mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, - zone->size, &dev->dev); + ret = kvm_io_bus_register_dev(kvm, + zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, + zone->addr, zone->size, &dev->dev); if (ret < 0) goto out_free_dev; list_add_tail(&dev->list, &kvm->coalesced_zones); @@ -174,7 +179,8 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) { - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev); + kvm_io_bus_unregister_dev(kvm, + zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev); kvm_iodevice_destructor(&dev->dev); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f986e31fa68c..786ade1843a2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -219,7 +219,7 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, me = get_cpu(); kvm_for_each_vcpu(i, vcpu, kvm) { - if (!test_bit(i, vcpu_bitmap)) + if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) continue; kvm_make_request(req, vcpu); @@ -243,12 +243,10 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) { cpumask_var_t cpus; bool called; - static unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] - = {[0 ... BITS_TO_LONGS(KVM_MAX_VCPUS)-1] = ULONG_MAX}; zalloc_cpumask_var(&cpus, GFP_ATOMIC); - called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap, cpus); + called = kvm_make_vcpus_request_mask(kvm, req, NULL, cpus); free_cpumask_var(cpus); return called; @@ -807,20 +805,25 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) * sorted array and known changed memslot position. */ static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new) + struct kvm_memory_slot *new, + enum kvm_mr_change change) { int id = new->id; int i = slots->id_to_index[id]; struct kvm_memory_slot *mslots = slots->memslots; WARN_ON(mslots[i].id != id); - if (!new->npages) { - WARN_ON(!mslots[i].npages); - if (mslots[i].npages) - slots->used_slots--; - } else { - if (!mslots[i].npages) - slots->used_slots++; + switch (change) { + case KVM_MR_CREATE: + slots->used_slots++; + WARN_ON(mslots[i].npages || !new->npages); + break; + case KVM_MR_DELETE: + slots->used_slots--; + WARN_ON(new->npages || !mslots[i].npages); + break; + default: + break; } while (i < KVM_MEM_SLOTS_NUM - 1 && @@ -1056,7 +1059,7 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(&new.arch, 0, sizeof(new.arch)); } - update_memslots(slots, &new); + update_memslots(slots, &new, change); old_memslots = install_new_memslots(kvm, as_id, slots); kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); @@ -1311,8 +1314,12 @@ unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn) EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva); /* - * If writable is set to false, the hva returned by this function is only - * allowed to be read. + * Return the hva of a @gfn and the R/W attribute if possible. + * + * @slot: the kvm_memory_slot which contains @gfn + * @gfn: the gfn to be translated + * @writable: used to return the read/write attribute of the @slot if the hva + * is valid and @writable is not NULL */ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable) @@ -2946,6 +2953,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_KVM_MMIO case KVM_CAP_COALESCED_MMIO: return KVM_COALESCED_MMIO_PAGE_OFFSET; + case KVM_CAP_COALESCED_PIO: + return 1; #endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: |