diff options
author | Paul Mackerras <paulus@samba.org> | 2011-12-12 12:31:41 +0000 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2012-03-05 14:52:37 +0200 |
commit | da9d1d7f2875cc8c1ffbce8f3501d0b33f4e7a4d (patch) | |
tree | a811ee19778715766e720646506311c8fc7d7bd0 /arch/powerpc/kvm | |
parent | c77162dee7aff6ab5f075da9b60f649cbbeb86cc (diff) |
KVM: PPC: Allow use of small pages to back Book3S HV guests

This relaxes the requirement that the guest memory be provided as
16MB huge pages, allowing it to be provided as normal memory, i.e.
in pages of PAGE_SIZE bytes (4k or 64k). To allow this, we index
the kvm->arch.slot_phys[] arrays with a small page index, even if
huge pages are being used, and use the low-order 5 bits of each
entry to store the order of the enclosing page with respect to
normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE).

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 122 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 57 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 |
3 files changed, 119 insertions, 66 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 87016ccd8648..cc18f3d67a57 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -34,8 +34,6 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> -/* Pages in the VRMA are 16MB pages */ -#define VRMA_PAGE_ORDER 24 #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ @@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm) free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); } -void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) +/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize > 0x1000) ? HPTE_V_LARGE : 0; +} + +/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize == 0x10000) ? 
0x1000 : 0; +} + +void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, + unsigned long porder) { - struct kvm *kvm = vcpu->kvm; unsigned long i; unsigned long npages; unsigned long hp_v, hp_r; unsigned long addr, hash; - unsigned long porder = kvm->arch.ram_porder; + unsigned long psize; + unsigned long hp0, hp1; long ret; - npages = kvm->arch.slot_npages[memslot->id]; + psize = 1ul << porder; + npages = memslot->npages >> (porder - PAGE_SHIFT); /* VRMA can't be > 1TB */ if (npages > 1ul << (40 - porder)) @@ -114,6 +126,11 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) if (npages > HPT_NPTEG) npages = HPT_NPTEG; + hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | + HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); + hp1 = hpte1_pgsize_encoding(psize) | + HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + for (i = 0; i < npages; ++i) { addr = i << porder; /* can't use hpt_hash since va > 64 bits */ @@ -125,10 +142,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) * is available and use it. */ hash = (hash << 3) + 7; - hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | - (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | - HPTE_V_LARGE | HPTE_V_VALID; - hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + hp_v = hp0 | ((addr >> 16) & ~0x7fUL); + hp_r = hp1 | addr; ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); if (ret != H_SUCCESS) { pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", @@ -176,22 +191,25 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) * one already in the kvm->arch.slot_phys[][] arrays. 
*/ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, - struct kvm_memory_slot *memslot) + struct kvm_memory_slot *memslot, + unsigned long psize) { unsigned long start; - long np; - struct page *page, *pages[1]; + long np, err; + struct page *page, *hpage, *pages[1]; + unsigned long s, pgsize; unsigned long *physp; - unsigned long pfn, i; + unsigned int got, pgorder; + unsigned long pfn, i, npages; physp = kvm->arch.slot_phys[memslot->id]; if (!physp) return -EINVAL; - i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT); - if (physp[i]) + if (physp[gfn - memslot->base_gfn]) return 0; page = NULL; + pgsize = psize; start = gfn_to_hva_memslot(memslot, gfn); /* Instantiate and get the page we want access to */ @@ -199,25 +217,46 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, if (np != 1) return -EINVAL; page = pages[0]; - - /* Check it's a 16MB page */ - if (!PageHead(page) || - compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) { - pr_err("page at %lx isn't 16MB (o=%d)\n", - start, compound_order(page)); - put_page(page); - return -EINVAL; + got = KVMPPC_GOT_PAGE; + + /* See if this is a large page */ + s = PAGE_SIZE; + if (PageHuge(page)) { + hpage = compound_head(page); + s <<= compound_order(hpage); + /* Get the whole large page if slot alignment is ok */ + if (s > psize && slot_is_aligned(memslot, s) && + !(memslot->userspace_addr & (s - 1))) { + start &= ~(s - 1); + pgsize = s; + page = hpage; + } } + err = -EINVAL; + if (s < psize) + goto out; pfn = page_to_pfn(page); + npages = pgsize >> PAGE_SHIFT; + pgorder = __ilog2(npages); + physp += (gfn - memslot->base_gfn) & ~(npages - 1); spin_lock(&kvm->arch.slot_phys_lock); - if (!physp[i]) - physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE; - else - put_page(page); + for (i = 0; i < npages; ++i) { + if (!physp[i]) { + physp[i] = ((pfn + i) << PAGE_SHIFT) + got + pgorder; + got = 0; + } + } spin_unlock(&kvm->arch.slot_phys_lock); + err = 0; - 
return 0; + out: + if (got) { + if (PageHuge(page)) + page = compound_head(page); + put_page(page); + } + return err; } /* @@ -242,7 +281,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return H_PARAMETER; - if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; + if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) return H_PARAMETER; preempt_disable(); @@ -269,8 +310,8 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, struct kvm_memory_slot *memslot; unsigned long gfn = gpa >> PAGE_SHIFT; struct page *page; - unsigned long offset; - unsigned long pfn, pa; + unsigned long psize, offset; + unsigned long pa; unsigned long *physp; memslot = gfn_to_memslot(kvm, gfn); @@ -279,20 +320,23 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, physp = kvm->arch.slot_phys[memslot->id]; if (!physp) return NULL; - physp += (gfn - memslot->base_gfn) >> - (kvm->arch.ram_porder - PAGE_SHIFT); + physp += gfn - memslot->base_gfn; pa = *physp; if (!pa) { - if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0) return NULL; pa = *physp; } - pfn = pa >> PAGE_SHIFT; - page = pfn_to_page(pfn); + page = pfn_to_page(pa >> PAGE_SHIFT); + psize = PAGE_SIZE; + if (PageHuge(page)) { + page = compound_head(page); + psize <<= compound_order(page); + } get_page(page); - offset = gpa & (kvm->arch.ram_psize - 1); + offset = gpa & (psize - 1); if (nb_ret) - *nb_ret = kvm->arch.ram_psize - offset; + *nb_ret = psize - offset; return page_address(page) + offset; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ce5a13fb974b..6ed0a84ef91c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -51,8 +51,6 @@ #include <linux/highmem.h> #include <linux/hugetlb.h> -#define 
LARGE_PAGE_ORDER 24 /* 16MB pages */ - /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ @@ -1074,24 +1072,26 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) return fd; } +static unsigned long slb_pgsize_encoding(unsigned long psize) +{ + unsigned long senc = 0; + + if (psize > 0x1000) { + senc = SLB_VSID_L; + if (psize == 0x10000) + senc |= SLB_VSID_LP_01; + } + return senc; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - unsigned long psize; unsigned long npages; unsigned long *phys; - /* For now, only allow 16MB-aligned slots */ - psize = kvm->arch.ram_psize; - if ((mem->memory_size & (psize - 1)) || - (mem->guest_phys_addr & (psize - 1))) { - pr_err("bad memory_size=%llx @ %llx\n", - mem->memory_size, mem->guest_phys_addr); - return -EINVAL; - } - /* Allocate a slot_phys array */ - npages = mem->memory_size >> kvm->arch.ram_porder; + npages = mem->memory_size >> PAGE_SHIFT; phys = kvm->arch.slot_phys[mem->slot]; if (!phys) { phys = vzalloc(npages * sizeof(unsigned long)); @@ -1119,6 +1119,8 @@ static void unpin_slot(struct kvm *kvm, int slot_id) continue; pfn = physp[j] >> PAGE_SHIFT; page = pfn_to_page(pfn); + if (PageHuge(page)) + page = compound_head(page); SetPageDirty(page); put_page(page); } @@ -1141,12 +1143,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; - unsigned long lpcr; + unsigned long lpcr, senc; unsigned long psize, porder; unsigned long rma_size; unsigned long rmls; unsigned long *physp; - unsigned long i, npages, pa; + unsigned long i, npages; mutex_lock(&kvm->lock); if (kvm->arch.rma_setup_done) @@ -1168,8 +1170,7 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) goto up_out; psize = vma_kernel_pagesize(vma); - if (psize != kvm->arch.ram_psize) - goto up_out; + porder = __ilog2(psize); /* Is this one of our preallocated 
RMAs? */ if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && @@ -1186,13 +1187,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) goto out; } + /* We can handle 4k, 64k or 16M pages in the VRMA */ + err = -EINVAL; + if (!(psize == 0x1000 || psize == 0x10000 || + psize == 0x1000000)) + goto out; + /* Update VRMASD field in the LPCR */ - lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH); - lpcr |= LPCR_VRMA_L; + senc = slb_pgsize_encoding(psize); + lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; + lpcr |= senc << (LPCR_VRMASD_SH - 4); kvm->arch.lpcr = lpcr; /* Create HPTEs in the hash page table for the VRMA */ - kvmppc_map_vrma(vcpu, memslot); + kvmppc_map_vrma(vcpu, memslot, porder); } else { /* Set up to use an RMO region */ @@ -1231,13 +1239,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); /* Initialize phys addrs of pages in RMO */ - porder = kvm->arch.ram_porder; - npages = rma_size >> porder; - pa = ri->base_pfn << PAGE_SHIFT; + npages = ri->npages; + porder = __ilog2(npages); physp = kvm->arch.slot_phys[memslot->id]; spin_lock(&kvm->arch.slot_phys_lock); for (i = 0; i < npages; ++i) - physp[i] = pa + (i << porder); + physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; spin_unlock(&kvm->arch.slot_phys_lock); } @@ -1266,8 +1273,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); - kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER; - kvm->arch.ram_porder = LARGE_PAGE_ORDER; kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 047c5e1fd70f..c086eb0fa992 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -77,6 +77,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, memslot = builtin_gfn_to_memslot(kvm, gfn); if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) return H_PARAMETER; + + /* Check if 
the requested page fits entirely in the memslot. */ + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; slot_fn = gfn - memslot->base_gfn; physp = kvm->arch.slot_phys[memslot->id]; @@ -88,9 +92,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, pa = *physp; if (!pa) return H_TOO_HARD; + pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); pa &= PAGE_MASK; - pte_size = kvm->arch.ram_psize; if (pte_size < psize) return H_PARAMETER; if (pa && pte_size > psize) |