diff options
Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/boot/dts/imx6sx-sdb.dts | 15 | ||||
-rw-r--r-- | arch/arm/boot/dts/tegra20-seaboard.dts | 2 | ||||
-rw-r--r-- | arch/arm/boot/dts/vf610-twr.dts | 15 | ||||
-rw-r--r-- | arch/arm/include/asm/kvm_emulate.h | 10 | ||||
-rw-r--r-- | arch/arm/include/asm/kvm_host.h | 3 | ||||
-rw-r--r-- | arch/arm/include/asm/kvm_mmu.h | 77 | ||||
-rw-r--r-- | arch/arm/kernel/entry-header.S | 13 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event.c | 10 | ||||
-rw-r--r-- | arch/arm/kernel/setup.c | 7 | ||||
-rw-r--r-- | arch/arm/kvm/arm.c | 10 | ||||
-rw-r--r-- | arch/arm/kvm/coproc.c | 70 | ||||
-rw-r--r-- | arch/arm/kvm/coproc.h | 6 | ||||
-rw-r--r-- | arch/arm/kvm/coproc_a15.c | 2 | ||||
-rw-r--r-- | arch/arm/kvm/coproc_a7.c | 2 | ||||
-rw-r--r-- | arch/arm/kvm/mmu.c | 164 | ||||
-rw-r--r-- | arch/arm/kvm/trace.h | 39 |
16 files changed, 330 insertions, 115 deletions
diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts index 1e6e5cc1c14c..c108bb451337 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dts +++ b/arch/arm/boot/dts/imx6sx-sdb.dts @@ -159,13 +159,28 @@ pinctrl-0 = <&pinctrl_enet1>; phy-supply = <®_enet_3v3>; phy-mode = "rgmii"; + phy-handle = <ðphy1>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy1: ethernet-phy@1 { + reg = <1>; + }; + + ethphy2: ethernet-phy@2 { + reg = <2>; + }; + }; }; &fec2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet2>; phy-mode = "rgmii"; + phy-handle = <ðphy2>; status = "okay"; }; diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts index ea282c7c0ca5..e2fed2712249 100644 --- a/arch/arm/boot/dts/tegra20-seaboard.dts +++ b/arch/arm/boot/dts/tegra20-seaboard.dts @@ -406,7 +406,7 @@ clock-frequency = <400000>; magnetometer@c { - compatible = "ak,ak8975"; + compatible = "asahi-kasei,ak8975"; reg = <0xc>; interrupt-parent = <&gpio>; interrupts = <TEGRA_GPIO(N, 5) IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/vf610-twr.dts b/arch/arm/boot/dts/vf610-twr.dts index a0f762159cb2..f2b64b1b00fa 100644 --- a/arch/arm/boot/dts/vf610-twr.dts +++ b/arch/arm/boot/dts/vf610-twr.dts @@ -129,13 +129,28 @@ &fec0 { phy-mode = "rmii"; + phy-handle = <ðphy0>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_fec0>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy0: ethernet-phy@0 { + reg = <0>; + }; + + ethphy1: ethernet-phy@1 { + reg = <1>; + }; + }; }; &fec1 { phy-mode = "rmii"; + phy-handle = <ðphy1>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_fec1>; status = "okay"; diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 66ce17655bb9..7b0152321b20 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -38,6 +38,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr = HCR_GUEST_MASK; } +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hcr; +} + +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) +{ + vcpu->arch.hcr = hcr; +} + static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 254e0650e48b..04b4ea0b550a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -125,9 +125,6 @@ struct kvm_vcpu_arch { * Anything that is not used directly from assembly code goes * here. */ - /* dcache set/way operation pending */ - int last_pcpu; - cpumask_t require_dcache_flush; /* Don't run the guest on this vcpu */ bool pause; diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 63e0ecc04901..1bca8f8af442 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -44,6 +44,7 @@ #ifndef __ASSEMBLY__ +#include <linux/highmem.h> #include <asm/cacheflush.h> #include <asm/pgalloc.h> @@ -161,13 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; } -static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size, - bool ipa_uncached) +static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) - kvm_flush_dcache_to_poc((void *)hva, size); - /* * If we are going to insert an instruction page and the icache is * either VIPT or PIPT, there is a potential problem where the host @@ -179,18 +177,77 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, * * VIVT caches are tagged using both the ASID and the VMID and doesn't * need any kind of flushing (DDI 0406C.b - Page B3-1392). + * + * We need to do this through a kernel mapping (using the + * user-space mapping has proved to be the wrong + * solution). For that, we need to kmap one page at a time, + * and iterate over the range. */ - if (icache_is_pipt()) { - __cpuc_coherent_user_range(hva, hva + size); - } else if (!icache_is_vivt_asid_tagged()) { + + bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached; + + VM_BUG_ON(size & PAGE_MASK); + + if (!need_flush && !icache_is_pipt()) + goto vipt_cache; + + while (size) { + void *va = kmap_atomic_pfn(pfn); + + if (need_flush) + kvm_flush_dcache_to_poc(va, PAGE_SIZE); + + if (icache_is_pipt()) + __cpuc_coherent_user_range((unsigned long)va, + (unsigned long)va + PAGE_SIZE); + + size -= PAGE_SIZE; + pfn++; + + kunmap_atomic(va); + } + +vipt_cache: + if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { /* any kind of VIPT cache */ __flush_icache_all(); } } +static inline void __kvm_flush_dcache_pte(pte_t pte) +{ + void *va = kmap_atomic(pte_page(pte)); + + kvm_flush_dcache_to_poc(va, PAGE_SIZE); + + kunmap_atomic(va); +} + +static inline void __kvm_flush_dcache_pmd(pmd_t pmd) +{ + unsigned long size = PMD_SIZE; + pfn_t pfn = pmd_pfn(pmd); + + while (size) { + void *va = kmap_atomic_pfn(pfn); + + kvm_flush_dcache_to_poc(va, PAGE_SIZE); + + pfn++; + size -= PAGE_SIZE; + + kunmap_atomic(va); + } +} + +static inline void __kvm_flush_dcache_pud(pud_t pud) +{ +} + #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) -void stage2_flush_vm(struct kvm *kvm); +void kvm_set_way_flush(struct kvm_vcpu *vcpu); +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 4176df721bf0..1a0045abead7 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -253,21 +253,22 @@ .endm .macro restore_user_regs, fast = 0, offset = 0 - ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr - ldr lr, [sp, #\offset + S_PC]! @ get pc + mov r2, sp + ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr + ldr lr, [r2, #\offset + S_PC]! @ get pc msr spsr_cxsf, r1 @ save in spsr_svc #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @ We must avoid clrex due to Cortex-A15 erratum #830321 - strex r1, r2, [sp] @ clear the exclusive monitor + strex r1, r2, [r2] @ clear the exclusive monitor #endif .if \fast - ldmdb sp, {r1 - lr}^ @ get calling r1 - lr + ldmdb r2, {r1 - lr}^ @ get calling r1 - lr .else - ldmdb sp, {r0 - lr}^ @ get calling r0 - lr + ldmdb r2, {r0 - lr}^ @ get calling r0 - lr .endif mov r0, r0 @ ARMv5T and earlier require a nop @ after ldm {}^ - add sp, sp, #S_FRAME_SIZE - S_PC + add sp, sp, #\offset + S_FRAME_SIZE movs pc, lr @ return & move spsr_svc into cpsr .endm diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f7c65adaa428..557e128e4df0 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -116,8 +116,14 @@ int armpmu_event_set_period(struct perf_event *event) ret = 1; } - if (left > (s64)armpmu->max_period) - left = armpmu->max_period; + /* + * Limit the maximum period to prevent the counter value + * from overtaking the one we are about to program. In + * effect we are reducing max_period to account for + * interrupt latency (and we are being very conservative). + */ + if (left > (armpmu->max_period >> 1)) + left = armpmu->max_period >> 1; local64_set(&hwc->prev_count, (u64)-left); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 715ae19bc7c8..e55408e96559 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -657,10 +657,13 @@ int __init arm_add_memory(u64 start, u64 size) /* * Ensure that start/size are aligned to a page boundary. - * Size is appropriately rounded down, start is rounded up. + * Size is rounded down, start is rounded up. */ - size -= start & ~PAGE_MASK; aligned_start = PAGE_ALIGN(start); + if (aligned_start > start + size) + size = 0; + else + size -= aligned_start - start; #ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT if (aligned_start > ULONG_MAX) { diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2d6d91001062..0b0d58a905c4 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -281,15 +281,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->cpu = cpu; vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); - /* - * Check whether this vcpu requires the cache to be flushed on - * this physical CPU. This is a consequence of doing dcache - * operations by set/way on this vcpu. We do it here to be in - * a non-preemptible section. - */ - if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush)) - flush_cache_all(); /* We'd really want v7_flush_dcache_all() */ - kvm_arm_set_running_vcpu(vcpu); } @@ -541,7 +532,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; - vcpu->arch.last_pcpu = smp_processor_id(); kvm_guest_exit(); trace_kvm_exit(*vcpu_pc(vcpu)); /* diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 7928dbdf2102..f3d88dc388bc 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -189,82 +189,40 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu, return true; } -/* See note at ARM ARM B1.14.4 */ +/* + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). + */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { - unsigned long val; - int cpu; - if (!p->is_write) return read_from_write_only(vcpu, p); - cpu = get_cpu(); - - cpumask_setall(&vcpu->arch.require_dcache_flush); - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); - - /* If we were already preempted, take the long way around */ - if (cpu != vcpu->arch.last_pcpu) { - flush_cache_all(); - goto done; - } - - val = *vcpu_reg(vcpu, p->Rt1); - - switch (p->CRm) { - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ - case 14: /* DCCISW */ - asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val)); - break; - - case 10: /* DCCSW */ - asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val)); - break; - } - -done: - put_cpu(); - + kvm_set_way_flush(vcpu); return true; } /* * Generic accessor for VM registers. Only called as long as HCR_TVM - * is set. + * is set. If the guest enables the MMU, we stop trapping the VM + * sys_regs and leave it in complete control of the caches. + * + * Used by the cpu-specific code. */ -static bool access_vm_reg(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) +bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) { + bool was_enabled = vcpu_has_cache_enabled(vcpu); + BUG_ON(!p->is_write); vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); if (p->is_64bit) vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); - return true; -} - -/* - * SCTLR accessor. Only called as long as HCR_TVM is set. If the - * guest enables the MMU, we stop trapping the VM sys_regs and leave - * it in complete control of the caches. - * - * Used by the cpu-specific code. - */ -bool access_sctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - access_vm_reg(vcpu, p, r); - - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ - vcpu->arch.hcr &= ~HCR_TVM; - stage2_flush_vm(vcpu->kvm); - } - + kvm_toggle_cache(vcpu, was_enabled); return true; } diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 1a44bbe39643..88d24a3a9778 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -153,8 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1, #define is64 .is_64 = true #define is32 .is_64 = false -bool access_sctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r); +bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r); #endif /* __ARM_KVM_COPROC_LOCAL_H__ */ diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index e6f4ae48bda9..a7136757d373 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -34,7 +34,7 @@ static const struct coproc_reg a15_regs[] = { /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, + access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 }, }; static struct kvm_coproc_target_table a15_target_table = { diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c index 17fc7cd479d3..b19e46d1b2c0 100644 --- a/arch/arm/kvm/coproc_a7.c +++ b/arch/arm/kvm/coproc_a7.c @@ -37,7 +37,7 @@ static const struct coproc_reg a7_regs[] = { /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, + access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 }, }; static struct kvm_coproc_target_table a7_target_table = { diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1dc9778a00af..136662547ca6 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } +/* + * D-Cache management functions. They take the page table entries by + * value, as they are flushing the cache using the kernel mapping (or + * kmap on 32bit). + */ +static void kvm_flush_dcache_pte(pte_t pte) +{ + __kvm_flush_dcache_pte(pte); +} + +static void kvm_flush_dcache_pmd(pmd_t pmd) +{ + __kvm_flush_dcache_pmd(pmd); +} + +static void kvm_flush_dcache_pud(pud_t pud) +{ + __kvm_flush_dcache_pud(pud); +} + static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min, int max) { @@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) put_page(virt_to_page(pmd)); } +/* + * Unmapping vs dcache management: + * + * If a guest maps certain memory pages as uncached, all writes will + * bypass the data cache and go directly to RAM. However, the CPUs + * can still speculate reads (not writes) and fill cache lines with + * data. + * + * Those cache lines will be *clean* cache lines though, so a + * clean+invalidate operation is equivalent to an invalidate + * operation, because no cache lines are marked dirty. + * + * Those clean cache lines could be filled prior to an uncached write + * by the guest, and the cache coherent IO subsystem would therefore + * end up writing old data to disk. + * + * This is why right after unmapping a page/section and invalidating + * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure + * the IO subsystem will never hit in the cache. + */ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { @@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, start_pte = pte = pte_offset_kernel(pmd, addr); do { if (!pte_none(*pte)) { + pte_t old_pte = *pte; + kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); kvm_tlb_flush_vmid_ipa(kvm, addr); + + /* No need to invalidate the cache for device mappings */ + if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + kvm_flush_dcache_pte(old_pte); + + put_page(virt_to_page(pte)); } } while (pte++, addr += PAGE_SIZE, addr != end); @@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud, next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { if (kvm_pmd_huge(*pmd)) { + pmd_t old_pmd = *pmd; + pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); + + kvm_flush_dcache_pmd(old_pmd); + put_page(virt_to_page(pmd)); } else { unmap_ptes(kvm, pmd, addr, next); @@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd, next = kvm_pud_addr_end(addr, end); if (!pud_none(*pud)) { if (pud_huge(*pud)) { + pud_t old_pud = *pud; + pud_clear(pud); kvm_tlb_flush_vmid_ipa(kvm, addr); + + kvm_flush_dcache_pud(old_pud); + put_page(virt_to_page(pud)); } else { unmap_pmds(kvm, pud, addr, next); @@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, pte = pte_offset_kernel(pmd, addr); do { - if (!pte_none(*pte)) { - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); - kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); - } + if (!pte_none(*pte) && + (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + kvm_flush_dcache_pte(*pte); } while (pte++, addr += PAGE_SIZE, addr != end); } @@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, do { next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { - if (kvm_pmd_huge(*pmd)) { - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); - kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); - } else { + if (kvm_pmd_huge(*pmd)) + kvm_flush_dcache_pmd(*pmd); + else stage2_flush_ptes(kvm, pmd, addr, next); - } } } while (pmd++, addr = next, addr != end); } @@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, do { next = kvm_pud_addr_end(addr, end); if (!pud_none(*pud)) { - if (pud_huge(*pud)) { - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); - kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); - } else { + if (pud_huge(*pud)) + kvm_flush_dcache_pud(*pud); + else stage2_flush_pmds(kvm, pud, addr, next); - } } } while (pud++, addr = next, addr != end); } @@ -278,7 +330,7 @@ static void stage2_flush_memslot(struct kvm *kvm, * Go through the stage 2 page tables and invalidate any cache lines * backing memory already mapped to the VM. */ -void stage2_flush_vm(struct kvm *kvm) +static void stage2_flush_vm(struct kvm *kvm) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -905,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn) return !pfn_valid(pfn); } +static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, + unsigned long size, bool uncached) +{ + __coherent_cache_guest_page(vcpu, pfn, size, uncached); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -994,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pmd_writable(&new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, - fault_ipa_uncached); + coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); @@ -1003,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, - fault_ipa_uncached); + coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); } @@ -1411,3 +1467,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, unmap_stage2_range(kvm, gpa, size); spin_unlock(&kvm->mmu_lock); } + +/* + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). + * + * Main problems: + * - S/W ops are local to a CPU (not broadcast) + * - We have line migration behind our back (speculation) + * - System caches don't support S/W at all (damn!) + * + * In the face of the above, the best we can do is to try and convert + * S/W ops to VA ops. Because the guest is not allowed to infer the + * S/W to PA mapping, it can only use S/W to nuke the whole cache, + * which is a rather good thing for us. + * + * Also, it is only used when turning caches on/off ("The expected + * usage of the cache maintenance instructions that operate by set/way + * is associated with the cache maintenance instructions associated + * with the powerdown and powerup of caches, if this is required by + * the implementation."). + * + * We use the following policy: + * + * - If we trap a S/W operation, we enable VM trapping to detect + * caches being turned on/off, and do a full clean. + * + * - We flush the caches on both caches being turned on and off. + * + * - Once the caches are enabled, we stop trapping VM ops. + */ +void kvm_set_way_flush(struct kvm_vcpu *vcpu) +{ + unsigned long hcr = vcpu_get_hcr(vcpu); + + /* + * If this is the first time we do a S/W operation + * (i.e. HCR_TVM not set) flush the whole memory, and set the + * VM trapping. + * + * Otherwise, rely on the VM trapping to wait for the MMU + + * Caches to be turned off. At that point, we'll be able to + * clean the caches again. + */ + if (!(hcr & HCR_TVM)) { + trace_kvm_set_way_flush(*vcpu_pc(vcpu), + vcpu_has_cache_enabled(vcpu)); + stage2_flush_vm(vcpu->kvm); + vcpu_set_hcr(vcpu, hcr | HCR_TVM); + } +} + +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) +{ + bool now_enabled = vcpu_has_cache_enabled(vcpu); + + /* + * If switching the MMU+caches on, need to invalidate the caches. + * If switching it off, need to clean the caches. + * Clean + invalidate does the trick always. + */ + if (now_enabled != was_enabled) + stage2_flush_vm(vcpu->kvm); + + /* Caches are now on, stop trapping VM ops (until a S/W op) */ + if (now_enabled) + vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); + + trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); +} diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index b1d640f78623..b6a6e7102201 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -223,6 +223,45 @@ TRACE_EVENT(kvm_hvc, __entry->vcpu_pc, __entry->r0, __entry->imm) ); +TRACE_EVENT(kvm_set_way_flush, + TP_PROTO(unsigned long vcpu_pc, bool cache), + TP_ARGS(vcpu_pc, cache), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, cache ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->cache = cache; + ), + + TP_printk("S/W flush at 0x%016lx (cache %s)", + __entry->vcpu_pc, __entry->cache ? "on" : "off") +); + +TRACE_EVENT(kvm_toggle_cache, + TP_PROTO(unsigned long vcpu_pc, bool was, bool now), + TP_ARGS(vcpu_pc, was, now), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, was ) + __field( bool, now ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->was = was; + __entry->now = now; + ), + + TP_printk("VM op at 0x%016lx (cache was %s, now %s)", + __entry->vcpu_pc, __entry->was ? "on" : "off", + __entry->now ? "on" : "off") +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH |