diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2021-04-23 07:41:17 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2021-04-23 07:41:17 -0400 |
commit | c4f71901d53b6d8a4703389459d9f99fbd80ffd2 (patch) | |
tree | af8a0c33cec6dfb8a5d5cd7fcef245ab02b12691 /arch/arm64/kvm/arm.c | |
parent | fd49e8ee70b306a003323a17bbcc0633f322c135 (diff) | |
parent | 9a8aae605b80fc0a830cdce747eed48e11acc067 (diff) |
Merge tag 'kvmarm-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for Linux 5.13
New features:
- Stage-2 isolation for the host kernel when running in protected mode
- Guest SVE support when running in nVHE mode
- Force W^X hypervisor mappings in nVHE mode
- ITS save/restore for guests using direct injection with GICv4.1
- nVHE panics now produce readable backtraces
- Guest support for PTP using the ptp_kvm driver
- Performance improvements in the S2 fault handler
- Alexandru is now a reviewer (not really a new feature...)
Fixes:
- Proper emulation of the GICR_TYPER register
- Handle the complete set of relocation in the nVHE EL2 object
- Get rid of the oprofile dependency in the PMU code (and of the
oprofile body parts at the same time)
- Debug and SPE fixes
- Fix vcpu reset
Diffstat (limited to 'arch/arm64/kvm/arm.c')
-rw-r--r-- | arch/arm64/kvm/arm.c | 216 |
1 files changed, 184 insertions, 32 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 0d92a4e5fe80..1cb39c0803a4 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -206,6 +206,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_INJECT_EXT_DABT: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: + case KVM_CAP_PTP_KVM: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -418,10 +419,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_disable(vcpu); + kvm_arch_vcpu_load_debug_state_flags(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_arch_vcpu_put_debug_state_flags(vcpu); kvm_arch_vcpu_put_fp(vcpu); if (has_vhe()) kvm_vcpu_put_sysregs_vhe(vcpu); @@ -582,6 +585,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) vcpu->arch.has_run_once = true; + kvm_arm_vcpu_init_debug(vcpu); + if (likely(irqchip_in_kernel(kvm))) { /* * Map the VGIC hardware resources before running a vcpu the @@ -1352,16 +1357,9 @@ static unsigned long nvhe_percpu_order(void) /* A lookup table holding the hypervisor VA for each vector slot */ static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; -static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot) -{ - return slot - (slot != HYP_VECTOR_DIRECT); -} - static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) { - int idx = __kvm_vector_slot2idx(slot); - - hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K); + hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot); } static int kvm_init_vector_slots(void) @@ -1390,22 +1388,18 @@ static int kvm_init_vector_slots(void) return 0; } -static void cpu_init_hyp_mode(void) +static void cpu_prepare_hyp_mode(int cpu) { - struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params); - struct arm_smccc_res res; + struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); unsigned long tcr; - /* Switch from the HYP stub to our own HYP init vector */ - __hyp_set_vectors(kvm_get_idmap_vector()); - /* * Calculate the raw per-cpu offset without a translation from the * kernel's mapping to the linear mapping, and store it in tpidr_el2 * so that we can use adr_l to access per-cpu variables in EL2. * Also drop the KASAN tag which gets in the way... */ - params->tpidr_el2 = (unsigned long)kasan_reset_tag(this_cpu_ptr_nvhe_sym(__per_cpu_start)) - + params->tpidr_el2 = (unsigned long)kasan_reset_tag(per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu)) - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); params->mair_el2 = read_sysreg(mair_el1); @@ -1429,14 +1423,28 @@ static void cpu_init_hyp_mode(void) tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; params->tcr_el2 = tcr; - params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE); + params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE); params->pgd_pa = kvm_mmu_get_httbr(); + if (is_protected_kvm_enabled()) + params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; + else + params->hcr_el2 = HCR_HOST_NVHE_FLAGS; + params->vttbr = params->vtcr = 0; /* * Flush the init params from the data cache because the struct will * be read while the MMU is off. */ kvm_flush_dcache_to_poc(params, sizeof(*params)); +} + +static void hyp_install_host_vector(void) +{ + struct kvm_nvhe_init_params *params; + struct arm_smccc_res res; + + /* Switch from the HYP stub to our own HYP init vector */ + __hyp_set_vectors(kvm_get_idmap_vector()); /* * Call initialization code, and switch to the full blown HYP code. @@ -1445,8 +1453,14 @@ static void cpu_init_hyp_mode(void) * cpus_have_const_cap() wrapper. */ BUG_ON(!system_capabilities_finalized()); + params = this_cpu_ptr_nvhe_sym(kvm_init_params); arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res); WARN_ON(res.a0 != SMCCC_RET_SUCCESS); +} + +static void cpu_init_hyp_mode(void) +{ + hyp_install_host_vector(); /* * Disabling SSBD on a non-VHE system requires us to enable SSBS @@ -1489,7 +1503,10 @@ static void cpu_set_hyp_vector(void) struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); void *vector = hyp_spectre_vector_selector[data->slot]; - *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; + if (!is_protected_kvm_enabled()) + *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; + else + kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot); } static void cpu_hyp_reinit(void) @@ -1497,13 +1514,14 @@ static void cpu_hyp_reinit(void) kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); cpu_hyp_reset(); - cpu_set_hyp_vector(); if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); else cpu_init_hyp_mode(); + cpu_set_hyp_vector(); + kvm_arm_init_debug(); if (vgic_present) @@ -1699,18 +1717,62 @@ static void teardown_hyp_mode(void) } } +static int do_pkvm_init(u32 hyp_va_bits) +{ + void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base); + int ret; + + preempt_disable(); + hyp_install_host_vector(); + ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size, + num_possible_cpus(), kern_hyp_va(per_cpu_base), + hyp_va_bits); + preempt_enable(); + + return ret; +} + +static int kvm_hyp_init_protection(u32 hyp_va_bits) +{ + void *addr = phys_to_virt(hyp_mem_base); + int ret; + + kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + + ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); + if (ret) + return ret; + + ret = do_pkvm_init(hyp_va_bits); + if (ret) + return ret; + + free_hyp_pgds(); + + return 0; +} + /** * Inits Hyp-mode on all online CPUs */ static int init_hyp_mode(void) { + u32 hyp_va_bits; int cpu; - int err = 0; + int err = -ENOMEM; + + /* + * The protected Hyp-mode cannot be initialized if the memory pool + * allocation has failed. + */ + if (is_protected_kvm_enabled() && !hyp_mem_base) + goto out_err; /* * Allocate Hyp PGD and setup Hyp identity mapping */ - err = kvm_mmu_init(); + err = kvm_mmu_init(&hyp_va_bits); if (err) goto out_err; @@ -1771,7 +1833,19 @@ static int init_hyp_mode(void) goto out_err; } - err = create_hyp_mappings(kvm_ksym_ref(__bss_start), + /* + * .hyp.bss is guaranteed to be placed at the beginning of the .bss + * section thanks to an assertion in the linker script. Map it RW and + * the rest of .bss RO. + */ + err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_start), + kvm_ksym_ref(__hyp_bss_end), PAGE_HYP); + if (err) { + kvm_err("Cannot map hyp bss section: %d\n", err); + goto out_err; + } + + err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end), kvm_ksym_ref(__bss_stop), PAGE_HYP_RO); if (err) { kvm_err("Cannot map bss section\n"); @@ -1792,26 +1866,36 @@ static int init_hyp_mode(void) } } - /* - * Map Hyp percpu pages - */ for_each_possible_cpu(cpu) { char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu]; char *percpu_end = percpu_begin + nvhe_percpu_size(); + /* Map Hyp percpu pages */ err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP); - if (err) { kvm_err("Cannot map hyp percpu region\n"); goto out_err; } + + /* Prepare the CPU initialization parameters */ + cpu_prepare_hyp_mode(cpu); } if (is_protected_kvm_enabled()) { init_cpu_logical_map(); - if (!init_psci_relay()) + if (!init_psci_relay()) { + err = -ENODEV; + goto out_err; + } + } + + if (is_protected_kvm_enabled()) { + err = kvm_hyp_init_protection(hyp_va_bits); + if (err) { + kvm_err("Failed to init hyp memory protection\n"); goto out_err; + } } return 0; @@ -1822,6 +1906,72 @@ out_err: return err; } +static void _kvm_host_prot_finalize(void *discard) +{ + WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); +} + +static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) +{ + return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end); +} + +#define pkvm_mark_hyp_section(__section) \ + pkvm_mark_hyp(__pa_symbol(__section##_start), \ + __pa_symbol(__section##_end)) + +static int finalize_hyp_mode(void) +{ + int cpu, ret; + + if (!is_protected_kvm_enabled()) + return 0; + + ret = pkvm_mark_hyp_section(__hyp_idmap_text); + if (ret) + return ret; + + ret = pkvm_mark_hyp_section(__hyp_text); + if (ret) + return ret; + + ret = pkvm_mark_hyp_section(__hyp_rodata); + if (ret) + return ret; + + ret = pkvm_mark_hyp_section(__hyp_bss); + if (ret) + return ret; + + ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size); + if (ret) + return ret; + + for_each_possible_cpu(cpu) { + phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]); + phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order()); + + ret = pkvm_mark_hyp(start, end); + if (ret) + return ret; + + start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu)); + end = start + PAGE_SIZE; + ret = pkvm_mark_hyp(start, end); + if (ret) + return ret; + } + + /* + * Flip the static key upfront as that may no longer be possible + * once the host stage 2 is installed. + */ + static_branch_enable(&kvm_protected_mode_initialized); + on_each_cpu(_kvm_host_prot_finalize, NULL, 1); + + return 0; +} + static void check_kvm_target_cpu(void *ret) { *(int *)ret = kvm_target_cpu(); @@ -1896,11 +2046,6 @@ int kvm_arch_init(void *opaque) in_hyp_mode = is_kernel_in_hyp_mode(); - if (!in_hyp_mode && kvm_arch_requires_vhe()) { - kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n"); - return -ENODEV; - } - if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || cpus_have_final_cap(ARM64_WORKAROUND_1508412)) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ @@ -1938,8 +2083,15 @@ int kvm_arch_init(void *opaque) if (err) goto out_hyp; + if (!in_hyp_mode) { + err = finalize_hyp_mode(); + if (err) { + kvm_err("Failed to finalize Hyp protection\n"); + goto out_hyp; + } + } + if (is_protected_kvm_enabled()) { - static_branch_enable(&kvm_protected_mode_initialized); kvm_info("Protected nVHE mode initialized successfully\n"); } else if (in_hyp_mode) { kvm_info("VHE mode initialized successfully\n"); |