diff options
Diffstat (limited to 'arch')
39 files changed, 490 insertions, 278 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55d1c22f107c..9b4d629f7628 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -111,7 +111,6 @@ config ARM64 select GENERIC_FIND_FIRST_BIT select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_IPI - select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL @@ -139,6 +138,7 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48) + select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE) select HAVE_ARCH_KFENCE @@ -195,6 +195,7 @@ config ARM64 select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING + select KASAN_VMALLOC if KASAN_GENERIC select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH @@ -1059,6 +1060,9 @@ config SYS_SUPPORTS_HUGETLBFS config ARCH_HAS_CACHE_LINE_SIZE def_bool y +config ARCH_HAS_FILTER_PGPROT + def_bool y + config ARCH_ENABLE_SPLIT_PMD_PTLOCK def_bool y if PGTABLE_LEVELS > 2 @@ -1417,19 +1421,6 @@ config ARM64_USE_LSE_ATOMICS built with binutils >= 2.25 in order for the new instructions to be used. -config ARM64_VHE - bool "Enable support for Virtualization Host Extensions (VHE)" - default y - help - Virtualization Host Extensions (VHE) allow the kernel to run - directly at EL2 (instead of EL1) on processors that support - it. This leads to better performance for KVM, as they reduce - the cost of the world switch. - - Selecting this option allows the VHE feature to be detected - at runtime, and does not affect processors that do not - implement this feature. - endmenu menu "ARMv8.2 architectural features" @@ -1682,10 +1673,23 @@ config ARM64_MTE endmenu +menu "ARMv8.7 architectural features" + +config ARM64_EPAN + bool "Enable support for Enhanced Privileged Access Never (EPAN)" + default y + depends on ARM64_PAN + help + Enhanced Privileged Access Never (EPAN) allows Privileged + Access Never to be used with Execute-only mappings. + + The feature is detected at runtime, and will remain disabled + if the cpu does not implement the feature. +endmenu + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y - depends on !KVM || ARM64_VHE help The Scalable Vector Extension (SVE) is an extension to the AArch64 execution state which complements and extends the SIMD functionality @@ -1714,12 +1718,6 @@ config ARM64_SVE booting the kernel. If unsure and you are not observing these symptoms, you should assume that it is safe to say Y. - CPUs that support SVE are architecturally required to support the - Virtualization Host Extensions (VHE), so the kernel makes no - provision for supporting SVE alongside KVM without VHE enabled. - Thus, you will need to enable CONFIG_ARM64_VHE if you want to support - KVM in the same kernel image. - config ARM64_MODULE_PLTS bool "Use PLTs to allow module memory to spill over into vmalloc area" depends on MODULES diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index bbdb54702aa7..ab6c14ef9f4e 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -700,7 +700,7 @@ AES_FUNC_START(aes_mac_update) cbz w5, .Lmacout encrypt_block v0, w2, x1, x7, w8 st1 {v0.16b}, [x4] /* return dg */ - cond_yield .Lmacout, x7 + cond_yield .Lmacout, x7, x8 b .Lmacloop4x .Lmac1x: add w3, w3, #4 diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 8c02bbc2684e..889ca0f8972b 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -121,7 +121,7 @@ CPU_LE( rev32 v11.16b, v11.16b ) add dgav.4s, dgav.4s, dg0v.4s cbz w2, 2f - cond_yield 3f, x5 + cond_yield 3f, x5, x6 b 0b /* diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 6cdea7d56059..491179922f49 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -129,7 +129,7 @@ CPU_LE( rev32 v19.16b, v19.16b ) /* handled all input blocks? */ cbz w2, 2f - cond_yield 3f, x5 + cond_yield 3f, x5, x6 b 0b /* diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S index 6f5208414fe3..9c77313f5a60 100644 --- a/arch/arm64/crypto/sha3-ce-core.S +++ b/arch/arm64/crypto/sha3-ce-core.S @@ -184,11 +184,11 @@ SYM_FUNC_START(sha3_ce_transform) eor v0.16b, v0.16b, v31.16b cbnz w8, 3b - cond_yield 3f, x8 + cond_yield 4f, x8, x9 cbnz w2, 0b /* save state */ -3: st1 { v0.1d- v3.1d}, [x0], #32 +4: st1 { v0.1d- v3.1d}, [x0], #32 st1 { v4.1d- v7.1d}, [x0], #32 st1 { v8.1d-v11.1d}, [x0], #32 st1 {v12.1d-v15.1d}, [x0], #32 diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S index d6e7f6c95fa6..b6a3a36e15f5 100644 --- a/arch/arm64/crypto/sha512-ce-core.S +++ b/arch/arm64/crypto/sha512-ce-core.S @@ -195,7 +195,7 @@ CPU_LE( rev64 v19.16b, v19.16b ) add v10.2d, v10.2d, v2.2d add v11.2d, v11.2d, v3.2d - cond_yield 3f, x4 + cond_yield 3f, x4, x5 /* handled all input blocks? */ cbnz w2, 0b diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 880b9054d75c..934b9be582d2 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -173,7 +173,7 @@ static inline void gic_pmr_mask_irqs(void) static inline void gic_arch_enable_irqs(void) { - asm volatile ("msr daifclr, #2" : : : "memory"); + asm volatile ("msr daifclr, #3" : : : "memory"); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 9f0ec21d6327..88d20f04c64a 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -165,25 +165,6 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } -/* - * Ensure that reads of the counter are treated the same as memory reads - * for the purposes of ordering by subsequent memory barriers. - * - * This insanity brought to you by speculative system register reads, - * out-of-order memory accesses, sequence locks and Thomas Gleixner. - * - * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html - */ -#define arch_counter_enforce_ordering(val) do { \ - u64 tmp, _val = (val); \ - \ - asm volatile( \ - " eor %0, %1, %1\n" \ - " add %0, sp, %0\n" \ - " ldr xzr, [%0]" \ - : "=r" (tmp) : "r" (_val)); \ -} while (0) - static __always_inline u64 __arch_counter_get_cntpct_stable(void) { u64 cnt; @@ -224,8 +205,6 @@ static __always_inline u64 __arch_counter_get_cntvct(void) return cnt; } -#undef arch_counter_enforce_ordering - static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ca31594d3d6c..ab569b0b45fc 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -15,6 +15,7 @@ #include <asm-generic/export.h> #include <asm/asm-offsets.h> +#include <asm/alternative.h> #include <asm/cpufeature.h> #include <asm/cputype.h> #include <asm/debug-monitors.h> @@ -23,6 +24,14 @@ #include <asm/ptrace.h> #include <asm/thread_info.h> + /* + * Provide a wxN alias for each wN register so what we can paste a xN + * reference after a 'w' to obtain the 32-bit version. + */ + .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + wx\n .req w\n + .endr + .macro save_and_disable_daif, flags mrs \flags, daif msr daifset, #0xf @@ -40,9 +49,9 @@ msr daif, \flags .endm - /* IRQ is the lowest priority flag, unconditionally unmask the rest. */ - .macro enable_da_f - msr daifclr, #(8 | 4 | 1) + /* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */ + .macro enable_da + msr daifclr, #(8 | 4) .endm /* @@ -50,7 +59,7 @@ */ .macro save_and_disable_irq, flags mrs \flags, daif - msr daifset, #2 + msr daifset, #3 .endm .macro restore_irq, flags @@ -692,90 +701,33 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU isb .endm -/* - * Check whether to yield to another runnable task from kernel mode NEON code - * (which runs with preemption disabled). - * - * if_will_cond_yield_neon - * // pre-yield patchup code - * do_cond_yield_neon - * // post-yield patchup code - * endif_yield_neon <label> - * - * where <label> is optional, and marks the point where execution will resume - * after a yield has been performed. If omitted, execution resumes right after - * the endif_yield_neon invocation. Note that the entire sequence, including - * the provided patchup code, will be omitted from the image if - * CONFIG_PREEMPTION is not defined. - * - * As a convenience, in the case where no patchup code is required, the above - * sequence may be abbreviated to - * - * cond_yield_neon <label> - * - * Note that the patchup code does not support assembler directives that change - * the output section, any use of such directives is undefined. - * - * The yield itself consists of the following: - * - Check whether the preempt count is exactly 1 and a reschedule is also - * needed. If so, calling of preempt_enable() in kernel_neon_end() will - * trigger a reschedule. If it is not the case, yielding is pointless. - * - Disable and re-enable kernel mode NEON, and branch to the yield fixup - * code. - * - * This macro sequence may clobber all CPU state that is not guaranteed by the - * AAPCS to be preserved across an ordinary function call. - */ - - .macro cond_yield_neon, lbl - if_will_cond_yield_neon - do_cond_yield_neon - endif_yield_neon \lbl - .endm - - .macro if_will_cond_yield_neon -#ifdef CONFIG_PREEMPTION - get_current_task x0 - ldr x0, [x0, #TSK_TI_PREEMPT] - sub x0, x0, #PREEMPT_DISABLE_OFFSET - cbz x0, .Lyield_\@ - /* fall through to endif_yield_neon */ - .subsection 1 -.Lyield_\@ : -#else - .section ".discard.cond_yield_neon", "ax" -#endif - .endm - - .macro do_cond_yield_neon - bl kernel_neon_end - bl kernel_neon_begin - .endm - - .macro endif_yield_neon, lbl - .ifnb \lbl - b \lbl - .else - b .Lyield_out_\@ - .endif - .previous -.Lyield_out_\@ : - .endm - /* - * Check whether preempt-disabled code should yield as soon as it - * is able. This is the case if re-enabling preemption a single - * time results in a preempt count of zero, and the TIF_NEED_RESCHED - * flag is set. (Note that the latter is stored negated in the - * top word of the thread_info::preempt_count field) + * Check whether preempt/bh-disabled asm code should yield as soon as + * it is able. This is the case if we are currently running in task + * context, and either a softirq is pending, or the TIF_NEED_RESCHED + * flag is set and re-enabling preemption a single time would result in + * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is + * stored negated in the top word of the thread_info::preempt_count + * field) */ - .macro cond_yield, lbl:req, tmp:req -#ifdef CONFIG_PREEMPTION + .macro cond_yield, lbl:req, tmp:req, tmp2:req get_current_task \tmp ldr \tmp, [\tmp, #TSK_TI_PREEMPT] + /* + * If we are serving a softirq, there is no point in yielding: the + * softirq will not be preempted no matter what we do, so we should + * run to completion as quickly as we can. + */ + tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@ +#ifdef CONFIG_PREEMPTION sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET cbz \tmp, \lbl #endif + adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING + this_cpu_offset \tmp2 + ldr w\tmp, [\tmp, \tmp2] + cbnz w\tmp, \lbl // yield on pending softirq in task context +.Lnoyield_\@: .endm /* diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index bab29932d21b..065ba482daf0 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -66,6 +66,25 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, return mask; } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. + * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner. + * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + #define __smp_mb() dmb(ish) #define __smp_rmb() dmb(ishld) #define __smp_wmb() dmb(ishst) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b77d997b173b..9e3ec4dd56d8 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -66,7 +66,8 @@ #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_LDAPR 59 #define ARM64_KVM_PROTECTED_MODE 60 +#define ARM64_HAS_EPAN 61 -#define ARM64_NCAPS 61 +#define ARM64_NCAPS 62 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 61177bac49fa..338840c00e8e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -63,6 +63,23 @@ struct arm64_ftr_bits { s64 safe_val; /* safe value for FTR_EXACT features */ }; +/* + * Describe the early feature override to the core override code: + * + * @val Values that are to be merged into the final + * sanitised value of the register. Only the bitfields + * set to 1 in @mask are valid + * @mask Mask of the features that are overridden by @val + * + * A @mask field set to full-1 indicates that the corresponding field + * in @val is a valid override. + * + * A @mask field set to full-0 with the corresponding @val field set + * to full-0 denotes that this field has no override + * + * A @mask field set to full-0 with the corresponding @val field set + * to full-1 denotes thath this field has an invalid override. + */ struct arm64_ftr_override { u64 val; u64 mask; diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 1c26d7baa67f..5eb7af9c4557 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -13,8 +13,8 @@ #include <asm/ptrace.h> #define DAIF_PROCCTX 0 -#define DAIF_PROCCTX_NOIRQ PSR_I_BIT -#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) +#define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT) +#define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) #define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) @@ -47,7 +47,7 @@ static inline unsigned long local_daif_save_flags(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) - flags |= PSR_I_BIT; + flags |= PSR_I_BIT | PSR_F_BIT; } return flags; @@ -69,7 +69,7 @@ static inline void local_daif_restore(unsigned long flags) bool irq_disabled = flags & PSR_I_BIT; WARN_ON(system_has_prio_mask_debugging() && - !(read_sysreg(daif) & PSR_I_BIT)); + (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT)); if (!irq_disabled) { trace_hardirqs_on(); @@ -86,7 +86,7 @@ static inline void local_daif_restore(unsigned long flags) * If interrupts are disabled but we can take * asynchronous errors, we can take NMIs */ - flags &= ~PSR_I_BIT; + flags &= ~(PSR_I_BIT | PSR_F_BIT); pmr = GIC_PRIO_IRQOFF; } else { pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index d77d358f9395..b3f2d3bb0938 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -131,6 +131,26 @@ .Lskip_sve_\@: .endm +/* Disable any fine grained traps */ +.macro __init_el2_fgt + mrs x1, id_aa64mmfr0_el1 + ubfx x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4 + cbz x1, .Lskip_fgt_\@ + + msr_s SYS_HDFGRTR_EL2, xzr + msr_s SYS_HDFGWTR_EL2, xzr + msr_s SYS_HFGRTR_EL2, xzr + msr_s SYS_HFGWTR_EL2, xzr + msr_s SYS_HFGITR_EL2, xzr + + mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU + ubfx x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4 + cbz x1, .Lskip_fgt_\@ + + msr_s SYS_HAFGRTR_EL2, xzr +.Lskip_fgt_\@: +.endm + .macro __init_el2_nvhe_prepare_eret mov x0, #INIT_PSTATE_EL1 msr spsr_el2, x0 @@ -155,6 +175,7 @@ __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve + __init_el2_fgt __init_el2_nvhe_prepare_eret .endm diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index b2b0c6405eb0..fac08e18bcd5 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -8,6 +8,10 @@ struct pt_regs; +int set_handle_irq(void (*handle_irq)(struct pt_regs *)); +#define set_handle_irq set_handle_irq +int set_handle_fiq(void (*handle_fiq)(struct pt_regs *)); + static inline int nr_legacy_irqs(void) { return 0; diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index ff328e5bbb75..b57b9b1e4344 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -12,15 +12,13 @@ /* * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and - * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai' + * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif' * order: * Masking debug exceptions causes all other exceptions to be masked too/ - * Masking SError masks irq, but not debug exceptions. Masking irqs has no - * side effects for other flags. Keeping to this order makes it easier for - * entry.S to know which exceptions should be unmasked. - * - * FIQ is never expected, but we mask it when we disable debug exceptions, and - * unmask it at all other times. + * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are + * always masked and unmasked together, and have no side effects for other + * flags. Keeping to this order makes it easier for entry.S to know which + * exceptions should be unmasked. */ /* @@ -35,7 +33,7 @@ static inline void arch_local_irq_enable(void) } asm volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable", + "msr daifclr, #3 // arch_local_irq_enable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : @@ -54,7 +52,7 @@ static inline void arch_local_irq_disable(void) } asm volatile(ALTERNATIVE( - "msr daifset, #2 // arch_local_irq_disable", + "msr daifset, #3 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 3c6a7f5988b1..31fbab3d6f99 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,7 +27,10 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE); + pudval_t pudval = PUD_TYPE_TABLE; + + pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; + __pud_populate(pudp, __pa(pmdp), pudval); } #else static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) @@ -45,7 +48,10 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - __p4d_populate(p4dp, __pa(pudp), PUD_TYPE_TABLE); + p4dval_t p4dval = P4D_TYPE_TABLE; + + p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN; + __p4d_populate(p4dp, __pa(pudp), p4dval); } #else static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) @@ -70,16 +76,15 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - /* - * The pmd must be loaded with the physical address of the PTE table - */ - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE); + VM_BUG_ON(mm != &init_mm); + __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { - __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE); + VM_BUG_ON(mm == &init_mm); + __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 42442a0ae2ab..b82575a33f8b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -94,6 +94,17 @@ /* * Hardware page table definitions. * + * Level 0 descriptor (P4D). + */ +#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0) +#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1) +#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) +#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) +#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) +#define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) + +/* * Level 1 descriptor (PUD). */ #define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) @@ -101,6 +112,8 @@ #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59) +#define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) /* * Level 2 descriptor (PMD). @@ -122,6 +135,8 @@ #define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52) #define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53) #define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54) +#define PMD_TABLE_PXN (_AT(pmdval_t, 1) << 59) +#define PMD_TABLE_UXN (_AT(pmdval_t, 1) << 60) /* * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9a65fb528110..fab2f573f7a4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -87,12 +87,13 @@ extern bool arm64_use_ng_mappings; #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_READONLY_EXEC +#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -101,7 +102,7 @@ extern bool arm64_use_ng_mappings; #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC +#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 47027796c2f9..0b10204e72fc 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -113,11 +113,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) +/* + * Execute-only user mappings do not have the PTE_USER bit set. All valid + * kernel mappings have the PTE_UXN bit set. + */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) - + ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) /* * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been @@ -130,12 +131,14 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* - * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check). PROT_NONE mappings do not have the PTE_VALID bit - * set. + * p??_access_permitted() is true for valid user mappings (PTE_USER + * bit set, subject to the write permission check). For execute-only + * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits + * not set) must return false. PROT_NONE mappings do not have the + * PTE_VALID bit set. */ #define pte_access_permitted(pte, write) \ - (pte_valid_user(pte) && (!(write) || pte_write(pte))) + (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) #define pmd_access_permitted(pmd, write) \ (pte_access_permitted(pmd_pte(pmd), (write))) #define pud_access_permitted(pud, write) \ @@ -995,6 +998,18 @@ static inline bool arch_wants_old_prefaulted_pte(void) } #define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + if (cpus_have_const_cap(ARM64_HAS_EPAN)) + return prot; + + if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY)) + return prot; + + return PAGE_READONLY_EXEC; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d4a5fca984c3..ff7724cdd350 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -475,9 +475,15 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) +#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) +#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) +#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) +#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) +#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) @@ -597,6 +603,7 @@ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_EPAN (BIT(57)) #define SCTLR_EL1_ATA0 (BIT(42)) #define SCTLR_EL1_TCF0_SHIFT 38 @@ -637,7 +644,7 @@ SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \ - SCTLR_EL1_RES1) + SCTLR_EL1_EPAN | SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ #define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 631ab1281633..4b4c0dac0e14 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -83,11 +83,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, */ isb(); asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); - /* - * This isb() is required to prevent that the seq lock is - * speculated.# - */ - isb(); + arch_counter_enforce_ordering(res); return res; } diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a36e2fc330d4..cc7267a24bf7 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -95,6 +95,8 @@ int main(void) DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); BLANK(); DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); + DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT); + DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending)); BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 066030717a4c..3bc30656880d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -809,6 +809,12 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) reg->name, ftrp->shift + ftrp->width - 1, ftrp->shift, str, tmp); + } else if ((ftr_mask & reg->override->val) == ftr_mask) { + reg->override->val &= ~ftr_mask; + pr_warn("%s[%d:%d]: impossible override, ignored\n", + reg->name, + ftrp->shift + ftrp->width - 1, + ftrp->shift); } val = arm64_ftr_set_value(ftrp, val, ftr_new); @@ -1617,7 +1623,6 @@ int get_cpu_with_amu_feat(void) } #endif -#ifdef CONFIG_ARM64_VHE static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) { return is_kernel_in_hyp_mode(); @@ -1636,7 +1641,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN)) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } -#endif static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) { @@ -1821,6 +1825,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ +#ifdef CONFIG_ARM64_EPAN + { + .desc = "Enhanced Privileged Access Never", + .capability = ARM64_HAS_EPAN, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 3, + }, +#endif /* CONFIG_ARM64_EPAN */ #ifdef CONFIG_ARM64_LSE_ATOMICS { .desc = "LSE atomic instructions", @@ -1839,7 +1855,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, -#ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, @@ -1847,7 +1862,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, -#endif /* CONFIG_ARM64_VHE */ { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a82af88e8599..a45b4ebbfe7d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -491,8 +491,8 @@ tsk .req x28 // current thread_info /* * Interrupt handling. */ - .macro irq_handler - ldr_l x1, handle_arch_irq + .macro irq_handler, handler:req + ldr_l x1, \handler mov x0, sp irq_stack_entry blr x1 @@ -517,6 +517,47 @@ tsk .req x28 // current thread_info #endif .endm + .macro el1_interrupt_handler, handler:req + gic_prio_irq_setup pmr=x20, tmp=x1 + enable_da + + mov x0, sp + bl enter_el1_irq_or_nmi + + irq_handler \handler + +#ifdef CONFIG_PREEMPTION + ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + /* + * DA were cleared at start of handling, and IF are cleared by + * the GIC irqchip driver using gic_arch_enable_irqs() for + * normal IRQs. If anything is set, it means we come back from + * an NMI instead of a normal IRQ, so skip preemption + */ + mrs x0, daif + orr x24, x24, x0 +alternative_else_nop_endif + cbnz x24, 1f // preempt count != 0 || NMI return path + bl arm64_preempt_schedule_irq // irq en/disable is done inside +1: +#endif + + mov x0, sp + bl exit_el1_irq_or_nmi + .endm + + .macro el0_interrupt_handler, handler:req + gic_prio_irq_setup pmr=x20, tmp=x0 + user_exit_irqoff + enable_da + + tbz x22, #55, 1f + bl do_el0_irq_bp_hardening +1: + irq_handler \handler + .endm + .text /* @@ -533,18 +574,18 @@ SYM_CODE_START(vectors) kernel_ventry 1, sync // Synchronous EL1h kernel_ventry 1, irq // IRQ EL1h - kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, fiq // FIQ EL1h kernel_ventry 1, error // Error EL1h kernel_ventry 0, sync // Synchronous 64-bit EL0 kernel_ventry 0, irq // IRQ 64-bit EL0 - kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0 + kernel_ventry 0, fiq // FIQ 64-bit EL0 kernel_ventry 0, error // Error 64-bit EL0 #ifdef CONFIG_COMPAT kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0 + kernel_ventry 0, fiq_compat, 32 // FIQ 32-bit EL0 kernel_ventry 0, error_compat, 32 // Error 32-bit EL0 #else kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 @@ -610,12 +651,6 @@ SYM_CODE_START_LOCAL(el0_error_invalid) inv_entry 0, BAD_ERROR SYM_CODE_END(el0_error_invalid) -#ifdef CONFIG_COMPAT -SYM_CODE_START_LOCAL(el0_fiq_invalid_compat) - inv_entry 0, BAD_FIQ, 32 -SYM_CODE_END(el0_fiq_invalid_compat) -#endif - SYM_CODE_START_LOCAL(el1_sync_invalid) inv_entry 1, BAD_SYNC SYM_CODE_END(el1_sync_invalid) @@ -646,35 +681,16 @@ SYM_CODE_END(el1_sync) .align 6 SYM_CODE_START_LOCAL_NOALIGN(el1_irq) kernel_entry 1 - gic_prio_irq_setup pmr=x20, tmp=x1 - enable_da_f - - mov x0, sp - bl enter_el1_irq_or_nmi - - irq_handler - -#ifdef CONFIG_PREEMPTION - ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - /* - * DA_F were cleared at start of handling. If anything is set in DAIF, - * we come back from an NMI, so skip preemption - */ - mrs x0, daif - orr x24, x24, x0 -alternative_else_nop_endif - cbnz x24, 1f // preempt count != 0 || NMI return path - bl arm64_preempt_schedule_irq // irq en/disable is done inside -1: -#endif - - mov x0, sp - bl exit_el1_irq_or_nmi - + el1_interrupt_handler handle_arch_irq kernel_exit 1 SYM_CODE_END(el1_irq) +SYM_CODE_START_LOCAL_NOALIGN(el1_fiq) + kernel_entry 1 + el1_interrupt_handler handle_arch_fiq + kernel_exit 1 +SYM_CODE_END(el1_fiq) + /* * EL0 mode handlers. */ @@ -701,6 +717,11 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat) b el0_irq_naked SYM_CODE_END(el0_irq_compat) +SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat) + kernel_entry 0, 32 + b el0_fiq_naked +SYM_CODE_END(el0_fiq_compat) + SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat) kernel_entry 0, 32 b el0_error_naked @@ -711,18 +732,17 @@ SYM_CODE_END(el0_error_compat) SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: - gic_prio_irq_setup pmr=x20, tmp=x0 - user_exit_irqoff - enable_da_f - - tbz x22, #55, 1f - bl do_el0_irq_bp_hardening -1: - irq_handler - + el0_interrupt_handler handle_arch_irq b ret_to_user SYM_CODE_END(el0_irq) +SYM_CODE_START_LOCAL_NOALIGN(el0_fiq) + kernel_entry 0 +el0_fiq_naked: + el0_interrupt_handler handle_arch_fiq + b ret_to_user +SYM_CODE_END(el0_fiq) + SYM_CODE_START_LOCAL(el1_error) kernel_entry 1 mrs x1, esr_el1 @@ -743,7 +763,7 @@ el0_error_naked: mov x0, sp mov x1, x25 bl do_serror - enable_da_f + enable_da b ret_to_user SYM_CODE_END(el0_error) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index da8cc14746e9..ad3dd34a83cf 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -180,7 +180,7 @@ static void __get_cpu_fpsimd_context(void) */ static void get_cpu_fpsimd_context(void) { - preempt_disable(); + local_bh_disable(); __get_cpu_fpsimd_context(); } @@ -201,7 +201,7 @@ static void __put_cpu_fpsimd_context(void) static void put_cpu_fpsimd_context(void) { __put_cpu_fpsimd_context(); - preempt_enable(); + local_bh_enable(); } static bool have_cpu_fpsimd_context(void) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 840bda1869e9..96873dfa67fd 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr) * booted in EL1 or EL2 respectively. */ SYM_FUNC_START(init_kernel_el) - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 - mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq init_el2 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 isb mov_q x0, INIT_PSTATE_EL1 msr spsr_el1, x0 @@ -504,9 +503,43 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr vbar_el2, x0 isb + /* + * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, + * making it impossible to start in nVHE mode. Is that + * compliant with the architecture? Absolutely not! + */ + mrs x0, hcr_el2 + and x0, x0, #HCR_E2H + cbz x0, 1f + + /* Switching to VHE requires a sane SCTLR_EL1 as a start */ + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr_s SYS_SCTLR_EL12, x0 + + /* + * Force an eret into a helper "function", and let it return + * to our original caller... This makes sure that we have + * initialised the basic PSTATE state. + */ + mov x0, #INIT_PSTATE_EL2 + msr spsr_el1, x0 + adr x0, __cpu_stick_to_vhe + msr elr_el1, x0 + eret + +1: + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 + msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 eret + +__cpu_stick_to_vhe: + mov x0, #HVC_VHE_RESTART + hvc #0 + mov x0, #BOOT_CPU_MODE_EL2 + ret SYM_FUNC_END(init_kernel_el) /* diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 5eccbd62fec8..74ad3db061d1 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors) ventry el2_fiq_invalid // FIQ EL2t ventry el2_error_invalid // Error EL2t - ventry el2_sync_invalid // Synchronous EL2h + ventry elx_sync // Synchronous EL2h ventry el2_irq_invalid // IRQ EL2h ventry el2_fiq_invalid // FIQ EL2h ventry el2_error_invalid // Error EL2h - ventry el1_sync // Synchronous 64-bit EL1 + ventry elx_sync // Synchronous 64-bit EL1 ventry el1_irq_invalid // IRQ 64-bit EL1 ventry el1_fiq_invalid // FIQ 64-bit EL1 ventry el1_error_invalid // Error 64-bit EL1 @@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors) .align 11 -SYM_CODE_START_LOCAL(el1_sync) +SYM_CODE_START_LOCAL(elx_sync) cmp x0, #HVC_SET_VECTORS b.ne 1f msr vbar_el2, x1 @@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync) 9: mov x0, xzr eret -SYM_CODE_END(el1_sync) +SYM_CODE_END(elx_sync) // nVHE? No way! Give me the real thing! SYM_CODE_START_LOCAL(mutate_to_vhe) @@ -224,7 +224,6 @@ SYM_FUNC_END(__hyp_reset_vectors) * Entry point to switch to VHE if deemed capable */ SYM_FUNC_START(switch_to_vhe) -#ifdef CONFIG_ARM64_VHE // Need to have booted at EL2 adr_l x1, __boot_cpu_mode ldr w0, [x1] @@ -240,6 +239,5 @@ SYM_FUNC_START(switch_to_vhe) mov x0, #HVC_VHE_RESTART hvc #0 1: -#endif ret SYM_FUNC_END(switch_to_vhe) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 83f1c4b92095..e628c8ce1ffe 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -25,14 +25,26 @@ struct ftr_set_desc { struct { char name[FTR_DESC_FIELD_LEN]; u8 shift; + bool (*filter)(u64 val); } fields[]; }; +static bool __init mmfr1_vh_filter(u64 val) +{ + /* + * If we ever reach this point while running VHE, we're + * guaranteed to be on one of these funky, VHE-stuck CPUs. If + * the user was trying to force nVHE on us, proceed with + * attitude adjustment. + */ + return !(is_kernel_in_hyp_mode() && val == 0); +} + static const struct ftr_set_desc mmfr1 __initconst = { .name = "id_aa64mmfr1", .override = &id_aa64mmfr1_override, .fields = { - { "vh", ID_AA64MMFR1_VHE_SHIFT }, + { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter }, {} }, }; @@ -124,6 +136,18 @@ static void __init match_options(const char *cmdline) if (find_field(cmdline, regs[i], f, &v)) continue; + /* + * If an override gets filtered out, advertise + * it by setting the value to 0xf, but + * clearing the mask... Yes, this is fragile. + */ + if (regs[i]->fields[f].filter && + !regs[i]->fields[f].filter(v)) { + regs[i]->override->val |= mask; + regs[i]->override->mask &= ~mask; + continue; + } + regs[i]->override->val &= ~mask; regs[i]->override->val |= (v << shift) & mask; regs[i]->override->mask |= mask; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index dfb1feab867d..bda49430c9ea 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -71,13 +71,44 @@ static void init_irq_stacks(void) } #endif +static void default_handle_irq(struct pt_regs *regs) +{ + panic("IRQ taken without a root IRQ handler\n"); +} + +static void default_handle_fiq(struct pt_regs *regs) +{ + panic("FIQ taken without a root FIQ handler\n"); +} + +void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq; +void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq; + +int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ + if (handle_arch_irq != default_handle_irq) + return -EBUSY; + + handle_arch_irq = handle_irq; + pr_info("Root IRQ handler: %ps\n", handle_irq); + return 0; +} + +int __init set_handle_fiq(void (*handle_fiq)(struct pt_regs *)) +{ + if (handle_arch_fiq != default_handle_fiq) + return -EBUSY; + + handle_arch_fiq = handle_fiq; + pr_info("Root FIQ handler: %ps\n", handle_fiq); + return 0; +} + void __init init_IRQ(void) { init_irq_stacks(); init_irq_scs(); irqchip_init(); - if (!handle_arch_irq) - panic("No interrupt controller found."); if (system_uses_irq_prio_masking()) { /* diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 27f8939deb1b..341342b207f6 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -128,15 +128,17 @@ u64 __init kaslr_early_init(void) /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && + (IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS))) /* - * KASAN does not expect the module region to intersect the - * vmalloc region, since shadow memory is allocated for each - * module at load time, whereas the vmalloc region is shadowed - * by KASAN zero pages. So keep modules out of the vmalloc - * region if KASAN is enabled, and put the kernel well within - * 4 GB of the module region. + * KASAN without KASAN_VMALLOC does not expect the module region + * to intersect the vmalloc region, since shadow memory is + * allocated for each module at load time, whereas the vmalloc + * region is shadowed by KASAN zero pages. So keep modules + * out of the vmalloc region if KASAN is enabled without + * KASAN_VMALLOC, and put the kernel well within 4 GB of the + * module region. */ return offset % SZ_2G; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index fe21e0f06492..b5ec010c481f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -40,14 +40,16 @@ void *module_alloc(unsigned long size) NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - !IS_ENABLED(CONFIG_KASAN_GENERIC) && - !IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + (IS_ENABLED(CONFIG_KASAN_VMALLOC) || + (!IS_ENABLED(CONFIG_KASAN_GENERIC) && + !IS_ENABLED(CONFIG_KASAN_SW_TAGS)))) /* - * KASAN can only deal with module allocations being served - * from the reserved module region, since the remainder of - * the vmalloc region is already backed by zero shadow pages, - * and punching holes into it is non-trivial. Since the module - * region is not randomized when KASAN is enabled, it is even + * KASAN without KASAN_VMALLOC can only deal with module + * allocations being served from the reserved module region, + * since the remainder of the vmalloc region is already + * backed by zero shadow pages, and punching holes into it + * is non-trivial. Since the module region is not randomized + * when KASAN is enabled without KASAN_VMALLOC, it is even * less likely that the module region gets exhausted, so we * can simply omit this fallback in that case. */ diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 4658fcf88c2b..f594957e29bd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -470,9 +470,8 @@ static inline u64 armv8pmu_read_evcntr(int idx) static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) { int idx = event->hw.idx; - u64 val = 0; + u64 val = armv8pmu_read_evcntr(idx); - val = armv8pmu_read_evcntr(idx); if (armv8pmu_event_is_chained(event)) val = (val << 32) | armv8pmu_read_evcntr(idx - 1); return val; @@ -520,7 +519,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - u64 value = 0; + u64 value; if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 325c83b1a24d..a29028d3d46e 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -84,7 +84,7 @@ static void noinstr __cpu_do_idle_irqprio(void) unsigned long daif_bits; daif_bits = read_sysreg(daif); - write_sysreg(daif_bits | PSR_I_BIT, daif); + write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif); /* * Unmask PMR before going idle to make sure interrupts can diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 357590beaabb..dcd7041b2b07 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -188,6 +188,7 @@ static void init_gic_priority_masking(void) cpuflags = read_sysreg(daif); WARN_ON(!(cpuflags & PSR_I_BIT)); + WARN_ON(!(cpuflags & PSR_F_BIT)); gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index d1fa288518a7..a61fc4f989b3 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -271,6 +271,14 @@ enum aarch32_map { static struct page *aarch32_vectors_page __ro_after_init; static struct page *aarch32_sig_page __ro_after_init; +static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + current->mm->context.sigpage = (void *)new_vma->vm_start; + + return 0; +} + static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_VECTORS] = { .name = "[vectors]", /* ABI */ @@ -279,6 +287,7 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_SIGPAGE] = { .name = "[sigpage]", /* ABI */ .pages = &aarch32_sig_page, + .mremap = aarch32_sigpage_mremap, }, [AA32_MAP_VVAR] = { .name = "[vvar]", @@ -299,30 +308,31 @@ static int aarch32_alloc_kuser_vdso_page(void) if (!IS_ENABLED(CONFIG_KUSER_HELPERS)) return 0; - vdso_page = get_zeroed_page(GFP_ATOMIC); + vdso_page = get_zeroed_page(GFP_KERNEL); if (!vdso_page) return -ENOMEM; memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start, kuser_sz); aarch32_vectors_page = virt_to_page(vdso_page); - flush_dcache_page(aarch32_vectors_page); return 0; } +#define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1 static int aarch32_alloc_sigpage(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; - unsigned long sigpage; + __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD); + void *sigpage; - sigpage = get_zeroed_page(GFP_ATOMIC); + sigpage = (void *)__get_free_page(GFP_KERNEL); if (!sigpage) return -ENOMEM; - memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); + memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison)); + memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz); aarch32_sig_page = virt_to_page(sigpage); - flush_dcache_page(aarch32_sig_page); return 0; } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f37d4e3830b7..871c82ab0a30 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -527,7 +527,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault; - unsigned long vm_flags = VM_ACCESS_FLAGS; + unsigned long vm_flags; unsigned int mm_flags = FAULT_FLAG_DEFAULT; unsigned long addr = untagged_addr(far); @@ -544,12 +544,28 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; + /* + * vm_flags tells us what bits we must have in vma->vm_flags + * for the fault to be benign, __do_page_fault() would check + * vma->vm_flags & vm_flags and returns an error if the + * intersection is empty + */ if (is_el0_instruction_abort(esr)) { + /* It was exec fault */ vm_flags = VM_EXEC; mm_flags |= FAULT_FLAG_INSTRUCTION; } else if (is_write_abort(esr)) { + /* It was write fault */ vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; + } else { + /* It was read fault */ + vm_flags = VM_READ; + /* Write implies read */ + vm_flags |= VM_WRITE; + /* If EPAN is absent then exec implies read */ + if (!cpus_have_const_cap(ARM64_HAS_EPAN)) + vm_flags |= VM_EXEC; } if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) { diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index d8e66c78440e..61b52a92b8b6 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -79,7 +79,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, phys_addr_t pmd_phys = early ? __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); - __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE); + __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); } return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr); @@ -92,7 +92,7 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, phys_addr_t pud_phys = early ? __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); - __p4d_populate(p4dp, pud_phys, PMD_TYPE_TABLE); + __p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE); } return early ? pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr); @@ -214,15 +214,18 @@ static void __init kasan_init_shadow(void) { u64 kimg_shadow_start, kimg_shadow_end; u64 mod_shadow_start, mod_shadow_end; + u64 vmalloc_shadow_end; phys_addr_t pa_start, pa_end; u64 i; - kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK; - kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end)); + kimg_shadow_start = (u64)kasan_mem_to_shadow(KERNEL_START) & PAGE_MASK; + kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END)); mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); + vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END); + /* * We are going to perform proper setup of shadow memory. * At first we should unmap early shadow (clear_pgds() call below). @@ -237,16 +240,22 @@ static void __init kasan_init_shadow(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); kasan_map_populate(kimg_shadow_start, kimg_shadow_end, - early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); + early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START)))); kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); - kasan_populate_early_shadow((void *)kimg_shadow_end, - (void *)KASAN_SHADOW_END); - if (kimg_shadow_start > mod_shadow_end) - kasan_populate_early_shadow((void *)mod_shadow_end, - (void *)kimg_shadow_start); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + BUILD_BUG_ON(VMALLOC_START != MODULES_END); + kasan_populate_early_shadow((void *)vmalloc_shadow_end, + (void *)KASAN_SHADOW_END); + } else { + kasan_populate_early_shadow((void *)kimg_shadow_end, + (void *)KASAN_SHADOW_END); + if (kimg_shadow_start > mod_shadow_end) + kasan_populate_early_shadow((void *)mod_shadow_end, + (void *)kimg_shadow_start); + } for_each_mem_range(i, &pa_start, &pa_end) { void *start = (void *)__phys_to_virt(pa_start); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 7484ea4f6ba0..fac957ff5187 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -39,6 +39,7 @@ #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) +#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; @@ -185,10 +186,14 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, BUG_ON(pmd_sect(pmd)); if (pmd_none(pmd)) { + pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; phys_addr_t pte_phys; + + if (flags & NO_EXEC_MAPPINGS) + pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(PAGE_SHIFT); - __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE); + __pmd_populate(pmdp, pte_phys, pmdval); pmd = READ_ONCE(*pmdp); } BUG_ON(pmd_bad(pmd)); @@ -259,10 +264,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, */ BUG_ON(pud_sect(pud)); if (pud_none(pud)) { + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; phys_addr_t pmd_phys; + + if (flags & NO_EXEC_MAPPINGS) + pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(PMD_SHIFT); - __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); + __pud_populate(pudp, pmd_phys, pudval); pud = READ_ONCE(*pudp); } BUG_ON(pud_bad(pud)); @@ -306,10 +315,14 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, p4d_t p4d = READ_ONCE(*p4dp); if (p4d_none(p4d)) { + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; phys_addr_t pud_phys; + + if (flags & NO_EXEC_MAPPINGS) + p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(PUD_SHIFT); - __p4d_populate(p4dp, pud_phys, PUD_TYPE_TABLE); + __p4d_populate(p4dp, pud_phys, p4dval); p4d = READ_ONCE(*p4dp); } BUG_ON(p4d_bad(p4d)); @@ -486,14 +499,24 @@ early_param("crashkernel", enable_crash_mem_map); static void __init map_mem(pgd_t *pgdp) { + static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); phys_addr_t kernel_start = __pa_symbol(_stext); phys_addr_t kernel_end = __pa_symbol(__init_begin); phys_addr_t start, end; - int flags = 0; + int flags = NO_EXEC_MAPPINGS; u64 i; + /* + * Setting hierarchical PXNTable attributes on table entries covering + * the linear region is only possible if it is guaranteed that no table + * entries at any level are being shared between the linear region and + * the vmalloc region. Check whether this is true for the PGD level, in + * which case it is guaranteed to be true for all other levels as well. + */ + BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); + if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) - flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* * Take care not to create a writable alias for the @@ -1210,11 +1233,11 @@ void __init early_fixmap_init(void) pudp = pud_offset_kimg(p4dp, addr); } else { if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE); + __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); pudp = fixmap_pud(addr); } if (pud_none(READ_ONCE(*pudp))) - __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); + __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); pmdp = fixmap_pmd(addr); __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); @@ -1463,7 +1486,7 @@ struct range arch_get_mappable_range(void) int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { - int ret, flags = 0; + int ret, flags = NO_EXEC_MAPPINGS; VM_BUG_ON(!mhp_range_allowed(start, size, true)); @@ -1473,7 +1496,7 @@ int arch_add_memory(int nid, u64 start, u64 size, */ if (rodata_full || debug_pagealloc_enabled() || IS_ENABLED(CONFIG_KFENCE)) - flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), size, params->pgprot, __pgd_pgtable_alloc, |