diff options
Diffstat (limited to 'arch')
46 files changed, 539 insertions, 286 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5656e7aacd69..9b4d629f7628 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -108,9 +108,9 @@ config ARM64 select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP + select GENERIC_FIND_FIRST_BIT select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_IPI - select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL @@ -138,6 +138,7 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48) + select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE) select HAVE_ARCH_KFENCE @@ -194,6 +195,7 @@ config ARM64 select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING + select KASAN_VMALLOC if KASAN_GENERIC select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH @@ -1058,6 +1060,9 @@ config SYS_SUPPORTS_HUGETLBFS config ARCH_HAS_CACHE_LINE_SIZE def_bool y +config ARCH_HAS_FILTER_PGPROT + def_bool y + config ARCH_ENABLE_SPLIT_PMD_PTLOCK def_bool y if PGTABLE_LEVELS > 2 @@ -1416,19 +1421,6 @@ config ARM64_USE_LSE_ATOMICS built with binutils >= 2.25 in order for the new instructions to be used. -config ARM64_VHE - bool "Enable support for Virtualization Host Extensions (VHE)" - default y - help - Virtualization Host Extensions (VHE) allow the kernel to run - directly at EL2 (instead of EL1) on processors that support - it. This leads to better performance for KVM, as they reduce - the cost of the world switch. - - Selecting this option allows the VHE feature to be detected - at runtime, and does not affect processors that do not - implement this feature. - endmenu menu "ARMv8.2 architectural features" @@ -1681,10 +1673,23 @@ config ARM64_MTE endmenu +menu "ARMv8.7 architectural features" + +config ARM64_EPAN + bool "Enable support for Enhanced Privileged Access Never (EPAN)" + default y + depends on ARM64_PAN + help + Enhanced Privileged Access Never (EPAN) allows Privileged + Access Never to be used with Execute-only mappings. + + The feature is detected at runtime, and will remain disabled + if the cpu does not implement the feature. +endmenu + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y - depends on !KVM || ARM64_VHE help The Scalable Vector Extension (SVE) is an extension to the AArch64 execution state which complements and extends the SIMD functionality @@ -1713,12 +1718,6 @@ config ARM64_SVE booting the kernel. If unsure and you are not observing these symptoms, you should assume that it is safe to say Y. - CPUs that support SVE are architecturally required to support the - Virtualization Host Extensions (VHE), so the kernel makes no - provision for supporting SVE alongside KVM without VHE enabled. - Thus, you will need to enable CONFIG_ARM64_VHE if you want to support - KVM in the same kernel image. - config ARM64_MODULE_PLTS bool "Use PLTs to allow module memory to spill over into vmalloc area" depends on MODULES diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index d612f633b771..8793a9cb9d4b 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1156,6 +1156,7 @@ CONFIG_CRYPTO_DEV_HISI_TRNG=m CONFIG_CMA_SIZE_MBYTES=32 CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_REDUCED=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 880b9054d75c..934b9be582d2 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -173,7 +173,7 @@ static inline void gic_pmr_mask_irqs(void) static inline void gic_arch_enable_irqs(void) { - asm volatile ("msr daifclr, #2" : : : "memory"); + asm volatile ("msr daifclr, #3" : : : "memory"); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 9f0ec21d6327..88d20f04c64a 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -165,25 +165,6 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } -/* - * Ensure that reads of the counter are treated the same as memory reads - * for the purposes of ordering by subsequent memory barriers. - * - * This insanity brought to you by speculative system register reads, - * out-of-order memory accesses, sequence locks and Thomas Gleixner. - * - * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html - */ -#define arch_counter_enforce_ordering(val) do { \ - u64 tmp, _val = (val); \ - \ - asm volatile( \ - " eor %0, %1, %1\n" \ - " add %0, sp, %0\n" \ - " ldr xzr, [%0]" \ - : "=r" (tmp) : "r" (_val)); \ -} while (0) - static __always_inline u64 __arch_counter_get_cntpct_stable(void) { u64 cnt; @@ -224,8 +205,6 @@ static __always_inline u64 __arch_counter_get_cntvct(void) return cnt; } -#undef arch_counter_enforce_ordering - static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 6ac38f7cf824..ab569b0b45fc 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -49,9 +49,9 @@ msr daif, \flags .endm - /* IRQ is the lowest priority flag, unconditionally unmask the rest. */ - .macro enable_da_f - msr daifclr, #(8 | 4 | 1) + /* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */ + .macro enable_da + msr daifclr, #(8 | 4) .endm /* @@ -59,7 +59,7 @@ */ .macro save_and_disable_irq, flags mrs \flags, daif - msr daifset, #2 + msr daifset, #3 .endm .macro restore_irq, flags diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index c3009b0e5239..065ba482daf0 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -25,10 +25,6 @@ #define psb_csync() asm volatile("hint #17" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory") -#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \ - SB_BARRIER_INSN"nop\n", \ - ARM64_HAS_SB)) - #ifdef CONFIG_ARM64_PSEUDO_NMI #define pmr_sync() \ do { \ @@ -70,6 +66,25 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, return mask; } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. + * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner. + * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + #define __smp_mb() dmb(ish) #define __smp_rmb() dmb(ishld) #define __smp_wmb() dmb(ishst) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b77d997b173b..9e3ec4dd56d8 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -66,7 +66,8 @@ #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_LDAPR 59 #define ARM64_KVM_PROTECTED_MODE 60 +#define ARM64_HAS_EPAN 61 -#define ARM64_NCAPS 61 +#define ARM64_NCAPS 62 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 61177bac49fa..338840c00e8e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -63,6 +63,23 @@ struct arm64_ftr_bits { s64 safe_val; /* safe value for FTR_EXACT features */ }; +/* + * Describe the early feature override to the core override code: + * + * @val Values that are to be merged into the final + * sanitised value of the register. Only the bitfields + * set to 1 in @mask are valid + * @mask Mask of the features that are overridden by @val + * + * A @mask field set to full-1 indicates that the corresponding field + * in @val is a valid override. + * + * A @mask field set to full-0 with the corresponding @val field set + * to full-0 denotes that this field has no override + * + * A @mask field set to full-0 with the corresponding @val field set + * to full-1 denotes thath this field has an invalid override. + */ struct arm64_ftr_override { u64 val; u64 mask; diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 1c26d7baa67f..5eb7af9c4557 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -13,8 +13,8 @@ #include <asm/ptrace.h> #define DAIF_PROCCTX 0 -#define DAIF_PROCCTX_NOIRQ PSR_I_BIT -#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) +#define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT) +#define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) #define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) @@ -47,7 +47,7 @@ static inline unsigned long local_daif_save_flags(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) - flags |= PSR_I_BIT; + flags |= PSR_I_BIT | PSR_F_BIT; } return flags; @@ -69,7 +69,7 @@ static inline void local_daif_restore(unsigned long flags) bool irq_disabled = flags & PSR_I_BIT; WARN_ON(system_has_prio_mask_debugging() && - !(read_sysreg(daif) & PSR_I_BIT)); + (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT)); if (!irq_disabled) { trace_hardirqs_on(); @@ -86,7 +86,7 @@ static inline void local_daif_restore(unsigned long flags) * If interrupts are disabled but we can take * asynchronous errors, we can take NMIs */ - flags &= ~PSR_I_BIT; + flags &= ~(PSR_I_BIT | PSR_F_BIT); pmr = GIC_PRIO_IRQOFF; } else { pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index d77d358f9395..b3f2d3bb0938 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -131,6 +131,26 @@ .Lskip_sve_\@: .endm +/* Disable any fine grained traps */ +.macro __init_el2_fgt + mrs x1, id_aa64mmfr0_el1 + ubfx x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4 + cbz x1, .Lskip_fgt_\@ + + msr_s SYS_HDFGRTR_EL2, xzr + msr_s SYS_HDFGWTR_EL2, xzr + msr_s SYS_HFGRTR_EL2, xzr + msr_s SYS_HFGWTR_EL2, xzr + msr_s SYS_HFGITR_EL2, xzr + + mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU + ubfx x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4 + cbz x1, .Lskip_fgt_\@ + + msr_s SYS_HAFGRTR_EL2, xzr +.Lskip_fgt_\@: +.endm + .macro __init_el2_nvhe_prepare_eret mov x0, #INIT_PSTATE_EL1 msr spsr_el2, x0 @@ -155,6 +175,7 @@ __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve + __init_el2_fgt __init_el2_nvhe_prepare_eret .endm diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index bec5f14b622a..ebb263b2d3b1 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -73,6 +73,7 @@ extern void sve_flush_live(void); extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); +extern void sve_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index b2b0c6405eb0..fac08e18bcd5 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -8,6 +8,10 @@ struct pt_regs; +int set_handle_irq(void (*handle_irq)(struct pt_regs *)); +#define set_handle_irq set_handle_irq +int set_handle_fiq(void (*handle_fiq)(struct pt_regs *)); + static inline int nr_legacy_irqs(void) { return 0; diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index a1020285ea75..81bbfa3a035b 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -2,6 +2,8 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H +extern void arch_irq_work_raise(void); + static inline bool arch_irq_work_has_interrupt(void) { return true; diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index ff328e5bbb75..b57b9b1e4344 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -12,15 +12,13 @@ /* * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and - * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai' + * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif' * order: * Masking debug exceptions causes all other exceptions to be masked too/ - * Masking SError masks irq, but not debug exceptions. Masking irqs has no - * side effects for other flags. Keeping to this order makes it easier for - * entry.S to know which exceptions should be unmasked. - * - * FIQ is never expected, but we mask it when we disable debug exceptions, and - * unmask it at all other times. + * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are + * always masked and unmasked together, and have no side effects for other + * flags. Keeping to this order makes it easier for entry.S to know which + * exceptions should be unmasked. */ /* @@ -35,7 +33,7 @@ static inline void arch_local_irq_enable(void) } asm volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable", + "msr daifclr, #3 // arch_local_irq_enable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : @@ -54,7 +52,7 @@ static inline void arch_local_irq_disable(void) } asm volatile(ALTERNATIVE( - "msr daifset, #2 // arch_local_irq_disable", + "msr daifset, #3 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 9b557a457f24..387279540139 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -47,8 +47,6 @@ long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, unsigned long addr, unsigned long data); -void mte_assign_mem_tag_range(void *addr, size_t size); - #else /* CONFIG_ARM64_MTE */ /* unused if !CONFIG_ARM64_MTE, silence the compiler */ @@ -84,10 +82,6 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, return -EIO; } -static inline void mte_assign_mem_tag_range(void *addr, size_t size) -{ -} - #endif /* CONFIG_ARM64_MTE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 3c6a7f5988b1..31fbab3d6f99 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,7 +27,10 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE); + pudval_t pudval = PUD_TYPE_TABLE; + + pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; + __pud_populate(pudp, __pa(pmdp), pudval); } #else static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) @@ -45,7 +48,10 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - __p4d_populate(p4dp, __pa(pudp), PUD_TYPE_TABLE); + p4dval_t p4dval = P4D_TYPE_TABLE; + + p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN; + __p4d_populate(p4dp, __pa(pudp), p4dval); } #else static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) @@ -70,16 +76,15 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - /* - * The pmd must be loaded with the physical address of the PTE table - */ - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE); + VM_BUG_ON(mm != &init_mm); + __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { - __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE); + VM_BUG_ON(mm == &init_mm); + __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 42442a0ae2ab..b82575a33f8b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -94,6 +94,17 @@ /* * Hardware page table definitions. * + * Level 0 descriptor (P4D). + */ +#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0) +#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1) +#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) +#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) +#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) +#define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) + +/* * Level 1 descriptor (PUD). */ #define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) @@ -101,6 +112,8 @@ #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59) +#define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) /* * Level 2 descriptor (PMD). @@ -122,6 +135,8 @@ #define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52) #define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53) #define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54) +#define PMD_TABLE_PXN (_AT(pmdval_t, 1) << 59) +#define PMD_TABLE_UXN (_AT(pmdval_t, 1) << 60) /* * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9a65fb528110..fab2f573f7a4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -87,12 +87,13 @@ extern bool arm64_use_ng_mappings; #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_READONLY_EXEC +#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -101,7 +102,7 @@ extern bool arm64_use_ng_mappings; #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC +#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 47027796c2f9..0b10204e72fc 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -113,11 +113,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) +/* + * Execute-only user mappings do not have the PTE_USER bit set. All valid + * kernel mappings have the PTE_UXN bit set. + */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) - + ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) /* * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been @@ -130,12 +131,14 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* - * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check). PROT_NONE mappings do not have the PTE_VALID bit - * set. + * p??_access_permitted() is true for valid user mappings (PTE_USER + * bit set, subject to the write permission check). For execute-only + * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits + * not set) must return false. PROT_NONE mappings do not have the + * PTE_VALID bit set. */ #define pte_access_permitted(pte, write) \ - (pte_valid_user(pte) && (!(write) || pte_write(pte))) + (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) #define pmd_access_permitted(pmd, write) \ (pte_access_permitted(pmd_pte(pmd), (write))) #define pud_access_permitted(pud, write) \ @@ -995,6 +998,18 @@ static inline bool arch_wants_old_prefaulted_pte(void) } #define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + if (cpus_have_const_cap(ARM64_HAS_EPAN)) + return prot; + + if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY)) + return prot; + + return PAGE_READONLY_EXEC; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 38187f74e089..b1dd7ecff7ef 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -23,7 +23,7 @@ struct ptdump_info { void ptdump_walk(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_PTDUMP_DEBUGFS -void ptdump_debugfs_register(struct ptdump_info *info, const char *name); +void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index bcb01ca15325..0e357757c0cc 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -145,6 +145,7 @@ bool cpus_are_stuck_in_kernel(void); extern void crash_smp_send_stop(void); extern bool smp_crash_stop_failed(void); +extern void panic_smp_self_stop(void); #endif /* ifndef __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index eb29b1fe8255..4b33ca620679 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -148,27 +148,7 @@ static inline bool on_accessible_stack(const struct task_struct *tsk, return false; } -static inline void start_backtrace(struct stackframe *frame, - unsigned long fp, unsigned long pc) -{ - frame->fp = fp; - frame->pc = pc; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame->graph = 0; -#endif - - /* - * Prime the first unwind. - * - * In unwind_frame() we'll check that the FP points to a valid stack, - * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be - * treated as a transition to whichever stack that happens to be. The - * prev_fp value won't be used, but we set it to 0 such that it is - * definitely not an accessible stack address. - */ - bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); - frame->prev_fp = 0; - frame->prev_type = STACK_TYPE_UNKNOWN; -} +void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d4a5fca984c3..ff7724cdd350 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -475,9 +475,15 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) +#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) +#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) +#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) +#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) +#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) @@ -597,6 +603,7 @@ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_EPAN (BIT(57)) #define SCTLR_EL1_ATA0 (BIT(42)) #define SCTLR_EL1_TCF0_SHIFT 38 @@ -637,7 +644,7 @@ SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \ - SCTLR_EL1_RES1) + SCTLR_EL1_EPAN | SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ #define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 631ab1281633..4b4c0dac0e14 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -83,11 +83,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, */ isb(); asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); - /* - * This isb() is required to prevent that the seq lock is - * speculated.# - */ - isb(); + arch_counter_enforce_ordering(res); return res; } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 066030717a4c..3bc30656880d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -809,6 +809,12 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) reg->name, ftrp->shift + ftrp->width - 1, ftrp->shift, str, tmp); + } else if ((ftr_mask & reg->override->val) == ftr_mask) { + reg->override->val &= ~ftr_mask; + pr_warn("%s[%d:%d]: impossible override, ignored\n", + reg->name, + ftrp->shift + ftrp->width - 1, + ftrp->shift); } val = arm64_ftr_set_value(ftrp, val, ftr_new); @@ -1617,7 +1623,6 @@ int get_cpu_with_amu_feat(void) } #endif -#ifdef CONFIG_ARM64_VHE static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) { return is_kernel_in_hyp_mode(); @@ -1636,7 +1641,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN)) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } -#endif static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) { @@ -1821,6 +1825,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ +#ifdef CONFIG_ARM64_EPAN + { + .desc = "Enhanced Privileged Access Never", + .capability = ARM64_HAS_EPAN, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 3, + }, +#endif /* CONFIG_ARM64_EPAN */ #ifdef CONFIG_ARM64_LSE_ATOMICS { .desc = "LSE atomic instructions", @@ -1839,7 +1855,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, -#ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, @@ -1847,7 +1862,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, -#endif /* CONFIG_ARM64_VHE */ { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 2ca395c25448..3ecec60d3295 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -48,6 +48,11 @@ SYM_FUNC_START(sve_get_vl) ret SYM_FUNC_END(sve_get_vl) +SYM_FUNC_START(sve_set_vq) + sve_load_vq x0, x1, x2 + ret +SYM_FUNC_END(sve_set_vq) + /* * Load SVE state from FPSIMD state. * diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a31a0a713c85..a45b4ebbfe7d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -491,28 +491,14 @@ tsk .req x28 // current thread_info /* * Interrupt handling. */ - .macro irq_handler - ldr_l x1, handle_arch_irq + .macro irq_handler, handler:req + ldr_l x1, \handler mov x0, sp irq_stack_entry blr x1 irq_stack_exit .endm -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * Set res to 0 if irqs were unmasked in interrupted context. - * Otherwise set res to non-0 value. - */ - .macro test_irqs_unmasked res:req, pmr:req -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - sub \res, \pmr, #GIC_PRIO_IRQON -alternative_else - mov \res, xzr -alternative_endif - .endm -#endif - .macro gic_prio_kentry_setup, tmp:req #ifdef CONFIG_ARM64_PSEUDO_NMI alternative_if ARM64_HAS_IRQ_PRIO_MASKING @@ -531,6 +517,47 @@ alternative_endif #endif .endm + .macro el1_interrupt_handler, handler:req + gic_prio_irq_setup pmr=x20, tmp=x1 + enable_da + + mov x0, sp + bl enter_el1_irq_or_nmi + + irq_handler \handler + +#ifdef CONFIG_PREEMPTION + ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + /* + * DA were cleared at start of handling, and IF are cleared by + * the GIC irqchip driver using gic_arch_enable_irqs() for + * normal IRQs. If anything is set, it means we come back from + * an NMI instead of a normal IRQ, so skip preemption + */ + mrs x0, daif + orr x24, x24, x0 +alternative_else_nop_endif + cbnz x24, 1f // preempt count != 0 || NMI return path + bl arm64_preempt_schedule_irq // irq en/disable is done inside +1: +#endif + + mov x0, sp + bl exit_el1_irq_or_nmi + .endm + + .macro el0_interrupt_handler, handler:req + gic_prio_irq_setup pmr=x20, tmp=x0 + user_exit_irqoff + enable_da + + tbz x22, #55, 1f + bl do_el0_irq_bp_hardening +1: + irq_handler \handler + .endm + .text /* @@ -547,18 +574,18 @@ SYM_CODE_START(vectors) kernel_ventry 1, sync // Synchronous EL1h kernel_ventry 1, irq // IRQ EL1h - kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, fiq // FIQ EL1h kernel_ventry 1, error // Error EL1h kernel_ventry 0, sync // Synchronous 64-bit EL0 kernel_ventry 0, irq // IRQ 64-bit EL0 - kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0 + kernel_ventry 0, fiq // FIQ 64-bit EL0 kernel_ventry 0, error // Error 64-bit EL0 #ifdef CONFIG_COMPAT kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0 + kernel_ventry 0, fiq_compat, 32 // FIQ 32-bit EL0 kernel_ventry 0, error_compat, 32 // Error 32-bit EL0 #else kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 @@ -624,12 +651,6 @@ SYM_CODE_START_LOCAL(el0_error_invalid) inv_entry 0, BAD_ERROR SYM_CODE_END(el0_error_invalid) -#ifdef CONFIG_COMPAT -SYM_CODE_START_LOCAL(el0_fiq_invalid_compat) - inv_entry 0, BAD_FIQ, 32 -SYM_CODE_END(el0_fiq_invalid_compat) -#endif - SYM_CODE_START_LOCAL(el1_sync_invalid) inv_entry 1, BAD_SYNC SYM_CODE_END(el1_sync_invalid) @@ -660,35 +681,16 @@ SYM_CODE_END(el1_sync) .align 6 SYM_CODE_START_LOCAL_NOALIGN(el1_irq) kernel_entry 1 - gic_prio_irq_setup pmr=x20, tmp=x1 - enable_da_f - - mov x0, sp - bl enter_el1_irq_or_nmi - - irq_handler - -#ifdef CONFIG_PREEMPTION - ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - /* - * DA_F were cleared at start of handling. If anything is set in DAIF, - * we come back from an NMI, so skip preemption - */ - mrs x0, daif - orr x24, x24, x0 -alternative_else_nop_endif - cbnz x24, 1f // preempt count != 0 || NMI return path - bl arm64_preempt_schedule_irq // irq en/disable is done inside -1: -#endif - - mov x0, sp - bl exit_el1_irq_or_nmi - + el1_interrupt_handler handle_arch_irq kernel_exit 1 SYM_CODE_END(el1_irq) +SYM_CODE_START_LOCAL_NOALIGN(el1_fiq) + kernel_entry 1 + el1_interrupt_handler handle_arch_fiq + kernel_exit 1 +SYM_CODE_END(el1_fiq) + /* * EL0 mode handlers. */ @@ -715,6 +717,11 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat) b el0_irq_naked SYM_CODE_END(el0_irq_compat) +SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat) + kernel_entry 0, 32 + b el0_fiq_naked +SYM_CODE_END(el0_fiq_compat) + SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat) kernel_entry 0, 32 b el0_error_naked @@ -725,18 +732,17 @@ SYM_CODE_END(el0_error_compat) SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: - gic_prio_irq_setup pmr=x20, tmp=x0 - user_exit_irqoff - enable_da_f - - tbz x22, #55, 1f - bl do_el0_irq_bp_hardening -1: - irq_handler - + el0_interrupt_handler handle_arch_irq b ret_to_user SYM_CODE_END(el0_irq) +SYM_CODE_START_LOCAL_NOALIGN(el0_fiq) + kernel_entry 0 +el0_fiq_naked: + el0_interrupt_handler handle_arch_fiq + b ret_to_user +SYM_CODE_END(el0_fiq) + SYM_CODE_START_LOCAL(el1_error) kernel_entry 1 mrs x1, esr_el1 @@ -757,7 +763,7 @@ el0_error_naked: mov x0, sp mov x1, x25 bl do_serror - enable_da_f + enable_da b ret_to_user SYM_CODE_END(el0_error) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 823e3a8a8871..ad3dd34a83cf 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -285,7 +285,7 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), ¤t->thread.uw.fpsimd_state.fpsr, sve_vq_from_vl(current->thread.sve_vl) - 1); @@ -307,7 +307,8 @@ static void fpsimd_save(void) WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - if (system_supports_sve() && test_thread_flag(TIF_SVE)) { + if (IS_ENABLED(CONFIG_ARM64_SVE) && + test_thread_flag(TIF_SVE)) { if (WARN_ON(sve_get_vl() != last->sve_vl)) { /* * Can't save the user regs, so current would @@ -926,9 +927,8 @@ void fpsimd_release_task(struct task_struct *dead_task) * Trapped SVE access * * Storage is allocated for the full SVE state, the current FPSIMD - * register contents are migrated across, and TIF_SVE is set so that - * the SVE access trap will be disabled the next time this task - * reaches ret_to_user. + * register contents are migrated across, and the access trap is + * disabled. * * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state() * would have disabled the SVE access trap for userspace during @@ -946,15 +946,24 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) get_cpu_fpsimd_context(); - fpsimd_save(); - - /* Force ret_to_user to reload the registers: */ - fpsimd_flush_task_state(current); - - fpsimd_to_sve(current); if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ + /* + * Convert the FPSIMD state to SVE, zeroing all the state that + * is not shared with FPSIMD. If (as is likely) the current + * state is live in the registers then do this there and + * update our metadata for the current task including + * disabling the trap, otherwise update our in-memory copy. + */ + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { + sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1); + sve_flush_live(); + fpsimd_bind_task_to_cpu(); + } else { + fpsimd_to_sve(current); + } + put_cpu_fpsimd_context(); } @@ -1092,7 +1101,7 @@ void fpsimd_preserve_current_state(void) void fpsimd_signal_preserve_current_state(void) { fpsimd_preserve_current_state(); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) + if (test_thread_flag(TIF_SVE)) sve_to_fpsimd(current); } @@ -1181,7 +1190,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; - if (system_supports_sve() && test_thread_flag(TIF_SVE)) + if (test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); task_fpsimd_load(); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 840bda1869e9..96873dfa67fd 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr) * booted in EL1 or EL2 respectively. */ SYM_FUNC_START(init_kernel_el) - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 - mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq init_el2 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 isb mov_q x0, INIT_PSTATE_EL1 msr spsr_el1, x0 @@ -504,9 +503,43 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr vbar_el2, x0 isb + /* + * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, + * making it impossible to start in nVHE mode. Is that + * compliant with the architecture? Absolutely not! + */ + mrs x0, hcr_el2 + and x0, x0, #HCR_E2H + cbz x0, 1f + + /* Switching to VHE requires a sane SCTLR_EL1 as a start */ + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr_s SYS_SCTLR_EL12, x0 + + /* + * Force an eret into a helper "function", and let it return + * to our original caller... This makes sure that we have + * initialised the basic PSTATE state. + */ + mov x0, #INIT_PSTATE_EL2 + msr spsr_el1, x0 + adr x0, __cpu_stick_to_vhe + msr elr_el1, x0 + eret + +1: + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 + msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 eret + +__cpu_stick_to_vhe: + mov x0, #HVC_VHE_RESTART + hvc #0 + mov x0, #BOOT_CPU_MODE_EL2 + ret SYM_FUNC_END(init_kernel_el) /* diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 5eccbd62fec8..74ad3db061d1 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors) ventry el2_fiq_invalid // FIQ EL2t ventry el2_error_invalid // Error EL2t - ventry el2_sync_invalid // Synchronous EL2h + ventry elx_sync // Synchronous EL2h ventry el2_irq_invalid // IRQ EL2h ventry el2_fiq_invalid // FIQ EL2h ventry el2_error_invalid // Error EL2h - ventry el1_sync // Synchronous 64-bit EL1 + ventry elx_sync // Synchronous 64-bit EL1 ventry el1_irq_invalid // IRQ 64-bit EL1 ventry el1_fiq_invalid // FIQ 64-bit EL1 ventry el1_error_invalid // Error 64-bit EL1 @@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors) .align 11 -SYM_CODE_START_LOCAL(el1_sync) +SYM_CODE_START_LOCAL(elx_sync) cmp x0, #HVC_SET_VECTORS b.ne 1f msr vbar_el2, x1 @@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync) 9: mov x0, xzr eret -SYM_CODE_END(el1_sync) +SYM_CODE_END(elx_sync) // nVHE? No way! Give me the real thing! SYM_CODE_START_LOCAL(mutate_to_vhe) @@ -224,7 +224,6 @@ SYM_FUNC_END(__hyp_reset_vectors) * Entry point to switch to VHE if deemed capable */ SYM_FUNC_START(switch_to_vhe) -#ifdef CONFIG_ARM64_VHE // Need to have booted at EL2 adr_l x1, __boot_cpu_mode ldr w0, [x1] @@ -240,6 +239,5 @@ SYM_FUNC_START(switch_to_vhe) mov x0, #HVC_VHE_RESTART hvc #0 1: -#endif ret SYM_FUNC_END(switch_to_vhe) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 83f1c4b92095..e628c8ce1ffe 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -25,14 +25,26 @@ struct ftr_set_desc { struct { char name[FTR_DESC_FIELD_LEN]; u8 shift; + bool (*filter)(u64 val); } fields[]; }; +static bool __init mmfr1_vh_filter(u64 val) +{ + /* + * If we ever reach this point while running VHE, we're + * guaranteed to be on one of these funky, VHE-stuck CPUs. If + * the user was trying to force nVHE on us, proceed with + * attitude adjustment. + */ + return !(is_kernel_in_hyp_mode() && val == 0); +} + static const struct ftr_set_desc mmfr1 __initconst = { .name = "id_aa64mmfr1", .override = &id_aa64mmfr1_override, .fields = { - { "vh", ID_AA64MMFR1_VHE_SHIFT }, + { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter }, {} }, }; @@ -124,6 +136,18 @@ static void __init match_options(const char *cmdline) if (find_field(cmdline, regs[i], f, &v)) continue; + /* + * If an override gets filtered out, advertise + * it by setting the value to 0xf, but + * clearing the mask... Yes, this is fragile. + */ + if (regs[i]->fields[f].filter && + !regs[i]->fields[f].filter(v)) { + regs[i]->override->val |= mask; + regs[i]->override->mask &= ~mask; + continue; + } + regs[i]->override->val &= ~mask; regs[i]->override->val |= (v << shift) & mask; regs[i]->override->mask |= mask; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index dfb1feab867d..bda49430c9ea 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -71,13 +71,44 @@ static void init_irq_stacks(void) } #endif +static void default_handle_irq(struct pt_regs *regs) +{ + panic("IRQ taken without a root IRQ handler\n"); +} + +static void default_handle_fiq(struct pt_regs *regs) +{ + panic("FIQ taken without a root FIQ handler\n"); +} + +void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq; +void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq; + +int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ + if (handle_arch_irq != default_handle_irq) + return -EBUSY; + + handle_arch_irq = handle_irq; + pr_info("Root IRQ handler: %ps\n", handle_irq); + return 0; +} + +int __init set_handle_fiq(void (*handle_fiq)(struct pt_regs *)) +{ + if (handle_arch_fiq != default_handle_fiq) + return -EBUSY; + + handle_arch_fiq = handle_fiq; + pr_info("Root FIQ handler: %ps\n", handle_fiq); + return 0; +} + void __init init_IRQ(void) { init_irq_stacks(); init_irq_scs(); irqchip_init(); - if (!handle_arch_irq) - panic("No interrupt controller found."); if (system_uses_irq_prio_masking()) { /* diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 27f8939deb1b..341342b207f6 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -128,15 +128,17 @@ u64 __init kaslr_early_init(void) /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && + (IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS))) /* - * KASAN does not expect the module region to intersect the - * vmalloc region, since shadow memory is allocated for each - * module at load time, whereas the vmalloc region is shadowed - * by KASAN zero pages. So keep modules out of the vmalloc - * region if KASAN is enabled, and put the kernel well within - * 4 GB of the module region. + * KASAN without KASAN_VMALLOC does not expect the module region + * to intersect the vmalloc region, since shadow memory is + * allocated for each module at load time, whereas the vmalloc + * region is shadowed by KASAN zero pages. So keep modules + * out of the vmalloc region if KASAN is enabled without + * KASAN_VMALLOC, and put the kernel well within 4 GB of the + * module region. */ return offset % SZ_2G; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index fe21e0f06492..b5ec010c481f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -40,14 +40,16 @@ void *module_alloc(unsigned long size) NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - !IS_ENABLED(CONFIG_KASAN_GENERIC) && - !IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + (IS_ENABLED(CONFIG_KASAN_VMALLOC) || + (!IS_ENABLED(CONFIG_KASAN_GENERIC) && + !IS_ENABLED(CONFIG_KASAN_SW_TAGS)))) /* - * KASAN can only deal with module allocations being served - * from the reserved module region, since the remainder of - * the vmalloc region is already backed by zero shadow pages, - * and punching holes into it is non-trivial. Since the module - * region is not randomized when KASAN is enabled, it is even + * KASAN without KASAN_VMALLOC can only deal with module + * allocations being served from the reserved module region, + * since the remainder of the vmalloc region is already + * backed by zero shadow pages, and punching holes into it + * is non-trivial. Since the module region is not randomized + * when KASAN is enabled without KASAN_VMALLOC, it is even * less likely that the module region gets exhausted, so we * can simply omit this fallback in that case. */ diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 4658fcf88c2b..f594957e29bd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -470,9 +470,8 @@ static inline u64 armv8pmu_read_evcntr(int idx) static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) { int idx = event->hw.idx; - u64 val = 0; + u64 val = armv8pmu_read_evcntr(idx); - val = armv8pmu_read_evcntr(idx); if (armv8pmu_event_is_chained(event)) val = (val << 32) | armv8pmu_read_evcntr(idx - 1); return val; @@ -520,7 +519,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - u64 value = 0; + u64 value; if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 66aac2881ba8..ecf0f613327f 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -264,8 +264,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) * normal page fault. */ instruction_pointer_set(regs, (unsigned long) cur->addr); - if (!instruction_pointer(regs)) - BUG(); + BUG_ON(!instruction_pointer(regs)); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 325c83b1a24d..a29028d3d46e 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -84,7 +84,7 @@ static void noinstr __cpu_do_idle_irqprio(void) unsigned long daif_bits; daif_bits = read_sysreg(daif); - write_sysreg(daif_bits | PSR_I_BIT, daif); + write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif); /* * Unmask PMR before going idle to make sure interrupts can diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 357590beaabb..dcd7041b2b07 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -188,6 +188,7 @@ static void init_gic_priority_masking(void) cpuflags = read_sysreg(daif); WARN_ON(!(cpuflags & PSR_I_BIT)); + WARN_ON(!(cpuflags & PSR_F_BIT)); gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ad20981dfda4..2a1a7a281d34 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -32,6 +32,30 @@ * add sp, sp, #0x10 */ + +void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc) +{ + frame->fp = fp; + frame->pc = pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame->graph = 0; +#endif + + /* + * Prime the first unwind. + * + * In unwind_frame() we'll check that the FP points to a valid stack, + * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be + * treated as a transition to whichever stack that happens to be. The + * prev_fp value won't be used, but we set it to 0 such that it is + * definitely not an accessible stack address. + */ + bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); + frame->prev_fp = 0; + frame->prev_type = STACK_TYPE_UNKNOWN; +} + /* * Unwind from one frame record (A) to the next frame record (B). * diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index cee5d04ea9ad..a61fc4f989b3 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -86,7 +86,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static int __vdso_init(enum vdso_abi abi) +static int __init __vdso_init(enum vdso_abi abi) { int i; struct page **vdso_pagelist; @@ -271,6 +271,14 @@ enum aarch32_map { static struct page *aarch32_vectors_page __ro_after_init; static struct page *aarch32_sig_page __ro_after_init; +static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + current->mm->context.sigpage = (void *)new_vma->vm_start; + + return 0; +} + static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_VECTORS] = { .name = "[vectors]", /* ABI */ @@ -279,6 +287,7 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_SIGPAGE] = { .name = "[sigpage]", /* ABI */ .pages = &aarch32_sig_page, + .mremap = aarch32_sigpage_mremap, }, [AA32_MAP_VVAR] = { .name = "[vvar]", @@ -299,34 +308,35 @@ static int aarch32_alloc_kuser_vdso_page(void) if (!IS_ENABLED(CONFIG_KUSER_HELPERS)) return 0; - vdso_page = get_zeroed_page(GFP_ATOMIC); + vdso_page = get_zeroed_page(GFP_KERNEL); if (!vdso_page) return -ENOMEM; memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start, kuser_sz); aarch32_vectors_page = virt_to_page(vdso_page); - flush_dcache_page(aarch32_vectors_page); return 0; } +#define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1 static int aarch32_alloc_sigpage(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; - unsigned long sigpage; + __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD); + void *sigpage; - sigpage = get_zeroed_page(GFP_ATOMIC); + sigpage = (void *)__get_free_page(GFP_KERNEL); if (!sigpage) return -ENOMEM; - memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); + memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison)); + memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz); aarch32_sig_page = virt_to_page(sigpage); - flush_dcache_page(aarch32_sig_page); return 0; } -static int __aarch32_alloc_vdso_pages(void) +static int __init __aarch32_alloc_vdso_pages(void) { if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f37d4e3830b7..871c82ab0a30 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -527,7 +527,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault; - unsigned long vm_flags = VM_ACCESS_FLAGS; + unsigned long vm_flags; unsigned int mm_flags = FAULT_FLAG_DEFAULT; unsigned long addr = untagged_addr(far); @@ -544,12 +544,28 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; + /* + * vm_flags tells us what bits we must have in vma->vm_flags + * for the fault to be benign, __do_page_fault() would check + * vma->vm_flags & vm_flags and returns an error if the + * intersection is empty + */ if (is_el0_instruction_abort(esr)) { + /* It was exec fault */ vm_flags = VM_EXEC; mm_flags |= FAULT_FLAG_INSTRUCTION; } else if (is_write_abort(esr)) { + /* It was write fault */ vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; + } else { + /* It was read fault */ + vm_flags = VM_READ; + /* Write implies read */ + vm_flags |= VM_WRITE; + /* If EPAN is absent then exec implies read */ + if (!cpus_have_const_cap(ARM64_HAS_EPAN)) + vm_flags |= VM_EXEC; } if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) { diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index d8e66c78440e..61b52a92b8b6 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -79,7 +79,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, phys_addr_t pmd_phys = early ? __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); - __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE); + __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); } return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr); @@ -92,7 +92,7 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, phys_addr_t pud_phys = early ? __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); - __p4d_populate(p4dp, pud_phys, PMD_TYPE_TABLE); + __p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE); } return early ? pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr); @@ -214,15 +214,18 @@ static void __init kasan_init_shadow(void) { u64 kimg_shadow_start, kimg_shadow_end; u64 mod_shadow_start, mod_shadow_end; + u64 vmalloc_shadow_end; phys_addr_t pa_start, pa_end; u64 i; - kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK; - kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end)); + kimg_shadow_start = (u64)kasan_mem_to_shadow(KERNEL_START) & PAGE_MASK; + kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END)); mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); + vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END); + /* * We are going to perform proper setup of shadow memory. * At first we should unmap early shadow (clear_pgds() call below). @@ -237,16 +240,22 @@ static void __init kasan_init_shadow(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); kasan_map_populate(kimg_shadow_start, kimg_shadow_end, - early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); + early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START)))); kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); - kasan_populate_early_shadow((void *)kimg_shadow_end, - (void *)KASAN_SHADOW_END); - if (kimg_shadow_start > mod_shadow_end) - kasan_populate_early_shadow((void *)mod_shadow_end, - (void *)kimg_shadow_start); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + BUILD_BUG_ON(VMALLOC_START != MODULES_END); + kasan_populate_early_shadow((void *)vmalloc_shadow_end, + (void *)KASAN_SHADOW_END); + } else { + kasan_populate_early_shadow((void *)kimg_shadow_end, + (void *)KASAN_SHADOW_END); + if (kimg_shadow_start > mod_shadow_end) + kasan_populate_early_shadow((void *)mod_shadow_end, + (void *)kimg_shadow_start); + } for_each_mem_range(i, &pa_start, &pa_end) { void *start = (void *)__phys_to_virt(pa_start); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 7484ea4f6ba0..fac957ff5187 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -39,6 +39,7 @@ #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) +#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; @@ -185,10 +186,14 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, BUG_ON(pmd_sect(pmd)); if (pmd_none(pmd)) { + pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; phys_addr_t pte_phys; + + if (flags & NO_EXEC_MAPPINGS) + pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(PAGE_SHIFT); - __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE); + __pmd_populate(pmdp, pte_phys, pmdval); pmd = READ_ONCE(*pmdp); } BUG_ON(pmd_bad(pmd)); @@ -259,10 +264,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, */ BUG_ON(pud_sect(pud)); if (pud_none(pud)) { + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; phys_addr_t pmd_phys; + + if (flags & NO_EXEC_MAPPINGS) + pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(PMD_SHIFT); - __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); + __pud_populate(pudp, pmd_phys, pudval); pud = READ_ONCE(*pudp); } BUG_ON(pud_bad(pud)); @@ -306,10 +315,14 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, p4d_t p4d = READ_ONCE(*p4dp); if (p4d_none(p4d)) { + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; phys_addr_t pud_phys; + + if (flags & NO_EXEC_MAPPINGS) + p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(PUD_SHIFT); - __p4d_populate(p4dp, pud_phys, PUD_TYPE_TABLE); + __p4d_populate(p4dp, pud_phys, p4dval); p4d = READ_ONCE(*p4dp); } BUG_ON(p4d_bad(p4d)); @@ -486,14 +499,24 @@ early_param("crashkernel", enable_crash_mem_map); static void __init map_mem(pgd_t *pgdp) { + static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); phys_addr_t kernel_start = __pa_symbol(_stext); phys_addr_t kernel_end = __pa_symbol(__init_begin); phys_addr_t start, end; - int flags = 0; + int flags = NO_EXEC_MAPPINGS; u64 i; + /* + * Setting hierarchical PXNTable attributes on table entries covering + * the linear region is only possible if it is guaranteed that no table + * entries at any level are being shared between the linear region and + * the vmalloc region. Check whether this is true for the PGD level, in + * which case it is guaranteed to be true for all other levels as well. + */ + BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); + if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) - flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* * Take care not to create a writable alias for the @@ -1210,11 +1233,11 @@ void __init early_fixmap_init(void) pudp = pud_offset_kimg(p4dp, addr); } else { if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE); + __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); pudp = fixmap_pud(addr); } if (pud_none(READ_ONCE(*pudp))) - __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); + __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); pmdp = fixmap_pmd(addr); __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); @@ -1463,7 +1486,7 @@ struct range arch_get_mappable_range(void) int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { - int ret, flags = 0; + int ret, flags = NO_EXEC_MAPPINGS; VM_BUG_ON(!mhp_range_allowed(start, size, true)); @@ -1473,7 +1496,7 @@ int arch_add_memory(int nid, u64 start, u64 size, */ if (rodata_full || debug_pagealloc_enabled() || IS_ENABLED(CONFIG_KFENCE)) - flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), size, params->pgprot, __pgd_pgtable_alloc, diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c967bfd30d2b..0a48191534ff 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -419,14 +419,17 @@ SYM_FUNC_START(__cpu_setup) reset_amuserenr_el0 x1 // Disable AMU access from EL0 /* - * Memory region attributes + * Default values for VMSA control registers. These will be adjusted + * below depending on detected CPU features. */ - mov_q x5, MAIR_EL1_SET -#ifdef CONFIG_ARM64_MTE - mte_tcr .req x20 - - mov mte_tcr, #0 + mair .req x17 + tcr .req x16 + mov_q mair, MAIR_EL1_SET + mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ + TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ + TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS +#ifdef CONFIG_ARM64_MTE /* * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported * (ID_AA64PFR1_EL1[11:8] > 1). @@ -438,7 +441,7 @@ SYM_FUNC_START(__cpu_setup) /* Normal Tagged memory type at the corresponding MAIR index */ mov x10, #MAIR_ATTR_NORMAL_TAGGED - bfi x5, x10, #(8 * MT_NORMAL_TAGGED), #8 + bfi mair, x10, #(8 * MT_NORMAL_TAGGED), #8 /* initialize GCR_EL1: all non-zero tags excluded by default */ mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK) @@ -449,37 +452,26 @@ SYM_FUNC_START(__cpu_setup) msr_s SYS_TFSRE0_EL1, xzr /* set the TCR_EL1 bits */ - mov_q mte_tcr, TCR_KASAN_HW_FLAGS + mov_q x10, TCR_KASAN_HW_FLAGS + orr tcr, tcr, x10 1: #endif - msr mair_el1, x5 - /* - * Set/prepare TCR and TTBR. TCR_EL1.T1SZ gets further - * adjusted if the kernel is compiled with 52bit VA support. - */ - mov_q x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ - TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ - TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS -#ifdef CONFIG_ARM64_MTE - orr x10, x10, mte_tcr - .unreq mte_tcr -#endif - tcr_clear_errata_bits x10, x9, x5 + tcr_clear_errata_bits tcr, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 ldr_l x9, vabits_actual sub x9, xzr, x9 add x9, x9, #64 - tcr_set_t1sz x10, x9 + tcr_set_t1sz tcr, x9 #else ldr_l x9, idmap_t0sz #endif - tcr_set_t0sz x10, x9 + tcr_set_t0sz tcr, x9 /* * Set the IPS bits in TCR_EL1. */ - tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6 + tcr_compute_pa_size tcr, #TCR_IPS_SHIFT, x5, x6 #ifdef CONFIG_ARM64_HW_AFDBM /* * Enable hardware update of the Access Flags bit. @@ -489,13 +481,17 @@ SYM_FUNC_START(__cpu_setup) mrs x9, ID_AA64MMFR1_EL1 and x9, x9, #0xf cbz x9, 1f - orr x10, x10, #TCR_HA // hardware Access flag update + orr tcr, tcr, #TCR_HA // hardware Access flag update 1: #endif /* CONFIG_ARM64_HW_AFDBM */ - msr tcr_el1, x10 + msr mair_el1, mair + msr tcr_el1, tcr /* * Prepare SCTLR */ mov_q x0, INIT_SCTLR_EL1_MMU_ON ret // return to head.S + + .unreq mair + .unreq tcr SYM_FUNC_END(__cpu_setup) diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 0e050d76b83a..a50e92ea1878 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -337,7 +337,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info) ptdump_walk_pgd(&st.ptdump, info->mm, NULL); } -static void ptdump_initialize(void) +static void __init ptdump_initialize(void) { unsigned i, j; @@ -381,7 +381,7 @@ void ptdump_check_wx(void) pr_info("Checked W+X mappings: passed, no W+X pages found\n"); } -static int ptdump_init(void) +static int __init ptdump_init(void) { address_markers[PAGE_END_NR].start_address = PAGE_END; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index d29d722ec3ec..68bf1a125502 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -16,7 +16,7 @@ static int ptdump_show(struct seq_file *m, void *v) } DEFINE_SHOW_ATTRIBUTE(ptdump); -void ptdump_debugfs_register(struct ptdump_info *info, const char *name) +void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name) { debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); } |