From 7c116db24d944ff04a67c8bd89cb32c5cd0894ff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 9 Jul 2020 21:48:41 +0100 Subject: efi/libstub/arm64: Retain 2MB kernel Image alignment if !KASLR Since commit 82046702e288 ("efi/libstub/arm64: Replace 'preferred' offset with alignment check"), loading a relocatable arm64 kernel at a physical address which is not 2MB aligned and subsequently booting with EFI will leave the Image in-place, relying on the kernel to relocate itself early during boot. In conjunction with commit dd4bc6076587 ("arm64: warn on incorrect placement of the kernel by the bootloader"), which enables CONFIG_RELOCATABLE by default, this effectively means that entering an arm64 kernel loaded at an alignment smaller than 2MB with EFI (e.g. using QEMU) will result in silent relocation at runtime. Unfortunately, this has a subtle but confusing affect for developers trying to inspect the PC value during a crash and comparing it to the symbol addresses in vmlinux using tools such as 'nm' or 'addr2line'; all text addresses will be displaced by a sub-2MB offset, resulting in the wrong symbol being identified in many cases. Passing "nokaslr" on the command line or disabling "CONFIG_RANDOMIZE_BASE" does not help, since the EFI stub only copies the kernel Image to a 2MB boundary if it is not relocatable. Adjust the EFI stub for arm64 so that the minimum Image alignment is 2MB unless KASLR is in use. Cc: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: David Brazdil Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon --- drivers/firmware/efi/libstub/arm64-stub.c | 25 ++++++++++++++----------- drivers/firmware/efi/libstub/efi-stub-helper.c | 2 +- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 7f6a57dec513..e5bfac79e5ac 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -35,13 +35,16 @@ efi_status_t check_platform_features(void) } /* - * Relocatable kernels can fix up the misalignment with respect to - * MIN_KIMG_ALIGN, so they only require a minimum alignment of EFI_KIMG_ALIGN - * (which accounts for the alignment of statically allocated objects such as - * the swapper stack.) + * Although relocatable kernels can fix up the misalignment with respect to + * MIN_KIMG_ALIGN, the resulting virtual text addresses are subtly out of + * sync with those recorded in the vmlinux when kaslr is disabled but the + * image required relocation anyway. Therefore retain 2M alignment unless + * KASLR is in use. */ -static const u64 min_kimg_align = IS_ENABLED(CONFIG_RELOCATABLE) ? EFI_KIMG_ALIGN - : MIN_KIMG_ALIGN; +static u64 min_kimg_align(void) +{ + return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN; +} efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long *image_size, @@ -74,21 +77,21 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); - *reserve_size = kernel_memsize + TEXT_OFFSET % min_kimg_align; + *reserve_size = kernel_memsize + TEXT_OFFSET % min_kimg_align(); if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) { /* * If KASLR is enabled, and we have some randomness available, * locate the kernel at a randomized offset in physical memory. */ - status = efi_random_alloc(*reserve_size, min_kimg_align, + status = efi_random_alloc(*reserve_size, min_kimg_align(), reserve_addr, phys_seed); } else { status = EFI_OUT_OF_RESOURCES; } if (status != EFI_SUCCESS) { - if (IS_ALIGNED((u64)_text - TEXT_OFFSET, min_kimg_align)) { + if (IS_ALIGNED((u64)_text - TEXT_OFFSET, min_kimg_align())) { /* * Just execute from wherever we were loaded by the * UEFI PE/COFF loader if the alignment is suitable. @@ -99,7 +102,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, - ULONG_MAX, min_kimg_align); + ULONG_MAX, min_kimg_align()); if (status != EFI_SUCCESS) { efi_err("Failed to relocate kernel\n"); @@ -108,7 +111,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } } - *image_addr = *reserve_addr + TEXT_OFFSET % min_kimg_align; + *image_addr = *reserve_addr + TEXT_OFFSET % min_kimg_align(); memcpy((void *)*image_addr, _text, kernel_size); return EFI_SUCCESS; diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 89f075275300..7038287a4608 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -19,7 +19,7 @@ #include "efistub.h" bool efi_nochunk; -bool efi_nokaslr; +bool efi_nokaslr = !IS_ENABLED(CONFIG_RANDOMIZE_BASE); bool efi_noinitrd; int efi_loglevel = CONSOLE_LOGLEVEL_DEFAULT; bool efi_novamap; -- cgit v1.2.3 From bdc5c744c7b6457d18a95c26769dad0e7f480a08 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Thu, 16 Jul 2020 17:19:25 +0800 Subject: drivers/perf: Fix kernel panic when rmmod PMU modules during perf sampling When users try to remove PMU modules during perf sampling, kernel panic will happen because the pmu->read() is a NULL pointer here. INFO on HiSilicon hip08 platform as follow: pc : hisi_uncore_pmu_event_update+0x30/0xa4 [hisi_uncore_pmu] lr : hisi_uncore_pmu_read+0x20/0x2c [hisi_uncore_pmu] sp : ffff800010103e90 x29: ffff800010103e90 x28: ffff0027db0c0e40 x27: ffffa29a76f129d8 x26: ffffa29a77ceb000 x25: ffffa29a773a5000 x24: ffffa29a77392000 x23: ffffddffe5943f08 x22: ffff002784285960 x21: ffff002784285800 x20: ffff0027d2e76c80 x19: ffff0027842859e0 x18: ffff80003498bcc8 x17: ffffa29a76afe910 x16: ffffa29a7583f530 x15: 16151a1512061a1e x14: 0000000000000000 x13: ffffa29a76f1e238 x12: 0000000000000001 x11: 0000000000000400 x10: 00000000000009f0 x9 : ffff8000107b3e70 x8 : ffff0027db0c1890 x7 : ffffa29a773a7000 x6 : 00000007f5131013 x5 : 00000007f5131013 x4 : 09f257d417c00000 x3 : 00000002187bd7ce x2 : ffffa29a38f0f0d8 x1 : ffffa29a38eae268 x0 : ffff0027d2e76c80 Call trace: hisi_uncore_pmu_event_update+0x30/0xa4 [hisi_uncore_pmu] hisi_uncore_pmu_read+0x20/0x2c [hisi_uncore_pmu] __perf_event_read+0x1a0/0x1f8 flush_smp_call_function_queue+0xa0/0x160 generic_smp_call_function_single_interrupt+0x18/0x20 handle_IPI+0x31c/0x4dc gic_handle_irq+0x2c8/0x310 el1_irq+0xcc/0x180 arch_cpu_idle+0x4c/0x20c default_idle_call+0x20/0x30 do_idle+0x1b4/0x270 cpu_startup_entry+0x28/0x30 secondary_start_kernel+0x1a4/0x1fc To solve the above issue, current module should be registered to kernel, so that try_module_get() can be invoked when perf sampling starts. This adds the reference counting of module and could prevent users from removing modules during sampling. Reported-by: Haifeng Wang Signed-off-by: Qi Liu Reviewed-by: John Garry Link: https://lore.kernel.org/r/1594891165-8228-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 1 + drivers/perf/fsl_imx8_ddr_perf.c | 1 + drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 48e28ef93a70..90caba56dfbc 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -742,6 +742,7 @@ static int smmu_pmu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, smmu_pmu); smmu_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, .task_ctx_nr = perf_invalid_context, .pmu_enable = smmu_pmu_enable, .pmu_disable = smmu_pmu_disable, diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 90884d14f95f..2aed2d96f8ae 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -512,6 +512,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, { *pmu = (struct ddr_pmu) { .pmu = (struct pmu) { + .module = THIS_MODULE, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, .task_ctx_nr = perf_invalid_context, .attr_groups = attr_groups, diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 15713faaa07e..71587f19fcd5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -378,6 +378,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) ddrc_pmu->sccl_id, ddrc_pmu->index_id); ddrc_pmu->pmu = (struct pmu) { .name = name, + .module = THIS_MODULE, .task_ctx_nr = perf_invalid_context, .event_init = hisi_uncore_pmu_event_init, .pmu_enable = hisi_uncore_pmu_enable, diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index dcc5600788a9..c199de7b219d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -390,6 +390,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) hha_pmu->sccl_id, hha_pmu->index_id); hha_pmu->pmu = (struct pmu) { .name = name, + .module = THIS_MODULE, .task_ctx_nr = perf_invalid_context, .event_init = hisi_uncore_pmu_event_init, .pmu_enable = hisi_uncore_pmu_enable, diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 7719ae4e2c56..567d7e6f69d8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -380,6 +380,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) l3c_pmu->sccl_id, l3c_pmu->index_id); l3c_pmu->pmu = (struct pmu) { .name = name, + .module = THIS_MODULE, .task_ctx_nr = perf_invalid_context, .event_init = hisi_uncore_pmu_event_init, .pmu_enable = hisi_uncore_pmu_enable, -- cgit v1.2.3 From ac2081cdc4d99c57f219c1a6171526e0fa0a6fff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jul 2020 21:16:20 +0100 Subject: arm64: ptrace: Consistently use pseudo-singlestep exceptions Although the arm64 single-step state machine can be fast-forwarded in cases where we wish to generate a SIGTRAP without actually executing an instruction, this has two major limitations outside of simply skipping an instruction due to emulation. 1. Stepping out of a ptrace signal stop into a signal handler where SIGTRAP is blocked. Fast-forwarding the stepping state machine in this case will result in a forced SIGTRAP, with the handler reset to SIG_DFL. 2. The hardware implicitly fast-forwards the state machine when executing an SVC instruction for issuing a system call. This can interact badly with subsequent ptrace stops signalled during the execution of the system call (e.g. SYSCALL_EXIT or seccomp traps), as they may corrupt the stepping state by updating the PSTATE for the tracee. Resolve both of these issues by injecting a pseudo-singlestep exception on entry to a signal handler and also on return to userspace following a system call. Cc: Cc: Mark Rutland Tested-by: Luis Machado Reported-by: Keno Fischer Signed-off-by: Will Deacon --- arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/ptrace.c | 25 +++++++++++++++++++------ arch/arm64/kernel/signal.c | 11 ++--------- arch/arm64/kernel/syscall.c | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 6ea8b6a26ae9..5e784e16ee89 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -93,6 +93,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_FSCHECK (1 << TIF_FSCHECK) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 68b7f34a08f5..057d4aa1af4d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1818,12 +1818,23 @@ static void tracehook_report_syscall(struct pt_regs *regs, saved_reg = regs->regs[regno]; regs->regs[regno] = dir; - if (dir == PTRACE_SYSCALL_EXIT) + if (dir == PTRACE_SYSCALL_ENTER) { + if (tracehook_report_syscall_entry(regs)) + forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else if (!test_thread_flag(TIF_SINGLESTEP)) { tracehook_report_syscall_exit(regs, 0); - else if (tracehook_report_syscall_entry(regs)) - forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else { + regs->regs[regno] = saved_reg; - regs->regs[regno] = saved_reg; + /* + * Signal a pseudo-step exception since we are stepping but + * tracer modifications to the registers may have rewound the + * state machine. + */ + tracehook_report_syscall_exit(regs, 1); + } } int syscall_trace_enter(struct pt_regs *regs) @@ -1851,12 +1862,14 @@ int syscall_trace_enter(struct pt_regs *regs) void syscall_trace_exit(struct pt_regs *regs) { + unsigned long flags = READ_ONCE(current_thread_info()->flags); + audit_syscall_exit(regs); - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + if (flags & _TIF_SYSCALL_TRACEPOINT) trace_sys_exit(regs, regs_return_value(regs)); - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); rseq_syscall(regs); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 801d56cdf701..3b4f31f35e45 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -800,7 +800,6 @@ static void setup_restart_syscall(struct pt_regs *regs) */ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { - struct task_struct *tsk = current; sigset_t *oldset = sigmask_to_save(); int usig = ksig->sig; int ret; @@ -824,14 +823,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) */ ret |= !valid_user_regs(®s->user_regs, current); - /* - * Fast forward the stepping logic so we step into the signal - * handler. - */ - if (!ret) - user_fastforward_single_step(tsk); - - signal_setup_done(ret, ksig, 0); + /* Step into the signal handler if we are stepping */ + signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP)); } /* diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 5f5b868292f5..7c14466a12af 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -139,7 +139,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; - if (!has_syscall_work(flags)) { + if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) { /* * We're off to userspace, where interrupts are * always enabled after we restore the flags from -- cgit v1.2.3 From 3a5a4366cecc25daa300b9a9174f7fdd352b9068 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 13 Feb 2020 12:06:26 +0000 Subject: arm64: ptrace: Override SPSR.SS when single-stepping is enabled Luis reports that, when reverse debugging with GDB, single-step does not function as expected on arm64: | I've noticed, under very specific conditions, that a PTRACE_SINGLESTEP | request by GDB won't execute the underlying instruction. As a consequence, | the PC doesn't move, but we return a SIGTRAP just like we would for a | regular successful PTRACE_SINGLESTEP request. The underlying problem is that when the CPU register state is restored as part of a reverse step, the SPSR.SS bit is cleared and so the hardware single-step state can transition to the "active-pending" state, causing an unexpected step exception to be taken immediately if a step operation is attempted. In hindsight, we probably shouldn't have exposed SPSR.SS in the pstate accessible by the GPR regset, but it's a bit late for that now. Instead, simply prevent userspace from configuring the bit to a value which is inconsistent with the TIF_SINGLESTEP state for the task being traced. Cc: Cc: Mark Rutland Cc: Keno Fischer Link: https://lore.kernel.org/r/1eed6d69-d53d-9657-1fc9-c089be07f98c@linaro.org Reported-by: Luis Machado Tested-by: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 2 ++ arch/arm64/kernel/debug-monitors.c | 20 ++++++++++++++++---- arch/arm64/kernel/ptrace.c | 4 ++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index e5ceea213e39..0b298f48f5bf 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -109,6 +109,8 @@ void disable_debug_monitors(enum dbg_active_el el); void user_rewind_single_step(struct task_struct *task); void user_fastforward_single_step(struct task_struct *task); +void user_regs_reset_single_step(struct user_pt_regs *regs, + struct task_struct *task); void kernel_enable_single_step(struct pt_regs *regs); void kernel_disable_single_step(void); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 5df49366e9ab..91146c0a3691 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -141,17 +141,20 @@ postcore_initcall(debug_monitors_init); /* * Single step API and exception handling. */ -static void set_regs_spsr_ss(struct pt_regs *regs) +static void set_user_regs_spsr_ss(struct user_pt_regs *regs) { regs->pstate |= DBG_SPSR_SS; } -NOKPROBE_SYMBOL(set_regs_spsr_ss); +NOKPROBE_SYMBOL(set_user_regs_spsr_ss); -static void clear_regs_spsr_ss(struct pt_regs *regs) +static void clear_user_regs_spsr_ss(struct user_pt_regs *regs) { regs->pstate &= ~DBG_SPSR_SS; } -NOKPROBE_SYMBOL(clear_regs_spsr_ss); +NOKPROBE_SYMBOL(clear_user_regs_spsr_ss); + +#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs) +#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs) static DEFINE_SPINLOCK(debug_hook_lock); static LIST_HEAD(user_step_hook); @@ -402,6 +405,15 @@ void user_fastforward_single_step(struct task_struct *task) clear_regs_spsr_ss(task_pt_regs(task)); } +void user_regs_reset_single_step(struct user_pt_regs *regs, + struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) + set_user_regs_spsr_ss(regs); + else + clear_user_regs_spsr_ss(regs); +} + /* Kernel API */ void kernel_enable_single_step(struct pt_regs *regs) { diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 057d4aa1af4d..22f9053b55b6 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1947,8 +1947,8 @@ static int valid_native_regs(struct user_pt_regs *regs) */ int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task) { - if (!test_tsk_thread_flag(task, TIF_SINGLESTEP)) - regs->pstate &= ~DBG_SPSR_SS; + /* https://lore.kernel.org/lkml/20191118131525.GA4180@willie-the-truck */ + user_regs_reset_single_step(regs, task); if (is_compat_thread(task_thread_info(task))) return valid_compat_regs(regs); -- cgit v1.2.3 From 15956689a0e60aa0c795174f3c310b60d8794235 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 3 Jul 2020 12:08:42 +0100 Subject: arm64: compat: Ensure upper 32 bits of x0 are zero on syscall return Although we zero the upper bits of x0 on entry to the kernel from an AArch32 task, we do not clear them on the exception return path and can therefore expose 64-bit sign extended syscall return values to userspace via interfaces such as the 'perf_regs' ABI, which deal exclusively with 64-bit registers. Explicitly clear the upper 32 bits of x0 on return from a compat system call. Cc: Cc: Mark Rutland Cc: Keno Fischer Cc: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/include/asm/syscall.h | 12 +++++++++++- arch/arm64/kernel/syscall.c | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 65299a2dcf9c..cfc0672013f6 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -34,6 +34,10 @@ static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { unsigned long error = regs->regs[0]; + + if (is_compat_thread(task_thread_info(task))) + error = sign_extend64(error, 31); + return IS_ERR_VALUE(error) ? error : 0; } @@ -47,7 +51,13 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->regs[0] = (long) error ? error : val; + if (error) + val = error; + + if (is_compat_thread(task_thread_info(task))) + val = lower_32_bits(val); + + regs->regs[0] = val; } #define SYSCALL_MAX_ARGS 6 diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 7c14466a12af..98a26d4e7b0c 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -50,6 +50,9 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, ret = do_ni_syscall(regs, scno); } + if (is_compat_task()) + ret = lower_32_bits(ret); + regs->regs[0] = ret; } -- cgit v1.2.3 From 59ee987ea47caff8c1e7ba4b89932c6900a35d0c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jul 2020 21:14:20 +0100 Subject: arm64: ptrace: Add a comment describing our syscall entry/exit trap ABI Our tracehook logic for syscall entry/exit raises a SIGTRAP back to the tracer following a ptrace request such as PTRACE_SYSCALL. As part of this procedure, we clobber the reported value of one of the tracee's general purpose registers (x7 for native tasks, r12 for compat) to indicate whether the stop occurred on syscall entry or exit. This is a slightly unfortunate ABI, as it prevents the tracer from accessing the real register value and is at odds with other similar stops such as seccomp traps. Since we're stuck with this ABI, expand the comment in our tracehook logic to acknowledge the issue and describe the behaviour in more detail. Cc: Mark Rutland Cc: Luis Machado Reported-by: Keno Fischer Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 22f9053b55b6..89fbee3991a2 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1811,8 +1811,20 @@ static void tracehook_report_syscall(struct pt_regs *regs, unsigned long saved_reg; /* - * A scratch register (ip(r12) on AArch32, x7 on AArch64) is - * used to denote syscall entry/exit: + * We have some ABI weirdness here in the way that we handle syscall + * exit stops because we indicate whether or not the stop has been + * signalled from syscall entry or syscall exit by clobbering a general + * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee + * and restoring its old value after the stop. This means that: + * + * - Any writes by the tracer to this register during the stop are + * ignored/discarded. + * + * - The actual value of the register is not available during the stop, + * so the tracer cannot save it and restore it later. + * + * - Syscall stops behave differently to seccomp and pseudo-step traps + * (the latter do not nobble any registers). */ regno = (is_compat_task() ? 12 : 7); saved_reg = regs->regs[regno]; -- cgit v1.2.3 From 139dbe5d8ed383cbd1ada56c78dbbbd35bf6a9d3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 3 Jul 2020 09:41:24 +0100 Subject: arm64: syscall: Expand the comment about ptrace and syscall(-1) If a task executes syscall(-1), we intercept this early and force x0 to be -ENOSYS so that we don't need to distinguish this scenario from one where the scno is -1 because a tracer wants to skip the system call using ptrace. With the return value set, the return path is the same as the skip case. Although there is a one-line comment noting this in el0_svc_common(), it misses out most of the detail. Expand the comment to describe a bit more about what is going on. Cc: Mark Rutland Cc: Keno Fischer Cc: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/kernel/syscall.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 98a26d4e7b0c..5f0c04863d2c 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -124,7 +124,21 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, user_exit(); if (has_syscall_work(flags)) { - /* set default errno for user-issued syscall(-1) */ + /* + * The de-facto standard way to skip a system call using ptrace + * is to set the system call to -1 (NO_SYSCALL) and set x0 to a + * suitable error code for consumption by userspace. However, + * this cannot be distinguished from a user-issued syscall(-1) + * and so we must set x0 to -ENOSYS here in case the tracer doesn't + * issue the skip and we fall into trace_exit with x0 preserved. + * + * This is slightly odd because it also means that if a tracer + * sets the system call number to -1 but does not initialise x0, + * then x0 will be preserved for all system calls apart from a + * user-issued syscall(-1). However, requesting a skip and not + * setting the return value is unlikely to do anything sensible + * anyway. + */ if (scno == NO_SYSCALL) regs->regs[0] = -ENOSYS; scno = syscall_trace_enter(regs); -- cgit v1.2.3 From d83ee6e3e75db6f518ef2b0858f163849f2ddeb7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Jul 2020 13:20:57 +0100 Subject: arm64: ptrace: Use NO_SYSCALL instead of -1 in syscall_trace_enter() Setting a system call number of -1 is special, as it indicates that the current system call should be skipped. Use NO_SYSCALL instead of -1 when checking for this scenario, which is different from the -1 returned due to a seccomp failure. Cc: Mark Rutland Cc: Keno Fischer Cc: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 89fbee3991a2..1e02e98e68dd 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1856,12 +1856,12 @@ int syscall_trace_enter(struct pt_regs *regs) if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); if (!in_syscall(regs) || (flags & _TIF_SYSCALL_EMU)) - return -1; + return NO_SYSCALL; } /* Do the secure computing after ptrace; failures should be fast. */ if (secure_computing() == -1) - return -1; + return NO_SYSCALL; if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, regs->syscallno); -- cgit v1.2.3 From 5afc78551bf5d53279036e0bf63314e35631d79f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 13 Feb 2020 12:12:26 +0000 Subject: arm64: Use test_tsk_thread_flag() for checking TIF_SINGLESTEP Rather than open-code test_tsk_thread_flag() at each callsite, simply replace the couple of offenders with calls to test_tsk_thread_flag() directly. Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 91146c0a3691..7310a4f7f993 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -394,14 +394,14 @@ void user_rewind_single_step(struct task_struct *task) * If single step is active for this thread, then set SPSR.SS * to 1 to avoid returning to the active-pending state. */ - if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) set_regs_spsr_ss(task_pt_regs(task)); } NOKPROBE_SYMBOL(user_rewind_single_step); void user_fastforward_single_step(struct task_struct *task) { - if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) clear_regs_spsr_ss(task_pt_regs(task)); } -- cgit v1.2.3 From bd024e82e4cd95c7f1a475a55f99871936c2b2db Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 16 Jul 2020 12:28:16 +0100 Subject: asm-generic/mmiowb: Allow mmiowb_set_pending() when preemptible() Although mmiowb() is concerned only with serialising MMIO writes occuring in contexts where a spinlock is held, the call to mmiowb_set_pending() from the MMIO write accessors can occur in preemptible contexts, such as during driver probe() functions where ordering between CPUs is not usually a concern, assuming that the task migration path provides the necessary ordering guarantees. Unfortunately, the default implementation of mmiowb_set_pending() is not preempt-safe, as it makes use of a a per-cpu variable to track its internal state. This has been reported to generate the following splat on riscv: | BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 | caller is regmap_mmio_write32le+0x1c/0x46 | CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.8.0-rc3-hfu+ #1 | Call Trace: | walk_stackframe+0x0/0x7a | dump_stack+0x6e/0x88 | regmap_mmio_write32le+0x18/0x46 | check_preemption_disabled+0xa4/0xaa | regmap_mmio_write32le+0x18/0x46 | regmap_mmio_write+0x26/0x44 | regmap_write+0x28/0x48 | sifive_gpio_probe+0xc0/0x1da Although it's possible to fix the driver in this case, other splats have been seen from other drivers, including the infamous 8250 UART, and so it's better to address this problem in the mmiowb core itself. Fix mmiowb_set_pending() by using the raw_cpu_ptr() to get at the mmiowb state and then only updating the 'mmiowb_pending' field if we are not preemptible (i.e. we have a non-zero nesting count). Cc: Arnd Bergmann Cc: Paul Walmsley Cc: Guo Ren Cc: Michael Ellerman Reported-by: Palmer Dabbelt Reported-by: Emil Renner Berthing Tested-by: Emil Renner Berthing Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20200716112816.7356-1-will@kernel.org Signed-off-by: Will Deacon --- include/asm-generic/mmiowb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/mmiowb.h b/include/asm-generic/mmiowb.h index 9439ff037b2d..5698fca3bf56 100644 --- a/include/asm-generic/mmiowb.h +++ b/include/asm-generic/mmiowb.h @@ -27,7 +27,7 @@ #include DECLARE_PER_CPU(struct mmiowb_state, __mmiowb_state); -#define __mmiowb_state() this_cpu_ptr(&__mmiowb_state) +#define __mmiowb_state() raw_cpu_ptr(&__mmiowb_state) #else #define __mmiowb_state() arch_mmiowb_state() #endif /* arch_mmiowb_state */ @@ -35,7 +35,9 @@ DECLARE_PER_CPU(struct mmiowb_state, __mmiowb_state); static inline void mmiowb_set_pending(void) { struct mmiowb_state *ms = __mmiowb_state(); - ms->mmiowb_pending = ms->nesting_count; + + if (likely(ms->nesting_count)) + ms->mmiowb_pending = ms->nesting_count; } static inline void mmiowb_spin_lock(void) -- cgit v1.2.3 From f32ed8eb0e3f0d0ef4ddb854554d60ca5863a9f9 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Fri, 17 Jul 2020 16:49:23 +0800 Subject: drivers/perf: Prevent forced unbinding of PMU drivers Forcefully unbinding PMU drivers during perf sampling will lead to a kernel panic, because the perf upper-layer framework call a NULL pointer in this situation. To solve this issue, "suppress_bind_attrs" should be set to true, so that bind/unbind can be disabled via sysfs and prevent unbinding PMU drivers during perf sampling. Signed-off-by: Qi Liu Reviewed-by: John Garry Link: https://lore.kernel.org/r/1594975763-32966-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 1 + drivers/perf/arm-ccn.c | 1 + drivers/perf/arm_dsu_pmu.c | 1 + drivers/perf/arm_smmuv3_pmu.c | 1 + drivers/perf/arm_spe_pmu.c | 1 + drivers/perf/fsl_imx8_ddr_perf.c | 1 + drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 1 + drivers/perf/qcom_l2_pmu.c | 1 + drivers/perf/qcom_l3_pmu.c | 1 + drivers/perf/thunderx2_pmu.c | 1 + drivers/perf/xgene_pmu.c | 1 + 13 files changed, 13 insertions(+) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 1b8e337a29ca..87c4be9dd412 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1718,6 +1718,7 @@ static struct platform_driver cci_pmu_driver = { .driver = { .name = DRIVER_NAME, .of_match_table = arm_cci_pmu_matches, + .suppress_bind_attrs = true, }, .probe = cci_pmu_probe, .remove = cci_pmu_remove, diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index d50edef91f59..7b7d23f25713 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1545,6 +1545,7 @@ static struct platform_driver arm_ccn_driver = { .driver = { .name = "arm-ccn", .of_match_table = arm_ccn_match, + .suppress_bind_attrs = true, }, .probe = arm_ccn_probe, .remove = arm_ccn_remove, diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 518d0603e24f..96ed93cc78e6 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -757,6 +757,7 @@ static struct platform_driver dsu_pmu_driver = { .driver = { .name = DRVNAME, .of_match_table = of_match_ptr(dsu_pmu_of_match), + .suppress_bind_attrs = true, }, .probe = dsu_pmu_device_probe, .remove = dsu_pmu_device_remove, diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 90caba56dfbc..4cdb35d166ac 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -860,6 +860,7 @@ static void smmu_pmu_shutdown(struct platform_device *pdev) static struct platform_driver smmu_pmu_driver = { .driver = { .name = "arm-smmu-v3-pmcg", + .suppress_bind_attrs = true, }, .probe = smmu_pmu_probe, .remove = smmu_pmu_remove, diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index d80f48798bce..e51ddb6d63ed 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -1226,6 +1226,7 @@ static struct platform_driver arm_spe_pmu_driver = { .driver = { .name = DRVNAME, .of_match_table = of_match_ptr(arm_spe_pmu_of_match), + .suppress_bind_attrs = true, }, .probe = arm_spe_pmu_device_probe, .remove = arm_spe_pmu_device_remove, diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 2aed2d96f8ae..397540a4b799 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -707,6 +707,7 @@ static struct platform_driver imx_ddr_pmu_driver = { .driver = { .name = "imx-ddr-pmu", .of_match_table = imx_ddr_pmu_dt_ids, + .suppress_bind_attrs = true, }, .probe = ddr_perf_probe, .remove = ddr_perf_remove, diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 71587f19fcd5..5e3645c96443 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -419,6 +419,7 @@ static struct platform_driver hisi_ddrc_pmu_driver = { .driver = { .name = "hisi_ddrc_pmu", .acpi_match_table = ACPI_PTR(hisi_ddrc_pmu_acpi_match), + .suppress_bind_attrs = true, }, .probe = hisi_ddrc_pmu_probe, .remove = hisi_ddrc_pmu_remove, diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index c199de7b219d..5eb8168029c0 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -431,6 +431,7 @@ static struct platform_driver hisi_hha_pmu_driver = { .driver = { .name = "hisi_hha_pmu", .acpi_match_table = ACPI_PTR(hisi_hha_pmu_acpi_match), + .suppress_bind_attrs = true, }, .probe = hisi_hha_pmu_probe, .remove = hisi_hha_pmu_remove, diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 567d7e6f69d8..3e8b5eab5514 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -421,6 +421,7 @@ static struct platform_driver hisi_l3c_pmu_driver = { .driver = { .name = "hisi_l3c_pmu", .acpi_match_table = ACPI_PTR(hisi_l3c_pmu_acpi_match), + .suppress_bind_attrs = true, }, .probe = hisi_l3c_pmu_probe, .remove = hisi_l3c_pmu_remove, diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 21d6991dbe0b..4da37f650f98 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -1028,6 +1028,7 @@ static struct platform_driver l2_cache_pmu_driver = { .driver = { .name = "qcom-l2cache-pmu", .acpi_match_table = ACPI_PTR(l2_cache_pmu_acpi_match), + .suppress_bind_attrs = true, }, .probe = l2_cache_pmu_probe, .remove = l2_cache_pmu_remove, diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index 656e830798d9..9ddb577c542b 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -814,6 +814,7 @@ static struct platform_driver qcom_l3_cache_pmu_driver = { .driver = { .name = "qcom-l3cache-pmu", .acpi_match_table = ACPI_PTR(qcom_l3_cache_pmu_acpi_match), + .suppress_bind_attrs = true, }, .probe = qcom_l3_cache_pmu_probe, }; diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index 51b31d6ff2c4..aac9823b0c6b 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -1017,6 +1017,7 @@ static struct platform_driver tx2_uncore_driver = { .driver = { .name = "tx2-uncore-pmu", .acpi_match_table = ACPI_PTR(tx2_uncore_acpi_match), + .suppress_bind_attrs = true, }, .probe = tx2_uncore_probe, .remove = tx2_uncore_remove, diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 46ee6807d533..edac28cd25dd 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1975,6 +1975,7 @@ static struct platform_driver xgene_pmu_driver = { .name = "xgene-pmu", .of_match_table = xgene_pmu_of_match, .acpi_match_table = ACPI_PTR(xgene_pmu_acpi_match), + .suppress_bind_attrs = true, }, }; -- cgit v1.2.3