From 7231d0165df3754ec90a2868a026a146401ec751 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 23 May 2019 10:23:25 -0400 Subject: x86/asm: Remove unused TASK_TI_flags from asm-offsets.c Since commit: 21d375b6b34ff5 ("x86/entry/64: Remove the SYSCALL64 fast path") there is no user of TASK_TI_flags in assembly. There's no need to keep it around in asm-offsets.c Signed-off-by: Steven Rostedt (VMware) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190523102325.22eacdf7@gandalf.local.home Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 168543d077d7..da64452584b0 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -38,7 +38,6 @@ static void __used common(void) #endif BLANK(); - OFFSET(TASK_TI_flags, task_struct, thread_info.flags); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); -- cgit v1.2.3 From 873d50d58f67ef15d2777b5e7f7a5268bb1fbae2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 17 Jun 2019 21:55:02 -0700 Subject: x86/asm: Pin sensitive CR4 bits Several recent exploits have used direct calls to the native_write_cr4() function to disable SMEP and SMAP before then continuing their exploits using userspace memory access. Direct calls of this form can be mitigate by pinning bits of CR4 so that they cannot be changed through a common function. This is not intended to be a general ROP protection (which would require CFI to defend against properly), but rather a way to avoid trivial direct function calling (or CFI bypasses via a matching function prototype) as seen in: https://googleprojectzero.blogspot.com/2017/05/exploiting-linux-kernel-via-packet.html (https://github.com/xairy/kernel-exploits/tree/master/CVE-2017-7308) The goals of this change: - Pin specific bits (SMEP, SMAP, and UMIP) when writing CR4. - Avoid setting the bits too early (they must become pinned only after CPU feature detection and selection has finished). - Pinning mask needs to be read-only during normal runtime. - Pinning needs to be checked after write to validate the cr4 state Using __ro_after_init on the mask is done so it can't be first disabled with a malicious write. Since these bits are global state (once established by the boot CPU and kernel boot parameters), they are safe to write to secondary CPUs before those CPUs have finished feature detection. As such, the bits are set at the first cr4 write, so that cr4 write bugs can be detected (instead of silently papered over). This uses a few bytes less storage of a location we don't have: read-only per-CPU data. A check is performed after the register write because an attack could just skip directly to the register write. Such a direct jump is possible because of how this function may be built by the compiler (especially due to the removal of frame pointers) where it doesn't add a stack frame (function exit may only be a retq without pops) which is sufficient for trivial exploitation like in the timer overwrites mentioned above). The asm argument constraints gain the "+" modifier to convince the compiler that it shouldn't make ordering assumptions about the arguments or memory, and treat them as changed. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Dave Hansen Cc: kernel-hardening@lists.openwall.com Link: https://lkml.kernel.org/r/20190618045503.39105-3-keescook@chromium.org --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ arch/x86/kernel/smpboot.c | 8 +++++++- 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2c57fffebf9b..c578addfcf8a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -366,6 +366,25 @@ out: cr4_clear_bits(X86_CR4_UMIP); } +DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +EXPORT_SYMBOL(cr_pinning); +unsigned long cr4_pinned_bits __ro_after_init; +EXPORT_SYMBOL(cr4_pinned_bits); + +/* + * Once CPU feature detection is finished (and boot params have been + * parsed), record any of the sensitive CR bits that are set, and + * enable CR pinning. + */ +static void __init setup_cr_pinning(void) +{ + unsigned long mask; + + mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP); + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask; + static_key_enable(&cr_pinning.key); +} + /* * Protection Keys are not available in 32-bit mode. */ @@ -1464,6 +1483,7 @@ void __init identify_boot_cpu(void) enable_sep_cpu(); #endif cpu_detect_tlb(&boot_cpu_data); + setup_cr_pinning(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 362dd8953f48..1af7a2d89419 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -205,13 +205,19 @@ static int enable_start_cpu0; */ static void notrace start_secondary(void *unused) { + unsigned long cr4 = __read_cr4(); + /* * Don't put *anything* except direct CPU state initialization * before cpu_init(), SMP booting is too fragile that we want to * limit the things done here to the most necessary things. */ if (boot_cpu_has(X86_FEATURE_PCID)) - __write_cr4(__read_cr4() | X86_CR4_PCIDE); + cr4 |= X86_CR4_PCIDE; + if (static_branch_likely(&cr_pinning)) + cr4 |= cr4_pinned_bits; + + __write_cr4(cr4); #ifdef CONFIG_X86_32 /* switch away from the initial page table */ -- cgit v1.2.3 From 4201311dae59781fb19c40adddda45bf3c562b63 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 May 2019 23:25:46 +0200 Subject: x86/stackframe, x86/kprobes: Fix frame pointer annotations The kprobe trampolines have a FRAME_POINTER annotation that makes no sense. It marks the frame in the middle of pt_regs, at the place of saving BP. Change it to mark the pt_regs frame as per the ENCODE_FRAME_POINTER from the respective entry_*.S. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Acked-by: Masami Hiramatsu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/common.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 2b949f4fd4d8..2e25f5f3fa1f 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -5,15 +5,10 @@ /* Kprobes and Optprobes common header */ #include - -#ifdef CONFIG_FRAME_POINTER -# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \ - " mov %" _ASM_SP ", %" _ASM_BP "\n" -#else -# define SAVE_RBP_STRING " push %" _ASM_BP "\n" -#endif +#include #ifdef CONFIG_X86_64 + #define SAVE_REGS_STRING \ /* Skip cs, ip, orig_ax. */ \ " subq $24, %rsp\n" \ @@ -27,11 +22,13 @@ " pushq %r10\n" \ " pushq %r11\n" \ " pushq %rbx\n" \ - SAVE_RBP_STRING \ + " pushq %rbp\n" \ " pushq %r12\n" \ " pushq %r13\n" \ " pushq %r14\n" \ - " pushq %r15\n" + " pushq %r15\n" \ + ENCODE_FRAME_POINTER + #define RESTORE_REGS_STRING \ " popq %r15\n" \ " popq %r14\n" \ @@ -51,19 +48,22 @@ /* Skip orig_ax, ip, cs */ \ " addq $24, %rsp\n" #else + #define SAVE_REGS_STRING \ /* Skip cs, ip, orig_ax and gs. */ \ - " subl $16, %esp\n" \ + " subl $4*4, %esp\n" \ " pushl %fs\n" \ " pushl %es\n" \ " pushl %ds\n" \ " pushl %eax\n" \ - SAVE_RBP_STRING \ + " pushl %ebp\n" \ " pushl %edi\n" \ " pushl %esi\n" \ " pushl %edx\n" \ " pushl %ecx\n" \ - " pushl %ebx\n" + " pushl %ebx\n" \ + ENCODE_FRAME_POINTER + #define RESTORE_REGS_STRING \ " popl %ebx\n" \ " popl %ecx\n" \ -- cgit v1.2.3 From ea1ed38dba64b64a245ab8ca1406269d17b99485 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 May 2019 23:25:50 +0200 Subject: x86/stackframe, x86/ftrace: Add pt_regs frame annotations When CONFIG_FRAME_POINTER, we should mark pt_regs frames. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace_32.S | 3 +++ arch/x86/kernel/ftrace_64.S | 3 +++ 2 files changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index 2ba914a34b06..0344c3aad4a5 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -9,6 +9,7 @@ #include #include #include +#include # define function_hook __fentry__ EXPORT_SYMBOL(__fentry__) @@ -116,6 +117,8 @@ ENTRY(ftrace_regs_caller) pushl %ecx pushl %ebx + ENCODE_FRAME_POINTER + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */ diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 10eb2760ef2c..809d54397dba 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -9,6 +9,7 @@ #include #include #include +#include .code64 .section .entry.text, "ax" @@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr) leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx movq %rcx, RSP(%rsp) + ENCODE_FRAME_POINTER + /* regs go into 4th parameter */ leaq (%rsp), %rcx -- cgit v1.2.3 From 3c88c692c28746473791276f8b42d2c989d6cbe6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 May 2019 23:25:54 +0200 Subject: x86/stackframe/32: Provide consistent pt_regs Currently pt_regs on x86_32 has an oddity in that kernel regs (!user_mode(regs)) are short two entries (esp/ss). This means that any code trying to use them (typically: regs->sp) needs to jump through some unfortunate hoops. Change the entry code to fix this up and create a full pt_regs frame. This then simplifies various trampolines in ftrace and kprobes, the stack unwinder, ptrace, kdump and kgdb. Much thanks to Josh for help with the cleanups! Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Acked-by: Masami Hiramatsu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash.c | 8 ----- arch/x86/kernel/ftrace_32.S | 75 ++++++++++++++++++++++------------------ arch/x86/kernel/kgdb.c | 8 ----- arch/x86/kernel/kprobes/common.h | 4 +-- arch/x86/kernel/kprobes/core.c | 29 +++++++--------- arch/x86/kernel/kprobes/opt.c | 20 ++++++----- arch/x86/kernel/process_32.c | 16 +++------ arch/x86/kernel/ptrace.c | 29 ---------------- arch/x86/kernel/time.c | 3 +- arch/x86/kernel/unwind_frame.c | 32 +++-------------- arch/x86/kernel/unwind_orc.c | 2 +- 11 files changed, 79 insertions(+), 147 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 576b2e1bfc12..84e2d3ddd0eb 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -73,14 +73,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void) static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { -#ifdef CONFIG_X86_32 - struct pt_regs fixed_regs; - - if (!user_mode(regs)) { - crash_fixup_ss_esp(&fixed_regs, regs); - regs = &fixed_regs; - } -#endif crash_save_cpu(regs, cpu); /* diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index 0344c3aad4a5..073aab525d80 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -10,6 +10,7 @@ #include #include #include +#include # define function_hook __fentry__ EXPORT_SYMBOL(__fentry__) @@ -90,26 +91,38 @@ END(ftrace_caller) ENTRY(ftrace_regs_caller) /* - * i386 does not save SS and ESP when coming from kernel. - * Instead, to get sp, ®s->sp is used (see ptrace.h). - * Unfortunately, that means eflags must be at the same location - * as the current return ip is. We move the return ip into the - * regs->ip location, and move flags into the return ip location. + * We're here from an mcount/fentry CALL, and the stack frame looks like: + * + * + * RET-IP + * + * The purpose of this function is to call out in an emulated INT3 + * environment with a stack frame like: + * + * + * gap / RET-IP + * gap + * gap + * gap + * pt_regs + * + * We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds */ - pushl $__KERNEL_CS - pushl 4(%esp) /* Save the return ip */ - pushl $0 /* Load 0 into orig_ax */ + subl $3*4, %esp # RET-IP + 3 gaps + pushl %ss # ss + pushl %esp # points at ss + addl $5*4, (%esp) # make it point at + pushfl # flags + pushl $__KERNEL_CS # cs + pushl 7*4(%esp) # ip <- RET-IP + pushl $0 # orig_eax + pushl %gs pushl %fs pushl %es pushl %ds - pushl %eax - - /* Get flags and place them into the return ip slot */ - pushf - popl %eax - movl %eax, 8*4(%esp) + pushl %eax pushl %ebp pushl %edi pushl %esi @@ -119,24 +132,25 @@ ENTRY(ftrace_regs_caller) ENCODE_FRAME_POINTER - movl 12*4(%esp), %eax /* Load ip (1st parameter) */ - subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ - movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */ - movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ - pushl %esp /* Save pt_regs as 4th parameter */ + movl PT_EIP(%esp), %eax # 1st argument: IP + subl $MCOUNT_INSN_SIZE, %eax + movl 21*4(%esp), %edx # 2nd argument: parent ip + movl function_trace_op, %ecx # 3rd argument: ftrace_pos + pushl %esp # 4th argument: pt_regs GLOBAL(ftrace_regs_call) call ftrace_stub - addl $4, %esp /* Skip pt_regs */ + addl $4, %esp # skip 4th argument - /* restore flags */ - push 14*4(%esp) - popf + /* place IP below the new SP */ + movl PT_OLDESP(%esp), %eax + movl PT_EIP(%esp), %ecx + movl %ecx, -4(%eax) - /* Move return ip back to its original location */ - movl 12*4(%esp), %eax - movl %eax, 14*4(%esp) + /* place EAX below that */ + movl PT_EAX(%esp), %ecx + movl %ecx, -8(%eax) popl %ebx popl %ecx @@ -144,14 +158,9 @@ GLOBAL(ftrace_regs_call) popl %esi popl %edi popl %ebp - popl %eax - popl %ds - popl %es - popl %fs - popl %gs - /* use lea to not affect flags */ - lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */ + lea -8(%eax), %esp + popl %eax jmp .Lftrace_ret diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 6690c5652aeb..23297ea64f5f 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) #ifdef CONFIG_X86_32 switch (regno) { - case GDB_SS: - if (!user_mode(regs)) - *(unsigned long *)mem = __KERNEL_DS; - break; - case GDB_SP: - if (!user_mode(regs)) - *(unsigned long *)mem = kernel_stack_pointer(regs); - break; case GDB_GS: case GDB_FS: *(unsigned long *)mem = 0xFFFF; diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 2e25f5f3fa1f..7d3a2e2daf01 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -72,8 +72,8 @@ " popl %edi\n" \ " popl %ebp\n" \ " popl %eax\n" \ - /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ - " addl $24, %esp\n" + /* Skip ds, es, fs, gs, orig_ax, ip, and cs. */\ + " addl $7*4, %esp\n" #endif /* Ensure if the instruction can be boostable */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6afd8061dbae..bd17dbb15d6a 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -56,7 +56,7 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) +#define stack_addr(regs) ((unsigned long *)regs->sp) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -718,29 +718,27 @@ asm( ".global kretprobe_trampoline\n" ".type kretprobe_trampoline, @function\n" "kretprobe_trampoline:\n" -#ifdef CONFIG_X86_64 /* We don't bother saving the ss register */ +#ifdef CONFIG_X86_64 " pushq %rsp\n" " pushfq\n" SAVE_REGS_STRING " movq %rsp, %rdi\n" " call trampoline_handler\n" /* Replace saved sp with true return address. */ - " movq %rax, 152(%rsp)\n" + " movq %rax, 19*8(%rsp)\n" RESTORE_REGS_STRING " popfq\n" #else - " pushf\n" + " pushl %esp\n" + " pushfl\n" SAVE_REGS_STRING " movl %esp, %eax\n" " call trampoline_handler\n" - /* Move flags to cs */ - " movl 56(%esp), %edx\n" - " movl %edx, 52(%esp)\n" - /* Replace saved flags with true return address. */ - " movl %eax, 56(%esp)\n" + /* Replace saved sp with true return address. */ + " movl %eax, 15*4(%esp)\n" RESTORE_REGS_STRING - " popf\n" + " popfl\n" #endif " ret\n" ".size kretprobe_trampoline, .-kretprobe_trampoline\n" @@ -781,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ -#ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; - /* On x86-64, we use pt_regs->sp for return address holder. */ - frame_pointer = ®s->sp; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); +#ifdef CONFIG_X86_32 + regs->cs |= get_kernel_rpl(); regs->gs = 0; - /* On x86-32, we use pt_regs->flags for return address holder. */ - frame_pointer = ®s->flags; #endif + /* We use pt_regs->sp for return address holder. */ + frame_pointer = ®s->sp; regs->ip = trampoline_address; regs->orig_ax = ~0UL; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 7c361a24c6df..282b4eb67e30 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -102,14 +102,15 @@ asm ( "optprobe_template_call:\n" ASM_NOP5 /* Move flags to rsp */ - " movq 144(%rsp), %rdx\n" - " movq %rdx, 152(%rsp)\n" + " movq 18*8(%rsp), %rdx\n" + " movq %rdx, 19*8(%rsp)\n" RESTORE_REGS_STRING /* Skip flags entry */ " addq $8, %rsp\n" " popfq\n" #else /* CONFIG_X86_32 */ - " pushf\n" + " pushl %esp\n" + " pushfl\n" SAVE_REGS_STRING " movl %esp, %edx\n" ".global optprobe_template_val\n" @@ -118,9 +119,13 @@ asm ( ".global optprobe_template_call\n" "optprobe_template_call:\n" ASM_NOP5 + /* Move flags into esp */ + " movl 14*4(%esp), %edx\n" + " movl %edx, 15*4(%esp)\n" RESTORE_REGS_STRING - " addl $4, %esp\n" /* skip cs */ - " popf\n" + /* Skip flags entry */ + " addl $4, %esp\n" + " popfl\n" #endif ".global optprobe_template_end\n" "optprobe_template_end:\n" @@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) } else { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); /* Save skipped registers */ -#ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); +#ifdef CONFIG_X86_32 + regs->cs |= get_kernel_rpl(); regs->gs = 0; #endif regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2399e910d109..b8ceec4974fe 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; - unsigned long sp; - unsigned short ss, gs; + unsigned short gs; - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss; + if (user_mode(regs)) gs = get_user_gs(regs); - } else { - sp = kernel_stack_pointer(regs); - savesegment(ss, ss); + else savesegment(gs, gs); - } show_ip(regs, KERN_DEFAULT); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", - regs->si, regs->di, regs->bp, sp); + regs->si, regs->di, regs->bp, regs->sp); printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n", - (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags); + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags); if (mode != SHOW_REGS_ALL) return; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a166c960bc9e..1f92958d8165 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -154,35 +154,6 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -/* - * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode - * when it traps. The previous stack will be directly underneath the saved - * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. - * - * Now, if the stack is empty, '®s->sp' is out of range. In this - * case we try to take the previous stack. To always return a non-null - * stack pointer we fall back to regs as stack if no previous stack - * exists. - * - * This is valid only for kernel mode traps. - */ -unsigned long kernel_stack_pointer(struct pt_regs *regs) -{ - unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); - unsigned long sp = (unsigned long)®s->sp; - u32 *prev_esp; - - if (context == (sp & ~(THREAD_SIZE - 1))) - return sp; - - prev_esp = (u32 *)(context); - if (*prev_esp) - return (unsigned long)*prev_esp; - - return (unsigned long)regs; -} -EXPORT_SYMBOL_GPL(kernel_stack_pointer); - static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 0e14f6c0d35e..2acbab329754 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs) #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else - unsigned long *sp = - (unsigned long *)kernel_stack_pointer(regs); + unsigned long *sp = (unsigned long *)regs->sp; /* * Return address is either directly at stack pointer * or above a saved flags. Eflags has bits 22-31 zero, diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 6106760de716..a224b5ab103f 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state) } } -static size_t regs_size(struct pt_regs *regs) -{ - /* x86_32 regs from kernel mode are two words shorter: */ - if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs)) - return sizeof(*regs) - 2*sizeof(long); - - return sizeof(*regs); -} - static bool in_entry_code(unsigned long ip) { char *addr = (char *)ip; @@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp) } #endif -#ifdef CONFIG_X86_32 -#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long)) -#else -#define KERNEL_REGS_SIZE (sizeof(struct pt_regs)) -#endif - static bool update_stack_state(struct unwind_state *state, unsigned long *next_bp) { @@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state, size_t len; if (state->regs) - prev_frame_end = (void *)state->regs + regs_size(state->regs); + prev_frame_end = (void *)state->regs + sizeof(*state->regs); else prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE; @@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state, regs = decode_frame_pointer(next_bp); if (regs) { frame = (unsigned long *)regs; - len = KERNEL_REGS_SIZE; + len = sizeof(*regs); state->got_irq = true; } else { frame = next_bp; @@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state, frame < prev_frame_end) return false; - /* - * On 32-bit with user mode regs, make sure the last two regs are safe - * to access: - */ - if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) && - !on_stack(info, frame, len + 2*sizeof(long))) - return false; - /* Move state to the next frame: */ if (regs) { state->regs = regs; @@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, * Pretend that the frame is complete and that BP points to it, but save * the real BP so that we can use it when looking for the next frame. */ - if (regs && regs->ip == 0 && - (unsigned long *)kernel_stack_pointer(regs) >= first_frame) { + if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) { state->next_bp = bp; - bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1; + bp = ((unsigned long *)regs->sp) - 1; } /* Initialize stack info and make sure the frame data is accessible: */ diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 33b66b5c5aec..7f79aeebe3ee 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -580,7 +580,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, goto done; state->ip = regs->ip; - state->sp = kernel_stack_pointer(regs); + state->sp = regs->sp; state->bp = regs->bp; state->regs = regs; state->full_regs = true; -- cgit v1.2.3 From faeedb0679bee39ebffc6d53111e86932dea189a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 May 2019 09:40:54 +0200 Subject: x86/stackframe/32: Allow int3_emulate_push() Now that x86_32 has an unconditional gap on the kernel stack frame, the int3_emulate_push() thing will work without further changes. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0927bb158ffc..a4eea7bad4a1 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -300,7 +300,6 @@ int ftrace_int3_handler(struct pt_regs *regs) ip = regs->ip - INT3_INSN_SIZE; -#ifdef CONFIG_X86_64 if (ftrace_location(ip)) { int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); return 1; @@ -312,12 +311,6 @@ int ftrace_int3_handler(struct pt_regs *regs) int3_emulate_call(regs, ftrace_update_func_call); return 1; } -#else - if (ftrace_location(ip) || is_ftrace_caller(ip)) { - int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); - return 1; - } -#endif return 0; } -- cgit v1.2.3 From 7457c0da024b181a9143988d740001f9bc98698d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 May 2019 12:22:47 +0200 Subject: x86/alternatives: Add int3_emulate_call() selftest Given that the entry_*.S changes for this functionality are somewhat tricky, make sure the paths are tested every boot, instead of on the rare occasion when we trip an INT3 while rewriting text. Requested-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Acked-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 81 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 390596b761e3..65aa83d1b7d9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -615,11 +615,83 @@ extern struct paravirt_patch_site __start_parainstructions[], __stop_parainstructions[]; #endif /* CONFIG_PARAVIRT */ +/* + * Self-test for the INT3 based CALL emulation code. + * + * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up + * properly and that there is a stack gap between the INT3 frame and the + * previous context. Without this gap doing a virtual PUSH on the interrupted + * stack would corrupt the INT3 IRET frame. + * + * See entry_{32,64}.S for more details. + */ +static void __init int3_magic(unsigned int *ptr) +{ + *ptr = 1; +} + +extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */ + +static int __init +int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + if (!regs || user_mode(regs)) + return NOTIFY_DONE; + + if (val != DIE_INT3) + return NOTIFY_DONE; + + if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip) + return NOTIFY_DONE; + + int3_emulate_call(regs, (unsigned long)&int3_magic); + return NOTIFY_STOP; +} + +static void __init int3_selftest(void) +{ + static __initdata struct notifier_block int3_exception_nb = { + .notifier_call = int3_exception_notify, + .priority = INT_MAX-1, /* last */ + }; + unsigned int val = 0; + + BUG_ON(register_die_notifier(&int3_exception_nb)); + + /* + * Basically: int3_magic(&val); but really complicated :-) + * + * Stick the address of the INT3 instruction into int3_selftest_ip, + * then trigger the INT3, padded with NOPs to match a CALL instruction + * length. + */ + asm volatile ("1: int3; nop; nop; nop; nop\n\t" + ".pushsection .init.data,\"aw\"\n\t" + ".align " __ASM_SEL(4, 8) "\n\t" + ".type int3_selftest_ip, @object\n\t" + ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t" + "int3_selftest_ip:\n\t" + __ASM_SEL(.long, .quad) " 1b\n\t" + ".popsection\n\t" + : : __ASM_SEL_RAW(a, D) (&val) : "memory"); + + BUG_ON(val != 1); + + unregister_die_notifier(&int3_exception_nb); +} + void __init alternative_instructions(void) { - /* The patching is not fully atomic, so try to avoid local interruptions - that might execute the to be patched code. - Other CPUs are not running. */ + int3_selftest(); + + /* + * The patching is not fully atomic, so try to avoid local + * interruptions that might execute the to be patched code. + * Other CPUs are not running. + */ stop_nmi(); /* @@ -644,10 +716,11 @@ void __init alternative_instructions(void) _text, _etext); } - if (!uniproc_patched || num_possible_cpus() == 1) + if (!uniproc_patched || num_possible_cpus() == 1) { free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); + } #endif apply_paravirt(__parainstructions, __parainstructions_end); -- cgit v1.2.3