diff options
author | Andy Lutomirski <luto@amacapital.net> | 2014-11-11 12:49:41 -0800 |
---|---|---|
committer | Andy Lutomirski <luto@amacapital.net> | 2015-01-02 10:22:45 -0800 |
commit | 48e08d0fb265b007ebbb29a72297ff7e40938969 (patch) | |
tree | 424a8207cc53c2b0dfbd9fb12bee15952ce822ae /arch/x86 | |
parent | 734d16801349fbe951d2f780191d32c5b8a892d1 (diff) |
x86, entry: Switch stacks on a paranoid entry from userspace
This causes all non-NMI, non-double-fault kernel entries from
userspace to run on the normal kernel stack. Double-fault is
exempt to minimize confusion if we double-fault directly from
userspace due to a bad kernel stack.
This is, suprisingly, simpler and shorter than the current code. It
removes the IMO rather frightening paranoid_userspace path, and it
make sync_regs much simpler.
There is no risk of stack overflow due to this change -- the kernel
stack that we switch to is empty.
This will also enable us to create non-atomic sections within
machine checks from userspace, which will simplify memory failure
handling. It will also allow the upcoming fsgsbase code to be
simplified, because it doesn't need to worry about usergs when
scheduling in paranoid_exit, as that code no longer exists.
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Tony Luck <tony.luck@intel.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/kernel/entry_64.S | 86 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 23 |
2 files changed, 50 insertions, 59 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 9ebaf63ba182..931f32f4578b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1048,6 +1048,11 @@ ENTRY(\sym) CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 .if \paranoid + .if \paranoid == 1 + CFI_REMEMBER_STATE + testl $3, CS(%rsp) /* If coming from userspace, switch */ + jnz 1f /* stacks. */ + .endif call save_paranoid .else call error_entry @@ -1088,6 +1093,36 @@ ENTRY(\sym) jmp error_exit /* %ebx: no swapgs flag */ .endif + .if \paranoid == 1 + CFI_RESTORE_STATE + /* + * Paranoid entry from userspace. Switch stacks and treat it + * as a normal entry. This means that paranoid handlers + * run in real process context if user_mode(regs). + */ +1: + call error_entry + + DEFAULT_FRAME 0 + + movq %rsp,%rdi /* pt_regs pointer */ + call sync_regs + movq %rax,%rsp /* switch stack */ + + movq %rsp,%rdi /* pt_regs pointer */ + + .if \has_error_code + movq ORIG_RAX(%rsp),%rsi /* get error code */ + movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ + .else + xorl %esi,%esi /* no error code */ + .endif + + call \do_sym + + jmp error_exit /* %ebx: no swapgs flag */ + .endif + CFI_ENDPROC END(\sym) .endm @@ -1108,7 +1143,7 @@ idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault do_double_fault has_error_code=1 paranoid=1 +idtentry double_fault do_double_fault has_error_code=1 paranoid=2 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 @@ -1289,16 +1324,14 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector( #endif /* - * "Paranoid" exit path from exception stack. - * Paranoid because this is used by NMIs and cannot take - * any kernel state for granted. - * We don't do kernel preemption checks here, because only - * NMI should be common and it does not enable IRQs and - * cannot get reschedule ticks. + * "Paranoid" exit path from exception stack. This is invoked + * only on return from non-NMI IST interrupts that came + * from kernel space. * - * "trace" is 0 for the NMI handler only, because irq-tracing - * is fundamentally NMI-unsafe. (we cannot change the soft and - * hard flags at once, atomically) + * We may be returning to very strange contexts (e.g. very early + * in syscall entry), so checking for preemption here would + * be complicated. Fortunately, we there's no good reason + * to try to handle preemption here. */ /* ebx: no swapgs flag */ @@ -1308,43 +1341,14 @@ ENTRY(paranoid_exit) TRACE_IRQS_OFF_DEBUG testl %ebx,%ebx /* swapgs needed? */ jnz paranoid_restore - testl $3,CS(%rsp) - jnz paranoid_userspace -paranoid_swapgs: TRACE_IRQS_IRETQ 0 SWAPGS_UNSAFE_STACK RESTORE_ALL 8 - jmp irq_return + INTERRUPT_RETURN paranoid_restore: TRACE_IRQS_IRETQ_DEBUG 0 RESTORE_ALL 8 - jmp irq_return -paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz paranoid_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz paranoid_schedule - movl %ebx,%edx /* arg3: thread flags */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp paranoid_userspace -paranoid_schedule: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - SCHEDULE_USER - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - jmp paranoid_userspace + INTERRUPT_RETURN CFI_ENDPROC END(paranoid_exit) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 88900e288021..28f3e5ffc55d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -466,27 +466,14 @@ NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* - * Help handler running on IST stack to switch back to user stack - * for scheduling or signal handling. The actual stack switch is done in - * entry.S + * Help handler running on IST stack to switch off the IST stack if the + * interrupted code was in user mode. The actual stack switch is done in + * entry_64.S */ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = eregs; - /* Did already sync */ - if (eregs == (struct pt_regs *)eregs->sp) - ; - /* Exception from user space */ - else if (user_mode(eregs)) - regs = task_pt_regs(current); - /* - * Exception from kernel and interrupts are enabled. Move to - * kernel process stack. - */ - else if (eregs->flags & X86_EFLAGS_IF) - regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); - if (eregs != regs) - *regs = *eregs; + struct pt_regs *regs = task_pt_regs(current); + *regs = *eregs; return regs; } NOKPROBE_SYMBOL(sync_regs); |