summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorAndy Lutomirski <luto@kernel.org>2019-11-26 18:27:16 +0100
committerIngo Molnar <mingo@kernel.org>2019-11-26 21:53:34 +0100
commitdc4e0021b00b5a4ecba56fae509217776592b0aa (patch)
treef62d0813a0fe93bc4aa7612632ec7cd744ccf33f /arch/x86/kernel
parente99b6f46ee5c127d39d2f3a2682fdeef10386316 (diff)
x86/doublefault/32: Move #DF stack and TSS to cpu_entry_area
There are three problems with the current layout of the doublefault stack and TSS. First, the TSS is only cacheline-aligned, which is not enough -- if the hardware portion of the TSS (struct x86_hw_tss) crosses a page boundary, horrible things happen [0]. Second, the stack and TSS are global, so simultaneous double faults on different CPUs will cause massive corruption. Third, the whole mechanism won't work if user CR3 is loaded, resulting in a triple fault [1]. Let the doublefault stack and TSS share a page (which prevents the TSS from spanning a page boundary), make it percpu, and move it into cpu_entry_area. Teach the stack dump code about the doublefault stack. [0] Real hardware will read past the end of the page onto the next *physical* page if a task switch happens. Virtual machines may have any number of bugs, and I would consider it reasonable for a VM to summarily kill the guest if it tries to task-switch to a page-spanning TSS. [1] Real hardware triple faults. At least some VMs seem to hang. I'm not sure what's going on. Signed-off-by: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/common.c12
-rw-r--r--arch/x86/kernel/doublefault_32.c58
-rw-r--r--arch/x86/kernel/dumpstack_32.c30
3 files changed, 71 insertions, 29 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index baa2fed8deb6..2e4d90294fe6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -24,6 +24,7 @@
#include <asm/stackprotector.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
+#include <asm/doublefault.h>
#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@@ -1814,8 +1815,6 @@ static inline void tss_setup_ist(struct tss_struct *tss)
tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
}
-static inline void gdt_setup_doublefault_tss(int cpu) { }
-
#else /* CONFIG_X86_64 */
static inline void setup_getcpu(int cpu) { }
@@ -1827,13 +1826,6 @@ static inline void ucode_cpu_init(int cpu)
static inline void tss_setup_ist(struct tss_struct *tss) { }
-static inline void gdt_setup_doublefault_tss(int cpu)
-{
-#ifdef CONFIG_DOUBLEFAULT
- /* Set up the doublefault TSS pointer in the GDT */
- __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
-#endif
-}
#endif /* !CONFIG_X86_64 */
static inline void tss_setup_io_bitmap(struct tss_struct *tss)
@@ -1923,7 +1915,7 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- gdt_setup_doublefault_tss(cpu);
+ doublefault_init_cpu_tss();
fpu__init_cpu();
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index 61c707ca8a09..4eecfe4825ed 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -10,10 +10,6 @@
#include <asm/processor.h>
#include <asm/desc.h>
-#define DOUBLEFAULT_STACKSIZE (1024)
-static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
-
#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
static void doublefault_fn(void)
@@ -21,6 +17,8 @@ static void doublefault_fn(void)
struct desc_ptr gdt_desc = {0, 0};
unsigned long gdt, tss;
+ BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE);
+
native_store_gdt(&gdt_desc);
gdt = gdt_desc.address;
@@ -48,24 +46,46 @@ static void doublefault_fn(void)
cpu_relax();
}
-struct x86_hw_tss doublefault_tss __cacheline_aligned = {
- .sp0 = STACK_START,
- .ss0 = __KERNEL_DS,
- .ldt = 0,
+DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
+ .tss = {
+ /*
+ * No sp0 or ss0 -- we never run CPL != 0 with this TSS
+ * active. sp is filled in later.
+ */
+ .ldt = 0,
.io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
- .ip = (unsigned long) doublefault_fn,
- /* 0x2 bit is always set */
- .flags = X86_EFLAGS_SF | 0x2,
- .sp = STACK_START,
- .es = __USER_DS,
- .cs = __KERNEL_CS,
- .ss = __KERNEL_DS,
- .ds = __USER_DS,
- .fs = __KERNEL_PERCPU,
+ .ip = (unsigned long) doublefault_fn,
+ /* 0x2 bit is always set */
+ .flags = X86_EFLAGS_SF | 0x2,
+ .es = __USER_DS,
+ .cs = __KERNEL_CS,
+ .ss = __KERNEL_DS,
+ .ds = __USER_DS,
+ .fs = __KERNEL_PERCPU,
#ifndef CONFIG_X86_32_LAZY_GS
- .gs = __KERNEL_STACK_CANARY,
+ .gs = __KERNEL_STACK_CANARY,
#endif
- .__cr3 = __pa_nodebug(swapper_pg_dir),
+ .__cr3 = __pa_nodebug(swapper_pg_dir),
+ },
};
+
+void doublefault_init_cpu_tss(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
+
+ /*
+ * The linker isn't smart enough to initialize percpu variables that
+ * point to other places in percpu space.
+ */
+ this_cpu_write(doublefault_stack.tss.sp,
+ (unsigned long)&cea->doublefault_stack.stack +
+ sizeof(doublefault_stack.stack));
+
+ /* Set up doublefault TSS pointer in the GDT */
+ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS,
+ &get_cpu_entry_area(cpu)->doublefault_stack.tss);
+
+}
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 64a59d726639..8e3a8fedfa4d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -29,6 +29,9 @@ const char *stack_type_name(enum stack_type type)
if (type == STACK_TYPE_ENTRY)
return "ENTRY_TRAMPOLINE";
+ if (type == STACK_TYPE_EXCEPTION)
+ return "#DF";
+
return NULL;
}
@@ -82,6 +85,30 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
return true;
}
+static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info)
+{
+#ifdef CONFIG_DOUBLEFAULT
+ struct cpu_entry_area *cea = get_cpu_entry_area(raw_smp_processor_id());
+ struct doublefault_stack *ss = &cea->doublefault_stack;
+
+ void *begin = ss->stack;
+ void *end = begin + sizeof(ss->stack);
+
+ if ((void *)stack < begin || (void *)stack >= end)
+ return false;
+
+ info->type = STACK_TYPE_EXCEPTION;
+ info->begin = begin;
+ info->end = end;
+ info->next_sp = (unsigned long *)this_cpu_read(cpu_tss_rw.x86_tss.sp);
+
+ return true;
+#else
+ return false;
+#endif
+}
+
+
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask)
{
@@ -105,6 +132,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
if (in_softirq_stack(stack, info))
goto recursion_check;
+ if (in_doublefault_stack(stack, info))
+ goto recursion_check;
+
goto unknown;
recursion_check: