Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/common.c   | 188
-rw-r--r--  arch/x86/kernel/doublefault.c  |   2
-rw-r--r--  arch/x86/kernel/ioport.c       | 209
-rw-r--r--  arch/x86/kernel/paravirt.c     |   2
-rw-r--r--  arch/x86/kernel/process.c      | 205
-rw-r--r--  arch/x86/kernel/process_32.c   |  77
-rw-r--r--  arch/x86/kernel/process_64.c   |  86
-rw-r--r--  arch/x86/kernel/ptrace.c       |  12
8 files changed, 392 insertions, 389 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3103f2bfed85..baa2fed8deb6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -53,10 +53,7 @@
 #include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
-#endif
 
 #include "cpu.h"
 
@@ -1781,7 +1778,7 @@ static void wait_for_master_cpu(int cpu)
 }
 
 #ifdef CONFIG_X86_64
-static void setup_getcpu(int cpu)
+static inline void setup_getcpu(int cpu)
 {
 	unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
 	struct desc_struct d = { };
@@ -1801,7 +1798,59 @@ static void setup_getcpu(int cpu)
 
 	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
 }
+
+static inline void ucode_cpu_init(int cpu)
+{
+	if (cpu)
+		load_ucode_ap();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss)
+{
+	/* Set up the per-CPU TSS IST stacks */
+	tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
+	tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
+	tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
+	tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+}
+
+static inline void gdt_setup_doublefault_tss(int cpu) { }
+
+#else /* CONFIG_X86_64 */
+
+static inline void setup_getcpu(int cpu) { }
+
+static inline void ucode_cpu_init(int cpu)
+{
+	show_ucode_info_early();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss) { }
+
+static inline void gdt_setup_doublefault_tss(int cpu)
+{
+#ifdef CONFIG_DOUBLEFAULT
+	/* Set up the doublefault TSS pointer in the GDT */
+	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#endif
+}
+#endif /* !CONFIG_X86_64 */
+
+static inline void tss_setup_io_bitmap(struct tss_struct *tss)
+{
+	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+	tss->io_bitmap.prev_max = 0;
+	tss->io_bitmap.prev_sequence = 0;
+	memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
+	/*
+	 * Invalidate the extra array entry past the end of the all
+	 * permission bitmap as required by the hardware.
+	 */
+	tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
 #endif
+}
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
@@ -1809,21 +1858,15 @@ static void setup_getcpu(int cpu)
  * and IDT. We reload them nevertheless, this function acts as a
  * 'CPU state barrier', nothing should get across.
  */
-#ifdef CONFIG_X86_64
-
 void cpu_init(void)
 {
+	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+	struct task_struct *cur = current;
 	int cpu = raw_smp_processor_id();
-	struct task_struct *me;
-	struct tss_struct *t;
-	int i;
 
 	wait_for_master_cpu(cpu);
 
-	if (cpu)
-		load_ucode_ap();
-
-	t = &per_cpu(cpu_tss_rw, cpu);
+	ucode_cpu_init(cpu);
 
 #ifdef CONFIG_NUMA
 	if (this_cpu_read(numa_node) == 0 &&
@@ -1832,63 +1875,47 @@ void cpu_init(void)
 #endif
 	setup_getcpu(cpu);
 
-	me = current;
-
 	pr_debug("Initializing CPU#%d\n", cpu);
 
-	cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+	if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
+	    boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
+		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 	/*
 	 * Initialize the per-CPU GDT with the boot GDT,
 	 * and set up the GDT descriptor:
 	 */
 	switch_to_new_gdt(cpu);
-	loadsegment(fs, 0);
-
 	load_current_idt();
 
-	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
-	syscall_init();
-
-	wrmsrl(MSR_FS_BASE, 0);
-	wrmsrl(MSR_KERNEL_GS_BASE, 0);
-	barrier();
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		loadsegment(fs, 0);
+		memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
+		syscall_init();
 
-	x86_configure_nx();
-	x2apic_setup();
+		wrmsrl(MSR_FS_BASE, 0);
+		wrmsrl(MSR_KERNEL_GS_BASE, 0);
+		barrier();
 
-	/*
-	 * set up and load the per-CPU TSS
-	 */
-	if (!t->x86_tss.ist[0]) {
-		t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
-		t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
-		t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
-		t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+		x2apic_setup();
 	}
 
-	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
-	/*
-	 * <= is required because the CPU will access up to
-	 * 8 bits beyond the end of the IO permission bitmap.
-	 */
-	for (i = 0; i <= IO_BITMAP_LONGS; i++)
-		t->io_bitmap[i] = ~0UL;
-
 	mmgrab(&init_mm);
-	me->active_mm = &init_mm;
-	BUG_ON(me->mm);
+	cur->active_mm = &init_mm;
+	BUG_ON(cur->mm);
 	initialize_tlbstate_and_flush();
-	enter_lazy_tlb(&init_mm, me);
+	enter_lazy_tlb(&init_mm, cur);
 
-	/*
-	 * Initialize the TSS. sp0 points to the entry trampoline stack
-	 * regardless of what task is running.
-	 */
+	/* Initialize the TSS. */
+	tss_setup_ist(tss);
+	tss_setup_io_bitmap(tss);
 	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+	load_TR_desc();
+	/*
+	 * sp0 points to the entry trampoline stack regardless of what task
+	 * is running.
+	 */
 	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
 	load_mm_ldt(&init_mm);
@@ -1896,6 +1923,8 @@ void cpu_init(void)
 	clear_all_debug_regs();
 	dbg_restore_debug_regs();
 
+	gdt_setup_doublefault_tss(cpu);
+
 	fpu__init_cpu();
 
 	if (is_uv_system())
@@ -1904,63 +1933,6 @@ void cpu_init(void)
 	load_fixmap_gdt(cpu);
 }
 
-#else
-
-void cpu_init(void)
-{
-	int cpu = smp_processor_id();
-	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
-
-	wait_for_master_cpu(cpu);
-
-	show_ucode_info_early();
-
-	pr_info("Initializing CPU#%d\n", cpu);
-
-	if (cpu_feature_enabled(X86_FEATURE_VME) ||
-	    boot_cpu_has(X86_FEATURE_TSC) ||
-	    boot_cpu_has(X86_FEATURE_DE))
-		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-
-	load_current_idt();
-	switch_to_new_gdt(cpu);
-
-	/*
-	 * Set up and load the per-CPU TSS and LDT
-	 */
-	mmgrab(&init_mm);
-	curr->active_mm = &init_mm;
-	BUG_ON(curr->mm);
-	initialize_tlbstate_and_flush();
-	enter_lazy_tlb(&init_mm, curr);
-
-	/*
-	 * Initialize the TSS. sp0 points to the entry trampoline stack
-	 * regardless of what task is running.
-	 */
-	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-	load_TR_desc();
-	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
-
-	load_mm_ldt(&init_mm);
-
-	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
-#ifdef CONFIG_DOUBLEFAULT
-	/* Set up doublefault TSS pointer in the GDT */
-	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
-#endif
-
-	clear_all_debug_regs();
-	dbg_restore_debug_regs();
-
-	fpu__init_cpu();
-
-	load_fixmap_gdt(cpu);
-}
-#endif
-
 /*
  * The microcode loader calls this upon late microcode load to recheck features,
  * only when microcode has been updated. Caller holds microcode_mutex and CPU
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index d5c9b13bafdf..0d6c657593f8 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -54,7 +54,7 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = {
 	.sp0		= STACK_START,
 	.ss0		= __KERNEL_DS,
 	.ldt		= 0,
-	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
+	.io_bitmap_base	= IO_BITMAP_OFFSET_INVALID,
 
 	.ip		= (unsigned long) doublefault_fn,
 	/* 0x2 bit is always set */
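[Editor's note] The cpu_init() rework in common.c above merges the former 32-bit and 64-bit copies into one function: the per-config helpers (ucode_cpu_init(), tss_setup_ist(), gdt_setup_doublefault_tss()) collapse to empty inlines where they do not apply, and IS_ENABLED() turns the remaining differences into compile-time-eliminated branches. A minimal standalone sketch of that pattern, assuming an invented CONFIG_64BIT_DEMO switch and made-up helper names (this is not kernel code):

#include <stdio.h>

#ifndef CONFIG_64BIT_DEMO
#define CONFIG_64BIT_DEMO 0	/* build with -DCONFIG_64BIT_DEMO=1 for the "64-bit" path */
#endif

#if CONFIG_64BIT_DEMO
static inline void arch_setup_syscall_msrs(void) { puts("64-bit: program syscall MSRs"); }
#else
/* Empty stub: the call below compiles away entirely on "32-bit" builds. */
static inline void arch_setup_syscall_msrs(void) { }
#endif

static void cpu_init_demo(void)
{
	puts("common path: GDT/IDT/TSS setup");
	if (CONFIG_64BIT_DEMO)	/* IS_ENABLED()-style compile-time branch */
		arch_setup_syscall_msrs();
}

int main(void)
{
	cpu_init_demo();
	return 0;
}

Either way the compiler sees one translation unit with no dead references, which is what lets a single cpu_init() body serve both configurations.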
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 61a89d3c0382..8abeee0dd7bf 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -3,32 +3,69 @@
  * This contains the io-permission bitmap code - written by obz, with changes
  * by Linus. 32/64 bits code unification by Miguel Botón.
  */
-
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/kernel.h>
 #include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
 #include <linux/security.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/thread_info.h>
 #include <linux/syscalls.h>
 #include <linux/bitmap.h>
-#include <asm/syscalls.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/io_bitmap.h>
 #include <asm/desc.h>
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+
+static atomic64_t io_bitmap_sequence;
+
+void io_bitmap_share(struct task_struct *tsk)
+{
+	/* Can be NULL when current->thread.iopl_emul == 3 */
+	if (current->thread.io_bitmap) {
+		/*
+		 * Take a refcount on current's bitmap. It can be used by
+		 * both tasks as long as none of them changes the bitmap.
+		 */
+		refcount_inc(&current->thread.io_bitmap->refcnt);
+		tsk->thread.io_bitmap = current->thread.io_bitmap;
+	}
+	set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
+}
+
+static void task_update_io_bitmap(void)
+{
+	struct thread_struct *t = &current->thread;
+
+	if (t->iopl_emul == 3 || t->io_bitmap) {
+		/* TSS update is handled on exit to user space */
+		set_thread_flag(TIF_IO_BITMAP);
+	} else {
+		clear_thread_flag(TIF_IO_BITMAP);
+		/* Invalidate TSS */
+		preempt_disable();
+		tss_update_io_bitmap();
+		preempt_enable();
+	}
+}
+
+void io_bitmap_exit(void)
+{
+	struct io_bitmap *iobm = current->thread.io_bitmap;
+
+	current->thread.io_bitmap = NULL;
+	task_update_io_bitmap();
+	if (iobm && refcount_dec_and_test(&iobm->refcnt))
+		kfree(iobm);
+}
+
 /*
- * this changes the io permissions bitmap in the current task.
+ * This changes the io permissions bitmap in the current task.
  */
 long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
 	struct thread_struct *t = &current->thread;
-	struct tss_struct *tss;
-	unsigned int i, max_long, bytes, bytes_updated;
+	unsigned int i, max_long;
+	struct io_bitmap *iobm;
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
@@ -41,59 +78,72 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
 	 * IO bitmap up. ioperm() is much less timing critical than clone(),
 	 * this is why we delay this operation until now:
 	 */
-	if (!t->io_bitmap_ptr) {
-		unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
-
-		if (!bitmap)
+	iobm = t->io_bitmap;
+	if (!iobm) {
+		/* No point to allocate a bitmap just to clear permissions */
+		if (!turn_on)
+			return 0;
+		iobm = kmalloc(sizeof(*iobm), GFP_KERNEL);
+		if (!iobm)
 			return -ENOMEM;
 
-		memset(bitmap, 0xff, IO_BITMAP_BYTES);
-		t->io_bitmap_ptr = bitmap;
-		set_thread_flag(TIF_IO_BITMAP);
+		memset(iobm->bitmap, 0xff, sizeof(iobm->bitmap));
+		refcount_set(&iobm->refcnt, 1);
+	}
 
-		/*
-		 * Now that we have an IO bitmap, we need our TSS limit to be
-		 * correct. It's fine if we are preempted after doing this:
-		 * with TIF_IO_BITMAP set, context switches will keep our TSS
-		 * limit correct.
-		 */
-		preempt_disable();
-		refresh_tss_limit();
-		preempt_enable();
+	/*
+	 * If the bitmap is not shared, then nothing can take a refcount as
+	 * current can obviously not fork at the same time. If it's shared
+	 * duplicate it and drop the refcount on the original one.
+	 */
+	if (refcount_read(&iobm->refcnt) > 1) {
+		iobm = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL);
+		if (!iobm)
+			return -ENOMEM;
+		refcount_set(&iobm->refcnt, 1);
+		io_bitmap_exit();
 	}
 
 	/*
-	 * do it in the per-thread copy and in the TSS ...
-	 *
-	 * Disable preemption via get_cpu() - we must not switch away
-	 * because the ->io_bitmap_max value must match the bitmap
-	 * contents:
+	 * Store the bitmap pointer (might be the same if the task already
+	 * had one). Must be done here so freeing the bitmap when all
+	 * permissions are dropped has the pointer set up.
 	 */
-	tss = &per_cpu(cpu_tss_rw, get_cpu());
+	t->io_bitmap = iobm;
+	/* Mark it active for context switching and exit to user mode */
+	set_thread_flag(TIF_IO_BITMAP);
 
+	/*
+	 * Update the task's bitmap. The update of the TSS bitmap happens on
+	 * exit to user mode. So this needs no protection.
+	 */
 	if (turn_on)
-		bitmap_clear(t->io_bitmap_ptr, from, num);
+		bitmap_clear(iobm->bitmap, from, num);
 	else
-		bitmap_set(t->io_bitmap_ptr, from, num);
+		bitmap_set(iobm->bitmap, from, num);
 
 	/*
 	 * Search for a (possibly new) maximum. This is simple and stupid,
 	 * to keep it obviously correct:
 	 */
-	max_long = 0;
-	for (i = 0; i < IO_BITMAP_LONGS; i++)
-		if (t->io_bitmap_ptr[i] != ~0UL)
+	max_long = UINT_MAX;
+	for (i = 0; i < IO_BITMAP_LONGS; i++) {
+		if (iobm->bitmap[i] != ~0UL)
 			max_long = i;
+	}
+
+	/* All permissions dropped? */
+	if (max_long == UINT_MAX) {
+		io_bitmap_exit();
+		return 0;
+	}
 
-	bytes = (max_long + 1) * sizeof(unsigned long);
-	bytes_updated = max(bytes, t->io_bitmap_max);
-
-	t->io_bitmap_max = bytes;
-
-	/* Update the TSS: */
-	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+	iobm->max = (max_long + 1) * sizeof(unsigned long);
 
-	put_cpu();
+	/*
+	 * Update the sequence number to force a TSS update on return to
+	 * user mode.
+	 */
+	iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence);
 
 	return 0;
 }
@@ -104,38 +154,61 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
 }
 
 /*
- * sys_iopl has to be used when you want to access the IO ports
- * beyond the 0x3ff range: to get the full 65536 ports bitmapped
- * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ * The sys_iopl functionality depends on the level argument, which if
+ * granted for the task is used to enable access to all 65536 I/O ports.
+ *
+ * This does not use the IOPL mechanism provided by the CPU as that would
+ * also allow the user space task to use the CLI/STI instructions.
  *
- * Here we just change the flags value on the stack: we allow
- * only the super-user to do it. This depends on the stack-layout
- * on system-call entry - see also fork() and the signal handling
- * code.
+ * Disabling interrupts in a user space task is dangerous as it might lock
+ * up the machine and the semantics vs. syscalls and exceptions is
+ * undefined.
+ *
+ * Setting IOPL to level 0-2 is disabling I/O permissions. Level 3
+ * enables them.
+ *
+ * IOPL is strictly per thread and inherited on fork.
 */
 SYSCALL_DEFINE1(iopl, unsigned int, level)
 {
-	struct pt_regs *regs = current_pt_regs();
 	struct thread_struct *t = &current->thread;
-
-	/*
-	 * Careful: the IOPL bits in regs->flags are undefined under Xen PV
-	 * and changing them has no effect.
-	 */
-	unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
+	unsigned int old;
 
 	if (level > 3)
 		return -EINVAL;
+
+	old = t->iopl_emul;
+
+	/* No point in going further if nothing changes */
+	if (level == old)
+		return 0;
+
 	/* Trying to gain more privileges? */
 	if (level > old) {
 		if (!capable(CAP_SYS_RAWIO) ||
 		    security_locked_down(LOCKDOWN_IOPORT))
 			return -EPERM;
 	}
-	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
-		(level << X86_EFLAGS_IOPL_BIT);
-	t->iopl = level << X86_EFLAGS_IOPL_BIT;
-	set_iopl_mask(t->iopl);
+
+	t->iopl_emul = level;
+	task_update_io_bitmap();
 
 	return 0;
 }
+
+#else /* CONFIG_X86_IOPL_IOPERM */
+
+long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+	return -ENOSYS;
+}
+SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
+{
+	return -ENOSYS;
+}
+
+SYSCALL_DEFINE1(iopl, unsigned int, level)
+{
+	return -ENOSYS;
+}
+#endif
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 59d3d2763a9e..789f5e4f89de 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -341,8 +341,6 @@ struct paravirt_patch_template pv_ops = {
 	.cpu.iret		= native_iret,
 	.cpu.swapgs		= native_swapgs,
 
-	.cpu.set_iopl_mask	= native_set_iopl_mask,
-
 	.cpu.start_context_switch	= paravirt_nop,
 	.cpu.end_context_switch		= paravirt_nop,
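[Editor's note] From user space the ioperm()/iopl() interface reworked in ioport.c above is unchanged; only the mechanism behind it differs (a refcounted per-thread bitmap plus IOPL emulation, instead of a per-CPU bitmap copy and EFLAGS.IOPL). A hedged usage sketch using the glibc wrappers from <sys/io.h> (x86 only, needs CAP_SYS_RAWIO, compile with optimization so outb() is inlined); port 0x80 is the traditional POST diagnostic port:

#include <sys/io.h>	/* ioperm(), iopl(), outb() */
#include <string.h>
#include <stdio.h>
#include <errno.h>

int main(void)
{
	/* Grant this thread access to the single port 0x80 via the bitmap. */
	if (ioperm(0x80, 1, 1)) {
		fprintf(stderr, "ioperm: %s (need CAP_SYS_RAWIO)\n",
			strerror(errno));
		return 1;
	}
	outb(0, 0x80);		/* permitted: the bit is clear in the bitmap */
	ioperm(0x80, 1, 0);	/* dropping the last port frees the bitmap */

	/*
	 * iopl(3) now only sets the per-thread emulation level: all 65536
	 * ports become accessible, but CLI/STI still fault because the
	 * real EFLAGS.IOPL is no longer touched.
	 */
	if (iopl(3) == 0) {
		outb(0, 0x80);
		iopl(0);	/* drop the emulated privilege again */
	}
	return 0;
}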
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5e94c4354d4e..bd2a11ca5dd6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -41,6 +41,7 @@
 #include <asm/desc.h>
 #include <asm/prctl.h>
 #include <asm/spec-ctrl.h>
+#include <asm/io_bitmap.h>
 #include <asm/proto.h>
 
 #include "process.h"
@@ -72,18 +73,9 @@ __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
 #ifdef CONFIG_X86_32
 		.ss0 = __KERNEL_DS,
 		.ss1 = __KERNEL_CS,
-		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
 #endif
+		.io_bitmap_base	= IO_BITMAP_OFFSET_INVALID,
 	},
-#ifdef CONFIG_X86_32
-	/*
-	 * Note that the .io_bitmap member must be extra-big. This is because
-	 * the CPU will access an additional byte beyond the end of the IO
-	 * permission bitmap. The extra byte must be all 1 bits, and must
-	 * be within the limit.
-	 */
-	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
-#endif
 };
 EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
 
@@ -110,28 +102,89 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 void exit_thread(struct task_struct *tsk)
 {
 	struct thread_struct *t = &tsk->thread;
-	unsigned long *bp = t->io_bitmap_ptr;
 	struct fpu *fpu = &t->fpu;
 
-	if (bp) {
-		struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
-
-		t->io_bitmap_ptr = NULL;
-		clear_thread_flag(TIF_IO_BITMAP);
-		/*
-		 * Careful, clear this in the TSS too:
-		 */
-		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
-		t->io_bitmap_max = 0;
-		put_cpu();
-		kfree(bp);
-	}
+	if (test_thread_flag(TIF_IO_BITMAP))
+		io_bitmap_exit();
 
 	free_vm86(t);
 
 	fpu__drop(fpu);
 }
 
+static int set_new_tls(struct task_struct *p, unsigned long tls)
+{
+	struct user_desc __user *utls = (struct user_desc __user *)tls;
+
+	if (in_ia32_syscall())
+		return do_set_thread_area(p, -1, utls, 0);
+	else
+		return do_set_thread_area_64(p, ARCH_SET_FS, tls);
+}
+
+int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+		    unsigned long arg, struct task_struct *p, unsigned long tls)
+{
+	struct inactive_task_frame *frame;
+	struct fork_frame *fork_frame;
+	struct pt_regs *childregs;
+	int ret = 0;
+
+	childregs = task_pt_regs(p);
+	fork_frame = container_of(childregs, struct fork_frame, regs);
+	frame = &fork_frame->frame;
+
+	frame->bp = 0;
+	frame->ret_addr = (unsigned long) ret_from_fork;
+	p->thread.sp = (unsigned long) fork_frame;
+	p->thread.io_bitmap = NULL;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+#ifdef CONFIG_X86_64
+	savesegment(gs, p->thread.gsindex);
+	p->thread.gsbase = p->thread.gsindex ? 0 : current->thread.gsbase;
+	savesegment(fs, p->thread.fsindex);
+	p->thread.fsbase = p->thread.fsindex ? 0 : current->thread.fsbase;
+	savesegment(es, p->thread.es);
+	savesegment(ds, p->thread.ds);
+#else
+	p->thread.sp0 = (unsigned long) (childregs + 1);
+	/*
+	 * Clear all status flags including IF and set fixed bit. 64bit
+	 * does not have this initialization as the frame does not contain
+	 * flags. The flags consistency (especially vs. AC) is there
+	 * ensured via objtool, which lacks 32bit support.
+	 */
+	frame->flags = X86_EFLAGS_FIXED;
+#endif
+
+	/* Kernel thread ? */
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		kthread_frame_init(frame, sp, arg);
+		return 0;
+	}
+
+	frame->bx = 0;
+	*childregs = *current_pt_regs();
+	childregs->ax = 0;
+	if (sp)
+		childregs->sp = sp;
+
+#ifdef CONFIG_X86_32
+	task_user_gs(p) = get_user_gs(current_pt_regs());
+#endif
+
+	/* Set a new TLS for the child thread? */
+	if (clone_flags & CLONE_SETTLS)
+		ret = set_new_tls(p, tls);
+
+	if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
+		io_bitmap_share(p);
+
+	return ret;
+}
+
 void flush_thread(void)
 {
 	struct task_struct *tsk = current;
@@ -269,31 +322,96 @@ void arch_setup_new_exec(void)
 	}
 }
 
-static inline void switch_to_bitmap(struct thread_struct *prev,
-				    struct thread_struct *next,
-				    unsigned long tifp, unsigned long tifn)
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_invalidate_io_bitmap(struct tss_struct *tss)
 {
-	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+	/*
+	 * Invalidate the I/O bitmap by moving io_bitmap_base outside the
+	 * TSS limit so any subsequent I/O access from user space will
+	 * trigger a #GP.
+	 *
+	 * This is correct even when VMEXIT rewrites the TSS limit
+	 * to 0x67 as the only requirement is that the base points
+	 * outside the limit.
+	 */
+	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+}
 
-	if (tifn & _TIF_IO_BITMAP) {
-		/*
-		 * Copy the relevant range of the IO bitmap.
-		 * Normally this is 128 bytes or less:
-		 */
-		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-		       max(prev->io_bitmap_max, next->io_bitmap_max));
+static inline void switch_to_bitmap(unsigned long tifp)
+{
+	/*
+	 * Invalidate I/O bitmap if the previous task used it. This prevents
+	 * any possible leakage of an active I/O bitmap.
+	 *
+	 * If the next task has an I/O bitmap it will handle it on exit to
+	 * user mode.
+	 */
+	if (tifp & _TIF_IO_BITMAP)
+		tss_invalidate_io_bitmap(this_cpu_ptr(&cpu_tss_rw));
+}
+
+static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
+{
+	/*
+	 * Copy at least the byte range of the incoming task's bitmap which
+	 * covers the permitted I/O ports.
+	 *
+	 * If the previous task which used an I/O bitmap had more bits
+	 * permitted, then the copy needs to cover those as well so they
+	 * get turned off.
+	 */
+	memcpy(tss->io_bitmap.bitmap, iobm->bitmap,
+	       max(tss->io_bitmap.prev_max, iobm->max));
+
+	/*
+	 * Store the new max and the sequence number of this bitmap
+	 * and a pointer to the bitmap itself.
+	 */
+	tss->io_bitmap.prev_max = iobm->max;
+	tss->io_bitmap.prev_sequence = iobm->sequence;
+}
+
+/**
+ * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
+ */
+void tss_update_io_bitmap(void)
+{
+	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+	u16 *base = &tss->x86_tss.io_bitmap_base;
+
+	if (test_thread_flag(TIF_IO_BITMAP)) {
+		struct thread_struct *t = &current->thread;
+
+		if (IS_ENABLED(CONFIG_X86_IOPL_IOPERM) && t->iopl_emul == 3) {
+			*base = IO_BITMAP_OFFSET_VALID_ALL;
+		} else {
+			struct io_bitmap *iobm = t->io_bitmap;
+			/*
+			 * Only copy bitmap data when the sequence number
+			 * differs. The update time is accounted to the
+			 * incoming task.
+			 */
+			if (tss->io_bitmap.prev_sequence != iobm->sequence)
+				tss_copy_io_bitmap(tss, iobm);
+
+			/* Enable the bitmap */
+			*base = IO_BITMAP_OFFSET_VALID_MAP;
+		}
 		/*
-		 * Make sure that the TSS limit is correct for the CPU
-		 * to notice the IO bitmap.
+		 * Make sure that the TSS limit is covering the io bitmap.
+		 * It might have been cut down by a VMEXIT to 0x67 which
+		 * would cause a subsequent I/O access from user space to
+		 * trigger a #GP because the bitmap is outside the TSS
+		 * limit.
 		 */
 		refresh_tss_limit();
-	} else if (tifp & _TIF_IO_BITMAP) {
-		/*
-		 * Clear any possible leftover bits:
-		 */
-		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+	} else {
+		tss_invalidate_io_bitmap(tss);
 	}
 }
+#else /* CONFIG_X86_IOPL_IOPERM */
+static inline void switch_to_bitmap(unsigned long tifp) { }
+#endif
 
 #ifdef CONFIG_SMP
@@ -505,7 +623,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 	tifn = READ_ONCE(task_thread_info(next_p)->flags);
 	tifp = READ_ONCE(task_thread_info(prev_p)->flags);
 
-	switch_to_bitmap(prev, next, tifp, tifn);
+	switch_to_bitmap(tifp);
 
 	propagate_user_return_notify(prev_p, next_p);
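[Editor's note] tss_update_io_bitmap() above avoids a memcpy() on every exit to user mode by comparing sequence numbers: the per-CPU TSS copy is refreshed only when it was last filled from a different, or since-modified, bitmap. A self-contained sketch of that scheme under invented names (demo_* types stand in for struct io_bitmap and the TSS-side state; this is not the kernel's code):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct demo_bitmap   { uint64_t sequence; unsigned long bits[4]; };
struct demo_tss_copy { uint64_t prev_sequence; unsigned long bits[4]; };

static uint64_t demo_sequence_counter;

/* ioperm() side: every modification bumps the global sequence number. */
static void demo_bitmap_change(struct demo_bitmap *bm, unsigned long val)
{
	bm->bits[0] = val;
	bm->sequence = ++demo_sequence_counter;
}

/* exit-to-user side: copy only when the cached sequence is stale. */
static void demo_tss_sync(struct demo_tss_copy *tss, const struct demo_bitmap *bm)
{
	if (tss->prev_sequence == bm->sequence) {
		puts("sequence unchanged: copy skipped");
		return;
	}
	memcpy(tss->bits, bm->bits, sizeof(tss->bits));
	tss->prev_sequence = bm->sequence;
	puts("sequence changed: bitmap copied");
}

int main(void)
{
	struct demo_bitmap bm = { 0 };
	struct demo_tss_copy tss = { .prev_sequence = UINT64_MAX };

	demo_bitmap_change(&bm, 0xff);
	demo_tss_sync(&tss, &bm);	/* copies */
	demo_tss_sync(&tss, &bm);	/* skipped: same bitmap again */
	demo_bitmap_change(&bm, 0xf0);
	demo_tss_sync(&tss, &bm);	/* copies again */
	return 0;
}

Switching back and forth between threads that share one unmodified bitmap therefore costs only a sequence comparison, which is why the global counter is 64-bit: it must never wrap in practice.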
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b8ceec4974fe..323499f48858 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -112,74 +112,6 @@ void release_thread(struct task_struct *dead_task)
 	release_vm86_irqs(dead_task);
 }
 
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
-	unsigned long arg, struct task_struct *p, unsigned long tls)
-{
-	struct pt_regs *childregs = task_pt_regs(p);
-	struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
-	struct inactive_task_frame *frame = &fork_frame->frame;
-	struct task_struct *tsk;
-	int err;
-
-	/*
-	 * For a new task use the RESET flags value since there is no before.
-	 * All the status flags are zero; DF and all the system flags must also
-	 * be 0, specifically IF must be 0 because we context switch to the new
-	 * task with interrupts disabled.
-	 */
-	frame->flags = X86_EFLAGS_FIXED;
-	frame->bp = 0;
-	frame->ret_addr = (unsigned long) ret_from_fork;
-	p->thread.sp = (unsigned long) fork_frame;
-	p->thread.sp0 = (unsigned long) (childregs+1);
-	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
-	if (unlikely(p->flags & PF_KTHREAD)) {
-		/* kernel thread */
-		memset(childregs, 0, sizeof(struct pt_regs));
-		frame->bx = sp;		/* function */
-		frame->di = arg;
-		p->thread.io_bitmap_ptr = NULL;
-		return 0;
-	}
-
-	frame->bx = 0;
-	*childregs = *current_pt_regs();
-	childregs->ax = 0;
-	if (sp)
-		childregs->sp = sp;
-
-	task_user_gs(p) = get_user_gs(current_pt_regs());
-
-	p->thread.io_bitmap_ptr = NULL;
-	tsk = current;
-	err = -ENOMEM;
-
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
-		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
-						IO_BITMAP_BYTES, GFP_KERNEL);
-		if (!p->thread.io_bitmap_ptr) {
-			p->thread.io_bitmap_max = 0;
-			return -ENOMEM;
-		}
-		set_tsk_thread_flag(p, TIF_IO_BITMAP);
-	}
-
-	err = 0;
-
-	/*
-	 * Set a new TLS for the child thread?
-	 */
-	if (clone_flags & CLONE_SETTLS)
-		err = do_set_thread_area(p, -1,
-			(struct user_desc __user *)tls, 0);
-
-	if (err && p->thread.io_bitmap_ptr) {
-		kfree(p->thread.io_bitmap_ptr);
-		p->thread.io_bitmap_max = 0;
-	}
-	return err;
-}
-
 void
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 {
@@ -255,15 +187,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	load_TLS(next, cpu);
 
-	/*
-	 * Restore IOPL if needed. In normal use, the flags restore
-	 * in the switch assembly will handle this. But if the kernel
-	 * is running virtualized at a non-zero CPL, the popf will
-	 * not restore flags, so it must be done in a separate step.
-	 */
-	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
-		set_iopl_mask(next->iopl);
-
 	switch_to_extra(prev_p, next_p);
 
 	/*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index af64519b2695..506d66830d4d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -371,81 +371,6 @@ void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
 	task->thread.gsbase = gsbase;
 }
 
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg, struct task_struct *p, unsigned long tls)
-{
-	int err;
-	struct pt_regs *childregs;
-	struct fork_frame *fork_frame;
-	struct inactive_task_frame *frame;
-	struct task_struct *me = current;
-
-	childregs = task_pt_regs(p);
-	fork_frame = container_of(childregs, struct fork_frame, regs);
-	frame = &fork_frame->frame;
-
-	frame->bp = 0;
-	frame->ret_addr = (unsigned long) ret_from_fork;
-	p->thread.sp = (unsigned long) fork_frame;
-	p->thread.io_bitmap_ptr = NULL;
-
-	savesegment(gs, p->thread.gsindex);
-	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
-	savesegment(fs, p->thread.fsindex);
-	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
-	savesegment(es, p->thread.es);
-	savesegment(ds, p->thread.ds);
-	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
-	if (unlikely(p->flags & PF_KTHREAD)) {
-		/* kernel thread */
-		memset(childregs, 0, sizeof(struct pt_regs));
-		frame->bx = sp;		/* function */
-		frame->r12 = arg;
-		return 0;
-	}
-	frame->bx = 0;
-	*childregs = *current_pt_regs();
-
-	childregs->ax = 0;
-	if (sp)
-		childregs->sp = sp;
-
-	err = -ENOMEM;
-	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
-		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
-						  IO_BITMAP_BYTES, GFP_KERNEL);
-		if (!p->thread.io_bitmap_ptr) {
-			p->thread.io_bitmap_max = 0;
-			return -ENOMEM;
-		}
-		set_tsk_thread_flag(p, TIF_IO_BITMAP);
-	}
-
-	/*
-	 * Set a new TLS for the child thread?
-	 */
-	if (clone_flags & CLONE_SETTLS) {
-#ifdef CONFIG_IA32_EMULATION
-		if (in_ia32_syscall())
-			err = do_set_thread_area(p, -1,
-				(struct user_desc __user *)tls, 0);
-		else
-#endif
-			err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
-		if (err)
-			goto out;
-	}
-	err = 0;
-out:
-	if (err && p->thread.io_bitmap_ptr) {
-		kfree(p->thread.io_bitmap_ptr);
-		p->thread.io_bitmap_max = 0;
-	}
-
-	return err;
-}
-
 static void
 start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 		    unsigned long new_sp,
@@ -572,17 +497,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	switch_to_extra(prev_p, next_p);
 
-#ifdef CONFIG_XEN_PV
-	/*
-	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
-	 * current_pt_regs()->flags may not match the current task's
-	 * intended IOPL. We need to switch it manually.
-	 */
-	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
-		     prev->iopl != next->iopl))
-		xen_set_iopl_mask(next->iopl);
-#endif
-
 	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
 		/*
 		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3c5bbe8e4120..066e5b01a7e0 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -42,6 +42,7 @@
 #include <asm/traps.h>
 #include <asm/syscall.h>
 #include <asm/fsgsbase.h>
+#include <asm/io_bitmap.h>
 
 #include "tls.h"
 
@@ -697,7 +698,9 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n,
 static int ioperm_active(struct task_struct *target,
 			 const struct user_regset *regset)
 {
-	return target->thread.io_bitmap_max / regset->size;
+	struct io_bitmap *iobm = target->thread.io_bitmap;
+
+	return iobm ? DIV_ROUND_UP(iobm->max, regset->size) : 0;
 }
 
 static int ioperm_get(struct task_struct *target,
@@ -705,12 +708,13 @@ static int ioperm_get(struct task_struct *target,
 		      unsigned int pos, unsigned int count,
 		      void *kbuf, void __user *ubuf)
 {
-	if (!target->thread.io_bitmap_ptr)
+	struct io_bitmap *iobm = target->thread.io_bitmap;
+
+	if (!iobm)
 		return -ENXIO;
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   target->thread.io_bitmap_ptr,
-				   0, IO_BITMAP_BYTES);
+				   iobm->bitmap, 0, IO_BITMAP_BYTES);
 }
 
 /*
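[Editor's note] One piece worth illustrating is the fork()/exit() lifecycle the diff establishes: io_bitmap_share() only takes a refcount at clone time, and ksys_ioperm() duplicates the bitmap (the kmemdup() path above) only when a task that still shares it wants to modify it. A simplified single-process sketch of that copy-on-write pattern; every name here is illustrative, not the kernel's:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct demo_bitmap {
	unsigned int refcnt;
	unsigned char bits[32];	/* stands in for the 8K I/O bitmap */
};

/* fork() side: both tasks reference one bitmap, no copy is made. */
static struct demo_bitmap *demo_share(struct demo_bitmap *bm)
{
	bm->refcnt++;
	return bm;
}

/* ioperm() side: duplicate first if anyone else still holds a reference. */
static struct demo_bitmap *demo_modify(struct demo_bitmap *bm, unsigned int port)
{
	if (bm->refcnt > 1) {
		struct demo_bitmap *copy = malloc(sizeof(*copy));

		if (!copy)
			return NULL;
		memcpy(copy, bm, sizeof(*copy));	/* kmemdup()-style */
		copy->refcnt = 1;
		bm->refcnt--;				/* drop the shared reference */
		bm = copy;
	}
	bm->bits[port / 8] &= ~(1u << (port % 8));	/* clear bit = grant port */
	return bm;
}

int main(void)
{
	struct demo_bitmap *parent = malloc(sizeof(*parent));
	struct demo_bitmap *child;

	if (!parent)
		return 1;
	memset(parent->bits, 0xff, sizeof(parent->bits));	/* all ports denied */
	parent->refcnt = 1;

	child = demo_share(parent);		/* clone: refcnt == 2, same object */
	child = demo_modify(child, 0x42);	/* write: triggers the duplication */

	printf("distinct after modify: %d (parent refcnt %u, child refcnt %u)\n",
	       parent != child, parent->refcnt, child->refcnt);

	if (child && child != parent)
		free(child);
	free(parent);
	return 0;
}

This is why exit_thread() above can shrink to a single io_bitmap_exit() call: dropping the last reference frees the bitmap, while shared references simply decrement the count.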