diff options
Diffstat (limited to 'arch/x86/kernel/vsyscall_64.c')
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 147 |
1 files changed, 64 insertions, 83 deletions
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 957779f4eb40..2dcc6ff6fdcc 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -1,59 +1,43 @@ /* + * Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net> + * + * Based on the original implementation which is: * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE * Copyright 2003 Andi Kleen, SuSE Labs. * - * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ] + * Parts of the original code have been moved to arch/x86/vdso/vma.c + * + * This file implements vsyscall emulation. vsyscalls are a legacy ABI: + * Userspace can request certain kernel services by calling fixed + * addresses. This concept is problematic: * - * Thanks to hpa@transmeta.com for some useful hint. - * Special thanks to Ingo Molnar for his early experience with - * a different vsyscall implementation for Linux/IA32 and for the name. + * - It interferes with ASLR. + * - It's awkward to write code that lives in kernel addresses but is + * callable by userspace at fixed addresses. + * - The whole concept is impossible for 32-bit compat userspace. + * - UML cannot easily virtualize a vsyscall. * - * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located - * at virtual address -10Mbyte+1024bytes etc... There are at max 4 - * vsyscalls. One vsyscall can reserve more than 1 slot to avoid - * jumping out of line if necessary. We cannot add more with this - * mechanism because older kernels won't return -ENOSYS. + * As of mid-2014, I believe that there is no new userspace code that + * will use a vsyscall if the vDSO is present. I hope that there will + * soon be no new userspace code that will ever use a vsyscall. * - * Note: the concept clashes with user mode linux. UML users should - * use the vDSO. + * The code in this file emulates vsyscalls when notified of a page + * fault to a vsyscall address. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/time.h> -#include <linux/init.h> #include <linux/kernel.h> #include <linux/timer.h> -#include <linux/seqlock.h> -#include <linux/jiffies.h> -#include <linux/sysctl.h> -#include <linux/topology.h> -#include <linux/timekeeper_internal.h> -#include <linux/getcpu.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <linux/notifier.h> #include <linux/syscalls.h> #include <linux/ratelimit.h> #include <asm/vsyscall.h> -#include <asm/pgtable.h> -#include <asm/compat.h> -#include <asm/page.h> #include <asm/unistd.h> #include <asm/fixmap.h> -#include <asm/errno.h> -#include <asm/io.h> -#include <asm/segment.h> -#include <asm/desc.h> -#include <asm/topology.h> #include <asm/traps.h> #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -DEFINE_VVAR(int, vgetcpu_mode); - static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; static int __init vsyscall_setup(char *str) @@ -222,6 +206,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) "seccomp tried to change syscall nr or ip"); do_exit(SIGSYS); } + regs->orig_ax = -1; if (tmp) goto do_ret; /* skip requested */ @@ -284,46 +269,54 @@ sigsegv: } /* - * Assume __initcall executes before all user space. Hopefully kmod - * doesn't violate that. We'll find out if it does. + * A pseudo VMA to allow ptrace access for the vsyscall page. This only + * covers the 64bit vsyscall page now. 32bit has a real VMA now and does + * not need special handling anymore: */ -static void vsyscall_set_cpu(int cpu) +static const char *gate_vma_name(struct vm_area_struct *vma) { - unsigned long d; - unsigned long node = 0; -#ifdef CONFIG_NUMA - node = cpu_to_node(cpu); -#endif - if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) - write_rdtscp_aux((node << 12) | cpu); - - /* - * Store cpu number in limit so that it can be loaded quickly - * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node) - */ - d = 0x0f40000000000ULL; - d |= cpu; - d |= (node & 0xf) << 12; - d |= (node >> 4) << 48; - - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); + return "[vsyscall]"; } - -static void cpu_vsyscall_init(void *arg) +static struct vm_operations_struct gate_vma_ops = { + .name = gate_vma_name, +}; +static struct vm_area_struct gate_vma = { + .vm_start = VSYSCALL_ADDR, + .vm_end = VSYSCALL_ADDR + PAGE_SIZE, + .vm_page_prot = PAGE_READONLY_EXEC, + .vm_flags = VM_READ | VM_EXEC, + .vm_ops = &gate_vma_ops, +}; + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { - /* preemption should be already off */ - vsyscall_set_cpu(raw_smp_processor_id()); +#ifdef CONFIG_IA32_EMULATION + if (!mm || mm->context.ia32_compat) + return NULL; +#endif + if (vsyscall_mode == NONE) + return NULL; + return &gate_vma; } -static int -cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { - long cpu = (long)arg; + struct vm_area_struct *vma = get_gate_vma(mm); + + if (!vma) + return 0; - if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) - smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); + return (addr >= vma->vm_start) && (addr < vma->vm_end); +} - return NOTIFY_DONE; +/* + * Use this when you have no reliable mm, typically from interrupt + * context. It is less reliable than using a task's mm and may give + * false positives. + */ +int in_gate_area_no_mm(unsigned long addr) +{ + return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; } void __init map_vsyscall(void) @@ -331,24 +324,12 @@ void __init map_vsyscall(void) extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ? PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); + if (vsyscall_mode != NONE) + __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, + vsyscall_mode == NATIVE + ? PAGE_KERNEL_VSYSCALL + : PAGE_KERNEL_VVAR); + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); } - -static int __init vsyscall_init(void) -{ - cpu_notifier_register_begin(); - - on_each_cpu(cpu_vsyscall_init, NULL, 1); - /* notifier priority > KVM */ - __hotcpu_notifier(cpu_vsyscall_notifier, 30); - - cpu_notifier_register_done(); - - return 0; -} -__initcall(vsyscall_init); |