diff options
author | Dominik Brodowski <linux@dominikbrodowski.net> | 2018-04-05 11:53:02 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-04-05 16:59:26 +0200 |
commit | fa697140f9a20119a9ec8fd7460cc4314fbdaff3 (patch) | |
tree | f7dbd8c7fe4b9a4db2fb4f14f5faf9f51956ef43 /arch/x86/entry | |
parent | 1bd21c6c21e848996339508d3ffb106d505256a8 (diff) |
syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls
Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems:
Each syscall defines a stub which takes struct pt_regs as its only
argument. It decodes just those parameters it needs, e.g:
asmlinkage long sys_xyzzy(const struct pt_regs *regs)
{
return SyS_xyzzy(regs->di, regs->si, regs->dx);
}
This approach avoids leaking random user-provided register content down
the call chain.
For example, for sys_recv() which is a 4-parameter syscall, the assembly
now is (in slightly reordered fashion):
<sys_recv>:
callq <__fentry__>
/* decode regs->di, ->si, ->dx and ->r10 */
mov 0x70(%rdi),%rdi
mov 0x68(%rdi),%rsi
mov 0x60(%rdi),%rdx
mov 0x38(%rdi),%rcx
[ SyS_recv() is automatically inlined by the compiler,
as it is not [yet] used anywhere else ]
/* clear %r9 and %r8, the 5th and 6th args */
xor %r9d,%r9d
xor %r8d,%r8d
/* do the actual work */
callq __sys_recvfrom
/* cleanup and return */
cltq
retq
The only valid place in an x86-64 kernel which rightfully calls
a syscall function on its own -- vsyscall -- needs to be modified
to pass struct pt_regs onwards as well.
To keep the syscall table generation working independent of
SYSCALL_PTREGS being enabled, the stubs are named the same as the
"original" syscall stubs, i.e. sys_*().
This patch is based on an original proof-of-concept
| From: Linus Torvalds <torvalds@linux-foundation.org>
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
and was split up and heavily modified by me, in particular to base it on
ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being,
and to update the vsyscall to the new calling convention.
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/entry')
-rw-r--r-- | arch/x86/entry/common.c | 4 | ||||
-rw-r--r-- | arch/x86/entry/syscall_64.c | 9 | ||||
-rw-r--r-- | arch/x86/entry/vsyscall/vsyscall_64.c | 22 |
3 files changed, 33 insertions, 2 deletions
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a8b066dbbf48..e1b91bffa988 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -284,9 +284,13 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) nr &= __SYSCALL_MASK; if (likely(nr < NR_syscalls)) { nr = array_index_nospec(nr, NR_syscalls); +#ifdef CONFIG_SYSCALL_PTREGS + regs->ax = sys_call_table[nr](regs); +#else regs->ax = sys_call_table[nr]( regs->di, regs->si, regs->dx, regs->r10, regs->r8, regs->r9); +#endif } syscall_return_slowpath(regs); diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index c176d2fab1da..6197850adf91 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -7,14 +7,19 @@ #include <asm/asm-offsets.h> #include <asm/syscall.h> +#ifdef CONFIG_SYSCALL_PTREGS +/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ +extern asmlinkage long sys_ni_syscall(const struct pt_regs *); +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); +#else /* CONFIG_SYSCALL_PTREGS */ +extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#endif /* CONFIG_SYSCALL_PTREGS */ #include <asm/syscalls_64.h> #undef __SYSCALL_64 #define __SYSCALL_64(nr, sym, qual) [nr] = sym, -extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); - asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* * Smells like a compiler bug -- it doesn't work diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 317be365bce3..05eebbf9b989 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -127,6 +127,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) int vsyscall_nr, syscall_nr, tmp; int prev_sig_on_uaccess_err; long ret; +#ifdef CONFIG_SYSCALL_PTREGS + unsigned long orig_dx; +#endif /* * No point in checking CS -- the only way to get here is a user mode @@ -227,19 +230,38 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) ret = -EFAULT; switch (vsyscall_nr) { case 0: +#ifdef CONFIG_SYSCALL_PTREGS + /* this decodes regs->di and regs->si on its own */ + ret = sys_gettimeofday(regs); +#else ret = sys_gettimeofday( (struct timeval __user *)regs->di, (struct timezone __user *)regs->si); +#endif /* CONFIG_SYSCALL_PTREGS */ break; case 1: +#ifdef CONFIG_SYSCALL_PTREGS + /* this decodes regs->di on its own */ + ret = sys_time(regs); +#else ret = sys_time((time_t __user *)regs->di); +#endif /* CONFIG_SYSCALL_PTREGS */ break; case 2: +#ifdef CONFIG_SYSCALL_PTREGS + /* while we could clobber regs->dx, we didn't in the past... */ + orig_dx = regs->dx; + regs->dx = 0; + /* this decodes regs->di, regs->si and regs->dx on its own */ + ret = sys_getcpu(regs); + regs->dx = orig_dx; +#else ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, NULL); +#endif /* CONFIG_SYSCALL_PTREGS */ break; } |