From cf5f5cea270655dd49370760576c64b228583b79 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 4 Aug 2017 16:00:09 -0700 Subject: bpf: add support for sys_enter_* and sys_exit_* tracepoints Currently, bpf programs cannot be attached to sys_enter_* and sys_exit_* style tracepoints. The iovisor/bcc issue #748 (https://github.com/iovisor/bcc/issues/748) documents this issue. For example, if you try to attach a bpf program to tracepoints syscalls/sys_enter_newfstat, you will get the following error: # ./tools/trace.py t:syscalls:sys_enter_newfstat Ioctl(PERF_EVENT_IOC_SET_BPF): Invalid argument Failed to attach BPF to tracepoint The main reason is that syscalls/sys_enter_* and syscalls/sys_exit_* tracepoints are treated differently from other tracepoints and there is no bpf hook to it. This patch adds bpf support for these syscalls tracepoints by . permitting bpf attachment in ioctl PERF_EVENT_IOC_SET_BPF . calling bpf programs in perf_syscall_enter and perf_syscall_exit The legality of bpf program ctx access is also checked. Function trace_event_get_offsets returns correct max offset for each specific syscall tracepoint, which is compared against the maximum offset access in bpf program. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/trace/trace_syscalls.c | 53 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 5e10395da88e..7a1a92036563 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -559,11 +559,29 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); static int sys_perf_refcount_enter; static int sys_perf_refcount_exit; +static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, + struct syscall_metadata *sys_data, + struct syscall_trace_enter *rec) { + struct syscall_tp_t { + unsigned long long regs; + unsigned long syscall_nr; + unsigned long args[sys_data->nb_args]; + } param; + int i; + + *(struct pt_regs **)¶m = regs; + param.syscall_nr = rec->nr; + for (i = 0; i < sys_data->nb_args; i++) + param.args[i] = rec->args[i]; + return trace_call_bpf(prog, ¶m); +} + static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; + struct bpf_prog *prog; int syscall_nr; int rctx; int size; @@ -578,8 +596,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) if (!sys_data) return; + prog = READ_ONCE(sys_data->enter_event->prog); head = this_cpu_ptr(sys_data->enter_event->perf_events); - if (hlist_empty(head)) + if (!prog && hlist_empty(head)) return; /* get the size after alignment with the u32 buffer size field */ @@ -594,6 +613,13 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); + + if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) || + hlist_empty(head)) { + perf_swevent_put_recursion_context(rctx); + return; + } + perf_trace_buf_submit(rec, size, rctx, sys_data->enter_event->event.type, 1, regs, head, NULL); @@ -633,11 +659,26 @@ static void perf_sysenter_disable(struct trace_event_call *call) mutex_unlock(&syscall_trace_lock); } +static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, + struct syscall_trace_exit *rec) { + struct syscall_tp_t { + unsigned long long regs; + unsigned long syscall_nr; + unsigned long ret; + } param; + + *(struct pt_regs **)¶m = regs; + param.syscall_nr = rec->nr; + param.ret = rec->ret; + return trace_call_bpf(prog, ¶m); +} + static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; struct hlist_head *head; + struct bpf_prog *prog; int syscall_nr; int rctx; int size; @@ -652,8 +693,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) if (!sys_data) return; + prog = READ_ONCE(sys_data->exit_event->prog); head = this_cpu_ptr(sys_data->exit_event->perf_events); - if (hlist_empty(head)) + if (!prog && hlist_empty(head)) return; /* We can probably do that at build time */ @@ -666,6 +708,13 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); + + if ((prog && !perf_call_bpf_exit(prog, regs, rec)) || + hlist_empty(head)) { + perf_swevent_put_recursion_context(rctx); + return; + } + perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, 1, regs, head, NULL); } -- cgit v1.2.3