From 0ebeea8ca8a4d1d453ad299aef0507dab04f6e8d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 May 2020 12:11:16 +0200 Subject: bpf: Restrict bpf_probe_read{, str}() only to archs where they work Given the legacy bpf_probe_read{,str}() BPF helpers are broken on archs with overlapping address ranges, we should really take the next step to disable them from BPF use there. To generally fix the situation, we've recently added new helper variants bpf_probe_read_{user,kernel}() and bpf_probe_read_{user,kernel}_str(). For details on them, see 6ae08ae3dea2 ("bpf: Add probe_read_{user, kernel} and probe_read_{user,kernel}_str helpers"). Given bpf_probe_read{,str}() have been around for ~5 years by now, there are plenty of users at least on x86 still relying on them today, so we cannot remove them entirely w/o breaking the BPF tracing ecosystem. However, their use should be restricted to archs with non-overlapping address ranges where they are working in their current form. Therefore, move this behind a CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE and have x86, arm64, arm select it (other archs supporting it can follow-up on it as well). For the remaining archs, they can workaround easily by relying on the feature probe from bpftool which spills out defines that can be used out of BPF C code to implement the drop-in replacement for old/new kernels via: bpftool feature probe macro Suggested-by: Linus Torvalds Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: Masami Hiramatsu Acked-by: Linus Torvalds Cc: Brendan Gregg Cc: Christoph Hellwig Link: https://lore.kernel.org/bpf/20200515101118.6508-2-daniel@iogearbox.net --- kernel/trace/bpf_trace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ca1796747a77..b83bdaa31c7b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -825,14 +825,16 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: return &bpf_probe_read_kernel_proto; - case BPF_FUNC_probe_read: - return &bpf_probe_read_compat_proto; case BPF_FUNC_probe_read_user_str: return &bpf_probe_read_user_str_proto; case BPF_FUNC_probe_read_kernel_str: return &bpf_probe_read_kernel_str_proto; +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + case BPF_FUNC_probe_read: + return &bpf_probe_read_compat_proto; case BPF_FUNC_probe_read_str: return &bpf_probe_read_compat_str_proto; +#endif #ifdef CONFIG_CGROUPS case BPF_FUNC_get_current_cgroup_id: return &bpf_get_current_cgroup_id_proto; -- cgit v1.2.3 From b2a5212fb634561bb734c6356904e37f6665b955 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 May 2020 12:11:18 +0200 Subject: bpf: Restrict bpf_trace_printk()'s %s usage and add %pks, %pus specifier Usage of plain %s conversion specifier in bpf_trace_printk() suffers from the very same issue as bpf_probe_read{,str}() helpers, that is, it is broken on archs with overlapping address ranges. While the helpers have been addressed through work in 6ae08ae3dea2 ("bpf: Add probe_read_{user, kernel} and probe_read_{user, kernel}_str helpers"), we need an option for bpf_trace_printk() as well to fix it. Similarly as with the helpers, force users to make an explicit choice by adding %pks and %pus specifier to bpf_trace_printk() which will then pick the corresponding strncpy_from_unsafe*() variant to perform the access under KERNEL_DS or USER_DS. The %pk* (kernel specifier) and %pu* (user specifier) can later also be extended for other objects aside strings that are probed and printed under tracing, and reused out of other facilities like bpf_seq_printf() or BTF based type printing. Existing behavior of %s for current users is still kept working for archs where it is not broken and therefore gated through CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE. For archs not having this property we fall-back to pick probing under KERNEL_DS as a sensible default. Fixes: 8d3b7dce8622 ("bpf: add support for %s specifier to bpf_trace_printk()") Reported-by: Linus Torvalds Reported-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Masami Hiramatsu Cc: Brendan Gregg Link: https://lore.kernel.org/bpf/20200515101118.6508-4-daniel@iogearbox.net --- kernel/trace/bpf_trace.c | 94 +++++++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 32 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b83bdaa31c7b..a010edc37ee0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -323,17 +323,15 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) /* * Only limited trace_printk() conversion specifiers allowed: - * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s + * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pks %pus %s */ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) { + int i, mod[3] = {}, fmt_cnt = 0; + char buf[64], fmt_ptype; + void *unsafe_ptr = NULL; bool str_seen = false; - int mod[3] = {}; - int fmt_cnt = 0; - u64 unsafe_addr; - char buf[64]; - int i; /* * bpf_check()->check_func_arg()->check_stack_boundary() @@ -359,40 +357,71 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, if (fmt[i] == 'l') { mod[fmt_cnt]++; i++; - } else if (fmt[i] == 'p' || fmt[i] == 's') { + } else if (fmt[i] == 'p') { mod[fmt_cnt]++; + if ((fmt[i + 1] == 'k' || + fmt[i + 1] == 'u') && + fmt[i + 2] == 's') { + fmt_ptype = fmt[i + 1]; + i += 2; + goto fmt_str; + } + /* disallow any further format extensions */ if (fmt[i + 1] != 0 && !isspace(fmt[i + 1]) && !ispunct(fmt[i + 1])) return -EINVAL; - fmt_cnt++; - if (fmt[i] == 's') { - if (str_seen) - /* allow only one '%s' per fmt string */ - return -EINVAL; - str_seen = true; - - switch (fmt_cnt) { - case 1: - unsafe_addr = arg1; - arg1 = (long) buf; - break; - case 2: - unsafe_addr = arg2; - arg2 = (long) buf; - break; - case 3: - unsafe_addr = arg3; - arg3 = (long) buf; - break; - } - buf[0] = 0; - strncpy_from_unsafe(buf, - (void *) (long) unsafe_addr, + + goto fmt_next; + } else if (fmt[i] == 's') { + mod[fmt_cnt]++; + fmt_ptype = fmt[i]; +fmt_str: + if (str_seen) + /* allow only one '%s' per fmt string */ + return -EINVAL; + str_seen = true; + + if (fmt[i + 1] != 0 && + !isspace(fmt[i + 1]) && + !ispunct(fmt[i + 1])) + return -EINVAL; + + switch (fmt_cnt) { + case 0: + unsafe_ptr = (void *)(long)arg1; + arg1 = (long)buf; + break; + case 1: + unsafe_ptr = (void *)(long)arg2; + arg2 = (long)buf; + break; + case 2: + unsafe_ptr = (void *)(long)arg3; + arg3 = (long)buf; + break; + } + + buf[0] = 0; + switch (fmt_ptype) { + case 's': +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + strncpy_from_unsafe(buf, unsafe_ptr, sizeof(buf)); + break; +#endif + case 'k': + strncpy_from_unsafe_strict(buf, unsafe_ptr, + sizeof(buf)); + break; + case 'u': + strncpy_from_unsafe_user(buf, + (__force void __user *)unsafe_ptr, + sizeof(buf)); + break; } - continue; + goto fmt_next; } if (fmt[i] == 'l') { @@ -403,6 +432,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') return -EINVAL; +fmt_next: fmt_cnt++; } -- cgit v1.2.3