diff options
author | David S. Miller <davem@davemloft.net> | 2021-05-11 16:05:56 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-05-11 16:05:56 -0700 |
commit | df6f8237036938d48b7705681c170566c00593fa (patch) | |
tree | 343db1e2b6ba0f11485473f6d5579701b53e9038 /kernel | |
parent | 9fe37a80c9298936a535a242355d4ef536f82dd9 (diff) | |
parent | 569c484f9995f489f2b80dd134269fe07d2b900d (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says:
====================
pull-request: bpf 2021-05-11
The following pull-request contains BPF updates for your *net* tree.
We've added 13 non-merge commits during the last 8 day(s) which contain
a total of 21 files changed, 817 insertions(+), 382 deletions(-).
The main changes are:
1) Fix multiple ringbuf bugs in particular to prevent writable mmap of
read-only pages, from Andrii Nakryiko & Thadeu Lima de Souza Cascardo.
2) Fix verifier alu32 known-const subregister bound tracking for bitwise
operations and/or/xor, from Daniel Borkmann.
3) Reject trampoline attachment for functions with variable arguments,
and also add a deny list of other forbidden functions, from Jiri Olsa.
4) Fix nested bpf_bprintf_prepare() calls used by various helpers by
switching to per-CPU buffers, from Florent Revest.
5) Fix kernel compilation with BTF debug info on ppc64 due to pahole
missing TCP-CC functions like cubictcp_init, from Martin KaFai Lau.
6) Add a kconfig entry to provide an option to disallow unprivileged
BPF by default, from Daniel Borkmann.
7) Fix libbpf compilation for older libelf when GELF_ST_VISIBILITY()
macro is not available, from Arnaldo Carvalho de Melo.
8) Migrate test_tc_redirect to test_progs framework as prep work
for upcoming skb_change_head() fix & selftest, from Jussi Maki.
9) Fix a libbpf segfault in add_dummy_ksym_var() if BTF is not
present, from Ian Rogers.
10) Fix tx_only micro-benchmark in xdpsock BPF sample with proper frame
size, from Magnus Karlsson.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/Kconfig | 88 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 12 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 27 | ||||
-rw-r--r-- | kernel/bpf/ringbuf.c | 24 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 3 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 36 | ||||
-rw-r--r-- | kernel/sysctl.c | 29 |
7 files changed, 174 insertions, 45 deletions
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig new file mode 100644 index 000000000000..26b591e23f16 --- /dev/null +++ b/kernel/bpf/Kconfig @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: GPL-2.0-only + +# BPF interpreter that, for example, classic socket filters depend on. +config BPF + bool + +# Used by archs to tell that they support BPF JIT compiler plus which +# flavour. Only one of the two can be selected for a specific arch since +# eBPF JIT supersedes the cBPF JIT. + +# Classic BPF JIT (cBPF) +config HAVE_CBPF_JIT + bool + +# Extended BPF JIT (eBPF) +config HAVE_EBPF_JIT + bool + +# Used by archs to tell that they want the BPF JIT compiler enabled by +# default for kernels that were compiled with BPF JIT support. +config ARCH_WANT_DEFAULT_BPF_JIT + bool + +menu "BPF subsystem" + +config BPF_SYSCALL + bool "Enable bpf() system call" + select BPF + select IRQ_WORK + select TASKS_TRACE_RCU + select BINARY_PRINTF + select NET_SOCK_MSG if INET + default n + help + Enable the bpf() system call that allows to manipulate BPF programs + and maps via file descriptors. + +config BPF_JIT + bool "Enable BPF Just In Time compiler" + depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT + depends on MODULES + help + BPF programs are normally handled by a BPF interpreter. This option + allows the kernel to generate native code when a program is loaded + into the kernel. This will significantly speed-up processing of BPF + programs. + + Note, an admin should enable this feature changing: + /proc/sys/net/core/bpf_jit_enable + /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_kallsyms (optional) + +config BPF_JIT_ALWAYS_ON + bool "Permanently enable BPF JIT and remove BPF interpreter" + depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT + help + Enables BPF JIT and removes BPF interpreter to avoid speculative + execution of BPF instructions by the interpreter. + +config BPF_JIT_DEFAULT_ON + def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON + depends on HAVE_EBPF_JIT && BPF_JIT + +config BPF_UNPRIV_DEFAULT_OFF + bool "Disable unprivileged BPF by default" + depends on BPF_SYSCALL + help + Disables unprivileged BPF by default by setting the corresponding + /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can + still reenable it by setting it to 0 later on, or permanently + disable it by setting it to 1 (from which no other transition to + 0 is possible anymore). + +source "kernel/bpf/preload/Kconfig" + +config BPF_LSM + bool "Enable BPF LSM Instrumentation" + depends on BPF_EVENTS + depends on BPF_SYSCALL + depends on SECURITY + depends on BPF_JIT + help + Enables instrumentation of the security hooks with BPF programs for + implementing dynamic MAC and Audit Policies. + + If you are unsure how to answer this question, answer N. + +endmenu # "BPF subsystem" diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0600ed325fa0..f982a9f0dbc4 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5206,6 +5206,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, m->ret_size = ret; for (i = 0; i < nargs; i++) { + if (i == nargs - 1 && args[i].type == 0) { + bpf_log(log, + "The function %s with variable args is unsupported.\n", + tname); + return -EINVAL; + } ret = __get_type_size(btf, args[i].type, &t); if (ret < 0) { bpf_log(log, @@ -5213,6 +5219,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]); return -EINVAL; } + if (ret == 0) { + bpf_log(log, + "The function %s has malformed void argument.\n", + tname); + return -EINVAL; + } m->arg_size[i] = ret; } m->nr_args = nargs; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 544773970dbc..ef658a9ea5c9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -696,34 +696,35 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, */ #define MAX_PRINTF_BUF_LEN 512 -struct bpf_printf_buf { - char tmp_buf[MAX_PRINTF_BUF_LEN]; +/* Support executing three nested bprintf helper calls on a given CPU */ +struct bpf_bprintf_buffers { + char tmp_bufs[3][MAX_PRINTF_BUF_LEN]; }; -static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf); -static DEFINE_PER_CPU(int, bpf_printf_buf_used); +static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); +static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); static int try_get_fmt_tmp_buf(char **tmp_buf) { - struct bpf_printf_buf *bufs; - int used; + struct bpf_bprintf_buffers *bufs; + int nest_level; preempt_disable(); - used = this_cpu_inc_return(bpf_printf_buf_used); - if (WARN_ON_ONCE(used > 1)) { - this_cpu_dec(bpf_printf_buf_used); + nest_level = this_cpu_inc_return(bpf_bprintf_nest_level); + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) { + this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); return -EBUSY; } - bufs = this_cpu_ptr(&bpf_printf_buf); - *tmp_buf = bufs->tmp_buf; + bufs = this_cpu_ptr(&bpf_bprintf_bufs); + *tmp_buf = bufs->tmp_bufs[nest_level - 1]; return 0; } void bpf_bprintf_cleanup(void) { - if (this_cpu_read(bpf_printf_buf_used)) { - this_cpu_dec(bpf_printf_buf_used); + if (this_cpu_read(bpf_bprintf_nest_level)) { + this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); } } diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index f25b719ac786..84b3b35fc0d0 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -221,25 +221,20 @@ static int ringbuf_map_get_next_key(struct bpf_map *map, void *key, return -ENOTSUPP; } -static size_t bpf_ringbuf_mmap_page_cnt(const struct bpf_ringbuf *rb) -{ - size_t data_pages = (rb->mask + 1) >> PAGE_SHIFT; - - /* consumer page + producer page + 2 x data pages */ - return RINGBUF_POS_PAGES + 2 * data_pages; -} - static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) { struct bpf_ringbuf_map *rb_map; - size_t mmap_sz; rb_map = container_of(map, struct bpf_ringbuf_map, map); - mmap_sz = bpf_ringbuf_mmap_page_cnt(rb_map->rb) << PAGE_SHIFT; - - if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > mmap_sz) - return -EINVAL; + if (vma->vm_flags & VM_WRITE) { + /* allow writable mapping for the consumer_pos only */ + if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EPERM; + } else { + vma->vm_flags &= ~VM_MAYWRITE; + } + /* remap_vmalloc_range() checks size and offset constraints */ return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF); } @@ -315,6 +310,9 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) return NULL; len = round_up(size + BPF_RINGBUF_HDR_SZ, 8); + if (len > rb->mask + 1) + return NULL; + cons_pos = smp_load_acquire(&rb->consumer_pos); if (in_nmi()) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 941ca06d9dfa..ea04b0deb5ce 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(map_idr_lock); static DEFINE_IDR(link_idr); static DEFINE_SPINLOCK(link_idr_lock); -int sysctl_unprivileged_bpf_disabled __read_mostly; +int sysctl_unprivileged_bpf_disabled __read_mostly = + IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 757476c91c98..c58598ef4b5b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7084,11 +7084,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, s32 smin_val = src_reg->s32_min_value; u32 umax_val = src_reg->u32_max_value; - /* Assuming scalar64_min_max_and will be called so its safe - * to skip updating register for known 32-bit case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get our minimum from the var_off, since that's inherently * bitwise. Our maximum is the minimum of the operands' maxima. @@ -7108,7 +7107,6 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, dst_reg->s32_min_value = dst_reg->u32_min_value; dst_reg->s32_max_value = dst_reg->u32_max_value; } - } static void scalar_min_max_and(struct bpf_reg_state *dst_reg, @@ -7155,11 +7153,10 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, s32 smin_val = src_reg->s32_min_value; u32 umin_val = src_reg->u32_min_value; - /* Assuming scalar64_min_max_or will be called so it is safe - * to skip updating register for known case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get our maximum from the var_off, and our minimum is the * maximum of the operands' minima @@ -7224,11 +7221,10 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, struct tnum var32_off = tnum_subreg(dst_reg->var_off); s32 smin_val = src_reg->s32_min_value; - /* Assuming scalar64_min_max_xor will be called so it is safe - * to skip updating register for known case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get both minimum and maximum from the var32_off. */ dst_reg->u32_min_value = var32_off.value; @@ -13200,6 +13196,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, return 0; } +BTF_SET_START(btf_id_deny) +BTF_ID_UNUSED +#ifdef CONFIG_SMP +BTF_ID(func, migrate_disable) +BTF_ID(func, migrate_enable) +#endif +#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU +BTF_ID(func, rcu_read_unlock_strict) +#endif +BTF_SET_END(btf_id_deny) + static int check_attach_btf_id(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; @@ -13259,6 +13266,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) ret = bpf_lsm_verify_prog(&env->log, prog); if (ret < 0) return ret; + } else if (prog->type == BPF_PROG_TYPE_TRACING && + btf_id_set_contains(&btf_id_deny, btf_id)) { + return -EINVAL; } key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 14edf84cc571..d4a78e08f6d8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -225,7 +225,27 @@ static int bpf_stats_handler(struct ctl_table *table, int write, mutex_unlock(&bpf_stats_enabled_mutex); return ret; } -#endif + +static int bpf_unpriv_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, unpriv_enable = *(int *)table->data; + bool locked_state = unpriv_enable == 1; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &unpriv_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (locked_state && unpriv_enable != 1) + return -EPERM; + *(int *)table->data = unpriv_enable; + } + return ret; +} +#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ /* * /proc/sys support @@ -2600,10 +2620,9 @@ static struct ctl_table kern_table[] = { .data = &sysctl_unprivileged_bpf_disabled, .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_ONE, + .proc_handler = bpf_unpriv_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = &two, }, { .procname = "bpf_stats_enabled", |