diff options
author | David S. Miller <davem@davemloft.net> | 2021-02-16 13:14:06 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-02-16 13:14:06 -0800 |
commit | b8af417e4d93caeefb89bbfbd56ec95dedd8dab5 (patch) | |
tree | 1c8d22e1aec330238830a43cc8aee0cf768ae1c7 /arch/x86 | |
parent | 9ec5eea5b6acfae7279203097eeec5d02d01d9b7 (diff) | |
parent | 45159b27637b0fef6d5ddb86fc7c46b13c77960f (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2021-02-16
The following pull-request contains BPF updates for your *net-next* tree.
There's a small merge conflict between 7eeba1706eba ("tcp: Add receive timestamp
support for receive zerocopy.") from net-next tree and 9cacf81f8161 ("bpf: Remove
extra lock_sock for TCP_ZEROCOPY_RECEIVE") from bpf-next tree. Resolve as follows:
[...]
lock_sock(sk);
err = tcp_zerocopy_receive(sk, &zc, &tss);
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
&zc, &len, err);
release_sock(sk);
[...]
We've added 116 non-merge commits during the last 27 day(s) which contain
a total of 156 files changed, 5662 insertions(+), 1489 deletions(-).
The main changes are:
1) Adds support of pointers to types with known size among global function
args to overcome the limit on max # of allowed args, from Dmitrii Banshchikov.
2) Add bpf_iter for task_vma which can be used to generate information similar
to /proc/pid/maps, from Song Liu.
3) Enable bpf_{g,s}etsockopt() from all sock_addr related program hooks. Allow
rewriting bind user ports from BPF side below the ip_unprivileged_port_start
range, both from Stanislav Fomichev.
4) Prevent recursion on fentry/fexit & sleepable programs and allow map-in-map
as well as per-cpu maps for the latter, from Alexei Starovoitov.
5) Add selftest script to run BPF CI locally. Also enable BPF ringbuffer
for sleepable programs, both from KP Singh.
6) Extend verifier to enable variable offset read/write access to the BPF
program stack, from Andrei Matei.
7) Improve tc & XDP MTU handling and add a new bpf_check_mtu() helper to
query device MTU from programs, from Jesper Dangaard Brouer.
8) Allow bpf_get_socket_cookie() helper also be called from [sleepable] BPF
tracing programs, from Florent Revest.
9) Extend x86 JIT to pad JMPs with NOPs for helping image to converge when
otherwise too many passes are required, from Gary Lin.
10) Verifier fixes on atomics with BPF_FETCH as well as function-by-function
verification both related to zero-extension handling, from Ilya Leoshkevich.
11) Better kernel build integration of resolve_btfids tool, from Jiri Olsa.
12) Batch of AF_XDP selftest cleanups and small performance improvement
for libbpf's xsk map redirect for newer kernels, from Björn Töpel.
13) Follow-up BPF doc and verifier improvements around atomics with
BPF_FETCH, from Brendan Jackman.
14) Permit zero-sized data sections e.g. if ELF .rodata section contains
read-only data from local variables, from Yonghong Song.
15) veth driver skb bulk-allocation for ndo_xdp_xmit, from Lorenzo Bianconi.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 205 |
1 files changed, 158 insertions, 47 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 1d4d50199293..79e7a0ec1da5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -869,8 +869,31 @@ static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt, } } +static int emit_nops(u8 **pprog, int len) +{ + u8 *prog = *pprog; + int i, noplen, cnt = 0; + + while (len > 0) { + noplen = len; + + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + + for (i = 0; i < noplen; i++) + EMIT1(ideal_nops[noplen][i]); + len -= noplen; + } + + *pprog = prog; + + return cnt; +} + +#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, - int oldproglen, struct jit_context *ctx) + int oldproglen, struct jit_context *ctx, bool jmp_padding) { bool tail_call_reachable = bpf_prog->aux->tail_call_reachable; struct bpf_insn *insn = bpf_prog->insnsi; @@ -880,7 +903,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, bool seen_exit = false; u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; int i, cnt = 0, excnt = 0; - int proglen = 0; + int ilen, proglen = 0; u8 *prog = temp; int err; @@ -894,17 +917,24 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, bpf_prog_was_classic(bpf_prog), tail_call_reachable, bpf_prog->aux->func_idx != 0); push_callee_regs(&prog, callee_regs_used); - addrs[0] = prog - temp; + + ilen = prog - temp; + if (image) + memcpy(image + proglen, temp, ilen); + proglen += ilen; + addrs[0] = proglen; + prog = temp; for (i = 1; i <= insn_cnt; i++, insn++) { const s32 imm32 = insn->imm; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; u8 b2 = 0, b3 = 0; + u8 *start_of_ldx; s64 jmp_offset; u8 jmp_cond; - int ilen; u8 *func; + int nops; switch (insn->code) { /* ALU */ @@ -1249,12 +1279,30 @@ st: if (is_imm8(insn->off)) case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { + /* test src_reg, src_reg */ + maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */ + EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg)); + /* jne start_of_ldx */ + EMIT2(X86_JNE, 0); + /* xor dst_reg, dst_reg */ + emit_mov_imm32(&prog, false, dst_reg, 0); + /* jmp byte_after_ldx */ + EMIT2(0xEB, 0); + + /* populate jmp_offset for JNE above */ + temp[4] = prog - temp - 5 /* sizeof(test + jne) */; + start_of_ldx = prog; + } emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { struct exception_table_entry *ex; u8 *_insn = image + proglen; s64 delta; + /* populate jmp_offset for JMP above */ + start_of_ldx[-1] = prog - start_of_ldx; + if (!bpf_prog->aux->extable) break; @@ -1502,6 +1550,30 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ } jmp_offset = addrs[i + insn->off] - addrs[i]; if (is_imm8(jmp_offset)) { + if (jmp_padding) { + /* To keep the jmp_offset valid, the extra bytes are + * padded before the jump insn, so we substract the + * 2 bytes of jmp_cond insn from INSN_SZ_DIFF. + * + * If the previous pass already emits an imm8 + * jmp_cond, then this BPF insn won't shrink, so + * "nops" is 0. + * + * On the other hand, if the previous pass emits an + * imm32 jmp_cond, the extra 4 bytes(*) is padded to + * keep the image from shrinking further. + * + * (*) imm32 jmp_cond is 6 bytes, and imm8 jmp_cond + * is 2 bytes, so the size difference is 4 bytes. + */ + nops = INSN_SZ_DIFF - 2; + if (nops != 0 && nops != 4) { + pr_err("unexpected jmp_cond padding: %d bytes\n", + nops); + return -EFAULT; + } + cnt += emit_nops(&prog, nops); + } EMIT2(jmp_cond, jmp_offset); } else if (is_simm32(jmp_offset)) { EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); @@ -1524,11 +1596,55 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ else jmp_offset = addrs[i + insn->off] - addrs[i]; - if (!jmp_offset) - /* Optimize out nop jumps */ + if (!jmp_offset) { + /* + * If jmp_padding is enabled, the extra nops will + * be inserted. Otherwise, optimize out nop jumps. + */ + if (jmp_padding) { + /* There are 3 possible conditions. + * (1) This BPF_JA is already optimized out in + * the previous run, so there is no need + * to pad any extra byte (0 byte). + * (2) The previous pass emits an imm8 jmp, + * so we pad 2 bytes to match the previous + * insn size. + * (3) Similarly, the previous pass emits an + * imm32 jmp, and 5 bytes is padded. + */ + nops = INSN_SZ_DIFF; + if (nops != 0 && nops != 2 && nops != 5) { + pr_err("unexpected nop jump padding: %d bytes\n", + nops); + return -EFAULT; + } + cnt += emit_nops(&prog, nops); + } break; + } emit_jmp: if (is_imm8(jmp_offset)) { + if (jmp_padding) { + /* To avoid breaking jmp_offset, the extra bytes + * are padded before the actual jmp insn, so + * 2 bytes is substracted from INSN_SZ_DIFF. + * + * If the previous pass already emits an imm8 + * jmp, there is nothing to pad (0 byte). + * + * If it emits an imm32 jmp (5 bytes) previously + * and now an imm8 jmp (2 bytes), then we pad + * (5 - 2 = 3) bytes to stop the image from + * shrinking further. + */ + nops = INSN_SZ_DIFF - 2; + if (nops != 0 && nops != 3) { + pr_err("unexpected jump padding: %d bytes\n", + nops); + return -EFAULT; + } + cnt += emit_nops(&prog, INSN_SZ_DIFF - 2); + } EMIT2(0xEB, jmp_offset); } else if (is_simm32(jmp_offset)) { EMIT1_off32(0xE9, jmp_offset); @@ -1624,17 +1740,25 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, struct bpf_prog *p, int stack_size, bool mod_ret) { u8 *prog = *pprog; + u8 *jmp_insn; int cnt = 0; - if (p->aux->sleepable) { - if (emit_call(&prog, __bpf_prog_enter_sleepable, prog)) - return -EINVAL; - } else { - if (emit_call(&prog, __bpf_prog_enter, prog)) + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); + if (emit_call(&prog, + p->aux->sleepable ? __bpf_prog_enter_sleepable : + __bpf_prog_enter, prog)) return -EINVAL; - /* remember prog start time returned by __bpf_prog_enter */ - emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); - } + /* remember prog start time returned by __bpf_prog_enter */ + emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + + /* if (__bpf_prog_enter*(prog) == 0) + * goto skip_exec_of_prog; + */ + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ + /* emit 2 nops that will be replaced with JE insn */ + jmp_insn = prog; + emit_nops(&prog, 2); /* arg1: lea rdi, [rbp - stack_size] */ EMIT4(0x48, 0x8D, 0x7D, -stack_size); @@ -1654,43 +1778,23 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (mod_ret) emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); - if (p->aux->sleepable) { - if (emit_call(&prog, __bpf_prog_exit_sleepable, prog)) - return -EINVAL; - } else { - /* arg1: mov rdi, progs[i] */ - emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, - (u32) (long) p); - /* arg2: mov rsi, rbx <- start time in nsec */ - emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); - if (emit_call(&prog, __bpf_prog_exit, prog)) + /* replace 2 nops with JE insn, since jmp target is known */ + jmp_insn[0] = X86_JE; + jmp_insn[1] = prog - jmp_insn - 2; + + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); + /* arg2: mov rsi, rbx <- start time in nsec */ + emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + if (emit_call(&prog, + p->aux->sleepable ? __bpf_prog_exit_sleepable : + __bpf_prog_exit, prog)) return -EINVAL; - } *pprog = prog; return 0; } -static void emit_nops(u8 **pprog, unsigned int len) -{ - unsigned int i, noplen; - u8 *prog = *pprog; - int cnt = 0; - - while (len > 0) { - noplen = len; - - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - - for (i = 0; i < noplen; i++) - EMIT1(ideal_nops[noplen][i]); - len -= noplen; - } - - *pprog = prog; -} - static void emit_align(u8 **pprog, u32 align) { u8 *target, *prog = *pprog; @@ -2065,6 +2169,9 @@ struct x64_jit_data { struct jit_context ctx; }; +#define MAX_PASSES 20 +#define PADDING_PASSES (MAX_PASSES - 5) + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; @@ -2074,6 +2181,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) struct jit_context ctx = {}; bool tmp_blinded = false; bool extra_pass = false; + bool padding = false; u8 *image = NULL; int *addrs; int pass; @@ -2110,6 +2218,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) image = jit_data->image; header = jit_data->header; extra_pass = true; + padding = true; goto skip_init_addrs; } addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); @@ -2135,8 +2244,10 @@ skip_init_addrs: * may converge on the last pass. In such case do one more * pass to emit the final image. */ - for (pass = 0; pass < 20 || image; pass++) { - proglen = do_jit(prog, addrs, image, oldproglen, &ctx); + for (pass = 0; pass < MAX_PASSES || image; pass++) { + if (!padding && pass >= PADDING_PASSES) + padding = true; + proglen = do_jit(prog, addrs, image, oldproglen, &ctx, padding); if (proglen <= 0) { out_image: image = NULL; |