From 1ad2f5838d345e1c102bd1cd27c4f4c1349b0dc8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:05 +0200 Subject: bpf: fix incorrect pruning decision when alignment must be tracked Currently, when we enforce alignment tracking on direct packet access, the verifier lets the following program pass despite doing a packet write with unaligned access: 0: (61) r2 = *(u32 *)(r1 +76) 1: (61) r3 = *(u32 *)(r1 +80) 2: (61) r7 = *(u32 *)(r1 +8) 3: (bf) r0 = r2 4: (07) r0 += 14 5: (25) if r7 > 0x1 goto pc+4 R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp 6: (2d) if r0 > r3 goto pc+1 R0=pkt(id=0,off=14,r=14) R1=ctx R2=pkt(id=0,off=0,r=14) R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp 7: (63) *(u32 *)(r0 -4) = r0 8: (b7) r0 = 0 9: (95) exit from 6 to 8: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp 8: (b7) r0 = 0 9: (95) exit from 5 to 10: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=2 R10=fp 10: (07) r0 += 1 11: (05) goto pc-6 6: safe <----- here, wrongly found safe processed 15 insns However, if we enforce a pruning mismatch by adding state into r8 which is then being mismatched in states_equal(), we find that for the otherwise same program, the verifier detects a misaligned packet access when actually walking that path: 0: (61) r2 = *(u32 *)(r1 +76) 1: (61) r3 = *(u32 *)(r1 +80) 2: (61) r7 = *(u32 *)(r1 +8) 3: (b7) r8 = 1 4: (bf) r0 = r2 5: (07) r0 += 14 6: (25) if r7 > 0x1 goto pc+4 R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 7: (2d) if r0 > r3 goto pc+1 R0=pkt(id=0,off=14,r=14) R1=ctx R2=pkt(id=0,off=0,r=14) R3=pkt_end R7=inv,min_value=0,max_value=1 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 8: (63) *(u32 *)(r0 -4) = r0 9: (b7) r0 = 0 10: (95) exit from 7 to 9: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 9: (b7) r0 = 0 10: (95) exit from 6 to 11: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=2 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 11: (07) r0 += 1 12: (b7) r8 = 0 13: (05) goto pc-7 <----- mismatch due to r8 7: (2d) if r0 > r3 goto pc+1 R0=pkt(id=0,off=15,r=15) R1=ctx R2=pkt(id=0,off=0,r=15) R3=pkt_end R7=inv,min_value=2 R8=imm0,min_value=0,max_value=0,min_align=2147483648 R10=fp 8: (63) *(u32 *)(r0 -4) = r0 misaligned packet access off 2+15+-4 size 4 The reason why we fail to see it in states_equal() is that the third test in compare_ptrs_to_packet() ... if (old->off <= cur->off && old->off >= old->range && cur->off >= cur->range) return true; ... will let the above pass. The situation we run into is that old->off <= cur->off (14 <= 15), meaning that prior walked paths went with smaller offset, which was later used in the packet access after successful packet range check and found to be safe already. For example: Given is R0=pkt(id=0,off=0,r=0). Adding offset 14 as in above program to it, results in R0=pkt(id=0,off=14,r=0) before the packet range test. Now, testing this against R3=pkt_end with 'if r0 > r3 goto out' will transform R0 into R0=pkt(id=0,off=14,r=14) for the case when we're within bounds. A write into the packet at offset *(u32 *)(r0 -4), that is, 2 + 14 -4, is valid and aligned (2 is for NET_IP_ALIGN). After processing this with all fall-through paths, we later on check paths from branches. When the above skb->mark test is true, then we jump near the end of the program, perform r0 += 1, and jump back to the 'if r0 > r3 goto out' test we've visited earlier already. This time, R0 is of type R0=pkt(id=0,off=15,r=0), and we'll prune that part because this time we'll have a larger safe packet range, and we already found that with off=14 all further insn were already safe, so it's safe as well with a larger off. However, the problem is that the subsequent write into the packet with 2 + 15 -4 is then unaligned, and not caught by the alignment tracking. Note that min_align, aux_off, and aux_off_align were all 0 in this example. Since we cannot tell at this time what kind of packet access was performed in the prior walk and what minimal requirements it has (we might do so in the future, but that requires more complexity), fix it to disable this pruning case for strict alignment for now, and let the verifier do check such paths instead. With that applied, the test cases pass and reject the program due to misalignment. Fixes: d1174416747d ("bpf: Track alignment of register values in the verifier.") Reference: http://patchwork.ozlabs.org/patch/761909/ Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c72cd41f5b8b..e37e06b1229d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -843,9 +843,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, { bool strict = env->strict_alignment; - if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) - strict = true; - switch (reg->type) { case PTR_TO_PACKET: return check_pkt_ptr_alignment(reg, off, size, strict); @@ -2696,7 +2693,8 @@ err_free: /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ -static bool compare_ptrs_to_packet(struct bpf_reg_state *old, +static bool compare_ptrs_to_packet(struct bpf_verifier_env *env, + struct bpf_reg_state *old, struct bpf_reg_state *cur) { if (old->id != cur->id) @@ -2739,7 +2737,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old, * 'if (R4 > data_end)' and all further insn were already good with r=20, * so they will be good with r=30 and we can prune the search. */ - if (old->off <= cur->off && + if (!env->strict_alignment && old->off <= cur->off && old->off >= old->range && cur->off >= cur->range) return true; @@ -2810,7 +2808,7 @@ static bool states_equal(struct bpf_verifier_env *env, continue; if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && - compare_ptrs_to_packet(rold, rcur)) + compare_ptrs_to_packet(env, rold, rcur)) continue; return false; @@ -3588,10 +3586,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) } else { log_level = 0; } - if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT) + + env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; - else - env->strict_alignment = false; ret = replace_map_fd_with_map_ptr(env); if (ret < 0) @@ -3697,7 +3695,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, mutex_lock(&bpf_verifier_lock); log_level = 0; + env->strict_alignment = false; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + env->strict_alignment = true; env->explored_states = kcalloc(env->prog->len, sizeof(struct bpf_verifier_state_list *), -- cgit v1.2.3 From a9789ef9afcb4fb0193f8dd94f2665ba3ad71e79 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:06 +0200 Subject: bpf: properly reset caller saved regs after helper call and ld_abs/ind Currently, after performing helper calls, we clear all caller saved registers, that is r0 - r5 and fill r0 depending on struct bpf_func_proto specification. The way we reset these regs can affect pruning decisions in later paths, since we only reset register's imm to 0 and type to NOT_INIT. However, we leave out clearing of other variables such as id, min_value, max_value, etc, which can later on lead to pruning mismatches due to stale data. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e37e06b1229d..339c8a1371de 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -463,19 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; +static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno) +{ + BUG_ON(regno >= MAX_BPF_REG); + + memset(®s[regno], 0, sizeof(regs[regno])); + regs[regno].type = NOT_INIT; + regs[regno].min_value = BPF_REGISTER_MIN_RANGE; + regs[regno].max_value = BPF_REGISTER_MAX_RANGE; +} + static void init_reg_state(struct bpf_reg_state *regs) { int i; - for (i = 0; i < MAX_BPF_REG; i++) { - regs[i].type = NOT_INIT; - regs[i].imm = 0; - regs[i].min_value = BPF_REGISTER_MIN_RANGE; - regs[i].max_value = BPF_REGISTER_MAX_RANGE; - regs[i].min_align = 0; - regs[i].aux_off = 0; - regs[i].aux_off_align = 0; - } + for (i = 0; i < MAX_BPF_REG; i++) + mark_reg_not_init(regs, i); /* frame pointer */ regs[BPF_REG_FP].type = FRAME_PTR; @@ -1346,7 +1349,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs = state->regs; - struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1413,11 +1415,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) } /* reset caller saved regs */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* update return register */ if (fn->ret_type == RET_INTEGER) { @@ -2445,7 +2444,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -2478,11 +2476,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* reset caller saved regs to unreadable */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* mark destination R0 register as readable, since it contains * the value fetched from the packet -- cgit v1.2.3 From a316338cb71a3260201490e615f2f6d5c0d8fb2c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:08 +0200 Subject: bpf: fix wrong exposure of map_flags into fdinfo for lpm trie_alloc() always needs to have BPF_F_NO_PREALLOC passed in via attr->map_flags, since it does not support preallocation yet. We check the flag, but we never copy the flag into trie->map.map_flags, which is later on exposed into fdinfo and used by loaders such as iproute2. Latter uses this in bpf_map_selfcheck_pinned() to test whether a pinned map has the same spec as the one from the BPF obj file and if not, bails out, which is currently the case for lpm since it exposes always 0 as flags. Also copy over flags in array_map_alloc() and stack_map_alloc(). They always have to be 0 right now, but we should make sure to not miss to copy them over at a later point in time when we add actual flags for them to use. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Jarno Rajahalme Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 1 + kernel/bpf/lpm_trie.c | 1 + kernel/bpf/stackmap.c | 1 + 3 files changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 5e00b2333c26..172dc8ee0e3b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -86,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; + array->map.map_flags = attr->map_flags; array->elem_size = elem_size; if (!percpu) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 39cfafd895b8..b09185f0f17d 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) trie->map.key_size = attr->key_size; trie->map.value_size = attr->value_size; trie->map.max_entries = attr->max_entries; + trie->map.map_flags = attr->map_flags; trie->data_size = attr->key_size - offsetof(struct bpf_lpm_trie_key, data); trie->max_prefixlen = trie->data_size * 8; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 4dfd6f2ec2f9..31147d730abf 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) smap->map.key_size = attr->key_size; smap->map.value_size = value_size; smap->map.max_entries = attr->max_entries; + smap->map.map_flags = attr->map_flags; smap->n_buckets = n_buckets; smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; -- cgit v1.2.3