diff options
author | David Ahern <dsa@cumulusnetworks.com> | 2016-12-01 08:48:04 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-12-02 13:46:08 -0500 |
commit | 61023658760032e97869b07d54be9681d2529e77 (patch) | |
tree | 9b10a9d2a8b5820450298f9bda8f3c23fbf66b57 /net | |
parent | b2cd12574aa3e1625f471ff57cde7f628a18a46b (diff) |
bpf: Add new cgroup attach type to enable sock modifications
Add new cgroup based program type, BPF_PROG_TYPE_CGROUP_SOCK. Similar to
BPF_PROG_TYPE_CGROUP_SKB programs can be attached to a cgroup and run
any time a process in the cgroup opens an AF_INET or AF_INET6 socket.
Currently only sk_bound_dev_if is exported to userspace for modification
by a bpf program.
This allows a cgroup to be configured such that AF_INET{6} sockets opened
by processes are automatically bound to a specific device. In turn, this
enables the running of programs that do not support SO_BINDTODEVICE in a
specific VRF context / L3 domain.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/filter.c | 62 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 12 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 8 |
3 files changed, 81 insertions, 1 deletions
diff --git a/net/core/filter.c b/net/core/filter.c index 1c4d0faf22c8..0ab252e462aa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2818,6 +2818,32 @@ static bool lwt_is_valid_access(int off, int size, return __is_valid_access(off, size, type); } +static bool sock_filter_is_valid_access(int off, int size, + enum bpf_access_type type, + enum bpf_reg_type *reg_type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct bpf_sock, bound_dev_if): + break; + default: + return false; + } + } + + if (off < 0 || off + size > sizeof(struct bpf_sock)) + return false; + + /* The verifier guarantees that size > 0. */ + if (off % size != 0) + return false; + + if (size != sizeof(__u32)) + return false; + + return true; +} + static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog) { @@ -3076,6 +3102,30 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, return insn - insn_buf; } +static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, + int dst_reg, int src_reg, + int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct bpf_sock, bound_dev_if): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sock, sk_bound_dev_if)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sock, sk_bound_dev_if)); + break; + } + + return insn - insn_buf; +} + static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, @@ -3162,6 +3212,12 @@ static const struct bpf_verifier_ops lwt_xmit_ops = { .gen_prologue = tc_cls_act_prologue, }; +static const struct bpf_verifier_ops cg_sock_ops = { + .get_func_proto = sk_filter_func_proto, + .is_valid_access = sock_filter_is_valid_access, + .convert_ctx_access = sock_filter_convert_ctx_access, +}; + static struct bpf_prog_type_list sk_filter_type __read_mostly = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, @@ -3202,6 +3258,11 @@ static struct bpf_prog_type_list lwt_xmit_type __read_mostly = { .type = BPF_PROG_TYPE_LWT_XMIT, }; +static struct bpf_prog_type_list cg_sock_type __read_mostly = { + .ops = &cg_sock_ops, + .type = BPF_PROG_TYPE_CGROUP_SOCK +}; + static int __init register_sk_filter_ops(void) { bpf_register_prog_type(&sk_filter_type); @@ -3209,6 +3270,7 @@ static int __init register_sk_filter_ops(void) bpf_register_prog_type(&sched_act_type); bpf_register_prog_type(&xdp_type); bpf_register_prog_type(&cg_skb_type); + bpf_register_prog_type(&cg_sock_type); bpf_register_prog_type(&lwt_in_type); bpf_register_prog_type(&lwt_out_type); bpf_register_prog_type(&lwt_xmit_type); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5ddf5cda07f4..24d2550492ee 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -374,8 +374,18 @@ lookup_protocol: if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) + if (err) { + sk_common_release(sk); + goto out; + } + } + + if (!kern) { + err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); + if (err) { sk_common_release(sk); + goto out; + } } out: return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d424f3a3737a..237e654ba717 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -258,6 +258,14 @@ lookup_protocol: goto out; } } + + if (!kern) { + err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); + if (err) { + sk_common_release(sk); + goto out; + } + } out: return err; out_rcu_unlock: |