diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2018-04-24 22:26:59 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-04-24 22:27:00 +0200 |
commit | 0e25d14e5f0a3d4c32a1961e0cf58ae23b637681 (patch) | |
tree | 9790a515413c05b19aa0eb988ef002a2cdb1dc05 | |
parent | fbcf93ebcaef7d09881ee308b52cd84f5e43c622 (diff) | |
parent | 29a36f9eef30b7f008b722a753a84b314f981037 (diff) |
Merge branch 'bpf-xfrm-states'
Eyal Birger says:
====================
This patchset adds support for fetching XFRM state information from
an eBPF program called from TC.
The first patch introduces a helper for fetching an XFRM state from the
skb's secpath. The XFRM state is modeled using a new virtual struct which
contains the SPI, peer address, and reqid values of the state; This struct
can be extended in the future to provide additional state information.
The second patch adds a test example in test_tunnel_bpf.sh. The sample
validates the correct extraction of state information by the eBPF program.
v3:
- Kept SPI and peer IPv4 address in state in network byte order
following suggestion from Alexei Starovoitov
v2:
- Fixed two comments by Daniel Borkmann:
- disallow reserved flags in helper call
- avoid compiling in helper code when CONFIG_XFRM is off
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- | include/uapi/linux/bpf.h | 25 | ||||
-rw-r--r-- | net/core/filter.c | 48 | ||||
-rw-r--r-- | samples/bpf/tcbpf2_kern.c | 16 | ||||
-rwxr-xr-x | samples/bpf/test_tunnel_bpf.sh | 71 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 25 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/bpf_helpers.h | 4 |
6 files changed, 186 insertions, 3 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c8383a289f7b..e6679393b687 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -774,6 +774,15 @@ union bpf_attr { * @xdp_md: pointer to xdp_md * @delta: A negative integer to be added to xdp_md.data_end * Return: 0 on success or negative on error + * + * int bpf_skb_get_xfrm_state(skb, index, xfrm_state, size, flags) + * retrieve XFRM state + * @skb: pointer to skb + * @index: index of the xfrm state in the secpath + * @key: pointer to 'struct bpf_xfrm_state' + * @size: size of 'struct bpf_xfrm_state' + * @flags: room for future extensions + * Return: 0 on success or negative error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -841,7 +850,8 @@ union bpf_attr { FN(msg_cork_bytes), \ FN(msg_pull_data), \ FN(bind), \ - FN(xdp_adjust_tail), + FN(xdp_adjust_tail), \ + FN(skb_get_xfrm_state), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -947,6 +957,19 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* user accessible mirror of in-kernel xfrm_state. + * new fields can only be added to the end of this structure + */ +struct bpf_xfrm_state { + __u32 reqid; + __u32 spi; /* Stored in network byte order */ + __u16 family; + union { + __u32 remote_ipv4; /* Stored in network byte order */ + __u32 remote_ipv6[4]; /* Stored in network byte order */ + }; +}; + /* Generic BPF return codes which all BPF program types may support. * The values are binary compatible with their TC_ACT_* counter-part to * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT diff --git a/net/core/filter.c b/net/core/filter.c index e25bc4a3aa1a..8e45c6c7ab08 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -57,6 +57,7 @@ #include <net/sock_reuseport.h> #include <net/busy_poll.h> #include <net/tcp.h> +#include <net/xfrm.h> #include <linux/bpf_trace.h> /** @@ -3743,6 +3744,49 @@ static const struct bpf_func_proto bpf_bind_proto = { .arg3_type = ARG_CONST_SIZE, }; +#ifdef CONFIG_XFRM +BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index, + struct bpf_xfrm_state *, to, u32, size, u64, flags) +{ + const struct sec_path *sp = skb_sec_path(skb); + const struct xfrm_state *x; + + if (!sp || unlikely(index >= sp->len || flags)) + goto err_clear; + + x = sp->xvec[index]; + + if (unlikely(size != sizeof(struct bpf_xfrm_state))) + goto err_clear; + + to->reqid = x->props.reqid; + to->spi = x->id.spi; + to->family = x->props.family; + if (to->family == AF_INET6) { + memcpy(to->remote_ipv6, x->props.saddr.a6, + sizeof(to->remote_ipv6)); + } else { + to->remote_ipv4 = x->props.saddr.a4; + } + + return 0; +err_clear: + memset(to, 0, size); + return -EINVAL; +} + +static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { + .func = bpf_skb_get_xfrm_state, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, + .arg5_type = ARG_ANYTHING, +}; +#endif + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -3884,6 +3928,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; +#ifdef CONFIG_XFRM + case BPF_FUNC_skb_get_xfrm_state: + return &bpf_skb_get_xfrm_state_proto; +#endif default: return bpf_base_func_proto(func_id); } diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 9a8db7bd6db4..fa260c750fb1 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -593,4 +593,20 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("xfrm_get_state") +int _xfrm_get_state(struct __sk_buff *skb) +{ + struct bpf_xfrm_state x; + char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n"; + int ret; + + ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0); + if (ret < 0) + return TC_ACT_OK; + + bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi), + bpf_ntohl(x.remote_ipv4)); + return TC_ACT_OK; +} + char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh index c265863ccdf9..9c534dc07b36 100755 --- a/samples/bpf/test_tunnel_bpf.sh +++ b/samples/bpf/test_tunnel_bpf.sh @@ -155,6 +155,57 @@ function add_ipip_tunnel { ip addr add dev $DEV 10.1.1.200/24 } +function setup_xfrm_tunnel { + auth=0x$(printf '1%.0s' {1..40}) + enc=0x$(printf '2%.0s' {1..32}) + spi_in_to_out=0x1 + spi_out_to_in=0x2 + # in namespace + # in -> out + ip netns exec at_ns0 \ + ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ + spi $spi_in_to_out reqid 1 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip netns exec at_ns0 \ + ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ + tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ + mode tunnel + # out -> in + ip netns exec at_ns0 \ + ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ + spi $spi_out_to_in reqid 2 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip netns exec at_ns0 \ + ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ + tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ + mode tunnel + # address & route + ip netns exec at_ns0 \ + ip addr add dev veth0 10.1.1.100/32 + ip netns exec at_ns0 \ + ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ + src 10.1.1.100 + + # out of namespace + # in -> out + ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ + spi $spi_in_to_out reqid 1 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ + tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ + mode tunnel + # out -> in + ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ + spi $spi_out_to_in reqid 2 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ + tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ + mode tunnel + # address & route + ip addr add dev veth1 10.1.1.200/32 + ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 +} + function attach_bpf { DEV=$1 SET_TUNNEL=$2 @@ -278,6 +329,22 @@ function test_ipip { cleanup } +function test_xfrm_tunnel { + config_device + tcpdump -nei veth1 ip & + output=$(mktemp) + cat /sys/kernel/debug/tracing/trace_pipe | tee $output & + setup_xfrm_tunnel + tc qdisc add dev veth1 clsact + tc filter add dev veth1 proto ip ingress bpf da obj tcbpf2_kern.o \ + sec xfrm_get_state + ip netns exec at_ns0 ping -c 1 10.1.1.200 + grep "reqid 1" $output + grep "spi 0x1" $output + grep "remote ip 0xac100164" $output + cleanup +} + function cleanup { set +ex pkill iperf @@ -291,6 +358,8 @@ function cleanup { ip link del geneve11 ip link del erspan11 ip link del ip6erspan11 + ip x s flush + ip x p flush pkill tcpdump pkill cat set -ex @@ -316,4 +385,6 @@ echo "Testing GENEVE tunnel..." test_geneve echo "Testing IPIP tunnel..." test_ipip +echo "Testing IPSec tunnel..." +test_xfrm_tunnel echo "*** PASS ***" diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7f7fbb9d0253..5841ed41b30c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -774,6 +774,15 @@ union bpf_attr { * @xdp_md: pointer to xdp_md * @delta: A negative integer to be added to xdp_md.data_end * Return: 0 on success or negative on error + * + * int bpf_skb_get_xfrm_state(skb, index, xfrm_state, size, flags) + * retrieve XFRM state + * @skb: pointer to skb + * @index: index of the xfrm state in the secpath + * @key: pointer to 'struct bpf_xfrm_state' + * @size: size of 'struct bpf_xfrm_state' + * @flags: room for future extensions + * Return: 0 on success or negative error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -841,7 +850,8 @@ union bpf_attr { FN(msg_cork_bytes), \ FN(msg_pull_data), \ FN(bind), \ - FN(xdp_adjust_tail), + FN(xdp_adjust_tail), \ + FN(skb_get_xfrm_state), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -946,6 +956,19 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* user accessible mirror of in-kernel xfrm_state. + * new fields can only be added to the end of this structure + */ +struct bpf_xfrm_state { + __u32 reqid; + __u32 spi; /* Stored in network byte order */ + __u16 family; + union { + __u32 remote_ipv4; /* Stored in network byte order */ + __u32 remote_ipv6[4]; /* Stored in network byte order */ + }; +}; + /* Generic BPF return codes which all BPF program types may support. * The values are binary compatible with their TC_ACT_* counter-part to * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 9271576bdc8f..69d7b918e66a 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -98,7 +98,9 @@ static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_tail; - +static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, + int size, int flags) = + (void *) BPF_FUNC_skb_get_xfrm_state; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions |