diff options
author | Jakub Kicinski <kuba@kernel.org> | 2020-10-12 16:16:50 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2020-10-12 16:16:50 -0700 |
commit | ccdf7fae3afaeaf0e5dd03311b86ffa56adf85ae (patch) | |
tree | 3028901b29bf4ab04cd50d61da4fecdb20b182b6 /include | |
parent | a308283fdbf712b30061d2b4567530eb9e8dc1b4 (diff) | |
parent | 376dcfe3a4e5a5475a84e6b5f926066a8614f887 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:
====================
pull-request: bpf-next 2020-10-12
The main changes are:
1) The BPF verifier improvements to track register allocation pattern, from Alexei and Yonghong.
2) libbpf relocation support for different size load/store, from Andrii.
3) bpf_redirect_peer() helper and support for inner map array with different max_entries, from Daniel.
4) BPF support for per-cpu variables, form Hao.
5) sockmap improvements, from John.
====================
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bpf.h | 8 | ||||
-rw-r--r-- | include/linux/bpf_verifier.h | 7 | ||||
-rw-r--r-- | include/linux/btf.h | 26 | ||||
-rw-r--r-- | include/linux/netdevice.h | 4 | ||||
-rw-r--r-- | include/linux/skmsg.h | 2 | ||||
-rw-r--r-- | include/net/tcp.h | 33 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 101 |
7 files changed, 133 insertions, 48 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 50e5c4b52bd1..2b16bf48aab6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -82,7 +82,7 @@ struct bpf_map_ops { void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); void (*map_fd_put_ptr)(void *ptr); - u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); + int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); @@ -293,6 +293,7 @@ enum bpf_arg_type { ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ __BPF_ARG_TYPE_MAX, }; @@ -307,6 +308,8 @@ enum bpf_return_type { RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ + RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ + RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -405,6 +408,7 @@ enum bpf_reg_type { PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ + PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ }; /* The information passed from prog-specific *_is_valid_access @@ -1828,6 +1832,8 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; +extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; +extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 363b4f1c562a..e83ef6f6bf43 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -308,6 +308,13 @@ struct bpf_insn_aux_data { u32 map_index; /* index into used_maps[] */ u32 map_off; /* offset from value base address */ }; + struct { + enum bpf_reg_type reg_type; /* type of pseudo_btf_id */ + union { + u32 btf_id; /* btf_id for struct typed var */ + u32 mem_size; /* mem_size for non-struct typed var */ + }; + } btf_var; }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 024e16ff7dcc..2bf641829664 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, i < btf_type_vlen(struct_type); \ i++, member++) +#define for_each_vsi(i, datasec_type, member) \ + for (i = 0, member = btf_type_var_secinfo(datasec_type); \ + i < btf_type_vlen(datasec_type); \ + i++, member++) + static inline bool btf_type_is_ptr(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; @@ -145,6 +150,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; } +static inline bool btf_type_is_var(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; +} + +/* union is only a special case of struct: + * all its offsetof(member) == 0 + */ +static inline bool btf_type_is_struct(const struct btf_type *t) +{ + u8 kind = BTF_INFO_KIND(t->info); + + return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; +} + static inline u16 btf_type_vlen(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); @@ -179,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t) return (const struct btf_member *)(t + 1); } +static inline const struct btf_var_secinfo *btf_type_var_secinfo( + const struct btf_type *t) +{ + return (const struct btf_var_secinfo *)(t + 1); +} + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a0df43b13839..948d7105e91a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1276,6 +1276,9 @@ struct netdev_net_notifier { * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. + * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); + * If a device is paired with a peer device, return the peer instance. + * The caller must be under RCU read context. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1483,6 +1486,7 @@ struct net_device_ops { struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); + struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); }; /** diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 3119928fc103..fec0c5ac1c4f 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -308,6 +308,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node); int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock); +void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock); +void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock); int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg); diff --git a/include/net/tcp.h b/include/net/tcp.h index 3601dea931a6..d4ef5bf94168 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2228,34 +2228,6 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, #endif /* CONFIG_NET_SOCK_MSG */ #ifdef CONFIG_CGROUP_BPF -/* Copy the listen sk's HDR_OPT_CB flags to its child. - * - * During 3-Way-HandShake, the synack is usually sent from - * the listen sk with the HDR_OPT_CB flags set so that - * bpf-prog will be called to write the BPF hdr option. - * - * In fastopen, the child sk is used to send synack instead - * of the listen sk. Thus, inheriting the HDR_OPT_CB flags - * from the listen sk gives the bpf-prog a chance to write - * BPF hdr option in the synack pkt during fastopen. - * - * Both fastopen and non-fastopen child will inherit the - * HDR_OPT_CB flags to keep the bpf-prog having a consistent - * behavior when deciding to clear this cb flags (or not) - * during the PASSIVE_ESTABLISHED_CB. - * - * In the future, other cb flags could be inherited here also. - */ -static inline void bpf_skops_init_child(const struct sock *sk, - struct sock *child) -{ - tcp_sk(child)->bpf_sock_ops_cb_flags = - tcp_sk(sk)->bpf_sock_ops_cb_flags & - (BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG | - BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | - BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); -} - static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, struct sk_buff *skb, unsigned int end_offset) @@ -2264,11 +2236,6 @@ static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, skops->skb_data_end = skb->data + end_offset; } #else -static inline void bpf_skops_init_child(const struct sock *sk, - struct sock *child) -{ -} - static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, struct sk_buff *skb, unsigned int end_offset) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4f556cfcbfbe..bf5a99d803e4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -356,18 +356,36 @@ enum bpf_link_type { #define BPF_F_SLEEPABLE (1U << 4) /* When BPF ldimm64's insn[0].src_reg != 0 then this can have - * two extensions: - * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd map fd - * insn[1].imm: 0 offset into value - * insn[0].off: 0 0 - * insn[1].off: 0 0 - * ldimm64 rewrite: address of map address of map[0]+offset - * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + * the following extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD + * insn[0].imm: map fd + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map + * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 +/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_VALUE 2 +/* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel btd id of VAR + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the kernel variable + * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var + * is struct/union. + */ +#define BPF_PSEUDO_BTF_ID 3 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -417,6 +435,9 @@ enum { /* Share perf_event among processes */ BPF_F_PRESERVE_ELEMS = (1U << 11), + +/* Create a map that is suitable to be an inner map with dynamic max entries */ + BPF_F_INNER_MAP = (1U << 12), }; /* Flags for BPF_PROG_QUERY. */ @@ -1680,7 +1701,7 @@ union bpf_attr { * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return @@ -2235,7 +2256,7 @@ union bpf_attr { * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. - * if the verdeict eBPF program returns **SK_PASS**), redirect it + * if the verdict eBPF program returns **SK_PASS**), redirect it * to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The @@ -3661,10 +3682,59 @@ union bpf_attr { * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it - * fills in e.g. MAC addresses based on the L3 information from - * the packet. This helper is supported for IPv4 and IPv6 protocols. + * populates L2 addresses as well, meaning, internally, the helper + * performs a FIB lookup based on the skb's networking header to + * get the address of the next hop and then relies on the neighbor + * lookup for the L2 address of the nexthop. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types, and enabled + * for IPv4 and IPv6 protocols. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A pointer pointing to the kernel percpu variable on *cpu*, or + * NULL, if *cpu* is invalid. + * + * void *bpf_this_cpu_ptr(const void *percpu_ptr) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on this cpu. See the + * description of 'ksym' in **bpf_per_cpu_ptr**\ (). + * + * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in + * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would + * never return NULL. + * Return + * A pointer pointing to the kernel percpu variable on this cpu. + * + * long bpf_redirect_peer(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_redirect**\ (), except + * that the redirection happens to the *ifindex*' peer device and + * the netns switch takes place from ingress to ingress without + * going through the CPU's backlog queue. + * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types. + * currently only supported for tc BPF program types at the ingress + * hook and for veth device types. The peer device must reside in a + * different network namespace. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. @@ -3823,6 +3893,9 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ + FN(bpf_per_cpu_ptr), \ + FN(bpf_this_cpu_ptr), \ + FN(redirect_peer), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper |