summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2020-10-12 16:16:50 -0700
committerJakub Kicinski <kuba@kernel.org>2020-10-12 16:16:50 -0700
commitccdf7fae3afaeaf0e5dd03311b86ffa56adf85ae (patch)
tree3028901b29bf4ab04cd50d61da4fecdb20b182b6 /include
parenta308283fdbf712b30061d2b4567530eb9e8dc1b4 (diff)
parent376dcfe3a4e5a5475a84e6b5f926066a8614f887 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2020-10-12 The main changes are: 1) The BPF verifier improvements to track register allocation pattern, from Alexei and Yonghong. 2) libbpf relocation support for different size load/store, from Andrii. 3) bpf_redirect_peer() helper and support for inner map array with different max_entries, from Daniel. 4) BPF support for per-cpu variables, form Hao. 5) sockmap improvements, from John. ==================== Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf.h8
-rw-r--r--include/linux/bpf_verifier.h7
-rw-r--r--include/linux/btf.h26
-rw-r--r--include/linux/netdevice.h4
-rw-r--r--include/linux/skmsg.h2
-rw-r--r--include/net/tcp.h33
-rw-r--r--include/uapi/linux/bpf.h101
7 files changed, 133 insertions, 48 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 50e5c4b52bd1..2b16bf48aab6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -82,7 +82,7 @@ struct bpf_map_ops {
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
int fd);
void (*map_fd_put_ptr)(void *ptr);
- u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
+ int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
@@ -293,6 +293,7 @@ enum bpf_arg_type {
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
+ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
__BPF_ARG_TYPE_MAX,
};
@@ -307,6 +308,8 @@ enum bpf_return_type {
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */
RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */
+ RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
+ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -405,6 +408,7 @@ enum bpf_reg_type {
PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
+ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
};
/* The information passed from prog-specific *_is_valid_access
@@ -1828,6 +1832,8 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
extern const struct bpf_func_proto bpf_copy_from_user_proto;
extern const struct bpf_func_proto bpf_snprintf_btf_proto;
+extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
+extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 363b4f1c562a..e83ef6f6bf43 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -308,6 +308,13 @@ struct bpf_insn_aux_data {
u32 map_index; /* index into used_maps[] */
u32 map_off; /* offset from value base address */
};
+ struct {
+ enum bpf_reg_type reg_type; /* type of pseudo_btf_id */
+ union {
+ u32 btf_id; /* btf_id for struct typed var */
+ u32 mem_size; /* mem_size for non-struct typed var */
+ };
+ } btf_var;
};
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 024e16ff7dcc..2bf641829664 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
i < btf_type_vlen(struct_type); \
i++, member++)
+#define for_each_vsi(i, datasec_type, member) \
+ for (i = 0, member = btf_type_var_secinfo(datasec_type); \
+ i < btf_type_vlen(datasec_type); \
+ i++, member++)
+
static inline bool btf_type_is_ptr(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
@@ -145,6 +150,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
}
+static inline bool btf_type_is_var(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
+}
+
+/* union is only a special case of struct:
+ * all its offsetof(member) == 0
+ */
+static inline bool btf_type_is_struct(const struct btf_type *t)
+{
+ u8 kind = BTF_INFO_KIND(t->info);
+
+ return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
static inline u16 btf_type_vlen(const struct btf_type *t)
{
return BTF_INFO_VLEN(t->info);
@@ -179,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t)
return (const struct btf_member *)(t + 1);
}
+static inline const struct btf_var_secinfo *btf_type_var_secinfo(
+ const struct btf_type *t)
+{
+ return (const struct btf_var_secinfo *)(t + 1);
+}
+
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a0df43b13839..948d7105e91a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1276,6 +1276,9 @@ struct netdev_net_notifier {
* int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
* int cmd);
* Add, change, delete or get information on an IPv4 tunnel.
+ * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
+ * If a device is paired with a peer device, return the peer instance.
+ * The caller must be under RCU read context.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1483,6 +1486,7 @@ struct net_device_ops {
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
int (*ndo_tunnel_ctl)(struct net_device *dev,
struct ip_tunnel_parm *p, int cmd);
+ struct net_device * (*ndo_get_peer_dev)(struct net_device *dev);
};
/**
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 3119928fc103..fec0c5ac1c4f 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -308,6 +308,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node);
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
+void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3601dea931a6..d4ef5bf94168 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2228,34 +2228,6 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
#endif /* CONFIG_NET_SOCK_MSG */
#ifdef CONFIG_CGROUP_BPF
-/* Copy the listen sk's HDR_OPT_CB flags to its child.
- *
- * During 3-Way-HandShake, the synack is usually sent from
- * the listen sk with the HDR_OPT_CB flags set so that
- * bpf-prog will be called to write the BPF hdr option.
- *
- * In fastopen, the child sk is used to send synack instead
- * of the listen sk. Thus, inheriting the HDR_OPT_CB flags
- * from the listen sk gives the bpf-prog a chance to write
- * BPF hdr option in the synack pkt during fastopen.
- *
- * Both fastopen and non-fastopen child will inherit the
- * HDR_OPT_CB flags to keep the bpf-prog having a consistent
- * behavior when deciding to clear this cb flags (or not)
- * during the PASSIVE_ESTABLISHED_CB.
- *
- * In the future, other cb flags could be inherited here also.
- */
-static inline void bpf_skops_init_child(const struct sock *sk,
- struct sock *child)
-{
- tcp_sk(child)->bpf_sock_ops_cb_flags =
- tcp_sk(sk)->bpf_sock_ops_cb_flags &
- (BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG |
- BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
- BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
-}
-
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
struct sk_buff *skb,
unsigned int end_offset)
@@ -2264,11 +2236,6 @@ static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
skops->skb_data_end = skb->data + end_offset;
}
#else
-static inline void bpf_skops_init_child(const struct sock *sk,
- struct sock *child)
-{
-}
-
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
struct sk_buff *skb,
unsigned int end_offset)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4f556cfcbfbe..bf5a99d803e4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -356,18 +356,36 @@ enum bpf_link_type {
#define BPF_F_SLEEPABLE (1U << 4)
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
- *
- * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
- * insn[0].imm: map fd map fd
- * insn[1].imm: 0 offset into value
- * insn[0].off: 0 0
- * insn[1].off: 0 0
- * ldimm64 rewrite: address of map address of map[0]+offset
- * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ * the following extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD
+ * insn[0].imm: map fd
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of map
+ * verifier type: CONST_PTR_TO_MAP
*/
#define BPF_PSEUDO_MAP_FD 1
+/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd
+ * insn[1].imm: offset into value
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of map[0]+offset
+ * verifier type: PTR_TO_MAP_VALUE
+ */
#define BPF_PSEUDO_MAP_VALUE 2
+/* insn[0].src_reg: BPF_PSEUDO_BTF_ID
+ * insn[0].imm: kernel btd id of VAR
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of the kernel variable
+ * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ * is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID 3
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@@ -417,6 +435,9 @@ enum {
/* Share perf_event among processes */
BPF_F_PRESERVE_ELEMS = (1U << 11),
+
+/* Create a map that is suitable to be an inner map with dynamic max entries */
+ BPF_F_INNER_MAP = (1U << 12),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1680,7 +1701,7 @@ union bpf_attr {
* **TCP_CONGESTION**, **TCP_BPF_IW**,
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
+ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* Return
@@ -2235,7 +2256,7 @@ union bpf_attr {
* Description
* This helper is used in programs implementing policies at the
* skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
- * if the verdeict eBPF program returns **SK_PASS**), redirect it
+ * if the verdict eBPF program returns **SK_PASS**), redirect it
* to the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
* egress interfaces can be used for redirection. The
@@ -3661,10 +3682,59 @@ union bpf_attr {
* Redirect the packet to another net device of index *ifindex*
* and fill in L2 addresses from neighboring subsystem. This helper
* is somewhat similar to **bpf_redirect**\ (), except that it
- * fills in e.g. MAC addresses based on the L3 information from
- * the packet. This helper is supported for IPv4 and IPv6 protocols.
+ * populates L2 addresses as well, meaning, internally, the helper
+ * performs a FIB lookup based on the skb's networking header to
+ * get the address of the next hop and then relies on the neighbor
+ * lookup for the L2 address of the nexthop.
+ *
+ * The *flags* argument is reserved and must be 0. The helper is
+ * currently only supported for tc BPF program types, and enabled
+ * for IPv4 and IPv6 protocols.
+ * Return
+ * The helper returns **TC_ACT_REDIRECT** on success or
+ * **TC_ACT_SHOT** on error.
+ *
+ * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
+ * Description
+ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ * pointer to the percpu kernel variable on *cpu*. A ksym is an
+ * extern variable decorated with '__ksym'. For ksym, there is a
+ * global var (either static or global) defined of the same name
+ * in the kernel. The ksym is percpu if the global var is percpu.
+ * The returned pointer points to the global percpu var on *cpu*.
+ *
+ * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+ * kernel, except that bpf_per_cpu_ptr() may return NULL. This
+ * happens if *cpu* is larger than nr_cpu_ids. The caller of
+ * bpf_per_cpu_ptr() must check the returned value.
+ * Return
+ * A pointer pointing to the kernel percpu variable on *cpu*, or
+ * NULL, if *cpu* is invalid.
+ *
+ * void *bpf_this_cpu_ptr(const void *percpu_ptr)
+ * Description
+ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ * pointer to the percpu kernel variable on this cpu. See the
+ * description of 'ksym' in **bpf_per_cpu_ptr**\ ().
+ *
+ * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
+ * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
+ * never return NULL.
+ * Return
+ * A pointer pointing to the kernel percpu variable on this cpu.
+ *
+ * long bpf_redirect_peer(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_redirect**\ (), except
+ * that the redirection happens to the *ifindex*' peer device and
+ * the netns switch takes place from ingress to ingress without
+ * going through the CPU's backlog queue.
+ *
* The *flags* argument is reserved and must be 0. The helper is
- * currently only supported for tc BPF program types.
+ * currently only supported for tc BPF program types at the ingress
+ * hook and for veth device types. The peer device must reside in a
+ * different network namespace.
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
@@ -3823,6 +3893,9 @@ union bpf_attr {
FN(seq_printf_btf), \
FN(skb_cgroup_classid), \
FN(redirect_neigh), \
+ FN(bpf_per_cpu_ptr), \
+ FN(bpf_this_cpu_ptr), \
+ FN(redirect_peer), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper