From 6200d5c3831370cd0ab4b6455933d12e82ea9956 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2020 09:53:09 +0100 Subject: MAINTAINERS: Update XDP and AF_XDP entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Getting too many false positive matches with current use of the content regex K: and file regex N: patterns. This patch drops file match N: and makes K: more restricted. Some more normal F: file wildcards are added. Notice that AF_XDP forgot to some F: files that is also updated in this patch. Suggested-by: Jakub Kicinski Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/160586238944.2808432.4401269290440394008.stgit@firesoul --- MAINTAINERS | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c7f6924d34c7..15355c055a1a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19109,12 +19109,17 @@ L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported F: include/net/xdp.h +F: include/net/xdp_priv.h F: include/trace/events/xdp.h F: kernel/bpf/cpumap.c F: kernel/bpf/devmap.c F: net/core/xdp.c -N: xdp -K: xdp +F: samples/bpf/xdp* +F: tools/testing/selftests/bpf/*xdp* +F: tools/testing/selftests/bpf/*/*xdp* +F: drivers/net/ethernet/*/*/*/*/*xdp* +F: drivers/net/ethernet/*/*/*xdp* +K: (?:\b|_)xdp(?:\b|_) XDP SOCKETS (AF_XDP) M: Björn Töpel @@ -19123,9 +19128,12 @@ R: Jonathan Lemon L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained +F: Documentation/networking/af_xdp.rst F: include/net/xdp_sock* F: include/net/xsk_buff_pool.h F: include/uapi/linux/if_xdp.h +F: include/uapi/linux/xdp_diag.h +F: include/net/netns/xdp.h F: net/xdp/ F: samples/bpf/xdpsock* F: tools/lib/bpf/xsk* -- cgit v1.2.3 From 537cf4e3cc2f6cc9088dcd6162de573f603adc29 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 20 Nov 2020 12:53:39 +0100 Subject: xsk: Fix umem cleanup bug at socket destruct Fix a bug that is triggered when a partially setup socket is destroyed. For a fully setup socket, a socket that has been bound to a device, the cleanup of the umem is performed at the end of the buffer pool's cleanup work queue item. This has to be performed in a work queue, and not in RCU cleanup, as it is doing a vunmap that cannot execute in interrupt context. However, when a socket has only been partially set up so that a umem has been created but the buffer pool has not, the code erroneously directly calls the umem cleanup function instead of using a work queue, and this leads to a BUG_ON() in vunmap(). As there in this case is no buffer pool, we cannot use its work queue, so we need to introduce a work queue for the umem and schedule this for the cleanup. So in the case there is no pool, we are going to use the umem's own work queue to schedule the cleanup. But if there is a pool, the cleanup of the umem is still being performed by the pool's work queue, as it is important that the umem is cleaned up after the pool. Fixes: e5e1a4bc916d ("xsk: Fix possible memory leak at socket close") Reported-by: Marek Majtyka Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Tested-by: Marek Majtyka Link: https://lore.kernel.org/bpf/1605873219-21629-1-git-send-email-magnus.karlsson@gmail.com --- include/net/xdp_sock.h | 1 + net/xdp/xdp_umem.c | 19 ++++++++++++++++--- net/xdp/xdp_umem.h | 2 +- net/xdp/xsk.c | 2 +- net/xdp/xsk_buff_pool.c | 2 +- 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 1a9559c0cbdd..4f4e93bf814c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -31,6 +31,7 @@ struct xdp_umem { struct page **pgs; int id; struct list_head xsk_dma_list; + struct work_struct work; }; struct xsk_map { diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 56d052bc65cb..56a28a686988 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -66,18 +66,31 @@ static void xdp_umem_release(struct xdp_umem *umem) kfree(umem); } +static void xdp_umem_release_deferred(struct work_struct *work) +{ + struct xdp_umem *umem = container_of(work, struct xdp_umem, work); + + xdp_umem_release(umem); +} + void xdp_get_umem(struct xdp_umem *umem) { refcount_inc(&umem->users); } -void xdp_put_umem(struct xdp_umem *umem) +void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) { if (!umem) return; - if (refcount_dec_and_test(&umem->users)) - xdp_umem_release(umem); + if (refcount_dec_and_test(&umem->users)) { + if (defer_cleanup) { + INIT_WORK(&umem->work, xdp_umem_release_deferred); + schedule_work(&umem->work); + } else { + xdp_umem_release(umem); + } + } } static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index 181fdda2f2a8..aa9fe2780410 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -9,7 +9,7 @@ #include void xdp_get_umem(struct xdp_umem *umem); -void xdp_put_umem(struct xdp_umem *umem); +void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup); struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr); #endif /* XDP_UMEM_H_ */ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index cfbec3989a76..5a6cdf7b320d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1147,7 +1147,7 @@ static void xsk_destruct(struct sock *sk) return; if (!xp_put_pool(xs->pool)) - xdp_put_umem(xs->umem); + xdp_put_umem(xs->umem, !xs->pool); sk_refcnt_debug_dec(sk); } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 8a3bf4e1318e..3c5a1423d922 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -242,7 +242,7 @@ static void xp_release_deferred(struct work_struct *work) pool->cq = NULL; } - xdp_put_umem(pool->umem); + xdp_put_umem(pool->umem, false); xp_destroy(pool); } -- cgit v1.2.3 From 178648916e73e00de83150eb0c90c0d3a977a46a Mon Sep 17 00:00:00 2001 From: Marek Majtyka Date: Fri, 20 Nov 2020 16:14:43 +0100 Subject: xsk: Fix incorrect netdev reference count Fix incorrect netdev reference count in xsk_bind operation. Incorrect reference count of the device appears when a user calls bind with the XDP_ZEROCOPY flag on an interface which does not support zero-copy. In such a case, an error is returned but the reference count is not decreased. This change fixes the fault, by decreasing the reference count in case of such an error. The problem being corrected appeared in '162c820ed896' for the first time, and the code was moved to new file location over the time with commit 'c2d3d6a47462'. This specific patch applies to all version starting from 'c2d3d6a47462'. The same solution should be applied but on different file (net/xdp/xdp_umem.c) and function (xdp_umem_assign_dev) for versions from '162c820ed896' to 'c2d3d6a47462' excluded. Fixes: 162c820ed896 ("xdp: hold device for umem regardless of zero-copy mode") Signed-off-by: Marek Majtyka Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20201120151443.105903-1-marekx.majtyka@intel.com --- net/xdp/xsk_buff_pool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 3c5a1423d922..9287eddec52c 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -185,8 +185,10 @@ err_unreg_xsk: err_unreg_pool: if (!force_zc) err = 0; /* fallback to copy mode */ - if (err) + if (err) { xsk_clear_pool_at_qid(netdev, queue_id); + dev_put(netdev); + } return err; } -- cgit v1.2.3 From 36ccdf85829a7dd6936dba5d02fa50138471f0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 23 Nov 2020 18:56:00 +0100 Subject: net, xsk: Avoid taking multiple skbuff references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") addressed the problem that packets were discarded from the Tx AF_XDP ring, when the driver returned NETDEV_TX_BUSY. Part of the fix was bumping the skbuff reference count, so that the buffer would not be freed by dev_direct_xmit(). A reference count larger than one means that the skbuff is "shared", which is not the case. If the "shared" skbuff is sent to the generic XDP receive path, netif_receive_generic_xdp(), and pskb_expand_head() is entered the BUG_ON(skb_shared(skb)) will trigger. This patch adds a variant to dev_direct_xmit(), __dev_direct_xmit(), where a user can select the skbuff free policy. This allows AF_XDP to avoid bumping the reference count, but still keep the NETDEV_TX_BUSY behavior. Fixes: 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") Reported-by: Yonghong Song Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201123175600.146255-1-bjorn.topel@gmail.com --- include/linux/netdevice.h | 14 +++++++++++++- net/core/dev.c | 8 ++------ net/xdp/xsk.c | 8 +------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..76775abf259d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2813,9 +2813,21 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); + int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); + +static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +{ + int ret; + + ret = __dev_direct_xmit(skb, queue_id); + if (!dev_xmit_complete(ret)) + kfree_skb(skb); + return ret; +} + int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); diff --git a/net/core/dev.c b/net/core/dev.c index 82dc6b48e45f..8588ade790cb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4180,7 +4180,7 @@ int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) } EXPORT_SYMBOL(dev_queue_xmit_accel); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { struct net_device *dev = skb->dev; struct sk_buff *orig_skb = skb; @@ -4210,17 +4210,13 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) dev_xmit_recursion_dec(); local_bh_enable(); - - if (!dev_xmit_complete(ret)) - kfree_skb(skb); - return ret; drop: atomic_long_inc(&dev->tx_dropped); kfree_skb_list(skb); return NET_XMIT_DROP; } -EXPORT_SYMBOL(dev_direct_xmit); +EXPORT_SYMBOL(__dev_direct_xmit); /************************************************************************* * Receiver routines diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5a6cdf7b320d..b7b039bd9d03 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -411,11 +411,7 @@ static int xsk_generic_xmit(struct sock *sk) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; - /* Hinder dev_direct_xmit from freeing the packet and - * therefore completing it in the destructor - */ - refcount_inc(&skb->users); - err = dev_direct_xmit(skb, xs->queue_id); + err = __dev_direct_xmit(skb, xs->queue_id); if (err == NETDEV_TX_BUSY) { /* Tell user-space to retry the send */ skb->destructor = sock_wfree; @@ -429,12 +425,10 @@ static int xsk_generic_xmit(struct sock *sk) /* Ignore NET_XMIT_CN as packet might have been sent */ if (err == NET_XMIT_DROP) { /* SKB completed but not sent */ - kfree_skb(skb); err = -EBUSY; goto out; } - consume_skb(skb); sent_frame = true; } -- cgit v1.2.3 From 68878a5c5b852d17f5827ce8a0f6fbd8b4cdfada Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 24 Nov 2020 18:41:00 +0800 Subject: bpftool: Fix error return value in build_btf_type_table An appropriate return value should be set on the failed path. Fixes: 4d374ba0bf30 ("tools: bpftool: implement "bpftool btf show|list"") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201124104100.491-1-thunder.leizhen@huawei.com --- tools/bpf/bpftool/btf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 8ab142ff5eac..2afb7d5b1aca 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -693,6 +693,7 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, obj_node = calloc(1, sizeof(*obj_node)); if (!obj_node) { p_err("failed to allocate memory: %s", strerror(errno)); + err = -ENOMEM; goto err_free; } -- cgit v1.2.3 From 9a44bc9449cfe7e39dbadf537ff669fb007a9e63 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 25 Nov 2020 20:24:04 +0000 Subject: bpf: Add MAINTAINERS entry for BPF LSM Similar to XDP and some JITs, also add Brendan and Florent who have been reviewing all my patches internally as reviewers. The patches are expected as usual to go via the BPF tree(s) / list / merge workflows. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201125202404.1419509-1-kpsingh@chromium.org --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 15355c055a1a..3004f0abfe7d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3356,6 +3356,17 @@ S: Supported F: arch/x86/net/ X: arch/x86/net/bpf_jit_comp32.c +BPF LSM (Security Audit and Enforcement using BPF) +M: KP Singh +R: Florent Revest +R: Brendan Jackman +L: bpf@vger.kernel.org +S: Maintained +F: Documentation/bpf/bpf_lsm.rst +F: include/linux/bpf_lsm.h +F: kernel/bpf/bpf_lsm.c +F: security/bpf/ + BROADCOM B44 10/100 ETHERNET DRIVER M: Michael Chan L: netdev@vger.kernel.org -- cgit v1.2.3