diff options
author | David S. Miller <davem@davemloft.net> | 2020-09-10 13:08:47 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-09-10 13:08:47 -0700 |
commit | 6198f4469089846c6017d2a6bc80ea7a37b469c5 (patch) | |
tree | cbf94ed6cf85789793620d8bd136c61de728d175 | |
parent | 8b40f21b699df25dcc8b98d0de7774a1e45fca4f (diff) | |
parent | 5251ef82993252da0f63616676905da38b40778a (diff) |
Merge branch 'netpoll-make-sure-napi_list-is-safe-for-RCU-traversal'
Jakub Kicinski says:
====================
netpoll: make sure napi_list is safe for RCU traversal
This series is a follow-up to the fix in commit 96e97bc07e90 ("net:
disable netpoll on fresh napis"). To avoid any latent race conditions,
convert dev->napi_list to a proper RCU list. We need minor restructuring
because it looks like netif_napi_del() used to be idempotent, and
it may be quite hard to track down everyone who depends on that.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 | ||||
-rw-r--r-- | drivers/net/ethernet/cisco/enic/enic_main.c | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 5 | ||||
-rw-r--r-- | drivers/net/veth.c | 3 | ||||
-rw-r--r-- | drivers/net/virtio_net.c | 7 | ||||
-rw-r--r-- | include/linux/netdevice.h | 36 | ||||
-rw-r--r-- | net/core/dev.c | 32 | ||||
-rw-r--r-- | net/core/netpoll.c | 2 |
10 files changed, 55 insertions, 59 deletions
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 7e4c93be4451..d8b1824c334d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -825,9 +825,9 @@ static inline void bnx2x_del_all_napi_cnic(struct bnx2x *bp) int i; for_each_rx_queue_cnic(bp, i) { - napi_hash_del(&bnx2x_fp(bp, i, napi)); - netif_napi_del(&bnx2x_fp(bp, i, napi)); + __netif_napi_del(&bnx2x_fp(bp, i, napi)); } + synchronize_net(); } static inline void bnx2x_del_all_napi(struct bnx2x *bp) @@ -835,9 +835,9 @@ static inline void bnx2x_del_all_napi(struct bnx2x *bp) int i; for_each_eth_queue(bp, i) { - napi_hash_del(&bnx2x_fp(bp, i, napi)); - netif_napi_del(&bnx2x_fp(bp, i, napi)); + __netif_napi_del(&bnx2x_fp(bp, i, napi)); } + synchronize_net(); } int bnx2x_set_int_mode(struct bnx2x *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b167066af450..53f64ca673c3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8634,10 +8634,9 @@ static void bnxt_del_napi(struct bnxt *bp) for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; - napi_hash_del(&bnapi->napi); - netif_napi_del(&bnapi->napi); + __netif_napi_del(&bnapi->napi); } - /* We called napi_hash_del() before netif_napi_del(), we need + /* We called __netif_napi_del(), we need * to respect an RCU grace period before freeing napi structures. 
*/ synchronize_net(); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index a4e307636f5a..4f0329d8778f 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2529,13 +2529,15 @@ static void enic_dev_deinit(struct enic *enic) { unsigned int i; - for (i = 0; i < enic->rq_count; i++) { - napi_hash_del(&enic->napi[i]); - netif_napi_del(&enic->napi[i]); - } + for (i = 0; i < enic->rq_count; i++) + __netif_napi_del(&enic->napi[i]); + if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) - netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); + __netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); + + /* observe RCU grace period after __netif_napi_del() calls */ + synchronize_net(); enic_free_vnic_resources(enic); enic_clear_intr_mode(enic); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 2e35c5706cf1..df389a11d3af 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -1029,10 +1029,10 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx) WRITE_ONCE(adapter->rx_ring[ring->queue_index], NULL); adapter->q_vector[v_idx] = NULL; - napi_hash_del(&q_vector->napi); - netif_napi_del(&q_vector->napi); + __netif_napi_del(&q_vector->napi); /* + * after a call to __netif_napi_del() napi may still be used and * ixgbe_get_stats64() might access the rings on this vector, * we must wait a grace period before freeing it. 
*/ diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 4a5beafa0493..1634ca6d4a8f 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3543,11 +3543,10 @@ static void myri10ge_free_slices(struct myri10ge_priv *mgp) ss->fw_stats, ss->fw_stats_bus); ss->fw_stats = NULL; } - napi_hash_del(&ss->napi); - netif_napi_del(&ss->napi); + __netif_napi_del(&ss->napi); } /* Wait till napi structs are no longer used, and then free ss. */ - synchronize_rcu(); + synchronize_net(); kfree(mgp->ss); mgp->ss = NULL; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 7de8f0ea3f6b..091e5b4ba042 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -897,14 +897,13 @@ static void veth_napi_del(struct net_device *dev) struct veth_rq *rq = &priv->rq[i]; napi_disable(&rq->xdp_napi); - napi_hash_del(&rq->xdp_napi); + __netif_napi_del(&rq->xdp_napi); } synchronize_net(); for (i = 0; i < dev->real_num_rx_queues; i++) { struct veth_rq *rq = &priv->rq[i]; - netif_napi_del(&rq->xdp_napi); rq->rx_notify_masked = false; ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 263b005981bd..7145c83c6c8c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2604,12 +2604,11 @@ static void virtnet_free_queues(struct virtnet_info *vi) int i; for (i = 0; i < vi->max_queue_pairs; i++) { - napi_hash_del(&vi->rq[i].napi); - netif_napi_del(&vi->rq[i].napi); - netif_napi_del(&vi->sq[i].napi); + __netif_napi_del(&vi->rq[i].napi); + __netif_napi_del(&vi->sq[i].napi); } - /* We called napi_hash_del() before netif_napi_del(), + /* We called __netif_napi_del(), * we need to respect an RCU grace period before freeing vi->rq */ synchronize_net(); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7f9fcfd15942..157e0242e9ee 100644 --- a/include/linux/netdevice.h +++ 
b/include/linux/netdevice.h @@ -70,6 +70,7 @@ struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; +void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -354,7 +355,7 @@ enum { NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ - NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ + NAPI_STATE_LISTED, /* NAPI added to system lists */ NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ }; @@ -364,7 +365,7 @@ enum { NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), - NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), + NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), }; @@ -489,20 +490,6 @@ static inline bool napi_complete(struct napi_struct *n) } /** - * napi_hash_del - remove a NAPI from global table - * @napi: NAPI context - * - * Warning: caller must observe RCU grace period - * before freeing memory containing @napi, if - * this function returns true. - * Note: core networking stack automatically calls it - * from netif_napi_del(). - * Drivers might want to call this helper to combine all - * the needed RCU grace periods into a single one. - */ -bool napi_hash_del(struct napi_struct *napi); - -/** * napi_disable - prevent NAPI from scheduling * @n: NAPI context * @@ -2368,12 +2355,26 @@ static inline void netif_tx_napi_add(struct net_device *dev, } /** + * __netif_napi_del - remove a NAPI context + * @napi: NAPI context + * + * Warning: caller must observe RCU grace period before freeing memory + * containing @napi. Drivers might want to call this helper to combine + * all the needed RCU grace periods into a single one. 
+ */ +void __netif_napi_del(struct napi_struct *napi); + +/** * netif_napi_del - remove a NAPI context * @napi: NAPI context * * netif_napi_del() removes a NAPI context from the network device NAPI list */ -void netif_napi_del(struct napi_struct *napi); +static inline void netif_napi_del(struct napi_struct *napi) +{ + __netif_napi_del(napi); + synchronize_net(); +} struct napi_gro_cb { /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ @@ -2797,7 +2798,6 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); void netdev_freemem(struct net_device *dev); -void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 152ad3b578de..03624192862a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6533,8 +6533,7 @@ EXPORT_SYMBOL(napi_busy_loop); static void napi_hash_add(struct napi_struct *napi) { - if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || - test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) + if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state)) return; spin_lock(&napi_hash_lock); @@ -6555,20 +6554,14 @@ static void napi_hash_add(struct napi_struct *napi) /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi */ -bool napi_hash_del(struct napi_struct *napi) +static void napi_hash_del(struct napi_struct *napi) { - bool rcu_sync_needed = false; - spin_lock(&napi_hash_lock); - if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) { - rcu_sync_needed = true; - hlist_del_rcu(&napi->napi_hash_node); - } + hlist_del_init_rcu(&napi->napi_hash_node); + spin_unlock(&napi_hash_lock); - return rcu_sync_needed; } -EXPORT_SYMBOL_GPL(napi_hash_del); static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) { @@ -6600,7 +6593,11 @@ static void 
init_gro_hash(struct napi_struct *napi) void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { + if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state))) + return; + INIT_LIST_HEAD(&napi->poll_list); + INIT_HLIST_NODE(&napi->napi_hash_node); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; init_gro_hash(napi); @@ -6653,18 +6650,19 @@ static void flush_gro_hash(struct napi_struct *napi) } /* Must be called in process context */ -void netif_napi_del(struct napi_struct *napi) +void __netif_napi_del(struct napi_struct *napi) { - might_sleep(); - if (napi_hash_del(napi)) - synchronize_net(); - list_del_init(&napi->dev_list); + if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) + return; + + napi_hash_del(napi); + list_del_rcu(&napi->dev_list); napi_free_frags(napi); flush_gro_hash(napi); napi->gro_bitmask = 0; } -EXPORT_SYMBOL(netif_napi_del); +EXPORT_SYMBOL(__netif_napi_del); static int napi_poll(struct napi_struct *n, struct list_head *repoll) { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2338753e936b..c310c7c1cef7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -297,7 +297,7 @@ static int netpoll_owner_active(struct net_device *dev) { struct napi_struct *napi; - list_for_each_entry(napi, &dev->napi_list, dev_list) { + list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { if (napi->poll_owner == smp_processor_id()) return 1; } |