summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2020-09-10 13:08:47 -0700
committer: David S. Miller <davem@davemloft.net> 2020-09-10 13:08:47 -0700
commit: 6198f4469089846c6017d2a6bc80ea7a37b469c5 (patch)
tree: cbf94ed6cf85789793620d8bd136c61de728d175
parent: 8b40f21b699df25dcc8b98d0de7774a1e45fca4f (diff)
parent: 5251ef82993252da0f63616676905da38b40778a (diff)
Merge branch 'netpoll-make-sure-napi_list-is-safe-for-RCU-traversal'
Jakub Kicinski says:

====================
netpoll: make sure napi_list is safe for RCU traversal

This series is a follow-up to the fix in commit 96e97bc07e90
("net: disable netpoll on fresh napis"). To avoid any latent
race conditions convert dev->napi_list to a proper RCU list.
We need minor restructuring because it looks like netif_napi_del()
used to be idempotent, and it may be quite hard to track down
everyone who depends on that.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h    8
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.c          5
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic_main.c       12
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c       4
-rw-r--r--  drivers/net/ethernet/myricom/myri10ge/myri10ge.c   5
-rw-r--r--  drivers/net/veth.c                                 3
-rw-r--r--  drivers/net/virtio_net.c                           7
-rw-r--r--  include/linux/netdevice.h                         36
-rw-r--r--  net/core/dev.c                                    32
-rw-r--r--  net/core/netpoll.c                                 2
10 files changed, 55 insertions, 59 deletions
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 7e4c93be4451..d8b1824c334d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -825,9 +825,9 @@ static inline void bnx2x_del_all_napi_cnic(struct bnx2x *bp)
int i;
for_each_rx_queue_cnic(bp, i) {
- napi_hash_del(&bnx2x_fp(bp, i, napi));
- netif_napi_del(&bnx2x_fp(bp, i, napi));
+ __netif_napi_del(&bnx2x_fp(bp, i, napi));
}
+ synchronize_net();
}
static inline void bnx2x_del_all_napi(struct bnx2x *bp)
@@ -835,9 +835,9 @@ static inline void bnx2x_del_all_napi(struct bnx2x *bp)
int i;
for_each_eth_queue(bp, i) {
- napi_hash_del(&bnx2x_fp(bp, i, napi));
- netif_napi_del(&bnx2x_fp(bp, i, napi));
+ __netif_napi_del(&bnx2x_fp(bp, i, napi));
}
+ synchronize_net();
}
int bnx2x_set_int_mode(struct bnx2x *bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b167066af450..53f64ca673c3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8634,10 +8634,9 @@ static void bnxt_del_napi(struct bnxt *bp)
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
- napi_hash_del(&bnapi->napi);
- netif_napi_del(&bnapi->napi);
+ __netif_napi_del(&bnapi->napi);
}
- /* We called napi_hash_del() before netif_napi_del(), we need
+ /* We called __netif_napi_del(), we need
* to respect an RCU grace period before freeing napi structures.
*/
synchronize_net();
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index a4e307636f5a..4f0329d8778f 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2529,13 +2529,15 @@ static void enic_dev_deinit(struct enic *enic)
{
unsigned int i;
- for (i = 0; i < enic->rq_count; i++) {
- napi_hash_del(&enic->napi[i]);
- netif_napi_del(&enic->napi[i]);
- }
+ for (i = 0; i < enic->rq_count; i++)
+ __netif_napi_del(&enic->napi[i]);
+
if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
for (i = 0; i < enic->wq_count; i++)
- netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]);
+ __netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]);
+
+ /* observe RCU grace period after __netif_napi_del() calls */
+ synchronize_net();
enic_free_vnic_resources(enic);
enic_clear_intr_mode(enic);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index 2e35c5706cf1..df389a11d3af 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -1029,10 +1029,10 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
WRITE_ONCE(adapter->rx_ring[ring->queue_index], NULL);
adapter->q_vector[v_idx] = NULL;
- napi_hash_del(&q_vector->napi);
- netif_napi_del(&q_vector->napi);
+ __netif_napi_del(&q_vector->napi);
/*
+ * after a call to __netif_napi_del() napi may still be used and
* ixgbe_get_stats64() might access the rings on this vector,
* we must wait a grace period before freeing it.
*/
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 4a5beafa0493..1634ca6d4a8f 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -3543,11 +3543,10 @@ static void myri10ge_free_slices(struct myri10ge_priv *mgp)
ss->fw_stats, ss->fw_stats_bus);
ss->fw_stats = NULL;
}
- napi_hash_del(&ss->napi);
- netif_napi_del(&ss->napi);
+ __netif_napi_del(&ss->napi);
}
/* Wait till napi structs are no longer used, and then free ss. */
- synchronize_rcu();
+ synchronize_net();
kfree(mgp->ss);
mgp->ss = NULL;
}
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 7de8f0ea3f6b..091e5b4ba042 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -897,14 +897,13 @@ static void veth_napi_del(struct net_device *dev)
struct veth_rq *rq = &priv->rq[i];
napi_disable(&rq->xdp_napi);
- napi_hash_del(&rq->xdp_napi);
+ __netif_napi_del(&rq->xdp_napi);
}
synchronize_net();
for (i = 0; i < dev->real_num_rx_queues; i++) {
struct veth_rq *rq = &priv->rq[i];
- netif_napi_del(&rq->xdp_napi);
rq->rx_notify_masked = false;
ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 263b005981bd..7145c83c6c8c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2604,12 +2604,11 @@ static void virtnet_free_queues(struct virtnet_info *vi)
int i;
for (i = 0; i < vi->max_queue_pairs; i++) {
- napi_hash_del(&vi->rq[i].napi);
- netif_napi_del(&vi->rq[i].napi);
- netif_napi_del(&vi->sq[i].napi);
+ __netif_napi_del(&vi->rq[i].napi);
+ __netif_napi_del(&vi->sq[i].napi);
}
- /* We called napi_hash_del() before netif_napi_del(),
+ /* We called __netif_napi_del(),
* we need to respect an RCU grace period before freeing vi->rq
*/
synchronize_net();
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7f9fcfd15942..157e0242e9ee 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -70,6 +70,7 @@ struct udp_tunnel_nic;
struct bpf_prog;
struct xdp_buff;
+void synchronize_net(void);
void netdev_set_default_ethtool_ops(struct net_device *dev,
const struct ethtool_ops *ops);
@@ -354,7 +355,7 @@ enum {
NAPI_STATE_MISSED, /* reschedule a napi */
NAPI_STATE_DISABLE, /* Disable pending */
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
- NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
+ NAPI_STATE_LISTED, /* NAPI added to system lists */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
};
@@ -364,7 +365,7 @@ enum {
NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED),
NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE),
NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC),
- NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
+ NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
};
@@ -489,20 +490,6 @@ static inline bool napi_complete(struct napi_struct *n)
}
/**
- * napi_hash_del - remove a NAPI from global table
- * @napi: NAPI context
- *
- * Warning: caller must observe RCU grace period
- * before freeing memory containing @napi, if
- * this function returns true.
- * Note: core networking stack automatically calls it
- * from netif_napi_del().
- * Drivers might want to call this helper to combine all
- * the needed RCU grace periods into a single one.
- */
-bool napi_hash_del(struct napi_struct *napi);
-
-/**
* napi_disable - prevent NAPI from scheduling
* @n: NAPI context
*
@@ -2368,12 +2355,26 @@ static inline void netif_tx_napi_add(struct net_device *dev,
}
/**
+ * __netif_napi_del - remove a NAPI context
+ * @napi: NAPI context
+ *
+ * Warning: caller must observe RCU grace period before freeing memory
+ * containing @napi. Drivers might want to call this helper to combine
+ * all the needed RCU grace periods into a single one.
+ */
+void __netif_napi_del(struct napi_struct *napi);
+
+/**
* netif_napi_del - remove a NAPI context
* @napi: NAPI context
*
* netif_napi_del() removes a NAPI context from the network device NAPI list
*/
-void netif_napi_del(struct napi_struct *napi);
+static inline void netif_napi_del(struct napi_struct *napi)
+{
+ __netif_napi_del(napi);
+ synchronize_net();
+}
struct napi_gro_cb {
/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
@@ -2797,7 +2798,6 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
void netdev_freemem(struct net_device *dev);
-void synchronize_net(void);
int init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 152ad3b578de..03624192862a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6533,8 +6533,7 @@ EXPORT_SYMBOL(napi_busy_loop);
static void napi_hash_add(struct napi_struct *napi)
{
- if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
- test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
+ if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
return;
spin_lock(&napi_hash_lock);
@@ -6555,20 +6554,14 @@ static void napi_hash_add(struct napi_struct *napi)
/* Warning : caller is responsible to make sure rcu grace period
* is respected before freeing memory containing @napi
*/
-bool napi_hash_del(struct napi_struct *napi)
+static void napi_hash_del(struct napi_struct *napi)
{
- bool rcu_sync_needed = false;
-
spin_lock(&napi_hash_lock);
- if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
- rcu_sync_needed = true;
- hlist_del_rcu(&napi->napi_hash_node);
- }
+ hlist_del_init_rcu(&napi->napi_hash_node);
+
spin_unlock(&napi_hash_lock);
- return rcu_sync_needed;
}
-EXPORT_SYMBOL_GPL(napi_hash_del);
static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
{
@@ -6600,7 +6593,11 @@ static void init_gro_hash(struct napi_struct *napi)
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
+ if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
+ return;
+
INIT_LIST_HEAD(&napi->poll_list);
+ INIT_HLIST_NODE(&napi->napi_hash_node);
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog;
init_gro_hash(napi);
@@ -6653,18 +6650,19 @@ static void flush_gro_hash(struct napi_struct *napi)
}
/* Must be called in process context */
-void netif_napi_del(struct napi_struct *napi)
+void __netif_napi_del(struct napi_struct *napi)
{
- might_sleep();
- if (napi_hash_del(napi))
- synchronize_net();
- list_del_init(&napi->dev_list);
+ if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
+ return;
+
+ napi_hash_del(napi);
+ list_del_rcu(&napi->dev_list);
napi_free_frags(napi);
flush_gro_hash(napi);
napi->gro_bitmask = 0;
}
-EXPORT_SYMBOL(netif_napi_del);
+EXPORT_SYMBOL(__netif_napi_del);
static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2338753e936b..c310c7c1cef7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -297,7 +297,7 @@ static int netpoll_owner_active(struct net_device *dev)
{
struct napi_struct *napi;
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner == smp_processor_id())
return 1;
}