From 69ba943151b2e40e201700cf5b3a94e433c6fd83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:06 -0800 Subject: mlx4: dma_dir is a mlx4_en_priv attribute No need to duplicate it for all queues and frags. num_frags & log_rx_info become u8 to save space. u8 accesses are a bit faster than u16 anyway. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 16 ++++++++-------- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 867292880c07..6183128b2d3d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -72,7 +72,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, return -ENOMEM; } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, - frag_info->dma_dir); + priv->dma_dir); if (unlikely(dma_mapping_error(priv->ddev, dma))) { put_page(page); return -ENOMEM; @@ -128,7 +128,7 @@ out: if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, page_alloc[i].page_size, - priv->frag_info[i].dma_dir); + priv->dma_dir); page = page_alloc[i].page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc[i].page_size / @@ -149,7 +149,7 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, if (next_frag_end > frags[i].page_size) dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, - frag_info->dma_dir); + priv->dma_dir); if (frags[i].page) put_page(frags[i].page); @@ -181,7 +181,7 @@ out: page_alloc = &ring->page_alloc[i]; dma_unmap_page(priv->ddev, page_alloc->dma, page_alloc->page_size, - priv->frag_info[i].dma_dir); + priv->dma_dir); page = page_alloc->page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc->page_size / @@ -206,7 +206,7 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, i, page_count(page_alloc->page)); dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, frag_info->dma_dir); + page_alloc->page_size, priv->dma_dir); while (page_alloc->page_offset + frag_info->frag_stride < page_alloc->page_size) { put_page(page_alloc->page); @@ -570,7 +570,7 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i]; dma_unmap_page(priv->ddev, frame->dma, frame->page_size, - priv->frag_info[0].dma_dir); + priv->dma_dir); put_page(frame->page); } ring->page_cache.index = 0; @@ -1202,7 +1202,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) * expense of more costly truesize accounting */ priv->frag_info[0].frag_stride = PAGE_SIZE; - priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL; + priv->dma_dir = PCI_DMA_BIDIRECTIONAL; priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM; i = 1; } else { @@ -1217,11 +1217,11 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) priv->frag_info[i].frag_stride = ALIGN(priv->frag_info[i].frag_size, SMP_CACHE_BYTES); - priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE; priv->frag_info[i].rx_headroom = 0; buf_size += priv->frag_info[i].frag_size; i++; } + priv->dma_dir = PCI_DMA_FROMDEVICE; } priv->num_frags = i; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 3ed42199d3f1..98bc67a7249b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ 
b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -360,7 +360,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { dma_unmap_page(priv->ddev, tx_info->map0_dma, - PAGE_SIZE, priv->frag_info[0].dma_dir); + PAGE_SIZE, priv->dma_dir); put_page(tx_info->page); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 3629ce11a68b..a4c7d94d52c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -474,7 +474,6 @@ struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; u32 frag_stride; - enum dma_data_direction dma_dir; u16 order; u16 rx_headroom; }; @@ -584,8 +583,9 @@ struct mlx4_en_priv { u32 rx_ring_num; u32 rx_skb_size; struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; - u16 num_frags; - u16 log_rx_info; + u8 num_frags; + u8 log_rx_info; + u8 dma_dir; struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES]; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; -- cgit v1.2.3 From 159ddfd2ca30a2361324111ef64fdedf43e7dc32 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:07 -0800 Subject: mlx4: remove order field from mlx4_en_frag_info This is really a port attribute, no need to duplicate it per RX queue and per frag. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6183128b2d3d..b78d6762e03f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -59,7 +59,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, struct page *page; dma_addr_t dma; - for (order = frag_info->order; ;) { + for (order = priv->rx_page_order; ;) { gfp_t gfp = _gfp; if (order) @@ -1195,7 +1195,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) * This only works when num_frags == 1. */ if (priv->tx_ring_num[TX_XDP]) { - priv->frag_info[0].order = 0; + priv->rx_page_order = 0; priv->frag_info[0].frag_size = eff_mtu; priv->frag_info[0].frag_prefix_size = 0; /* This will gain efficient xdp frame recycling at the @@ -1209,7 +1209,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) int buf_size = 0; while (buf_size < eff_mtu) { - priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER; priv->frag_info[i].frag_size = (eff_mtu > buf_size + frag_sizes[i]) ? 
frag_sizes[i] : eff_mtu - buf_size; @@ -1221,6 +1220,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) buf_size += priv->frag_info[i].frag_size; i++; } + priv->rx_page_order = MLX4_EN_ALLOC_PREFER_ORDER; priv->dma_dir = PCI_DMA_FROMDEVICE; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index a4c7d94d52c6..9d2c53ddb15c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -474,7 +474,6 @@ struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; u32 frag_stride; - u16 order; u16 rx_headroom; }; @@ -586,6 +585,7 @@ struct mlx4_en_priv { u8 num_frags; u8 log_rx_info; u8 dma_dir; + u8 rx_page_order; struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES]; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; -- cgit v1.2.3 From aaca121dd6cfedc9afb9d0ee23ded9d84e217e20 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:08 -0800 Subject: mlx4: get rid of frag_prefix_size Using per frag storage for frag_prefix_size is really silly. mlx4_en_complete_rx_desc() has all needed info already. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 27 ++++++++++++--------------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 +-- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b78d6762e03f..118ea83cff08 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -588,15 +588,14 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, int length) { struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags; - struct mlx4_en_frag_info *frag_info; - int nr; + struct mlx4_en_frag_info *frag_info = priv->frag_info; + int nr, frag_size; dma_addr_t dma; /* Collect used fragments while replacing them in the HW descriptors */ - for (nr = 0; nr < priv->num_frags; nr++) { - frag_info = &priv->frag_info[nr]; - if (length <= frag_info->frag_prefix_size) - break; + for (nr = 0;;) { + frag_size = min_t(int, length, frag_info->frag_size); + if (unlikely(!frags[nr].page)) goto fail; @@ -606,15 +605,16 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, __skb_fill_page_desc(skb, nr, frags[nr].page, frags[nr].page_offset, - frag_info->frag_size); + frag_size); skb->truesize += frag_info->frag_stride; frags[nr].page = NULL; + nr++; + length -= frag_size; + if (!length) + break; + frag_info++; } - /* Adjust size of last fragment to match actual length */ - if (nr > 0) - skb_frag_size_set(&skb_frags_rx[nr - 1], - length - priv->frag_info[nr - 1].frag_prefix_size); return nr; fail: @@ -1197,7 +1197,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) if (priv->tx_ring_num[TX_XDP]) { priv->rx_page_order = 0; priv->frag_info[0].frag_size = eff_mtu; - priv->frag_info[0].frag_prefix_size = 0; /* This will gain efficient xdp frame recycling at the * expense of more costly truesize accounting */ @@ -1212,7 +1211,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) priv->frag_info[i].frag_size = (eff_mtu > buf_size + frag_sizes[i]) ? 
frag_sizes[i] : eff_mtu - buf_size; - priv->frag_info[i].frag_prefix_size = buf_size; priv->frag_info[i].frag_stride = ALIGN(priv->frag_info[i].frag_size, SMP_CACHE_BYTES); @@ -1232,10 +1230,9 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) eff_mtu, priv->num_frags); for (i = 0; i < priv->num_frags; i++) { en_err(priv, - " frag:%d - size:%d prefix:%d stride:%d\n", + " frag:%d - size:%d stride:%d\n", i, priv->frag_info[i].frag_size, - priv->frag_info[i].frag_prefix_size, priv->frag_info[i].frag_stride); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9d2c53ddb15c..849aa8af4dd7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -472,9 +472,8 @@ struct mlx4_en_mc_list { struct mlx4_en_frag_info { u16 frag_size; - u16 frag_prefix_size; - u32 frag_stride; u16 rx_headroom; + u32 frag_stride; }; #ifdef CONFIG_MLX4_EN_DCB -- cgit v1.2.3 From d85f6c14e967b0bddfa9712daa17d79e297d18b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:09 -0800 Subject: mlx4: rx_headroom is a per port attribute No need to duplicate it per RX queue / frags. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 118ea83cff08..bb33032a280f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -115,7 +115,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, for (i = 0; i < priv->num_frags; i++) { frags[i] = ring_alloc[i]; - frags[i].page_offset += priv->frag_info[i].rx_headroom; + frags[i].page_offset += priv->rx_headroom; rx_desc->data[i].addr = cpu_to_be64(frags[i].dma + frags[i].page_offset); ring_alloc[i] = page_alloc[i]; @@ -1202,7 +1202,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) */ priv->frag_info[0].frag_stride = PAGE_SIZE; priv->dma_dir = PCI_DMA_BIDIRECTIONAL; - priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM; + priv->rx_headroom = XDP_PACKET_HEADROOM; i = 1; } else { int buf_size = 0; @@ -1214,12 +1214,12 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) priv->frag_info[i].frag_stride = ALIGN(priv->frag_info[i].frag_size, SMP_CACHE_BYTES); - priv->frag_info[i].rx_headroom = 0; buf_size += priv->frag_info[i].frag_size; i++; } priv->rx_page_order = MLX4_EN_ALLOC_PREFER_ORDER; priv->dma_dir = PCI_DMA_FROMDEVICE; + priv->rx_headroom = 0; } priv->num_frags = i; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 849aa8af4dd7..fc7b4da5d8c3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -472,7 +472,6 @@ struct mlx4_en_mc_list { struct mlx4_en_frag_info { u16 frag_size; - u16 rx_headroom; u32 frag_stride; }; @@ -585,6 +584,7 @@ struct mlx4_en_priv { u8 log_rx_info; u8 dma_dir; u8 rx_page_order; + u16 rx_headroom; struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES]; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; -- cgit v1.2.3 From acd7628de05c73118aab7ae4847a5b5b2c6999c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:10 -0800 Subject: mlx4: reduce rx ring page_cache size We only need to store the page and dma address. 
Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 17 ++++++++++------- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 -- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 6 +++++- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index bb33032a280f..453313d404e3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -250,7 +250,10 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, (index << priv->log_rx_info); if (ring->page_cache.index > 0) { - frags[0] = ring->page_cache.buf[--ring->page_cache.index]; + ring->page_cache.index--; + frags[0].page = ring->page_cache.buf[ring->page_cache.index].page; + frags[0].dma = ring->page_cache.buf[ring->page_cache.index].dma; + frags[0].page_offset = XDP_PACKET_HEADROOM; rx_desc->data[0].addr = cpu_to_be64(frags[0].dma + frags[0].page_offset); return 0; @@ -537,7 +540,9 @@ bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, if (cache->index >= MLX4_EN_CACHE_SIZE) return false; - cache->buf[cache->index++] = *frame; + cache->buf[cache->index].page = frame->page; + cache->buf[cache->index].dma = frame->dma; + cache->index++; return true; } @@ -567,11 +572,9 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, int i; for (i = 0; i < ring->page_cache.index; i++) { - struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i]; - - dma_unmap_page(priv->ddev, frame->dma, frame->page_size, - priv->dma_dir); - put_page(frame->page); + dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma, + PAGE_SIZE, priv->dma_dir); + put_page(ring->page_cache.buf[i].page); } ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 98bc67a7249b..e0c5ffb3e3a6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -354,8 +354,6 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc frame = { .page = tx_info->page, .dma = tx_info->map0_dma, - .page_offset = XDP_PACKET_HEADROOM, - .page_size = PAGE_SIZE, }; if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index fc7b4da5d8c3..c98940c6acd0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -268,9 +268,13 @@ struct mlx4_en_rx_alloc { }; #define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) + struct mlx4_en_page_cache { u32 index; - struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE]; + struct { + struct page *page; + dma_addr_t dma; + } buf[MLX4_EN_CACHE_SIZE]; }; struct mlx4_en_priv; -- cgit v1.2.3 From 60c7f5ae5416a8491216bcccf6b3b3d842d69fa4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:11 -0800 Subject: mlx4: removal of frag_sizes[] We will soon use order-0 pages, and frag truesize will more precisely match real sizes. In the new model, we prefer to use <= 2048-byte fragments, so that we can use the page-recycle technique on PAGE_SIZE=4096 arches. We will still pack as many frames as possible on arches with big pages, like PowerPC. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 24 ++++++++++-------------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 8 -------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 453313d404e3..0c61c1200f2a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1181,13 +1181,6 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) return done; } -static const int frag_sizes[] = { - FRAG_SZ0, - FRAG_SZ1, - FRAG_SZ2, - FRAG_SZ3 -}; - void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1211,13 +1204,16 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) int buf_size = 0; while (buf_size < eff_mtu) { - priv->frag_info[i].frag_size = - (eff_mtu > buf_size + frag_sizes[i]) ? - frag_sizes[i] : eff_mtu - buf_size; - priv->frag_info[i].frag_stride = - ALIGN(priv->frag_info[i].frag_size, - SMP_CACHE_BYTES); - buf_size += priv->frag_info[i].frag_size; + int frag_size = eff_mtu - buf_size; + + if (i < MLX4_EN_MAX_RX_FRAGS - 1) + frag_size = min(frag_size, 2048); + + priv->frag_info[i].frag_size = frag_size; + + priv->frag_info[i].frag_stride = ALIGN(frag_size, + SMP_CACHE_BYTES); + buf_size += frag_size; i++; } priv->rx_page_order = MLX4_EN_ALLOC_PREFER_ORDER; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index c98940c6acd0..608396dc6d95 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -105,14 +105,6 @@ #define MLX4_EN_ALLOC_PREFER_ORDER min_t(int, get_order(32768), \ PAGE_ALLOC_COSTLY_ORDER) -/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU * and 4K allocations) */ -enum { - FRAG_SZ0 = 1536 - NET_IP_ALIGN, - FRAG_SZ1 = 4096, - FRAG_SZ2 = 4096, - FRAG_SZ3 = MLX4_EN_ALLOC_SIZE -}; #define MLX4_EN_MAX_RX_FRAGS 4 /* Maximum ring sizes */ -- cgit v1.2.3 From b5a54d9a313645ec9607dc557b67d9325c28884c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:12 -0800 Subject: mlx4: use order-0 pages for RX Use of order-3 pages is problematic in some cases. This patch might add three kinds of regressions: 1) a CPU performance regression, but we will add page recycling later and performance should come back. 2) the TCP receiver could grow its receive window slightly slower, because the skb->len/skb->truesize ratio will decrease. This is mostly OK; we prefer being conservative to not risk OOM, and can eventually tune TCP better in the future. This is consistent with other drivers using 2048 bytes per Ethernet frame. 3) Because we allocate one page per RX slot, we consume more memory for the ring buffers. XDP already had this constraint anyway. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 72 +++++++++++++--------------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 5 -- 2 files changed, 33 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 0c61c1200f2a..069ea09185fb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -53,38 +53,26 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc *page_alloc, const struct mlx4_en_frag_info *frag_info, - gfp_t _gfp) + gfp_t gfp) { - int order; struct page *page; dma_addr_t dma; - for (order = priv->rx_page_order; ;) { - gfp_t gfp = _gfp; - - if (order) - gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC; - page = alloc_pages(gfp, order); - if (likely(page)) - break; - if (--order < 0 || - ((PAGE_SIZE << order) < frag_info->frag_size)) - return -ENOMEM; - } - dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, - priv->dma_dir); + page = alloc_page(gfp); + if (unlikely(!page)) + return -ENOMEM; + dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir); if (unlikely(dma_mapping_error(priv->ddev, dma))) { put_page(page); return -ENOMEM; } - page_alloc->page_size = PAGE_SIZE << order; page_alloc->page = page; page_alloc->dma = dma; page_alloc->page_offset = 0; /* Not doing get_page() for each frag is a big win * on asymetric workloads. Note we can not use atomic_set(). */ - page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1); + page_ref_add(page, PAGE_SIZE / frag_info->frag_stride - 1); return 0; } @@ -105,7 +93,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, page_alloc[i].page_offset += frag_info->frag_stride; if (page_alloc[i].page_offset + frag_info->frag_stride <= - ring_alloc[i].page_size) + PAGE_SIZE) continue; if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], @@ -127,11 +115,10 @@ out: while (i--) { if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, - page_alloc[i].page_size, - priv->dma_dir); + PAGE_SIZE, priv->dma_dir); page = page_alloc[i].page; /* Revert changes done by mlx4_alloc_pages */ - page_ref_sub(page, page_alloc[i].page_size / + page_ref_sub(page, PAGE_SIZE / priv->frag_info[i].frag_stride - 1); put_page(page); } @@ -147,8 +134,8 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride; - if (next_frag_end > frags[i].page_size) - dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, + if (next_frag_end > PAGE_SIZE) + dma_unmap_page(priv->ddev, frags[i].dma, PAGE_SIZE, priv->dma_dir); if (frags[i].page) @@ -168,9 +155,8 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, frag_info, GFP_KERNEL | __GFP_COLD)) goto out; - en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n", - i, ring->page_alloc[i].page_size, - page_ref_count(ring->page_alloc[i].page)); + en_dbg(DRV, priv, " frag %d allocator: - frags:%d\n", + i, page_ref_count(ring->page_alloc[i].page)); } return 0; @@ -180,11 +166,10 @@ out: page_alloc = &ring->page_alloc[i]; dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, - priv->dma_dir); + PAGE_SIZE, priv->dma_dir); page = page_alloc->page; /* Revert changes done by mlx4_alloc_pages */ - page_ref_sub(page, page_alloc->page_size / + page_ref_sub(page, PAGE_SIZE / priv->frag_info[i].frag_stride - 1); put_page(page); page_alloc->page = NULL; @@ -206,9 +191,9 @@ static void 
mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, i, page_count(page_alloc->page)); dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, priv->dma_dir); + PAGE_SIZE, priv->dma_dir); while (page_alloc->page_offset + frag_info->frag_stride < - page_alloc->page_size) { + PAGE_SIZE) { put_page(page_alloc->page); page_alloc->page_offset += frag_info->frag_stride; } @@ -1191,7 +1176,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) * This only works when num_frags == 1. */ if (priv->tx_ring_num[TX_XDP]) { - priv->rx_page_order = 0; priv->frag_info[0].frag_size = eff_mtu; /* This will gain efficient xdp frame recycling at the * expense of more costly truesize accounting @@ -1201,22 +1185,32 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) priv->rx_headroom = XDP_PACKET_HEADROOM; i = 1; } else { - int buf_size = 0; + int frag_size_max = 2048, buf_size = 0; + + /* should not happen, right ? */ + if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048) + frag_size_max = PAGE_SIZE; while (buf_size < eff_mtu) { - int frag_size = eff_mtu - buf_size; + int frag_stride, frag_size = eff_mtu - buf_size; + int pad, nb; if (i < MLX4_EN_MAX_RX_FRAGS - 1) - frag_size = min(frag_size, 2048); + frag_size = min(frag_size, frag_size_max); priv->frag_info[i].frag_size = frag_size; + frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES); + /* We can only pack 2 1536-bytes frames in on 4K page + * Therefore, each frame would consume more bytes (truesize) + */ + nb = PAGE_SIZE / frag_stride; + pad = (PAGE_SIZE - nb * frag_stride) / nb; + pad &= ~(SMP_CACHE_BYTES - 1); + priv->frag_info[i].frag_stride = frag_stride + pad; - priv->frag_info[i].frag_stride = ALIGN(frag_size, - SMP_CACHE_BYTES); buf_size += frag_size; i++; } - priv->rx_page_order = MLX4_EN_ALLOC_PREFER_ORDER; priv->dma_dir = PCI_DMA_FROMDEVICE; priv->rx_headroom = 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 608396dc6d95..6c80117006ed 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -102,9 +102,6 @@ /* Use the maximum between 16384 and a single page */ #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) -#define MLX4_EN_ALLOC_PREFER_ORDER min_t(int, get_order(32768), \ - PAGE_ALLOC_COSTLY_ORDER) - #define MLX4_EN_MAX_RX_FRAGS 4 /* Maximum ring sizes */ @@ -256,7 +253,6 @@ struct mlx4_en_rx_alloc { struct page *page; dma_addr_t dma; u32 page_offset; - u32 page_size; }; #define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) @@ -579,7 +575,6 @@ struct mlx4_en_priv { u8 num_frags; u8 log_rx_info; u8 dma_dir; - u8 rx_page_order; u16 rx_headroom; struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES]; -- cgit v1.2.3 From 34db548bfb9580f33d9a7faecafe4da61a4428a3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:13 -0800 Subject: mlx4: add page recycling in receive path Same technique as some Intel drivers, for arches where PAGE_SIZE = 4096. In most cases, pages are reused because they were consumed before we could loop around the RX ring. This brings back performance; even better, a single TCP flow reaches 30Gbit on my hosts. v2: added full memset() in mlx4_en_free_frag(), as Tariq found it was needed if we switch to a large MTU, since priv->log_rx_info can change dynamically. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 258 +++++++++------------------ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - 2 files changed, 82 insertions(+), 177 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 069ea09185fb..5edd0cf2999c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -50,10 +50,9 @@ #include "mlx4_en.h" -static int mlx4_alloc_pages(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *page_alloc, - const struct mlx4_en_frag_info *frag_info, - gfp_t gfp) +static int mlx4_alloc_page(struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frag, + gfp_t gfp) { struct page *page; dma_addr_t dma; @@ -63,145 +62,46 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, return -ENOMEM; dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir); if (unlikely(dma_mapping_error(priv->ddev, dma))) { - put_page(page); + __free_page(page); return -ENOMEM; } - page_alloc->page = page; - page_alloc->dma = dma; - page_alloc->page_offset = 0; - /* Not doing get_page() for each frag is a big win - * on asymetric workloads. Note we can not use atomic_set(). - */ - page_ref_add(page, PAGE_SIZE / frag_info->frag_stride - 1); + frag->page = page; + frag->dma = dma; + frag->page_offset = priv->rx_headroom; return 0; } static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, - struct mlx4_en_rx_alloc *ring_alloc, gfp_t gfp) { - struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; - const struct mlx4_en_frag_info *frag_info; - struct page *page; int i; - for (i = 0; i < priv->num_frags; i++) { - frag_info = &priv->frag_info[i]; - page_alloc[i] = ring_alloc[i]; - page_alloc[i].page_offset += frag_info->frag_stride; - - if (page_alloc[i].page_offset + frag_info->frag_stride <= - PAGE_SIZE) - continue; - - if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], - frag_info, gfp))) - goto out; - } - - for (i = 0; i < priv->num_frags; i++) { - frags[i] = ring_alloc[i]; - frags[i].page_offset += priv->rx_headroom; - rx_desc->data[i].addr = cpu_to_be64(frags[i].dma + - frags[i].page_offset); - ring_alloc[i] = page_alloc[i]; - } - - return 0; - -out: - while (i--) { - if (page_alloc[i].page != ring_alloc[i].page) { - dma_unmap_page(priv->ddev, page_alloc[i].dma, - PAGE_SIZE, priv->dma_dir); - page = page_alloc[i].page; - /* Revert changes done by mlx4_alloc_pages */ - page_ref_sub(page, PAGE_SIZE / - priv->frag_info[i].frag_stride - 1); - put_page(page); - } - } - return -ENOMEM; -} - -static void mlx4_en_free_frag(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *frags, - int i) -{ - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride; - - - if (next_frag_end > PAGE_SIZE) - dma_unmap_page(priv->ddev, frags[i].dma, PAGE_SIZE, - priv->dma_dir); - - if (frags[i].page) - put_page(frags[i].page); -} - -static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) -{ - int i; - struct mlx4_en_rx_alloc *page_alloc; - - for (i = 0; i < priv->num_frags; i++) { - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - - if (mlx4_alloc_pages(priv, &ring->page_alloc[i], - frag_info, GFP_KERNEL | __GFP_COLD)) - goto out; - - en_dbg(DRV, priv, " frag %d allocator: - frags:%d\n", - i, page_ref_count(ring->page_alloc[i].page)); + for (i = 0; i < priv->num_frags; i++, 
frags++) { + if (!frags->page && mlx4_alloc_page(priv, frags, gfp)) + return -ENOMEM; + rx_desc->data[i].addr = cpu_to_be64(frags->dma + + frags->page_offset); } return 0; - -out: - while (i--) { - struct page *page; - - page_alloc = &ring->page_alloc[i]; - dma_unmap_page(priv->ddev, page_alloc->dma, - PAGE_SIZE, priv->dma_dir); - page = page_alloc->page; - /* Revert changes done by mlx4_alloc_pages */ - page_ref_sub(page, PAGE_SIZE / - priv->frag_info[i].frag_stride - 1); - put_page(page); - page_alloc->page = NULL; - } - return -ENOMEM; } -static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) +static void mlx4_en_free_frag(const struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frag) { - struct mlx4_en_rx_alloc *page_alloc; - int i; - - for (i = 0; i < priv->num_frags; i++) { - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - - page_alloc = &ring->page_alloc[i]; - en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n", - i, page_count(page_alloc->page)); - - dma_unmap_page(priv->ddev, page_alloc->dma, + if (frag->page) { + dma_unmap_page(priv->ddev, frag->dma, PAGE_SIZE, priv->dma_dir); - while (page_alloc->page_offset + frag_info->frag_stride < - PAGE_SIZE) { - put_page(page_alloc->page); - page_alloc->page_offset += frag_info->frag_stride; - } - page_alloc->page = NULL; + __free_page(frag->page); } + /* We need to clear all fields, otherwise a change of priv->log_rx_info + * could lead to see garbage later in frag->page. + */ + memset(frag, 0, sizeof(*frag)); } -static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, +static void mlx4_en_init_rx_desc(const struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; @@ -235,19 +135,22 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, (index << priv->log_rx_info); if (ring->page_cache.index > 0) { - ring->page_cache.index--; - frags[0].page = ring->page_cache.buf[ring->page_cache.index].page; - frags[0].dma = ring->page_cache.buf[ring->page_cache.index].dma; - frags[0].page_offset = XDP_PACKET_HEADROOM; - rx_desc->data[0].addr = cpu_to_be64(frags[0].dma + - frags[0].page_offset); + /* XDP uses a single page per frame */ + if (!frags->page) { + ring->page_cache.index--; + frags->page = ring->page_cache.buf[ring->page_cache.index].page; + frags->dma = ring->page_cache.buf[ring->page_cache.index].dma; + } + frags->page_offset = XDP_PACKET_HEADROOM; + rx_desc->data[0].addr = cpu_to_be64(frags->dma + + XDP_PACKET_HEADROOM); return 0; } - return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp); + return mlx4_en_alloc_frags(priv, rx_desc, frags, gfp); } -static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring) +static bool mlx4_en_is_ring_empty(const struct mlx4_en_rx_ring *ring) { return ring->prod == ring->cons; } @@ -257,7 +160,8 @@ static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); } -static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, +/* slow path */ +static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { @@ -267,7 +171,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, frags = ring->rx_info + (index << priv->log_rx_info); for (nr = 0; nr < priv->num_frags; nr++) { en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); - mlx4_en_free_frag(priv, frags, nr); + mlx4_en_free_frag(priv, frags + nr); } } @@ 
-323,12 +227,12 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, ring->cons, ring->prod); /* Unmap and free Rx buffers */ - while (!mlx4_en_is_ring_empty(ring)) { - index = ring->cons & ring->size_mask; + for (index = 0; index < ring->size; index++) { en_dbg(DRV, priv, "Processing descriptor:%d\n", index); mlx4_en_free_rx_desc(priv, ring, index); - ++ring->cons; } + ring->cons = 0; + ring->prod = 0; } void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) @@ -380,9 +284,9 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mlx4_en_rx_alloc)); - ring->rx_info = vmalloc_node(tmp, node); + ring->rx_info = vzalloc_node(tmp, node); if (!ring->rx_info) { - ring->rx_info = vmalloc(tmp); + ring->rx_info = vzalloc(tmp); if (!ring->rx_info) { err = -ENOMEM; goto err_ring; @@ -452,16 +356,6 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) /* Initialize all descriptors */ for (i = 0; i < ring->size; i++) mlx4_en_init_rx_desc(priv, ring, i); - - /* Initialize page allocators */ - err = mlx4_en_init_allocator(priv, ring); - if (err) { - en_err(priv, "Failed initializing ring allocator\n"); - if (ring->stride <= TXBB_SIZE) - ring->buf -= TXBB_SIZE; - ring_ind--; - goto err_allocator; - } } err = mlx4_en_fill_rx_buffers(priv); if (err) @@ -481,11 +375,9 @@ err_buffers: mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]); ring_ind = priv->rx_ring_num - 1; -err_allocator: while (ring_ind >= 0) { if (priv->rx_ring[ring_ind]->stride <= TXBB_SIZE) priv->rx_ring[ring_ind]->buf -= TXBB_SIZE; - mlx4_en_destroy_allocator(priv, priv->rx_ring[ring_ind]); ring_ind--; } return err; @@ -565,50 +457,68 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; - mlx4_en_destroy_allocator(priv, ring); } static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, struct sk_buff *skb, int length) { - struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags; - struct mlx4_en_frag_info *frag_info = priv->frag_info; + const struct mlx4_en_frag_info *frag_info = priv->frag_info; + unsigned int truesize = 0; int nr, frag_size; + struct page *page; dma_addr_t dma; + bool release; /* Collect used fragments while replacing them in the HW descriptors */ - for (nr = 0;;) { + for (nr = 0;; frags++) { frag_size = min_t(int, length, frag_info->frag_size); - if (unlikely(!frags[nr].page)) + page = frags->page; + if (unlikely(!page)) goto fail; - dma = be64_to_cpu(rx_desc->data[nr].addr); - dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size, - DMA_FROM_DEVICE); + dma = frags->dma; + dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset, + frag_size, priv->dma_dir); - __skb_fill_page_desc(skb, nr, frags[nr].page, - frags[nr].page_offset, + __skb_fill_page_desc(skb, nr, page, frags->page_offset, frag_size); - skb->truesize += frag_info->frag_stride; - frags[nr].page = NULL; + truesize += frag_info->frag_stride; + if (frag_info->frag_stride == PAGE_SIZE / 2) { + frags->page_offset ^= PAGE_SIZE / 2; + release = page_count(page) != 1 || + page_is_pfmemalloc(page) || + page_to_nid(page) != numa_mem_id(); + } else { + u32 sz_align = ALIGN(frag_size, SMP_CACHE_BYTES); + + frags->page_offset += sz_align; + release = frags->page_offset + frag_info->frag_size > PAGE_SIZE; + } + if (release) { + dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir); + frags->page 
= NULL; + } else { + page_ref_inc(page); + } + nr++; length -= frag_size; if (!length) break; frag_info++; } + skb->truesize += truesize; return nr; fail: while (nr > 0) { nr--; - __skb_frag_unref(&skb_frags_rx[nr]); + __skb_frag_unref(skb_shinfo(skb)->frags + nr); } return 0; } @@ -639,7 +549,8 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, if (length <= SMALL_PACKET_SIZE) { /* We are copying all relevant data to the skb - temporarily * sync buffers for the copy */ - dma = be64_to_cpu(rx_desc->data[0].addr); + + dma = frags[0].dma + frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, length, DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, va, length); @@ -648,7 +559,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, unsigned int pull_len; /* Move relevant fragments to skb */ - used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, frags, + used_frags = mlx4_en_complete_rx_desc(priv, frags, skb, length); if (unlikely(!used_frags)) { kfree_skb(skb); @@ -916,8 +827,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_TX: if (likely(!mlx4_en_xmit_frame(ring, frags, dev, length, cq->ring, - &doorbell_pending))) - goto consumed; + &doorbell_pending))) { + frags[0].page = NULL; + goto next; + } trace_xdp_exception(dev, xdp_prog, act); goto xdp_drop_no_cnt; /* Drop on xmit failure */ default: @@ -927,8 +840,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_DROP: ring->xdp_drop++; xdp_drop_no_cnt: - if (likely(mlx4_en_rx_recycle(ring, frags))) - goto consumed; goto next; } } @@ -974,9 +885,8 @@ xdp_drop_no_cnt: if (!gro_skb) goto next; - nr = mlx4_en_complete_rx_desc(priv, - rx_desc, frags, gro_skb, - length); + nr = mlx4_en_complete_rx_desc(priv, frags, gro_skb, + length); if (!nr) goto next; @@ -1084,10 +994,6 @@ xdp_drop_no_cnt: napi_gro_receive(&cq->napi, skb); next: - for (nr = 0; nr < priv->num_frags; nr++) - mlx4_en_free_frag(priv, frags, nr); - -consumed: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 6c80117006ed..5f6080266981 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -327,7 +327,6 @@ struct mlx4_en_rx_desc { struct mlx4_en_rx_ring { struct mlx4_hwq_resources wqres; - struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; u32 size ; /* number of Rx descs*/ u32 actual_size; u32 size_mask; -- cgit v1.2.3 From 7d7bfc6a3f69d2debe104656fadd8d568fda0e5b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:14 -0800 Subject: mlx4: add rx_alloc_pages counter in ethtool -S This new counter tracks number of pages that we allocated for one port. lpaa24:~# ethtool -S eth0 | egrep 'rx_alloc_pages|rx_packets' rx_packets: 306755183 rx_alloc_pages: 932897 Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_port.c | 2 ++ drivers/net/ethernet/mellanox/mlx4/en_rx.c | 11 +++++++---- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h | 2 +- 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c4d714fcc7da..ffbcb27c05e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -117,7 +117,7 @@ static const char main_strings[][ETH_GSTRING_LEN] = { /* port statistics */ "tso_packets", "xmit_more", - "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed", + "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_pages", "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload", /* pf statistics */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 9166d90e7328..e0eb695318e6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -213,6 +213,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.rx_chksum_good = 0; priv->port_stats.rx_chksum_none = 0; priv->port_stats.rx_chksum_complete = 0; + priv->port_stats.rx_alloc_pages = 0; priv->xdp_stats.rx_xdp_drop = 0; priv->xdp_stats.rx_xdp_tx = 0; priv->xdp_stats.rx_xdp_tx_full = 0; @@ -223,6 +224,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.rx_chksum_good += READ_ONCE(ring->csum_ok); priv->port_stats.rx_chksum_none += READ_ONCE(ring->csum_none); priv->port_stats.rx_chksum_complete += READ_ONCE(ring->csum_complete); + priv->port_stats.rx_alloc_pages += READ_ONCE(ring->rx_alloc_pages); priv->xdp_stats.rx_xdp_drop += READ_ONCE(ring->xdp_drop); priv->xdp_stats.rx_xdp_tx += READ_ONCE(ring->xdp_tx); priv->xdp_stats.rx_xdp_tx_full += READ_ONCE(ring->xdp_tx_full); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 5edd0cf2999c..d3a425fa46b3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -72,6 +72,7 @@ static int mlx4_alloc_page(struct mlx4_en_priv *priv, } static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, gfp_t gfp) @@ -79,8 +80,11 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, int i; for (i = 0; i < priv->num_frags; i++, frags++) { - if (!frags->page && mlx4_alloc_page(priv, frags, gfp)) - return -ENOMEM; + if (!frags->page) { + if (mlx4_alloc_page(priv, frags, gfp)) + return -ENOMEM; + ring->rx_alloc_pages++; + } rx_desc->data[i].addr = cpu_to_be64(frags->dma + frags->page_offset); } @@ -133,7 +137,6 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); - if (ring->page_cache.index > 0) { /* XDP uses a single page per frame */ if (!frags->page) { @@ -147,7 +150,7 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, return 0; } - return mlx4_en_alloc_frags(priv, rx_desc, frags, gfp); + return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp); } static bool mlx4_en_is_ring_empty(const struct mlx4_en_rx_ring *ring) diff 
--git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 5f6080266981..39f401aa3047 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -346,6 +346,7 @@ struct mlx4_en_rx_ring { unsigned long csum_ok; unsigned long csum_none; unsigned long csum_complete; + unsigned long rx_alloc_pages; unsigned long xdp_drop; unsigned long xdp_tx; unsigned long xdp_tx_full; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index 48641cb0367f..926f3c3f3665 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -37,7 +37,7 @@ struct mlx4_en_port_stats { unsigned long queue_stopped; unsigned long wake_queue; unsigned long tx_timeout; - unsigned long rx_alloc_failed; + unsigned long rx_alloc_pages; unsigned long rx_chksum_good; unsigned long rx_chksum_none; unsigned long rx_chksum_complete; -- cgit v1.2.3 From 9e8c0395a7e87a8f0eb008297df25ffdf3e0e5b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:15 -0800 Subject: mlx4: do not access rx_desc from mlx4_en_process_rx_cq() Instead of fetching dma address from rx_desc->data[0].addr, prefer using frags[0].dma + frags[0].page_offset to avoid a potential cache line miss. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index d3a425fa46b3..b62fa265890e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -528,7 +528,6 @@ fail: static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, unsigned int length) { @@ -703,7 +702,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mlx4_en_rx_alloc *frags; - struct mlx4_en_rx_desc *rx_desc; struct bpf_prog *xdp_prog; int doorbell_pending; struct sk_buff *skb; @@ -738,7 +736,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud cq->mcq.cons_index & cq->size)) { frags = ring->rx_info + (index << priv->log_rx_info); - rx_desc = ring->buf + (index << ring->log_stride); /* * make sure we read the CQE after we read the ownership bit @@ -767,7 +764,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Get pointer to first fragment since we haven't * skb yet and cast it to ethhdr struct */ - dma = be64_to_cpu(rx_desc->data[0].addr); + dma = frags[0].dma + frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), DMA_FROM_DEVICE); ethh = (struct ethhdr *)(page_address(frags[0].page) + @@ -806,7 +803,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud void *orig_data; u32 act; - dma = be64_to_cpu(rx_desc->data[0].addr); + dma = frags[0].dma + frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, priv->frag_info[0].frag_size, DMA_FROM_DEVICE); @@ -946,7 +943,7 @@ xdp_drop_no_cnt: } /* GRO not possible, complete processing here */ - skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); + skb = mlx4_en_rx_skb(priv, frags, length); if (unlikely(!skb)) { ring->dropped++; goto next; 
-- cgit v1.2.3 From 02e6fd3e5598bace683e05ee783f134722cb21b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:16 -0800 Subject: mlx4: factorize page_address() calls We need to compute the frame virtual address at different points. Do it once. Following patch will use the new va address for validate_loopback() Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b62fa265890e..b5aa3f986508 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -734,9 +734,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { + void *va; frags = ring->rx_info + (index << priv->log_rx_info); - + va = page_address(frags[0].page) + frags[0].page_offset; /* * make sure we read the CQE after we read the ownership bit */ @@ -759,7 +760,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * and not performing the selftest or flb disabled */ if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) { - struct ethhdr *ethh; + const struct ethhdr *ethh = va; dma_addr_t dma; /* Get pointer to first fragment since we haven't * skb yet and cast it to ethhdr struct @@ -767,8 +768,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud dma = frags[0].dma + frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), DMA_FROM_DEVICE); - ethh = (struct ethhdr *)(page_address(frags[0].page) + - frags[0].page_offset); if (is_multicast_ether_addr(ethh->h_dest)) { struct mlx4_mac_entry *entry; @@ -808,8 +807,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud priv->frag_info[0].frag_size, DMA_FROM_DEVICE); - xdp.data_hard_start = page_address(frags[0].page); - xdp.data = xdp.data_hard_start + frags[0].page_offset; + xdp.data_hard_start = va - frags[0].page_offset; + xdp.data = va; xdp.data_end = xdp.data + length; orig_data = xdp.data; @@ -819,6 +818,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length = xdp.data_end - xdp.data; frags[0].page_offset = xdp.data - xdp.data_hard_start; + va = xdp.data; } switch (act) { @@ -891,7 +891,6 @@ xdp_drop_no_cnt: goto next; if (ip_summed == CHECKSUM_COMPLETE) { - void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); if (check_csum(cqe, gro_skb, va, dev->features)) { ip_summed = CHECKSUM_NONE; @@ -955,7 +954,7 @@ xdp_drop_no_cnt: } if (ip_summed == CHECKSUM_COMPLETE) { - if (check_csum(cqe, skb, skb->data, dev->features)) { + if (check_csum(cqe, skb, va, dev->features)) { ip_summed = CHECKSUM_NONE; ring->csum_complete--; ring->csum_none++; -- cgit v1.2.3 From 6969cf0fdbe9e67ed14cdf7ec95977abcf4a14af Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:17 -0800 Subject: mlx4: make validate_loopback() more generic Testing a boolean in fast path is not worth duplicating the code allocating packets, when GRO is on or off. If this proves to be a problem, we might later use a jump label. Next patch will remove this duplicated code and ease code review. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 23 ++++++++++------------- drivers/net/ethernet/mellanox/mlx4/en_selftest.c | 6 ------ 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b5aa3f986508..33b28278c827 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -584,20 +584,17 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, return skb; } -static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb) +static void validate_loopback(struct mlx4_en_priv *priv, void *va) { + const unsigned char *data = va + ETH_HLEN; int i; - int offset = ETH_HLEN; - for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { - if (*(skb->data + offset) != (unsigned char) (i & 0xff)) - goto out_loopback; + for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++) { + if (data[i] != (unsigned char)i) + return; } /* Loopback found */ priv->loopback_ok = 1; - -out_loopback: - dev_kfree_skb_any(skb); } static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, @@ -785,6 +782,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud } } + if (unlikely(priv->validate_loopback)) { + validate_loopback(priv, va); + goto next; + } + /* * Packet is OK - process it. */ @@ -948,11 +950,6 @@ xdp_drop_no_cnt: goto next; } - if (unlikely(priv->validate_loopback)) { - validate_loopback(priv, skb); - goto next; - } - if (ip_summed == CHECKSUM_COMPLETE) { if (check_csum(cqe, skb, va, dev->features)) { ip_summed = CHECKSUM_NONE; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 95290e1fc9fe..17112faafbcc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -81,14 +81,11 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) { u32 loopback_ok = 0; int i; - bool gro_enabled; priv->loopback_ok = 0; priv->validate_loopback = 1; - gro_enabled = priv->dev->features & NETIF_F_GRO; mlx4_en_update_loopback_state(priv->dev, priv->dev->features); - priv->dev->features &= ~NETIF_F_GRO; /* xmit */ if (mlx4_en_test_loopback_xmit(priv)) { @@ -111,9 +108,6 @@ mlx4_en_test_loopback_exit: priv->validate_loopback = 0; - if (gro_enabled) - priv->dev->features |= NETIF_F_GRO; - mlx4_en_update_loopback_state(priv->dev, priv->dev->features); return !loopback_ok; } -- cgit v1.2.3 From 68b8df464406e39925a54955294d5994df46bcf4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2017 08:17:18 -0800 Subject: mlx4: remove duplicate code in mlx4_en_process_rx_cq() We should keep one way to build skbs, regardless of GRO being on or off. Note that I made sure to defer as much as possible the point we need to pull data from the frame, so that future prefetch() calls we might add are more effective. These skb attributes derive from the CQE or ring: ip_summed, csum, hash, vlan offload, hwtstamps, queue_mapping. As a bonus, this patch removes the mlx4 dependency on eth_get_headlen(), which is very often broken enough to give us headaches. Signed-off-by: Eric Dumazet Acked-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 209 ++++++----------------------- 1 file changed, 41 insertions(+), 168 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 33b28278c827..aa074e57ce06 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -526,64 +526,6 @@ fail: return 0; } - -static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *frags, - unsigned int length) -{ - struct sk_buff *skb; - void *va; - int used_frags; - dma_addr_t dma; - - skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN); - if (unlikely(!skb)) { - en_dbg(RX_ERR, priv, "Failed allocating skb\n"); - return NULL; - } - skb_reserve(skb, NET_IP_ALIGN); - skb->len = length; - - /* Get pointer to first fragment so we could copy the headers into the - * (linear part of the) skb */ - va = page_address(frags[0].page) + frags[0].page_offset; - - if (length <= SMALL_PACKET_SIZE) { - /* We are copying all relevant data to the skb - temporarily - * sync buffers for the copy */ - - dma = frags[0].dma + frags[0].page_offset; - dma_sync_single_for_cpu(priv->ddev, dma, length, - DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, va, length); - skb->tail += length; - } else { - unsigned int pull_len; - - /* Move relevant fragments to skb */ - used_frags = mlx4_en_complete_rx_desc(priv, frags, - skb, length); - if (unlikely(!used_frags)) { - kfree_skb(skb); - return NULL; - } - skb_shinfo(skb)->nr_frags = used_frags; - - pull_len = eth_get_headlen(va, SMALL_PACKET_SIZE); - /* Copy headers into the skb linear buffer */ - memcpy(skb->data, va, pull_len); - skb->tail += pull_len; - - /* Skip headers in first fragment */ - skb_shinfo(skb)->frags[0].page_offset += pull_len; - - /* Adjust size of first fragment */ - skb_frag_size_sub(&skb_shinfo(skb)->frags[0], pull_len); - skb->data_len = length - pull_len; - } - return skb; -} - static void validate_loopback(struct mlx4_en_priv *priv, void *va) { const unsigned char *data = va + ETH_HLEN; @@ -792,8 +734,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud */ length = be32_to_cpu(cqe->byte_cnt); length -= ring->fcs_del; - l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && - (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); /* A bpf program gets first chance to drop the packet. It may * read bytes but not past the end of the frag. 
@@ -849,122 +789,51 @@ xdp_drop_no_cnt: ring->bytes += length; ring->packets++; + skb = napi_get_frags(&cq->napi); + if (!skb) + goto next; + + if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { + timestamp = mlx4_en_get_cqe_ts(cqe); + mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb), + timestamp); + } + skb_record_rx_queue(skb, cq->ring); + if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && cqe->checksum == cpu_to_be16(0xffff)) { ip_summed = CHECKSUM_UNNECESSARY; + l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && + (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + if (l2_tunnel) + skb->csum_level = 1; ring->csum_ok++; } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + goto csum_none; } } else { if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6))) { - ip_summed = CHECKSUM_COMPLETE; - ring->csum_complete++; + if (check_csum(cqe, skb, va, dev->features)) { + goto csum_none; + } else { + ip_summed = CHECKSUM_COMPLETE; + ring->csum_complete++; + } } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + goto csum_none; } } } else { +csum_none: ip_summed = CHECKSUM_NONE; ring->csum_none++; } - - /* This packet is eligible for GRO if it is: - * - DIX Ethernet (type interpretation) - * - TCP/IP (v4) - * - without IP options - * - not an IP fragment - */ - if (dev->features & NETIF_F_GRO) { - struct sk_buff *gro_skb = napi_get_frags(&cq->napi); - if (!gro_skb) - goto next; - - nr = mlx4_en_complete_rx_desc(priv, frags, gro_skb, - length); - if (!nr) - goto next; - - if (ip_summed == CHECKSUM_COMPLETE) { - if (check_csum(cqe, gro_skb, va, - dev->features)) { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; - ring->csum_complete--; - } - } - - skb_shinfo(gro_skb)->nr_frags = nr; - gro_skb->len = length; - gro_skb->data_len = length; - gro_skb->ip_summed = ip_summed; - - if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY) - gro_skb->csum_level = 1; - - if ((cqe->vlan_my_qpn & - cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && - (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { - u16 vid = be16_to_cpu(cqe->sl_vid); - - __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); - } else if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_SVLAN_PRESENT_MASK) && - (dev->features & NETIF_F_HW_VLAN_STAG_RX)) { - __vlan_hwaccel_put_tag(gro_skb, - htons(ETH_P_8021AD), - be16_to_cpu(cqe->sl_vid)); - } - - if (dev->features & NETIF_F_RXHASH) - skb_set_hash(gro_skb, - be32_to_cpu(cqe->immed_rss_invalid), - (ip_summed == CHECKSUM_UNNECESSARY) ? 
- PKT_HASH_TYPE_L4 : - PKT_HASH_TYPE_L3); - - skb_record_rx_queue(gro_skb, cq->ring); - - if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, - skb_hwtstamps(gro_skb), - timestamp); - } - - napi_gro_frags(&cq->napi); - goto next; - } - - /* GRO not possible, complete processing here */ - skb = mlx4_en_rx_skb(priv, frags, length); - if (unlikely(!skb)) { - ring->dropped++; - goto next; - } - - if (ip_summed == CHECKSUM_COMPLETE) { - if (check_csum(cqe, skb, va, dev->features)) { - ip_summed = CHECKSUM_NONE; - ring->csum_complete--; - ring->csum_none++; - } - } - skb->ip_summed = ip_summed; - skb->protocol = eth_type_trans(skb, dev); - skb_record_rx_queue(skb, cq->ring); - - if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY) - skb->csum_level = 1; - if (dev->features & NETIF_F_RXHASH) skb_set_hash(skb, be32_to_cpu(cqe->immed_rss_invalid), @@ -972,32 +841,36 @@ xdp_drop_no_cnt: PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); - if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_CVLAN_PRESENT_MASK) && + + if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid)); - else if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_SVLAN_PRESENT_MASK) && + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + be16_to_cpu(cqe->sl_vid)); + else if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_SVLAN_PRESENT_MASK)) && (dev->features & NETIF_F_HW_VLAN_STAG_RX)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), be16_to_cpu(cqe->sl_vid)); - if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb), - timestamp); + nr = mlx4_en_complete_rx_desc(priv, frags, skb, length); + if (likely(nr)) { + skb_shinfo(skb)->nr_frags = nr; + skb->len = length; + skb->data_len = length; + napi_gro_frags(&cq->napi); + } else { + skb->vlan_tci = 0; + skb_clear_hash(skb); } - - napi_gro_receive(&cq->napi, skb); next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; if (++polled == budget) - goto out; + break; } -out: rcu_read_unlock(); if (polled) { -- cgit v1.2.3
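To make the fragment-layout arithmetic introduced by the "mlx4: use order-0 pages for RX" patch easier to follow, here is a minimal stand-alone sketch of what mlx4_en_calc_rx_buf() ends up computing for the non-XDP path. This is illustrative userspace C, not driver code: PAGE_SIZE, SMP_CACHE_BYTES, the 2048-byte fragment cap and the sample eff_mtu value are assumed constants here, and the XDP path and the oversized-MTU fallback (frag_size_max = PAGE_SIZE) are omitted.

#include <stdio.h>

#define PAGE_SIZE            4096  /* assumed: 4K-page arch */
#define SMP_CACHE_BYTES        64  /* assumed cache line size */
#define MLX4_EN_MAX_RX_FRAGS    4
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	int eff_mtu = 1536;	/* sample value: standard MTU plus headers */
	int frag_size_max = 2048, buf_size = 0, i = 0;

	while (buf_size < eff_mtu && i < MLX4_EN_MAX_RX_FRAGS) {
		int frag_size = eff_mtu - buf_size;
		int frag_stride, nb, pad;

		/* every fragment but the last is capped at 2048 bytes */
		if (i < MLX4_EN_MAX_RX_FRAGS - 1 && frag_size > frag_size_max)
			frag_size = frag_size_max;

		/* nb fragments fit in one page; spread the leftover bytes
		 * across them as cache-line-aligned padding, so truesize
		 * accounting reflects the page space really consumed.
		 */
		frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES);
		nb = PAGE_SIZE / frag_stride;
		pad = (PAGE_SIZE - nb * frag_stride) / nb;
		pad &= ~(SMP_CACHE_BYTES - 1);

		printf("frag:%d size:%d stride:%d (%d per page)\n",
		       i, frag_size, frag_stride + pad, nb);
		buf_size += frag_size;
		i++;
	}
	return 0;
}

With the sample eff_mtu of 1536 on a 4K page this prints "frag:0 size:1536 stride:2048 (2 per page)": the two-frames-per-page packing, at the cost of a larger truesize, that the commit message describes, and the layout under which the recycling patch can flip page_offset by PAGE_SIZE / 2 and reuse any page whose refcount shows the stack has already released it.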