diff options
author | David S. Miller <davem@davemloft.net> | 2018-03-25 16:24:34 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-03-25 16:24:34 -0400 |
commit | 0374016579d0ef5d86655e48645ccc05678e0cf5 (patch) | |
tree | f188bc02287b805cc9fa0c9046c91fcd6c414253 /drivers/net | |
parent | ee7a60c9657e21ca28be66fa5c5df39963f99c38 (diff) | |
parent | ed93a39871282fc0dbc2fecd0d04ea0ddad54353 (diff) |
Merge branch '10GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
Jeff Kirsher says:
====================
10GbE Intel Wired LAN Driver Updates 2018-03-23
This series contains updates to ixgbe and ixgbevf only.
Paul adds status register reads to reduce a potential race condition
where registers can read 0xFFFFFFFF during a PCI reset, which in turn
causes the driver to remove the adapter. He also fixes an assignment
that should have been an "OR" operation.
Shannon Nelson provides several IPsec offload cleanups to ixgbe, as well as a
patch to enable TSO with IPsec offload.
Tony provides the much anticipated XDP support for ixgbevf. Currently,
the XDP_PASS, XDP_DROP and XDP_TX actions are supported, as well as
metadata and stats reporting.
Björn Töpel tweaks the page counting for XDP_REDIRECT, since a page can
have its reference count decreased via the xdp_do_redirect() call.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_common.h | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 46 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 79 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbevf/ethtool.c | 68 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 30 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 489 |
7 files changed, 596 insertions, 121 deletions
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index 67f304289fd9..2b311382167a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -154,6 +154,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed); +#define IXGBE_FAILED_READ_RETRIES 5 #define IXGBE_FAILED_READ_REG 0xffffffffU #define IXGBE_FAILED_READ_CFG_DWORD 0xffffffffU #define IXGBE_FAILED_READ_CFG_WORD 0xffffU diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index f2254528dcfc..68af127987bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -774,11 +774,7 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC; - itd->flags = 0; if (xs->id.proto == IPPROTO_ESP) { - struct sk_buff *skb = first->skb; - int ret, authlen, trailerlen; - u8 padlen; itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP | IXGBE_ADVTXD_TUCMD_L4T_TCP; @@ -790,19 +786,28 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, * padlen bytes of padding. This ends up not the same * as the static value found in xs->props.trailer_len (21). * - * The "correct" way to get the auth length would be to use - * authlen = crypto_aead_authsize(xs->data); - * but since we know we only have one size to worry about - * we can let the compiler use the constant and save us a - * few CPU cycles. + * ... but if we're doing GSO, don't bother as the stack + * doesn't add a trailer for those. 
*/ - authlen = IXGBE_IPSEC_AUTH_BITS / 8; - - ret = skb_copy_bits(skb, skb->len - (authlen + 2), &padlen, 1); - if (unlikely(ret)) - return 0; - trailerlen = authlen + 2 + padlen; - itd->trailer_len = trailerlen; + if (!skb_is_gso(first->skb)) { + /* The "correct" way to get the auth length would be + * to use + * authlen = crypto_aead_authsize(xs->data); + * but since we know we only have one size to worry + * about * we can let the compiler use the constant + * and save us a few CPU cycles. + */ + const int authlen = IXGBE_IPSEC_AUTH_BITS / 8; + struct sk_buff *skb = first->skb; + u8 padlen; + int ret; + + ret = skb_copy_bits(skb, skb->len - (authlen + 2), + &padlen, 1); + if (unlikely(ret)) + return 0; + itd->trailer_len = authlen + 2 + padlen; + } } if (tsa->encrypt) itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN; @@ -924,8 +929,13 @@ void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) ixgbe_ipsec_clear_hw_tables(adapter); adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops; - adapter->netdev->features |= NETIF_F_HW_ESP; - adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP; + +#define IXGBE_ESP_FEATURES (NETIF_F_HW_ESP | \ + NETIF_F_HW_ESP_TX_CSUM | \ + NETIF_F_GSO_ESP) + + adapter->netdev->features |= IXGBE_ESP_FEATURES; + adapter->netdev->hw_enc_features |= IXGBE_ESP_FEATURES; return; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 85369423452d..afadba99f7b8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -353,23 +353,32 @@ static void ixgbe_remove_adapter(struct ixgbe_hw *hw) ixgbe_service_event_schedule(adapter); } -static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) +static u32 ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) { + u8 __iomem *reg_addr; u32 value; + int i; + + reg_addr = READ_ONCE(hw->hw_addr); + if (ixgbe_removed(reg_addr)) + return IXGBE_FAILED_READ_REG; - /* The following check not only 
optimizes a bit by not - * performing a read on the status register when the - * register just read was a status register read that - * returned IXGBE_FAILED_READ_REG. It also blocks any - * potential recursion. + /* Register read of 0xFFFFFFF can indicate the adapter has been removed, + * so perform several status register reads to determine if the adapter + * has been removed. */ - if (reg == IXGBE_STATUS) { - ixgbe_remove_adapter(hw); - return; + for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) { + value = readl(reg_addr + IXGBE_STATUS); + if (value != IXGBE_FAILED_READ_REG) + break; + mdelay(3); } - value = ixgbe_read_reg(hw, IXGBE_STATUS); + if (value == IXGBE_FAILED_READ_REG) ixgbe_remove_adapter(hw); + else + value = readl(reg_addr + reg); + return value; } /** @@ -415,7 +424,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg) writes_completed: value = readl(reg_addr + reg); if (unlikely(value == IXGBE_FAILED_READ_REG)) - ixgbe_check_remove(hw, reg); + value = ixgbe_check_remove(hw, reg); return value; } @@ -1620,7 +1629,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, bi->dma = dma; bi->page = page; bi->page_offset = ixgbe_rx_offset(rx_ring); - bi->pagecnt_bias = 1; + page_ref_add(page, USHRT_MAX - 1); + bi->pagecnt_bias = USHRT_MAX; rx_ring->rx_stats.alloc_rx_page++; return true; @@ -2030,8 +2040,8 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer) * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds. 
*/ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); + if (unlikely(pagecnt_bias == 1)) { + page_ref_add(page, USHRT_MAX - 1); rx_buffer->pagecnt_bias = USHRT_MAX; } @@ -7721,7 +7731,8 @@ static void ixgbe_service_task(struct work_struct *work) static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first, - u8 *hdr_len) + u8 *hdr_len, + struct ixgbe_ipsec_tx_data *itd) { u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; struct sk_buff *skb = first->skb; @@ -7735,6 +7746,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, unsigned char *hdr; } l4; u32 paylen, l4_offset; + u32 fceof_saidx = 0; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -7760,13 +7772,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, if (ip.v4->version == 4) { unsigned char *csum_start = skb_checksum_start(skb); unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); + int len = csum_start - trans_start; /* IP header will have to cancel out any data that - * is not a part of the outer IP header + * is not a part of the outer IP header, so set to + * a reverse csum if needed, else init check to 0. */ - ip.v4->check = csum_fold(csum_partial(trans_start, - csum_start - trans_start, - 0)); + ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ? 
+ csum_fold(csum_partial(trans_start, + len, 0)) : 0; type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; ip.v4->tot_len = 0; @@ -7797,12 +7811,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT; mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; + fceof_saidx |= itd->sa_idx; + type_tucmd |= itd->flags | itd->trailer_len; + /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ vlan_macip_lens = l4.hdr - ip.hdr; vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; - ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, + ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, mss_l4len_idx); return 1; @@ -7864,10 +7881,8 @@ no_csum: vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; - if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) { - fceof_saidx |= itd->sa_idx; - type_tucmd |= itd->flags | itd->trailer_len; - } + fceof_saidx |= itd->sa_idx; + type_tucmd |= itd->flags | itd->trailer_len; ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0); } @@ -8495,7 +8510,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif - tso = ixgbe_tso(tx_ring, first, &hdr_len); + tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx); if (tso < 0) goto out_drop; else if (!tso) @@ -9904,15 +9919,15 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, /* We can only support IPV4 TSO in tunnels if we can mangle the * inner IP ID field, so strip TSO if MANGLEID is not supported. + * IPsec offoad sets skb->encapsulation but still can handle + * the TSO, so it's the exception. 
*/ - if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) - features &= ~NETIF_F_TSO; - -#ifdef CONFIG_XFRM_OFFLOAD - /* IPsec offload doesn't get along well with others *yet* */ - if (skb->sp) - features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM); + if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) { +#ifdef CONFIG_XFRM + if (!skb->sp) #endif + features &= ~NETIF_F_TSO; + } return features; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index f470d0204771..3123267dfba9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1847,9 +1847,9 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, (IXGBE_CS4227_EDC_MODE_SR << 1)); if (setup_linear) - reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; + reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; else - reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; + reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; ret_val = hw->phy.ops.write_reg(hw, reg_slice, IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext); diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index e7623fed42da..8e7d6c6f5c92 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. + Copyright(c) 1999 - 2018 Intel Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -82,6 +82,7 @@ static struct ixgbe_stats ixgbevf_gstrings_stats[] = { #define IXGBEVF_QUEUE_STATS_LEN ( \ (((struct ixgbevf_adapter *)netdev_priv(netdev))->num_tx_queues + \ + ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_xdp_queues + \ ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_rx_queues) * \ (sizeof(struct ixgbevf_stats) / sizeof(u64))) #define IXGBEVF_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbevf_gstrings_stats) @@ -269,7 +270,7 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbevf_ring *tx_ring = NULL, *rx_ring = NULL; u32 new_rx_count, new_tx_count; - int i, err = 0; + int i, j, err = 0; if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; @@ -293,15 +294,19 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, if (!netif_running(adapter->netdev)) { for (i = 0; i < adapter->num_tx_queues; i++) adapter->tx_ring[i]->count = new_tx_count; + for (i = 0; i < adapter->num_xdp_queues; i++) + adapter->xdp_ring[i]->count = new_tx_count; for (i = 0; i < adapter->num_rx_queues; i++) adapter->rx_ring[i]->count = new_rx_count; adapter->tx_ring_count = new_tx_count; + adapter->xdp_ring_count = new_tx_count; adapter->rx_ring_count = new_rx_count; goto clear_reset; } if (new_tx_count != adapter->tx_ring_count) { - tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring)); + tx_ring = vmalloc((adapter->num_tx_queues + + adapter->num_xdp_queues) * sizeof(*tx_ring)); if (!tx_ring) { err = -ENOMEM; goto clear_reset; @@ -324,6 +329,24 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, goto clear_reset; } } + + for (j = 0; j < adapter->num_xdp_queues; i++, j++) { + /* clone ring and setup updated count */ + tx_ring[i] = *adapter->xdp_ring[j]; + tx_ring[i].count = new_tx_count; + err = 
ixgbevf_setup_tx_resources(&tx_ring[i]); + if (err) { + while (i) { + i--; + ixgbevf_free_tx_resources(&tx_ring[i]); + } + + vfree(tx_ring); + tx_ring = NULL; + + goto clear_reset; + } + } } if (new_rx_count != adapter->rx_ring_count) { @@ -336,8 +359,13 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, for (i = 0; i < adapter->num_rx_queues; i++) { /* clone ring and setup updated count */ rx_ring[i] = *adapter->rx_ring[i]; + + /* Clear copied XDP RX-queue info */ + memset(&rx_ring[i].xdp_rxq, 0, + sizeof(rx_ring[i].xdp_rxq)); + rx_ring[i].count = new_rx_count; - err = ixgbevf_setup_rx_resources(&rx_ring[i]); + err = ixgbevf_setup_rx_resources(adapter, &rx_ring[i]); if (err) { while (i) { i--; @@ -363,6 +391,12 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, } adapter->tx_ring_count = new_tx_count; + for (j = 0; j < adapter->num_xdp_queues; i++, j++) { + ixgbevf_free_tx_resources(adapter->xdp_ring[j]); + *adapter->xdp_ring[j] = tx_ring[i]; + } + adapter->xdp_ring_count = new_tx_count; + vfree(tx_ring); tx_ring = NULL; } @@ -385,7 +419,8 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, clear_reset: /* free Tx resources if Rx error is encountered */ if (tx_ring) { - for (i = 0; i < adapter->num_tx_queues; i++) + for (i = 0; + i < adapter->num_tx_queues + adapter->num_xdp_queues; i++) ixgbevf_free_tx_resources(&tx_ring[i]); vfree(tx_ring); } @@ -457,6 +492,23 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, i += 2; } + /* populate XDP queue data */ + for (j = 0; j < adapter->num_xdp_queues; j++) { + ring = adapter->xdp_ring[j]; + if (!ring) { + data[i++] = 0; + data[i++] = 0; + continue; + } + + do { + start = u64_stats_fetch_begin_irq(&ring->syncp); + data[i] = ring->stats.packets; + data[i + 1] = ring->stats.bytes; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + i += 2; + } + /* populate Rx queue data */ for (j = 0; j < adapter->num_rx_queues; j++) { ring = adapter->rx_ring[j]; @@ -500,6 
+552,12 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset, sprintf(p, "tx_queue_%u_bytes", i); p += ETH_GSTRING_LEN; } + for (i = 0; i < adapter->num_xdp_queues; i++) { + sprintf(p, "xdp_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "xdp_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + } for (i = 0; i < adapter->num_rx_queues; i++) { sprintf(p, "rx_queue_%u_packets", i); p += ETH_GSTRING_LEN; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index c06ea4dc49a0..447ce1d5e0e3 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -2,7 +2,7 @@ /******************************************************************************* Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. + Copyright(c) 1999 - 2018 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -35,6 +35,7 @@ #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/u64_stats_sync.h> +#include <net/xdp.h> #include "vf.h" @@ -51,7 +52,11 @@ struct ixgbevf_tx_buffer { union ixgbe_adv_tx_desc *next_to_watch; unsigned long time_stamp; - struct sk_buff *skb; + union { + struct sk_buff *skb; + /* XDP uses address ptr on irq_clean */ + void *data; + }; unsigned int bytecount; unsigned short gso_segs; __be16 protocol; @@ -94,12 +99,21 @@ enum ixgbevf_ring_state_t { __IXGBEVF_RX_BUILD_SKB_ENABLED, __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, + __IXGBEVF_TX_XDP_RING, }; +#define ring_is_xdp(ring) \ + test_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) +#define set_ring_xdp(ring) \ + set_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) +#define clear_ring_xdp(ring) \ + clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) + struct ixgbevf_ring { struct ixgbevf_ring *next; struct ixgbevf_q_vector *q_vector; /* backpointer to 
q_vector */ struct net_device *netdev; + struct bpf_prog *xdp_prog; struct device *dev; void *desc; /* descriptor ring memory */ dma_addr_t dma; /* phys. address of descriptor ring */ @@ -120,7 +134,7 @@ struct ixgbevf_ring { struct ixgbevf_tx_queue_stats tx_stats; struct ixgbevf_rx_queue_stats rx_stats; }; - + struct xdp_rxq_info xdp_rxq; u64 hw_csum_rx_error; u8 __iomem *tail; struct sk_buff *skb; @@ -137,6 +151,7 @@ struct ixgbevf_ring { #define MAX_RX_QUEUES IXGBE_VF_MAX_RX_QUEUES #define MAX_TX_QUEUES IXGBE_VF_MAX_TX_QUEUES +#define MAX_XDP_QUEUES IXGBE_VF_MAX_TX_QUEUES #define IXGBEVF_MAX_RSS_QUEUES 2 #define IXGBEVF_82599_RETA_SIZE 128 /* 128 entries */ #define IXGBEVF_X550_VFRETA_SIZE 64 /* 64 entries */ @@ -337,6 +352,10 @@ struct ixgbevf_adapter { u32 eims_enable_mask; u32 eims_other; + /* XDP */ + int num_xdp_queues; + struct ixgbevf_ring *xdp_ring[MAX_XDP_QUEUES]; + /* TX */ int num_tx_queues; struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */ @@ -357,6 +376,7 @@ struct ixgbevf_adapter { /* OS defined structs */ struct net_device *netdev; + struct bpf_prog *xdp_prog; struct pci_dev *pdev; /* structs defined in ixgbe_vf.h */ @@ -370,6 +390,7 @@ struct ixgbevf_adapter { unsigned long state; u64 tx_busy; unsigned int tx_ring_count; + unsigned int xdp_ring_count; unsigned int rx_ring_count; u8 __iomem *io_addr; /* Mainly for iounmap use */ @@ -443,7 +464,8 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter); void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter); void ixgbevf_reset(struct ixgbevf_adapter *adapter); void ixgbevf_set_ethtool_ops(struct net_device *netdev); -int ixgbevf_setup_rx_resources(struct ixgbevf_ring *); +int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring); int ixgbevf_setup_tx_resources(struct ixgbevf_ring *); void ixgbevf_free_rx_resources(struct ixgbevf_ring *); void ixgbevf_free_tx_resources(struct ixgbevf_ring *); diff --git 
a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 4da449e0a4ba..3d9033f26eff 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. + Copyright(c) 1999 - 2018 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -50,6 +50,9 @@ #include <linux/if_vlan.h> #include <linux/prefetch.h> #include <net/mpls.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/atomic.h> #include "ixgbevf.h" @@ -321,7 +324,10 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, total_packets += tx_buffer->gso_segs; /* free the skb */ - napi_consume_skb(tx_buffer->skb, napi_budget); + if (ring_is_xdp(tx_ring)) + page_frag_free(tx_buffer->data); + else + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -385,7 +391,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, eop_desc = tx_ring->tx_buffer_info[i].next_to_watch; - pr_err("Detected Tx Unit Hang\n" + pr_err("Detected Tx Unit Hang%s\n" " Tx Queue <%d>\n" " TDH, TDT <%x>, <%x>\n" " next_to_use <%x>\n" @@ -395,6 +401,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, " eop_desc->wb.status <%x>\n" " time_stamp <%lx>\n" " jiffies <%lx>\n", + ring_is_xdp(tx_ring) ? " XDP" : "", tx_ring->queue_index, IXGBE_READ_REG(hw, IXGBE_VFTDH(tx_ring->reg_idx)), IXGBE_READ_REG(hw, IXGBE_VFTDT(tx_ring->reg_idx)), @@ -402,7 +409,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, eop_desc, (eop_desc ? 
eop_desc->wb.status : 0), tx_ring->tx_buffer_info[i].time_stamp, jiffies); - netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + if (!ring_is_xdp(tx_ring)) + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); /* schedule immediate reset if we believe we hung */ ixgbevf_tx_timeout_reset(adapter); @@ -410,6 +419,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return true; } + if (ring_is_xdp(tx_ring)) + return !!budget; + #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -552,19 +564,21 @@ struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, } static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer) + struct ixgbevf_rx_buffer *rx_buffer, + struct sk_buff *skb) { if (ixgbevf_can_reuse_rx_page(rx_buffer)) { /* hand second half of page back to the ring */ ixgbevf_reuse_rx_page(rx_ring, rx_buffer); } else { - /* We are not reusing the buffer so unmap it and free - * any references we are holding to it - */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); + if (IS_ERR(skb)) + /* We are not reusing the buffer so unmap it and free + * any references we are holding to it + */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + ixgbevf_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBEVF_RX_DMA_ATTR); __page_frag_cache_drain(rx_buffer->page, rx_buffer->pagecnt_bias); } @@ -737,6 +751,10 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { + /* XDP packets use error pointer so abort at this point */ + if (IS_ERR(skb)) + return true; + /* verify that the packet does not have any known errors */ if (unlikely(ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { @@ -853,23 +871,38 @@ 
static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, static struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *rx_buffer, - union ixgbe_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + union ixgbe_adv_rx_desc *rx_desc) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + unsigned int size = xdp->data_end - xdp->data; #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); #endif unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); + prefetch(xdp->data); #if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); + prefetch(xdp->data + L1_CACHE_BYTES); #endif + /* Note, we get here by enabling legacy-rx via: + * + * ethtool --set-priv-flags <dev> legacy-rx on + * + * In this mode, we currently get 0 extra XDP headroom as + * opposed to having legacy-rx off, where we process XDP + * packets going to stack via ixgbevf_build_skb(). + * + * For ixgbevf_construct_skb() mode it means that the + * xdp->data_meta will always point to xdp->data, since + * the helper cannot expand the head. Should this ever + * changed in future for legacy-rx mode on, then lets also + * add xdp->data_meta handling here. 
+ */ /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE); @@ -879,16 +912,18 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IXGBEVF_RX_HDR_SIZE) - headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE); + headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + memcpy(__skb_put(skb, headlen), xdp->data, + ALIGN(headlen, sizeof(long))); /* update all of the pointers */ size -= headlen; if (size) { skb_add_rx_frag(skb, 0, rx_buffer->page, - (va + headlen) - page_address(rx_buffer->page), + (xdp->data + headlen) - + page_address(rx_buffer->page), size, truesize); #if (PAGE_SIZE < 8192) rx_buffer->page_offset ^= truesize; @@ -912,32 +947,39 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *rx_buffer, - union ixgbe_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + union ixgbe_adv_rx_desc *rx_desc) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + unsigned int metasize = xdp->data - xdp->data_meta; #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; #else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); + SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); #endif struct sk_buff *skb; - /* prefetch first cache line of first page */ - prefetch(va); + /* Prefetch first cache line of first page. If xdp->data_meta + * is unused, this points to xdp->data, otherwise, we likely + * have a consumer accessing first few bytes of meta data, + * and then actual data. 
+ */ + prefetch(xdp->data_meta); #if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); + prefetch(xdp->data_meta + L1_CACHE_BYTES); #endif - /* build an skb to around the page buffer */ - skb = build_skb(va - IXGBEVF_SKB_PAD, truesize); + /* build an skb around the page buffer */ + skb = build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) return NULL; /* update pointers within the skb to store the data */ - skb_reserve(skb, IXGBEVF_SKB_PAD); - __skb_put(skb, size); + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); /* update buffer offset */ #if (PAGE_SIZE < 8192) @@ -948,17 +990,138 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, return skb; } + +#define IXGBEVF_XDP_PASS 0 +#define IXGBEVF_XDP_CONSUMED 1 +#define IXGBEVF_XDP_TX 2 + +static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, + struct xdp_buff *xdp) +{ + struct ixgbevf_tx_buffer *tx_buffer; + union ixgbe_adv_tx_desc *tx_desc; + u32 len, cmd_type; + dma_addr_t dma; + u16 i; + + len = xdp->data_end - xdp->data; + + if (unlikely(!ixgbevf_desc_unused(ring))) + return IXGBEVF_XDP_CONSUMED; + + dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(ring->dev, dma)) + return IXGBEVF_XDP_CONSUMED; + + /* record the location of the first descriptor for this packet */ + tx_buffer = &ring->tx_buffer_info[ring->next_to_use]; + tx_buffer->bytecount = len; + tx_buffer->gso_segs = 1; + tx_buffer->protocol = 0; + + i = ring->next_to_use; + tx_desc = IXGBEVF_TX_DESC(ring, i); + + dma_unmap_len_set(tx_buffer, len, len); + dma_unmap_addr_set(tx_buffer, dma, dma); + tx_buffer->data = xdp->data; + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + /* put descriptor type bits */ + cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS; + cmd_type |= len | IXGBE_TXD_CMD; + tx_desc->read.cmd_type_len = 
cpu_to_le32(cmd_type); + tx_desc->read.olinfo_status = + cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + + /* Avoid any potential race with cleanup */ + smp_wmb(); + + /* set next_to_watch value indicating a packet is present */ + i++; + if (i == ring->count) + i = 0; + + tx_buffer->next_to_watch = tx_desc; + ring->next_to_use = i; + + return IXGBEVF_XDP_TX; +} + +static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring, + struct xdp_buff *xdp) +{ + int result = IXGBEVF_XDP_PASS; + struct ixgbevf_ring *xdp_ring; + struct bpf_prog *xdp_prog; + u32 act; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + + if (!xdp_prog) + goto xdp_out; + + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; + result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp); + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fallthrough */ + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + /* fallthrough -- handle aborts by dropping packet */ + case XDP_DROP: + result = IXGBEVF_XDP_CONSUMED; + break; + } +xdp_out: + rcu_read_unlock(); + return ERR_PTR(-result); +} + +static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + unsigned int size) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; + + rx_buffer->page_offset ^= truesize; +#else + unsigned int truesize = ring_uses_build_skb(rx_ring) ? 
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) : + SKB_DATA_ALIGN(size); + + rx_buffer->page_offset += truesize; +#endif +} + static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); struct sk_buff *skb = rx_ring->skb; + bool xdp_xmit = false; + struct xdp_buff xdp; + + xdp.rxq = &rx_ring->xdp_rxq; while (likely(total_rx_packets < budget)) { - union ixgbe_adv_rx_desc *rx_desc; struct ixgbevf_rx_buffer *rx_buffer; + union ixgbe_adv_rx_desc *rx_desc; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -981,14 +1144,36 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size); /* retrieve a buffer from the ring */ - if (skb) + if (!skb) { + xdp.data = page_address(rx_buffer->page) + + rx_buffer->page_offset; + xdp.data_meta = xdp.data; + xdp.data_hard_start = xdp.data - + ixgbevf_rx_offset(rx_ring); + xdp.data_end = xdp.data + size; + + skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp); + } + + if (IS_ERR(skb)) { + if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) { + xdp_xmit = true; + ixgbevf_rx_buffer_flip(rx_ring, rx_buffer, + size); + } else { + rx_buffer->pagecnt_bias++; + } + total_rx_packets++; + total_rx_bytes += size; + } else if (skb) { ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); - else if (ring_uses_build_skb(rx_ring)) + } else if (ring_uses_build_skb(rx_ring)) { skb = ixgbevf_build_skb(rx_ring, rx_buffer, - rx_desc, size); - else + &xdp, rx_desc); + } else { skb = ixgbevf_construct_skb(rx_ring, rx_buffer, - rx_desc, size); + &xdp, rx_desc); + } /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -997,7 +1182,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, break; } - ixgbevf_put_rx_buffer(rx_ring, rx_buffer); + 
ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -1039,6 +1224,17 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* place incomplete frames back on ring for completion */ rx_ring->skb = skb; + if (xdp_xmit) { + struct ixgbevf_ring *xdp_ring = + adapter->xdp_ring[rx_ring->queue_index]; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. + */ + wmb(); + ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); + } + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -1540,6 +1736,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter) /* Setup the HW Tx Head and Tail descriptor pointers */ for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_configure_tx_ring(adapter, adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_configure_tx_ring(adapter, adapter->xdp_ring[i]); } #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 @@ -2171,7 +2369,10 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) union ixgbe_adv_tx_desc *eop_desc, *tx_desc; /* Free all the Tx ring sk_buffs */ - dev_kfree_skb_any(tx_buffer->skb); + if (ring_is_xdp(tx_ring)) + page_frag_free(tx_buffer->data); + else + dev_kfree_skb_any(tx_buffer->skb); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -2239,6 +2440,8 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_clean_tx_ring(adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_clean_tx_ring(adapter->xdp_ring[i]); } void ixgbevf_down(struct ixgbevf_adapter *adapter) @@ -2277,6 +2480,13 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) IXGBE_TXDCTL_SWFLSH); } + for (i = 0; i < adapter->num_xdp_queues; i++) { + u8 reg_idx = adapter->xdp_ring[i]->reg_idx; + + IXGBE_WRITE_REG(hw, 
IXGBE_VFTXDCTL(reg_idx), + IXGBE_TXDCTL_SWFLSH); + } + if (!pci_channel_offline(adapter->pdev)) ixgbevf_reset(adapter); @@ -2374,6 +2584,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) /* Start with base case */ adapter->num_rx_queues = 1; adapter->num_tx_queues = 1; + adapter->num_xdp_queues = 0; spin_lock_bh(&adapter->mbx_lock); @@ -2395,8 +2606,13 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) case ixgbe_mbox_api_11: case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: + if (adapter->xdp_prog && + hw->mac.max_tx_queues == rss) + rss = rss > 3 ? 2 : 1; + adapter->num_rx_queues = rss; adapter->num_tx_queues = rss; + adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0; default: break; } @@ -2453,6 +2669,8 @@ static void ixgbevf_add_ring(struct ixgbevf_ring *ring, * @v_idx: index of vector in adapter struct * @txr_count: number of Tx rings for q vector * @txr_idx: index of first Tx ring to assign + * @xdp_count: total number of XDP rings to allocate + * @xdp_idx: index of first XDP ring to allocate * @rxr_count: number of Rx rings for q vector * @rxr_idx: index of first Rx ring to assign * @@ -2460,13 +2678,15 @@ static void ixgbevf_add_ring(struct ixgbevf_ring *ring, **/ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, int txr_count, int txr_idx, + int xdp_count, int xdp_idx, int rxr_count, int rxr_idx) { struct ixgbevf_q_vector *q_vector; + int reg_idx = txr_idx + xdp_idx; struct ixgbevf_ring *ring; int ring_count, size; - ring_count = txr_count + rxr_count; + ring_count = txr_count + xdp_count + rxr_count; size = sizeof(*q_vector) + (sizeof(*ring) * ring_count); /* allocate q_vector and rings */ @@ -2499,7 +2719,7 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, /* apply Tx specific ring traits */ ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; - ring->reg_idx = txr_idx; + ring->reg_idx = reg_idx; /* assign ring to adapter */ 
adapter->tx_ring[txr_idx] = ring; @@ -2507,6 +2727,36 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, /* update count and index */ txr_count--; txr_idx++; + reg_idx++; + + /* push pointer to next ring */ + ring++; + } + + while (xdp_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + ixgbevf_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = xdp_idx; + ring->reg_idx = reg_idx; + set_ring_xdp(ring); + + /* assign ring to adapter */ + adapter->xdp_ring[xdp_idx] = ring; + + /* update count and index */ + xdp_count--; + xdp_idx++; + reg_idx++; /* push pointer to next ring */ ring++; @@ -2556,8 +2806,12 @@ static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx) struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx]; struct ixgbevf_ring *ring; - ixgbevf_for_each_ring(ring, q_vector->tx) - adapter->tx_ring[ring->queue_index] = NULL; + ixgbevf_for_each_ring(ring, q_vector->tx) { + if (ring_is_xdp(ring)) + adapter->xdp_ring[ring->queue_index] = NULL; + else + adapter->tx_ring[ring->queue_index] = NULL; + } ixgbevf_for_each_ring(ring, q_vector->rx) adapter->rx_ring[ring->queue_index] = NULL; @@ -2583,15 +2837,16 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; int rxr_remaining = adapter->num_rx_queues; int txr_remaining = adapter->num_tx_queues; - int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int xdp_remaining = adapter->num_xdp_queues; + int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0; int err; - if (q_vectors >= (rxr_remaining + txr_remaining)) { + if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) { for (; rxr_remaining; v_idx++, q_vectors--) { int rqpv = 
DIV_ROUND_UP(rxr_remaining, q_vectors); err = ixgbevf_alloc_q_vector(adapter, v_idx, - 0, 0, rqpv, rxr_idx); + 0, 0, 0, 0, rqpv, rxr_idx); if (err) goto err_out; @@ -2604,9 +2859,11 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) for (; q_vectors; v_idx++, q_vectors--) { int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors); int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors); + int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors); err = ixgbevf_alloc_q_vector(adapter, v_idx, tqpv, txr_idx, + xqpv, xdp_idx, rqpv, rxr_idx); if (err) @@ -2617,6 +2874,8 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) rxr_idx += rqpv; txr_remaining -= tqpv; txr_idx += tqpv; + xdp_remaining -= xqpv; + xdp_idx += xqpv; } return 0; @@ -2688,9 +2947,10 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) goto err_alloc_q_vectors; } - hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n", - (adapter->num_rx_queues > 1) ? "Enabled" : - "Disabled", adapter->num_rx_queues, adapter->num_tx_queues); + hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count %u\n", + (adapter->num_rx_queues > 1) ? 
"Enabled" : "Disabled", + adapter->num_rx_queues, adapter->num_tx_queues, + adapter->num_xdp_queues); set_bit(__IXGBEVF_DOWN, &adapter->state); @@ -2711,6 +2971,7 @@ err_set_interrupt: static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) { adapter->num_tx_queues = 0; + adapter->num_xdp_queues = 0; adapter->num_rx_queues = 0; ixgbevf_free_q_vectors(adapter); @@ -2918,6 +3179,8 @@ static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter) if (netif_carrier_ok(adapter->netdev)) { for (i = 0; i < adapter->num_tx_queues; i++) set_check_for_tx_hang(adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + set_check_for_tx_hang(adapter->xdp_ring[i]); } /* get one bit for every active Tx/Rx interrupt vector */ @@ -3089,6 +3352,9 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) if (adapter->tx_ring[i]->desc) ixgbevf_free_tx_resources(adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + if (adapter->xdp_ring[i]->desc) + ixgbevf_free_tx_resources(adapter->xdp_ring[i]); } /** @@ -3139,7 +3405,7 @@ err: **/ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) { - int i, err = 0; + int i, j = 0, err = 0; for (i = 0; i < adapter->num_tx_queues; i++) { err = ixgbevf_setup_tx_resources(adapter->tx_ring[i]); @@ -3149,21 +3415,34 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) goto err_setup_tx; } + for (j = 0; j < adapter->num_xdp_queues; j++) { + err = ixgbevf_setup_tx_resources(adapter->xdp_ring[j]); + if (!err) + continue; + hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j); + break; + } + return 0; err_setup_tx: /* rewind the index freeing the rings as we go */ + while (j--) + ixgbevf_free_tx_resources(adapter->xdp_ring[j]); while (i--) ixgbevf_free_tx_resources(adapter->tx_ring[i]); + return err; } /** * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors) + * 
@adapter: board private structure * @rx_ring: Rx descriptor ring (for a specific queue) to setup * * Returns 0 on success, negative on failure **/ -int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring) +int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring) { int size; @@ -3184,6 +3463,13 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring) if (!rx_ring->desc) goto err; + /* XDP RX-queue info */ + if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index) < 0) + goto err; + + rx_ring->xdp_prog = adapter->xdp_prog; + return 0; err: vfree(rx_ring->rx_buffer_info); @@ -3207,7 +3493,7 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) int i, err = 0; for (i = 0; i < adapter->num_rx_queues; i++) { - err = ixgbevf_setup_rx_resources(adapter->rx_ring[i]); + err = ixgbevf_setup_rx_resources(adapter, adapter->rx_ring[i]); if (!err) continue; hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i); @@ -3232,6 +3518,8 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) { ixgbevf_clean_rx_ring(rx_ring); + rx_ring->xdp_prog = NULL; + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; @@ -3918,6 +4206,12 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; int ret; + /* prevent MTU being changed to a size unsupported by XDP */ + if (adapter->xdp_prog) { + dev_warn(&adapter->pdev->dev, "MTU cannot be changed while XDP program is loaded\n"); + return -EPERM; + } + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, max_frame); @@ -4029,6 +4323,23 @@ static void ixgbevf_shutdown(struct pci_dev *pdev) ixgbevf_suspend(pdev, PMSG_SUSPEND); } +static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats, + const struct ixgbevf_ring *ring) +{ + u64 bytes, 
packets; + unsigned int start; + + if (ring) { + do { + start = u64_stats_fetch_begin_irq(&ring->syncp); + bytes = ring->stats.bytes; + packets = ring->stats.packets; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + stats->tx_bytes += bytes; + stats->tx_packets += packets; + } +} + static void ixgbevf_get_stats(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -4056,13 +4367,12 @@ static void ixgbevf_get_stats(struct net_device *netdev, for (i = 0; i < adapter->num_tx_queues; i++) { ring = adapter->tx_ring[i]; - do { - start = u64_stats_fetch_begin_irq(&ring->syncp); - bytes = ring->stats.bytes; - packets = ring->stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); - stats->tx_bytes += bytes; - stats->tx_packets += packets; + ixgbevf_get_tx_ring_stats(stats, ring); + } + + for (i = 0; i < adapter->num_xdp_queues; i++) { + ring = adapter->xdp_ring[i]; + ixgbevf_get_tx_ring_stats(stats, ring); } rcu_read_unlock(); } @@ -4101,6 +4411,64 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, return features; } +static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) +{ + int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct ixgbevf_adapter *adapter = netdev_priv(dev); + struct bpf_prog *old_prog; + + /* verify ixgbevf ring attributes are sufficient for XDP */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbevf_ring *ring = adapter->rx_ring[i]; + + if (frame_size > ixgbevf_rx_bufsz(ring)) + return -EINVAL; + } + + old_prog = xchg(&adapter->xdp_prog, prog); + + /* If transitioning XDP modes reconfigure rings */ + if (!!prog != !!old_prog) { + /* Hardware has to reinitialize queues and interrupts to + * match packet buffer alignment. Unfortunately, the + * hardware is not flexible enough to do this dynamically. 
+ */ + if (netif_running(dev)) + ixgbevf_close(dev); + + ixgbevf_clear_interrupt_scheme(adapter); + ixgbevf_init_interrupt_scheme(adapter); + + if (netif_running(dev)) + ixgbevf_open(dev); + } else { + for (i = 0; i < adapter->num_rx_queues; i++) + xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct ixgbevf_adapter *adapter = netdev_priv(dev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return ixgbevf_xdp_setup(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = !!(adapter->xdp_prog); + xdp->prog_id = adapter->xdp_prog ? + adapter->xdp_prog->aux->id : 0; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_open = ixgbevf_open, .ndo_stop = ixgbevf_close, @@ -4117,6 +4485,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_poll_controller = ixgbevf_netpoll, #endif .ndo_features_check = ixgbevf_features_check, + .ndo_bpf = ixgbevf_xdp, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev) |