Diffstat (limited to 'drivers/net/ethernet')
642 files changed, 42555 insertions, 12635 deletions
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 049cc0158a64..05e15b6e5e2c 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -789,8 +789,8 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) * it with zeros to ETH_ZLEN for us. */ if (skb_shinfo(skb)->nr_frags == 0) { - skb_dma = pci_map_single(tp->tx_pdev, skb->data, skb->len, - PCI_DMA_TODEVICE); + skb_dma = dma_map_single(&tp->tx_pdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); txd->flags = TYPHOON_FRAG_DESC | TYPHOON_DESC_VALID; txd->len = cpu_to_le16(skb->len); txd->frag.addr = cpu_to_le32(skb_dma); @@ -800,8 +800,8 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) int i, len; len = skb_headlen(skb); - skb_dma = pci_map_single(tp->tx_pdev, skb->data, len, - PCI_DMA_TODEVICE); + skb_dma = dma_map_single(&tp->tx_pdev->dev, skb->data, len, + DMA_TO_DEVICE); txd->flags = TYPHOON_FRAG_DESC | TYPHOON_DESC_VALID; txd->len = cpu_to_le16(len); txd->frag.addr = cpu_to_le32(skb_dma); @@ -818,8 +818,8 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) len = skb_frag_size(frag); frag_addr = skb_frag_address(frag); - skb_dma = pci_map_single(tp->tx_pdev, frag_addr, len, - PCI_DMA_TODEVICE); + skb_dma = dma_map_single(&tp->tx_pdev->dev, frag_addr, + len, DMA_TO_DEVICE); txd->flags = TYPHOON_FRAG_DESC | TYPHOON_DESC_VALID; txd->len = cpu_to_le16(len); txd->frag.addr = cpu_to_le32(skb_dma); @@ -1349,12 +1349,12 @@ typhoon_download_firmware(struct typhoon *tp) image_data = typhoon_fw->data; fHdr = (struct typhoon_file_header *) image_data; - /* Cannot just map the firmware image using pci_map_single() as + /* Cannot just map the firmware image using dma_map_single() as * the firmware is vmalloc()'d and may not be physically contiguous, - * so we allocate some consistent memory to copy the sections into. + * so we allocate some coherent memory to copy the sections into. */ err = -ENOMEM; - dpage = pci_alloc_consistent(pdev, PAGE_SIZE, &dpage_dma); + dpage = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &dpage_dma, GFP_ATOMIC); if (!dpage) { netdev_err(tp->dev, "no DMA mem for firmware\n"); goto err_out; @@ -1459,7 +1459,7 @@ err_out_irq: iowrite32(irqMasked, ioaddr + TYPHOON_REG_INTR_MASK); iowrite32(irqEnabled, ioaddr + TYPHOON_REG_INTR_ENABLE); - pci_free_consistent(pdev, PAGE_SIZE, dpage, dpage_dma); + dma_free_coherent(&pdev->dev, PAGE_SIZE, dpage, dpage_dma); err_out: return err; @@ -1526,8 +1526,8 @@ typhoon_clean_tx(struct typhoon *tp, struct transmit_ring *txRing, */ skb_dma = (dma_addr_t) le32_to_cpu(tx->frag.addr); dma_len = le16_to_cpu(tx->len); - pci_unmap_single(tp->pdev, skb_dma, dma_len, - PCI_DMA_TODEVICE); + dma_unmap_single(&tp->pdev->dev, skb_dma, dma_len, + DMA_TO_DEVICE); } tx->flags = 0; @@ -1608,8 +1608,8 @@ typhoon_alloc_rx_skb(struct typhoon *tp, u32 idx) skb_reserve(skb, 2); #endif - dma_addr = pci_map_single(tp->pdev, skb->data, - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_addr = dma_map_single(&tp->pdev->dev, skb->data, PKT_BUF_SZ, + DMA_FROM_DEVICE); /* Since no card does 64 bit DAC, the high bits will never * change from zero. 
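[Editor's note] The typhoon.c hunks above are part of the tree-wide move off the deprecated pci_* DMA-compat wrappers onto the generic DMA API: pci_map_single(pdev, ...) becomes dma_map_single(&pdev->dev, ...), PCI_DMA_TODEVICE/PCI_DMA_FROMDEVICE become DMA_TO_DEVICE/DMA_FROM_DEVICE, and pci_alloc_consistent() becomes dma_alloc_coherent() with an explicit gfp argument (the old wrapper hardcoded GFP_ATOMIC, which is why the firmware-download path keeps GFP_ATOMIC while probe can use GFP_KERNEL). A minimal sketch of the streaming-mapping pattern, with hypothetical names (example_map_tx and its arguments) standing in for the driver state:

	#include <linux/dma-mapping.h>
	#include <linux/pci.h>
	#include <linux/skbuff.h>

	static int example_map_tx(struct pci_dev *pdev, struct sk_buff *skb,
				  dma_addr_t *mapping)
	{
		/* pci_map_single(pdev, ...) -> dma_map_single(&pdev->dev, ...),
		 * PCI_DMA_TODEVICE -> DMA_TO_DEVICE */
		*mapping = dma_map_single(&pdev->dev, skb->data, skb->len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(&pdev->dev, *mapping))
			return -ENOMEM;
		return 0;
	}

	static void example_unmap_tx(struct pci_dev *pdev, dma_addr_t mapping,
				     size_t len)
	{
		/* Unmap with the same device pointer, size and direction. */
		dma_unmap_single(&pdev->dev, mapping, len, DMA_TO_DEVICE);
	}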
@@ -1664,20 +1664,19 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read if (pkt_len < rx_copybreak && (new_skb = netdev_alloc_skb(tp->dev, pkt_len + 2)) != NULL) { skb_reserve(new_skb, 2); - pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, - PKT_BUF_SZ, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&tp->pdev->dev, dma_addr, + PKT_BUF_SZ, DMA_FROM_DEVICE); skb_copy_to_linear_data(new_skb, skb->data, pkt_len); - pci_dma_sync_single_for_device(tp->pdev, dma_addr, - PKT_BUF_SZ, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&tp->pdev->dev, dma_addr, + PKT_BUF_SZ, + DMA_FROM_DEVICE); skb_put(new_skb, pkt_len); typhoon_recycle_rx_skb(tp, idx); } else { new_skb = skb; skb_put(new_skb, pkt_len); - pci_unmap_single(tp->pdev, dma_addr, PKT_BUF_SZ, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&tp->pdev->dev, dma_addr, PKT_BUF_SZ, + DMA_FROM_DEVICE); typhoon_alloc_rx_skb(tp, idx); } new_skb->protocol = eth_type_trans(new_skb, tp->dev); @@ -1791,8 +1790,8 @@ typhoon_free_rx_rings(struct typhoon *tp) for (i = 0; i < RXENT_ENTRIES; i++) { struct rxbuff_ent *rxb = &tp->rxbuffers[i]; if (rxb->skb) { - pci_unmap_single(tp->pdev, rxb->dma_addr, PKT_BUF_SZ, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&tp->pdev->dev, rxb->dma_addr, + PKT_BUF_SZ, DMA_FROM_DEVICE); dev_kfree_skb(rxb->skb); rxb->skb = NULL; } @@ -2305,7 +2304,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto error_out_disable; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err < 0) { err_msg = "No usable DMA configuration"; goto error_out_mwi; @@ -2354,8 +2353,8 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* allocate pci dma space for rx and tx descriptor rings */ - shared = pci_alloc_consistent(pdev, sizeof(struct typhoon_shared), - &shared_dma); + shared = dma_alloc_coherent(&pdev->dev, sizeof(struct typhoon_shared), + &shared_dma, GFP_KERNEL); if (!shared) { err_msg = "could not allocate DMA memory"; err = -ENOMEM; @@ -2508,8 +2507,8 @@ error_out_reset: typhoon_reset(ioaddr, NoWait); error_out_dma: - pci_free_consistent(pdev, sizeof(struct typhoon_shared), - shared, shared_dma); + dma_free_coherent(&pdev->dev, sizeof(struct typhoon_shared), shared, + shared_dma); error_out_remap: pci_iounmap(pdev, ioaddr); error_out_regions: @@ -2536,8 +2535,8 @@ typhoon_remove_one(struct pci_dev *pdev) pci_restore_state(pdev); typhoon_reset(tp->ioaddr, NoWait); pci_iounmap(pdev, tp->ioaddr); - pci_free_consistent(pdev, sizeof(struct typhoon_shared), - tp->shared, tp->shared_dma); + dma_free_coherent(&pdev->dev, sizeof(struct typhoon_shared), + tp->shared, tp->shared_dma); pci_release_regions(pdev); pci_clear_mwi(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index a00b36f91d9f..2488bfdb9133 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -657,8 +657,10 @@ static void block_input(struct net_device *dev, int count, outb_p(E8390_RREAD+E8390_START, nic_base + AXNET_CMD); insw(nic_base + AXNET_DATAPORT,buf,count>>1); - if (count & 0x01) - buf[count-1] = inb(nic_base + AXNET_DATAPORT), xfer_count++; + if (count & 0x01) { + buf[count-1] = inb(nic_base + AXNET_DATAPORT); + xfer_count++; + } } @@ -1270,10 +1272,12 @@ static void ei_tx_intr(struct net_device *dev) ei_local->txing = 1; NS8390_trigger_send(dev, ei_local->tx2, ei_local->tx_start_page + 6); netif_trans_update(dev); - 
ei_local->tx2 = -1, + ei_local->tx2 = -1; ei_local->lasttx = 2; + } else { + ei_local->lasttx = 20; + ei_local->txing = 0; } - else ei_local->lasttx = 20, ei_local->txing = 0; } else if (ei_local->tx2 < 0) { @@ -1289,9 +1293,10 @@ static void ei_tx_intr(struct net_device *dev) netif_trans_update(dev); ei_local->tx1 = -1; ei_local->lasttx = 1; + } else { + ei_local->lasttx = 10; + ei_local->txing = 0; } - else - ei_local->lasttx = 10, ei_local->txing = 0; } // else // netdev_warn(dev, "unexpected TX-done interrupt, lasttx=%d\n", diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c index babc92e2692e..e84021282edf 100644 --- a/drivers/net/ethernet/8390/lib8390.c +++ b/drivers/net/ethernet/8390/lib8390.c @@ -50,6 +50,7 @@ */ +#include <linux/build_bug.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/jiffies.h> @@ -112,8 +113,10 @@ static void do_set_multicast_list(struct net_device *dev); static void __NS8390_init(struct net_device *dev, int startp); static unsigned version_printed; -static u32 msg_enable; -module_param(msg_enable, uint, 0444); +static int msg_enable; +static const int default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_RX_ERR | + NETIF_MSG_TX_ERR); +module_param(msg_enable, int, 0444); MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)"); /* @@ -597,10 +600,12 @@ static void ei_tx_intr(struct net_device *dev) ei_local->txing = 1; NS8390_trigger_send(dev, ei_local->tx2, ei_local->tx_start_page + 6); netif_trans_update(dev); - ei_local->tx2 = -1, + ei_local->tx2 = -1; ei_local->lasttx = 2; - } else - ei_local->lasttx = 20, ei_local->txing = 0; + } else { + ei_local->lasttx = 20; + ei_local->txing = 0; + } } else if (ei_local->tx2 < 0) { if (ei_local->lasttx != 2 && ei_local->lasttx != -2) pr_err("%s: bogus last_tx_buffer %d, tx2=%d\n", @@ -612,8 +617,10 @@ static void ei_tx_intr(struct net_device *dev) netif_trans_update(dev); ei_local->tx1 = -1; ei_local->lasttx = 1; - } else - ei_local->lasttx = 10, ei_local->txing = 0; + } else { + ei_local->lasttx = 10; + ei_local->txing = 0; + } } /* else netdev_warn(dev, "unexpected TX-done interrupt, lasttx=%d\n", ei_local->lasttx); @@ -969,14 +976,14 @@ static void ethdev_setup(struct net_device *dev) { struct ei_device *ei_local = netdev_priv(dev); - if ((msg_enable & NETIF_MSG_DRV) && (version_printed++ == 0)) - pr_info("%s", version); - ether_setup(dev); spin_lock_init(&ei_local->page_lock); - ei_local->msg_enable = msg_enable; + ei_local->msg_enable = netif_msg_init(msg_enable, default_msg_level); + + if (netif_msg_drv(ei_local) && (version_printed++ == 0)) + pr_info("%s", version); } /** @@ -1014,8 +1021,7 @@ static void __NS8390_init(struct net_device *dev, int startp) ? (0x48 | ENDCFG_WTS | (ei_local->bigendian ? ENDCFG_BOS : 0)) : 0x48; - if (sizeof(struct e8390_pkt_hdr) != 4) - panic("8390.c: header struct mispacked\n"); + BUILD_BUG_ON(sizeof(struct e8390_pkt_hdr) != 4); /* Follow National Semi's recommendations for initing the DP83902. 
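[Editor's note] Three cleanups recur in the lib8390.c hunks above: comma-operator statements in ei_tx_intr() are split into braced blocks, the runtime panic on a mispacked header struct becomes a compile-time BUILD_BUG_ON(), and the module's raw msg_enable parameter is routed through netif_msg_init(), which turns a debug level into a bitmap and falls back to the driver's default bitmap when the level is out of range (e.g. -1). A minimal sketch under those assumptions, with hypothetical names (example_pkt_hdr, example_ei_device):

	#include <linux/build_bug.h>
	#include <linux/netdevice.h>
	#include <linux/types.h>

	struct example_pkt_hdr {	/* hypothetical, mirrors e8390_pkt_hdr */
		u8 status;
		u8 next;
		__le16 count;
	};

	struct example_ei_device {
		u32 msg_enable;
	};

	static int debug = -1;		/* out of range: use the default bitmap */

	static void example_setup(struct example_ei_device *ei)
	{
		/* Fails the build, instead of panicking at runtime, if the
		 * on-wire header struct is ever mispacked. */
		BUILD_BUG_ON(sizeof(struct example_pkt_hdr) != 4);

		/* Convert the user-supplied level into a message-type bitmap,
		 * falling back to the driver default when it is out of range. */
		ei->msg_enable = netif_msg_init(debug,
						NETIF_MSG_DRV | NETIF_MSG_PROBE |
						NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR);
	}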
*/ ei_outb_p(E8390_NODMA+E8390_PAGE0+E8390_STOP, e8390_base+E8390_CMD); /* 0x21 */ ei_outb_p(endcfg, e8390_base + EN0_DCFG); /* 0x48 or 0x49 */ diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 164c3ed550bf..9d3b1e0e425c 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -1178,8 +1178,10 @@ static void dma_block_input(struct net_device *dev, int count, outb_p(E8390_RREAD+E8390_START, nic_base + PCNET_CMD); insw(nic_base + PCNET_DATAPORT,buf,count>>1); - if (count & 0x01) - buf[count-1] = inb(nic_base + PCNET_DATAPORT), xfer_count++; + if (count & 0x01) { + buf[count-1] = inb(nic_base + PCNET_DATAPORT); + xfer_count++; + } /* This was for the ALPHA version only, but enough people have been encountering problems that it is still here. */ diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index ba0055bb1614..555299737b51 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -886,7 +886,9 @@ static int netdev_open(struct net_device *dev) tx_ring_size = ((sizeof(starfire_tx_desc) * TX_RING_SIZE + QUEUE_ALIGN - 1) / QUEUE_ALIGN) * QUEUE_ALIGN; rx_ring_size = sizeof(struct starfire_rx_desc) * RX_RING_SIZE; np->queue_mem_size = tx_done_q_size + rx_done_q_size + tx_ring_size + rx_ring_size; - np->queue_mem = pci_alloc_consistent(np->pci_dev, np->queue_mem_size, &np->queue_mem_dma); + np->queue_mem = dma_alloc_coherent(&np->pci_dev->dev, + np->queue_mem_size, + &np->queue_mem_dma, GFP_ATOMIC); if (np->queue_mem == NULL) { free_irq(irq, dev); return -ENOMEM; @@ -1136,9 +1138,11 @@ static void init_ring(struct net_device *dev) np->rx_info[i].skb = skb; if (skb == NULL) break; - np->rx_info[i].mapping = pci_map_single(np->pci_dev, skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->rx_info[i].mapping)) { + np->rx_info[i].mapping = dma_map_single(&np->pci_dev->dev, + skb->data, + np->rx_buf_sz, + DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, np->rx_info[i].mapping)) { dev_kfree_skb(skb); np->rx_info[i].skb = NULL; break; @@ -1217,18 +1221,19 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) status |= skb_first_frag_len(skb) | (skb_num_frags(skb) << 16); np->tx_info[entry].mapping = - pci_map_single(np->pci_dev, skb->data, skb_first_frag_len(skb), PCI_DMA_TODEVICE); + dma_map_single(&np->pci_dev->dev, skb->data, + skb_first_frag_len(skb), + DMA_TO_DEVICE); } else { const skb_frag_t *this_frag = &skb_shinfo(skb)->frags[i - 1]; status |= skb_frag_size(this_frag); np->tx_info[entry].mapping = - pci_map_single(np->pci_dev, + dma_map_single(&np->pci_dev->dev, skb_frag_address(this_frag), skb_frag_size(this_frag), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); } - if (pci_dma_mapping_error(np->pci_dev, - np->tx_info[entry].mapping)) { + if (dma_mapping_error(&np->pci_dev->dev, np->tx_info[entry].mapping)) { dev->stats.tx_dropped++; goto err_out; } @@ -1271,18 +1276,16 @@ err_out: entry = prev_tx % TX_RING_SIZE; np->tx_info[entry].skb = NULL; if (i > 0) { - pci_unmap_single(np->pci_dev, + dma_unmap_single(&np->pci_dev->dev, np->tx_info[entry].mapping, - skb_first_frag_len(skb), - PCI_DMA_TODEVICE); + skb_first_frag_len(skb), DMA_TO_DEVICE); np->tx_info[entry].mapping = 0; entry = (entry + np->tx_info[entry].used_slots) % TX_RING_SIZE; for (j = 1; j < i; j++) { - pci_unmap_single(np->pci_dev, + dma_unmap_single(&np->pci_dev->dev, np->tx_info[entry].mapping, - 
skb_frag_size( - &skb_shinfo(skb)->frags[j-1]), - PCI_DMA_TODEVICE); + skb_frag_size(&skb_shinfo(skb)->frags[j - 1]), + DMA_TO_DEVICE); entry++; } } @@ -1356,20 +1359,20 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) u16 entry = (tx_status & 0x7fff) / sizeof(starfire_tx_desc); struct sk_buff *skb = np->tx_info[entry].skb; np->tx_info[entry].skb = NULL; - pci_unmap_single(np->pci_dev, + dma_unmap_single(&np->pci_dev->dev, np->tx_info[entry].mapping, skb_first_frag_len(skb), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); np->tx_info[entry].mapping = 0; np->dirty_tx += np->tx_info[entry].used_slots; entry = (entry + np->tx_info[entry].used_slots) % TX_RING_SIZE; { int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - pci_unmap_single(np->pci_dev, + dma_unmap_single(&np->pci_dev->dev, np->tx_info[entry].mapping, skb_frag_size(&skb_shinfo(skb)->frags[i]), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); np->dirty_tx++; entry++; } @@ -1461,16 +1464,18 @@ static int __netdev_rx(struct net_device *dev, int *quota) if (pkt_len < rx_copybreak && (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* 16 byte align the IP header */ - pci_dma_sync_single_for_cpu(np->pci_dev, - np->rx_info[entry].mapping, - pkt_len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&np->pci_dev->dev, + np->rx_info[entry].mapping, + pkt_len, DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, np->rx_info[entry].skb->data, pkt_len); - pci_dma_sync_single_for_device(np->pci_dev, - np->rx_info[entry].mapping, - pkt_len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&np->pci_dev->dev, + np->rx_info[entry].mapping, + pkt_len, DMA_FROM_DEVICE); skb_put(skb, pkt_len); } else { - pci_unmap_single(np->pci_dev, np->rx_info[entry].mapping, np->rx_buf_sz, PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, + np->rx_info[entry].mapping, + np->rx_buf_sz, DMA_FROM_DEVICE); skb = np->rx_info[entry].skb; skb_put(skb, pkt_len); np->rx_info[entry].skb = NULL; @@ -1588,9 +1593,9 @@ static void refill_rx_ring(struct net_device *dev) if (skb == NULL) break; /* Better luck next round. */ np->rx_info[entry].mapping = - pci_map_single(np->pci_dev, skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->rx_info[entry].mapping)) { + dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, np->rx_info[entry].mapping)) { dev_kfree_skb(skb); np->rx_info[entry].skb = NULL; break; @@ -1963,7 +1968,9 @@ static int netdev_close(struct net_device *dev) for (i = 0; i < RX_RING_SIZE; i++) { np->rx_ring[i].rxaddr = cpu_to_dma(0xBADF00D0); /* An invalid address. 
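[Editor's note] The starfire.c rx hunks above keep the copybreak pattern: packets shorter than rx_copybreak are copied into a freshly allocated skb so the original DMA buffer can stay mapped and be recycled into the ring, which requires the sync-for-cpu/sync-for-device pair around the copy. A minimal sketch of that pattern, with hypothetical names (example_rx_copybreak and its arguments) rather than the driver's actual ring layout:

	#include <linux/dma-mapping.h>
	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	static struct sk_buff *example_rx_copybreak(struct net_device *dev,
						    struct device *dmadev,
						    dma_addr_t mapping,
						    const void *buf, int pkt_len)
	{
		struct sk_buff *skb = netdev_alloc_skb(dev, pkt_len + 2);

		if (!skb)
			return NULL;
		skb_reserve(skb, 2);	/* 16-byte align the IP header */

		/* Hand the buffer to the CPU, copy the payload, then hand it
		 * back to the device so it can be reused in the rx ring. */
		dma_sync_single_for_cpu(dmadev, mapping, pkt_len, DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, buf, pkt_len);
		dma_sync_single_for_device(dmadev, mapping, pkt_len,
					   DMA_FROM_DEVICE);

		skb_put(skb, pkt_len);
		return skb;
	}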
*/ if (np->rx_info[i].skb != NULL) { - pci_unmap_single(np->pci_dev, np->rx_info[i].mapping, np->rx_buf_sz, PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, + np->rx_info[i].mapping, + np->rx_buf_sz, DMA_FROM_DEVICE); dev_kfree_skb(np->rx_info[i].skb); } np->rx_info[i].skb = NULL; @@ -1973,9 +1980,8 @@ static int netdev_close(struct net_device *dev) struct sk_buff *skb = np->tx_info[i].skb; if (skb == NULL) continue; - pci_unmap_single(np->pci_dev, - np->tx_info[i].mapping, - skb_first_frag_len(skb), PCI_DMA_TODEVICE); + dma_unmap_single(&np->pci_dev->dev, np->tx_info[i].mapping, + skb_first_frag_len(skb), DMA_TO_DEVICE); np->tx_info[i].mapping = 0; dev_kfree_skb(skb); np->tx_info[i].skb = NULL; @@ -2018,7 +2024,8 @@ static void starfire_remove_one(struct pci_dev *pdev) unregister_netdev(dev); if (np->queue_mem) - pci_free_consistent(pdev, np->queue_mem_size, np->queue_mem, np->queue_mem_dma); + dma_free_coherent(&pdev->dev, np->queue_mem_size, + np->queue_mem, np->queue_mem_dma); /* XXX: add wakeup code -- requires firmware for MagicPacket */ diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index b3b8a8010142..862ea44beea7 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -640,13 +640,11 @@ static irqreturn_t emac_interrupt(int irq, void *dev_id) struct net_device *dev = dev_id; struct emac_board_info *db = netdev_priv(dev); int int_status; - unsigned long flags; unsigned int reg_val; /* A real interrupt coming */ - /* holders of db->lock must always block IRQs */ - spin_lock_irqsave(&db->lock, flags); + spin_lock(&db->lock); /* Disable all interrupts */ writel(0, db->membase + EMAC_INT_CTL_REG); @@ -680,7 +678,7 @@ static irqreturn_t emac_interrupt(int irq, void *dev_id) reg_val |= (0xf << 0) | (0x01 << 8); writel(reg_val, db->membase + EMAC_INT_CTL_REG); } - spin_unlock_irqrestore(&db->lock, flags); + spin_unlock(&db->lock); return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 8470c836fa18..1a7e4df9b3e9 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -465,6 +465,7 @@ static int acenic_probe_one(struct pci_dev *pdev, SET_NETDEV_DEV(dev, &pdev->dev); ap = netdev_priv(dev); + ap->ndev = dev; ap->pdev = pdev; ap->name = pci_name(pdev); @@ -1562,10 +1563,10 @@ static void ace_watchdog(struct net_device *data, unsigned int txqueue) } -static void ace_tasklet(unsigned long arg) +static void ace_tasklet(struct tasklet_struct *t) { - struct net_device *dev = (struct net_device *) arg; - struct ace_private *ap = netdev_priv(dev); + struct ace_private *ap = from_tasklet(ap, t, ace_tasklet); + struct net_device *dev = ap->ndev; int cur_size; cur_size = atomic_read(&ap->cur_rx_bufs); @@ -2269,7 +2270,7 @@ static int ace_open(struct net_device *dev) /* * Setup the bottom half rx ring refill handler */ - tasklet_init(&ap->ace_tasklet, ace_tasklet, (unsigned long)dev); + tasklet_setup(&ap->ace_tasklet, ace_tasklet); return 0; } diff --git a/drivers/net/ethernet/alteon/acenic.h b/drivers/net/ethernet/alteon/acenic.h index c670067b1541..265fa601a258 100644 --- a/drivers/net/ethernet/alteon/acenic.h +++ b/drivers/net/ethernet/alteon/acenic.h @@ -633,6 +633,7 @@ struct ace_skb */ struct ace_private { + struct net_device *ndev; /* backpointer */ struct ace_info *info; struct ace_regs __iomem *regs; /* register base */ struct ace_skb *skb; @@ -776,7 +777,7 @@ static int 
ace_open(struct net_device *dev); static netdev_tx_t ace_start_xmit(struct sk_buff *skb, struct net_device *dev); static int ace_close(struct net_device *dev); -static void ace_tasklet(unsigned long dev); +static void ace_tasklet(struct tasklet_struct *t); static void ace_dump_trace(struct ace_private *ap); static void ace_set_multicast_list(struct net_device *dev); static int ace_change_mtu(struct net_device *dev, int new_mtu); diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index b818a169c193..4164eacc5c28 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -1,37 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 
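[Editor's note] The acenic.c/acenic.h hunks above are an instance of the tasklet API conversion: tasklet_init() with an (unsigned long) cookie becomes tasklet_setup(), the callback takes the tasklet_struct pointer itself, and from_tasklet() recovers the enclosing private struct, which is why a struct net_device *ndev backpointer is added to ace_private. A minimal sketch of the idiom, with a hypothetical my_priv struct in place of ace_private:

	#include <linux/interrupt.h>
	#include <linux/netdevice.h>

	struct my_priv {
		struct net_device *ndev;	/* backpointer, as added to ace_private */
		struct tasklet_struct task;
	};

	static void my_tasklet(struct tasklet_struct *t)
	{
		/* from_tasklet() is container_of() in disguise: it recovers
		 * the private struct from the embedded tasklet_struct. */
		struct my_priv *priv = from_tasklet(priv, t, task);
		struct net_device *dev = priv->ndev;

		(void)dev;	/* refill rx buffers, etc. */
	}

	static void my_open(struct my_priv *priv)
	{
		/* No (unsigned long) data argument anymore: the callback is
		 * handed the tasklet_struct pointer directly. */
		tasklet_setup(&priv->task, my_tasklet);
	}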
*/ #ifndef _ENA_ADMIN_H_ #define _ENA_ADMIN_H_ +#define ENA_ADMIN_RSS_KEY_PARTS 10 enum ena_admin_aq_opcode { ENA_ADMIN_CREATE_SQ = 1, @@ -55,6 +29,7 @@ enum ena_admin_aq_completion_status { ENA_ADMIN_RESOURCE_BUSY = 7, }; +/* subcommands for the set/get feature admin commands */ enum ena_admin_aq_feature_id { ENA_ADMIN_DEVICE_ATTRIBUTES = 1, ENA_ADMIN_MAX_QUEUES_NUM = 2, @@ -63,7 +38,7 @@ enum ena_admin_aq_feature_id { ENA_ADMIN_MAX_QUEUES_EXT = 7, ENA_ADMIN_RSS_HASH_FUNCTION = 10, ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, - ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, + ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG = 12, ENA_ADMIN_MTU = 14, ENA_ADMIN_RSS_HASH_INPUT = 18, ENA_ADMIN_INTERRUPT_MODERATION = 20, @@ -117,6 +92,8 @@ enum ena_admin_completion_policy_type { enum ena_admin_get_stats_type { ENA_ADMIN_GET_STATS_TYPE_BASIC = 0, ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1, + /* extra HW stats for specific network interface */ + ENA_ADMIN_GET_STATS_TYPE_ENI = 2, }; enum ena_admin_get_stats_scope { @@ -193,7 +170,7 @@ struct ena_admin_acq_common_desc { u16 extended_status; /* indicates to the driver which AQ entry has been consumed by the - * device and could be reused + * device and could be reused */ u16 sq_head_indx; }; @@ -238,8 +215,8 @@ struct ena_admin_aq_create_sq_cmd { */ u8 sq_caps_3; - /* associated completion queue id. This CQ must be created prior to - * SQ creation + /* associated completion queue id. This CQ must be created prior to SQ + * creation */ u16 cq_idx; @@ -378,7 +355,7 @@ struct ena_admin_aq_get_stats_cmd { u16 queue_idx; /* device id, value 0xFFFF means mine. only privileged device can get - * stats of other device + * stats of other device */ u16 device_id; }; @@ -410,10 +387,43 @@ struct ena_admin_basic_stats { u32 tx_drops_high; }; +/* ENI Statistics Command. */ +struct ena_admin_eni_stats { + /* The number of packets shaped due to inbound aggregate BW + * allowance being exceeded + */ + u64 bw_in_allowance_exceeded; + + /* The number of packets shaped due to outbound aggregate BW + * allowance being exceeded + */ + u64 bw_out_allowance_exceeded; + + /* The number of packets shaped due to PPS allowance being exceeded */ + u64 pps_allowance_exceeded; + + /* The number of packets shaped due to connection tracking + * allowance being exceeded and leading to failure in establishment + * of new connections + */ + u64 conntrack_allowance_exceeded; + + /* The number of packets shaped due to linklocal packet rate + * allowance being exceeded + */ + u64 linklocal_allowance_exceeded; +}; + struct ena_admin_acq_get_stats_resp { struct ena_admin_acq_common_desc acq_common_desc; - struct ena_admin_basic_stats basic_stats; + union { + u64 raw[7]; + + struct ena_admin_basic_stats basic_stats; + + struct ena_admin_eni_stats eni_stats; + } u; }; struct ena_admin_get_set_feature_common_desc { @@ -440,7 +450,9 @@ struct ena_admin_device_attr_feature_desc { u32 device_version; - /* bitmap of ena_admin_aq_feature_id */ + /* bitmap of ena_admin_aq_feature_id, which represents supported + * subcommands for the set/get feature admin commands. + */ u32 supported_features; u32 reserved3; @@ -526,32 +538,30 @@ struct ena_admin_feature_llq_desc { u32 max_llq_depth; - /* specify the header locations the device supports. bitfield of - * enum ena_admin_llq_header_location. + /* specify the header locations the device supports. bitfield of enum + * ena_admin_llq_header_location. */ u16 header_location_ctrl_supported; /* the header location the driver selected to use. 
*/ u16 header_location_ctrl_enabled; - /* if inline header is specified - this is the size of descriptor - * list entry. If header in a separate ring is specified - this is - * the size of header ring entry. bitfield of enum - * ena_admin_llq_ring_entry_size. specify the entry sizes the device - * supports + /* if inline header is specified - this is the size of descriptor list + * entry. If header in a separate ring is specified - this is the size + * of header ring entry. bitfield of enum ena_admin_llq_ring_entry_size. + * specify the entry sizes the device supports */ u16 entry_size_ctrl_supported; /* the entry size the driver selected to use. */ u16 entry_size_ctrl_enabled; - /* valid only if inline header is specified. First entry associated - * with the packet includes descriptors and header. Rest of the - * entries occupied by descriptors. This parameter defines the max - * number of descriptors precedding the header in the first entry. - * The field is bitfield of enum - * ena_admin_llq_num_descs_before_header and specify the values the - * device supports + /* valid only if inline header is specified. First entry associated with + * the packet includes descriptors and header. Rest of the entries + * occupied by descriptors. This parameter defines the max number of + * descriptors precedding the header in the first entry. The field is + * bitfield of enum ena_admin_llq_num_descs_before_header and specify + * the values the device supports */ u16 desc_num_before_header_supported; @@ -559,7 +569,7 @@ struct ena_admin_feature_llq_desc { u16 desc_num_before_header_enabled; /* valid only if inline was chosen. bitfield of enum - * ena_admin_llq_stride_ctrl + * ena_admin_llq_stride_ctrl */ u16 descriptors_stride_ctrl_supported; @@ -594,8 +604,8 @@ struct ena_admin_queue_ext_feature_fields { u32 max_tx_header_size; - /* Maximum Descriptors number, including meta descriptor, allowed for - * a single Tx packet + /* Maximum Descriptors number, including meta descriptor, allowed for a + * single Tx packet */ u16 max_per_packet_tx_descs; @@ -618,8 +628,8 @@ struct ena_admin_queue_feature_desc { u32 max_header_size; - /* Maximum Descriptors number, including meta descriptor, allowed for - * a single Tx packet + /* Maximum Descriptors number, including meta descriptor, allowed for a + * single Tx packet */ u16 max_packet_tx_descs; @@ -707,11 +717,11 @@ enum ena_admin_hash_functions { }; struct ena_admin_feature_rss_flow_hash_control { - u32 keys_num; + u32 key_parts; u32 reserved; - u32 key[10]; + u32 key[ENA_ADMIN_RSS_KEY_PARTS]; }; struct ena_admin_feature_rss_flow_hash_function { @@ -1007,7 +1017,7 @@ struct ena_admin_set_feat_resp { struct ena_admin_aenq_common_desc { u16 group; - u16 syndrom; + u16 syndrome; /* 0 : phase * 7:1 : reserved - MBZ @@ -1031,7 +1041,7 @@ enum ena_admin_aenq_group { ENA_ADMIN_AENQ_GROUPS_NUM = 5, }; -enum ena_admin_aenq_notification_syndrom { +enum ena_admin_aenq_notification_syndrome { ENA_ADMIN_SUSPEND = 0, ENA_ADMIN_RESUME = 1, ENA_ADMIN_UPDATE_HINTS = 2, diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 435bf05a853c..5f8769aa469d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2015 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "ena_com.h" @@ -98,7 +71,7 @@ static int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, dma_addr_t addr) { if ((addr & GENMASK_ULL(ena_dev->dma_addr_bits - 1, 0)) != addr) { - pr_err("dma address has more bits that the device supports\n"); + pr_err("DMA address has more bits that the device supports\n"); return -EINVAL; } @@ -108,16 +81,16 @@ static int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, return 0; } -static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue) +static int ena_com_admin_init_sq(struct ena_com_admin_queue *admin_queue) { - struct ena_com_admin_sq *sq = &queue->sq; - u16 size = ADMIN_SQ_SIZE(queue->q_depth); + struct ena_com_admin_sq *sq = &admin_queue->sq; + u16 size = ADMIN_SQ_SIZE(admin_queue->q_depth); - sq->entries = dma_alloc_coherent(queue->q_dmadev, size, &sq->dma_addr, - GFP_KERNEL); + sq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, + &sq->dma_addr, GFP_KERNEL); if (!sq->entries) { - pr_err("memory allocation failed\n"); + pr_err("Memory allocation failed\n"); return -ENOMEM; } @@ -130,16 +103,16 @@ static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue) return 0; } -static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue) +static int ena_com_admin_init_cq(struct ena_com_admin_queue *admin_queue) { - struct ena_com_admin_cq *cq = &queue->cq; - u16 size = ADMIN_CQ_SIZE(queue->q_depth); + struct ena_com_admin_cq *cq = &admin_queue->cq; + u16 size = ADMIN_CQ_SIZE(admin_queue->q_depth); - cq->entries = dma_alloc_coherent(queue->q_dmadev, size, &cq->dma_addr, - GFP_KERNEL); + cq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, + &cq->dma_addr, GFP_KERNEL); if (!cq->entries) { - pr_err("memory allocation failed\n"); + pr_err("Memory allocation failed\n"); return -ENOMEM; } @@ -149,20 +122,20 @@ static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue) return 0; } -static int ena_com_admin_init_aenq(struct ena_com_dev *dev, +static int ena_com_admin_init_aenq(struct ena_com_dev *ena_dev, struct ena_aenq_handlers *aenq_handlers) { - struct ena_com_aenq *aenq = &dev->aenq; + struct ena_com_aenq *aenq = &ena_dev->aenq; u32 addr_low, addr_high, aenq_caps; u16 size; - dev->aenq.q_depth = 
ENA_ASYNC_QUEUE_DEPTH; + ena_dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH; size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH); - aenq->entries = dma_alloc_coherent(dev->dmadev, size, &aenq->dma_addr, - GFP_KERNEL); + aenq->entries = dma_alloc_coherent(ena_dev->dmadev, size, + &aenq->dma_addr, GFP_KERNEL); if (!aenq->entries) { - pr_err("memory allocation failed\n"); + pr_err("Memory allocation failed\n"); return -ENOMEM; } @@ -172,18 +145,18 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *dev, addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); - writel(addr_low, dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF); - writel(addr_high, dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF); + writel(addr_low, ena_dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF); aenq_caps = 0; - aenq_caps |= dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; + aenq_caps |= ena_dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; aenq_caps |= (sizeof(struct ena_admin_aenq_entry) << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; - writel(aenq_caps, dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF); + writel(aenq_caps, ena_dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF); if (unlikely(!aenq_handlers)) { - pr_err("aenq handlers pointer is NULL\n"); + pr_err("AENQ handlers pointer is NULL\n"); return -EINVAL; } @@ -199,31 +172,31 @@ static void comp_ctxt_release(struct ena_com_admin_queue *queue, atomic_dec(&queue->outstanding_cmds); } -static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, +static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *admin_queue, u16 command_id, bool capture) { - if (unlikely(command_id >= queue->q_depth)) { - pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", - command_id, queue->q_depth); + if (unlikely(command_id >= admin_queue->q_depth)) { + pr_err("Command id is larger than the queue size. 
cmd_id: %u queue size %d\n", + command_id, admin_queue->q_depth); return NULL; } - if (unlikely(!queue->comp_ctx)) { + if (unlikely(!admin_queue->comp_ctx)) { pr_err("Completion context is NULL\n"); return NULL; } - if (unlikely(queue->comp_ctx[command_id].occupied && capture)) { + if (unlikely(admin_queue->comp_ctx[command_id].occupied && capture)) { pr_err("Completion context is occupied\n"); return NULL; } if (capture) { - atomic_inc(&queue->outstanding_cmds); - queue->comp_ctx[command_id].occupied = true; + atomic_inc(&admin_queue->outstanding_cmds); + admin_queue->comp_ctx[command_id].occupied = true; } - return &queue->comp_ctx[command_id]; + return &admin_queue->comp_ctx[command_id]; } static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue, @@ -244,7 +217,7 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu /* In case of queue FULL */ cnt = (u16)atomic_read(&admin_queue->outstanding_cmds); if (cnt >= admin_queue->q_depth) { - pr_debug("admin queue is full.\n"); + pr_debug("Admin queue is full.\n"); admin_queue->stats.out_of_space++; return ERR_PTR(-ENOSPC); } @@ -284,20 +257,21 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu return comp_ctx; } -static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) +static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *admin_queue) { - size_t size = queue->q_depth * sizeof(struct ena_comp_ctx); + size_t size = admin_queue->q_depth * sizeof(struct ena_comp_ctx); struct ena_comp_ctx *comp_ctx; u16 i; - queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL); - if (unlikely(!queue->comp_ctx)) { - pr_err("memory allocation failed\n"); + admin_queue->comp_ctx = + devm_kzalloc(admin_queue->q_dmadev, size, GFP_KERNEL); + if (unlikely(!admin_queue->comp_ctx)) { + pr_err("Memory allocation failed\n"); return -ENOMEM; } - for (i = 0; i < queue->q_depth; i++) { - comp_ctx = get_comp_ctxt(queue, i, false); + for (i = 0; i < admin_queue->q_depth; i++) { + comp_ctx = get_comp_ctxt(admin_queue, i, false); if (comp_ctx) init_completion(&comp_ctx->wait_event); } @@ -363,7 +337,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, } if (!io_sq->desc_addr.virt_addr) { - pr_err("memory allocation failed\n"); + pr_err("Memory allocation failed\n"); return -ENOMEM; } } @@ -389,7 +363,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); if (!io_sq->bounce_buf_ctrl.base_buffer) { - pr_err("bounce buffer memory allocation failed\n"); + pr_err("Bounce buffer memory allocation failed\n"); return -ENOMEM; } @@ -449,7 +423,7 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, } if (!io_cq->cdesc_addr.virt_addr) { - pr_err("memory allocation failed\n"); + pr_err("Memory allocation failed\n"); return -ENOMEM; } @@ -525,7 +499,7 @@ static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_qu static int ena_com_comp_status_to_errno(u8 comp_status) { if (unlikely(comp_status != 0)) - pr_err("admin command failed[%u]\n", comp_status); + pr_err("Admin command failed[%u]\n", comp_status); switch (comp_status) { case ENA_ADMIN_SUCCESS: @@ -539,6 +513,8 @@ static int ena_com_comp_status_to_errno(u8 comp_status) case ENA_ADMIN_ILLEGAL_PARAMETER: case ENA_ADMIN_UNKNOWN_ERROR: return -EINVAL; + case ENA_ADMIN_RESOURCE_BUSY: + return -EAGAIN; } return -EINVAL; @@ -603,7 +579,7 @@ err: return ret; } -/** +/* * Set the LLQ configurations of the firmware * * 
The driver provides only the enabled feature values to the device, @@ -717,7 +693,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, /* The desc list entry size should be whole multiply of 8 * This requirement comes from __iowrite64_copy() */ - pr_err("illegal entry size %d\n", llq_info->desc_list_entry_size); + pr_err("Illegal entry size %d\n", llq_info->desc_list_entry_size); return -EINVAL; } @@ -858,7 +834,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) } if (unlikely(i == timeout)) { - pr_err("reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n", + pr_err("Reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n", mmio_read->seq_num, offset, read_resp->req_id, read_resp->reg_off); ret = ENA_MMIO_READ_TIMEOUT; @@ -925,7 +901,7 @@ static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev, sizeof(destroy_resp)); if (unlikely(ret && (ret != -ENODEV))) - pr_err("failed to destroy io sq error: %d\n", ret); + pr_err("Failed to destroy io sq error: %d\n", ret); return ret; } @@ -1034,7 +1010,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, &get_cmd.control_buffer.address, control_buf_dma_addr); if (unlikely(ret)) { - pr_err("memory address set failed\n"); + pr_err("Memory address set failed\n"); return ret; } @@ -1081,11 +1057,10 @@ static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) (ena_dev->rss).hash_key; netdev_rss_key_fill(&hash_key->key, sizeof(hash_key->key)); - /* The key is stored in the device in u32 array - * as well as the API requires the key to be passed in this - * format. Thus the size of our array should be divided by 4 + /* The key buffer is stored in the device in an array of + * uint32 elements. */ - hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32); + hash_key->key_parts = ENA_ADMIN_RSS_KEY_PARTS; } static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) @@ -1149,13 +1124,13 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, int ret; ret = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, 0); + ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG, 0); if (unlikely(ret)) return ret; if ((get_resp.u.ind_table.min_size > log_size) || (get_resp.u.ind_table.max_size < log_size)) { - pr_err("indirect table size doesn't fit. requested size: %d while min is:%d and max %d\n", + pr_err("Indirect table size doesn't fit. 
requested size: %d while min is:%d and max %d\n", 1 << log_size, 1 << get_resp.u.ind_table.min_size, 1 << get_resp.u.ind_table.max_size); return -EINVAL; @@ -1248,7 +1223,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, &create_cmd.sq_ba, io_sq->desc_addr.phys_addr); if (unlikely(ret)) { - pr_err("memory address set failed\n"); + pr_err("Memory address set failed\n"); return ret; } } @@ -1277,7 +1252,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, cmd_completion.llq_descriptors_offset); } - pr_debug("created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth); + pr_debug("Created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth); return ret; } @@ -1390,7 +1365,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev, &create_cmd.cq_ba, io_cq->cdesc_addr.phys_addr); if (unlikely(ret)) { - pr_err("memory address set failed\n"); + pr_err("Memory address set failed\n"); return ret; } @@ -1419,7 +1394,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev, (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + cmd_completion.numa_node_register_offset); - pr_debug("created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth); + pr_debug("Created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth); return ret; } @@ -1612,12 +1587,12 @@ int ena_com_validate_version(struct ena_com_dev *ena_dev) return -ETIME; } - pr_info("ena device version: %d.%d\n", + pr_info("ENA device version: %d.%d\n", (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, ver & ENA_REGS_VERSION_MINOR_VERSION_MASK); - pr_info("ena controller version: %d.%d.%d implementation version %d\n", + pr_info("ENA controller version: %d.%d.%d implementation version %d\n", (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> @@ -1640,6 +1615,19 @@ int ena_com_validate_version(struct ena_com_dev *ena_dev) return 0; } +static void +ena_com_free_ena_admin_queue_comp_ctx(struct ena_com_dev *ena_dev, + struct ena_com_admin_queue *admin_queue) + +{ + if (!admin_queue->comp_ctx) + return; + + devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx); + + admin_queue->comp_ctx = NULL; +} + void ena_com_admin_destroy(struct ena_com_dev *ena_dev) { struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; @@ -1648,9 +1636,8 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev) struct ena_com_aenq *aenq = &ena_dev->aenq; u16 size; - if (admin_queue->comp_ctx) - devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx); - admin_queue->comp_ctx = NULL; + ena_com_free_ena_admin_queue_comp_ctx(ena_dev, admin_queue); + size = ADMIN_SQ_SIZE(admin_queue->q_depth); if (sq->entries) dma_free_coherent(ena_dev->dmadev, size, sq->entries, @@ -1928,6 +1915,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr, sizeof(get_resp.u.dev_attr)); + ena_dev->supported_features = get_resp.u.dev_attr.supported_features; if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { @@ -2006,10 +1994,10 @@ void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev) /* ena_handle_specific_aenq_event: * return the handler that is relevant to the specific event group */ -static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *dev, +static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *ena_dev, u16 group) { - struct ena_aenq_handlers *aenq_handlers = dev->aenq.aenq_handlers; + struct 
ena_aenq_handlers *aenq_handlers = ena_dev->aenq.aenq_handlers; if ((group < ENA_MAX_HANDLERS) && aenq_handlers->handlers[group]) return aenq_handlers->handlers[group]; @@ -2021,11 +2009,11 @@ static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *dev, * handles the aenq incoming events. * pop events from the queue and apply the specific handler */ -void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) +void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data) { struct ena_admin_aenq_entry *aenq_e; struct ena_admin_aenq_common_desc *aenq_common; - struct ena_com_aenq *aenq = &dev->aenq; + struct ena_com_aenq *aenq = &ena_dev->aenq; u64 timestamp; ena_aenq_handler handler_cb; u16 masked_head, processed = 0; @@ -2045,12 +2033,13 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) dma_rmb(); timestamp = (u64)aenq_common->timestamp_low | - ((u64)aenq_common->timestamp_high << 32); - pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", - aenq_common->group, aenq_common->syndrom, timestamp); + ((u64)aenq_common->timestamp_high << 32); + + pr_debug("AENQ! Group[%x] Syndrome[%x] timestamp: [%llus]\n", + aenq_common->group, aenq_common->syndrome, timestamp); /* Handle specific event*/ - handler_cb = ena_com_get_specific_aenq_cb(dev, + handler_cb = ena_com_get_specific_aenq_cb(ena_dev, aenq_common->group); handler_cb(data, aenq_e); /* call the actual event handler*/ @@ -2075,7 +2064,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) /* write the aenq doorbell after all AENQ descriptors were read */ mb(); - writel_relaxed((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); + writel_relaxed((u32)aenq->head, + ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); } int ena_com_dev_reset(struct ena_com_dev *ena_dev, @@ -2167,6 +2157,21 @@ static int ena_get_dev_stats(struct ena_com_dev *ena_dev, return ret; } +int ena_com_get_eni_stats(struct ena_com_dev *ena_dev, + struct ena_admin_eni_stats *stats) +{ + struct ena_com_stats_ctx ctx; + int ret; + + memset(&ctx, 0x0, sizeof(ctx)); + ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_ENI); + if (likely(ret == 0)) + memcpy(stats, &ctx.get_resp.u.eni_stats, + sizeof(ctx.get_resp.u.eni_stats)); + + return ret; +} + int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats) { @@ -2176,8 +2181,8 @@ int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, memset(&ctx, 0x0, sizeof(ctx)); ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_BASIC); if (likely(ret == 0)) - memcpy(stats, &ctx.get_resp.basic_stats, - sizeof(ctx.get_resp.basic_stats)); + memcpy(stats, &ctx.get_resp.u.basic_stats, + sizeof(ctx.get_resp.u.basic_stats)); return ret; } @@ -2273,7 +2278,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) &cmd.control_buffer.address, rss->hash_key_dma_addr); if (unlikely(ret)) { - pr_err("memory address set failed\n"); + pr_err("Memory address set failed\n"); return ret; } @@ -2331,7 +2336,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, } memcpy(hash_key->key, key, key_len); rss->hash_init_val = init_val; - hash_key->keys_num = key_len >> 2; + hash_key->key_parts = key_len / sizeof(hash_key->key[0]); } break; case ENA_ADMIN_CRC32: @@ -2386,7 +2391,8 @@ int ena_com_get_hash_key(struct ena_com_dev *ena_dev, u8 *key) ena_dev->rss.hash_key; if (key) - memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2); + memcpy(key, hash_key->key, + 
(size_t)(hash_key->key_parts) * sizeof(hash_key->key[0])); return 0; } @@ -2442,7 +2448,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) &cmd.control_buffer.address, rss->hash_ctrl_dma_addr); if (unlikely(ret)) { - pr_err("memory address set failed\n"); + pr_err("Memory address set failed\n"); return ret; } cmd.control_buffer.length = sizeof(*hash_ctrl); @@ -2503,7 +2509,7 @@ int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev) available_fields = hash_ctrl->selected_fields[i].fields & hash_ctrl->supported_fields[i].fields; if (available_fields != hash_ctrl->selected_fields[i].fields) { - pr_err("hash control doesn't support all the desire configuration. proto %x supported %x selected %x\n", + pr_err("Hash control doesn't support all the desire configuration. proto %x supported %x selected %x\n", i, hash_ctrl->supported_fields[i].fields, hash_ctrl->selected_fields[i].fields); return -EOPNOTSUPP; @@ -2541,7 +2547,7 @@ int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, /* Make sure all the fields are supported */ supported_fields = hash_ctrl->supported_fields[proto].fields; if ((hash_fields & supported_fields) != hash_fields) { - pr_err("proto %d doesn't support the required fields %x. supports only: %x\n", + pr_err("Proto %d doesn't support the required fields %x. supports only: %x\n", proto, hash_fields, supported_fields); } @@ -2581,9 +2587,9 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) int ret; if (!ena_com_check_supported_feature_id( - ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) { + ena_dev, ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG)) { pr_debug("Feature %d isn't supported\n", - ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG); return -EOPNOTSUPP; } @@ -2598,7 +2604,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; cmd.aq_common_descriptor.flags = ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; - cmd.feat_common.feature_id = ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG; cmd.u.ind_table.size = rss->tbl_log_size; cmd.u.ind_table.inline_index = 0xFFFFFFFF; @@ -2606,7 +2612,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) &cmd.control_buffer.address, rss->rss_ind_tbl_dma_addr); if (unlikely(ret)) { - pr_err("memory address set failed\n"); + pr_err("Memory address set failed\n"); return ret; } @@ -2636,7 +2642,7 @@ int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) sizeof(struct ena_admin_rss_ind_table_entry); rc = ena_com_get_feature_ex(ena_dev, &get_resp, - ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, + ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG, rss->rss_ind_tbl_dma_addr, tbl_size, 0); if (unlikely(rc)) @@ -2719,8 +2725,7 @@ int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, host_attr->debug_area_virt_addr = dma_alloc_coherent(ena_dev->dmadev, debug_area_size, - &host_attr->debug_area_dma_addr, - GFP_KERNEL); + &host_attr->debug_area_dma_addr, GFP_KERNEL); if (unlikely(!host_attr->debug_area_virt_addr)) { host_attr->debug_area_size = 0; return -ENOMEM; @@ -2777,7 +2782,7 @@ int ena_com_set_host_attributes(struct ena_com_dev *ena_dev) &cmd.u.host_attr.debug_ba, host_attr->debug_area_dma_addr); if (unlikely(ret)) { - pr_err("memory address set failed\n"); + pr_err("Memory address set failed\n"); return ret; } @@ -2785,7 +2790,7 @@ int ena_com_set_host_attributes(struct ena_com_dev *ena_dev) &cmd.u.host_attr.os_info_ba, 
host_attr->host_info_dma_addr); if (unlikely(ret)) { - pr_err("memory address set failed\n"); + pr_err("Memory address set failed\n"); return ret; } @@ -2904,7 +2909,7 @@ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, (llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc)); if (unlikely(ena_dev->tx_max_header_size == 0)) { - pr_err("the size of the LLQ entry is smaller than needed\n"); + pr_err("The size of the LLQ entry is smaller than needed\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 4287d47b2b0b..55097750d062 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright 2015 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef ENA_COM @@ -536,7 +509,7 @@ void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev); * This method goes over the async event notification queue and calls the proper * aenq handler. */ -void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data); +void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data); /* ena_com_abort_admin_commands - Abort all the outstanding admin commands. * @ena_dev: ENA communication layer struct @@ -616,6 +589,15 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats); +/* ena_com_get_eni_stats - Get extended network interface statistics + * @ena_dev: ENA communication layer struct + * @stats: stats return value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_eni_stats(struct ena_com_dev *ena_dev, + struct ena_admin_eni_stats *stats); + /* ena_com_set_dev_mtu - Configure the device mtu. 
* @ena_dev: ENA communication layer struct * @mtu: mtu value diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h index 8a8ded0de9ac..e210c8a81fc0 100644 --- a/drivers/net/ethernet/amazon/ena/ena_common_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _ENA_COMMON_H_ #define _ENA_COMMON_H_ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index ccd440589565..ad30cacc1622 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2015 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "ena_eth_com.h" @@ -45,8 +18,9 @@ static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr + (head_masked * io_cq->cdesc_entry_size_in_bytes)); - desc_phase = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT; + desc_phase = (READ_ONCE(cdesc->status) & + ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT; if (desc_phase != expected_phase) return NULL; @@ -89,7 +63,7 @@ static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, } io_sq->entries_in_tx_burst_left--; - pr_debug("decreasing entries_in_tx_burst_left of queue %d to %d\n", + pr_debug("Decreasing entries_in_tx_burst_left of queue %d to %d\n", io_sq->qid, io_sq->entries_in_tx_burst_left); } @@ -128,12 +102,12 @@ static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, if (unlikely((header_offset + header_len) > llq_info->desc_list_entry_size)) { - pr_err("trying to write header larger than llq entry can accommodate\n"); + pr_err("Trying to write header larger than llq entry can accommodate\n"); return -EFAULT; } if (unlikely(!bounce_buffer)) { - pr_err("bounce buffer is NULL\n"); + pr_err("Bounce buffer is NULL\n"); return -EFAULT; } @@ -151,7 +125,7 @@ static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) bounce_buffer = pkt_ctrl->curr_bounce_buf; if (unlikely(!bounce_buffer)) { - pr_err("bounce buffer is NULL\n"); + pr_err("Bounce buffer is NULL\n"); return NULL; } @@ -262,8 +236,9 @@ static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, ena_com_cq_inc_head(io_cq); count++; - last = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; + last = (READ_ONCE(cdesc->status) & + ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; } while (!last); if (last) { @@ -275,7 +250,7 @@ static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, io_cq->cur_rx_pkt_cdesc_count = 0; io_cq->cur_rx_pkt_cdesc_start_idx = head_masked; - pr_debug("ena q_id: %d packets were completed. first desc idx %u descs# %d\n", + pr_debug("ENA q_id: %d packets were completed. 
first desc idx %u descs# %d\n", io_cq->qid, *first_cdesc_idx, count); } else { io_cq->cur_rx_pkt_cdesc_count += count; @@ -291,6 +266,9 @@ static int ena_com_create_meta(struct ena_com_io_sq *io_sq, struct ena_eth_io_tx_meta_desc *meta_desc = NULL; meta_desc = get_sq_desc(io_sq); + if (unlikely(!meta_desc)) + return -EFAULT; + memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc)); meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_DESC_MASK; @@ -298,7 +276,7 @@ static int ena_com_create_meta(struct ena_com_io_sq *io_sq, meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK; /* bits 0-9 of the mss */ - meta_desc->word2 |= (ena_meta->mss << + meta_desc->word2 |= ((u32)ena_meta->mss << ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT) & ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK; /* bits 10-13 of the mss */ @@ -308,7 +286,7 @@ static int ena_com_create_meta(struct ena_com_io_sq *io_sq, /* Extended meta desc */ meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK; - meta_desc->len_ctrl |= (io_sq->phase << + meta_desc->len_ctrl |= ((u32)io_sq->phase << ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) & ENA_ETH_IO_TX_META_DESC_PHASE_MASK; @@ -321,7 +299,7 @@ static int ena_com_create_meta(struct ena_com_io_sq *io_sq, ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT) & ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK; - meta_desc->word2 |= (ena_meta->l4_hdr_len << + meta_desc->word2 |= ((u32)ena_meta->l4_hdr_len << ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) & ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK; @@ -358,7 +336,7 @@ static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, } static void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, - struct ena_eth_io_rx_cdesc_base *cdesc) + struct ena_eth_io_rx_cdesc_base *cdesc) { ena_rx_ctx->l3_proto = cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK; @@ -379,7 +357,7 @@ static void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT; - pr_debug("ena_rx_ctx->l3_proto %d ena_rx_ctx->l4_proto %d\nena_rx_ctx->l3_csum_err %d ena_rx_ctx->l4_csum_err %d\nhash frag %d frag: %d cdesc_status: %x\n", + pr_debug("l3_proto %d l4_proto %d l3_csum_err %d l4_csum_err %d hash %d frag %d cdesc_status %x\n", ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err, ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status); @@ -412,7 +390,7 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, } if (unlikely(header_len > io_sq->tx_max_header_size)) { - pr_err("header size is too large %d max header: %d\n", + pr_err("Header size is too large %d max header: %d\n", header_len, io_sq->tx_max_header_size); return -EINVAL; } @@ -427,7 +405,7 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, rc = ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx, &have_meta); if (unlikely(rc)) { - pr_err("failed to create and store tx meta desc\n"); + pr_err("Failed to create and store tx meta desc\n"); return rc; } @@ -447,16 +425,16 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, if (!have_meta) desc->len_ctrl |= ENA_ETH_IO_TX_DESC_FIRST_MASK; - desc->buff_addr_hi_hdr_sz |= (header_len << + desc->buff_addr_hi_hdr_sz |= ((u32)header_len << ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT) & ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK; - desc->len_ctrl |= (io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & + desc->len_ctrl |= ((u32)io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & ENA_ETH_IO_TX_DESC_PHASE_MASK; desc->len_ctrl 
|= ENA_ETH_IO_TX_DESC_COMP_REQ_MASK; /* Bits 0-9 */ - desc->meta_ctrl |= (ena_tx_ctx->req_id << + desc->meta_ctrl |= ((u32)ena_tx_ctx->req_id << ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) & ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK; @@ -502,7 +480,7 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); - desc->len_ctrl |= (io_sq->phase << + desc->len_ctrl |= ((u32)io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & ENA_ETH_IO_TX_DESC_PHASE_MASK; } @@ -550,7 +528,7 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, return 0; } - pr_debug("fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, + pr_debug("Fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, nb_hw_desc); if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) { @@ -606,9 +584,9 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, desc->length = ena_buf->len; desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK | - ENA_ETH_IO_RX_DESC_LAST_MASK | - (io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK) | - ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; + ENA_ETH_IO_RX_DESC_LAST_MASK | + (io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK) | + ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; desc->req_id = req_id; diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index b6592cb93b04..2c16c218818a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright 2015 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef ENA_ETH_COM_H_ @@ -167,7 +140,7 @@ static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq, llq_info->descs_per_entry); } - pr_debug("queue: %d num_descs: %d num_entries_needed: %d\n", io_sq->qid, + pr_debug("Queue: %d num_descs: %d num_entries_needed: %d\n", io_sq->qid, num_descs, num_entries_needed); return num_entries_needed > io_sq->entries_in_tx_burst_left; @@ -178,13 +151,13 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) u16 max_entries_in_tx_burst = io_sq->llq_info.max_entries_in_tx_burst; u16 tail = io_sq->tail; - pr_debug("write submission queue doorbell for queue: %d tail: %d\n", + pr_debug("Write submission queue doorbell for queue: %d tail: %d\n", io_sq->qid, tail); writel(tail, io_sq->db_addr); if (is_llq_max_tx_burst_exists(io_sq)) { - pr_debug("reset available entries in tx burst for queue %d to %d\n", + pr_debug("Reset available entries in tx burst for queue %d to %d\n", io_sq->qid, max_entries_in_tx_burst); io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst; } diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h index d105c9c56192..332ac0d28ac7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _ENA_ETH_IO_H_ #define _ENA_ETH_IO_H_ diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 430275bc0d04..3b2cd28f962d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2015 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/pci.h> @@ -41,12 +14,17 @@ struct ena_stats { #define ENA_STAT_ENA_COM_ENTRY(stat) { \ .name = #stat, \ - .stat_offset = offsetof(struct ena_com_stats_admin, stat) \ + .stat_offset = offsetof(struct ena_com_stats_admin, stat) / sizeof(u64) \ } #define ENA_STAT_ENTRY(stat, stat_type) { \ .name = #stat, \ - .stat_offset = offsetof(struct ena_stats_##stat_type, stat) \ + .stat_offset = offsetof(struct ena_stats_##stat_type, stat) / sizeof(u64) \ +} + +#define ENA_STAT_HW_ENTRY(stat, stat_type) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_admin_##stat_type, stat) / sizeof(u64) \ } #define ENA_STAT_RX_ENTRY(stat) \ @@ -58,6 +36,9 @@ struct ena_stats { #define ENA_STAT_GLOBAL_ENTRY(stat) \ ENA_STAT_ENTRY(stat, dev) +#define ENA_STAT_ENI_ENTRY(stat) \ + ENA_STAT_HW_ENTRY(stat, eni_stats) + static const struct ena_stats ena_stats_global_strings[] = { ENA_STAT_GLOBAL_ENTRY(tx_timeout), ENA_STAT_GLOBAL_ENTRY(suspend), @@ -68,6 +49,14 @@ static const struct ena_stats ena_stats_global_strings[] = { ENA_STAT_GLOBAL_ENTRY(admin_q_pause), }; +static const struct ena_stats ena_stats_eni_strings[] = { + ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded), + ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded), + ENA_STAT_ENI_ENTRY(pps_allowance_exceeded), + ENA_STAT_ENI_ENTRY(conntrack_allowance_exceeded), + ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded), +}; + static const struct ena_stats ena_stats_tx_strings[] = { ENA_STAT_TX_ENTRY(cnt), ENA_STAT_TX_ENTRY(bytes), @@ -100,6 +89,11 @@ static const struct ena_stats ena_stats_rx_strings[] = { ENA_STAT_RX_ENTRY(bad_req_id), ENA_STAT_RX_ENTRY(empty_rx_ring), ENA_STAT_RX_ENTRY(csum_unchecked), + ENA_STAT_RX_ENTRY(xdp_aborted), + ENA_STAT_RX_ENTRY(xdp_drop), + ENA_STAT_RX_ENTRY(xdp_pass), + ENA_STAT_RX_ENTRY(xdp_tx), + ENA_STAT_RX_ENTRY(xdp_invalid), }; static const struct ena_stats ena_stats_ena_com_strings[] = { @@ -110,10 +104,12 @@ static const struct ena_stats ena_stats_ena_com_strings[] = { ENA_STAT_ENA_COM_ENTRY(no_completion), }; -#define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) -#define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) -#define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) -#define 
ENA_STATS_ARRAY_ENA_COM ARRAY_SIZE(ena_stats_ena_com_strings) +#define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) +#define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) +#define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) +#define ENA_STATS_ARRAY_ENA_COM ARRAY_SIZE(ena_stats_ena_com_strings) +#define ENA_STATS_ARRAY_ENI(adapter) \ + (ARRAY_SIZE(ena_stats_eni_strings) * (adapter)->eni_stats_supported) static void ena_safe_update_stat(u64 *src, u64 *dst, struct u64_stats_sync *syncp) @@ -134,29 +130,30 @@ static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) u64 *ptr; int i, j; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { /* Tx stats */ ring = &adapter->tx_ring[i]; for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { ena_stats = &ena_stats_tx_strings[j]; - ptr = (u64 *)((uintptr_t)&ring->tx_stats + - (uintptr_t)ena_stats->stat_offset); + ptr = (u64 *)&ring->tx_stats + ena_stats->stat_offset; ena_safe_update_stat(ptr, (*data)++, &ring->syncp); } + /* XDP TX queues don't have a RX queue counterpart */ + if (!ENA_IS_XDP_INDEX(adapter, i)) { + /* Rx stats */ + ring = &adapter->rx_ring[i]; - /* Rx stats */ - ring = &adapter->rx_ring[i]; - - for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { - ena_stats = &ena_stats_rx_strings[j]; + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; - ptr = (u64 *)((uintptr_t)&ring->rx_stats + - (uintptr_t)ena_stats->stat_offset); + ptr = (u64 *)&ring->rx_stats + + ena_stats->stat_offset; - ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + } } } } @@ -170,18 +167,17 @@ static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data) for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { ena_stats = &ena_stats_ena_com_strings[i]; - ptr = (u64 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats + - (uintptr_t)ena_stats->stat_offset); + ptr = (u64 *)&adapter->ena_dev->admin_queue.stats + + ena_stats->stat_offset; *(*data)++ = *ptr; } } -static void ena_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats *stats, - u64 *data) +static void ena_get_stats(struct ena_adapter *adapter, + u64 *data, + bool eni_stats_needed) { - struct ena_adapter *adapter = netdev_priv(netdev); const struct ena_stats *ena_stats; u64 *ptr; int i; @@ -189,16 +185,48 @@ static void ena_get_ethtool_stats(struct net_device *netdev, for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { ena_stats = &ena_stats_global_strings[i]; - ptr = (u64 *)((uintptr_t)&adapter->dev_stats + - (uintptr_t)ena_stats->stat_offset); + ptr = (u64 *)&adapter->dev_stats + ena_stats->stat_offset; ena_safe_update_stat(ptr, data++, &adapter->syncp); } + if (eni_stats_needed) { + ena_update_hw_stats(adapter); + for (i = 0; i < ENA_STATS_ARRAY_ENI(adapter); i++) { + ena_stats = &ena_stats_eni_strings[i]; + + ptr = (u64 *)&adapter->eni_stats + + ena_stats->stat_offset; + + ena_safe_update_stat(ptr, data++, &adapter->syncp); + } + } + ena_queue_stats(adapter, &data); ena_dev_admin_queue_stats(adapter, &data); } +static void ena_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, + u64 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + ena_get_stats(adapter, data, adapter->eni_stats_supported); +} + +static int ena_get_sw_stats_count(struct ena_adapter *adapter) +{ + return adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + + adapter->xdp_num_queues * 
ENA_STATS_ARRAY_TX + + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM; +} + +static int ena_get_hw_stats_count(struct ena_adapter *adapter) +{ + return ENA_STATS_ARRAY_ENI(adapter); +} + int ena_get_sset_count(struct net_device *netdev, int sset) { struct ena_adapter *adapter = netdev_priv(netdev); @@ -206,31 +234,38 @@ int ena_get_sset_count(struct net_device *netdev, int sset) if (sset != ETH_SS_STATS) return -EOPNOTSUPP; - return adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) - + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM; + return ena_get_sw_stats_count(adapter) + ena_get_hw_stats_count(adapter); } static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) { const struct ena_stats *ena_stats; + bool is_xdp; int i, j; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { + is_xdp = ENA_IS_XDP_INDEX(adapter, i); /* Tx stats */ for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { ena_stats = &ena_stats_tx_strings[j]; snprintf(*data, ETH_GSTRING_LEN, - "queue_%u_tx_%s", i, ena_stats->name); + "queue_%u_%s_%s", i, + is_xdp ? "xdp_tx" : "tx", ena_stats->name); (*data) += ETH_GSTRING_LEN; } - /* Rx stats */ - for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { - ena_stats = &ena_stats_rx_strings[j]; - snprintf(*data, ETH_GSTRING_LEN, - "queue_%u_rx_%s", i, ena_stats->name); - (*data) += ETH_GSTRING_LEN; + if (!is_xdp) { + /* RX stats, in XDP there isn't a RX queue + * counterpart + */ + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + snprintf(*data, ETH_GSTRING_LEN, + "queue_%u_rx_%s", i, ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } } } } @@ -249,25 +284,43 @@ static void ena_com_dev_strings(u8 **data) } } -static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data) +static void ena_get_strings(struct ena_adapter *adapter, + u8 *data, + bool eni_stats_needed) { - struct ena_adapter *adapter = netdev_priv(netdev); const struct ena_stats *ena_stats; int i; - if (sset != ETH_SS_STATS) - return; - for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { ena_stats = &ena_stats_global_strings[i]; memcpy(data, ena_stats->name, ETH_GSTRING_LEN); data += ETH_GSTRING_LEN; } + if (eni_stats_needed) { + for (i = 0; i < ENA_STATS_ARRAY_ENI(adapter); i++) { + ena_stats = &ena_stats_eni_strings[i]; + memcpy(data, ena_stats->name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + } + ena_queue_strings(adapter, &data); ena_com_dev_strings(&data); } +static void ena_get_ethtool_strings(struct net_device *netdev, + u32 sset, + u8 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + if (sset != ETH_SS_STATS) + return; + + ena_get_strings(adapter, data, adapter->eni_stats_supported); +} + static int ena_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *link_ksettings) { @@ -847,7 +900,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .get_ringparam = ena_get_ringparam, .set_ringparam = ena_set_ringparam, .get_sset_count = ena_get_sset_count, - .get_strings = ena_get_strings, + .get_strings = ena_get_ethtool_strings, .get_ethtool_stats = ena_get_ethtool_stats, .get_rxnfc = ena_get_rxnfc, .set_rxnfc = ena_set_rxnfc, @@ -875,7 +928,7 @@ static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf) int strings_num; int i, rc; - strings_num = ena_get_sset_count(netdev, ETH_SS_STATS); + strings_num = ena_get_sw_stats_count(adapter); if (strings_num <= 0) { netif_err(adapter, drv, netdev, "Can't get stats num\n"); return; @@ 
-886,7 +939,7 @@ static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf) GFP_ATOMIC); if (!strings_buf) { netif_err(adapter, drv, netdev, - "failed to alloc strings_buf\n"); + "Failed to allocate strings_buf\n"); return; } @@ -895,13 +948,13 @@ static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf) GFP_ATOMIC); if (!data_buf) { netif_err(adapter, drv, netdev, - "failed to allocate data buf\n"); + "Failed to allocate data buf\n"); devm_kfree(&adapter->pdev->dev, strings_buf); return; } - ena_get_strings(netdev, ETH_SS_STATS, strings_buf); - ena_get_ethtool_stats(netdev, NULL, data_buf); + ena_get_strings(adapter, strings_buf, false); + ena_get_stats(adapter, data_buf, false); /* If there is a buffer, dump stats, otherwise print them to dmesg */ if (buf) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a3a8edf9a734..e8131dadc22c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2015 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -139,7 +112,7 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); if (!ret) { - netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu); + netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu); update_rx_ring_mtu(adapter, new_mtu); dev->mtu = new_mtu; } else { @@ -178,7 +151,7 @@ static int ena_xmit_common(struct net_device *dev, */ if (unlikely(rc)) { netif_err(adapter, tx_queued, dev, - "failed to prepare tx bufs\n"); + "Failed to prepare tx bufs\n"); u64_stats_update_begin(&ring->syncp); ring->tx_stats.prepare_ctx_err++; u64_stats_update_end(&ring->syncp); @@ -292,7 +265,7 @@ error_report_dma_error: u64_stats_update_begin(&xdp_ring->syncp); xdp_ring->tx_stats.dma_mapping_err++; u64_stats_update_end(&xdp_ring->syncp); - netdev_warn(adapter->netdev, "failed to map xdp buff\n"); + netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n"); xdp_return_frame_rx_napi(tx_info->xdpf); tx_info->xdpf = NULL; @@ -365,6 +338,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, { struct bpf_prog *xdp_prog; u32 verdict = XDP_PASS; + u64 *xdp_stat; rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); @@ -374,17 +348,31 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, verdict = bpf_prog_run_xdp(xdp_prog, xdp); - if (verdict == XDP_TX) + if (verdict == XDP_TX) { ena_xdp_xmit_buff(rx_ring->netdev, xdp, rx_ring->qid + rx_ring->adapter->num_io_queues, rx_info); - else if (unlikely(verdict == XDP_ABORTED)) + + xdp_stat = &rx_ring->rx_stats.xdp_tx; + } else if (unlikely(verdict == XDP_ABORTED)) { trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); - else if (unlikely(verdict > XDP_TX)) + xdp_stat = &rx_ring->rx_stats.xdp_aborted; + } else if (unlikely(verdict == XDP_DROP)) { + xdp_stat = &rx_ring->rx_stats.xdp_drop; + } else if (unlikely(verdict == XDP_PASS)) { + xdp_stat = &rx_ring->rx_stats.xdp_pass; + } else { bpf_warn_invalid_xdp_action(verdict); + xdp_stat = &rx_ring->rx_stats.xdp_invalid; + } + + u64_stats_update_begin(&rx_ring->syncp); + (*xdp_stat)++; + u64_stats_update_end(&rx_ring->syncp); out: rcu_read_unlock(); + return verdict; } @@ -549,7 +537,7 @@ static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) if (!old_bpf_prog) netif_info(adapter, drv, adapter->netdev, - "xdp program set, changing the max_mtu from %d to %d", + "XDP program is set, changing the max_mtu from %d to %d", prev_mtu, netdev->max_mtu); } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { @@ -968,7 +956,7 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring, return -EIO; } netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, - "alloc page %p, rx_info %p\n", page, rx_info); + "Allocate page %p, rx_info %p\n", page, rx_info); rx_info->page = page; rx_info->page_offset = 0; @@ -1018,7 +1006,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) GFP_ATOMIC | __GFP_COMP); if (unlikely(rc < 0)) { netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, - "failed to alloc buffer for rx queue %d\n", + "Failed to allocate buffer for rx queue %d\n", rx_ring->qid); break; } @@ -1027,7 +1015,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) req_id); if (unlikely(rc)) { netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, - "failed to add buffer for rx queue %d\n", + "Failed to add buffer for rx queue %d\n", rx_ring->qid); break; } @@ -1039,9 +1027,9 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) 
u64_stats_update_begin(&rx_ring->syncp); rx_ring->rx_stats.refil_partial++; u64_stats_update_end(&rx_ring->syncp); - netdev_warn(rx_ring->netdev, - "refilled rx qid %d with only %d buffers (from %d)\n", - rx_ring->qid, i, num); + netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, + "Refilled rx qid %d with only %d buffers (from %d)\n", + rx_ring->qid, i, num); } /* ena_com_write_sq_doorbell issues a wmb() */ @@ -1082,7 +1070,7 @@ static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) if (unlikely(rc != bufs_num)) netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, - "refilling Queue %d failed. allocated %d buffers from: %d\n", + "Refilling Queue %d failed. allocated %d buffers from: %d\n", i, rc, bufs_num); } } @@ -1140,14 +1128,14 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) continue; if (print_once) { - netdev_notice(tx_ring->netdev, - "free uncompleted tx skb qid %d idx 0x%x\n", - tx_ring->qid, i); + netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev, + "Free uncompleted tx skb qid %d idx 0x%x\n", + tx_ring->qid, i); print_once = false; } else { - netdev_dbg(tx_ring->netdev, - "free uncompleted tx skb qid %d idx 0x%x\n", - tx_ring->qid, i); + netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev, + "Free uncompleted tx skb qid %d idx 0x%x\n", + tx_ring->qid, i); } ena_unmap_tx_buff(tx_ring, tx_info); @@ -1399,7 +1387,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, return NULL; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, - "rx allocated small packet. len %d. data_len %d\n", + "RX allocated small packet. len %d. data_len %d\n", skb->len, skb->data_len); /* sync this buffer for CPU use */ @@ -1436,7 +1424,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, rx_info->page_offset = 0; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, - "rx skb updated. len %d. data_len %d\n", + "RX skb updated. len %d. data_len %d\n", skb->len, skb->data_len); rx_info->page = NULL; @@ -1643,6 +1631,11 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, &next_to_clean); if (unlikely(!skb)) { + /* The page might not actually be freed here since the + * page reference count is incremented in + * ena_xdp_xmit_buff(), and it will be decreased only + * when send completion was received from the device + */ if (xdp_verdict == XDP_TX) ena_free_rx_page(rx_ring, &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]); @@ -1770,6 +1763,7 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, u64_stats_update_begin(&tx_ring->syncp); tx_ring->tx_stats.unmask_interrupt++; u64_stats_update_end(&tx_ring->syncp); + /* It is a shared MSI-X. * Tx and Rx CQ have pointer to it. 
* So we use one of them to reach the intr reg @@ -1987,7 +1981,7 @@ static int ena_enable_msix(struct ena_adapter *adapter) /* Reserved the max msix vectors we might need */ msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues); netif_dbg(adapter, probe, adapter->netdev, - "trying to enable MSI-X, vectors %d\n", msix_vecs); + "Trying to enable MSI-X, vectors %d\n", msix_vecs); irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC, msix_vecs, PCI_IRQ_MSIX); @@ -2000,7 +1994,7 @@ static int ena_enable_msix(struct ena_adapter *adapter) if (irq_cnt != msix_vecs) { netif_notice(adapter, probe, adapter->netdev, - "enable only %d MSI-X (out of %d), reduce the number of queues\n", + "Enable only %d MSI-X (out of %d), reduce the number of queues\n", irq_cnt, msix_vecs); adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC; } @@ -2070,12 +2064,12 @@ static int ena_request_mgmnt_irq(struct ena_adapter *adapter) irq->data); if (rc) { netif_err(adapter, probe, adapter->netdev, - "failed to request admin irq\n"); + "Failed to request admin irq\n"); return rc; } netif_dbg(adapter, probe, adapter->netdev, - "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n", + "Set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n", irq->affinity_hint_mask.bits[0], irq->vector); irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); @@ -2108,7 +2102,7 @@ static int ena_request_io_irq(struct ena_adapter *adapter) } netif_dbg(adapter, ifup, adapter->netdev, - "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n", + "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n", i, irq->affinity_hint_mask.bits[0], irq->vector); irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); @@ -2548,7 +2542,7 @@ static int ena_up(struct ena_adapter *adapter) { int io_queue_count, rc, i; - netdev_dbg(adapter->netdev, "%s\n", __func__); + netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__); io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; ena_setup_io_intr(adapter); @@ -2632,7 +2626,8 @@ static void ena_down(struct ena_adapter *adapter) rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); if (rc) - dev_err(&adapter->pdev->dev, "Device reset failed\n"); + netif_err(adapter, ifdown, adapter->netdev, + "Device reset failed\n"); /* stop submitting admin commands on a device that was reset */ ena_com_set_admin_running_state(adapter->ena_dev, false); } @@ -2954,7 +2949,7 @@ error_report_dma_error: u64_stats_update_begin(&tx_ring->syncp); tx_ring->tx_stats.dma_mapping_err++; u64_stats_update_end(&tx_ring->syncp); - netdev_warn(adapter->netdev, "failed to map skb\n"); + netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n"); tx_info->skb = NULL; @@ -3092,13 +3087,14 @@ static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { + struct device *dev = &pdev->dev; struct ena_admin_host_info *host_info; int rc; /* Allocate only the host info */ rc = ena_com_allocate_host_info(ena_dev); if (rc) { - pr_err("Cannot allocate host info\n"); + dev_err(dev, "Cannot allocate host info\n"); return; } @@ -3128,9 +3124,9 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pd rc = ena_com_set_host_attributes(ena_dev); if (rc) { if (rc == -EOPNOTSUPP) - pr_warn("Cannot set host attributes\n"); + dev_warn(dev, "Cannot set host attributes\n"); else - pr_err("Cannot set host attributes\n"); + dev_err(dev, "Cannot set host 
attributes\n"); goto err; } @@ -3158,7 +3154,8 @@ static void ena_config_debug_area(struct ena_adapter *adapter) rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size); if (rc) { - pr_err("Cannot allocate debug area\n"); + netif_err(adapter, drv, adapter->netdev, + "Cannot allocate debug area\n"); return; } @@ -3178,6 +3175,19 @@ err: ena_com_delete_debug_area(adapter->ena_dev); } +int ena_update_hw_stats(struct ena_adapter *adapter) +{ + int rc = 0; + + rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats); + if (rc) { + dev_info_once(&adapter->pdev->dev, "Failed to get ENI stats\n"); + return rc; + } + + return 0; +} + static void ena_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -3349,7 +3359,7 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, rc = ena_com_mmio_reg_read_request_init(ena_dev); if (rc) { - dev_err(dev, "failed to init mmio read less\n"); + dev_err(dev, "Failed to init mmio read less\n"); return rc; } @@ -3367,7 +3377,7 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, rc = ena_com_validate_version(ena_dev); if (rc) { - dev_err(dev, "device version is too low\n"); + dev_err(dev, "Device version is too low\n"); goto err_mmio_read_less; } @@ -3436,7 +3446,7 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq, &llq_config); if (rc) { - dev_err(&pdev->dev, "ena device init failed\n"); + dev_err(dev, "ENA device init failed\n"); goto err_admin_init; } @@ -3572,9 +3582,10 @@ static int ena_restore_device(struct ena_adapter *adapter) netif_carrier_on(adapter->netdev); mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); - dev_err(&pdev->dev, "Device reset completed successfully\n"); adapter->last_keep_alive_jiffies = jiffies; + dev_err(&pdev->dev, "Device reset completed successfully\n"); + return rc; err_disable_msix: ena_free_mgmnt_irq(adapter); @@ -3776,7 +3787,7 @@ static void check_for_empty_rx_ring(struct ena_adapter *adapter) u64_stats_update_end(&rx_ring->syncp); netif_err(adapter, drv, adapter->netdev, - "trigger refill for ring %d\n", i); + "Trigger refill for ring %d\n", i); napi_schedule(rx_ring->napi); rx_ring->empty_rx_queue = 0; @@ -4138,14 +4149,13 @@ static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx) */ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; + struct ena_calc_queue_size_ctx calc_queue_ctx = {}; struct ena_com_dev_get_features_ctx get_feat_ctx; struct ena_com_dev *ena_dev = NULL; struct ena_adapter *adapter; struct net_device *netdev; static int adapters_found; u32 max_num_io_queues; - char *queue_type_str; bool wd_state; int bars, rc; @@ -4177,7 +4187,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_resource_start(pdev, ENA_REG_BAR), pci_resource_len(pdev, ENA_REG_BAR)); if (!ena_dev->reg_bar) { - dev_err(&pdev->dev, "failed to remap regs bar\n"); + dev_err(&pdev->dev, "Failed to remap regs bar\n"); rc = -EFAULT; goto err_free_region; } @@ -4188,7 +4198,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state); if (rc) { - dev_err(&pdev->dev, "ena device init failed\n"); + dev_err(&pdev->dev, "ENA device init failed\n"); if (rc == -ETIME) rc = -EPROBE_DEFER; goto err_free_region; @@ -4196,7 +4206,7 @@ 
static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) rc = ena_map_llq_mem_bar(pdev, ena_dev, bars); if (rc) { - dev_err(&pdev->dev, "ena llq bar mapping failed\n"); + dev_err(&pdev->dev, "ENA llq bar mapping failed\n"); goto err_free_ena_dev; } @@ -4296,6 +4306,11 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ena_config_debug_area(adapter); + if (!ena_update_hw_stats(adapter)) + adapter->eni_stats_supported = true; + else + adapter->eni_stats_supported = false; + memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len); netif_carrier_off(netdev); @@ -4318,15 +4333,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) timer_setup(&adapter->timer_service, ena_timer_service, 0); mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) - queue_type_str = "Regular"; - else - queue_type_str = "Low Latency"; - dev_info(&pdev->dev, - "%s found at mem %lx, mac addr %pM, Placement policy: %s\n", + "%s found at mem %lx, mac addr %pM\n", DEVICE_NAME, (long)pci_resource_start(pdev, 0), - netdev->dev_addr, queue_type_str); + netdev->dev_addr); set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); @@ -4456,7 +4466,7 @@ static int __maybe_unused ena_suspend(struct device *dev_d) rtnl_lock(); if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { dev_err(&pdev->dev, - "ignoring device reset request as the device is being suspended\n"); + "Ignoring device reset request as the device is being suspended\n"); clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); } ena_destroy_device(adapter, true); @@ -4531,7 +4541,7 @@ static void ena_update_on_link_change(void *adapter_data, ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK; if (status) { - netdev_dbg(adapter->netdev, "%s\n", __func__); + netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__); set_bit(ENA_FLAG_LINK_UP, &adapter->flags); if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags)) netif_carrier_on(adapter->netdev); @@ -4575,7 +4585,7 @@ static void ena_notification(void *adapter_data, aenq_e->aenq_common_desc.group, ENA_ADMIN_NOTIFICATION); - switch (aenq_e->aenq_common_desc.syndrom) { + switch (aenq_e->aenq_common_desc.syndrome) { case ENA_ADMIN_UPDATE_HINTS: hints = (struct ena_admin_ena_hw_hints *) (&aenq_e->inline_data_w4); @@ -4584,7 +4594,7 @@ static void ena_notification(void *adapter_data, default: netif_err(adapter, drv, adapter->netdev, "Invalid aenq notification link state %d\n", - aenq_e->aenq_common_desc.syndrom); + aenq_e->aenq_common_desc.syndrome); } } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 0c8504006247..30eb686749dc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright 2015 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef ENA_H @@ -261,6 +234,11 @@ struct ena_stats_rx { u64 bad_req_id; u64 empty_rx_ring; u64 csum_unchecked; + u64 xdp_aborted; + u64 xdp_drop; + u64 xdp_pass; + u64 xdp_tx; + u64 xdp_invalid; }; struct ena_ring { @@ -405,6 +383,8 @@ struct ena_adapter { struct u64_stats_sync syncp; struct ena_stats_dev dev_stats; + struct ena_admin_eni_stats eni_stats; + bool eni_stats_supported; /* last queue index that was checked for uncompleted tx packets */ u32 last_monitored_tx_qid; @@ -422,6 +402,8 @@ void ena_dump_stats_to_dmesg(struct ena_adapter *adapter); void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); +int ena_update_hw_stats(struct ena_adapter *adapter); + int ena_update_queue_sizes(struct ena_adapter *adapter, u32 new_tx_size, u32 new_rx_size); diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h index 426e57e10a7f..3ecdf29160ca 100644 --- a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h +++ b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright 2015 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef ENA_PCI_ID_TBL_H_ diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h index b514bb1b855d..1e007a41a525 100644 --- a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef _ENA_REGS_H_ #define _ENA_REGS_H_ diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index e1fde585fd0d..00ae1081254d 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -657,16 +657,6 @@ static irqreturn_t lance_interrupt( int irq, void *dev_id) struct net_device *dev = dev_id; struct lance_private *lp = netdev_priv(dev); int csr0; - static int in_interrupt; - - if (dev == NULL) { - DPRINTK( 1, ( "lance_interrupt(): invalid dev_id\n" )); - return IRQ_NONE; - } - - if (in_interrupt) - DPRINTK( 2, ( "%s: Re-entering the interrupt handler.\n", dev->name )); - in_interrupt = 1; still_more: flush_cache_all(); @@ -774,7 +764,6 @@ static irqreturn_t lance_interrupt( int irq, void *dev_id) DPRINTK( 2, ( "%s: exiting interrupt, csr0=%#04x.\n", dev->name, DREG )); - in_interrupt = 0; return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 4ba75551cb17..2709a2db5657 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -403,9 +403,9 @@ static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period, return false; } -static void xgbe_ecc_isr_task(unsigned long data) +static void xgbe_ecc_isr_task(struct tasklet_struct *t) { - struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_ecc); unsigned int ecc_isr; bool stop = false; @@ -468,14 +468,14 @@ static irqreturn_t xgbe_ecc_isr(int irq, void *data) if (pdata->isr_as_tasklet) tasklet_schedule(&pdata->tasklet_ecc); else - xgbe_ecc_isr_task((unsigned long)pdata); + xgbe_ecc_isr_task(&pdata->tasklet_ecc); return IRQ_HANDLED; } -static void xgbe_isr_task(unsigned long data) +static void xgbe_isr_task(struct tasklet_struct *t) { - struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_dev); struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_channel *channel; unsigned int dma_isr, dma_ch_isr; @@ -582,7 +582,7 @@ isr_done: /* If there is not a separate ECC irq, handle it here */ if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq)) - xgbe_ecc_isr_task((unsigned long)pdata); + xgbe_ecc_isr_task(&pdata->tasklet_ecc); /* If there is not a separate I2C irq, handle it here */ if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq)) @@ -607,7 +607,7 @@ static irqreturn_t xgbe_isr(int irq, void *data) if (pdata->isr_as_tasklet) tasklet_schedule(&pdata->tasklet_dev); else - xgbe_isr_task((unsigned long)pdata); + xgbe_isr_task(&pdata->tasklet_dev); return IRQ_HANDLED; } @@ -991,9 +991,8 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) unsigned int i; int ret; - tasklet_init(&pdata->tasklet_dev, xgbe_isr_task, (unsigned long)pdata); - tasklet_init(&pdata->tasklet_ecc, xgbe_ecc_isr_task, - (unsigned long)pdata); + tasklet_setup(&pdata->tasklet_dev, xgbe_isr_task); + tasklet_setup(&pdata->tasklet_ecc, xgbe_ecc_isr_task); ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0, netdev_name(netdev), pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c index 4d9062d35930..22d4fc547a0a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -274,9 +274,9 @@ static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata, XI2C_IOREAD(pdata, IC_CLR_STOP_DET); } -static 
void xgbe_i2c_isr_task(unsigned long data) +static void xgbe_i2c_isr_task(struct tasklet_struct *t) { - struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_i2c); struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; unsigned int isr; @@ -324,7 +324,7 @@ static irqreturn_t xgbe_i2c_isr(int irq, void *data) if (pdata->isr_as_tasklet) tasklet_schedule(&pdata->tasklet_i2c); else - xgbe_i2c_isr_task((unsigned long)pdata); + xgbe_i2c_isr_task(&pdata->tasklet_i2c); return IRQ_HANDLED; } @@ -369,7 +369,7 @@ static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr) static irqreturn_t xgbe_i2c_combined_isr(struct xgbe_prv_data *pdata) { - xgbe_i2c_isr_task((unsigned long)pdata); + xgbe_i2c_isr_task(&pdata->tasklet_i2c); return IRQ_HANDLED; } @@ -462,8 +462,7 @@ static int xgbe_i2c_start(struct xgbe_prv_data *pdata) /* If we have a separate I2C irq, enable it */ if (pdata->dev_irq != pdata->i2c_irq) { - tasklet_init(&pdata->tasklet_i2c, xgbe_i2c_isr_task, - (unsigned long)pdata); + tasklet_setup(&pdata->tasklet_i2c, xgbe_i2c_isr_task); ret = devm_request_irq(pdata->dev, pdata->i2c_irq, xgbe_i2c_isr, 0, pdata->i2c_name, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 8a3a60bb2688..93ef5a30cb8d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -688,9 +688,9 @@ static void xgbe_an73_isr(struct xgbe_prv_data *pdata) } } -static void xgbe_an_isr_task(unsigned long data) +static void xgbe_an_isr_task(struct tasklet_struct *t) { - struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_an); netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n"); @@ -715,14 +715,14 @@ static irqreturn_t xgbe_an_isr(int irq, void *data) if (pdata->isr_as_tasklet) tasklet_schedule(&pdata->tasklet_an); else - xgbe_an_isr_task((unsigned long)pdata); + xgbe_an_isr_task(&pdata->tasklet_an); return IRQ_HANDLED; } static irqreturn_t xgbe_an_combined_isr(struct xgbe_prv_data *pdata) { - xgbe_an_isr_task((unsigned long)pdata); + xgbe_an_isr_task(&pdata->tasklet_an); return IRQ_HANDLED; } @@ -1414,8 +1414,7 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) /* If we have a separate AN irq, enable it */ if (pdata->dev_irq != pdata->an_irq) { - tasklet_init(&pdata->tasklet_an, xgbe_an_isr_task, - (unsigned long)pdata); + tasklet_setup(&pdata->tasklet_an, xgbe_an_isr_task); ret = devm_request_irq(pdata->dev, pdata->an_irq, xgbe_an_isr, 0, pdata->an_name, diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index d35a338120cf..643f5e646740 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -18,6 +18,7 @@ #include <linux/of_platform.h> #include <linux/of_net.h> #include <linux/of_mdio.h> +#include <linux/mdio/mdio-xgene.h> #include <linux/module.h> #include <net/ip.h> #include <linux/prefetch.h> @@ -26,7 +27,6 @@ #include "xgene_enet_hw.h" #include "xgene_enet_cle.h" #include "xgene_enet_ring2.h" -#include "../../../phy/mdio-xgene.h" #define ETHER_MIN_PACKET 64 #define ETHER_STD_PACKET 1518 diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index 1ab5314c4c1b..de2a9348bc3f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ 
b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -917,6 +917,57 @@ static int aq_ethtool_set_priv_flags(struct net_device *ndev, u32 flags) return ret; } +static int aq_ethtool_get_phy_tunable(struct net_device *ndev, + const struct ethtool_tunable *tuna, void *data) +{ + struct aq_nic_s *aq_nic = netdev_priv(ndev); + + switch (tuna->id) { + case ETHTOOL_PHY_EDPD: { + u16 *val = data; + + *val = aq_nic->aq_nic_cfg.is_media_detect ? AQ_HW_MEDIA_DETECT_CNT : 0; + break; + } + case ETHTOOL_PHY_DOWNSHIFT: { + u8 *val = data; + + *val = (u8)aq_nic->aq_nic_cfg.downshift_counter; + break; + } + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int aq_ethtool_set_phy_tunable(struct net_device *ndev, + const struct ethtool_tunable *tuna, const void *data) +{ + int err = -EOPNOTSUPP; + struct aq_nic_s *aq_nic = netdev_priv(ndev); + + switch (tuna->id) { + case ETHTOOL_PHY_EDPD: { + const u16 *val = data; + + err = aq_nic_set_media_detect(aq_nic, *val); + break; + } + case ETHTOOL_PHY_DOWNSHIFT: { + const u8 *val = data; + + err = aq_nic_set_downshift(aq_nic, *val); + break; + } + default: + break; + } + + return err; +} + const struct ethtool_ops aq_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@ -952,4 +1003,6 @@ const struct ethtool_ops aq_ethtool_ops = { .get_coalesce = aq_ethtool_get_coalesce, .set_coalesce = aq_ethtool_set_coalesce, .get_ts_info = aq_ethtool_get_ts_info, + .get_phy_tunable = aq_ethtool_get_phy_tunable, + .set_phy_tunable = aq_ethtool_set_phy_tunable, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 7df74015fbc9..bed481816ea3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -143,6 +143,8 @@ struct aq_stats_s { #define AQ_HW_LED_BLINK 0x2U #define AQ_HW_LED_DEFAULT 0x0U +#define AQ_HW_MEDIA_DETECT_CNT 6000 + enum aq_priv_flags { AQ_HW_LOOPBACK_DMA_SYS, AQ_HW_LOOPBACK_PKT_SYS, @@ -386,6 +388,10 @@ struct aq_fw_ops { int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate, u32 *supported_rates); + int (*set_downshift)(struct aq_hw_s *self, u32 counter); + + int (*set_media_detect)(struct aq_hw_s *self, bool enable); + u32 (*get_link_capabilities)(struct aq_hw_s *self); int (*send_macsec_req)(struct aq_hw_s *self, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index c6bdf1d677d1..0f865daeb36d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -405,6 +405,10 @@ int aq_nic_init(struct aq_nic_s *self) mutex_unlock(&self->fwreq_mutex); if (err < 0) goto err_exit; + /* Restore default settings */ + aq_nic_set_downshift(self, self->aq_nic_cfg.downshift_counter); + aq_nic_set_media_detect(self, self->aq_nic_cfg.is_media_detect ? 
+ AQ_HW_MEDIA_DETECT_CNT : 0); err = self->aq_hw_ops->hw_init(self->aq_hw, aq_nic_get_ndev(self)->dev_addr); @@ -1398,6 +1402,52 @@ void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type, } } +int aq_nic_set_downshift(struct aq_nic_s *self, int val) +{ + int err = 0; + struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; + + if (!self->aq_fw_ops->set_downshift) + return -EOPNOTSUPP; + + if (val > 15) { + netdev_err(self->ndev, "downshift counter should be <= 15\n"); + return -EINVAL; + } + cfg->downshift_counter = val; + + mutex_lock(&self->fwreq_mutex); + err = self->aq_fw_ops->set_downshift(self->aq_hw, cfg->downshift_counter); + mutex_unlock(&self->fwreq_mutex); + + return err; +} + +int aq_nic_set_media_detect(struct aq_nic_s *self, int val) +{ + struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; + int err = 0; + + if (!self->aq_fw_ops->set_media_detect) + return -EOPNOTSUPP; + + if (val > 0 && val != AQ_HW_MEDIA_DETECT_CNT) { + netdev_err(self->ndev, "EDPD on this device could have only fixed value of %d\n", + AQ_HW_MEDIA_DETECT_CNT); + return -EINVAL; + } + + mutex_lock(&self->fwreq_mutex); + err = self->aq_fw_ops->set_media_detect(self->aq_hw, !!val); + mutex_unlock(&self->fwreq_mutex); + + /* msecs plays no role - configuration is always fixed in PHY */ + if (!err) + cfg->is_media_detect = !!val; + + return err; +} + int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map) { struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index eb7d8430f2f5..926cca9a0c83 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -62,6 +62,8 @@ struct aq_nic_cfg_s { bool is_lro; bool is_qos; bool is_ptp; + bool is_media_detect; + int downshift_counter; enum aq_tc_mode tc_mode; u32 priv_flags; u8 tcs; @@ -195,6 +197,8 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self, struct aq_nic_cfg_s *aq_nic_get_cfg(struct aq_nic_s *self); u32 aq_nic_get_fw_version(struct aq_nic_s *self); int aq_nic_set_loopback(struct aq_nic_s *self); +int aq_nic_set_downshift(struct aq_nic_s *self, int val); +int aq_nic_set_media_detect(struct aq_nic_s *self, int val); int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self); void aq_nic_shutdown(struct aq_nic_s *self); u8 aq_nic_reserve_filter(struct aq_nic_s *self, enum aq_rx_filter_type type); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 8941ac4df9e3..9f1b15077e7d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -1536,7 +1536,7 @@ static int hw_atl_b0_hw_fl2_clear(struct aq_hw_s *self, return aq_hw_err_from_flags(self); } -/** +/* * @brief Set VLAN filter table * @details Configure VLAN filter table to accept (and assign the queue) traffic * for the particular vlan ids. 
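The aq_ethtool/aq_nic/aq_hw hunks above plumb two ethtool PHY tunables - energy-detect power down (EDPD) and downshift - from the ethtool core through the NIC layer down to firmware ops. Condensed into one sketch (the aq_nic_* setters are the ones added above; the wrapper and its name are illustrative), the driver-side dispatch is:

	/* ethtool passes a tunable id plus an opaque buffer whose layout is
	 * fixed per id: u8 for ETHTOOL_PHY_DOWNSHIFT, u16 (msecs) for
	 * ETHTOOL_PHY_EDPD. */
	static int sketch_set_phy_tunable(struct net_device *ndev,
					  const struct ethtool_tunable *tuna,
					  const void *data)
	{
		struct aq_nic_s *aq_nic = netdev_priv(ndev);

		switch (tuna->id) {
		case ETHTOOL_PHY_DOWNSHIFT:
			return aq_nic_set_downshift(aq_nic, *(const u8 *)data);
		case ETHTOOL_PHY_EDPD:
			return aq_nic_set_media_detect(aq_nic, *(const u16 *)data);
		default:
			return -EOPNOTSUPP;
		}
	}

From userspace this surfaces as, e.g., ethtool --set-phy-tunable <ifname> downshift on count 3; for EDPD the hardware supports only the fixed AQ_HW_MEDIA_DETECT_CNT interval, which is why aq_nic_set_media_detect() above rejects any other nonzero value.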
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index 93c06dfa6c55..ee0c22d04935 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -612,6 +612,41 @@ static u32 aq_fw2x_state2_get(struct aq_hw_s *self) return aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR); } +static int aq_fw2x_set_downshift(struct aq_hw_s *self, u32 counter) +{ + int err = 0; + u32 mpi_opts; + u32 offset; + + offset = offsetof(struct hw_atl_utils_settings, downshift_retry_count); + err = hw_atl_write_fwsettings_dwords(self, offset, &counter, 1); + if (err) + return err; + + mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR); + if (counter) + mpi_opts |= HW_ATL_FW2X_CTRL_DOWNSHIFT; + else + mpi_opts &= ~HW_ATL_FW2X_CTRL_DOWNSHIFT; + aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts); + + return err; +} + +static int aq_fw2x_set_media_detect(struct aq_hw_s *self, bool on) +{ + u32 enable; + u32 offset; + + if (self->fw_ver_actual < HW_ATL_FW_VER_MEDIA_CONTROL) + return -EOPNOTSUPP; + + offset = offsetof(struct hw_atl_utils_settings, media_detect); + enable = on; + + return hw_atl_write_fwsettings_dwords(self, offset, &enable, 1); +} + static u32 aq_fw2x_get_link_capabilities(struct aq_hw_s *self) { int err = 0; @@ -692,6 +727,8 @@ const struct aq_fw_ops aq_fw_2x_ops = { .enable_ptp = aq_fw3x_enable_ptp, .led_control = aq_fw2x_led_control, .set_phyloopback = aq_fw2x_set_phyloopback, + .set_downshift = aq_fw2x_set_downshift, + .set_media_detect = aq_fw2x_set_media_detect, .adjust_ptp = aq_fw3x_adjust_ptp, .get_link_capabilities = aq_fw2x_get_link_capabilities, .send_macsec_req = aq_fw2x_send_macsec_req, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index 85628acbcc1d..dd259c8f2f4f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -519,6 +519,18 @@ int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self, return 0; } +static int aq_a2_fw_set_downshift(struct aq_hw_s *self, u32 counter) +{ + struct link_options_s link_options; + + hw_atl2_shared_buffer_get(self, link_options, link_options); + link_options.downshift = !!counter; + link_options.downshift_retry = counter; + hw_atl2_shared_buffer_write(self, link_options, link_options); + + return hw_atl2_shared_buffer_finish_ack(self); +} + const struct aq_fw_ops aq_a2_fw_ops = { .init = aq_a2_fw_init, .deinit = aq_a2_fw_deinit, @@ -536,4 +548,5 @@ const struct aq_fw_ops aq_a2_fw_ops = { .set_flow_control = aq_a2_fw_set_flow_control, .get_flow_control = aq_a2_fw_get_flow_control, .set_phyloopback = aq_a2_fw_set_phyloopback, + .set_downshift = aq_a2_fw_set_downshift, }; diff --git a/drivers/net/ethernet/arc/emac_arc.c b/drivers/net/ethernet/arc/emac_arc.c index 1c7736b7eaf7..800620b8f10d 100644 --- a/drivers/net/ethernet/arc/emac_arc.c +++ b/drivers/net/ethernet/arc/emac_arc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /** - * emac_arc.c - ARC EMAC specific glue layer + * DOC: emac_arc.c - ARC EMAC specific glue layer * * Copyright (C) 2014 Romain Perier * diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 38cce66ef212..dd5c8a9038bb 100644 --- 
a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -235,6 +235,59 @@ | NETIF_MSG_RX_ERR \ | NETIF_MSG_TX_ERR) +struct ag71xx_statistic { + unsigned short offset; + u32 mask; + const char name[ETH_GSTRING_LEN]; +}; + +static const struct ag71xx_statistic ag71xx_statistics[] = { + { 0x0080, GENMASK(17, 0), "Tx/Rx 64 Byte", }, + { 0x0084, GENMASK(17, 0), "Tx/Rx 65-127 Byte", }, + { 0x0088, GENMASK(17, 0), "Tx/Rx 128-255 Byte", }, + { 0x008C, GENMASK(17, 0), "Tx/Rx 256-511 Byte", }, + { 0x0090, GENMASK(17, 0), "Tx/Rx 512-1023 Byte", }, + { 0x0094, GENMASK(17, 0), "Tx/Rx 1024-1518 Byte", }, + { 0x0098, GENMASK(17, 0), "Tx/Rx 1519-1522 Byte VLAN", }, + { 0x009C, GENMASK(23, 0), "Rx Byte", }, + { 0x00A0, GENMASK(17, 0), "Rx Packet", }, + { 0x00A4, GENMASK(11, 0), "Rx FCS Error", }, + { 0x00A8, GENMASK(17, 0), "Rx Multicast Packet", }, + { 0x00AC, GENMASK(21, 0), "Rx Broadcast Packet", }, + { 0x00B0, GENMASK(17, 0), "Rx Control Frame Packet", }, + { 0x00B4, GENMASK(11, 0), "Rx Pause Frame Packet", }, + { 0x00B8, GENMASK(11, 0), "Rx Unknown OPCode Packet", }, + { 0x00BC, GENMASK(11, 0), "Rx Alignment Error", }, + { 0x00C0, GENMASK(15, 0), "Rx Frame Length Error", }, + { 0x00C4, GENMASK(11, 0), "Rx Code Error", }, + { 0x00C8, GENMASK(11, 0), "Rx Carrier Sense Error", }, + { 0x00CC, GENMASK(11, 0), "Rx Undersize Packet", }, + { 0x00D0, GENMASK(11, 0), "Rx Oversize Packet", }, + { 0x00D4, GENMASK(11, 0), "Rx Fragments", }, + { 0x00D8, GENMASK(11, 0), "Rx Jabber", }, + { 0x00DC, GENMASK(11, 0), "Rx Dropped Packet", }, + { 0x00E0, GENMASK(23, 0), "Tx Byte", }, + { 0x00E4, GENMASK(17, 0), "Tx Packet", }, + { 0x00E8, GENMASK(17, 0), "Tx Multicast Packet", }, + { 0x00EC, GENMASK(17, 0), "Tx Broadcast Packet", }, + { 0x00F0, GENMASK(11, 0), "Tx Pause Control Frame", }, + { 0x00F4, GENMASK(11, 0), "Tx Deferral Packet", }, + { 0x00F8, GENMASK(11, 0), "Tx Excessive Deferral Packet", }, + { 0x00FC, GENMASK(11, 0), "Tx Single Collision Packet", }, + { 0x0100, GENMASK(11, 0), "Tx Multiple Collision", }, + { 0x0104, GENMASK(11, 0), "Tx Late Collision Packet", }, + { 0x0108, GENMASK(11, 0), "Tx Excessive Collision Packet", }, + { 0x010C, GENMASK(12, 0), "Tx Total Collision", }, + { 0x0110, GENMASK(11, 0), "Tx Pause Frames Honored", }, + { 0x0114, GENMASK(11, 0), "Tx Drop Frame", }, + { 0x0118, GENMASK(11, 0), "Tx Jabber Frame", }, + { 0x011C, GENMASK(11, 0), "Tx FCS Error", }, + { 0x0120, GENMASK(11, 0), "Tx Control Frame", }, + { 0x0124, GENMASK(11, 0), "Tx Oversize Frame", }, + { 0x0128, GENMASK(11, 0), "Tx Undersize Frame", }, + { 0x012C, GENMASK(11, 0), "Tx Fragment", }, +}; + #define DESC_EMPTY BIT(31) #define DESC_MORE BIT(24) #define DESC_PKTLEN_M 0xfff @@ -394,6 +447,99 @@ static void ag71xx_int_disable(struct ag71xx *ag, u32 ints) ag71xx_cb(ag, AG71XX_REG_INT_ENABLE, ints); } +static void ag71xx_get_drvinfo(struct net_device *ndev, + struct ethtool_drvinfo *info) +{ + struct ag71xx *ag = netdev_priv(ndev); + + strlcpy(info->driver, "ag71xx", sizeof(info->driver)); + strlcpy(info->bus_info, of_node_full_name(ag->pdev->dev.of_node), + sizeof(info->bus_info)); +} + +static int ag71xx_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *kset) +{ + struct ag71xx *ag = netdev_priv(ndev); + + return phylink_ethtool_ksettings_get(ag->phylink, kset); +} + +static int ag71xx_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *kset) +{ + struct ag71xx *ag = netdev_priv(ndev); + + return 
phylink_ethtool_ksettings_set(ag->phylink, kset); +} + +static int ag71xx_ethtool_nway_reset(struct net_device *ndev) +{ + struct ag71xx *ag = netdev_priv(ndev); + + return phylink_ethtool_nway_reset(ag->phylink); +} + +static void ag71xx_ethtool_get_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *pause) +{ + struct ag71xx *ag = netdev_priv(ndev); + + phylink_ethtool_get_pauseparam(ag->phylink, pause); +} + +static int ag71xx_ethtool_set_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *pause) +{ + struct ag71xx *ag = netdev_priv(ndev); + + return phylink_ethtool_set_pauseparam(ag->phylink, pause); +} + +static void ag71xx_ethtool_get_strings(struct net_device *netdev, u32 sset, + u8 *data) +{ + if (sset == ETH_SS_STATS) { + int i; + + for (i = 0; i < ARRAY_SIZE(ag71xx_statistics); i++) + memcpy(data + i * ETH_GSTRING_LEN, + ag71xx_statistics[i].name, ETH_GSTRING_LEN); + } +} + +static void ag71xx_ethtool_get_stats(struct net_device *ndev, + struct ethtool_stats *stats, u64 *data) +{ + struct ag71xx *ag = netdev_priv(ndev); + int i; + + for (i = 0; i < ARRAY_SIZE(ag71xx_statistics); i++) + *data++ = ag71xx_rr(ag, ag71xx_statistics[i].offset) + & ag71xx_statistics[i].mask; +} + +static int ag71xx_ethtool_get_sset_count(struct net_device *ndev, int sset) +{ + if (sset == ETH_SS_STATS) + return ARRAY_SIZE(ag71xx_statistics); + return -EOPNOTSUPP; +} + +static const struct ethtool_ops ag71xx_ethtool_ops = { + .get_drvinfo = ag71xx_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ts_info = ethtool_op_get_ts_info, + .get_link_ksettings = ag71xx_get_link_ksettings, + .set_link_ksettings = ag71xx_set_link_ksettings, + .nway_reset = ag71xx_ethtool_nway_reset, + .get_pauseparam = ag71xx_ethtool_get_pauseparam, + .set_pauseparam = ag71xx_ethtool_set_pauseparam, + .get_strings = ag71xx_ethtool_get_strings, + .get_ethtool_stats = ag71xx_ethtool_get_stats, + .get_sset_count = ag71xx_ethtool_get_sset_count, +}; + static int ag71xx_mdio_wait_busy(struct ag71xx *ag) { struct net_device *ndev = ag->ndev; @@ -910,6 +1056,8 @@ static void ag71xx_mac_validate(struct phylink_config *config, phylink_set(mask, MII); + phylink_set(mask, Pause); + phylink_set(mask, Asym_Pause); phylink_set(mask, Autoneg); phylink_set(mask, 10baseT_Half); phylink_set(mask, 10baseT_Full); @@ -960,7 +1108,7 @@ static void ag71xx_mac_link_up(struct phylink_config *config, bool tx_pause, bool rx_pause) { struct ag71xx *ag = netdev_priv(to_net_dev(config->dev)); - u32 cfg2; + u32 cfg1, cfg2; u32 ifctl; u32 fifo5; @@ -994,6 +1142,15 @@ static void ag71xx_mac_link_up(struct phylink_config *config, ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, fifo5); ag71xx_wr(ag, AG71XX_REG_MAC_IFCTL, ifctl); + cfg1 = ag71xx_rr(ag, AG71XX_REG_MAC_CFG1); + cfg1 &= ~(MAC_CFG1_TFC | MAC_CFG1_RFC); + if (tx_pause) + cfg1 |= MAC_CFG1_TFC; + + if (rx_pause) + cfg1 |= MAC_CFG1_RFC; + ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, cfg1); + ag71xx_hw_start(ag); } @@ -1769,6 +1926,7 @@ static int ag71xx_probe(struct platform_device *pdev) } ndev->netdev_ops = &ag71xx_netdev_ops; + ndev->ethtool_ops = &ag71xx_ethtool_ops; INIT_DELAYED_WORK(&ag->restart_work, ag71xx_restart_work_func); timer_setup(&ag->oom_timer, ag71xx_oom_timer_handler, 0); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index decab9a8e4a8..0c12cf7bda50 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -204,7 +204,7 @@ static u32 
atl1c_wait_until_idle(struct atl1c_hw *hw, u32 modu_ctrl) /** * atl1c_phy_config - Timer Call-back - * @data: pointer to netdev cast into an unsigned long + * @t: timer list containing pointer to netdev cast into an unsigned long */ static void atl1c_phy_config(struct timer_list *t) { @@ -220,7 +220,6 @@ static void atl1c_phy_config(struct timer_list *t) void atl1c_reinit_locked(struct atl1c_adapter *adapter) { - WARN_ON(in_interrupt()); atl1c_down(adapter); atl1c_up(adapter); clear_bit(__AT_RESETTING, &adapter->flags); @@ -346,6 +345,7 @@ static void atl1c_del_timer(struct atl1c_adapter *adapter) /** * atl1c_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: index of hanging tx queue */ static void atl1c_tx_timeout(struct net_device *netdev, unsigned int txqueue) { @@ -826,16 +826,16 @@ static inline void atl1c_clean_buffer(struct pci_dev *pdev, return; if (buffer_info->dma) { if (buffer_info->flags & ATL1C_PCIMAP_FROMDEVICE) - pci_driection = PCI_DMA_FROMDEVICE; + pci_driection = DMA_FROM_DEVICE; else - pci_driection = PCI_DMA_TODEVICE; + pci_driection = DMA_TO_DEVICE; if (buffer_info->flags & ATL1C_PCIMAP_SINGLE) - pci_unmap_single(pdev, buffer_info->dma, - buffer_info->length, pci_driection); + dma_unmap_single(&pdev->dev, buffer_info->dma, + buffer_info->length, pci_driection); else if (buffer_info->flags & ATL1C_PCIMAP_PAGE) - pci_unmap_page(pdev, buffer_info->dma, - buffer_info->length, pci_driection); + dma_unmap_page(&pdev->dev, buffer_info->dma, + buffer_info->length, pci_driection); } if (buffer_info->skb) dev_consume_skb_any(buffer_info->skb); @@ -846,6 +846,7 @@ static inline void atl1c_clean_buffer(struct pci_dev *pdev, /** * atl1c_clean_tx_ring - Free Tx-skb * @adapter: board private structure + * @type: type of transmit queue */ static void atl1c_clean_tx_ring(struct atl1c_adapter *adapter, enum atl1c_trans_queue type) @@ -933,9 +934,8 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; - pci_free_consistent(pdev, adapter->ring_header.size, - adapter->ring_header.desc, - adapter->ring_header.dma); + dma_free_coherent(&pdev->dev, adapter->ring_header.size, + adapter->ring_header.desc, adapter->ring_header.dma); adapter->ring_header.desc = NULL; /* Note: just free tdp_ring.buffer_info, @@ -1717,10 +1717,9 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter) ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); buffer_info->skb = skb; buffer_info->length = adapter->rx_buffer_len; - mapping = pci_map_single(pdev, vir_addr, - buffer_info->length, - PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(pdev, mapping))) { + mapping = dma_map_single(&pdev->dev, vir_addr, + buffer_info->length, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&pdev->dev, mapping))) { dev_kfree_skb(skb); buffer_info->skb = NULL; buffer_info->length = 0; @@ -1831,8 +1830,8 @@ rrs_checked: rfd_index = (rrs->word0 >> RRS_RX_RFD_INDEX_SHIFT) & RRS_RX_RFD_INDEX_MASK; buffer_info = &rfd_ring->buffer_info[rfd_index]; - pci_unmap_single(pdev, buffer_info->dma, - buffer_info->length, PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, buffer_info->dma, + buffer_info->length, DMA_FROM_DEVICE); skb = buffer_info->skb; } else { /* TODO */ @@ -1863,6 +1862,8 @@ rrs_checked: /** * atl1c_clean - NAPI Rx polling callback + * @napi: napi info + * @budget: limit of packets to clean */ static int atl1c_clean(struct napi_struct *napi, int budget) { @@ -2106,10 +2107,10 @@ static int 
atl1c_tx_map(struct atl1c_adapter *adapter, buffer_info = atl1c_get_tx_buffer(adapter, use_tpd); buffer_info->length = map_len; - buffer_info->dma = pci_map_single(adapter->pdev, - skb->data, hdr_len, PCI_DMA_TODEVICE); - if (unlikely(pci_dma_mapping_error(adapter->pdev, - buffer_info->dma))) + buffer_info->dma = dma_map_single(&adapter->pdev->dev, + skb->data, hdr_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&adapter->pdev->dev, buffer_info->dma))) goto err_dma; ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE, @@ -2131,10 +2132,10 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter, buffer_info = atl1c_get_tx_buffer(adapter, use_tpd); buffer_info->length = buf_len - mapped_len; buffer_info->dma = - pci_map_single(adapter->pdev, skb->data + mapped_len, - buffer_info->length, PCI_DMA_TODEVICE); - if (unlikely(pci_dma_mapping_error(adapter->pdev, - buffer_info->dma))) + dma_map_single(&adapter->pdev->dev, + skb->data + mapped_len, + buffer_info->length, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&adapter->pdev->dev, buffer_info->dma))) goto err_dma; ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); @@ -2542,8 +2543,8 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * various kernel subsystems to support the mechanics required by a * fixed-high-32-bit system. */ - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) || - (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) { + if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) || + (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) { dev_err(&pdev->dev, "No usable DMA configuration,aborting\n"); goto err_dma; } diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 223ef846123e..098b0328e3cb 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -111,7 +111,7 @@ static inline void atl1e_irq_reset(struct atl1e_adapter *adapter) /** * atl1e_phy_config - Timer Call-back - * @data: pointer to netdev cast into an unsigned long + * @t: timer list containing pointer to netdev cast into an unsigned long */ static void atl1e_phy_config(struct timer_list *t) { @@ -127,8 +127,6 @@ static void atl1e_phy_config(struct timer_list *t) void atl1e_reinit_locked(struct atl1e_adapter *adapter) { - - WARN_ON(in_interrupt()); while (test_and_set_bit(__AT_RESETTING, &adapter->flags)) msleep(1); atl1e_down(adapter); @@ -196,7 +194,7 @@ static int atl1e_check_link(struct atl1e_adapter *adapter) /** * atl1e_link_chg_task - deal with link change event Out of interrupt context - * @netdev: network interface device structure + * @work: work struct with driver info */ static void atl1e_link_chg_task(struct work_struct *work) { @@ -246,6 +244,7 @@ static void atl1e_cancel_work(struct atl1e_adapter *adapter) /** * atl1e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: the index of the hanging queue */ static void atl1e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { @@ -654,11 +653,13 @@ static void atl1e_clean_tx_ring(struct atl1e_adapter *adapter) tx_buffer = &tx_ring->tx_buffer[index]; if (tx_buffer->dma) { if (tx_buffer->flags & ATL1E_TX_PCIMAP_SINGLE) - pci_unmap_single(pdev, tx_buffer->dma, - tx_buffer->length, PCI_DMA_TODEVICE); + dma_unmap_single(&pdev->dev, tx_buffer->dma, + tx_buffer->length, + DMA_TO_DEVICE); else if (tx_buffer->flags & 
ATL1E_TX_PCIMAP_PAGE) - pci_unmap_page(pdev, tx_buffer->dma, - tx_buffer->length, PCI_DMA_TODEVICE); + dma_unmap_page(&pdev->dev, tx_buffer->dma, + tx_buffer->length, + DMA_TO_DEVICE); tx_buffer->dma = 0; } } @@ -774,8 +775,8 @@ static void atl1e_free_ring_resources(struct atl1e_adapter *adapter) atl1e_clean_rx_ring(adapter); if (adapter->ring_vir_addr) { - pci_free_consistent(pdev, adapter->ring_size, - adapter->ring_vir_addr, adapter->ring_dma); + dma_free_coherent(&pdev->dev, adapter->ring_size, + adapter->ring_vir_addr, adapter->ring_dma); adapter->ring_vir_addr = NULL; } @@ -810,11 +811,12 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter) /* real ring DMA buffer */ size = adapter->ring_size; - adapter->ring_vir_addr = pci_zalloc_consistent(pdev, adapter->ring_size, - &adapter->ring_dma); + adapter->ring_vir_addr = dma_alloc_coherent(&pdev->dev, + adapter->ring_size, + &adapter->ring_dma, GFP_KERNEL); if (adapter->ring_vir_addr == NULL) { netdev_err(adapter->netdev, - "pci_alloc_consistent failed, size = D%d\n", size); + "dma_alloc_coherent failed, size = D%d\n", size); return -ENOMEM; } @@ -870,8 +872,8 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter) return 0; failed: if (adapter->ring_vir_addr != NULL) { - pci_free_consistent(pdev, adapter->ring_size, - adapter->ring_vir_addr, adapter->ring_dma); + dma_free_coherent(&pdev->dev, adapter->ring_size, + adapter->ring_vir_addr, adapter->ring_dma); adapter->ring_vir_addr = NULL; } return err; @@ -1233,11 +1235,15 @@ static bool atl1e_clean_tx_irq(struct atl1e_adapter *adapter) tx_buffer = &tx_ring->tx_buffer[next_to_clean]; if (tx_buffer->dma) { if (tx_buffer->flags & ATL1E_TX_PCIMAP_SINGLE) - pci_unmap_single(adapter->pdev, tx_buffer->dma, - tx_buffer->length, PCI_DMA_TODEVICE); + dma_unmap_single(&adapter->pdev->dev, + tx_buffer->dma, + tx_buffer->length, + DMA_TO_DEVICE); else if (tx_buffer->flags & ATL1E_TX_PCIMAP_PAGE) - pci_unmap_page(adapter->pdev, tx_buffer->dma, - tx_buffer->length, PCI_DMA_TODEVICE); + dma_unmap_page(&adapter->pdev->dev, + tx_buffer->dma, + tx_buffer->length, + DMA_TO_DEVICE); tx_buffer->dma = 0; } @@ -1495,6 +1501,8 @@ fatal_err: /** * atl1e_clean - NAPI Rx polling callback + * @napi: napi info + * @budget: number of packets to clean */ static int atl1e_clean(struct napi_struct *napi, int budget) { @@ -1710,8 +1718,9 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, tx_buffer = atl1e_get_tx_buffer(adapter, use_tpd); tx_buffer->length = map_len; - tx_buffer->dma = pci_map_single(adapter->pdev, - skb->data, hdr_len, PCI_DMA_TODEVICE); + tx_buffer->dma = dma_map_single(&adapter->pdev->dev, + skb->data, hdr_len, + DMA_TO_DEVICE); if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) return -ENOSPC; @@ -1739,8 +1748,9 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, ((buf_len - mapped_len) >= MAX_TX_BUF_LEN) ? 
MAX_TX_BUF_LEN : (buf_len - mapped_len); tx_buffer->dma = - pci_map_single(adapter->pdev, skb->data + mapped_len, - map_len, PCI_DMA_TODEVICE); + dma_map_single(&adapter->pdev->dev, + skb->data + mapped_len, map_len, + DMA_TO_DEVICE); if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) { /* We need to unwind the mappings we've done */ @@ -1749,8 +1759,10 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, while (adapter->tx_ring.next_to_use != ring_end) { tpd = atl1e_get_tpd(adapter); tx_buffer = atl1e_get_tx_buffer(adapter, tpd); - pci_unmap_single(adapter->pdev, tx_buffer->dma, - tx_buffer->length, PCI_DMA_TODEVICE); + dma_unmap_single(&adapter->pdev->dev, + tx_buffer->dma, + tx_buffer->length, + DMA_TO_DEVICE); } /* Reset the tx rings next pointer */ adapter->tx_ring.next_to_use = ring_start; @@ -2300,8 +2312,8 @@ static int atl1e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * various kernel subsystems to support the mechanics required by a * fixed-high-32-bit system. */ - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) || - (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) { + if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) || + (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) { dev_err(&pdev->dev, "No usable DMA configuration,aborting\n"); goto err_dma; } diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index b35fcfcd692d..eaf96d002fa5 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -1050,11 +1050,11 @@ static s32 atl1_setup_ring_resources(struct atl1_adapter *adapter) + sizeof(struct stats_msg_block) + 40; - ring_header->desc = pci_alloc_consistent(pdev, ring_header->size, - &ring_header->dma); + ring_header->desc = dma_alloc_coherent(&pdev->dev, ring_header->size, + &ring_header->dma, GFP_KERNEL); if (unlikely(!ring_header->desc)) { if (netif_msg_drv(adapter)) - dev_err(&pdev->dev, "pci_alloc_consistent failed\n"); + dev_err(&pdev->dev, "dma_alloc_coherent failed\n"); goto err_nomem; } @@ -1136,8 +1136,8 @@ static void atl1_clean_rx_ring(struct atl1_adapter *adapter) for (i = 0; i < rfd_ring->count; i++) { buffer_info = &rfd_ring->buffer_info[i]; if (buffer_info->dma) { - pci_unmap_page(pdev, buffer_info->dma, - buffer_info->length, PCI_DMA_FROMDEVICE); + dma_unmap_page(&pdev->dev, buffer_info->dma, + buffer_info->length, DMA_FROM_DEVICE); buffer_info->dma = 0; } if (buffer_info->skb) { @@ -1175,8 +1175,8 @@ static void atl1_clean_tx_ring(struct atl1_adapter *adapter) for (i = 0; i < tpd_ring->count; i++) { buffer_info = &tpd_ring->buffer_info[i]; if (buffer_info->dma) { - pci_unmap_page(pdev, buffer_info->dma, - buffer_info->length, PCI_DMA_TODEVICE); + dma_unmap_page(&pdev->dev, buffer_info->dma, + buffer_info->length, DMA_TO_DEVICE); buffer_info->dma = 0; } } @@ -1217,8 +1217,8 @@ static void atl1_free_ring_resources(struct atl1_adapter *adapter) atl1_clean_rx_ring(adapter); kfree(tpd_ring->buffer_info); - pci_free_consistent(pdev, ring_header->size, ring_header->desc, - ring_header->dma); + dma_free_coherent(&pdev->dev, ring_header->size, ring_header->desc, + ring_header->dma); tpd_ring->buffer_info = NULL; tpd_ring->desc = NULL; @@ -1866,9 +1866,9 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter) buffer_info->length = (u16) adapter->rx_buffer_len; page = virt_to_page(skb->data); offset = offset_in_page(skb->data); - buffer_info->dma = pci_map_page(pdev, page, offset, + buffer_info->dma = 
dma_map_page(&pdev->dev, page, offset, adapter->rx_buffer_len, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma); rfd_desc->buf_len = cpu_to_le16(adapter->rx_buffer_len); rfd_desc->coalese = 0; @@ -1992,8 +1992,8 @@ rrd_ok: } /* Good Receive */ - pci_unmap_page(adapter->pdev, buffer_info->dma, - buffer_info->length, PCI_DMA_FROMDEVICE); + dma_unmap_page(&adapter->pdev->dev, buffer_info->dma, + buffer_info->length, DMA_FROM_DEVICE); buffer_info->dma = 0; skb = buffer_info->skb; length = le16_to_cpu(rrd->xsz.xsum_sz.pkt_size); @@ -2062,8 +2062,8 @@ static int atl1_intr_tx(struct atl1_adapter *adapter) while (cmb_tpd_next_to_clean != sw_tpd_next_to_clean) { buffer_info = &tpd_ring->buffer_info[sw_tpd_next_to_clean]; if (buffer_info->dma) { - pci_unmap_page(adapter->pdev, buffer_info->dma, - buffer_info->length, PCI_DMA_TODEVICE); + dma_unmap_page(&adapter->pdev->dev, buffer_info->dma, + buffer_info->length, DMA_TO_DEVICE); buffer_info->dma = 0; } @@ -2210,9 +2210,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->length = hdr_len; page = virt_to_page(skb->data); offset = offset_in_page(skb->data); - buffer_info->dma = pci_map_page(adapter->pdev, page, + buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, offset, hdr_len, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); if (++next_to_use == tpd_ring->count) next_to_use = 0; @@ -2235,9 +2235,10 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, (hdr_len + i * ATL1_MAX_TX_BUF_LEN)); offset = offset_in_page(skb->data + (hdr_len + i * ATL1_MAX_TX_BUF_LEN)); - buffer_info->dma = pci_map_page(adapter->pdev, - page, offset, buffer_info->length, - PCI_DMA_TODEVICE); + buffer_info->dma = dma_map_page(&adapter->pdev->dev, + page, offset, + buffer_info->length, + DMA_TO_DEVICE); if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2247,8 +2248,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->length = buf_len; page = virt_to_page(skb->data); offset = offset_in_page(skb->data); - buffer_info->dma = pci_map_page(adapter->pdev, page, - offset, buf_len, PCI_DMA_TODEVICE); + buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, + offset, buf_len, + DMA_TO_DEVICE); if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2550,7 +2552,7 @@ static irqreturn_t atl1_intr(int irq, void *data) /** * atl1_phy_config - Timer Call-back - * @data: pointer to netdev cast into an unsigned long + * @t: timer_list containing pointer to netdev cast into an unsigned long */ static void atl1_phy_config(struct timer_list *t) { @@ -2922,7 +2924,7 @@ static int atl1_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * various kernel subsystems to support the mechanics required by a * fixed-high-32-bit system. 
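The atheros conversions above are all instances of one mechanical migration: the pci_* DMA wrappers were thin veneers over the generic DMA API, taking a struct pci_dev * where the generic calls take the underlying struct device *, with PCI_DMA_{TO,FROM}DEVICE mapping 1:1 onto DMA_{TO,FROM}_DEVICE. A minimal before/after sketch (the helper is hypothetical, not from the patch):

	static int sketch_map_tx_buf(struct pci_dev *pdev, void *buf,
				     size_t len, dma_addr_t *handle)
	{
		/* was: pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE) */
		*handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);

		/* was: pci_dma_mapping_error(pdev, *handle) */
		if (dma_mapping_error(&pdev->dev, *handle))
			return -ENOMEM;

		return 0;
	}

One behavioral nuance: pci_alloc_consistent() hard-coded GFP_ATOMIC, while dma_alloc_coherent() takes an explicit gfp argument, which is why the ring-allocation call sites converted above (all in process context) can pass GFP_KERNEL.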
*/ - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "no usable DMA configuration\n"); goto err_dma; diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index c915852b8892..7b80d924632a 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -281,8 +281,8 @@ static s32 atl2_setup_ring_resources(struct atl2_adapter *adapter) adapter->txs_ring_size * 4 + 7 + /* dword align */ adapter->rxd_ring_size * 1536 + 127; /* 128bytes align */ - adapter->ring_vir_addr = pci_alloc_consistent(pdev, size, - &adapter->ring_dma); + adapter->ring_vir_addr = dma_alloc_coherent(&pdev->dev, size, + &adapter->ring_dma, GFP_KERNEL); if (!adapter->ring_vir_addr) return -ENOMEM; @@ -663,8 +663,8 @@ static int atl2_request_irq(struct atl2_adapter *adapter) static void atl2_free_ring_resources(struct atl2_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; - pci_free_consistent(pdev, adapter->ring_size, adapter->ring_vir_addr, - adapter->ring_dma); + dma_free_coherent(&pdev->dev, adapter->ring_size, + adapter->ring_vir_addr, adapter->ring_dma); } /** @@ -994,6 +994,7 @@ static int atl2_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) /** * atl2_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: index of the hanging transmit queue */ static void atl2_tx_timeout(struct net_device *netdev, unsigned int txqueue) { @@ -1005,7 +1006,7 @@ static void atl2_tx_timeout(struct net_device *netdev, unsigned int txqueue) /** * atl2_watchdog - Timer Call-back - * @data: pointer to netdev cast into an unsigned long + * @t: timer list containing a pointer to netdev cast into an unsigned long */ static void atl2_watchdog(struct timer_list *t) { @@ -1030,7 +1031,7 @@ static void atl2_watchdog(struct timer_list *t) /** * atl2_phy_config - Timer Call-back - * @data: pointer to netdev cast into an unsigned long + * @t: timer list containing a pointer to netdev cast into an unsigned long */ static void atl2_phy_config(struct timer_list *t) { @@ -1085,7 +1086,6 @@ err_up: static void atl2_reinit_locked(struct atl2_adapter *adapter) { - WARN_ON(in_interrupt()); while (test_and_set_bit(__ATL2_RESETTING, &adapter->flags)) msleep(1); atl2_down(adapter); @@ -1235,6 +1235,7 @@ static int atl2_check_link(struct atl2_adapter *adapter) /** * atl2_link_chg_task - deal with link change event Out of interrupt context + * @work: pointer to work struct with private info */ static void atl2_link_chg_task(struct work_struct *work) { @@ -1328,8 +1329,8 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * until the kernel has the proper infrastructure to support 64-bit DMA * on these devices. 
*/ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) && - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) && + dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) { printk(KERN_ERR "atl2: No usable DMA configuration, aborting\n"); err = -EIO; goto err_dma; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 6fb620e25208..74c1778d841e 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2210,12 +2210,12 @@ static void b44_adjust_link(struct net_device *dev) { struct b44 *bp = netdev_priv(dev); struct phy_device *phydev = dev->phydev; - bool status_changed = 0; + bool status_changed = false; BUG_ON(!phydev); if (bp->old_link != phydev->link) { - status_changed = 1; + status_changed = true; bp->old_link = phydev->link; } @@ -2223,11 +2223,11 @@ static void b44_adjust_link(struct net_device *dev) if (phydev->link) { if ((phydev->duplex == DUPLEX_HALF) && (bp->flags & B44_FLAG_FULL_DUPLEX)) { - status_changed = 1; + status_changed = true; bp->flags &= ~B44_FLAG_FULL_DUPLEX; } else if ((phydev->duplex == DUPLEX_FULL) && !(bp->flags & B44_FLAG_FULL_DUPLEX)) { - status_changed = 1; + status_changed = true; bp->flags |= B44_FLAG_FULL_DUPLEX; } } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 0762d5d1a810..0fdd19d99d99 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -20,6 +20,7 @@ #include <linux/phy.h> #include <linux/phy_fixed.h> #include <net/dsa.h> +#include <linux/clk.h> #include <net/ip.h> #include <net/ipv6.h> @@ -186,6 +187,11 @@ static int bcm_sysport_set_features(struct net_device *dev, netdev_features_t features) { struct bcm_sysport_priv *priv = netdev_priv(dev); + int ret; + + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; /* Read CRC forward */ if (!priv->is_lite) @@ -197,6 +203,8 @@ static int bcm_sysport_set_features(struct net_device *dev, bcm_sysport_set_rx_csum(dev, features); bcm_sysport_set_tx_csum(dev, features); + clk_disable_unprepare(priv->clk); + return 0; } @@ -1940,6 +1948,8 @@ static int bcm_sysport_open(struct net_device *dev) unsigned int i; int ret; + clk_prepare_enable(priv->clk); + /* Reset UniMAC */ umac_reset(priv); @@ -1970,7 +1980,8 @@ static int bcm_sysport_open(struct net_device *dev) 0, priv->phy_interface); if (!phydev) { netdev_err(dev, "could not attach to PHY\n"); - return -ENODEV; + ret = -ENODEV; + goto out_clk_disable; } /* Reset house keeping link status */ @@ -2048,6 +2059,8 @@ out_free_irq0: free_irq(priv->irq0, dev); out_phy_disconnect: phy_disconnect(phydev); +out_clk_disable: + clk_disable_unprepare(priv->clk); return ret; } @@ -2106,6 +2119,8 @@ static int bcm_sysport_stop(struct net_device *dev) /* Disconnect from PHY */ phy_disconnect(dev->phydev); + clk_disable_unprepare(priv->clk); + return 0; } @@ -2487,6 +2502,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) /* Initialize private members */ priv = netdev_priv(dev); + priv->clk = devm_clk_get_optional(&pdev->dev, "sw_sysport"); + if (IS_ERR(priv->clk)) + return PTR_ERR(priv->clk); + /* Allocate number of TX rings */ priv->tx_rings = devm_kcalloc(&pdev->dev, txq, sizeof(struct bcm_sysport_tx_ring), @@ -2566,6 +2585,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) if (!ret) device_set_wakeup_capable(&pdev->dev, 1); + priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol"); + if 
(IS_ERR(priv->wol_clk)) + return PTR_ERR(priv->wol_clk); + /* Set the needed headroom once and for all */ BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8); dev->needed_headroom += sizeof(struct bcm_tsb); @@ -2590,6 +2613,8 @@ static int bcm_sysport_probe(struct platform_device *pdev) goto err_deregister_notifier; } + clk_prepare_enable(priv->clk); + priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK; dev_info(&pdev->dev, "Broadcom SYSTEMPORT%s " REV_FMT @@ -2598,6 +2623,8 @@ static int bcm_sysport_probe(struct platform_device *pdev) (priv->rev >> 8) & 0xff, priv->rev & 0xff, priv->irq0, priv->irq1, txq, rxq); + clk_disable_unprepare(priv->clk); + return 0; err_deregister_notifier: @@ -2751,8 +2778,12 @@ static int __maybe_unused bcm_sysport_suspend(struct device *d) bcm_sysport_fini_rx_ring(priv); /* Get prepared for Wake-on-LAN */ - if (device_may_wakeup(d) && priv->wolopts) + if (device_may_wakeup(d) && priv->wolopts) { + clk_prepare_enable(priv->wol_clk); ret = bcm_sysport_suspend_to_wol(priv); + } + + clk_disable_unprepare(priv->clk); return ret; } @@ -2767,6 +2798,10 @@ static int __maybe_unused bcm_sysport_resume(struct device *d) if (!netif_running(dev)) return 0; + clk_prepare_enable(priv->clk); + if (priv->wolopts) + clk_disable_unprepare(priv->wol_clk); + umac_reset(priv); /* Disable the UniMAC RX/TX */ @@ -2846,6 +2881,7 @@ out_free_rx_ring: out_free_tx_rings: for (i = 0; i < dev->num_tx_queues; i++) bcm_sysport_fini_tx_ring(priv, i); + clk_disable_unprepare(priv->clk); return ret; } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 6d80735fbc7f..3a5cb6f128f5 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -770,6 +770,8 @@ struct bcm_sysport_priv { u32 wolopts; u8 sopass[SOPASS_MAX]; unsigned int wol_irq_disabled:1; + struct clk *clk; + struct clk *wol_clk; /* MIB related fields */ struct bcm_sysport_mib mib; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index e3d92e4f2193..1a6ec1a12d53 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -504,6 +504,7 @@ static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue, * @len_on_bd: total length of the first packet for the * aggregation. * @pkt_len: length of all segments + * @num_of_coalesced_segs: count of segments * * Approximate value of the MSS for this aggregation calculated using * the first packet of it. 
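The SYSTEMPORT hunks just above gate MAC register access behind two newly acquired clocks. devm_clk_get_optional() is the key choice: it returns NULL rather than an error when the device's firmware description names no such clock, and clk_prepare_enable()/clk_disable_unprepare() on a NULL clock are harmless no-ops, so a single code path serves both clocked and always-on integrations. A sketch of the probe-time pattern (clock name as in the hunks above, helper hypothetical):

	#include <linux/clk.h>

	static int sketch_get_and_ungate(struct platform_device *pdev,
					 struct clk **clk)
	{
		*clk = devm_clk_get_optional(&pdev->dev, "sw_sysport");
		if (IS_ERR(*clk))
			return PTR_ERR(*clk);	/* e.g. -EPROBE_DEFER */

		/* ungate before the first register access; NULL is a no-op */
		return clk_prepare_enable(*clk);
	}

The second clock, "sw_sysportwol", follows the same rule but is only kept running across the suspend-to-WoL window.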
@@ -1958,6 +1959,7 @@ void bnx2x_set_num_queues(struct bnx2x *bp) * bnx2x_set_real_num_queues - configure netdev->real_num_[tx,rx]_queues * * @bp: Driver handle + * @include_cnic: handle cnic case * * We currently support for at most 16 Tx queues for each CoS thus we will * allocate a multiple of 16 for ETH L2 rings according to the value of the @@ -4229,8 +4231,8 @@ void bnx2x_get_c2s_mapping(struct bnx2x *bp, u8 *c2s_map, u8 *c2s_default) /** * bnx2x_setup_tc - routine to configure net_device for multi tc * - * @netdev: net device to configure - * @tc: number of traffic classes to enable + * @dev: net device to configure + * @num_tc: number of traffic classes to enable * * callback connected to the ndo_setup_tc function pointer */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 7e4c93be4451..d8b1824c334d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -825,9 +825,9 @@ static inline void bnx2x_del_all_napi_cnic(struct bnx2x *bp) int i; for_each_rx_queue_cnic(bp, i) { - napi_hash_del(&bnx2x_fp(bp, i, napi)); - netif_napi_del(&bnx2x_fp(bp, i, napi)); + __netif_napi_del(&bnx2x_fp(bp, i, napi)); } + synchronize_net(); } static inline void bnx2x_del_all_napi(struct bnx2x *bp) @@ -835,9 +835,9 @@ static inline void bnx2x_del_all_napi(struct bnx2x *bp) int i; for_each_eth_queue(bp, i) { - napi_hash_del(&bnx2x_fp(bp, i, napi)); - netif_napi_del(&bnx2x_fp(bp, i, napi)); + __netif_napi_del(&bnx2x_fp(bp, i, napi)); } + synchronize_net(); } int bnx2x_set_int_mode(struct bnx2x *bp); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 7cea33803f7f..32245bbe88a8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -839,8 +839,9 @@ static bool bnx2x_is_wreg_in_chip(struct bnx2x *bp, /** * bnx2x_read_pages_regs - read "paged" registers * - * @bp device handle - * @p output buffer + * @bp: device handle + * @p: output buffer + * @preset: the preset value * * Reads "paged" memories: memories that may only be read by first writing to a * specific address ("write address") and then reading from a specific address @@ -3561,6 +3562,7 @@ static void bnx2x_get_channels(struct net_device *dev, * bnx2x_change_num_queues - change the number of RSS queues. * * @bp: bnx2x private structure + * @num_rss: rss count * * Re-configure interrupt mode to get the new number of MSI-X * vectors and re-add NAPI objects. diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 3c543dd7a8f3..28069b290862 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -3086,9 +3086,9 @@ void bnx2x_func_init(struct bnx2x *bp, struct bnx2x_func_init_params *p) /** * bnx2x_get_common_flags - Return common flags * - * @bp device handle - * @fp queue handle - * @zero_stats TRUE if statistics zeroing is needed + * @bp: device handle + * @fp: queue handle + * @zero_stats: TRUE if statistics zeroing is needed * * Return the flags that are common for the Tx-only and not normal connections. 
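The bnx2x_cmn.h hunks above swap the per-queue napi_hash_del() + netif_napi_del() pairs for __netif_napi_del() plus one trailing synchronize_net(). netif_napi_del() implies an RCU grace-period wait per NAPI instance; the double-underscore variant only unlists, so a driver tearing down many queues pays for a single grace period. A condensed equivalent of bnx2x_del_all_napi() above, sketched over a plain fastpath array:

	static void sketch_del_all_napi(struct bnx2x_fastpath *fp, int n)
	{
		int i;

		for (i = 0; i < n; i++)
			__netif_napi_del(&fp[i].napi);	/* unlist, don't wait */

		synchronize_net();	/* one grace period covers all n */
	}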
*/ @@ -6313,11 +6313,11 @@ static void bnx2x_init_internal(struct bnx2x *bp, u32 load_code) case FW_MSG_CODE_DRV_LOAD_COMMON: case FW_MSG_CODE_DRV_LOAD_COMMON_CHIP: bnx2x_init_internal_common(bp); - /* no break */ + fallthrough; case FW_MSG_CODE_DRV_LOAD_PORT: /* nothing to do */ - /* no break */ + fallthrough; case FW_MSG_CODE_DRV_LOAD_FUNCTION: /* internal memory per function is @@ -12390,7 +12390,7 @@ static int bnx2x_init_bp(struct bnx2x *bp) } if (CHIP_IS_E1(bp)) - bp->dropless_fc = 0; + bp->dropless_fc = false; else bp->dropless_fc = dropless_fc | bnx2x_get_dropless_info(bp); @@ -13591,8 +13591,8 @@ static int bnx2x_set_qm_cid_count(struct bnx2x *bp) /** * bnx2x_get_num_none_def_sbs - return the number of none default SBs - * - * @dev: pci device + * @pdev: pci device + * @cnic_cnt: count * */ static int bnx2x_get_num_non_def_sbs(struct pci_dev *pdev, int cnic_cnt) @@ -14451,9 +14451,7 @@ module_exit(bnx2x_cleanup); /** * bnx2x_set_iscsi_eth_mac_addr - set iSCSI MAC(s). - * * @bp: driver handle - * @set: set or clear the CAM entry * * This function will wait until the ramrod completion returns. * Return 0 if success, -ENODEV if ramrod doesn't return. @@ -15412,7 +15410,7 @@ static int bnx2x_hwtstamp_ioctl(struct bnx2x *bp, struct ifreq *ifr) return -EINVAL; } - bp->hwtstamp_ioctl_called = 1; + bp->hwtstamp_ioctl_called = true; bp->tx_type = config.tx_type; bp->rx_filter = config.rx_filter; @@ -15494,7 +15492,7 @@ void bnx2x_init_ptp(struct bnx2x *bp) bnx2x_init_cyclecounter(bp); timecounter_init(&bp->timecounter, &bp->cyclecounter, ktime_to_ns(ktime_get_real())); - bp->timecounter_init_done = 1; + bp->timecounter_init_done = true; } DP(BNX2X_MSG_PTP, "PTP initialization ended successfully\n"); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index e26f4da5a6d7..6cd1523ad9e5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -37,10 +37,12 @@ /** * bnx2x_exe_queue_init - init the Exe Queue object * + * @bp: driver handle * @o: pointer to the object * @exe_len: length * @owner: pointer to the owner * @validate: validate function pointer + * @remove: remove function pointer * @optimize: optimize function pointer * @exec: execute function pointer * @get: get function pointer @@ -103,7 +105,7 @@ static inline int bnx2x_exe_queue_length(struct bnx2x_exe_queue_obj *o) * * @bp: driver handle * @o: queue - * @cmd: new command to add + * @elem: new command to add * @restore: true - do not optimize the command * * If the element is optimized or is illegal, frees it. @@ -277,7 +279,7 @@ static void bnx2x_raw_set_pending(struct bnx2x_raw_obj *o) * * @bp: device handle * @state: state which is to be cleared - * @state_p: state buffer + * @pstate: state buffer * */ static inline int bnx2x_state_wait(struct bnx2x *bp, int state, @@ -424,8 +426,8 @@ static bool bnx2x_put_credit_vlan_mac(struct bnx2x_vlan_mac_obj *o) * @bp: device handle * @o: vlan_mac object * - * @details: Non-blocking implementation; should be called under execution - * queue lock. + * Context: Non-blocking implementation; should be called under execution + * queue lock. 
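Two mechanical cleanups ride along in the bnx2x_main.c hunks above: boolean fields (dropless_fc, hwtstamp_ioctl_called, timecounter_init_done) gain proper true/false assignments, and the "/* no break */" comments in bnx2x_init_internal() become the fallthrough pseudo-keyword from linux/compiler_attributes.h, which -Wimplicit-fallthrough can verify where a bare comment is only a convention. The idiom, in the shape used above (helper names illustrative):

	switch (load_code) {
	case FW_MSG_CODE_DRV_LOAD_COMMON:
		init_common();
		fallthrough;	/* common load also does the per-port work */
	case FW_MSG_CODE_DRV_LOAD_PORT:
		fallthrough;	/* nothing port-only to do */
	case FW_MSG_CODE_DRV_LOAD_FUNCTION:
		init_function();
		break;
	}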
*/ static int __bnx2x_vlan_mac_h_write_trylock(struct bnx2x *bp, struct bnx2x_vlan_mac_obj *o) @@ -445,7 +447,7 @@ static int __bnx2x_vlan_mac_h_write_trylock(struct bnx2x *bp, * @bp: device handle * @o: vlan_mac object * - * @details Should be called under execution queue lock; notice it might release + * details Should be called under execution queue lock; notice it might release * and reclaim it during its run. */ static void __bnx2x_vlan_mac_h_exec_pending(struct bnx2x *bp, @@ -475,7 +477,7 @@ static void __bnx2x_vlan_mac_h_exec_pending(struct bnx2x *bp, * @o: vlan_mac object * @ramrod_flags: ramrod flags of missed execution * - * @details Should be called under execution queue lock. + * Context: Should be called under execution queue lock. */ static void __bnx2x_vlan_mac_h_pend(struct bnx2x *bp, struct bnx2x_vlan_mac_obj *o, @@ -493,7 +495,7 @@ static void __bnx2x_vlan_mac_h_pend(struct bnx2x *bp, * @bp: device handle * @o: vlan_mac object * - * @details Should be called under execution queue lock. Notice if a pending + * Context: Should be called under execution queue lock. Notice if a pending * execution exists, it would perform it - possibly releasing and * reclaiming the execution queue lock. */ @@ -516,7 +518,7 @@ static void __bnx2x_vlan_mac_h_write_unlock(struct bnx2x *bp, * @bp: device handle * @o: vlan_mac object * - * @details Should be called under the execution queue lock. May sleep. May + * Context: Should be called under the execution queue lock. May sleep. May * release and reclaim execution queue lock during its run. */ static int __bnx2x_vlan_mac_h_read_lock(struct bnx2x *bp, @@ -536,7 +538,7 @@ static int __bnx2x_vlan_mac_h_read_lock(struct bnx2x *bp, * @bp: device handle * @o: vlan_mac object * - * @details May sleep. Claims and releases execution queue lock during its run. + * Context: May sleep. Claims and releases execution queue lock during its run. */ int bnx2x_vlan_mac_h_read_lock(struct bnx2x *bp, struct bnx2x_vlan_mac_obj *o) @@ -556,7 +558,7 @@ int bnx2x_vlan_mac_h_read_lock(struct bnx2x *bp, * @bp: device handle * @o: vlan_mac object * - * @details Should be called under execution queue lock. Notice if a pending + * Context: Should be called under execution queue lock. Notice if a pending * execution exists, it would be performed if this was the last * reader. possibly releasing and reclaiming the execution queue lock. */ @@ -591,7 +593,7 @@ static void __bnx2x_vlan_mac_h_read_unlock(struct bnx2x *bp, * @bp: device handle * @o: vlan_mac object * - * @details Notice if a pending execution exists, it would be performed if this + * Context: Notice if a pending execution exists, it would be performed if this * was the last reader. Claims and releases the execution queue lock * during its run. */ @@ -968,7 +970,7 @@ static void bnx2x_set_one_mac_e2(struct bnx2x *bp, * * @bp: device handle * @o: queue - * @type: + * @type: the type of echo * @cam_offset: offset in cam memory * @hdr: pointer to a header to setup * @@ -1608,8 +1610,8 @@ static int __bnx2x_vlan_mac_execute_step(struct bnx2x *bp, * * @bp: device handle * @o: bnx2x_vlan_mac_obj - * @cqe: - * @cont: if true schedule next execution chunk + * @cqe: completion element + * @ramrod_flags: if set schedule next execution chunk * */ static int bnx2x_complete_vlan_mac(struct bnx2x *bp, @@ -1656,7 +1658,7 @@ static int bnx2x_complete_vlan_mac(struct bnx2x *bp, * bnx2x_optimize_vlan_mac - optimize ADD and DEL commands. 
* * @bp: device handle - * @o: bnx2x_qable_obj + * @qo: bnx2x_qable_obj * @elem: bnx2x_exeq_elem */ static int bnx2x_optimize_vlan_mac(struct bnx2x *bp, @@ -1714,10 +1716,10 @@ static int bnx2x_optimize_vlan_mac(struct bnx2x *bp, * bnx2x_vlan_mac_get_registry_elem - prepare a registry element * * @bp: device handle - * @o: - * @elem: - * @restore: - * @re: + * @o: vlan object + * @elem: element + * @restore: to restore or not + * @re: registry * * prepare a registry element according to the current command request. */ @@ -1768,9 +1770,9 @@ static inline int bnx2x_vlan_mac_get_registry_elem( * bnx2x_execute_vlan_mac - execute vlan mac command * * @bp: device handle - * @qo: - * @exe_chunk: - * @ramrod_flags: + * @qo: bnx2x_qable_obj pointer + * @exe_chunk: chunk + * @ramrod_flags: flags * * go and send a ramrod! */ @@ -2006,8 +2008,8 @@ int bnx2x_config_vlan_mac(struct bnx2x *bp, * bnx2x_vlan_mac_del_all - delete elements with given vlan_mac_flags spec * * @bp: device handle - * @o: - * @vlan_mac_flags: + * @o: vlan object info + * @vlan_mac_flags: vlan flags * @ramrod_flags: execution flags to be used for this deletion * * if the last operation has completed successfully and there are no @@ -2767,7 +2769,7 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, /** * bnx2x_mcast_get_next_bin - get the next set bin (index) * - * @o: + * @o: multicast object info * @last: index to start looking from (including) * * returns the next found (set) bin or a negative value if none is found. @@ -2892,7 +2894,7 @@ static void bnx2x_mcast_set_one_rule_e2(struct bnx2x *bp, * bnx2x_mcast_handle_restore_cmd_e2 - restore configuration from the registry * * @bp: device handle - * @o: + * @o: multicast object info * @start_bin: index in the registry to start from (including) * @rdata_idx: index in the ramrod data to start from * @@ -3202,11 +3204,11 @@ static inline void bnx2x_mcast_hdl_del(struct bnx2x *bp, } /** - * bnx2x_mcast_handle_current_cmd - + * bnx2x_mcast_handle_current_cmd - send command if room * * @bp: device handle - * @p: - * @cmd: + * @p: ramrod mcast info + * @cmd: command * @start_cnt: first line in the ramrod data that may be used * * This function is called iff there is enough place for the current command in @@ -3323,7 +3325,7 @@ static void bnx2x_mcast_revert_e2(struct bnx2x *bp, * bnx2x_mcast_set_rdata_hdr_e2 - sets a header values * * @bp: device handle - * @p: + * @p: ramrod parameters * @len: number of rules to handle */ static inline void bnx2x_mcast_set_rdata_hdr_e2(struct bnx2x *bp, @@ -3684,7 +3686,7 @@ static void bnx2x_mcast_set_one_rule_e1(struct bnx2x *bp, * bnx2x_mcast_set_rdata_hdr_e1 - set header values in mac_configuration_cmd * * @bp: device handle - * @p: + * @p: ramrod parameters * @len: number of rules to handle */ static inline void bnx2x_mcast_set_rdata_hdr_e1(struct bnx2x *bp, @@ -3711,7 +3713,7 @@ static inline void bnx2x_mcast_set_rdata_hdr_e1(struct bnx2x *bp, * bnx2x_mcast_handle_restore_cmd_e1 - restore command for 57710 * * @bp: device handle - * @o: + * @o: multicast info * @start_idx: index in the registry to start from * @rdata_idx: index in the ramrod data to start from * @@ -3798,10 +3800,10 @@ static inline int bnx2x_mcast_handle_pending_cmds_e1( /** * bnx2x_get_fw_mac_addr - revert the bnx2x_set_fw_mac_addr(). 
* - * @fw_hi: - * @fw_mid: - * @fw_lo: - * @mac: + * @fw_hi: address + * @fw_mid: address + * @fw_lo: address + * @mac: mac address */ static inline void bnx2x_get_fw_mac_addr(__le16 *fw_hi, __le16 *fw_mid, __le16 *fw_lo, u8 *mac) @@ -3818,7 +3820,7 @@ static inline void bnx2x_get_fw_mac_addr(__le16 *fw_hi, __le16 *fw_mid, * bnx2x_mcast_refresh_registry_e1 - * * @bp: device handle - * @cnt: + * @o: multicast info * * Check the ramrod data first entry flag to see if it's a DELETE or ADD command * and update the registry correspondingly: if ADD - allocate a memory and add @@ -4311,7 +4313,7 @@ static bool bnx2x_credit_pool_get_entry_always_true( /** * bnx2x_init_credit_pool - initialize credit pool internals. * - * @p: + * @p: credit pool * @base: Base entry in the CAM to use. * @credit: pool size. * @@ -4725,8 +4727,8 @@ static int bnx2x_queue_wait_comp(struct bnx2x *bp, * bnx2x_queue_comp_cmd - complete the state change command. * * @bp: device handle - * @o: - * @cmd: + * @o: queue info + * @cmd: command to exec * * Checks that the arrived completion is expected. */ @@ -5477,8 +5479,8 @@ static int bnx2x_queue_send_cmd_e2(struct bnx2x *bp, * bnx2x_queue_chk_transition - check state machine of a regular Queue * * @bp: device handle - * @o: - * @params: + * @o: queue info + * @params: queue state * * (not Forwarding) * It both checks if the requested command is legal in a current @@ -5735,8 +5737,8 @@ static int bnx2x_func_wait_comp(struct bnx2x *bp, * bnx2x_func_state_change_comp - complete the state machine transition * * @bp: device handle - * @o: - * @cmd: + * @o: function info + * @cmd: more info * * Called on state change transition. Completes the state * machine transition only - no HW interaction. @@ -5776,8 +5778,8 @@ static inline int bnx2x_func_state_change_comp(struct bnx2x *bp, * bnx2x_func_comp_cmd - complete the state change command * * @bp: device handle - * @o: - * @cmd: + * @o: function info + * @cmd: more info * * Checks that the arrived completion is expected. 
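The long run of bnx2x_sp.c hunks above is kernel-doc repair rather than functional change: bare "@param:" stubs gain descriptions, parameter names are corrected to match the signatures (@o vs @qo, @cmd vs @elem), and free-form "@details" locking notes move into Context:, a section scripts/kernel-doc recognizes. The target layout, with a hypothetical function:

	/**
	 * sketch_queue_op - one-line summary of the operation
	 * @bp: device handle
	 * @o: object the operation applies to
	 *
	 * Context: Should be called under the execution queue lock. May sleep.
	 *
	 * Return: 0 on success, negative errno otherwise.
	 */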
*/ @@ -5796,8 +5798,8 @@ static int bnx2x_func_comp_cmd(struct bnx2x *bp, * bnx2x_func_chk_transition - perform function state machine transition * * @bp: device handle - * @o: - * @params: + * @o: function info + * @params: state parameters * * It both checks if the requested command is legal in a current * state and, if it's legal, sets a `next_state' in the object diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7b7e8b7883c8..fa147865e33f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -69,6 +69,7 @@ #include "bnxt_debugfs.h" #define BNXT_TX_TIMEOUT (5 * HZ) +#define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Broadcom BCM573xx network driver"); @@ -254,6 +255,7 @@ static const u16 bnxt_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE, ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY, ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY, + ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG, }; static struct workqueue_struct *bnxt_pf_wq; @@ -1172,7 +1174,10 @@ static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { if (!rxr->bnapi->in_reset) { rxr->bnapi->in_reset = true; - set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event); + if (bp->flags & BNXT_FLAG_CHIP_P5) + set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event); + else + set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event); bnxt_queue_sp_work(bp); } rxr->rx_next_cons = 0xffff; @@ -1738,8 +1743,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (unlikely(cons != rxr->rx_next_cons)) { int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp); - netdev_warn(bp->dev, "RX cons %x != expected cons %x\n", - cons, rxr->rx_next_cons); + /* 0xffff is forced error, don't print it */ + if (rxr->rx_next_cons != 0xffff) + netdev_warn(bp->dev, "RX cons %x != expected cons %x\n", + cons, rxr->rx_next_cons); bnxt_sched_reset(bp, rxr); return rc1; } @@ -1772,9 +1779,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = -EIO; if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { bnapi->cp_ring.sw_stats.rx.rx_buf_errors++; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) { - netdev_warn(bp->dev, "RX buffer error %x\n", - rx_err); + if (!(bp->flags & BNXT_FLAG_CHIP_P5) && + !(bp->fw_cap & BNXT_FW_CAP_RING_MONITOR)) { + netdev_warn_once(bp->dev, "RX buffer error %x\n", + rx_err); bnxt_sched_reset(bp, rxr); } } @@ -1941,19 +1949,43 @@ u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx) return val; } +static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id) +{ + int i; + + for (i = 0; i < bp->rx_nr_rings; i++) { + u16 grp_idx = bp->rx_ring[i].bnapi->index; + struct bnxt_ring_grp_info *grp_info; + + grp_info = &bp->grp_info[grp_idx]; + if (grp_info->agg_fw_ring_id == ring_id) + return grp_idx; + } + return INVALID_HW_RING_ID; +} + #define BNXT_GET_EVENT_PORT(data) \ ((data) & \ ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK) +#define BNXT_EVENT_RING_TYPE(data2) \ + ((data2) & \ + ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_MASK) + +#define BNXT_EVENT_RING_TYPE_RX(data2) \ + (BNXT_EVENT_RING_TYPE(data2) == \ + ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX) + static int bnxt_async_event_process(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl) { u16 event_id = le16_to_cpu(cmpl->event_id); + u32 data1 = le32_to_cpu(cmpl->event_data1); + u32 data2 = le32_to_cpu(cmpl->event_data2); /* TODO 
CHIMP_FW: Define event id's for link change, error etc */ switch (event_id) { case ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: { - u32 data1 = le32_to_cpu(cmpl->event_data1); struct bnxt_link_info *link_info = &bp->link_info; if (BNXT_VF(bp)) @@ -1983,7 +2015,6 @@ static int bnxt_async_event_process(struct bnxt *bp, set_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event); break; case ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED: { - u32 data1 = le32_to_cpu(cmpl->event_data1); u16 port_id = BNXT_GET_EVENT_PORT(data1); if (BNXT_VF(bp)) @@ -2000,9 +2031,10 @@ static int bnxt_async_event_process(struct bnxt *bp, goto async_event_process_exit; set_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event); break; - case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: { - u32 data1 = le32_to_cpu(cmpl->event_data1); - + case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: + if (netif_msg_hw(bp)) + netdev_warn(bp->dev, "Received RESET_NOTIFY event, data1: 0x%x, data2: 0x%x\n", + data1, data2); if (!bp->fw_health) goto async_event_process_exit; @@ -2022,10 +2054,8 @@ static int bnxt_async_event_process(struct bnxt *bp, } set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event); break; - } case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY: { struct bnxt_fw_health *fw_health = bp->fw_health; - u32 data1 = le32_to_cpu(cmpl->event_data1); if (!fw_health) goto async_event_process_exit; @@ -2052,6 +2082,28 @@ static int bnxt_async_event_process(struct bnxt *bp, bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); goto async_event_process_exit; } + case ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG: { + struct bnxt_rx_ring_info *rxr; + u16 grp_idx; + + if (bp->flags & BNXT_FLAG_CHIP_P5) + goto async_event_process_exit; + + netdev_warn(bp->dev, "Ring monitor event, ring type %lu id 0x%x\n", + BNXT_EVENT_RING_TYPE(data2), data1); + if (!BNXT_EVENT_RING_TYPE_RX(data2)) + goto async_event_process_exit; + + grp_idx = bnxt_agg_ring_id_to_grp_idx(bp, data1); + if (grp_idx == INVALID_HW_RING_ID) { + netdev_warn(bp->dev, "Unknown RX agg ring id 0x%x\n", + data1); + goto async_event_process_exit; + } + rxr = bp->bnapi[grp_idx]->rx_ring; + bnxt_sched_reset(bp, rxr); + goto async_event_process_exit; + } default: goto async_event_process_exit; } @@ -2250,7 +2302,7 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi) bnapi->tx_pkts = 0; } - if (bnapi->events & BNXT_RX_EVENT) { + if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; if (bnapi->events & BNXT_AGG_EVENT) @@ -2540,93 +2592,91 @@ static void bnxt_free_tx_skbs(struct bnxt *bp) } } -static void bnxt_free_rx_skbs(struct bnxt *bp) +static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) { - int i, max_idx, max_agg_idx; + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr]; struct pci_dev *pdev = bp->pdev; - - if (!bp->rx_ring) - return; + struct bnxt_tpa_idx_map *map; + int i, max_idx, max_agg_idx; max_idx = bp->rx_nr_pages * RX_DESC_CNT; max_agg_idx = bp->rx_agg_nr_pages * RX_DESC_CNT; - for (i = 0; i < bp->rx_nr_rings; i++) { - struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; - struct bnxt_tpa_idx_map *map; - int j; - - if (rxr->rx_tpa) { - for (j = 0; j < bp->max_tpa; j++) { - struct bnxt_tpa_info *tpa_info = - &rxr->rx_tpa[j]; - u8 *data = tpa_info->data; + if (!rxr->rx_tpa) + goto skip_rx_tpa_free; - if (!data) - continue; + for (i = 0; i < bp->max_tpa; i++) { + struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[i]; + u8 *data = tpa_info->data; - dma_unmap_single_attrs(&pdev->dev, - 
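/*
 * Sketch (illustration, not from this patch): the RING_MONITOR handling
 * added above is a decode-and-dispatch pattern -- mask the ring type out
 * of event_data2, translate the firmware ring id in event_data1 to a
 * driver ring index, then schedule a per-ring reset.  A minimal userspace
 * model of that flow; the mask and type values below are placeholders,
 * the real ones come from bnxt_hsi.h.
 */
#include <stdint.h>
#include <stdio.h>

#define RING_TYPE_MASK	0xffu	/* assumed: ring type in data2[7:0] */
#define RING_TYPE_RX	0x1u	/* placeholder value */
#define INVALID_IDX	0xffffu

static uint16_t ring_id_to_idx(uint16_t ring_id)
{
	/* stand-in for bnxt_agg_ring_id_to_grp_idx(): linear scan */
	static const uint16_t fw_ring_ids[] = { 0x10, 0x11, 0x12 };

	for (uint16_t i = 0; i < 3; i++)
		if (fw_ring_ids[i] == ring_id)
			return i;
	return INVALID_IDX;
}

static void handle_ring_monitor(uint32_t data1, uint32_t data2)
{
	if ((data2 & RING_TYPE_MASK) != RING_TYPE_RX)
		return;		/* only RX rings get the per-ring reset */
	uint16_t idx = ring_id_to_idx((uint16_t)data1);
	if (idx == INVALID_IDX) {
		fprintf(stderr, "unknown RX agg ring id 0x%x\n", (unsigned)data1);
		return;
	}
	printf("scheduling reset of RX ring %u\n", idx);
}

int main(void)
{
	handle_ring_monitor(0x11, RING_TYPE_RX);	/* -> ring 1 */
	return 0;
}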
tpa_info->mapping, - bp->rx_buf_use_size, - bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); + if (!data) + continue; - tpa_info->data = NULL; + dma_unmap_single_attrs(&pdev->dev, tpa_info->mapping, + bp->rx_buf_use_size, bp->rx_dir, + DMA_ATTR_WEAK_ORDERING); - kfree(data); - } - } + tpa_info->data = NULL; - for (j = 0; j < max_idx; j++) { - struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[j]; - dma_addr_t mapping = rx_buf->mapping; - void *data = rx_buf->data; + kfree(data); + } - if (!data) - continue; +skip_rx_tpa_free: + for (i = 0; i < max_idx; i++) { + struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i]; + dma_addr_t mapping = rx_buf->mapping; + void *data = rx_buf->data; - rx_buf->data = NULL; + if (!data) + continue; - if (BNXT_RX_PAGE_MODE(bp)) { - mapping -= bp->rx_dma_offset; - dma_unmap_page_attrs(&pdev->dev, mapping, - PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); - page_pool_recycle_direct(rxr->page_pool, data); - } else { - dma_unmap_single_attrs(&pdev->dev, mapping, - bp->rx_buf_use_size, - bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); - kfree(data); - } + rx_buf->data = NULL; + if (BNXT_RX_PAGE_MODE(bp)) { + mapping -= bp->rx_dma_offset; + dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE, + bp->rx_dir, + DMA_ATTR_WEAK_ORDERING); + page_pool_recycle_direct(rxr->page_pool, data); + } else { + dma_unmap_single_attrs(&pdev->dev, mapping, + bp->rx_buf_use_size, bp->rx_dir, + DMA_ATTR_WEAK_ORDERING); + kfree(data); } + } + for (i = 0; i < max_agg_idx; i++) { + struct bnxt_sw_rx_agg_bd *rx_agg_buf = &rxr->rx_agg_ring[i]; + struct page *page = rx_agg_buf->page; - for (j = 0; j < max_agg_idx; j++) { - struct bnxt_sw_rx_agg_bd *rx_agg_buf = - &rxr->rx_agg_ring[j]; - struct page *page = rx_agg_buf->page; - - if (!page) - continue; + if (!page) + continue; - dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping, - BNXT_RX_PAGE_SIZE, - PCI_DMA_FROMDEVICE, - DMA_ATTR_WEAK_ORDERING); + dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping, + BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE, + DMA_ATTR_WEAK_ORDERING); - rx_agg_buf->page = NULL; - __clear_bit(j, rxr->rx_agg_bmap); + rx_agg_buf->page = NULL; + __clear_bit(i, rxr->rx_agg_bmap); - __free_page(page); - } - if (rxr->rx_page) { - __free_page(rxr->rx_page); - rxr->rx_page = NULL; - } - map = rxr->rx_tpa_idx_map; - if (map) - memset(map->agg_idx_bmap, 0, sizeof(map->agg_idx_bmap)); + __free_page(page); + } + if (rxr->rx_page) { + __free_page(rxr->rx_page); + rxr->rx_page = NULL; } + map = rxr->rx_tpa_idx_map; + if (map) + memset(map->agg_idx_bmap, 0, sizeof(map->agg_idx_bmap)); +} + +static void bnxt_free_rx_skbs(struct bnxt *bp) +{ + int i; + + if (!bp->rx_ring) + return; + + for (i = 0; i < bp->rx_nr_rings; i++) + bnxt_free_one_rx_ring_skbs(bp, i); } static void bnxt_free_skbs(struct bnxt *bp) @@ -3165,31 +3215,16 @@ static void bnxt_init_rxbd_pages(struct bnxt_ring_struct *ring, u32 type) } } -static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) +static int bnxt_alloc_one_rx_ring(struct bnxt *bp, int ring_nr) { + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr]; struct net_device *dev = bp->dev; - struct bnxt_rx_ring_info *rxr; - struct bnxt_ring_struct *ring; - u32 prod, type; + u32 prod; int i; - type = (bp->rx_buf_use_size << RX_BD_LEN_SHIFT) | - RX_BD_TYPE_RX_PACKET_BD | RX_BD_FLAGS_EOP; - - if (NET_IP_ALIGN == 2) - type |= RX_BD_FLAGS_SOP; - - rxr = &bp->rx_ring[ring_nr]; - ring = &rxr->rx_ring_struct; - bnxt_init_rxbd_pages(ring, type); - - if (BNXT_RX_PAGE_MODE(bp) && bp->xdp_prog) { - bpf_prog_add(bp->xdp_prog, 1); - 
rxr->xdp_prog = bp->xdp_prog; - } prod = rxr->rx_prod; for (i = 0; i < bp->rx_ring_size; i++) { - if (bnxt_alloc_rx_data(bp, rxr, prod, GFP_KERNEL) != 0) { + if (bnxt_alloc_rx_data(bp, rxr, prod, GFP_KERNEL)) { netdev_warn(dev, "init'ed rx ring %d with %d/%d skbs only\n", ring_nr, i, bp->rx_ring_size); break; @@ -3197,22 +3232,13 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) prod = NEXT_RX(prod); } rxr->rx_prod = prod; - ring->fw_ring_id = INVALID_HW_RING_ID; - - ring = &rxr->rx_agg_ring_struct; - ring->fw_ring_id = INVALID_HW_RING_ID; if (!(bp->flags & BNXT_FLAG_AGG_RINGS)) return 0; - type = ((u32)BNXT_RX_PAGE_SIZE << RX_BD_LEN_SHIFT) | - RX_BD_TYPE_RX_AGG_BD | RX_BD_FLAGS_SOP; - - bnxt_init_rxbd_pages(ring, type); - prod = rxr->rx_agg_prod; for (i = 0; i < bp->rx_agg_ring_size; i++) { - if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_KERNEL) != 0) { + if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_KERNEL)) { netdev_warn(dev, "init'ed rx ring %d with %d/%d pages only\n", ring_nr, i, bp->rx_ring_size); break; @@ -3221,30 +3247,58 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) } rxr->rx_agg_prod = prod; - if (bp->flags & BNXT_FLAG_TPA) { - if (rxr->rx_tpa) { - u8 *data; - dma_addr_t mapping; + if (rxr->rx_tpa) { + dma_addr_t mapping; + u8 *data; - for (i = 0; i < bp->max_tpa; i++) { - data = __bnxt_alloc_rx_data(bp, &mapping, - GFP_KERNEL); - if (!data) - return -ENOMEM; + for (i = 0; i < bp->max_tpa; i++) { + data = __bnxt_alloc_rx_data(bp, &mapping, GFP_KERNEL); + if (!data) + return -ENOMEM; - rxr->rx_tpa[i].data = data; - rxr->rx_tpa[i].data_ptr = data + bp->rx_offset; - rxr->rx_tpa[i].mapping = mapping; - } - } else { - netdev_err(bp->dev, "No resource allocated for LRO/GRO\n"); - return -ENOMEM; + rxr->rx_tpa[i].data = data; + rxr->rx_tpa[i].data_ptr = data + bp->rx_offset; + rxr->rx_tpa[i].mapping = mapping; } } - return 0; } +static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) +{ + struct bnxt_rx_ring_info *rxr; + struct bnxt_ring_struct *ring; + u32 type; + + type = (bp->rx_buf_use_size << RX_BD_LEN_SHIFT) | + RX_BD_TYPE_RX_PACKET_BD | RX_BD_FLAGS_EOP; + + if (NET_IP_ALIGN == 2) + type |= RX_BD_FLAGS_SOP; + + rxr = &bp->rx_ring[ring_nr]; + ring = &rxr->rx_ring_struct; + bnxt_init_rxbd_pages(ring, type); + + if (BNXT_RX_PAGE_MODE(bp) && bp->xdp_prog) { + bpf_prog_add(bp->xdp_prog, 1); + rxr->xdp_prog = bp->xdp_prog; + } + ring->fw_ring_id = INVALID_HW_RING_ID; + + ring = &rxr->rx_agg_ring_struct; + ring->fw_ring_id = INVALID_HW_RING_ID; + + if ((bp->flags & BNXT_FLAG_AGG_RINGS)) { + type = ((u32)BNXT_RX_PAGE_SIZE << RX_BD_LEN_SHIFT) | + RX_BD_TYPE_RX_AGG_BD | RX_BD_FLAGS_SOP; + + bnxt_init_rxbd_pages(ring, type); + } + + return bnxt_alloc_one_rx_ring(bp, ring_nr); +} + static void bnxt_init_cp_rings(struct bnxt *bp) { int i, j; @@ -4269,6 +4323,8 @@ static int bnxt_hwrm_to_stderr(u32 hwrm_err) switch (hwrm_err) { case HWRM_ERR_CODE_SUCCESS: return 0; + case HWRM_ERR_CODE_RESOURCE_LOCKED: + return -EROFS; case HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED: return -EACCES; case HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR: @@ -5343,13 +5399,16 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) * VLAN_STRIP_CAP properly. 
*/ if ((flags & VNIC_QCAPS_RESP_FLAGS_VLAN_STRIP_CAP) || - ((bp->flags & BNXT_FLAG_CHIP_P5) && + (BNXT_CHIP_P5_THOR(bp) && !(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED))) bp->fw_cap |= BNXT_FW_CAP_VLAN_RX_STRIP; bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported); - if (bp->max_tpa_v2) - bp->hw_ring_stats_size = - sizeof(struct ctx_hw_stats_ext); + if (bp->max_tpa_v2) { + if (BNXT_CHIP_P5_THOR(bp)) + bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5; + else + bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5_SR2; + } } mutex_unlock(&bp->hwrm_cmd_lock); return rc; @@ -6639,6 +6698,8 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) } if (BNXT_PF(bp) && (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST)) bp->flags |= BNXT_FLAG_MULTI_HOST; + if (flags & FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED) + bp->fw_cap |= BNXT_FW_CAP_RING_MONITOR; switch (resp->port_partition_type) { case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0: @@ -7333,6 +7394,77 @@ hwrm_cfa_adv_qcaps_exit: return rc; } +static int __bnxt_alloc_fw_health(struct bnxt *bp) +{ + if (bp->fw_health) + return 0; + + bp->fw_health = kzalloc(sizeof(*bp->fw_health), GFP_KERNEL); + if (!bp->fw_health) + return -ENOMEM; + + return 0; +} + +static int bnxt_alloc_fw_health(struct bnxt *bp) +{ + int rc; + + if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) && + !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) + return 0; + + rc = __bnxt_alloc_fw_health(bp); + if (rc) { + bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET; + bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY; + return rc; + } + + return 0; +} + +static void __bnxt_map_fw_health_reg(struct bnxt *bp, u32 reg) +{ + writel(reg & BNXT_GRC_BASE_MASK, bp->bar0 + + BNXT_GRCPF_REG_WINDOW_BASE_OUT + + BNXT_FW_HEALTH_WIN_MAP_OFF); +} + +static void bnxt_try_map_fw_health_reg(struct bnxt *bp) +{ + void __iomem *hs; + u32 status_loc; + u32 reg_type; + u32 sig; + + __bnxt_map_fw_health_reg(bp, HCOMM_STATUS_STRUCT_LOC); + hs = bp->bar0 + BNXT_FW_HEALTH_WIN_OFF(HCOMM_STATUS_STRUCT_LOC); + + sig = readl(hs + offsetof(struct hcomm_status, sig_ver)); + if ((sig & HCOMM_STATUS_SIGNATURE_MASK) != HCOMM_STATUS_SIGNATURE_VAL) { + if (bp->fw_health) + bp->fw_health->status_reliable = false; + return; + } + + if (__bnxt_alloc_fw_health(bp)) { + netdev_warn(bp->dev, "no memory for firmware status checks\n"); + return; + } + + status_loc = readl(hs + offsetof(struct hcomm_status, fw_status_loc)); + bp->fw_health->regs[BNXT_FW_HEALTH_REG] = status_loc; + reg_type = BNXT_FW_HEALTH_REG_TYPE(status_loc); + if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) { + __bnxt_map_fw_health_reg(bp, status_loc); + bp->fw_health->mapped_regs[BNXT_FW_HEALTH_REG] = + BNXT_FW_HEALTH_WIN_OFF(status_loc); + } + + bp->fw_health->status_reliable = true; +} + static int bnxt_map_fw_health_regs(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; @@ -7349,14 +7481,12 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp) reg_base = reg & BNXT_GRC_BASE_MASK; if ((reg & BNXT_GRC_BASE_MASK) != reg_base) return -ERANGE; - fw_health->mapped_regs[i] = BNXT_FW_HEALTH_WIN_BASE + - (reg & BNXT_GRC_OFFSET_MASK); + fw_health->mapped_regs[i] = BNXT_FW_HEALTH_WIN_OFF(reg); } if (reg_base == 0xffffffff) return 0; - writel(reg_base, bp->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + - BNXT_FW_HEALTH_WIN_MAP_OFF); + __bnxt_map_fw_health_reg(bp, reg_base); return 0; } @@ -7432,6 +7562,16 @@ static int bnxt_hwrm_func_reset(struct bnxt *bp) return hwrm_send_message(bp, &req, sizeof(req), HWRM_RESET_TIMEOUT); } +static void bnxt_nvm_cfg_ver_get(struct bnxt *bp) +{ + struct 
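/*
 * Sketch (illustration, not from this patch): bnxt_try_map_fw_health_reg()
 * above uses a register window -- the high bits of a GRC address are
 * programmed once into a window-base register, and the low bits become an
 * offset inside a fixed BAR0 aperture (BNXT_FW_HEALTH_WIN_BASE, 0x3000 per
 * the header change later in this patch).  Just the address math, with
 * assumed 4K window granularity for the two masks:
 */
#include <assert.h>
#include <stdint.h>

#define GRC_BASE_MASK		0xfffff000u	/* assumed window granularity */
#define GRC_OFFSET_MASK		0x00000ffcu	/* assumed, dword aligned */
#define FW_HEALTH_WIN_BASE	0x3000u

static uint32_t win_base(uint32_t grc_reg)
{
	return grc_reg & GRC_BASE_MASK;		/* written to the window register */
}

static uint32_t win_off(uint32_t grc_reg)
{
	return FW_HEALTH_WIN_BASE + (grc_reg & GRC_OFFSET_MASK); /* BAR0 offset */
}

int main(void)
{
	uint32_t reg = 0x31001008u;		/* arbitrary example GRC address */

	assert(win_base(reg) == 0x31001000u);
	assert(win_off(reg) == 0x3008u);
	return 0;
}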
hwrm_nvm_get_dev_info_output nvm_info; + + if (!bnxt_hwrm_nvm_get_dev_info(bp, &nvm_info)) + snprintf(bp->nvm_cfg_ver, FW_VER_STR_LEN, "%d.%d.%d", + nvm_info.nvm_cfg_ver_maj, nvm_info.nvm_cfg_ver_min, + nvm_info.nvm_cfg_ver_upd); +} + static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp) { int rc = 0; @@ -8635,10 +8775,9 @@ static void bnxt_del_napi(struct bnxt *bp) for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; - napi_hash_del(&bnapi->napi); - netif_napi_del(&bnapi->napi); + __netif_napi_del(&bnapi->napi); } - /* We called napi_hash_del() before netif_napi_del(), we need + /* We called __netif_napi_del(), we need * to respect an RCU grace period before freeing napi structures. */ synchronize_net(); @@ -8694,14 +8833,19 @@ static void bnxt_enable_napi(struct bnxt *bp) int i; for (i = 0; i < bp->cp_nr_rings; i++) { - struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; - bp->bnapi[i]->in_reset = false; + struct bnxt_napi *bnapi = bp->bnapi[i]; + struct bnxt_cp_ring_info *cpr; + + cpr = &bnapi->cp_ring; + if (bnapi->in_reset) + cpr->sw_stats.rx.rx_resets++; + bnapi->in_reset = false; - if (bp->bnapi[i]->rx_ring) { + if (bnapi->rx_ring) { INIT_WORK(&cpr->dim.work, bnxt_dim_work); cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } - napi_enable(&bp->bnapi[i]->napi); + napi_enable(&bnapi->napi); } } @@ -8735,6 +8879,30 @@ void bnxt_tx_enable(struct bnxt *bp) netif_carrier_on(bp->dev); } +static char *bnxt_report_fec(struct bnxt_link_info *link_info) +{ + u8 active_fec = link_info->active_fec_sig_mode & + PORT_PHY_QCFG_RESP_ACTIVE_FEC_MASK; + + switch (active_fec) { + default: + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_NONE_ACTIVE: + return "None"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE74_ACTIVE: + return "Clause 74 BaseR"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE91_ACTIVE: + return "Clause 91 RS(528,514)"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_1XN_ACTIVE: + return "Clause 91 RS544_1XN"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_IEEE_ACTIVE: + return "Clause 91 RS(544,514)"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_1XN_ACTIVE: + return "Clause 91 RS272_1XN"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE: + return "Clause 91 RS(272,257)"; + } +} + static void bnxt_report_link(struct bnxt *bp) { if (bp->link_info.link_up) { @@ -8744,6 +8912,11 @@ static void bnxt_report_link(struct bnxt *bp) u16 fec; netif_carrier_on(bp->dev); + speed = bnxt_fw_to_ethtool_speed(bp->link_info.link_speed); + if (speed == SPEED_UNKNOWN) { + netdev_info(bp->dev, "NIC Link is Up, speed unknown\n"); + return; + } if (bp->link_info.duplex == BNXT_LINK_DUPLEX_FULL) duplex = "full"; else @@ -8756,7 +8929,6 @@ static void bnxt_report_link(struct bnxt *bp) flow_ctrl = "ON - receive"; else flow_ctrl = "none"; - speed = bnxt_fw_to_ethtool_speed(bp->link_info.link_speed); netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s duplex, Flow control: %s\n", speed, duplex, flow_ctrl); if (bp->flags & BNXT_FLAG_EEE_CAP) @@ -8765,16 +8937,25 @@ static void bnxt_report_link(struct bnxt *bp) "not active"); fec = bp->link_info.fec_cfg; if (!(fec & PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED)) - netdev_info(bp->dev, "FEC autoneg %s encodings: %s\n", + netdev_info(bp->dev, "FEC autoneg %s encoding: %s\n", (fec & BNXT_FEC_AUTONEG) ? "on" : "off", - (fec & BNXT_FEC_ENC_BASE_R) ? "BaseR" : - (fec & BNXT_FEC_ENC_RS) ? 
"RS" : "None"); + bnxt_report_fec(&bp->link_info)); } else { netif_carrier_off(bp->dev); netdev_err(bp->dev, "NIC Link is Down\n"); } } +static bool bnxt_phy_qcaps_no_speed(struct hwrm_port_phy_qcaps_output *resp) +{ + if (!resp->supported_speeds_auto_mode && + !resp->supported_speeds_force_mode && + !resp->supported_pam4_speeds_auto_mode && + !resp->supported_pam4_speeds_force_mode) + return true; + return false; +} + static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) { int rc = 0; @@ -8822,9 +9003,24 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET) bp->fw_cap |= BNXT_FW_CAP_PORT_STATS_NO_RESET; + if (bp->hwrm_spec_code >= 0x10a01) { + if (bnxt_phy_qcaps_no_speed(resp)) { + link_info->phy_state = BNXT_PHY_STATE_DISABLED; + netdev_warn(bp->dev, "Ethernet link disabled\n"); + } else if (link_info->phy_state == BNXT_PHY_STATE_DISABLED) { + link_info->phy_state = BNXT_PHY_STATE_ENABLED; + netdev_info(bp->dev, "Ethernet link enabled\n"); + /* Phy re-enabled, reprobe the speeds */ + link_info->support_auto_speeds = 0; + link_info->support_pam4_auto_speeds = 0; + } + } if (resp->supported_speeds_auto_mode) link_info->support_auto_speeds = le16_to_cpu(resp->supported_speeds_auto_mode); + if (resp->supported_pam4_speeds_auto_mode) + link_info->support_pam4_auto_speeds = + le16_to_cpu(resp->supported_pam4_speeds_auto_mode); bp->port_count = resp->port_cnt; @@ -8833,14 +9029,21 @@ hwrm_phy_qcaps_exit: return rc; } -static int bnxt_update_link(struct bnxt *bp, bool chng_link_state) +static bool bnxt_support_dropped(u16 advertising, u16 supported) +{ + u16 diff = advertising ^ supported; + + return ((supported | diff) != supported); +} + +int bnxt_update_link(struct bnxt *bp, bool chng_link_state) { int rc = 0; struct bnxt_link_info *link_info = &bp->link_info; struct hwrm_port_phy_qcfg_input req = {0}; struct hwrm_port_phy_qcfg_output *resp = bp->hwrm_cmd_resp_addr; u8 link_up = link_info->link_up; - u16 diff; + bool support_changed = false; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCFG, -1, -1); @@ -8867,10 +9070,17 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state) else link_info->link_speed = 0; link_info->force_link_speed = le16_to_cpu(resp->force_link_speed); + link_info->force_pam4_link_speed = + le16_to_cpu(resp->force_pam4_link_speed); link_info->support_speeds = le16_to_cpu(resp->support_speeds); + link_info->support_pam4_speeds = le16_to_cpu(resp->support_pam4_speeds); link_info->auto_link_speeds = le16_to_cpu(resp->auto_link_speed_mask); + link_info->auto_pam4_link_speeds = + le16_to_cpu(resp->auto_pam4_link_speed_mask); link_info->lp_auto_link_speeds = le16_to_cpu(resp->link_partner_adv_speeds); + link_info->lp_auto_pam4_link_speeds = + resp->link_partner_pam4_adv_speeds; link_info->preemphasis = le32_to_cpu(resp->preemphasis); link_info->phy_ver[0] = resp->phy_maj; link_info->phy_ver[1] = resp->phy_min; @@ -8919,9 +9129,10 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state) } link_info->fec_cfg = PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED; - if (bp->hwrm_spec_code >= 0x10504) + if (bp->hwrm_spec_code >= 0x10504) { link_info->fec_cfg = le16_to_cpu(resp->fec_cfg); - + link_info->active_fec_sig_mode = resp->active_fec_signal_mode; + } /* TODO: need to add more logic to report VF link */ if (chng_link_state) { if (link_info->phy_link_status == BNXT_LINK_LINK) @@ -8939,17 +9150,21 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state) if 
(!BNXT_PHY_CFG_ABLE(bp)) return 0; - diff = link_info->support_auto_speeds ^ link_info->advertising; - if ((link_info->support_auto_speeds | diff) != - link_info->support_auto_speeds) { - /* An advertised speed is no longer supported, so we need to - * update the advertisement settings. Caller holds RTNL - * so we can modify link settings. - */ + /* Check if any advertised speeds are no longer supported. The caller + * holds the link_lock mutex, so we can modify link_info settings. + */ + if (bnxt_support_dropped(link_info->advertising, + link_info->support_auto_speeds)) { link_info->advertising = link_info->support_auto_speeds; - if (link_info->autoneg & BNXT_AUTONEG_SPEED) - bnxt_hwrm_set_link_setting(bp, true, false); + support_changed = true; + } + if (bnxt_support_dropped(link_info->advertising_pam4, + link_info->support_pam4_auto_speeds)) { + link_info->advertising_pam4 = link_info->support_pam4_auto_speeds; + support_changed = true; } + if (support_changed && (link_info->autoneg & BNXT_AUTONEG_SPEED)) + bnxt_hwrm_set_link_setting(bp, true, false); return 0; } @@ -9008,27 +9223,30 @@ bnxt_hwrm_set_pause_common(struct bnxt *bp, struct hwrm_port_phy_cfg_input *req) } } -static void bnxt_hwrm_set_link_common(struct bnxt *bp, - struct hwrm_port_phy_cfg_input *req) +static void bnxt_hwrm_set_link_common(struct bnxt *bp, struct hwrm_port_phy_cfg_input *req) { - u8 autoneg = bp->link_info.autoneg; - u16 fw_link_speed = bp->link_info.req_link_speed; - u16 advertising = bp->link_info.advertising; - - if (autoneg & BNXT_AUTONEG_SPEED) { - req->auto_mode |= - PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK; - - req->enables |= cpu_to_le32( - PORT_PHY_CFG_REQ_ENABLES_AUTO_LINK_SPEED_MASK); - req->auto_link_speed_mask = cpu_to_le16(advertising); - + if (bp->link_info.autoneg & BNXT_AUTONEG_SPEED) { + req->auto_mode |= PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK; + if (bp->link_info.advertising) { + req->enables |= cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_AUTO_LINK_SPEED_MASK); + req->auto_link_speed_mask = cpu_to_le16(bp->link_info.advertising); + } + if (bp->link_info.advertising_pam4) { + req->enables |= + cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_AUTO_PAM4_LINK_SPEED_MASK); + req->auto_link_pam4_speed_mask = + cpu_to_le16(bp->link_info.advertising_pam4); + } req->enables |= cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE); - req->flags |= - cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_RESTART_AUTONEG); + req->flags |= cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_RESTART_AUTONEG); } else { - req->force_link_speed = cpu_to_le16(fw_link_speed); req->flags |= cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE); + if (bp->link_info.req_signal_mode == BNXT_SIG_MODE_PAM4) { + req->force_pam4_link_speed = cpu_to_le16(bp->link_info.req_link_speed); + req->enables |= cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_FORCE_PAM4_LINK_SPEED); + } else { + req->force_link_speed = cpu_to_le16(bp->link_info.req_link_speed); + } } /* tell chimp that the setting takes effect immediately */ @@ -9424,14 +9642,19 @@ static int bnxt_update_phy_setting(struct bnxt *bp) if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { if (BNXT_AUTO_MODE(link_info->auto_mode)) update_link = true; - if (link_info->req_link_speed != link_info->force_link_speed) + if (link_info->req_signal_mode == BNXT_SIG_MODE_NRZ && + link_info->req_link_speed != link_info->force_link_speed) + update_link = true; + else if (link_info->req_signal_mode == BNXT_SIG_MODE_PAM4 && + link_info->req_link_speed != link_info->force_pam4_link_speed) update_link = true; if (link_info->req_duplex != link_info->duplex_setting) 
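/*
 * Sketch (illustration, not from this patch): bnxt_support_dropped(),
 * used just above for both the NRZ and PAM4 masks, asks "does
 * 'advertising' contain a bit that 'supported' lacks?".  Its XOR form is
 * equivalent to the more familiar 'advertising & ~supported' test:
 */
#include <assert.h>
#include <stdint.h>

static int support_dropped(uint16_t adv, uint16_t sup)
{
	uint16_t diff = adv ^ sup;		/* bits where the masks differ */

	return (uint16_t)(sup | diff) != sup;	/* any differing bit outside sup? */
}

int main(void)
{
	for (uint16_t adv = 0; adv < 16; adv++)
		for (uint16_t sup = 0; sup < 16; sup++)
			assert(support_dropped(adv, sup) == !!(adv & ~sup));
	return 0;
}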
update_link = true; } else { if (link_info->auto_mode == BNXT_LINK_AUTO_NONE) update_link = true; - if (link_info->advertising != link_info->auto_link_speeds) + if (link_info->advertising != link_info->auto_link_speeds || + link_info->advertising_pam4 != link_info->auto_pam4_link_speeds) update_link = true; } @@ -10362,6 +10585,23 @@ static void bnxt_dbg_dump_states(struct bnxt *bp) } } +static int bnxt_hwrm_rx_ring_reset(struct bnxt *bp, int ring_nr) +{ + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr]; + struct hwrm_ring_reset_input req = {0}; + struct bnxt_napi *bnapi = rxr->bnapi; + struct bnxt_cp_ring_info *cpr; + u16 cp_ring_id; + + cpr = &bnapi->cp_ring; + cp_ring_id = cpr->cp_ring_struct.fw_ring_id; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_RESET, cp_ring_id, -1); + req.ring_type = RING_RESET_REQ_RING_TYPE_RX_RING_GRP; + req.ring_id = cpu_to_le16(bp->grp_info[bnapi->index].fw_grp_id); + return hwrm_send_message_silent(bp, &req, sizeof(req), + HWRM_CMD_TIMEOUT); +} + static void bnxt_reset_task(struct bnxt *bp, bool silent) { if (!silent) @@ -10497,6 +10737,55 @@ static void bnxt_reset(struct bnxt *bp, bool silent) bnxt_rtnl_unlock_sp(bp); } +/* Only called from bnxt_sp_task() */ +static void bnxt_rx_ring_reset(struct bnxt *bp) +{ + int i; + + bnxt_rtnl_lock_sp(bp); + if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { + bnxt_rtnl_unlock_sp(bp); + return; + } + /* Disable and flush TPA before resetting the RX ring */ + if (bp->flags & BNXT_FLAG_TPA) + bnxt_set_tpa(bp, false); + for (i = 0; i < bp->rx_nr_rings; i++) { + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; + struct bnxt_cp_ring_info *cpr; + int rc; + + if (!rxr->bnapi->in_reset) + continue; + + rc = bnxt_hwrm_rx_ring_reset(bp, i); + if (rc) { + if (rc == -EINVAL || rc == -EOPNOTSUPP) + netdev_info_once(bp->dev, "RX ring reset not supported by firmware, falling back to global reset\n"); + else + netdev_warn(bp->dev, "RX ring reset failed, rc = %d, falling back to global reset\n", + rc); + bnxt_reset_task(bp, true); + break; + } + bnxt_free_one_rx_ring_skbs(bp, i); + rxr->rx_prod = 0; + rxr->rx_agg_prod = 0; + rxr->rx_sw_agg_prod = 0; + rxr->rx_next_cons = 0; + rxr->bnapi->in_reset = false; + bnxt_alloc_one_rx_ring(bp, i); + cpr = &rxr->bnapi->cp_ring; + cpr->sw_stats.rx.rx_resets++; + if (bp->flags & BNXT_FLAG_AGG_RINGS) + bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); + } + if (bp->flags & BNXT_FLAG_TPA) + bnxt_set_tpa(bp, true); + bnxt_rtnl_unlock_sp(bp); +} + static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); @@ -10691,8 +10980,15 @@ static void bnxt_init_ethtool_link_settings(struct bnxt *bp) link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; } link_info->advertising = link_info->auto_link_speeds; + link_info->advertising_pam4 = link_info->auto_pam4_link_speeds; } else { link_info->req_link_speed = link_info->force_link_speed; + link_info->req_signal_mode = BNXT_SIG_MODE_NRZ; + if (link_info->force_pam4_link_speed) { + link_info->req_link_speed = + link_info->force_pam4_link_speed; + link_info->req_signal_mode = BNXT_SIG_MODE_PAM4; + } link_info->req_duplex = link_info->duplex_setting; } if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) @@ -10778,6 +11074,9 @@ static void bnxt_sp_task(struct work_struct *work) if (test_and_clear_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event)) bnxt_reset(bp, true); + if (test_and_clear_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event)) + bnxt_rx_ring_reset(bp); + if (test_and_clear_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, 
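/*
 * Sketch (illustration, not from this patch): bnxt_rx_ring_reset() above
 * is a fixed recovery sequence, and the ordering is the point -- TPA is
 * quiesced around the whole operation, producers are rebuilt before the
 * doorbell publishes them, and any firmware refusal falls back to a full
 * reset.  The hypothetical helpers below just stand in for the HWRM and
 * buffer-management calls:
 */
#include <stdio.h>

static void tpa_set(int on)	{ printf("TPA %s\n", on ? "on" : "off"); }
static int fw_ring_reset(int i)	{ printf("HWRM_RING_RESET grp %d\n", i); return 0; }
static void refill_ring(int i)	{ printf("free, zero producers, repost, doorbell %d\n", i); }

static void rx_ring_reset_outline(int nr_rings)
{
	tpa_set(0);			/* flush aggregation first */
	for (int i = 0; i < nr_rings; i++) {
		if (fw_ring_reset(i)) {	/* firmware rejected the per-ring reset */
			printf("falling back to global reset\n");
			break;
		}
		refill_ring(i);		/* buffers re-posted before the doorbell */
	}
	tpa_set(1);			/* re-enable only once rings are coherent */
}

int main(void)
{
	rx_ring_reset_outline(2);
	return 0;
}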
&bp->sp_event)) bnxt_devlink_health_report(bp, BNXT_FW_RESET_NOTIFY_SP_EVENT); @@ -10882,21 +11181,19 @@ static void bnxt_init_dflt_coal(struct bnxt *bp) bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; } -static void bnxt_alloc_fw_health(struct bnxt *bp) +static int bnxt_fw_reset_via_optee(struct bnxt *bp) { - if (bp->fw_health) - return; +#ifdef CONFIG_TEE_BNXT_FW + int rc = tee_bnxt_fw_load(); - if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) && - !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) - return; + if (rc) + netdev_err(bp->dev, "Failed FW reset via OP-TEE, rc=%d\n", rc); - bp->fw_health = kzalloc(sizeof(*bp->fw_health), GFP_KERNEL); - if (!bp->fw_health) { - netdev_warn(bp->dev, "Failed to allocate fw_health\n"); - bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET; - bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY; - } + return rc; +#else + netdev_err(bp->dev, "OP-TEE not supported\n"); + return -ENODEV; +#endif } static int bnxt_fw_init_one_p1(struct bnxt *bp) @@ -10905,8 +11202,24 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp) bp->fw_cap = 0; rc = bnxt_hwrm_ver_get(bp); - if (rc) - return rc; + bnxt_try_map_fw_health_reg(bp); + if (rc) { + if (bp->fw_health && bp->fw_health->status_reliable) { + u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + + netdev_err(bp->dev, + "Firmware not responding, status: 0x%x\n", + sts); + if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) { + netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n"); + rc = bnxt_fw_reset_via_optee(bp); + if (!rc) + rc = bnxt_hwrm_ver_get(bp); + } + } + if (rc) + return rc; + } if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) { rc = bnxt_alloc_kong_hwrm_resources(bp); @@ -10920,6 +11233,8 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp) if (rc) return rc; } + bnxt_nvm_cfg_ver_get(bp); + rc = bnxt_hwrm_func_reset(bp); if (rc) return -ENODEV; @@ -10945,11 +11260,14 @@ static int bnxt_fw_init_one_p2(struct bnxt *bp) netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n", rc); - bnxt_alloc_fw_health(bp); - rc = bnxt_hwrm_error_recovery_qcfg(bp); - if (rc) - netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n", - rc); + if (bnxt_alloc_fw_health(bp)) { + netdev_warn(bp->dev, "no memory for firmware error recovery\n"); + } else { + rc = bnxt_hwrm_error_recovery_qcfg(bp); + if (rc) + netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n", + rc); + } rc = bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, false); if (rc) @@ -11075,12 +11393,8 @@ static void bnxt_reset_all(struct bnxt *bp) int i, rc; if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { -#ifdef CONFIG_TEE_BNXT_FW - rc = tee_bnxt_fw_load(); - if (rc) - netdev_err(bp->dev, "Unable to reset FW rc=%d\n", rc); + bnxt_fw_reset_via_optee(bp); bp->fw_reset_timestamp = jiffies; -#endif return; } @@ -11199,7 +11513,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) if (time_after(jiffies, bp->fw_reset_timestamp + (bp->fw_reset_max_dsecs * HZ / 10))) { netdev_err(bp->dev, "Firmware reset aborted\n"); - goto fw_reset_abort; + goto fw_reset_abort_status; } bnxt_queue_fw_reset_work(bp, HZ / 5); return; @@ -11233,6 +11547,13 @@ static void bnxt_fw_reset_task(struct work_struct *work) } return; +fw_reset_abort_status: + if (bp->fw_health->status_reliable || + (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) { + u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + + netdev_err(bp->dev, "fw_health_status 0x%x\n", sts); + } fw_reset_abort: clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) @@ -12203,6 +12524,7 @@ static 
int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; bp = netdev_priv(dev); + bp->msg_enable = BNXT_DEF_MSG_ENABLE; bnxt_set_max_func_irqs(bp, max_irqs); if (bnxt_vf_pciid(ent->driver_data)) @@ -12234,8 +12556,11 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; - if (BNXT_CHIP_P5(bp)) + if (BNXT_CHIP_P5(bp)) { bp->flags |= BNXT_FLAG_CHIP_P5; + if (BNXT_CHIP_SR2(bp)) + bp->flags |= BNXT_FLAG_CHIP_SR2; + } rc = bnxt_alloc_rss_indir_tbl(bp); if (rc) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 0ef89dabfd61..21ef1c21f602 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -907,6 +907,7 @@ struct bnxt_rx_ring_info { struct bnxt_rx_sw_stats { u64 rx_l4_csum_errors; + u64 rx_resets; u64 rx_buf_errors; }; @@ -1142,50 +1143,6 @@ struct bnxt_ntuple_filter { #define BNXT_FLTR_UPDATE 1 }; -struct hwrm_port_phy_qcfg_output_compat { - __le16 error_code; - __le16 req_type; - __le16 seq_id; - __le16 resp_len; - u8 link; - u8 link_signal_mode; - __le16 link_speed; - u8 duplex_cfg; - u8 pause; - __le16 support_speeds; - __le16 force_link_speed; - u8 auto_mode; - u8 auto_pause; - __le16 auto_link_speed; - __le16 auto_link_speed_mask; - u8 wirespeed; - u8 lpbk; - u8 force_pause; - u8 module_status; - __le32 preemphasis; - u8 phy_maj; - u8 phy_min; - u8 phy_bld; - u8 phy_type; - u8 media_type; - u8 xcvr_pkg_type; - u8 eee_config_phy_addr; - u8 parallel_detect; - __le16 link_partner_adv_speeds; - u8 link_partner_adv_auto_mode; - u8 link_partner_adv_pause; - __le16 adv_eee_link_speed_mask; - __le16 link_partner_adv_eee_link_speed_mask; - __le32 xcvr_identifier_type_tx_lpi_timer; - __le16 fec_cfg; - u8 duplex_state; - u8 option_flags; - char phy_vendor_name[16]; - char phy_vendor_partnumber[16]; - u8 unused_0[7]; - u8 valid; -}; - struct bnxt_link_info { u8 phy_type; u8 media_type; @@ -1196,7 +1153,10 @@ struct bnxt_link_info { #define BNXT_LINK_SIGNAL PORT_PHY_QCFG_RESP_LINK_SIGNAL #define BNXT_LINK_LINK PORT_PHY_QCFG_RESP_LINK_LINK u8 wire_speed; - u8 loop_back; + u8 phy_state; +#define BNXT_PHY_STATE_ENABLED 0 +#define BNXT_PHY_STATE_DISABLED 1 + u8 link_up; u8 duplex; #define BNXT_LINK_DUPLEX_HALF PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF @@ -1232,6 +1192,7 @@ struct bnxt_link_info { #define BNXT_LINK_SPEED_50GB PORT_PHY_QCFG_RESP_LINK_SPEED_50GB #define BNXT_LINK_SPEED_100GB PORT_PHY_QCFG_RESP_LINK_SPEED_100GB u16 support_speeds; + u16 support_pam4_speeds; u16 auto_link_speeds; /* fw adv setting */ #define BNXT_LINK_SPEED_MSK_100MB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_100MB #define BNXT_LINK_SPEED_MSK_1GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_1GB @@ -1243,24 +1204,51 @@ struct bnxt_link_info { #define BNXT_LINK_SPEED_MSK_40GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_40GB #define BNXT_LINK_SPEED_MSK_50GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_50GB #define BNXT_LINK_SPEED_MSK_100GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_100GB + u16 auto_pam4_link_speeds; +#define BNXT_LINK_PAM4_SPEED_MSK_50GB PORT_PHY_QCFG_RESP_SUPPORT_PAM4_SPEEDS_50G +#define BNXT_LINK_PAM4_SPEED_MSK_100GB PORT_PHY_QCFG_RESP_SUPPORT_PAM4_SPEEDS_100G +#define BNXT_LINK_PAM4_SPEED_MSK_200GB PORT_PHY_QCFG_RESP_SUPPORT_PAM4_SPEEDS_200G u16 support_auto_speeds; + u16 support_pam4_auto_speeds; u16 lp_auto_link_speeds; + u16 lp_auto_pam4_link_speeds; u16 force_link_speed; + u16 force_pam4_link_speed; u32 preemphasis; u8 module_status; + u8 active_fec_sig_mode; u16 
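/*
 * Sketch (illustration, not from this patch): the bnxt_link_info changes
 * above give nearly every NRZ speed mask a PAM4 twin (support_*, auto_*,
 * lp_auto_*, force_*, advertising*).  One way to read the pattern is as a
 * small per-signal-mode table; hypothetical types, for illustration only:
 */
#include <stdint.h>

enum sig_mode { SIG_NRZ, SIG_PAM4, SIG_MODES };

struct link_masks {
	uint16_t supported[SIG_MODES];
	uint16_t advertising[SIG_MODES];
	uint16_t forced[SIG_MODES];
};

/* re-clamping after a PHY qcaps refresh must touch both planes */
static void clamp_advertising(struct link_masks *lm)
{
	for (int m = 0; m < SIG_MODES; m++)
		lm->advertising[m] &= lm->supported[m];
}

int main(void)
{
	struct link_masks lm = {
		.supported   = { 0x00ff, 0x0007 },
		.advertising = { 0x01ff, 0x000f },
	};

	clamp_advertising(&lm);
	return lm.advertising[SIG_NRZ] == 0x00ff &&
	       lm.advertising[SIG_PAM4] == 0x0007 ? 0 : 1;
}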
fec_cfg; +#define BNXT_FEC_NONE PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED +#define BNXT_FEC_AUTONEG_CAP PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_SUPPORTED #define BNXT_FEC_AUTONEG PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_ENABLED +#define BNXT_FEC_ENC_BASE_R_CAP \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_SUPPORTED #define BNXT_FEC_ENC_BASE_R PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED -#define BNXT_FEC_ENC_RS PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED +#define BNXT_FEC_ENC_RS_CAP \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED +#define BNXT_FEC_ENC_LLRS_CAP \ + (PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_1XN_SUPPORTED | \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_IEEE_SUPPORTED) +#define BNXT_FEC_ENC_RS \ + (PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED | \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_1XN_ENABLED | \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_IEEE_ENABLED) +#define BNXT_FEC_ENC_LLRS \ + (PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_1XN_ENABLED | \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_IEEE_ENABLED) /* copy of requested setting from ethtool cmd */ u8 autoneg; #define BNXT_AUTONEG_SPEED 1 #define BNXT_AUTONEG_FLOW_CTRL 2 + u8 req_signal_mode; +#define BNXT_SIG_MODE_NRZ PORT_PHY_QCFG_RESP_SIGNAL_MODE_NRZ +#define BNXT_SIG_MODE_PAM4 PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4 u8 req_duplex; u8 req_flow_ctrl; u16 req_link_speed; u16 advertising; /* user adv setting */ + u16 advertising_pam4; bool force_link_chng; bool phy_retry; @@ -1272,6 +1260,49 @@ struct bnxt_link_info { struct hwrm_port_phy_qcfg_output phy_qcfg_resp; }; +#define BNXT_FEC_RS544_ON \ + (PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_1XN_ENABLE | \ + PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_IEEE_ENABLE) + +#define BNXT_FEC_RS544_OFF \ + (PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_1XN_DISABLE | \ + PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_IEEE_DISABLE) + +#define BNXT_FEC_RS272_ON \ + (PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_ENABLE | \ + PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_ENABLE) + +#define BNXT_FEC_RS272_OFF \ + (PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_DISABLE | \ + PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_DISABLE) + +#define BNXT_PAM4_SUPPORTED(link_info) \ + ((link_info)->support_pam4_speeds) + +#define BNXT_FEC_RS_ON(link_info) \ + (PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_ENABLE | \ + PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE | \ + (BNXT_PAM4_SUPPORTED(link_info) ? \ + (BNXT_FEC_RS544_ON | BNXT_FEC_RS272_OFF) : 0)) + +#define BNXT_FEC_LLRS_ON \ + (PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_ENABLE | \ + PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE | \ + BNXT_FEC_RS272_ON | BNXT_FEC_RS544_OFF) + +#define BNXT_FEC_RS_OFF(link_info) \ + (PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_DISABLE | \ + (BNXT_PAM4_SUPPORTED(link_info) ? 
\ + (BNXT_FEC_RS544_OFF | BNXT_FEC_RS272_OFF) : 0)) + +#define BNXT_FEC_BASE_R_ON(link_info) \ + (PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_ENABLE | \ + BNXT_FEC_RS_OFF(link_info)) + +#define BNXT_FEC_ALL_OFF(link_info) \ + (PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE | \ + BNXT_FEC_RS_OFF(link_info)) + #define BNXT_MAX_QUEUE 8 struct bnxt_queue_info { @@ -1464,6 +1495,7 @@ struct bnxt_fw_health { u8 enabled:1; u8 master:1; u8 fatal:1; + u8 status_reliable:1; u8 tmr_multiplier; u8 tmr_counter; u8 fw_reset_seq_cnt; @@ -1491,6 +1523,9 @@ struct bnxt_fw_reporter_ctx { #define BNXT_FW_HEALTH_WIN_BASE 0x3000 #define BNXT_FW_HEALTH_WIN_MAP_OFF 8 +#define BNXT_FW_HEALTH_WIN_OFF(reg) (BNXT_FW_HEALTH_WIN_BASE + \ + ((reg) & BNXT_GRC_OFFSET_MASK)) + #define BNXT_FW_STATUS_HEALTHY 0x8000 #define BNXT_FW_STATUS_SHUTDOWN 0x100000 @@ -1535,6 +1570,8 @@ struct bnxt { u8 chip_rev; +#define CHIP_NUM_58818 0xd818 + #define BNXT_CHIP_NUM_5730X(chip_num) \ ((chip_num) >= CHIP_NUM_57301 && \ (chip_num) <= CHIP_NUM_57304) @@ -1613,6 +1650,7 @@ struct bnxt { BNXT_FLAG_ROCEV2_CAP) #define BNXT_FLAG_NO_AGG_RINGS 0x20000 #define BNXT_FLAG_RX_PAGE_MODE 0x40000 + #define BNXT_FLAG_CHIP_SR2 0x80000 #define BNXT_FLAG_MULTI_HOST 0x100000 #define BNXT_FLAG_DSN_VALID 0x200000 #define BNXT_FLAG_DOUBLE_DB 0x400000 @@ -1630,20 +1668,27 @@ struct bnxt { #define BNXT_NPAR(bp) ((bp)->port_partition_type) #define BNXT_MH(bp) ((bp)->flags & BNXT_FLAG_MULTI_HOST) #define BNXT_SINGLE_PF(bp) (BNXT_PF(bp) && !BNXT_NPAR(bp) && !BNXT_MH(bp)) -#define BNXT_PHY_CFG_ABLE(bp) (BNXT_SINGLE_PF(bp) || \ - ((bp)->fw_cap & BNXT_FW_CAP_SHARED_PORT_CFG)) +#define BNXT_PHY_CFG_ABLE(bp) ((BNXT_SINGLE_PF(bp) || \ + ((bp)->fw_cap & BNXT_FW_CAP_SHARED_PORT_CFG)) && \ + (bp)->link_info.phy_state == BNXT_PHY_STATE_ENABLED) #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0) #define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE) #define BNXT_SUPPORTS_TPA(bp) (!BNXT_CHIP_TYPE_NITRO_A0(bp) && \ (!((bp)->flags & BNXT_FLAG_CHIP_P5) || \ (bp)->max_tpa_v2) && !is_kdump_kernel()) -/* Chip class phase 5 */ -#define BNXT_CHIP_P5(bp) \ +#define BNXT_CHIP_SR2(bp) \ + ((bp)->chip_num == CHIP_NUM_58818) + +#define BNXT_CHIP_P5_THOR(bp) \ ((bp)->chip_num == CHIP_NUM_57508 || \ (bp)->chip_num == CHIP_NUM_57504 || \ (bp)->chip_num == CHIP_NUM_57502) +/* Chip class phase 5 */ +#define BNXT_CHIP_P5(bp) \ + (BNXT_CHIP_P5_THOR(bp) || BNXT_CHIP_SR2(bp)) + /* Chip class phase 4.x */ #define BNXT_CHIP_P4(bp) \ (BNXT_CHIP_NUM_57X1X((bp)->chip_num) || \ @@ -1777,6 +1822,7 @@ struct bnxt { #define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000 #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000 #define BNXT_FW_CAP_PORT_STATS_NO_RESET 0x10000000 + #define BNXT_FW_CAP_RING_MONITOR 0x40000000 #define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM) u32 hwrm_spec_code; @@ -1810,6 +1856,7 @@ struct bnxt { #define PHY_VER_STR_LEN (FW_VER_STR_LEN - BC_HWRM_STR_LEN) char fw_ver_str[FW_VER_STR_LEN]; char hwrm_ver_supp[FW_VER_STR_LEN]; + char nvm_cfg_ver[FW_VER_STR_LEN]; u64 fw_ver_code; #define BNXT_FW_VER_CODE(maj, min, bld, rsv) \ ((u64)(maj) << 48 | (u64)(min) << 32 | (u64)(bld) << 16 | (rsv)) @@ -1935,6 +1982,20 @@ struct bnxt { struct device *hwmon_dev; }; +#define BNXT_NUM_RX_RING_STATS 8 +#define BNXT_NUM_TX_RING_STATS 8 +#define BNXT_NUM_TPA_RING_STATS 4 +#define BNXT_NUM_TPA_RING_STATS_P5 5 +#define BNXT_NUM_TPA_RING_STATS_P5_SR2 6 + +#define BNXT_RING_STATS_SIZE_P5 \ + ((BNXT_NUM_RX_RING_STATS + BNXT_NUM_TX_RING_STATS + \ + BNXT_NUM_TPA_RING_STATS_P5) 
* 8) + +#define BNXT_RING_STATS_SIZE_P5_SR2 \ + ((BNXT_NUM_RX_RING_STATS + BNXT_NUM_TX_RING_STATS + \ + BNXT_NUM_TPA_RING_STATS_P5_SR2) * 8) + #define BNXT_GET_RING_STATS64(sw, counter) \ (*((sw) + offsetof(struct ctx_hw_stats, counter) / 8)) @@ -2114,6 +2175,7 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num); int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init); void bnxt_tx_disable(struct bnxt *bp); void bnxt_tx_enable(struct bnxt *bp); +int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 3a854195d5b0..184b6d0513b2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -17,15 +17,13 @@ #include "bnxt_ethtool.h" static int -bnxt_dl_flash_update(struct devlink *dl, const char *filename, - const char *region, struct netlink_ext_ack *extack) +bnxt_dl_flash_update(struct devlink *dl, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) { struct bnxt *bp = bnxt_get_bp_from_dl(dl); int rc; - if (region) - return -EOPNOTSUPP; - if (!BNXT_PF(bp)) { NL_SET_ERR_MSG_MOD(extack, "flash update not supported from a VF"); @@ -33,15 +31,12 @@ bnxt_dl_flash_update(struct devlink *dl, const char *filename, } devlink_flash_update_begin_notify(dl); - devlink_flash_update_status_notify(dl, "Preparing to flash", region, 0, - 0); - rc = bnxt_flash_package_from_file(bp->dev, filename, 0); + devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); + rc = bnxt_flash_package_from_file(bp->dev, params->file_name, 0); if (!rc) - devlink_flash_update_status_notify(dl, "Flashing done", region, - 0, 0); + devlink_flash_update_status_notify(dl, "Flashing done", NULL, 0, 0); else - devlink_flash_update_status_notify(dl, "Flashing failed", - region, 0, 0); + devlink_flash_update_status_notify(dl, "Flashing failed", NULL, 0, 0); devlink_flash_update_end_notify(dl); return rc; } @@ -387,15 +382,41 @@ static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp, return rc; } +static int bnxt_dl_info_put(struct bnxt *bp, struct devlink_info_req *req, + enum bnxt_dl_version_type type, const char *key, + char *buf) +{ + if (!strlen(buf)) + return 0; + + if ((bp->flags & BNXT_FLAG_CHIP_P5) && + (!strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_NCSI) || + !strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_ROCE))) + return 0; + + switch (type) { + case BNXT_VERSION_FIXED: + return devlink_info_version_fixed_put(req, key, buf); + case BNXT_VERSION_RUNNING: + return devlink_info_version_running_put(req, key, buf); + case BNXT_VERSION_STORED: + return devlink_info_version_stored_put(req, key, buf); + } + return 0; +} + +#define HWRM_FW_VER_STR_LEN 16 + static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, struct netlink_ext_ack *extack) { + struct hwrm_nvm_get_dev_info_output nvm_dev_info; struct bnxt *bp = bnxt_get_bp_from_dl(dl); union devlink_param_value nvm_cfg_ver; struct hwrm_ver_get_output *ver_resp; char mgmt_ver[FW_VER_STR_LEN]; char roce_ver[FW_VER_STR_LEN]; - char fw_ver[FW_VER_STR_LEN]; + char ncsi_ver[FW_VER_STR_LEN]; char buf[32]; int rc; @@ -403,10 +424,11 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (rc) return rc; - if (strlen(bp->board_partno)) { - rc = 
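/*
 * Sketch (illustration, not from this patch): the BNXT_RING_STATS_SIZE_*
 * macros above are "number of 64-bit counters times 8 bytes"; the SR2
 * variant simply carries one extra TPA counter.  Worked out numerically:
 */
#include <assert.h>

#define RX_CNT	8	/* BNXT_NUM_RX_RING_STATS */
#define TX_CNT	8	/* BNXT_NUM_TX_RING_STATS */
#define TPA_P5	5	/* BNXT_NUM_TPA_RING_STATS_P5 */
#define TPA_SR2	6	/* BNXT_NUM_TPA_RING_STATS_P5_SR2 */

int main(void)
{
	assert((RX_CNT + TX_CNT + TPA_P5) * 8 == 168);	/* Thor: 21 u64 counters */
	assert((RX_CNT + TX_CNT + TPA_SR2) * 8 == 176);	/* SR2: 22 u64 counters */
	return 0;
}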
devlink_info_version_fixed_put(req, - DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, - bp->board_partno); + if (BNXT_PF(bp) && (bp->flags & BNXT_FLAG_DSN_VALID)) { + sprintf(buf, "%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X", + bp->dsn[7], bp->dsn[6], bp->dsn[5], bp->dsn[4], + bp->dsn[3], bp->dsn[2], bp->dsn[1], bp->dsn[0]); + rc = devlink_info_serial_number_put(req, buf); if (rc) return rc; } @@ -417,54 +439,56 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, return rc; } + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_FIXED, + DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, + bp->board_partno); + if (rc) + return rc; + sprintf(buf, "%X", bp->chip_num); - rc = devlink_info_version_fixed_put(req, - DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, buf); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_FIXED, + DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, buf); if (rc) return rc; ver_resp = &bp->ver_resp; sprintf(buf, "%X", ver_resp->chip_rev); - rc = devlink_info_version_fixed_put(req, - DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, buf); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_FIXED, + DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, buf); if (rc) return rc; - if (BNXT_PF(bp)) { - sprintf(buf, "%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X", - bp->dsn[7], bp->dsn[6], bp->dsn[5], bp->dsn[4], - bp->dsn[3], bp->dsn[2], bp->dsn[1], bp->dsn[0]); - rc = devlink_info_serial_number_put(req, buf); - if (rc) - return rc; - } + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_PSID, + bp->nvm_cfg_ver); + if (rc) + return rc; - if (strlen(ver_resp->active_pkg_name)) { - rc = - devlink_info_version_running_put(req, - DEVLINK_INFO_VERSION_GENERIC_FW, - ver_resp->active_pkg_name); - if (rc) - return rc; - } + buf[0] = 0; + strncat(buf, ver_resp->active_pkg_name, HWRM_FW_VER_STR_LEN); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW, buf); + if (rc) + return rc; if (BNXT_PF(bp) && !bnxt_hwrm_get_nvm_cfg_ver(bp, &nvm_cfg_ver)) { u32 ver = nvm_cfg_ver.vu32; sprintf(buf, "%X.%X.%X", (ver >> 16) & 0xF, (ver >> 8) & 0xF, ver & 0xF); - rc = devlink_info_version_running_put(req, - DEVLINK_INFO_VERSION_GENERIC_FW_PSID, buf); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW_PSID, + buf); if (rc) return rc; } if (ver_resp->flags & VER_GET_RESP_FLAGS_EXT_VER_AVAIL) { - snprintf(fw_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", ver_resp->hwrm_fw_major, ver_resp->hwrm_fw_minor, ver_resp->hwrm_fw_build, ver_resp->hwrm_fw_patch); - snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + snprintf(ncsi_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", ver_resp->mgmt_fw_major, ver_resp->mgmt_fw_minor, ver_resp->mgmt_fw_build, ver_resp->mgmt_fw_patch); @@ -472,11 +496,11 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, ver_resp->roce_fw_major, ver_resp->roce_fw_minor, ver_resp->roce_fw_build, ver_resp->roce_fw_patch); } else { - snprintf(fw_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", ver_resp->hwrm_fw_maj_8b, ver_resp->hwrm_fw_min_8b, ver_resp->hwrm_fw_bld_8b, ver_resp->hwrm_fw_rsvd_8b); - snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + snprintf(ncsi_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", ver_resp->mgmt_fw_maj_8b, ver_resp->mgmt_fw_min_8b, ver_resp->mgmt_fw_bld_8b, ver_resp->mgmt_fw_rsvd_8b); @@ -484,29 +508,60 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, ver_resp->roce_fw_maj_8b, 
ver_resp->roce_fw_min_8b, ver_resp->roce_fw_bld_8b, ver_resp->roce_fw_rsvd_8b); } - rc = devlink_info_version_running_put(req, - DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, fw_ver); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, mgmt_ver); if (rc) return rc; - rc = devlink_info_version_running_put(req, - DEVLINK_INFO_VERSION_GENERIC_FW_MGMT_API, - bp->hwrm_ver_supp); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT_API, + bp->hwrm_ver_supp); if (rc) return rc; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) { - rc = devlink_info_version_running_put(req, - DEVLINK_INFO_VERSION_GENERIC_FW_NCSI, mgmt_ver); - if (rc) - return rc; + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_NCSI, ncsi_ver); + if (rc) + return rc; - rc = devlink_info_version_running_put(req, - DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver); - if (rc) - return rc; - } - return 0; + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver); + if (rc) + return rc; + + rc = bnxt_hwrm_nvm_get_dev_info(bp, &nvm_dev_info); + if (rc || + !(nvm_dev_info.flags & NVM_GET_DEV_INFO_RESP_FLAGS_FW_VER_VALID)) + return 0; + + buf[0] = 0; + strncat(buf, nvm_dev_info.pkg_name, HWRM_FW_VER_STR_LEN); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW, buf); + if (rc) + return rc; + + snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + nvm_dev_info.hwrm_fw_major, nvm_dev_info.hwrm_fw_minor, + nvm_dev_info.hwrm_fw_build, nvm_dev_info.hwrm_fw_patch); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, mgmt_ver); + if (rc) + return rc; + + snprintf(ncsi_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + nvm_dev_info.mgmt_fw_major, nvm_dev_info.mgmt_fw_minor, + nvm_dev_info.mgmt_fw_build, nvm_dev_info.mgmt_fw_patch); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW_NCSI, ncsi_ver); + if (rc) + return rc; + + snprintf(roce_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + nvm_dev_info.roce_fw_major, nvm_dev_info.roce_fw_minor, + nvm_dev_info.roce_fw_build, nvm_dev_info.roce_fw_patch); + return bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver); } static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index d5c8bd49383a..d22cab5d6856 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -60,6 +60,12 @@ struct bnxt_dl_nvm_param { u8 dl_num_bytes; }; +enum bnxt_dl_version_type { + BNXT_VERSION_FIXED, + BNXT_VERSION_RUNNING, + BNXT_VERSION_STORED, +}; + void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event); void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy); void bnxt_dl_health_recovery_done(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index fecdfd875af1..53687bc7fcf5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -11,6 +11,7 @@ #include <linux/ctype.h> #include <linux/stringify.h> #include <linux/ethtool.h> +#include <linux/linkmode.h> #include <linux/interrupt.h> #include <linux/pci.h> #include <linux/etherdevice.h> @@ -172,10 +173,12 @@ static const char * const 
bnxt_ring_tpa2_stats_str[] = { "rx_tpa_pkt", "rx_tpa_bytes", "rx_tpa_errors", + "rx_tpa_events", }; static const char * const bnxt_rx_sw_stats_str[] = { "rx_l4_csum_errors", + "rx_resets", "rx_buf_errors", }; @@ -462,9 +465,12 @@ static const struct { static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp) { if (BNXT_SUPPORTS_TPA(bp)) { - if (bp->max_tpa_v2) - return ARRAY_SIZE(bnxt_ring_tpa2_stats_str); - return ARRAY_SIZE(bnxt_ring_tpa_stats_str); + if (bp->max_tpa_v2) { + if (BNXT_CHIP_P5_THOR(bp)) + return BNXT_NUM_TPA_RING_STATS_P5; + return BNXT_NUM_TPA_RING_STATS_P5_SR2; + } + return BNXT_NUM_TPA_RING_STATS; } return 0; } @@ -796,7 +802,7 @@ static void bnxt_get_channels(struct net_device *dev, struct bnxt *bp = netdev_priv(dev); struct bnxt_hw_resc *hw_resc = &bp->hw_resc; int max_rx_rings, max_tx_rings, tcs; - int max_tx_sch_inputs; + int max_tx_sch_inputs, tx_grps; /* Get the most up-to-date max_tx_sch_inputs. */ if (netif_running(dev) && BNXT_NEW_RM(bp)) @@ -806,6 +812,12 @@ static void bnxt_get_channels(struct net_device *dev, bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, true); if (max_tx_sch_inputs) max_tx_rings = min_t(int, max_tx_rings, max_tx_sch_inputs); + + tcs = netdev_get_num_tc(dev); + tx_grps = max(tcs, 1); + if (bp->tx_nr_rings_xdp) + tx_grps++; + max_tx_rings /= tx_grps; channel->max_combined = min_t(int, max_rx_rings, max_tx_rings); if (bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, false)) { @@ -815,7 +827,6 @@ static void bnxt_get_channels(struct net_device *dev, if (max_tx_sch_inputs) max_tx_rings = min_t(int, max_tx_rings, max_tx_sch_inputs); - tcs = netdev_get_num_tc(dev); if (tcs > 1) max_tx_rings /= tcs; @@ -1503,6 +1514,53 @@ u32 _bnxt_fw_to_ethtool_adv_spds(u16 fw_speeds, u8 fw_pause) (fw_speeds) |= BNXT_LINK_SPEED_MSK_100GB; \ } +#define BNXT_FW_TO_ETHTOOL_PAM4_SPDS(fw_speeds, lk_ksettings, name) \ +{ \ + if ((fw_speeds) & BNXT_LINK_PAM4_SPEED_MSK_50GB) \ + ethtool_link_ksettings_add_link_mode(lk_ksettings, name,\ + 50000baseCR_Full); \ + if ((fw_speeds) & BNXT_LINK_PAM4_SPEED_MSK_100GB) \ + ethtool_link_ksettings_add_link_mode(lk_ksettings, name,\ + 100000baseCR2_Full);\ + if ((fw_speeds) & BNXT_LINK_PAM4_SPEED_MSK_200GB) \ + ethtool_link_ksettings_add_link_mode(lk_ksettings, name,\ + 200000baseCR4_Full);\ +} + +#define BNXT_ETHTOOL_TO_FW_PAM4_SPDS(fw_speeds, lk_ksettings, name) \ +{ \ + if (ethtool_link_ksettings_test_link_mode(lk_ksettings, name, \ + 50000baseCR_Full)) \ + (fw_speeds) |= BNXT_LINK_PAM4_SPEED_MSK_50GB; \ + if (ethtool_link_ksettings_test_link_mode(lk_ksettings, name, \ + 100000baseCR2_Full)) \ + (fw_speeds) |= BNXT_LINK_PAM4_SPEED_MSK_100GB; \ + if (ethtool_link_ksettings_test_link_mode(lk_ksettings, name, \ + 200000baseCR4_Full)) \ + (fw_speeds) |= BNXT_LINK_PAM4_SPEED_MSK_200GB; \ +} + +static void bnxt_fw_to_ethtool_advertised_fec(struct bnxt_link_info *link_info, + struct ethtool_link_ksettings *lk_ksettings) +{ + u16 fec_cfg = link_info->fec_cfg; + + if ((fec_cfg & BNXT_FEC_NONE) || !(fec_cfg & BNXT_FEC_AUTONEG)) { + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, + lk_ksettings->link_modes.advertising); + return; + } + if (fec_cfg & BNXT_FEC_ENC_BASE_R) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, + lk_ksettings->link_modes.advertising); + if (fec_cfg & BNXT_FEC_ENC_RS) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, + lk_ksettings->link_modes.advertising); + if (fec_cfg & BNXT_FEC_ENC_LLRS) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, + lk_ksettings->link_modes.advertising); +} + static void 
bnxt_fw_to_ethtool_advertised_spds(struct bnxt_link_info *link_info, struct ethtool_link_ksettings *lk_ksettings) { @@ -1513,6 +1571,9 @@ static void bnxt_fw_to_ethtool_advertised_spds(struct bnxt_link_info *link_info, fw_pause = link_info->auto_pause_setting; BNXT_FW_TO_ETHTOOL_SPDS(fw_speeds, fw_pause, lk_ksettings, advertising); + fw_speeds = link_info->advertising_pam4; + BNXT_FW_TO_ETHTOOL_PAM4_SPDS(fw_speeds, lk_ksettings, advertising); + bnxt_fw_to_ethtool_advertised_fec(link_info, lk_ksettings); } static void bnxt_fw_to_ethtool_lp_adv(struct bnxt_link_info *link_info, @@ -1526,6 +1587,29 @@ static void bnxt_fw_to_ethtool_lp_adv(struct bnxt_link_info *link_info, BNXT_FW_TO_ETHTOOL_SPDS(fw_speeds, fw_pause, lk_ksettings, lp_advertising); + fw_speeds = link_info->lp_auto_pam4_link_speeds; + BNXT_FW_TO_ETHTOOL_PAM4_SPDS(fw_speeds, lk_ksettings, lp_advertising); +} + +static void bnxt_fw_to_ethtool_support_fec(struct bnxt_link_info *link_info, + struct ethtool_link_ksettings *lk_ksettings) +{ + u16 fec_cfg = link_info->fec_cfg; + + if (fec_cfg & BNXT_FEC_NONE) { + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, + lk_ksettings->link_modes.supported); + return; + } + if (fec_cfg & BNXT_FEC_ENC_BASE_R_CAP) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, + lk_ksettings->link_modes.supported); + if (fec_cfg & BNXT_FEC_ENC_RS_CAP) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, + lk_ksettings->link_modes.supported); + if (fec_cfg & BNXT_FEC_ENC_LLRS_CAP) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, + lk_ksettings->link_modes.supported); } static void bnxt_fw_to_ethtool_support_spds(struct bnxt_link_info *link_info, @@ -1534,14 +1618,18 @@ static void bnxt_fw_to_ethtool_support_spds(struct bnxt_link_info *link_info, u16 fw_speeds = link_info->support_speeds; BNXT_FW_TO_ETHTOOL_SPDS(fw_speeds, 0, lk_ksettings, supported); + fw_speeds = link_info->support_pam4_speeds; + BNXT_FW_TO_ETHTOOL_PAM4_SPDS(fw_speeds, lk_ksettings, supported); ethtool_link_ksettings_add_link_mode(lk_ksettings, supported, Pause); ethtool_link_ksettings_add_link_mode(lk_ksettings, supported, Asym_Pause); - if (link_info->support_auto_speeds) + if (link_info->support_auto_speeds || + link_info->support_pam4_auto_speeds) ethtool_link_ksettings_add_link_mode(lk_ksettings, supported, Autoneg); + bnxt_fw_to_ethtool_support_fec(link_info, lk_ksettings); } u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed) @@ -1632,55 +1720,86 @@ static int bnxt_get_link_ksettings(struct net_device *dev, return 0; } -static u32 bnxt_get_fw_speed(struct net_device *dev, u32 ethtool_speed) +static int bnxt_force_link_speed(struct net_device *dev, u32 ethtool_speed) { struct bnxt *bp = netdev_priv(dev); struct bnxt_link_info *link_info = &bp->link_info; + u16 support_pam4_spds = link_info->support_pam4_speeds; u16 support_spds = link_info->support_speeds; - u32 fw_speed = 0; + u8 sig_mode = BNXT_SIG_MODE_NRZ; + u16 fw_speed = 0; switch (ethtool_speed) { case SPEED_100: if (support_spds & BNXT_LINK_SPEED_MSK_100MB) - fw_speed = PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB; + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB; break; case SPEED_1000: if (support_spds & BNXT_LINK_SPEED_MSK_1GB) - fw_speed = PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB; + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB; break; case SPEED_2500: if (support_spds & BNXT_LINK_SPEED_MSK_2_5GB) - fw_speed = PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB; + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB; break; case SPEED_10000: if (support_spds & BNXT_LINK_SPEED_MSK_10GB) - fw_speed 
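/*
 * Sketch (illustration, not from this patch): the bnxt_force_link_speed()
 * conversion around this point applies one precedence rule per requested
 * speed -- use the NRZ mask if the speed exists there, otherwise fall back
 * to the PAM4 mask and record the signalling mode so the HWRM request can
 * fill force_link_speed vs force_pam4_link_speed.  Reduced to a skeleton
 * (the real code maps each ethtool SPEED_* to distinct firmware enums):
 */
#include <stdint.h>
#include <stdio.h>

#define SPD_50G	0x1u	/* placeholder mask bit for 50G */

struct pick {
	uint16_t fw_speed;
	int pam4;
};

static int pick_forced(uint16_t nrz_sup, uint16_t pam4_sup, uint16_t bit,
		       struct pick *out)
{
	if (nrz_sup & bit) {
		out->fw_speed = bit;
		out->pam4 = 0;
		return 0;
	}
	if (pam4_sup & bit) {
		out->fw_speed = bit;
		out->pam4 = 1;
		return 0;
	}
	return -1;		/* unsupported speed */
}

int main(void)
{
	struct pick p;

	if (!pick_forced(0, SPD_50G, SPD_50G, &p))	/* only PAM4 has 50G */
		printf("forcing %s\n", p.pam4 ? "PAM4" : "NRZ");
	return 0;
}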
= PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB; + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB; break; case SPEED_20000: if (support_spds & BNXT_LINK_SPEED_MSK_20GB) - fw_speed = PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB; + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB; break; case SPEED_25000: if (support_spds & BNXT_LINK_SPEED_MSK_25GB) - fw_speed = PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB; + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB; break; case SPEED_40000: if (support_spds & BNXT_LINK_SPEED_MSK_40GB) - fw_speed = PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB; + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB; break; case SPEED_50000: - if (support_spds & BNXT_LINK_SPEED_MSK_50GB) - fw_speed = PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB; + if (support_spds & BNXT_LINK_SPEED_MSK_50GB) { + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB; + } else if (support_pam4_spds & BNXT_LINK_PAM4_SPEED_MSK_50GB) { + fw_speed = PORT_PHY_CFG_REQ_FORCE_PAM4_LINK_SPEED_50GB; + sig_mode = BNXT_SIG_MODE_PAM4; + } break; case SPEED_100000: - if (support_spds & BNXT_LINK_SPEED_MSK_100GB) - fw_speed = PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB; + if (support_spds & BNXT_LINK_SPEED_MSK_100GB) { + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB; + } else if (support_pam4_spds & BNXT_LINK_PAM4_SPEED_MSK_100GB) { + fw_speed = PORT_PHY_CFG_REQ_FORCE_PAM4_LINK_SPEED_100GB; + sig_mode = BNXT_SIG_MODE_PAM4; + } break; - default: - netdev_err(dev, "unsupported speed!\n"); + case SPEED_200000: + if (support_pam4_spds & BNXT_LINK_PAM4_SPEED_MSK_200GB) { + fw_speed = PORT_PHY_CFG_REQ_FORCE_PAM4_LINK_SPEED_200GB; + sig_mode = BNXT_SIG_MODE_PAM4; + } break; } - return fw_speed; + + if (!fw_speed) { + netdev_err(dev, "unsupported speed!\n"); + return -EINVAL; + } + + if (link_info->req_link_speed == fw_speed && + link_info->req_signal_mode == sig_mode && + link_info->autoneg == 0) + return -EALREADY; + + link_info->req_link_speed = fw_speed; + link_info->req_signal_mode = sig_mode; + link_info->req_duplex = BNXT_LINK_DUPLEX_FULL; + link_info->autoneg = 0; + link_info->advertising = 0; + link_info->advertising_pam4 = 0; + + return 0; } u16 bnxt_get_fw_auto_link_speeds(u32 advertising) @@ -1712,7 +1831,6 @@ static int bnxt_set_link_ksettings(struct net_device *dev, struct bnxt_link_info *link_info = &bp->link_info; const struct ethtool_link_settings *base = &lk_ksettings->base; bool set_pause = false; - u16 fw_advertising = 0; u32 speed; int rc = 0; @@ -1721,19 +1839,23 @@ static int bnxt_set_link_ksettings(struct net_device *dev, mutex_lock(&bp->link_lock); if (base->autoneg == AUTONEG_ENABLE) { - BNXT_ETHTOOL_TO_FW_SPDS(fw_advertising, lk_ksettings, + link_info->advertising = 0; + link_info->advertising_pam4 = 0; + BNXT_ETHTOOL_TO_FW_SPDS(link_info->advertising, lk_ksettings, advertising); + BNXT_ETHTOOL_TO_FW_PAM4_SPDS(link_info->advertising_pam4, + lk_ksettings, advertising); link_info->autoneg |= BNXT_AUTONEG_SPEED; - if (!fw_advertising) + if (!link_info->advertising && !link_info->advertising_pam4) { link_info->advertising = link_info->support_auto_speeds; - else - link_info->advertising = fw_advertising; + link_info->advertising_pam4 = + link_info->support_pam4_auto_speeds; + } /* any change to autoneg will cause link change, therefore the * driver should put back the original pause setting in autoneg */ set_pause = true; } else { - u16 fw_speed; u8 phy_type = link_info->phy_type; if (phy_type == PORT_PHY_QCFG_RESP_PHY_TYPE_BASET || @@ -1749,15 +1871,12 @@ static int bnxt_set_link_ksettings(struct net_device *dev, goto 
set_setting_exit; } speed = base->speed; - fw_speed = bnxt_get_fw_speed(dev, speed); - if (!fw_speed) { - rc = -EINVAL; + rc = bnxt_force_link_speed(dev, speed); + if (rc) { + if (rc == -EALREADY) + rc = 0; goto set_setting_exit; } - link_info->req_link_speed = fw_speed; - link_info->req_duplex = BNXT_LINK_DUPLEX_FULL; - link_info->autoneg = 0; - link_info->advertising = 0; } if (netif_running(dev)) @@ -1768,6 +1887,110 @@ set_setting_exit: return rc; } +static int bnxt_get_fecparam(struct net_device *dev, + struct ethtool_fecparam *fec) +{ + struct bnxt *bp = netdev_priv(dev); + struct bnxt_link_info *link_info; + u8 active_fec; + u16 fec_cfg; + + link_info = &bp->link_info; + fec_cfg = link_info->fec_cfg; + active_fec = link_info->active_fec_sig_mode & + PORT_PHY_QCFG_RESP_ACTIVE_FEC_MASK; + if (fec_cfg & BNXT_FEC_NONE) { + fec->fec = ETHTOOL_FEC_NONE; + fec->active_fec = ETHTOOL_FEC_NONE; + return 0; + } + if (fec_cfg & BNXT_FEC_AUTONEG) + fec->fec |= ETHTOOL_FEC_AUTO; + if (fec_cfg & BNXT_FEC_ENC_BASE_R) + fec->fec |= ETHTOOL_FEC_BASER; + if (fec_cfg & BNXT_FEC_ENC_RS) + fec->fec |= ETHTOOL_FEC_RS; + if (fec_cfg & BNXT_FEC_ENC_LLRS) + fec->fec |= ETHTOOL_FEC_LLRS; + + switch (active_fec) { + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE74_ACTIVE: + fec->active_fec |= ETHTOOL_FEC_BASER; + break; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE91_ACTIVE: + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_1XN_ACTIVE: + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_IEEE_ACTIVE: + fec->active_fec |= ETHTOOL_FEC_RS; + break; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_1XN_ACTIVE: + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE: + fec->active_fec |= ETHTOOL_FEC_LLRS; + break; + } + return 0; +} + +static u32 bnxt_ethtool_forced_fec_to_fw(struct bnxt_link_info *link_info, + u32 fec) +{ + u32 fw_fec = PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_DISABLE; + + if (fec & ETHTOOL_FEC_BASER) + fw_fec |= BNXT_FEC_BASE_R_ON(link_info); + else if (fec & ETHTOOL_FEC_RS) + fw_fec |= BNXT_FEC_RS_ON(link_info); + else if (fec & ETHTOOL_FEC_LLRS) + fw_fec |= BNXT_FEC_LLRS_ON; + return fw_fec; +} + +static int bnxt_set_fecparam(struct net_device *dev, + struct ethtool_fecparam *fecparam) +{ + struct hwrm_port_phy_cfg_input req = {0}; + struct bnxt *bp = netdev_priv(dev); + struct bnxt_link_info *link_info; + u32 new_cfg, fec = fecparam->fec; + u16 fec_cfg; + int rc; + + link_info = &bp->link_info; + fec_cfg = link_info->fec_cfg; + if (fec_cfg & BNXT_FEC_NONE) + return -EOPNOTSUPP; + + if (fec & ETHTOOL_FEC_OFF) { + new_cfg = PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_DISABLE | + BNXT_FEC_ALL_OFF(link_info); + goto apply_fec; + } + if (((fec & ETHTOOL_FEC_AUTO) && !(fec_cfg & BNXT_FEC_AUTONEG_CAP)) || + ((fec & ETHTOOL_FEC_RS) && !(fec_cfg & BNXT_FEC_ENC_RS_CAP)) || + ((fec & ETHTOOL_FEC_LLRS) && !(fec_cfg & BNXT_FEC_ENC_LLRS_CAP)) || + ((fec & ETHTOOL_FEC_BASER) && !(fec_cfg & BNXT_FEC_ENC_BASE_R_CAP))) + return -EINVAL; + + if (fec & ETHTOOL_FEC_AUTO) { + if (!link_info->autoneg) + return -EINVAL; + new_cfg = PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_ENABLE; + } else { + new_cfg = bnxt_ethtool_forced_fec_to_fw(link_info, fec); + } + +apply_fec: + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1); + req.flags = cpu_to_le32(new_cfg | PORT_PHY_CFG_REQ_FLAGS_RESET_PHY); + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + /* update current settings */ + if (!rc) { + mutex_lock(&bp->link_lock); + bnxt_update_link(bp, false); + mutex_unlock(&bp->link_lock); + } + return rc; +} + static void 
bnxt_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause) { @@ -1781,6 +2004,22 @@ static void bnxt_get_pauseparam(struct net_device *dev, epause->tx_pause = !!(link_info->req_flow_ctrl & BNXT_LINK_PAUSE_TX); } +static void bnxt_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *epstat) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx, *tx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS)) + return; + + rx = bp->port_stats.sw_stats; + tx = bp->port_stats.sw_stats + BNXT_TX_PORT_STATS_BYTE_OFFSET / 8; + + epstat->rx_pause_frames = BNXT_GET_RX_PORT_STATS64(rx, rx_pause_frames); + epstat->tx_pause_frames = BNXT_GET_TX_PORT_STATS64(tx, tx_pause_frames); +} + static int bnxt_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause) { @@ -1833,6 +2072,22 @@ static u32 bnxt_get_link(struct net_device *dev) return bp->link_info.link_up; } +int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp, + struct hwrm_nvm_get_dev_info_output *nvm_dev_info) +{ + struct hwrm_nvm_get_dev_info_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_nvm_get_dev_info_input req = {0}; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DEV_INFO, -1, -1); + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) + memcpy(nvm_dev_info, resp, sizeof(*resp)); + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static void bnxt_print_admin_err(struct bnxt *bp) { netdev_info(bp->dev, "PF does not have admin privileges to flash or reset the device\n"); @@ -3059,7 +3314,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, u8 test_mask = 0; int rc = 0, i; - if (!bp->num_tests || !BNXT_SINGLE_PF(bp)) + if (!bp->num_tests || !BNXT_PF(bp)) return; memset(buf, 0, sizeof(u64) * bp->num_tests); if (!netif_running(dev)) { @@ -3072,9 +3327,9 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, do_ext_lpbk = true; if (etest->flags & ETH_TEST_FL_OFFLINE) { - if (bp->pf.active_vfs) { + if (bp->pf.active_vfs || !BNXT_SINGLE_PF(bp)) { etest->flags |= ETH_TEST_FL_FAILED; - netdev_warn(dev, "Offline tests cannot be run with active VFs\n"); + netdev_warn(dev, "Offline tests cannot be run with active VFs or on shared PF\n"); return; } offline = true; @@ -3590,7 +3845,7 @@ void bnxt_ethtool_init(struct bnxt *bp) bnxt_get_pkgver(dev); bp->num_tests = 0; - if (bp->hwrm_spec_code < 0x10704 || !BNXT_SINGLE_PF(bp)) + if (bp->hwrm_spec_code < 0x10704 || !BNXT_PF(bp)) return; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_SELFTEST_QLIST, -1, -1); @@ -3657,6 +3912,9 @@ const struct ethtool_ops bnxt_ethtool_ops = { ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, + .get_fecparam = bnxt_get_fecparam, + .set_fecparam = bnxt_set_fecparam, + .get_pause_stats = bnxt_get_pause_stats, .get_pauseparam = bnxt_get_pauseparam, .set_pauseparam = bnxt_set_pauseparam, .get_drvinfo = bnxt_get_drvinfo, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h index 34f44ddfad79..fa6fbde52bea 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h @@ -92,6 +92,8 @@ u32 bnxt_get_rxfh_indir_size(struct net_device *dev); u32 _bnxt_fw_to_ethtool_adv_spds(u16, u8); u32 bnxt_fw_to_ethtool_speed(u16); u16 bnxt_get_fw_auto_link_speeds(u32); +int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp, + struct 
hwrm_nvm_get_dev_info_output *nvm_dev_info); int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, u32 install_type); void bnxt_ethtool_init(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index c4af6bf15e36..2d3e962bdac3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -213,7 +213,10 @@ struct cmd_nums { #define HWRM_PORT_PHY_MDIO_BUS_ACQUIRE 0xb7UL #define HWRM_PORT_PHY_MDIO_BUS_RELEASE 0xb8UL #define HWRM_PORT_QSTATS_EXT_PFC_WD 0xb9UL - #define HWRM_PORT_ECN_QSTATS 0xbaUL + #define HWRM_RESERVED7 0xbaUL + #define HWRM_PORT_TX_FIR_CFG 0xbbUL + #define HWRM_PORT_TX_FIR_QCFG 0xbcUL + #define HWRM_PORT_ECN_QSTATS 0xbdUL #define HWRM_FW_RESET 0xc0UL #define HWRM_FW_QSTATUS 0xc1UL #define HWRM_FW_HEALTH_CHECK 0xc2UL @@ -370,6 +373,9 @@ struct cmd_nums { #define HWRM_TF_SESSION_RESC_FLUSH 0x2cfUL #define HWRM_TF_TBL_TYPE_GET 0x2daUL #define HWRM_TF_TBL_TYPE_SET 0x2dbUL + #define HWRM_TF_TBL_TYPE_BULK_GET 0x2dcUL + #define HWRM_TF_CTXT_MEM_ALLOC 0x2e2UL + #define HWRM_TF_CTXT_MEM_FREE 0x2e3UL #define HWRM_TF_CTXT_MEM_RGTR 0x2e4UL #define HWRM_TF_CTXT_MEM_UNRGTR 0x2e5UL #define HWRM_TF_EXT_EM_QCAPS 0x2e6UL @@ -384,6 +390,8 @@ struct cmd_nums { #define HWRM_TF_TCAM_FREE 0x2fbUL #define HWRM_TF_GLOBAL_CFG_SET 0x2fcUL #define HWRM_TF_GLOBAL_CFG_GET 0x2fdUL + #define HWRM_TF_IF_TBL_SET 0x2feUL + #define HWRM_TF_IF_TBL_GET 0x2ffUL #define HWRM_SV 0x400UL #define HWRM_DBG_READ_DIRECT 0xff10UL #define HWRM_DBG_READ_INDIRECT 0xff11UL @@ -447,6 +455,7 @@ struct ret_codes { #define HWRM_ERR_CODE_KEY_ALREADY_EXISTS 0xeUL #define HWRM_ERR_CODE_HWRM_ERROR 0xfUL #define HWRM_ERR_CODE_BUSY 0x10UL + #define HWRM_ERR_CODE_RESOURCE_LOCKED 0x11UL #define HWRM_ERR_CODE_TLV_ENCAPSULATED_RESPONSE 0x8000UL #define HWRM_ERR_CODE_UNKNOWN_ERR 0xfffeUL #define HWRM_ERR_CODE_CMD_NOT_SUPPORTED 0xffffUL @@ -478,8 +487,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 1 -#define HWRM_VERSION_RSVD 54 -#define HWRM_VERSION_STR "1.10.1.54" +#define HWRM_VERSION_RSVD 68 +#define HWRM_VERSION_STR "1.10.1.68" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -675,6 +684,7 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE 0x7UL #define ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY 0x8UL #define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY 0x9UL + #define ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG 0xaUL #define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL #define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD 0x11UL #define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL @@ -851,6 +861,32 @@ struct hwrm_async_event_cmpl_error_recovery { #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED 0x2UL }; +/* hwrm_async_event_cmpl_ring_monitor_msg (size:128b/16B) */ +struct hwrm_async_event_cmpl_ring_monitor_msg { + __le16 type; + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_LAST ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_ID_RING_MONITOR_MSG 0xaUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_ID_LAST ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_ID_RING_MONITOR_MSG + 
__le32 event_data2; + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_TX 0x0UL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX 0x1UL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_CMPL 0x2UL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_LAST ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_CMPL + u8 opaque_v; + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_V 0x1UL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; +}; + /* hwrm_async_event_cmpl_vf_cfg_change (size:128b/16B) */ struct hwrm_async_event_cmpl_vf_cfg_change { __le16 type; @@ -975,6 +1011,28 @@ struct hwrm_async_event_cmpl_eem_cache_flush_done { #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_DATA1_FID_SFT 0 }; +/* hwrm_async_event_cmpl_deferred_response (size:128b/16B) */ +struct hwrm_async_event_cmpl_deferred_response { + __le16 type; + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_LAST ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_ID_DEFERRED_RESPONSE 0x40UL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_ID_LAST ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_ID_DEFERRED_RESPONSE + __le32 event_data2; + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_DATA2_SEQ_ID_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_DATA2_SEQ_ID_SFT 0 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_V 0x1UL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; +}; + /* hwrm_func_reset_input (size:192b/24B) */ struct hwrm_func_reset_input { __le16 req_type; @@ -1214,7 +1272,13 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT_SCHQ_SUPPORTED 0x40UL #define FUNC_QCAPS_RESP_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED 0x80UL u8 max_schqs; - u8 unused_1[2]; + u8 mpc_chnls_cap; + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_RCE 0x2UL + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TE_CFA 0x4UL + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_RE_CFA 0x8UL + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_PRIMATE 0x10UL + u8 unused_1; u8 valid; }; @@ -1250,6 +1314,7 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_FLAGS_PREBOOT_LEGACY_L2_RINGS 0x100UL #define FUNC_QCFG_RESP_FLAGS_HOT_RESET_ALLOWED 0x200UL #define FUNC_QCFG_RESP_FLAGS_PPP_PUSH_MODE_ENABLED 0x400UL + #define FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED 0x800UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1341,7 +1406,13 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_SVIF_INFO_SVIF_MASK 0x7fffUL #define FUNC_QCFG_RESP_SVIF_INFO_SVIF_SFT 0 #define FUNC_QCFG_RESP_SVIF_INFO_SVIF_VALID 0x8000UL - u8 unused_2[7]; + u8 mpc_chnls; + #define FUNC_QCFG_RESP_MPC_CHNLS_TCE_ENABLED 0x1UL + #define FUNC_QCFG_RESP_MPC_CHNLS_RCE_ENABLED 0x2UL + #define FUNC_QCFG_RESP_MPC_CHNLS_TE_CFA_ENABLED 0x4UL + #define 
FUNC_QCFG_RESP_MPC_CHNLS_RE_CFA_ENABLED 0x8UL + #define FUNC_QCFG_RESP_MPC_CHNLS_PRIMATE_ENABLED 0x10UL + u8 unused_2[6]; u8 valid; }; @@ -1405,6 +1476,7 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_ENABLES_ADMIN_LINK_STATE 0x400000UL #define FUNC_CFG_REQ_ENABLES_HOT_RESET_IF_SUPPORT 0x800000UL #define FUNC_CFG_REQ_ENABLES_SCHQ_ID 0x1000000UL + #define FUNC_CFG_REQ_ENABLES_MPC_CHNLS 0x2000000UL __le16 mtu; __le16 mru; __le16 num_rsscos_ctxs; @@ -1479,7 +1551,18 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_OPTIONS_RSVD_SFT 4 __le16 num_mcast_filters; __le16 schq_id; - u8 unused_0[6]; + __le16 mpc_chnls; + #define FUNC_CFG_REQ_MPC_CHNLS_TCE_ENABLE 0x1UL + #define FUNC_CFG_REQ_MPC_CHNLS_TCE_DISABLE 0x2UL + #define FUNC_CFG_REQ_MPC_CHNLS_RCE_ENABLE 0x4UL + #define FUNC_CFG_REQ_MPC_CHNLS_RCE_DISABLE 0x8UL + #define FUNC_CFG_REQ_MPC_CHNLS_TE_CFA_ENABLE 0x10UL + #define FUNC_CFG_REQ_MPC_CHNLS_TE_CFA_DISABLE 0x20UL + #define FUNC_CFG_REQ_MPC_CHNLS_RE_CFA_ENABLE 0x40UL + #define FUNC_CFG_REQ_MPC_CHNLS_RE_CFA_DISABLE 0x80UL + #define FUNC_CFG_REQ_MPC_CHNLS_PRIMATE_ENABLE 0x100UL + #define FUNC_CFG_REQ_MPC_CHNLS_PRIMATE_DISABLE 0x200UL + u8 unused_0[4]; }; /* hwrm_func_cfg_output (size:128b/16B) */ @@ -1559,7 +1642,7 @@ struct hwrm_func_qstats_ext_input { u8 unused_1[4]; }; -/* hwrm_func_qstats_ext_output (size:1472b/184B) */ +/* hwrm_func_qstats_ext_output (size:1536b/192B) */ struct hwrm_func_qstats_ext_output { __le16 error_code; __le16 req_type; @@ -1586,6 +1669,7 @@ struct hwrm_func_qstats_ext_output { __le64 rx_tpa_pkt; __le64 rx_tpa_bytes; __le64 rx_tpa_errors; + __le64 rx_tpa_events; u8 unused_0[7]; u8 valid; }; @@ -2412,25 +2496,29 @@ struct hwrm_port_phy_cfg_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define PORT_PHY_CFG_REQ_FLAGS_RESET_PHY 0x1UL - #define PORT_PHY_CFG_REQ_FLAGS_DEPRECATED 0x2UL - #define PORT_PHY_CFG_REQ_FLAGS_FORCE 0x4UL - #define PORT_PHY_CFG_REQ_FLAGS_RESTART_AUTONEG 0x8UL - #define PORT_PHY_CFG_REQ_FLAGS_EEE_ENABLE 0x10UL - #define PORT_PHY_CFG_REQ_FLAGS_EEE_DISABLE 0x20UL - #define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_ENABLE 0x40UL - #define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_DISABLE 0x80UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_ENABLE 0x100UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_DISABLE 0x200UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_ENABLE 0x400UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE 0x800UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_ENABLE 0x1000UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_DISABLE 0x2000UL - #define PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN 0x4000UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_1XN_ENABLE 0x8000UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_1XN_DISABLE 0x10000UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_2XN_ENABLE 0x20000UL - #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_2XN_DISABLE 0x40000UL + #define PORT_PHY_CFG_REQ_FLAGS_RESET_PHY 0x1UL + #define PORT_PHY_CFG_REQ_FLAGS_DEPRECATED 0x2UL + #define PORT_PHY_CFG_REQ_FLAGS_FORCE 0x4UL + #define PORT_PHY_CFG_REQ_FLAGS_RESTART_AUTONEG 0x8UL + #define PORT_PHY_CFG_REQ_FLAGS_EEE_ENABLE 0x10UL + #define PORT_PHY_CFG_REQ_FLAGS_EEE_DISABLE 0x20UL + #define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_ENABLE 0x40UL + #define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_DISABLE 0x80UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_ENABLE 0x100UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_DISABLE 0x200UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_ENABLE 0x400UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE 0x800UL + #define 
PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_ENABLE 0x1000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_DISABLE 0x2000UL + #define PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN 0x4000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_1XN_ENABLE 0x8000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_1XN_DISABLE 0x10000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_IEEE_ENABLE 0x20000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_IEEE_DISABLE 0x40000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_ENABLE 0x80000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_DISABLE 0x100000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_ENABLE 0x200000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_DISABLE 0x400000UL __le32 enables; #define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE 0x1UL #define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX 0x2UL @@ -2573,7 +2661,7 @@ struct hwrm_port_phy_qcfg_input { u8 unused_0[6]; }; -/* hwrm_port_phy_qcfg_output (size:832b/104B) */ +/* hwrm_port_phy_qcfg_output (size:768b/96B) */ struct hwrm_port_phy_qcfg_output { __le16 error_code; __le16 req_type; @@ -2584,10 +2672,22 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_LINK_SIGNAL 0x1UL #define PORT_PHY_QCFG_RESP_LINK_LINK 0x2UL #define PORT_PHY_QCFG_RESP_LINK_LAST PORT_PHY_QCFG_RESP_LINK_LINK - u8 link_signal_mode; - #define PORT_PHY_QCFG_RESP_LINK_SIGNAL_MODE_NRZ 0x0UL - #define PORT_PHY_QCFG_RESP_LINK_SIGNAL_MODE_PAM4 0x1UL - #define PORT_PHY_QCFG_RESP_LINK_SIGNAL_MODE_LAST PORT_PHY_QCFG_RESP_LINK_SIGNAL_MODE_PAM4 + u8 active_fec_signal_mode; + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_MASK 0xfUL + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_SFT 0 + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_NRZ 0x0UL + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4 0x1UL + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_LAST PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4 + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_MASK 0xf0UL + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_SFT 4 + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_NONE_ACTIVE (0x0UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE74_ACTIVE (0x1UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE91_ACTIVE (0x2UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_1XN_ACTIVE (0x3UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_IEEE_ACTIVE (0x4UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_1XN_ACTIVE (0x5UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE (0x6UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_LAST PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE __le16 link_speed; #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB 0x1UL #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB 0xaUL @@ -2809,21 +2909,21 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP28 (0x11UL << 24) #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_LAST PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP28 __le16 fec_cfg; - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED 0x1UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_SUPPORTED 0x2UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_ENABLED 0x4UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_SUPPORTED 0x8UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED 0x10UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED 0x20UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED 0x40UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_1XN_SUPPORTED 0x80UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_1XN_ENABLED 0x100UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_2XN_SUPPORTED 0x200UL - #define 
PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_2XN_ENABLED 0x400UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ACTIVE 0x800UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ACTIVE 0x1000UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_1XN_ACTIVE 0x2000UL - #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_2XN_ACTIVE 0x4000UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED 0x1UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_SUPPORTED 0x2UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_ENABLED 0x4UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_SUPPORTED 0x8UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED 0x10UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED 0x20UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED 0x40UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_1XN_SUPPORTED 0x80UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_1XN_ENABLED 0x100UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_IEEE_SUPPORTED 0x200UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_IEEE_ENABLED 0x400UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_1XN_SUPPORTED 0x800UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_1XN_ENABLED 0x1000UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_IEEE_SUPPORTED 0x2000UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_IEEE_ENABLED 0x4000UL u8 duplex_state; #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF 0x0UL #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL 0x1UL @@ -2845,11 +2945,10 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_AUTO_PAM4_LINK_SPEED_MASK_50G 0x1UL #define PORT_PHY_QCFG_RESP_AUTO_PAM4_LINK_SPEED_MASK_100G 0x2UL #define PORT_PHY_QCFG_RESP_AUTO_PAM4_LINK_SPEED_MASK_200G 0x4UL - __le16 link_partner_pam4_adv_speeds; + u8 link_partner_pam4_adv_speeds; #define PORT_PHY_QCFG_RESP_LINK_PARTNER_PAM4_ADV_SPEEDS_50GB 0x1UL #define PORT_PHY_QCFG_RESP_LINK_PARTNER_PAM4_ADV_SPEEDS_100GB 0x2UL #define PORT_PHY_QCFG_RESP_LINK_PARTNER_PAM4_ADV_SPEEDS_200GB 0x4UL - u8 unused_0[7]; u8 valid; }; @@ -3293,6 +3392,47 @@ struct hwrm_port_lpbk_qstats_output { u8 valid; }; +/* hwrm_port_ecn_qstats_input (size:256b/32B) */ +struct hwrm_port_ecn_qstats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + __le16 ecn_stat_buf_size; + u8 flags; + #define PORT_ECN_QSTATS_REQ_FLAGS_UNUSED 0x0UL + #define PORT_ECN_QSTATS_REQ_FLAGS_COUNTER_MASK 0x1UL + #define PORT_ECN_QSTATS_REQ_FLAGS_LAST PORT_ECN_QSTATS_REQ_FLAGS_COUNTER_MASK + u8 unused_0[3]; + __le64 ecn_stat_host_addr; +}; + +/* hwrm_port_ecn_qstats_output (size:128b/16B) */ +struct hwrm_port_ecn_qstats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 ecn_stat_buf_size; + u8 mark_en; + u8 unused_0[4]; + u8 valid; +}; + +/* port_stats_ecn (size:512b/64B) */ +struct port_stats_ecn { + __le64 mark_cnt_cos0; + __le64 mark_cnt_cos1; + __le64 mark_cnt_cos2; + __le64 mark_cnt_cos3; + __le64 mark_cnt_cos4; + __le64 mark_cnt_cos5; + __le64 mark_cnt_cos6; + __le64 mark_cnt_cos7; +}; + /* hwrm_port_clr_stats_input (size:192b/24B) */ struct hwrm_port_clr_stats_input { __le16 req_type; @@ -3387,8 +3527,9 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED 0x4UL #define PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED 0x8UL #define PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET 0x10UL - #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_MASK 0xe0UL - #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_SFT 5 + #define 
PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED 0x20UL + #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_MASK 0xc0UL + #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_SFT 6 u8 port_cnt; #define PORT_PHY_QCAPS_RESP_PORT_CNT_UNKNOWN 0x0UL #define PORT_PHY_QCAPS_RESP_PORT_CNT_1 0x1UL @@ -5365,6 +5506,7 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID 0x80UL #define RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID 0x100UL #define RING_ALLOC_REQ_ENABLES_SCHQ_ID 0x200UL + #define RING_ALLOC_REQ_ENABLES_MPC_CHNLS_TYPE 0x400UL u8 ring_type; #define RING_ALLOC_REQ_RING_TYPE_L2_CMPL 0x0UL #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL @@ -5424,7 +5566,14 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_INT_MODE_MSIX 0x2UL #define RING_ALLOC_REQ_INT_MODE_POLL 0x3UL #define RING_ALLOC_REQ_INT_MODE_LAST RING_ALLOC_REQ_INT_MODE_POLL - u8 unused_4[3]; + u8 mpc_chnls_type; + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_TCE 0x0UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_RCE 0x1UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_TE_CFA 0x2UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_RE_CFA 0x3UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_PRIMATE 0x4UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_LAST RING_ALLOC_REQ_MPC_CHNLS_TYPE_PRIMATE + u8 unused_4[2]; __le64 cq_handle; }; @@ -6661,7 +6810,7 @@ struct hwrm_cfa_vfr_alloc_output { u8 valid; }; -/* hwrm_cfa_vfr_free_input (size:384b/48B) */ +/* hwrm_cfa_vfr_free_input (size:448b/56B) */ struct hwrm_cfa_vfr_free_input { __le16 req_type; __le16 cmpl_ring; @@ -6669,6 +6818,9 @@ struct hwrm_cfa_vfr_free_input { __le16 target_id; __le64 resp_addr; char vfr_name[32]; + __le16 vf_id; + __le16 reserved; + u8 unused_0[4]; }; /* hwrm_cfa_vfr_free_output (size:128b/16B) */ @@ -6970,7 +7122,7 @@ struct ctx_hw_stats { __le64 tpa_aborts; }; -/* ctx_hw_stats_ext (size:1344b/168B) */ +/* ctx_hw_stats_ext (size:1408b/176B) */ struct ctx_hw_stats_ext { __le64 rx_ucast_pkts; __le64 rx_mcast_pkts; @@ -6993,6 +7145,7 @@ struct ctx_hw_stats_ext { __le64 rx_tpa_pkt; __le64 rx_tpa_bytes; __le64 rx_tpa_errors; + __le64 rx_tpa_events; }; /* hwrm_stat_ctx_alloc_input (size:256b/32B) */ @@ -7065,16 +7218,16 @@ struct hwrm_stat_ctx_query_output { __le64 tx_ucast_pkts; __le64 tx_mcast_pkts; __le64 tx_bcast_pkts; - __le64 tx_err_pkts; - __le64 tx_drop_pkts; + __le64 tx_discard_pkts; + __le64 tx_error_pkts; __le64 tx_ucast_bytes; __le64 tx_mcast_bytes; __le64 tx_bcast_bytes; __le64 rx_ucast_pkts; __le64 rx_mcast_pkts; __le64 rx_bcast_pkts; - __le64 rx_err_pkts; - __le64 rx_drop_pkts; + __le64 rx_discard_pkts; + __le64 rx_error_pkts; __le64 rx_ucast_bytes; __le64 rx_mcast_bytes; __le64 rx_bcast_bytes; @@ -7099,7 +7252,7 @@ struct hwrm_stat_ext_ctx_query_input { u8 unused_0[3]; }; -/* hwrm_stat_ext_ctx_query_output (size:1472b/184B) */ +/* hwrm_stat_ext_ctx_query_output (size:1536b/192B) */ struct hwrm_stat_ext_ctx_query_output { __le16 error_code; __le16 req_type; @@ -7126,6 +7279,7 @@ struct hwrm_stat_ext_ctx_query_output { __le64 rx_tpa_pkt; __le64 rx_tpa_bytes; __le64 rx_tpa_errors; + __le64 rx_tpa_events; u8 unused_0[7]; u8 valid; }; @@ -7702,6 +7856,77 @@ struct hwrm_dbg_read_direct_output { u8 valid; }; +/* hwrm_dbg_qcaps_input (size:192b/24B) */ +struct hwrm_dbg_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + u8 unused_0[6]; +}; + +/* hwrm_dbg_qcaps_output (size:192b/24B) */ +struct hwrm_dbg_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 fid; + u8 unused_0[2]; + 
__le32 coredump_component_disable_caps; + #define DBG_QCAPS_RESP_COREDUMP_COMPONENT_DISABLE_CAPS_NVRAM 0x1UL + __le32 flags; + #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_NVM 0x1UL + #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR 0x2UL + #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR 0x4UL + u8 unused_1[3]; + u8 valid; +}; + +/* hwrm_dbg_qcfg_input (size:192b/24B) */ +struct hwrm_dbg_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + __le16 flags; + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_MASK 0x3UL + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_SFT 0 + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_NVM 0x0UL + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_HOST_DDR 0x1UL + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR 0x2UL + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_LAST DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR + __le32 coredump_component_disable_flags; + #define DBG_QCFG_REQ_COREDUMP_COMPONENT_DISABLE_FLAGS_NVRAM 0x1UL +}; + +/* hwrm_dbg_qcfg_output (size:256b/32B) */ +struct hwrm_dbg_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 fid; + u8 unused_0[2]; + __le32 coredump_size; + __le32 flags; + #define DBG_QCFG_RESP_FLAGS_UART_LOG 0x1UL + #define DBG_QCFG_RESP_FLAGS_UART_LOG_SECONDARY 0x2UL + #define DBG_QCFG_RESP_FLAGS_FW_TRACE 0x4UL + #define DBG_QCFG_RESP_FLAGS_FW_TRACE_SECONDARY 0x8UL + #define DBG_QCFG_RESP_FLAGS_DEBUG_NOTIFY 0x10UL + #define DBG_QCFG_RESP_FLAGS_JTAG_DEBUG 0x20UL + __le16 async_cmpl_ring; + u8 unused_2[2]; + __le32 crashdump_size; + u8 unused_3[3]; + u8 valid; +}; + /* coredump_segment_record (size:128b/16B) */ struct coredump_segment_record { __le16 component_id; @@ -8048,7 +8273,7 @@ struct hwrm_nvm_get_dev_info_input { __le64 resp_addr; }; -/* hwrm_nvm_get_dev_info_output (size:256b/32B) */ +/* hwrm_nvm_get_dev_info_output (size:640b/80B) */ struct hwrm_nvm_get_dev_info_output { __le16 error_code; __le16 req_type; @@ -8063,6 +8288,22 @@ struct hwrm_nvm_get_dev_info_output { u8 nvm_cfg_ver_maj; u8 nvm_cfg_ver_min; u8 nvm_cfg_ver_upd; + u8 flags; + #define NVM_GET_DEV_INFO_RESP_FLAGS_FW_VER_VALID 0x1UL + char pkg_name[16]; + __le16 hwrm_fw_major; + __le16 hwrm_fw_minor; + __le16 hwrm_fw_build; + __le16 hwrm_fw_patch; + __le16 mgmt_fw_major; + __le16 mgmt_fw_minor; + __le16 mgmt_fw_build; + __le16 mgmt_fw_patch; + __le16 roce_fw_major; + __le16 roce_fw_minor; + __le16 roce_fw_build; + __le16 roce_fw_patch; + u8 unused_0[7]; u8 valid; }; @@ -8381,6 +8622,16 @@ struct hwrm_selftest_irq_output { u8 valid; }; +/* db_push_info (size:64b/8B) */ +struct db_push_info { + u32 push_size_push_index; + #define DB_PUSH_INFO_PUSH_INDEX_MASK 0xffffffUL + #define DB_PUSH_INFO_PUSH_INDEX_SFT 0 + #define DB_PUSH_INFO_PUSH_SIZE_MASK 0x1f000000UL + #define DB_PUSH_INFO_PUSH_SIZE_SFT 24 + u32 reserved32; +}; + /* fw_status_reg (size:32b/4B) */ struct fw_status_reg { u32 fw_status; @@ -8393,6 +8644,32 @@ struct fw_status_reg { #define FW_STATUS_REG_CRASHDUMP_ONGOING 0x40000UL #define FW_STATUS_REG_CRASHDUMP_COMPLETE 0x80000UL #define FW_STATUS_REG_SHUTDOWN 0x100000UL -}; + #define FW_STATUS_REG_CRASHED_NO_MASTER 0x200000UL +}; + +/* hcomm_status (size:64b/8B) */ +struct hcomm_status { + u32 sig_ver; + #define HCOMM_STATUS_VER_MASK 0xffUL + #define HCOMM_STATUS_VER_SFT 0 + #define HCOMM_STATUS_VER_LATEST 0x1UL + #define HCOMM_STATUS_VER_LAST HCOMM_STATUS_VER_LATEST + #define 
HCOMM_STATUS_SIGNATURE_MASK 0xffffff00UL + #define HCOMM_STATUS_SIGNATURE_SFT 8 + #define HCOMM_STATUS_SIGNATURE_VAL (0x484353UL << 8) + #define HCOMM_STATUS_SIGNATURE_LAST HCOMM_STATUS_SIGNATURE_VAL + u32 fw_status_loc; + #define HCOMM_STATUS_TRUE_ADDR_SPACE_MASK 0x3UL + #define HCOMM_STATUS_TRUE_ADDR_SPACE_SFT 0 + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_PCIE_CFG 0x0UL + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_GRC 0x1UL + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_BAR0 0x2UL + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_BAR1 0x3UL + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_LAST HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_BAR1 + #define HCOMM_STATUS_TRUE_OFFSET_MASK 0xfffffffcUL + #define HCOMM_STATUS_TRUE_OFFSET_SFT 2 +}; + +#define HCOMM_STATUS_STRUCT_LOC 0x31001F0UL #endif /* _BNXT_HSI_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index cc2ee4d0bd18..23b80aa171dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -1029,7 +1029,7 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) rc = bnxt_hwrm_exec_fwd_resp( bp, vf, sizeof(struct hwrm_port_phy_qcfg_input)); } else { - struct hwrm_port_phy_qcfg_output_compat phy_qcfg_resp = {0}; + struct hwrm_port_phy_qcfg_output phy_qcfg_resp = {0}; struct hwrm_port_phy_qcfg_input *phy_qcfg_req; phy_qcfg_req = diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 84536292b031..f7f10cfb3476 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -3009,10 +3009,10 @@ static int cnic_service_bnx2(void *data, void *status_blk) return cnic_service_bnx2_queues(dev); } -static void cnic_service_bnx2_msix(unsigned long data) +static void cnic_service_bnx2_msix(struct tasklet_struct *t) { - struct cnic_dev *dev = (struct cnic_dev *) data; - struct cnic_local *cp = dev->cnic_priv; + struct cnic_local *cp = from_tasklet(cp, t, cnic_irq_task); + struct cnic_dev *dev = cp->dev; cp->last_status_idx = cnic_service_bnx2_queues(dev); @@ -3134,10 +3134,10 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info) return last_status; } -static void cnic_service_bnx2x_bh(unsigned long data) +static void cnic_service_bnx2x_bh(struct tasklet_struct *t) { - struct cnic_dev *dev = (struct cnic_dev *) data; - struct cnic_local *cp = dev->cnic_priv; + struct cnic_local *cp = from_tasklet(cp, t, cnic_irq_task); + struct cnic_dev *dev = cp->dev; struct bnx2x *bp = netdev_priv(dev->netdev); u32 status_idx, new_status_idx; @@ -4458,8 +4458,7 @@ static int cnic_init_bnx2_irq(struct cnic_dev *dev) CNIC_WR(dev, base + BNX2_HC_CMD_TICKS_OFF, (64 << 16) | 220); cp->last_status_idx = cp->status_blk.bnx2->status_idx; - tasklet_init(&cp->cnic_irq_task, cnic_service_bnx2_msix, - (unsigned long) dev); + tasklet_setup(&cp->cnic_irq_task, cnic_service_bnx2_msix); err = cnic_request_irq(dev); if (err) return err; @@ -4868,8 +4867,7 @@ static int cnic_init_bnx2x_irq(struct cnic_dev *dev) struct cnic_eth_dev *ethdev = cp->ethdev; int err = 0; - tasklet_init(&cp->cnic_irq_task, cnic_service_bnx2x_bh, - (unsigned long) dev); + tasklet_setup(&cp->cnic_irq_task, cnic_service_bnx2x_bh); if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) err = cnic_request_irq(dev); diff --git a/drivers/net/ethernet/brocade/bna/bfa_cee.c b/drivers/net/ethernet/brocade/bna/bfa_cee.c index 09fb9315d1ae..06f221c44802 100644 --- 
a/drivers/net/ethernet/brocade/bna/bfa_cee.c +++ b/drivers/net/ethernet/brocade/bna/bfa_cee.c @@ -102,14 +102,10 @@ bfa_cee_get_stats_isr(struct bfa_cee *cee, enum bfa_status status) } /** - * bfa_cee_get_attr_isr() + * bfa_cee_reset_stats_isr - CEE ISR for reset-stats responses from f/w * - * @brief CEE ISR for reset-stats responses from f/w - * - * @param[in] cee - Pointer to the CEE module - * status - Return status from the f/w - * - * @return void + * @cee: Input Pointer to the CEE module + * @status: Return status from the f/w */ static void bfa_cee_reset_stats_isr(struct bfa_cee *cee, enum bfa_status status) @@ -148,9 +144,12 @@ bfa_nw_cee_mem_claim(struct bfa_cee *cee, u8 *dma_kva, u64 dma_pa) } /** - * bfa_cee_get_attr - Send the request to the f/w to fetch CEE attributes. + * bfa_nw_cee_get_attr - Send the request to the f/w to fetch CEE attributes. * * @cee: Pointer to the CEE module data structure. + * @attr: attribute requested + * @cbfn: function pointer + * @cbarg: function pointer arguments * * Return: status */ @@ -181,7 +180,9 @@ bfa_nw_cee_get_attr(struct bfa_cee *cee, struct bfa_cee_attr *attr, } /** - * bfa_cee_isrs - Handles Mail-box interrupts for CEE module. + * bfa_cee_isr - Handles Mail-box interrupts for CEE module. + * @cbarg: argument passed containing pointer to the CEE module data structure. + * @m: message pointer */ static void @@ -210,6 +211,7 @@ bfa_cee_isr(void *cbarg, struct bfi_mbmsg *m) /** * bfa_cee_notify - CEE module heart-beat failure handler. * + * @arg: argument passed containing pointer to the CEE module data structure. * @event: IOC event type */ diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index b9dd06b12945..cd933817a0b8 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -269,7 +269,7 @@ bfa_ioc_sm_enabling(struct bfa_ioc *ioc, enum ioc_event event) break; case IOC_E_PFFAILED: - /* !!! fall through !!! */ + fallthrough; case IOC_E_HWERROR: ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); bfa_fsm_set_state(ioc, bfa_ioc_sm_fail); @@ -365,7 +365,8 @@ bfa_ioc_sm_op(struct bfa_ioc *ioc, enum ioc_event event) case IOC_E_PFFAILED: case IOC_E_HWERROR: bfa_ioc_hb_stop(ioc); - /* !!! fall through !!! */ + fallthrough; + case IOC_E_HBFAIL: if (ioc->iocpf.auto_recover) bfa_fsm_set_state(ioc, bfa_ioc_sm_fail_retry); @@ -1763,7 +1764,7 @@ bfa_ioc_flash_fwver_cmp(struct bfa_ioc *ioc, return BFI_IOC_IMG_VER_INCOMP; } -/** +/* * Returns TRUE if driver is willing to work with current smem f/w version. */ bool @@ -2469,6 +2470,7 @@ bfa_ioc_isr(struct bfa_ioc *ioc, struct bfi_mbmsg *m) * * @ioc: memory for IOC * @bfa: driver instance structure + * @cbfn: callback function */ void bfa_nw_ioc_attach(struct bfa_ioc *ioc, void *bfa, struct bfa_ioc_cbfn *cbfn) @@ -2500,7 +2502,9 @@ bfa_nw_ioc_detach(struct bfa_ioc *ioc) /** * bfa_nw_ioc_pci_init - Setup IOC PCI properties. 
* + * @ioc: memory for IOC * @pcidev: PCI device information for this IOC + * @clscode: class code */ void bfa_nw_ioc_pci_init(struct bfa_ioc *ioc, struct bfa_pcidev *pcidev, @@ -2569,6 +2573,7 @@ bfa_nw_ioc_pci_init(struct bfa_ioc *ioc, struct bfa_pcidev *pcidev, /** * bfa_nw_ioc_mem_claim - Initialize IOC dma memory * + * @ioc: memory for IOC * @dm_kva: kernel virtual address of IOC dma memory * @dm_pa: physical address of IOC dma memory */ @@ -2636,6 +2641,8 @@ bfa_nw_ioc_mbox_regisr(struct bfa_ioc *ioc, enum bfi_mclass mc, * * @ioc: IOC instance * @cmd: Mailbox command + * @cbfn: callback function + * @cbarg: arguments to callback * * Waits if mailbox is busy. Responsibility of caller to serialize */ diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index cc80bbbefe87..7e4e831d720f 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3277,7 +3277,7 @@ bnad_change_mtu(struct net_device *netdev, int new_mtu) { int err, mtu; struct bnad *bnad = netdev_priv(netdev); - u32 rx_count = 0, frame, new_frame; + u32 frame, new_frame; mutex_lock(&bnad->conf_mutex); @@ -3293,12 +3293,9 @@ bnad_change_mtu(struct net_device *netdev, int new_mtu) /* only when transition is over 4K */ if ((frame <= 4096 && new_frame > 4096) || (frame > 4096 && new_frame <= 4096)) - rx_count = bnad_reinit_rx(bnad); + bnad_reinit_rx(bnad); } - /* rx_count > 0 - new rx created - * - Linux set err = 0 and return - */ err = bnad_mtu_set(bnad, new_frame); if (err) err = -EBUSY; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 4f1b41569260..5de47f6fde5a 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -7,6 +7,7 @@ #ifndef _MACB_H #define _MACB_H +#include <linux/clk.h> #include <linux/phylink.h> #include <linux/ptp_clock_kernel.h> #include <linux/net_tstamp.h> @@ -365,6 +366,8 @@ #define MACB_ISR_RLE_SIZE 1 #define MACB_TXERR_OFFSET 6 /* EN TX frame corrupt from error interrupt */ #define MACB_TXERR_SIZE 1 +#define MACB_RM9200_TBRE_OFFSET 6 /* EN may send new frame interrupt (RM9200) */ +#define MACB_RM9200_TBRE_SIZE 1 #define MACB_TCOMP_OFFSET 7 /* Enable transmit complete interrupt */ #define MACB_TCOMP_SIZE 1 #define MACB_ISR_LINK_OFFSET 9 /* Enable link change interrupt */ @@ -1204,10 +1207,10 @@ struct macb { phy_interface_t phy_interface; - /* AT91RM9200 transmit */ - struct sk_buff *skb; /* holds skb until xmit interrupt completes */ - dma_addr_t skb_physaddr; /* phys addr from pci_map_single */ - int skb_length; /* saved skb length for pci_unmap_single */ + /* AT91RM9200 transmit queue (1 on wire + 1 queued) */ + struct macb_tx_skb rm9200_txq[2]; + unsigned int rm9200_tx_tail; + unsigned int rm9200_tx_len; unsigned int max_tx_length; u64 ethtool_stats[GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES]; @@ -1298,4 +1301,14 @@ static inline bool gem_has_ptp(struct macb *bp) return !!(bp->caps & MACB_CAPS_GEM_HAS_PTP); } +/** + * struct macb_platform_data - platform data for MACB Ethernet used for PCI registration + * @pclk: platform clock + * @hclk: AHB clock + */ +struct macb_platform_data { + struct clk *pclk; + struct clk *hclk; +}; + #endif /* _MACB_H */ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 9179f7b0b900..883e47c5b1a7 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -23,7 +23,6 @@ #include <linux/netdevice.h> #include 
<linux/etherdevice.h> #include <linux/dma-mapping.h> -#include <linux/platform_data/macb.h> #include <linux/platform_device.h> #include <linux/phylink.h> #include <linux/of.h> @@ -458,9 +457,9 @@ static void macb_init_buffers(struct macb *bp) /** * macb_set_tx_clk() - Set a clock to a new frequency - * @clk Pointer to the clock to change - * @rate New frequency in Hz - * @dev Pointer to the struct net_device + * @clk: Pointer to the clock to change + * @speed: New frequency in Hz + * @dev: Pointer to the struct net_device */ static void macb_set_tx_clk(struct clk *clk, int speed, struct net_device *dev) { @@ -1465,9 +1464,9 @@ static int macb_poll(struct napi_struct *napi, int budget) return work_done; } -static void macb_hresp_error_task(unsigned long data) +static void macb_hresp_error_task(struct tasklet_struct *t) { - struct macb *bp = (struct macb *)data; + struct macb *bp = from_tasklet(bp, t, hresp_err_tasklet); struct net_device *dev = bp->dev; struct macb_queue *queue; unsigned int q; @@ -3909,6 +3908,7 @@ static int at91ether_start(struct macb *lp) MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE) | MACB_BIT(TCOMP) | + MACB_BIT(RM9200_TBRE) | MACB_BIT(ISR_ROVR) | MACB_BIT(HRESP)); @@ -3925,6 +3925,7 @@ static void at91ether_stop(struct macb *lp) MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE) | MACB_BIT(TCOMP) | + MACB_BIT(RM9200_TBRE) | MACB_BIT(ISR_ROVR) | MACB_BIT(HRESP)); @@ -3994,24 +3995,34 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct macb *lp = netdev_priv(dev); + unsigned long flags; - if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) { - netif_stop_queue(dev); + if (lp->rm9200_tx_len < 2) { + int desc = lp->rm9200_tx_tail; /* Store packet information (to free when Tx completed) */ - lp->skb = skb; - lp->skb_length = skb->len; - lp->skb_physaddr = dma_map_single(&lp->pdev->dev, skb->data, - skb->len, DMA_TO_DEVICE); - if (dma_mapping_error(&lp->pdev->dev, lp->skb_physaddr)) { + lp->rm9200_txq[desc].skb = skb; + lp->rm9200_txq[desc].size = skb->len; + lp->rm9200_txq[desc].mapping = dma_map_single(&lp->pdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) { dev_kfree_skb_any(skb); dev->stats.tx_dropped++; netdev_err(dev, "%s: DMA mapping error\n", __func__); return NETDEV_TX_OK; } + spin_lock_irqsave(&lp->lock, flags); + + lp->rm9200_tx_tail = (desc + 1) & 1; + lp->rm9200_tx_len++; + if (lp->rm9200_tx_len > 1) + netif_stop_queue(dev); + + spin_unlock_irqrestore(&lp->lock, flags); + /* Set address of the data in the Transmit Address register */ - macb_writel(lp, TAR, lp->skb_physaddr); + macb_writel(lp, TAR, lp->rm9200_txq[desc].mapping); /* Set length of the packet in the Transmit Control register */ macb_writel(lp, TCR, skb->len); @@ -4074,6 +4085,9 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id) struct net_device *dev = dev_id; struct macb *lp = netdev_priv(dev); u32 intstatus, ctl; + unsigned int desc; + unsigned int qlen; + u32 tsr; /* MAC Interrupt Status register indicates what interrupts are pending. * It is automatically cleared once read. 
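
A sketch, not taken from the patch: the cnic, macb, and liquidio hunks in this series all make the same mechanical conversion, where tasklet_init() with an (unsigned long) cookie becomes tasklet_setup(), and the callback now receives the tasklet_struct itself and recovers its enclosing private structure with from_tasklet(), a container_of() wrapper. The demo_* names below are made up for illustration; only the tasklet API calls are real:

#include <linux/interrupt.h>

struct demo_priv {			/* hypothetical driver private data */
	struct tasklet_struct irq_task;
	int pending;
};

/* New-style callback: handed the tasklet itself, not an opaque cookie. */
static void demo_task_fn(struct tasklet_struct *t)
{
	/* from_tasklet() is container_of() on the embedded member */
	struct demo_priv *priv = from_tasklet(priv, t, irq_task);

	priv->pending = 0;		/* deferred work goes here */
}

static void demo_init(struct demo_priv *priv)
{
	/* was: tasklet_init(&priv->irq_task, fn, (unsigned long)priv); */
	tasklet_setup(&priv->irq_task, demo_task_fn);
}

The cast-free form is also why cnic_service_bnx2x_bh() above now derives the device from cp->dev: the tasklet is embedded in struct cnic_local, so that is the structure from_tasklet() can recover, and the cnic_dev pointer must be fetched from it rather than passed in as the cookie.
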
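
Also not part of the patch: the .get_fecparam/.set_fecparam operations wired into bnxt_ethtool_ops above are reached from user space through the ETHTOOL_GFECPARAM and ETHTOOL_SFECPARAM ioctls (the ethtool utility exposes them as --show-fec and --set-fec). A self-contained sketch of the query side; the program itself is illustrative, while struct ethtool_fecparam, the ioctl numbers, and the ETHTOOL_FEC_* bits come from the standard uapi headers:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(int argc, char **argv)
{
	struct ethtool_fecparam fec = { .cmd = ETHTOOL_GFECPARAM };
	struct ifreq ifr = { 0 };
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <netdev>\n", argv[0]);
		return 1;
	}
	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}
	strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&fec;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_GFECPARAM");	/* EOPNOTSUPP if the driver lacks the op */
		close(fd);
		return 1;
	}
	/* fec.fec is the configured mask, fec.active_fec the mode on the wire */
	printf("configured FEC 0x%x, active FEC 0x%x (RS %s)\n",
	       fec.fec, fec.active_fec,
	       (fec.active_fec & ETHTOOL_FEC_RS) ? "on" : "off");
	close(fd);
	return 0;
}
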
@@ -4085,20 +4099,39 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id) at91ether_rx(dev); /* Transmit complete */ - if (intstatus & MACB_BIT(TCOMP)) { + if (intstatus & (MACB_BIT(TCOMP) | MACB_BIT(RM9200_TBRE))) { /* The TCOM bit is set even if the transmission failed */ if (intstatus & (MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE))) dev->stats.tx_errors++; - if (lp->skb) { - dev_consume_skb_irq(lp->skb); - lp->skb = NULL; - dma_unmap_single(&lp->pdev->dev, lp->skb_physaddr, - lp->skb_length, DMA_TO_DEVICE); + spin_lock(&lp->lock); + + tsr = macb_readl(lp, TSR); + + /* we have three possibilities here: + * - all pending packets transmitted (TGO, implies BNQ) + * - only first packet transmitted (!TGO && BNQ) + * - two frames pending (!TGO && !BNQ) + * Note that TGO ("transmit go") is called "IDLE" on RM9200. + */ + qlen = (tsr & MACB_BIT(TGO)) ? 0 : + (tsr & MACB_BIT(RM9200_BNQ)) ? 1 : 2; + + while (lp->rm9200_tx_len > qlen) { + desc = (lp->rm9200_tx_tail - lp->rm9200_tx_len) & 1; + dev_consume_skb_irq(lp->rm9200_txq[desc].skb); + lp->rm9200_txq[desc].skb = NULL; + dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping, + lp->rm9200_txq[desc].size, DMA_TO_DEVICE); dev->stats.tx_packets++; - dev->stats.tx_bytes += lp->skb_length; + dev->stats.tx_bytes += lp->rm9200_txq[desc].size; + lp->rm9200_tx_len--; } - netif_wake_queue(dev); + + if (lp->rm9200_tx_len < 2 && netif_queue_stopped(dev)) + netif_wake_queue(dev); + + spin_unlock(&lp->lock); } /* Work-around for EMAC Errata section 41.3.1 */ @@ -4559,8 +4592,7 @@ static int macb_probe(struct platform_device *pdev) goto err_out_unregister_mdio; } - tasklet_init(&bp->hresp_err_tasklet, macb_hresp_error_task, - (unsigned long)bp); + tasklet_setup(&bp->hresp_err_tasklet, macb_hresp_error_task); netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n", macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID), diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index cd7d0332cba3..353393dea639 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /** - * Cadence GEM PCI wrapper. + * DOC: Cadence GEM PCI wrapper. * * Copyright (C) 2016 Cadence Design Systems - https://www.cadence.com * @@ -13,7 +13,6 @@ #include <linux/etherdevice.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/platform_data/macb.h> #include <linux/platform_device.h> #include "macb.h" diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 05a3d067c3fc..bbb453c6a5f7 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1246,6 +1246,8 @@ static int xgmac_poll(struct napi_struct *napi, int budget) /** * xgmac_tx_timeout * @dev : Pointer to net device structure + * @txqueue: index of the hung transmit queue + * * Description: this function is called when a packet transmission fails to * complete within a reasonable tmrate. 
The driver will mark the error in the * netdev structure and arrange for the device to be reset to a sane state diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.c b/drivers/net/ethernet/cavium/common/cavium_ptp.c index 81ff9ac73f9a..9fd717b9cf69 100644 --- a/drivers/net/ethernet/cavium/common/cavium_ptp.c +++ b/drivers/net/ethernet/cavium/common/cavium_ptp.c @@ -86,7 +86,7 @@ EXPORT_SYMBOL(cavium_ptp_put); /** * cavium_ptp_adjfine() - Adjust ptp frequency - * @ptp: PTP clock info + * @ptp_info: PTP clock info * @scaled_ppm: how much to adjust by, in parts per million, but with a * 16 bit binary fractional field */ @@ -134,7 +134,7 @@ static int cavium_ptp_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm) /** * cavium_ptp_adjtime() - Adjust ptp time - * @ptp: PTP clock info + * @ptp_info: PTP clock info * @delta: how much to adjust by, in nanosecs */ static int cavium_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta) @@ -155,7 +155,7 @@ static int cavium_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta) /** * cavium_ptp_gettime() - Get hardware clock time with adjustment - * @ptp: PTP clock info + * @ptp_info: PTP clock info * @ts: timespec */ static int cavium_ptp_gettime(struct ptp_clock_info *ptp_info, @@ -177,7 +177,7 @@ static int cavium_ptp_gettime(struct ptp_clock_info *ptp_info, /** * cavium_ptp_settime() - Set hardware clock time. Reset adjustment - * @ptp: PTP clock info + * @ptp_info: PTP clock info * @ts: timespec */ static int cavium_ptp_settime(struct ptp_clock_info *ptp_info, @@ -199,7 +199,7 @@ static int cavium_ptp_settime(struct ptp_clock_info *ptp_info, /** * cavium_ptp_enable() - Request to enable or disable an ancillary feature. - * @ptp: PTP clock info + * @ptp_info: PTP clock info * @rq: request * @on: is it on */ diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c index 50b533ff58e6..2a6d1cadac9e 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c @@ -25,7 +25,9 @@ #include "octeon_main.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" +#include "cn68xx_device.h" #include "cn68xx_regs.h" +#include "cn68xx_device.h" static void lio_cn68xx_set_dpi_regs(struct octeon_device *oct) { diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index e40c64b79f66..9ef172976b35 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -32,8 +32,8 @@ #define OCTNIC_MAX_SG MAX_SKB_FRAGS /** - * \brief Delete gather lists - * @param lio per-network private data + * lio_delete_glists - Delete gather lists + * @lio: per-network private data */ void lio_delete_glists(struct lio *lio) { @@ -73,8 +73,10 @@ void lio_delete_glists(struct lio *lio) } /** - * \brief Setup gather lists - * @param lio per-network private data + * lio_setup_glists - Setup gather lists + * @oct: octeon_device + * @lio: per-network private data + * @num_iqs: count of iqs to allocate */ int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) { @@ -521,12 +523,12 @@ static void lio_update_txq_status(struct octeon_device *oct, int iq_num) } /** - * \brief Setup output queue - * @param oct octeon device - * @param q_no which queue - * @param num_descs how many descriptors - * @param desc_size size of each descriptor - * @param app_ctx application context + * octeon_setup_droq - Setup output queue + * @oct: 
octeon device + * @q_no: which queue + * @num_descs: how many descriptors + * @desc_size: size of each descriptor + * @app_ctx: application context */ static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs, int desc_size, void *app_ctx) @@ -555,16 +557,17 @@ static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs, return ret_val; } -/** Routine to push packets arriving on Octeon interface upto network layer. - * @param oct_id - octeon device id. - * @param skbuff - skbuff struct to be passed to network layer. - * @param len - size of total data received. - * @param rh - Control header associated with the packet - * @param param - additional control data with the packet - * @param arg - farg registered in droq_ops +/** + * liquidio_push_packet - Routine to push packets arriving on Octeon interface upto network layer. + * @octeon_id:octeon device id. + * @skbuff: skbuff struct to be passed to network layer. + * @len: size of total data received. + * @rh: Control header associated with the packet + * @param: additional control data with the packet + * @arg: farg registered in droq_ops */ static void -liquidio_push_packet(u32 octeon_id __attribute__((unused)), +liquidio_push_packet(u32 __maybe_unused octeon_id, void *skbuff, u32 len, union octeon_rh *rh, @@ -698,8 +701,8 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)), } /** - * \brief wrapper for calling napi_schedule - * @param param parameters to pass to napi_schedule + * napi_schedule_wrapper - wrapper for calling napi_schedule + * @param: parameters to pass to napi_schedule * * Used when scheduling on different CPUs */ @@ -711,8 +714,8 @@ static void napi_schedule_wrapper(void *param) } /** - * \brief callback when receive interrupt occurs and we are in NAPI mode - * @param arg pointer to octeon output queue + * liquidio_napi_drv_callback - callback when receive interrupt occurs and we are in NAPI mode + * @arg: pointer to octeon output queue */ static void liquidio_napi_drv_callback(void *arg) { @@ -737,9 +740,9 @@ static void liquidio_napi_drv_callback(void *arg) } /** - * \brief Entry point for NAPI polling - * @param napi NAPI structure - * @param budget maximum number of items to process + * liquidio_napi_poll - Entry point for NAPI polling + * @napi: NAPI structure + * @budget: maximum number of items to process */ static int liquidio_napi_poll(struct napi_struct *napi, int budget) { @@ -792,9 +795,11 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget) } /** - * \brief Setup input and output queues - * @param octeon_dev octeon device - * @param ifidx Interface index + * liquidio_setup_io_queues - Setup input and output queues + * @octeon_dev: octeon device + * @ifidx: Interface index + * @num_iqs: input io queue count + * @num_oqs: output io queue count * * Note: Queues are with respect to the octeon device. 
Thus * an input queue is for egress packets, and output queues @@ -927,7 +932,7 @@ int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret) } irqreturn_t -liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev) +liquidio_msix_intr_handler(int __maybe_unused irq, void *dev) { struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev; struct octeon_device *oct = ioq_vector->oct_dev; @@ -943,8 +948,8 @@ liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev) } /** - * \brief Droq packet processor sceduler - * @param oct octeon device + * liquidio_schedule_droq_pkt_handlers - Droq packet processor scheduler + * @oct: octeon device */ static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) { @@ -972,13 +977,12 @@ static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) } /** - * \brief Interrupt handler for octeon - * @param irq unused - * @param dev octeon device + * liquidio_legacy_intr_handler - Interrupt handler for octeon + * @irq: unused + * @dev: octeon device */ static -irqreturn_t liquidio_legacy_intr_handler(int irq __attribute__((unused)), - void *dev) +irqreturn_t liquidio_legacy_intr_handler(int __maybe_unused irq, void *dev) { struct octeon_device *oct = (struct octeon_device *)dev; irqreturn_t ret; @@ -999,8 +1003,9 @@ irqreturn_t liquidio_legacy_intr_handler(int irq __attribute__((unused)), } /** - * \brief Setup interrupt for octeon device - * @param oct octeon device + * octeon_setup_interrupt - Setup interrupt for octeon device + * @oct: octeon device + * @num_ioqs: number of queues * * Enable interrupt in Octeon device as given in the PCI interrupt mask. */ @@ -1083,7 +1088,7 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs) dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n"); num_ioq_vectors = oct->num_msix_irqs; - /** For PF, there is one non-ioq interrupt handler */ + /* For PF, there is one non-ioq interrupt handler */ if (OCTEON_CN23XX_PF(oct)) { num_ioq_vectors -= 1; @@ -1126,13 +1131,13 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs) dev_err(&oct->pci_dev->dev, "Request_irq failed for MSIX interrupt Error: %d\n", irqret); - /** Freeing the non-ioq irq vector here . */ + /* Freeing the non-ioq irq vector here. */ free_irq(msix_entries[num_ioq_vectors].vector, oct); while (i) { i--; - /** clearing affinity mask. */ + /* clearing affinity mask. */ irq_set_affinity_hint( msix_entries[i].vector, NULL); @@ -1197,8 +1202,9 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs) } /** - * \brief Net device change_mtu - * @param netdev network device + * liquidio_change_mtu - Net device change_mtu + * @netdev: network device + * @new_mtu: the new maximum transmission unit size */ int liquidio_change_mtu(struct net_device *netdev, int new_mtu) { diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 8e0ed01e7f03..7d00d3a8ded4 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -69,9 +69,9 @@ MODULE_PARM_DESC(console_bitmask, "Bitmask indicating which consoles have debug output redirected to syslog."); /** - * \brief determines if a given console has debug enabled. - * @param console console to check - * @returns 1 = enabled. 0 otherwise + * octeon_console_debug_enabled - determines if a given console has debug enabled. 
+ * @console: console to check + * Return: 1 = enabled. 0 otherwise */ static int octeon_console_debug_enabled(u32 console) { @@ -126,7 +126,7 @@ union tx_info { } s; }; -/** Octeon device properties to be used by the NIC module. +/* Octeon device properties to be used by the NIC module. * Each octeon device in the system will be represented * by this structure in the NIC module. */ @@ -161,13 +161,13 @@ static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx, static struct handshake handshake[MAX_OCTEON_DEVICES]; static struct completion first_stage; -static void octeon_droq_bh(unsigned long pdev) +static void octeon_droq_bh(struct tasklet_struct *t) { int q_no; int reschedule = 0; - struct octeon_device *oct = (struct octeon_device *)pdev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = from_tasklet(oct_priv, t, + droq_tasklet); + struct octeon_device *oct = oct_priv->dev; for (q_no = 0; q_no < MAX_OCTEON_OUTPUT_QUEUES(oct); q_no++) { if (!(oct->io_qmask.oq & BIT_ULL(q_no))) @@ -222,8 +222,8 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct) } /** - * \brief Forces all IO queues off on a given device - * @param oct Pointer to Octeon device + * force_io_queues_off - Forces all IO queues off on a given device + * @oct: Pointer to Octeon device */ static void force_io_queues_off(struct octeon_device *oct) { @@ -238,8 +238,8 @@ static void force_io_queues_off(struct octeon_device *oct) } /** - * \brief Cause device to go quiet so it can be safely removed/reset/etc - * @param oct Pointer to Octeon device + * pcierror_quiesce_device - Cause device to go quiet so it can be safely removed/reset/etc + * @oct: Pointer to Octeon device */ static inline void pcierror_quiesce_device(struct octeon_device *oct) { @@ -283,8 +283,8 @@ static inline void pcierror_quiesce_device(struct octeon_device *oct) } /** - * \brief Cleanup PCI AER uncorrectable error status - * @param dev Pointer to PCI device + * cleanup_aer_uncorrect_error_status - Cleanup PCI AER uncorrectable error status + * @dev: Pointer to PCI device */ static void cleanup_aer_uncorrect_error_status(struct pci_dev *dev) { @@ -303,8 +303,8 @@ static void cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } /** - * \brief Stop all PCI IO to a given device - * @param dev Pointer to Octeon device + * stop_pci_io - Stop all PCI IO to a given device + * @oct: Pointer to Octeon device */ static void stop_pci_io(struct octeon_device *oct) { @@ -332,9 +332,9 @@ static void stop_pci_io(struct octeon_device *oct) } /** - * \brief called when PCI error is detected - * @param pdev Pointer to PCI device - * @param state The current pci connection state + * liquidio_pcie_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state * * This function is called after a PCI bus error affecting * this device has been detected. @@ -362,11 +362,10 @@ static pci_ers_result_t liquidio_pcie_error_detected(struct pci_dev *pdev, } /** - * \brief mmio handler - * @param pdev Pointer to PCI device + * liquidio_pcie_mmio_enabled - mmio handler + * @pdev: Pointer to PCI device */ -static pci_ers_result_t liquidio_pcie_mmio_enabled( - struct pci_dev *pdev __attribute__((unused))) +static pci_ers_result_t liquidio_pcie_mmio_enabled(struct pci_dev __maybe_unused *pdev) { /* We should never hit this since we never ask for a reset for a Fatal * Error. We always return DISCONNECT in io_error above. 
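The octeon_droq_bh() hunk above shows the shape of the tree-wide tasklet conversion: the callback now receives the tasklet_struct itself, and the owning object is recovered with from_tasklet() rather than being cast out of an unsigned long. A minimal sketch of the pattern, with my_dev and my_tasklet as hypothetical names:

struct my_dev {
	struct tasklet_struct my_tasklet;
	int pending;
};

static void my_tasklet_fn(struct tasklet_struct *t)
{
	/* Walks from the embedded tasklet back to its container. */
	struct my_dev *md = from_tasklet(md, t, my_tasklet);

	md->pending = 0;
}

/* Registration no longer smuggles a pointer through an unsigned long: */
tasklet_setup(&md->my_tasklet, my_tasklet_fn);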
@@ -376,14 +375,13 @@ static pci_ers_result_t liquidio_pcie_mmio_enabled( } /** - * \brief called after the pci bus has been reset. - * @param pdev Pointer to PCI device + * liquidio_pcie_slot_reset - called after the pci bus has been reset. + * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation * resembles the first-half of the octeon_resume routine. */ -static pci_ers_result_t liquidio_pcie_slot_reset( - struct pci_dev *pdev __attribute__((unused))) +static pci_ers_result_t liquidio_pcie_slot_reset(struct pci_dev __maybe_unused *pdev) { /* We should never hit this since we never ask for a reset for a Fatal * Error. We always return DISCONNECT in io_error above. @@ -393,14 +391,14 @@ static pci_ers_result_t liquidio_pcie_slot_reset( } /** - * \brief called when traffic can start flowing again. - * @param pdev Pointer to PCI device + * liquidio_pcie_resume - called when traffic can start flowing again. + * @pdev: Pointer to PCI device * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. Implementation resembles the * second-half of the octeon_resume routine. */ -static void liquidio_pcie_resume(struct pci_dev *pdev __attribute__((unused))) +static void liquidio_pcie_resume(struct pci_dev __maybe_unused *pdev) { /* Nothing to be done here. */ } @@ -447,7 +445,7 @@ static struct pci_driver liquidio_pci_driver = { }; /** - * \brief register PCI driver + * liquidio_init_pci - register PCI driver */ static int liquidio_init_pci(void) { @@ -455,7 +453,7 @@ static int liquidio_init_pci(void) } /** - * \brief unregister PCI driver + * liquidio_deinit_pci - unregister PCI driver */ static void liquidio_deinit_pci(void) { @@ -463,9 +461,9 @@ static void liquidio_deinit_pci(void) } /** - * \brief Check Tx queue status, and take appropriate action - * @param lio per-network private data - * @returns 0 if full, number of queues woken up otherwise + * check_txq_status - Check Tx queue status, and take appropriate action + * @lio: per-network private data + * Return: 0 if full, number of queues woken up otherwise */ static inline int check_txq_status(struct lio *lio) { @@ -491,8 +489,8 @@ static inline int check_txq_status(struct lio *lio) } /** - * \brief Print link information - * @param netdev network device + * print_link_info - Print link information + * @netdev: network device */ static void print_link_info(struct net_device *netdev) { @@ -513,8 +511,8 @@ static void print_link_info(struct net_device *netdev) } /** - * \brief Routine to notify MTU change - * @param work work_struct data structure + * octnet_link_status_change - Routine to notify MTU change + * @work: work_struct data structure */ static void octnet_link_status_change(struct work_struct *work) { @@ -531,8 +529,8 @@ static void octnet_link_status_change(struct work_struct *work) } /** - * \brief Sets up the mtu status change work - * @param netdev network device + * setup_link_status_change_wq - Sets up the mtu status change work + * @netdev: network device */ static inline int setup_link_status_change_wq(struct net_device *netdev) { @@ -563,9 +561,9 @@ static inline void cleanup_link_status_change_wq(struct net_device *netdev) } /** - * \brief Update link status - * @param netdev network device - * @param ls link status structure + * update_link_status - Update link status + * @netdev: network device + * @ls: link status structure * * Called on receipt of a link status response from the core application to * update each 
interface's link status. @@ -663,10 +661,9 @@ static void lio_sync_octeon_time(struct work_struct *work) } /** - * setup_sync_octeon_time_wq - Sets up the work to periodically update - * local time to octeon firmware + * setup_sync_octeon_time_wq - prepare work to periodically update local time to octeon firmware * - * @netdev - network device which should send time update to firmware + * @netdev: network device which should send time update to firmware **/ static inline int setup_sync_octeon_time_wq(struct net_device *netdev) { @@ -690,10 +687,12 @@ static inline int setup_sync_octeon_time_wq(struct net_device *netdev) } /** - * cleanup_sync_octeon_time_wq - stop scheduling and destroy the work created - * to periodically update local time to octeon firmware + * cleanup_sync_octeon_time_wq - destroy wq * - * @netdev - network device which should send time update to firmware + * @netdev: network device which should send time update to firmware + * + * Stop scheduling and destroy the work created to periodically update local + * time to octeon firmware. **/ static inline void cleanup_sync_octeon_time_wq(struct net_device *netdev) { @@ -828,13 +827,12 @@ static int liquidio_watchdog(void *param) } /** - * \brief PCI probe handler - * @param pdev PCI device structure - * @param ent unused + * liquidio_probe - PCI probe handler + * @pdev: PCI device structure + * @ent: unused */ static int -liquidio_probe(struct pci_dev *pdev, - const struct pci_device_id *ent __attribute__((unused))) +liquidio_probe(struct pci_dev *pdev, const struct pci_device_id __maybe_unused *ent) { struct octeon_device *oct_dev = NULL; struct handshake *hs; @@ -924,8 +922,8 @@ static bool fw_type_is_auto(void) } /** - * \brief PCI FLR for each Octeon device. - * @param oct octeon device + * octeon_pci_flr - PCI FLR for each Octeon device. + * @oct: octeon device */ static void octeon_pci_flr(struct octeon_device *oct) { @@ -951,9 +949,8 @@ static void octeon_pci_flr(struct octeon_device *oct) } /** - *\brief Destroy resources associated with octeon device - * @param pdev PCI device structure - * @param ent unused + * octeon_destroy_resources - Destroy resources associated with octeon device + * @oct: octeon device */ static void octeon_destroy_resources(struct octeon_device *oct) { @@ -1152,9 +1149,9 @@ static void octeon_destroy_resources(struct octeon_device *oct) } /** - * \brief Send Rx control command - * @param lio per-network private data - * @param start_stop whether to start or stop + * send_rx_ctrl_cmd - Send Rx control command + * @lio: per-network private data + * @start_stop: whether to start or stop */ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) { @@ -1210,9 +1207,9 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) } /** - * \brief Destroy NIC device interface - * @param oct octeon device - * @param ifidx which interface to destroy + * liquidio_destroy_nic_device - Destroy NIC device interface + * @oct: octeon device + * @ifidx: which interface to destroy * * Cleanup associated with each interface for an Octeon device when NIC * module is being unloaded or if initialization fails during load. 
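All of the \brief/@param rewrites in these hunks converge on the canonical kernel-doc layout, which scripts/kernel-doc can parse and check against the prototype. For reference, the expected shape is roughly the following (function and parameter names are purely illustrative):

/**
 * my_function - one-line summary of what it does
 * @first: meaning of the first argument
 * @second: meaning of the second argument
 *
 * Optional longer description.
 *
 * Return: meaning of the return value
 */
static int my_function(int first, int second);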
@@ -1272,8 +1269,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) } /** - * \brief Stop complete NIC functionality - * @param oct octeon device + * liquidio_stop_nic_module - Stop complete NIC functionality + * @oct: octeon device */ static int liquidio_stop_nic_module(struct octeon_device *oct) { @@ -1313,8 +1310,8 @@ static int liquidio_stop_nic_module(struct octeon_device *oct) } /** - * \brief Cleans up resources at unload time - * @param pdev PCI device structure + * liquidio_remove - Cleans up resources at unload time + * @pdev: PCI device structure */ static void liquidio_remove(struct pci_dev *pdev) { @@ -1346,8 +1343,8 @@ static void liquidio_remove(struct pci_dev *pdev) } /** - * \brief Identify the Octeon device and to map the BAR address space - * @param oct octeon device + * octeon_chip_specific_setup - Identify the Octeon device and map the BAR address space + * @oct: octeon device */ static int octeon_chip_specific_setup(struct octeon_device *oct) { @@ -1390,8 +1387,8 @@ static int octeon_chip_specific_setup(struct octeon_device *oct) } /** - * \brief PCI initialization for each Octeon device. - * @param oct octeon device + * octeon_pci_os_setup - PCI initialization for each Octeon device. + * @oct: octeon device */ static int octeon_pci_os_setup(struct octeon_device *oct) { @@ -1414,8 +1411,8 @@ static int octeon_pci_os_setup(struct octeon_device *oct) } /** - * \brief Unmap and free network buffer - * @param buf buffer + * free_netbuf - Unmap and free network buffer + * @buf: buffer */ static void free_netbuf(void *buf) { @@ -1434,8 +1431,8 @@ static void free_netbuf(void *buf) } /** - * \brief Unmap and free gather buffer - * @param buf buffer + * free_netsgbuf - Unmap and free gather buffer + * @buf: buffer */ static void free_netsgbuf(void *buf) { @@ -1474,8 +1471,8 @@ static void free_netsgbuf(void *buf) } /** - * \brief Unmap and free gather buffer with response - * @param buf buffer + * free_netsgbuf_with_resp - Unmap and free gather buffer with response + * @buf: buffer */ static void free_netsgbuf_with_resp(void *buf) { @@ -1518,9 +1515,9 @@ static void free_netsgbuf_with_resp(void *buf) } /** - * \brief Adjust ptp frequency - * @param ptp PTP clock info - * @param ppb how much to adjust by, in parts-per-billion + * liquidio_ptp_adjfreq - Adjust ptp frequency + * @ptp: PTP clock info + * @ppb: how much to adjust by, in parts-per-billion */ static int liquidio_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) { @@ -1555,9 +1552,9 @@ static int liquidio_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) } /** - * \brief Adjust ptp time - * @param ptp PTP clock info - * @param delta how much to adjust by, in nanosecs + * liquidio_ptp_adjtime - Adjust ptp time + * @ptp: PTP clock info + * @delta: how much to adjust by, in nanosecs */ static int liquidio_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { @@ -1572,9 +1569,9 @@ static int liquidio_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) } /** - * \brief Get hardware clock time, including any adjustment - * @param ptp PTP clock info - * @param ts timespec + * liquidio_ptp_gettime - Get hardware clock time, including any adjustment + * @ptp: PTP clock info + * @ts: timespec */ static int liquidio_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) @@ -1595,9 +1592,9 @@ static int liquidio_ptp_gettime(struct ptp_clock_info *ptp, } /** - * \brief Set hardware clock time. 
Reset adjustment - * @param ptp PTP clock info - * @param ts timespec + * liquidio_ptp_settime - Set hardware clock time. Reset adjustment + * @ptp: PTP clock info + * @ts: timespec */ static int liquidio_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) @@ -1618,22 +1615,22 @@ static int liquidio_ptp_settime(struct ptp_clock_info *ptp, } /** - * \brief Check if PTP is enabled - * @param ptp PTP clock info - * @param rq request - * @param on is it on + * liquidio_ptp_enable - Check if PTP is enabled + * @ptp: PTP clock info + * @rq: request + * @on: is it on */ static int -liquidio_ptp_enable(struct ptp_clock_info *ptp __attribute__((unused)), - struct ptp_clock_request *rq __attribute__((unused)), - int on __attribute__((unused))) +liquidio_ptp_enable(struct ptp_clock_info __maybe_unused *ptp, + struct ptp_clock_request __maybe_unused *rq, + int __maybe_unused on) { return -EOPNOTSUPP; } /** - * \brief Open PTP clock source - * @param netdev network device + * oct_ptp_open - Open PTP clock source + * @netdev: network device */ static void oct_ptp_open(struct net_device *netdev) { @@ -1665,8 +1662,8 @@ static void oct_ptp_open(struct net_device *netdev) } /** - * \brief Init PTP clock - * @param oct octeon device + * liquidio_ptp_init - Init PTP clock + * @oct: octeon device */ static void liquidio_ptp_init(struct octeon_device *oct) { @@ -1682,8 +1679,8 @@ static void liquidio_ptp_init(struct octeon_device *oct) } /** - * \brief Load firmware to device - * @param oct octeon device + * load_firmware - Load firmware to device + * @oct: octeon device * * Maps device to firmware filename, requests firmware, and downloads it */ @@ -1721,8 +1718,8 @@ static int load_firmware(struct octeon_device *oct) } /** - * \brief Poll routine for checking transmit queue status - * @param work work_struct data structure + * octnet_poll_check_txq_status - Poll routine for checking transmit queue status + * @work: work_struct data structure */ static void octnet_poll_check_txq_status(struct work_struct *work) { @@ -1738,8 +1735,8 @@ static void octnet_poll_check_txq_status(struct work_struct *work) } /** - * \brief Sets up the txq poll check - * @param netdev network device + * setup_tx_poll_fn - Sets up the txq poll check + * @netdev: network device */ static inline int setup_tx_poll_fn(struct net_device *netdev) { @@ -1771,8 +1768,8 @@ static inline void cleanup_tx_poll_fn(struct net_device *netdev) } /** - * \brief Net device open for LiquidIO - * @param netdev network device + * liquidio_open - Net device open for LiquidIO + * @netdev: network device */ static int liquidio_open(struct net_device *netdev) { @@ -1831,8 +1828,8 @@ static int liquidio_open(struct net_device *netdev) } /** - * \brief Net device stop for LiquidIO - * @param netdev network device + * liquidio_stop - Net device stop for LiquidIO + * @netdev: network device */ static int liquidio_stop(struct net_device *netdev) { @@ -1896,8 +1893,8 @@ static int liquidio_stop(struct net_device *netdev) } /** - * \brief Converts a mask based on net device flags - * @param netdev network device + * get_new_flags - Converts a mask based on net device flags + * @netdev: network device * * This routine generates a octnet_ifflags mask from the net device flags * received from the OS. 
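The __attribute__((unused)) to __maybe_unused changes above are mechanical: __maybe_unused is the kernel's spelling of the same compiler attribute (defined in include/linux/compiler_attributes.h, approximately as below), and the preferred placement is inside the declarator rather than trailing it. A sketch with a hypothetical callback:

/* Approximate kernel definition: */
#define __maybe_unused __attribute__((__unused__))

/* Before: */
static int demo_enable(struct ptp_clock_info *ptp __attribute__((unused)));

/* After: */
static int demo_enable(struct ptp_clock_info __maybe_unused *ptp);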
@@ -1929,8 +1926,8 @@ static inline enum octnet_ifflags get_new_flags(struct net_device *netdev) } /** - * \brief Net device set_multicast_list - * @param netdev network device + * liquidio_set_mcast_list - Net device set_multicast_list + * @netdev: network device */ static void liquidio_set_mcast_list(struct net_device *netdev) { @@ -1977,8 +1974,9 @@ static void liquidio_set_mcast_list(struct net_device *netdev) } /** - * \brief Net device set_mac_address - * @param netdev network device + * liquidio_set_mac - Net device set_mac_address + * @netdev: network device + * @p: pointer to sockaddr */ static int liquidio_set_mac(struct net_device *netdev, void *p) { @@ -2096,10 +2094,9 @@ liquidio_get_stats64(struct net_device *netdev, } /** - * \brief Handler for SIOCSHWTSTAMP ioctl - * @param netdev network device - * @param ifr interface request - * @param cmd command + * hwtstamp_ioctl - Handler for SIOCSHWTSTAMP ioctl + * @netdev: network device + * @ifr: interface request */ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) { @@ -2154,10 +2151,10 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) } /** - * \brief ioctl handler - * @param netdev network device - * @param ifr interface request - * @param cmd command + * liquidio_ioctl - ioctl handler + * @netdev: network device + * @ifr: interface request + * @cmd: command */ static int liquidio_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { @@ -2174,9 +2171,10 @@ static int liquidio_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } /** - * \brief handle a Tx timestamp response - * @param status response status - * @param buf pointer to skb + * handle_timestamp - handle a Tx timestamp response + * @oct: octeon device + * @status: response status + * @buf: pointer to skb */ static void handle_timestamp(struct octeon_device *oct, u32 status, @@ -2217,10 +2215,12 @@ static void handle_timestamp(struct octeon_device *oct, tx_buffer_free(skb); } -/* \brief Send a data packet that will be timestamped - * @param oct octeon device - * @param ndata pointer to network data - * @param finfo pointer to private network data +/** + * send_nic_timestamp_pkt - Send a data packet that will be timestamped + * @oct: octeon device + * @ndata: pointer to network data + * @finfo: pointer to private network data + * @xmit_more: more is coming */ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, @@ -2276,10 +2276,12 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, return retval; } -/** \brief Transmit networks packets to the Octeon interface - * @param skbuff skbuff struct to be passed to network layer. - * @param netdev pointer to network device - * @returns whether the packet was transmitted to the device okay or not +/** + * liquidio_xmit - Transmit network packets to the Octeon interface + * @skb: skbuff struct to be passed to network layer. 
+ * @netdev: pointer to network device + * + * Return: whether the packet was transmitted to the device okay or not * (NETDEV_TX_OK or NETDEV_TX_BUSY) */ static netdev_tx_t liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) @@ -2524,8 +2526,10 @@ lio_xmit_failed: return NETDEV_TX_OK; } -/** \brief Network device Tx timeout - * @param netdev pointer to network device +/** + * liquidio_tx_timeout - Network device Tx timeout + * @netdev: pointer to network device + * @txqueue: index of the hung transmit queue */ static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue) { @@ -2597,12 +2601,12 @@ static int liquidio_vlan_rx_kill_vid(struct net_device *netdev, return ret; } -/** Sending command to enable/disable RX checksum offload - * @param netdev pointer to network device - * @param command OCTNET_CMD_TNL_RX_CSUM_CTL - * @param rx_cmd_bit OCTNET_CMD_RXCSUM_ENABLE/ - * OCTNET_CMD_RXCSUM_DISABLE - * @returns SUCCESS or FAILURE +/** + * liquidio_set_rxcsum_command - Sending command to enable/disable RX checksum offload + * @netdev: pointer to network device + * @command: OCTNET_CMD_TNL_RX_CSUM_CTL + * @rx_cmd: OCTNET_CMD_RXCSUM_ENABLE/OCTNET_CMD_RXCSUM_DISABLE + * Return: SUCCESS or FAILURE */ static int liquidio_set_rxcsum_command(struct net_device *netdev, int command, u8 rx_cmd) @@ -2632,13 +2636,14 @@ static int liquidio_set_rxcsum_command(struct net_device *netdev, int command, return ret; } -/** Sending command to add/delete VxLAN UDP port to firmware - * @param netdev pointer to network device - * @param command OCTNET_CMD_VXLAN_PORT_CONFIG - * @param vxlan_port VxLAN port to be added or deleted - * @param vxlan_cmd_bit OCTNET_CMD_VXLAN_PORT_ADD, +/** + * liquidio_vxlan_port_command - Sending command to add/delete VxLAN UDP port to firmware + * @netdev: pointer to network device + * @command: OCTNET_CMD_VXLAN_PORT_CONFIG + * @vxlan_port: VxLAN port to be added or deleted + * @vxlan_cmd_bit: OCTNET_CMD_VXLAN_PORT_ADD, * OCTNET_CMD_VXLAN_PORT_DEL - * @returns SUCCESS or FAILURE + * Return: SUCCESS or FAILURE */ static int liquidio_vxlan_port_command(struct net_device *netdev, int command, u16 vxlan_port, u8 vxlan_cmd_bit) @@ -2698,10 +2703,11 @@ static const struct udp_tunnel_nic_info liquidio_udp_tunnels = { }, }; -/** \brief Net device fix features - * @param netdev pointer to network device - * @param request features requested - * @returns updated features list +/** + * liquidio_fix_features - Net device fix features + * @netdev: pointer to network device + * @request: features requested + * Return: updated features list */ static netdev_features_t liquidio_fix_features(struct net_device *netdev, netdev_features_t request) @@ -2737,9 +2743,10 @@ static netdev_features_t liquidio_fix_features(struct net_device *netdev, return request; } -/** \brief Net device set features - * @param netdev pointer to network device - * @param features features to enable/disable +/** + * liquidio_set_features - Net device set features + * @netdev: pointer to network device + * @features: features to enable/disable */ static int liquidio_set_features(struct net_device *netdev, netdev_features_t features) @@ -3224,7 +3231,8 @@ static const struct net_device_ops lionetdevops = { .ndo_get_port_parent_id = liquidio_get_port_parent_id, }; -/** \brief Entry point for the liquidio module +/** + * liquidio_init - Entry point for the liquidio module */ static int __init liquidio_init(void) { @@ -3307,8 +3315,8 @@ nic_info_err: } /** - * \brief Setup network interfaces - * @param 
octeon_dev octeon device + * setup_nic_devices - Setup network interfaces + * @octeon_dev: octeon device * * Called during init time for each device. It assumes the NIC * is already up and running. The link information for each @@ -3872,8 +3880,8 @@ static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs) #endif /** - * \brief initialize the NIC - * @param oct octeon device + * liquidio_init_nic_module - initialize the NIC + * @oct: octeon device * * This initialization routine is called once the Octeon device application is * up and running @@ -3928,9 +3936,10 @@ octnet_init_failure: } /** - * \brief starter callback that invokes the remaining initialization work after - * the NIC is up and running. - * @param octptr work struct work_struct + * nic_starter - finish init + * @work: work struct work_struct + * + * starter callback that invokes the remaining initialization work after the NIC is up and running. */ static void nic_starter(struct work_struct *work) { @@ -4023,8 +4032,8 @@ octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf) } /** - * \brief Device initialization for each Octeon device that is probed - * @param octeon_dev octeon device + * octeon_device_init - Device initialization for each Octeon device that is probed + * @octeon_dev: octeon device */ static int octeon_device_init(struct octeon_device *octeon_dev) { @@ -4193,8 +4202,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev) /* Initialize the tasklet that handles output queue packet processing.*/ dev_dbg(&octeon_dev->pci_dev->dev, "Initializing droq tasklet\n"); - tasklet_init(&oct_priv->droq_tasklet, octeon_droq_bh, - (unsigned long)octeon_dev); + tasklet_setup(&oct_priv->droq_tasklet, octeon_droq_bh); /* Setup the interrupt handler and record the INT SUM register address */ @@ -4298,16 +4306,17 @@ static int octeon_device_init(struct octeon_device *octeon_dev) complete(&handshake[octeon_dev->octeon_id].init); atomic_set(&octeon_dev->status, OCT_DEV_HOST_OK); + oct_priv->dev = octeon_dev; return 0; } /** - * \brief Debug console print function - * @param octeon_dev octeon device - * @param console_num console number - * @param prefix first portion of line to display - * @param suffix second portion of line to display + * octeon_dbg_console_print - Debug console print function + * @oct: octeon device + * @console_num: console number + * @prefix: first portion of line to display + * @suffix: second portion of line to display * * The OCTEON debug console outputs entire lines (excluding '\n'). * Normally, the line will be passed in the 'prefix' parameter. 
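One detail of the octeon_device_init() hunk above is worth spelling out: from_tasklet() is only a container_of() wrapper, approximately

#define from_tasklet(var, callback_tasklet, tasklet_fieldname) \
	container_of(callback_tasklet, typeof(*var), tasklet_fieldname)

so it can walk from the tasklet_struct back to the octeon_device_priv that embeds it, but no further. The old callback received the octeon_device directly as its unsigned long data; after the conversion that pointer has to be parked in the new oct_priv->dev member, which is why the hunk sets it just before init completes.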
@@ -4330,7 +4339,7 @@ static int octeon_dbg_console_print(struct octeon_device *oct, u32 console_num, } /** - * \brief Exits the module + * liquidio_exit - Exits the module */ static void __exit liquidio_exit(void) { diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 8c5879e31240..103440f97bc8 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -99,8 +99,8 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct) } /** - * \brief Cause device to go quiet so it can be safely removed/reset/etc - * @param oct Pointer to Octeon device + * pcierror_quiesce_device - Cause device to go quiet so it can be safely removed/reset/etc + * @oct: Pointer to Octeon device */ static void pcierror_quiesce_device(struct octeon_device *oct) { @@ -143,8 +143,8 @@ static void pcierror_quiesce_device(struct octeon_device *oct) } /** - * \brief Cleanup PCI AER uncorrectable error status - * @param dev Pointer to PCI device + * cleanup_aer_uncorrect_error_status - Cleanup PCI AER uncorrectable error status + * @dev: Pointer to PCI device */ static void cleanup_aer_uncorrect_error_status(struct pci_dev *dev) { @@ -163,8 +163,8 @@ static void cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } /** - * \brief Stop all PCI IO to a given device - * @param dev Pointer to Octeon device + * stop_pci_io - Stop all PCI IO to a given device + * @oct: Pointer to Octeon device */ static void stop_pci_io(struct octeon_device *oct) { @@ -205,9 +205,9 @@ static void stop_pci_io(struct octeon_device *oct) } /** - * \brief called when PCI error is detected - * @param pdev Pointer to PCI device - * @param state The current pci connection state + * liquidio_pcie_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state * * This function is called after a PCI bus error affecting * this device has been detected. @@ -256,8 +256,8 @@ static struct pci_driver liquidio_vf_pci_driver = { }; /** - * \brief Print link information - * @param netdev network device + * print_link_info - Print link information + * @netdev: network device */ static void print_link_info(struct net_device *netdev) { @@ -278,8 +278,8 @@ static void print_link_info(struct net_device *netdev) } /** - * \brief Routine to notify MTU change - * @param work work_struct data structure + * octnet_link_status_change - Routine to notify MTU change + * @work: work_struct data structure */ static void octnet_link_status_change(struct work_struct *work) { @@ -296,8 +296,8 @@ static void octnet_link_status_change(struct work_struct *work) } /** - * \brief Sets up the mtu status change work - * @param netdev network device + * setup_link_status_change_wq - Sets up the mtu status change work + * @netdev: network device */ static int setup_link_status_change_wq(struct net_device *netdev) { @@ -328,9 +328,9 @@ static void cleanup_link_status_change_wq(struct net_device *netdev) } /** - * \brief Update link status - * @param netdev network device - * @param ls link status structure + * update_link_status - Update link status + * @netdev: network device + * @ls: link status structure * * Called on receipt of a link status response from the core application to * update each interface's link status. 
@@ -374,13 +374,13 @@ static void update_link_status(struct net_device *netdev, } /** - * \brief PCI probe handler - * @param pdev PCI device structure - * @param ent unused + * liquidio_vf_probe - PCI probe handler + * @pdev: PCI device structure + * @ent: unused */ static int liquidio_vf_probe(struct pci_dev *pdev, - const struct pci_device_id *ent __attribute__((unused))) + const struct pci_device_id __maybe_unused *ent) { struct octeon_device *oct_dev = NULL; @@ -416,8 +416,8 @@ liquidio_vf_probe(struct pci_dev *pdev, } /** - * \brief PCI FLR for each Octeon device. - * @param oct octeon device + * octeon_pci_flr - PCI FLR for each Octeon device. + * @oct: octeon device */ static void octeon_pci_flr(struct octeon_device *oct) { @@ -437,9 +437,8 @@ static void octeon_pci_flr(struct octeon_device *oct) } /** - *\brief Destroy resources associated with octeon device - * @param pdev PCI device structure - * @param ent unused + * octeon_destroy_resources - Destroy resources associated with octeon device + * @oct: octeon device */ static void octeon_destroy_resources(struct octeon_device *oct) { @@ -592,9 +591,9 @@ static void octeon_destroy_resources(struct octeon_device *oct) } /** - * \brief Send Rx control command - * @param lio per-network private data - * @param start_stop whether to start or stop + * send_rx_ctrl_cmd - Send Rx control command + * @lio: per-network private data + * @start_stop: whether to start or stop */ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) { @@ -644,9 +643,9 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) } /** - * \brief Destroy NIC device interface - * @param oct octeon device - * @param ifidx which interface to destroy + * liquidio_destroy_nic_device - Destroy NIC device interface + * @oct: octeon device + * @ifidx: which interface to destroy * * Cleanup associated with each interface for an Octeon device when NIC * module is being unloaded or if initialization fails during load. @@ -704,8 +703,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) } /** - * \brief Stop complete NIC functionality - * @param oct octeon device + * liquidio_stop_nic_module - Stop complete NIC functionality + * @oct: octeon device */ static int liquidio_stop_nic_module(struct octeon_device *oct) { @@ -737,8 +736,8 @@ static int liquidio_stop_nic_module(struct octeon_device *oct) } /** - * \brief Cleans up resources at unload time - * @param pdev PCI device structure + * liquidio_vf_remove - Cleans up resources at unload time + * @pdev: PCI device structure */ static void liquidio_vf_remove(struct pci_dev *pdev) { @@ -763,8 +762,8 @@ static void liquidio_vf_remove(struct pci_dev *pdev) } /** - * \brief PCI initialization for each Octeon device. - * @param oct octeon device + * octeon_pci_os_setup - PCI initialization for each Octeon device. 
+ * @oct: octeon device */ static int octeon_pci_os_setup(struct octeon_device *oct) { @@ -792,8 +791,8 @@ static int octeon_pci_os_setup(struct octeon_device *oct) } /** - * \brief Unmap and free network buffer - * @param buf buffer + * free_netbuf - Unmap and free network buffer + * @buf: buffer */ static void free_netbuf(void *buf) { @@ -812,8 +811,8 @@ static void free_netbuf(void *buf) } /** - * \brief Unmap and free gather buffer - * @param buf buffer + * free_netsgbuf - Unmap and free gather buffer + * @buf: buffer */ static void free_netsgbuf(void *buf) { @@ -853,8 +852,8 @@ static void free_netsgbuf(void *buf) } /** - * \brief Unmap and free gather buffer with response - * @param buf buffer + * free_netsgbuf_with_resp - Unmap and free gather buffer with response + * @buf: buffer */ static void free_netsgbuf_with_resp(void *buf) { @@ -897,8 +896,8 @@ static void free_netsgbuf_with_resp(void *buf) } /** - * \brief Net device open for LiquidIO - * @param netdev network device + * liquidio_open - Net device open for LiquidIO + * @netdev: network device */ static int liquidio_open(struct net_device *netdev) { @@ -941,8 +940,8 @@ static int liquidio_open(struct net_device *netdev) } /** - * \brief Net device stop for LiquidIO - * @param netdev network device + * liquidio_stop - Net device stop for LiquidIO + * @netdev: network device */ static int liquidio_stop(struct net_device *netdev) { @@ -991,8 +990,8 @@ static int liquidio_stop(struct net_device *netdev) } /** - * \brief Converts a mask based on net device flags - * @param netdev network device + * get_new_flags - Converts a mask based on net device flags + * @netdev: network device * * This routine generates a octnet_ifflags mask from the net device flags * received from the OS. @@ -1060,8 +1059,8 @@ static void liquidio_set_uc_list(struct net_device *netdev) } /** - * \brief Net device set_multicast_list - * @param netdev network device + * liquidio_set_mcast_list - Net device set_multicast_list + * @netdev: network device */ static void liquidio_set_mcast_list(struct net_device *netdev) { @@ -1110,8 +1109,9 @@ static void liquidio_set_mcast_list(struct net_device *netdev) } /** - * \brief Net device set_mac_address - * @param netdev network device + * liquidio_set_mac - Net device set_mac_address + * @netdev: network device + * @p: opaque pointer to sockaddr */ static int liquidio_set_mac(struct net_device *netdev, void *p) { @@ -1229,10 +1229,9 @@ liquidio_get_stats64(struct net_device *netdev, } /** - * \brief Handler for SIOCSHWTSTAMP ioctl - * @param netdev network device - * @param ifr interface request - * @param cmd command + * hwtstamp_ioctl - Handler for SIOCSHWTSTAMP ioctl + * @netdev: network device + * @ifr: interface request */ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) { @@ -1287,10 +1286,10 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) } /** - * \brief ioctl handler - * @param netdev network device - * @param ifr interface request - * @param cmd command + * liquidio_ioctl - ioctl handler + * @netdev: network device + * @ifr: interface request + * @cmd: command */ static int liquidio_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { @@ -1339,10 +1338,10 @@ static void handle_timestamp(struct octeon_device *oct, u32 status, void *buf) tx_buffer_free(skb); } -/* \brief Send a data packet that will be timestamped - * @param oct octeon device - * @param ndata pointer to network data - * @param finfo pointer to private network data +/* 
send_nic_timestamp_pkt - Send a data packet that will be timestamped + * @oct: octeon device + * @ndata: pointer to network data + * @finfo: pointer to private network data */ static int send_nic_timestamp_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, @@ -1393,9 +1392,10 @@ static int send_nic_timestamp_pkt(struct octeon_device *oct, return retval; } -/** \brief Transmit networks packets to the Octeon interface - * @param skbuff skbuff struct to be passed to network layer. - * @param netdev pointer to network device +/** + * liquidio_xmit - Transmit network packets to the Octeon interface + * @skb: skbuff struct to be passed to network layer. + * @netdev: pointer to network device * @returns whether the packet was transmitted to the device okay or not * (NETDEV_TX_OK or NETDEV_TX_BUSY) */ @@ -1623,8 +1623,10 @@ lio_xmit_failed: return NETDEV_TX_OK; } -/** \brief Network device Tx timeout - * @param netdev pointer to network device +/** + * liquidio_tx_timeout - Network device Tx timeout + * @netdev: pointer to network device + * @txqueue: index of the hung transmit queue */ static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue) { @@ -1917,8 +1919,8 @@ nic_info_err: } /** - * \brief Setup network interfaces - * @param octeon_dev octeon device + * setup_nic_devices - Setup network interfaces + * @octeon_dev: octeon device * * Called during init time for each device. It assumes the NIC * is already up and running. The link information for each @@ -2229,8 +2231,8 @@ setup_nic_dev_done: } /** - * \brief initialize the NIC - * @param oct octeon device + * liquidio_init_nic_module - initialize the NIC + * @oct: octeon device * * This initialization routine is called once the Octeon device application is * up and running @@ -2270,8 +2272,8 @@ octnet_init_failure: } /** - * \brief Device initialization for each Octeon device that is probed - * @param octeon_dev octeon device + * octeon_device_init - Device initialization for each Octeon device that is probed + * @oct: octeon device */ static int octeon_device_init(struct octeon_device *oct) { diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c index 0d2831d10f65..28feabec8fbb 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c @@ -15,7 +15,7 @@ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or * NONINFRINGEMENT. See the GNU General Public License for more details. ***********************************************************************/ -/** +/* * @file octeon_console.c */ #include <linux/moduleparam.h> @@ -131,7 +131,7 @@ struct octeon_pci_console_desc { /* Implicit storage for console_addr_array */ }; -/** +/* * This function is the implementation of the get macros defined * for individual structure members. The argument are generated * by the macros inorder to read only the needed memory. @@ -160,7 +160,7 @@ static inline u64 __cvmx_bootmem_desc_get(struct octeon_device *oct, } } -/** +/* * This function retrieves the string name of a named block. It is * more complicated than a simple memcpy() since the named block * descriptor may not be directly accessible. 
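The octeon_console.c hunks just above demote comment openers from /** to /* because the /** marker is reserved for kernel-doc: scripts/kernel-doc attempts to parse any comment that starts with it and warns when the body does not follow the documented format. Schematically (the function name below is illustrative):

/**
 * real_kernel_doc - parsed and checked by scripts/kernel-doc
 * @arg: must be documented like this
 */

/*
 * Ordinary comment: free-form text that kernel-doc ignores.
 */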
@@ -182,7 +182,7 @@ static void CVMX_BOOTMEM_NAMED_GET_NAME(struct octeon_device *oct, /* See header file for descriptions of functions */ -/** +/* * Check the version information on the bootmem descriptor * * @param exact_match @@ -323,7 +323,7 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct, return result; } -/** +/* * Find a named block on the remote Octeon * * @param name Name of block to find @@ -707,7 +707,7 @@ int octeon_add_console(struct octeon_device *oct, u32 console_num, return ret; } -/** +/* * Removes all consoles * * @param oct octeon device diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index ac32facaa427..387a57cbfb73 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1307,7 +1307,7 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct) /* scratch register address is same in all the OCT-II and CN70XX models */ #define CNXX_SLI_SCRATCH1 0x3C0 -/** Get the octeon device pointer. +/* Get the octeon device pointer. * @param octeon_id - The id for which the octeon device pointer is required. * @return Success: Octeon device pointer. * @return Failure: NULL. @@ -1324,7 +1324,7 @@ u64 lio_pci_readq(struct octeon_device *oct, u64 addr) { u64 val64; unsigned long flags; - u32 val32, addrhi; + u32 addrhi; spin_lock_irqsave(&oct->pci_win_lock, flags); @@ -1339,10 +1339,10 @@ u64 lio_pci_readq(struct octeon_device *oct, u64 addr) writel(addrhi, oct->reg_list.pci_win_rd_addr_hi); /* Read back to preserve ordering of writes */ - val32 = readl(oct->reg_list.pci_win_rd_addr_hi); + readl(oct->reg_list.pci_win_rd_addr_hi); writel(addr & 0xffffffff, oct->reg_list.pci_win_rd_addr_lo); - val32 = readl(oct->reg_list.pci_win_rd_addr_lo); + readl(oct->reg_list.pci_win_rd_addr_lo); val64 = readq(oct->reg_list.pci_win_rd_data); @@ -1355,7 +1355,6 @@ void lio_pci_writeq(struct octeon_device *oct, u64 val, u64 addr) { - u32 val32; unsigned long flags; spin_lock_irqsave(&oct->pci_win_lock, flags); @@ -1365,7 +1364,7 @@ void lio_pci_writeq(struct octeon_device *oct, /* The write happens when the LSB is written. So write MSB first. */ writel(val >> 32, oct->reg_list.pci_win_wr_data_hi); /* Read the MSB to ensure ordering of writes. */ - val32 = readl(oct->reg_list.pci_win_wr_data_hi); + readl(oct->reg_list.pci_win_wr_data_hi); writel(val & 0xffffffff, oct->reg_list.pci_win_wr_data_lo); @@ -1411,7 +1410,7 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout) return ret; } -/** Get the octeon id assigned to the octeon device passed as argument. +/* Get the octeon id assigned to the octeon device passed as argument. * This function is exported to other modules. * @param dev - octeon device pointer passed as a void *. 
* @return octeon device id diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 017169023cca..d4080bddcb6b 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -280,13 +280,10 @@ int octeon_init_droq(struct octeon_device *oct, dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no, droq->max_count); - droq->recv_buf_list = (struct octeon_recv_buffer *) - vzalloc_node(array_size(droq->max_count, OCT_DROQ_RECVBUF_SIZE), - numa_node); + droq->recv_buf_list = vzalloc_node(array_size(droq->max_count, OCT_DROQ_RECVBUF_SIZE), + numa_node); if (!droq->recv_buf_list) - droq->recv_buf_list = (struct octeon_recv_buffer *) - vzalloc(array_size(droq->max_count, - OCT_DROQ_RECVBUF_SIZE)); + droq->recv_buf_list = vzalloc(array_size(droq->max_count, OCT_DROQ_RECVBUF_SIZE)); if (!droq->recv_buf_list) { dev_err(&oct->pci_dev->dev, "Output queue recv buf list alloc failed\n"); goto init_droq_fail; @@ -777,7 +774,7 @@ octeon_droq_process_packets(struct octeon_device *oct, return 0; } -/** +/* * Utility function to poll for packets. check_hw_for_packets must be * called before calling this routine. */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c index 614d07be7181..ad685f5d0a13 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c @@ -28,7 +28,7 @@ /** * octeon_mbox_read: - * @oct: Pointer mailbox + * @mbox: Pointer mailbox * * Reads the 8-bytes of data from the mbox register * Writes back the acknowldgement inidcating completion of read @@ -285,7 +285,8 @@ static int octeon_mbox_process_cmd(struct octeon_mbox *mbox, } /** - *octeon_mbox_process_message: + * octeon_mbox_process_message + * @mbox: mailbox * * Process the received mbox message. */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index 073d0647b439..5b4cb725f60f 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -39,6 +39,7 @@ struct octeon_device_priv { /** Tasklet structures for this device. 
*/ struct tasklet_struct droq_tasklet; unsigned long napi_mask; + struct octeon_device *dev; }; /** This structure is used by NIC driver to store information required diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c index 4c85ae643b7b..7ccab36143c1 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c @@ -22,6 +22,7 @@ #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" +#include "octeon_mem_ops.h" #define MEMOPS_IDX BAR1_INDEX_DYNAMIC_MAP diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 6cb2162a75d4..5e50bb19bf26 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -315,9 +315,9 @@ static void octeon_mgmt_clean_tx_buffers(struct octeon_mgmt *p) netif_wake_queue(p->netdev); } -static void octeon_mgmt_clean_tx_tasklet(unsigned long arg) +static void octeon_mgmt_clean_tx_tasklet(struct tasklet_struct *t) { - struct octeon_mgmt *p = (struct octeon_mgmt *)arg; + struct octeon_mgmt *p = from_tasklet(p, t, tx_clean_tasklet); octeon_mgmt_clean_tx_buffers(p); octeon_mgmt_enable_tx_irq(p); } @@ -1491,8 +1491,8 @@ static int octeon_mgmt_probe(struct platform_device *pdev) skb_queue_head_init(&p->tx_list); skb_queue_head_init(&p->rx_list); - tasklet_init(&p->tx_clean_tasklet, - octeon_mgmt_clean_tx_tasklet, (unsigned long)p); + tasklet_setup(&p->tx_clean_tasklet, + octeon_mgmt_clean_tx_tasklet); netdev->priv_flags |= IFF_UNICAST_FLT; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 063e560d9c1b..f3b7b443f964 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -985,9 +985,9 @@ static int nicvf_poll(struct napi_struct *napi, int budget) * * As of now only CQ errors are handled */ -static void nicvf_handle_qs_err(unsigned long data) +static void nicvf_handle_qs_err(struct tasklet_struct *t) { - struct nicvf *nic = (struct nicvf *)data; + struct nicvf *nic = from_tasklet(nic, t, qs_err_task); struct queue_set *qs = nic->qs; int qidx; u64 status; @@ -1493,12 +1493,10 @@ int nicvf_open(struct net_device *netdev) } /* Init tasklet for handling Qset err interrupt */ - tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err, - (unsigned long)nic); + tasklet_setup(&nic->qs_err_task, nicvf_handle_qs_err); /* Init RBDR tasklet which will refill RBDR */ - tasklet_init(&nic->rbdr_task, nicvf_rbdr_task, - (unsigned long)nic); + tasklet_setup(&nic->rbdr_task, nicvf_rbdr_task); INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work); /* Configure CPI alorithm */ @@ -2067,8 +2065,8 @@ static void nicvf_set_rx_mode(struct net_device *netdev) mode |= BGX_XCAST_MCAST_FILTER; /* here we need to copy mc addrs */ if (netdev_mc_count(netdev)) { - mc_list = kmalloc(offsetof(typeof(*mc_list), - mc[netdev_mc_count(netdev)]), + mc_list = kmalloc(struct_size(mc_list, mc, + netdev_mc_count(netdev)), GFP_ATOMIC); if (unlikely(!mc_list)) return; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index a45223f0cca5..7a141ce32e86 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -460,9 +460,9 @@ void nicvf_rbdr_work(struct work_struct *work) } /* In Softirq 
context, alloc rcv buffers in atomic mode */ -void nicvf_rbdr_task(unsigned long data) +void nicvf_rbdr_task(struct tasklet_struct *t) { - struct nicvf *nic = (struct nicvf *)data; + struct nicvf *nic = from_tasklet(nic, t, rbdr_task); nicvf_refill_rbdr(nic, GFP_ATOMIC); if (nic->rb_alloc_fail) { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 2460451fc48f..8453defc296c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -348,7 +348,7 @@ void nicvf_xdp_sq_doorbell(struct nicvf *nic, struct snd_queue *sq, int sq_num); struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx, bool xdp); -void nicvf_rbdr_task(unsigned long data); +void nicvf_rbdr_task(struct tasklet_struct *t); void nicvf_rbdr_work(struct work_struct *work); void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx); diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index f6f3ef9a93cf..87cc0ef68b31 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -134,4 +134,6 @@ config CHELSIO_LIB help Common library for Chelsio drivers. +source "drivers/net/ethernet/chelsio/inline_crypto/Kconfig" + endif # NET_VENDOR_CHELSIO diff --git a/drivers/net/ethernet/chelsio/Makefile b/drivers/net/ethernet/chelsio/Makefile index c0f978d2e8a7..1a6fd8b2bb7d 100644 --- a/drivers/net/ethernet/chelsio/Makefile +++ b/drivers/net/ethernet/chelsio/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_CHELSIO_T3) += cxgb3/ obj-$(CONFIG_CHELSIO_T4) += cxgb4/ obj-$(CONFIG_CHELSIO_T4VF) += cxgb4vf/ obj-$(CONFIG_CHELSIO_LIB) += libcxgb/ +obj-$(CONFIG_CHELSIO_INLINE_CRYPTO) += inline_crypto/ diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 99736796e1a0..0e4a0f413960 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -997,17 +997,17 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_disable_pdev; } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) { pci_using_dac = 1; - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { - pr_err("%s: unable to obtain 64-bit DMA for " - "consistent allocations\n", pci_name(pdev)); + if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { + pr_err("%s: unable to obtain 64-bit DMA for coherent allocations\n", + pci_name(pdev)); err = -ENODEV; goto out_disable_pdev; } - } else if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) != 0) { + } else if ((err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) != 0) { pr_err("%s: no usable DMA configuration\n", pci_name(pdev)); goto out_disable_pdev; } diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 47b5c8e2104b..2d9c2b5a690a 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -239,8 +239,10 @@ struct sched { unsigned int num; /* num skbs in per port queues */ struct sched_port p[MAX_NPORTS]; struct tasklet_struct sched_tsk;/* tasklet used to run scheduler */ + struct sge *sge; }; -static void restart_sched(unsigned long); + +static void restart_sched(struct tasklet_struct *t); /* @@ -378,7 +380,8 @@ static int tx_sched_init(struct sge *sge) return -ENOMEM; pr_debug("tx_sched_init\n"); - tasklet_init(&s->sched_tsk, restart_sched, (unsigned long) 
sge); + tasklet_setup(&s->sched_tsk, restart_sched); + s->sge = sge; sge->tx_sched = s; for (i = 0; i < MAX_NPORTS; i++) { @@ -509,9 +512,8 @@ static void free_freelQ_buffers(struct pci_dev *pdev, struct freelQ *q) while (q->credits--) { struct freelQ_ce *ce = &q->centries[cidx]; - pci_unmap_single(pdev, dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), DMA_FROM_DEVICE); dev_kfree_skb(ce->skb); ce->skb = NULL; if (++cidx == q->size) @@ -529,8 +531,8 @@ static void free_rx_resources(struct sge *sge) if (sge->respQ.entries) { size = sizeof(struct respQ_e) * sge->respQ.size; - pci_free_consistent(pdev, size, sge->respQ.entries, - sge->respQ.dma_addr); + dma_free_coherent(&pdev->dev, size, sge->respQ.entries, + sge->respQ.dma_addr); } for (i = 0; i < SGE_FREELQ_N; i++) { @@ -542,8 +544,8 @@ static void free_rx_resources(struct sge *sge) } if (q->entries) { size = sizeof(struct freelQ_e) * q->size; - pci_free_consistent(pdev, size, q->entries, - q->dma_addr); + dma_free_coherent(&pdev->dev, size, q->entries, + q->dma_addr); } } } @@ -564,7 +566,8 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) q->size = p->freelQ_size[i]; q->dma_offset = sge->rx_pkt_pad ? 0 : NET_IP_ALIGN; size = sizeof(struct freelQ_e) * q->size; - q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr); + q->entries = dma_alloc_coherent(&pdev->dev, size, + &q->dma_addr, GFP_KERNEL); if (!q->entries) goto err_no_mem; @@ -601,7 +604,8 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) sge->respQ.credits = 0; size = sizeof(struct respQ_e) * sge->respQ.size; sge->respQ.entries = - pci_alloc_consistent(pdev, size, &sge->respQ.dma_addr); + dma_alloc_coherent(&pdev->dev, size, &sge->respQ.dma_addr, + GFP_KERNEL); if (!sge->respQ.entries) goto err_no_mem; return 0; @@ -624,9 +628,10 @@ static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *q, unsigned int n) ce = &q->centries[cidx]; while (n--) { if (likely(dma_unmap_len(ce, dma_len))) { - pci_unmap_single(pdev, dma_unmap_addr(ce, dma_addr), + dma_unmap_single(&pdev->dev, + dma_unmap_addr(ce, dma_addr), dma_unmap_len(ce, dma_len), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); if (q->sop) q->sop = 0; } @@ -663,8 +668,8 @@ static void free_tx_resources(struct sge *sge) } if (q->entries) { size = sizeof(struct cmdQ_e) * q->size; - pci_free_consistent(pdev, size, q->entries, - q->dma_addr); + dma_free_coherent(&pdev->dev, size, q->entries, + q->dma_addr); } } } @@ -689,7 +694,8 @@ static int alloc_tx_resources(struct sge *sge, struct sge_params *p) q->stop_thres = 0; spin_lock_init(&q->lock); size = sizeof(struct cmdQ_e) * q->size; - q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr); + q->entries = dma_alloc_coherent(&pdev->dev, size, + &q->dma_addr, GFP_KERNEL); if (!q->entries) goto err_no_mem; @@ -837,8 +843,8 @@ static void refill_free_list(struct sge *sge, struct freelQ *q) break; skb_reserve(skb, q->dma_offset); - mapping = pci_map_single(pdev, skb->data, dma_len, - PCI_DMA_FROMDEVICE); + mapping = dma_map_single(&pdev->dev, skb->data, dma_len, + DMA_FROM_DEVICE); skb_reserve(skb, sge->rx_pkt_pad); ce->skb = skb; @@ -1049,15 +1055,15 @@ static inline struct sk_buff *get_packet(struct adapter *adapter, goto use_orig_buf; skb_put(skb, len); - pci_dma_sync_single_for_cpu(pdev, - dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&pdev->dev, + 
dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), + DMA_FROM_DEVICE); skb_copy_from_linear_data(ce->skb, skb->data, len); - pci_dma_sync_single_for_device(pdev, - dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&pdev->dev, + dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), + DMA_FROM_DEVICE); recycle_fl_buf(fl, fl->cidx); return skb; } @@ -1068,8 +1074,8 @@ use_orig_buf: return NULL; } - pci_unmap_single(pdev, dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), DMA_FROM_DEVICE); skb = ce->skb; prefetch(skb->data); @@ -1091,8 +1097,9 @@ static void unexpected_offload(struct adapter *adapter, struct freelQ *fl) struct freelQ_ce *ce = &fl->centries[fl->cidx]; struct sk_buff *skb = ce->skb; - pci_dma_sync_single_for_cpu(adapter->pdev, dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&adapter->pdev->dev, + dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), DMA_FROM_DEVICE); pr_err("%s: unexpected offload packet, cmd %u\n", adapter->name, *skb->data); recycle_fl_buf(fl, fl->cidx); @@ -1209,8 +1216,8 @@ static inline void write_tx_descs(struct adapter *adapter, struct sk_buff *skb, e = e1 = &q->entries[pidx]; ce = &q->centries[pidx]; - mapping = pci_map_single(adapter->pdev, skb->data, - skb_headlen(skb), PCI_DMA_TODEVICE); + mapping = dma_map_single(&adapter->pdev->dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); desc_mapping = mapping; desc_len = skb_headlen(skb); @@ -1301,9 +1308,10 @@ static inline void reclaim_completed_tx(struct sge *sge, struct cmdQ *q) * Called from tasklet. Checks the scheduler for any * pending skbs that can be sent. 
*/ -static void restart_sched(unsigned long arg) +static void restart_sched(struct tasklet_struct *t) { - struct sge *sge = (struct sge *) arg; + struct sched *s = from_tasklet(s, t, sched_tsk); + struct sge *sge = s->sge; struct adapter *adapter = sge->adapter; struct cmdQ *q = &sge->cmdQ[0]; struct sk_buff *skb; diff --git a/drivers/net/ethernet/chelsio/cxgb3/adapter.h b/drivers/net/ethernet/chelsio/cxgb3/adapter.h index 087ff0ffb597..f80fbd81b609 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb3/adapter.h @@ -313,6 +313,7 @@ void t3_os_link_fault(struct adapter *adapter, int port_id, int state); void t3_os_link_fault_handler(struct adapter *adapter, int port_id); void t3_sge_start(struct adapter *adap); +void t3_sge_stop_dma(struct adapter *adap); void t3_sge_stop(struct adapter *adap); void t3_start_sge_timers(struct adapter *adap); void t3_stop_sge_timers(struct adapter *adap); diff --git a/drivers/net/ethernet/chelsio/cxgb3/ael1002.c b/drivers/net/ethernet/chelsio/cxgb3/ael1002.c index dadf11e3dddb..9d591f0ddfc5 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/ael1002.c +++ b/drivers/net/ethernet/chelsio/cxgb3/ael1002.c @@ -815,17 +815,12 @@ static const struct cphy_ops ael2020_ops = { int t3_ael2020_phy_prep(struct cphy *phy, struct adapter *adapter, int phy_addr, const struct mdio_ops *mdio_ops) { - int err; - cphy_init(phy, adapter, phy_addr, &ael2020_ops, mdio_ops, SUPPORTED_10000baseT_Full | SUPPORTED_AUI | SUPPORTED_FIBRE | SUPPORTED_IRQ, "10GBASE-R"); msleep(125); - err = set_phy_regs(phy, ael2020_reset_regs); - if (err) - return err; - return 0; + return set_phy_regs(phy, ael2020_reset_regs); } /* diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 387c357e1b8e..84ad7261e243 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -148,7 +148,7 @@ struct workqueue_struct *cxgb3_wq; /** * link_report - show link status and link speed/duplex - * @p: the port whose settings are to be reported + * @dev: the port whose settings are to be reported * * Shows the link status, speed, and duplex of a port. */ @@ -304,8 +304,8 @@ void t3_os_link_changed(struct adapter *adapter, int port_id, int link_stat, /** * t3_os_phymod_changed - handle PHY module changes - * @phy: the PHY reporting the module change - * @mod_type: new module type + * @adap: the adapter associated with the link change + * @port_id: the port index whose link status has changed * * This is the OS-dependent handler for PHY module changes.
It is * invoked when a PHY module is removed or inserted for any OS-specific @@ -1200,7 +1200,7 @@ static void cxgb_vlan_mode(struct net_device *dev, netdev_features_t features) /** * cxgb_up - enable the adapter - * @adapter: adapter being enabled + * @adap: adapter being enabled * * Called when the first port is enabled, this function performs the * actions necessary to make an adapter operational, such as completing @@ -2996,7 +2996,7 @@ void t3_fatal_err(struct adapter *adapter) unsigned int fw_status[4]; if (adapter->flags & FULL_INIT_DONE) { - t3_sge_stop(adapter); + t3_sge_stop_dma(adapter); t3_write_reg(adapter, A_XGM_TX_CTRL, 0); t3_write_reg(adapter, A_XGM_RX_CTRL, 0); t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0); diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 6dabbf1502c7..e18e9ce27f94 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -372,7 +372,7 @@ static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q, /** * free_rx_bufs - free the Rx buffers on an SGE free list * @pdev: the PCI device associated with the adapter - * @rxq: the SGE free list to clean up + * @q: the SGE free list to clean up * * Release the buffers on an SGE free-buffer Rx queue. HW fetching from * this queue should be stopped before calling this function. @@ -493,7 +493,7 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) /** * refill_fl - refill an SGE free-buffer list - * @adapter: the adapter + * @adap: the adapter * @q: the free-list to refill * @n: the number of new buffers to allocate * @gfp: the gfp flags for allocating new buffers @@ -568,7 +568,7 @@ static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl) /** * recycle_rx_buf - recycle a receive buffer - * @adapter: the adapter + * @adap: the adapter * @q: the SGE free list * @idx: index of buffer to recycle * @@ -825,6 +825,7 @@ use_orig_buf: * get_packet_pg - return the next ingress packet buffer from a free list * @adap: the adapter that received the packet * @fl: the SGE free list holding the packet + * @q: the response queue * @len: the packet length including any SGE padding * @drop_thres: # of remaining buffers before we start dropping packets * @@ -1173,6 +1174,7 @@ static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb, * @q: the Tx queue * @ndesc: number of descriptors the packet will occupy * @compl: the value of the COMPL bit to use + * @addr: the DMA addresses of the packet's data buffers * * Generate a TX_PKT work request to send the supplied packet. */ @@ -1516,14 +1518,14 @@ static int ctrl_xmit(struct adapter *adap, struct sge_txq *q, /** * restart_ctrlq - restart a suspended control queue - * @qs: the queue set cotaining the control queue + * @t: pointer to the tasklet associated with this handler * * Resumes transmission on a suspended Tx control queue. */ -static void restart_ctrlq(unsigned long data) +static void restart_ctrlq(struct tasklet_struct *t) { struct sk_buff *skb; - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_tasklet(qs, t, txq[TXQ_CTRL].qresume_tsk); struct sge_txq *q = &qs->txq[TXQ_CTRL]; spin_lock(&q->lock); @@ -1622,6 +1624,7 @@ static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev, * @pidx: index of the first Tx descriptor to write * @gen: the generation value to use * @ndesc: number of descriptors the packet will occupy + * @addr: the DMA addresses of the packet's data buffers * * Write an offload work request to send the supplied packet.
The packet * data already carry the work request with most fields populated. @@ -1733,14 +1736,14 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); /** * restart_offloadq - restart a suspended offload queue - * @qs: the queue set cotaining the offload queue + * @t: pointer to the tasklet associated with this handler * * Resumes transmission on a suspended Tx offload queue. */ -static void restart_offloadq(unsigned long data) +static void restart_offloadq(struct tasklet_struct *t) { struct sk_buff *skb; - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_tasklet(qs, t, txq[TXQ_OFLD].qresume_tsk); struct sge_txq *q = &qs->txq[TXQ_OFLD]; const struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; @@ -1883,7 +1886,7 @@ static inline void deliver_partial_bundle(struct t3cdev *tdev, /** * ofld_poll - NAPI handler for offload packets in interrupt mode - * @dev: the network device doing the polling + * @napi: the NAPI instance doing the polling * @budget: polling budget * * The NAPI handler for offload packets when a response queue is serviced @@ -2007,7 +2010,7 @@ static void restart_tx(struct sge_qset *qs) /** * cxgb3_arp_process - process an ARP request probing a private IP address - * @adapter: the adapter + * @pi: the port info * @skb: the skbuff containing the ARP request * * Check if the ARP request is probing the private IP address @@ -2069,7 +2072,8 @@ static void cxgb3_process_iscsi_prov_pack(struct port_info *pi, * @adap: the adapter * @rq: the response queue that received the packet * @skb: the packet - * @pad: amount of padding at the start of the buffer + * @pad: padding + * @lro: whether to deliver the packet via LRO * * Process an ingress ethernet packet and deliver it to the stack. * The padding is 2 if the packet was delivered in an Rx buffer and 0 @@ -2239,7 +2243,7 @@ static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags) /** * check_ring_db - check if we need to ring any doorbells - * @adapter: the adapter + * @adap: the adapter * @qs: the queue set whose Tx queues are to be examined * @sleeping: indicates which Tx queue sent GTS * @@ -2372,10 +2376,7 @@ no_mem: if (fl->use_pages) { void *addr = fl->sdesc[fl->cidx].pg_chunk.va; - prefetch(addr); -#if L1_CACHE_BYTES < 128 - prefetch(addr + L1_CACHE_BYTES); -#endif + net_prefetch(addr); __refill_fl(adap, fl); if (lro > 0) { lro_add_page(adap, qs, fl, @@ -2902,7 +2903,7 @@ void t3_sge_err_intr_handler(struct adapter *adapter) /** * sge_timer_tx - perform periodic maintenance of an SGE qset - * @data: the SGE queue set to maintain + * @t: the timer embedded in the SGE queue set to maintain * * Runs periodically from a timer to perform maintenance of an SGE queue * set. It performs two tasks: @@ -2946,7 +2947,7 @@ static void sge_timer_tx(struct timer_list *t) /** * sge_timer_rx - perform periodic maintenance of an SGE qset - * @data: the SGE queue set to maintain + * @t: the timer embedded in the SGE queue set to maintain * * a) Replenishes Rx queues that have run out due to memory shortage.
* Normally new Rx buffers are added when existing ones are consumed but @@ -3024,7 +3025,7 @@ void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) * @irq_vec_idx: the IRQ vector index for response queue interrupts * @p: configuration parameters for this queue set * @ntxq: number of Tx queues for the queue set - * @netdev: net device associated with this queue set + * @dev: net device associated with this queue set * @netdevq: net device TX queue associated with this queue set * * Allocate resources and initialize an SGE queue set. A queue set @@ -3084,10 +3085,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, skb_queue_head_init(&q->txq[i].sendq); } - tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq, - (unsigned long)q); - tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq, - (unsigned long)q); + tasklet_setup(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq); + tasklet_setup(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq); q->fl[0].gen = q->fl[1].gen = 1; q->fl[0].size = p->fl_size; @@ -3271,30 +3270,40 @@ void t3_sge_start(struct adapter *adap) } /** - * t3_sge_stop - disable SGE operation + * t3_sge_stop_dma - Disable SGE DMA engine operation * @adap: the adapter * - * Disables the DMA engine. This can be called in emeregencies (e.g., - * from error interrupts) or from normal process context. In the latter - * case it also disables any pending queue restart tasklets. Note that - * if it is called in interrupt context it cannot disable the restart - * tasklets as it cannot wait, however the tasklets will have no effect - * since the doorbells are disabled and the driver will call this again - * later from process context, at which time the tasklets will be stopped - * if they are still running. + * Can be invoked from interrupt context, e.g. from an error handler. + * + * Note that this function cannot disable the restart tasklets as + * it cannot wait if called from interrupt context; however, the + * tasklets will have no effect since the doorbells are disabled. The + * driver will call t3_sge_stop() later from process context, at + * which time the tasklets will be stopped if they are still running. */ -void t3_sge_stop(struct adapter *adap) +void t3_sge_stop_dma(struct adapter *adap) { t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0); - if (!in_interrupt()) { - int i; +} - for (i = 0; i < SGE_QSETS; ++i) { - struct sge_qset *qs = &adap->sge.qs[i]; +/** + * t3_sge_stop - disable SGE operation completely + * @adap: the adapter + * + * Called from process context. Disables the DMA engine and any + * pending queue restart tasklets.
+ */ +void t3_sge_stop(struct adapter *adap) +{ + int i; - tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk); - tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk); - } + t3_sge_stop_dma(adap); + + for (i = 0; i < SGE_QSETS; ++i) { + struct sge_qset *qs = &adap->sge.qs[i]; + + tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk); + tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk); } } diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index 0a9f2c596624..7ff31d1026fb 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -2195,7 +2195,7 @@ static int t3_sge_write_context(struct adapter *adapter, unsigned int id, /** * clear_sge_ctxt - completely clear an SGE context - * @adapter: the adapter + * @adap: the adapter * @id: the context id * @type: the context type * @@ -2484,6 +2484,7 @@ int t3_sge_disable_cqcntxt(struct adapter *adapter, unsigned int id) * @adapter: the adapter * @id: the context id * @op: the operation to perform + * @credits: credit value to write * * Perform the selected operation on an SGE completion queue context. * The caller is responsible for ensuring only one context operation @@ -2885,7 +2886,7 @@ static void init_cong_ctrl(unsigned short *a, unsigned short *b) * t3_load_mtus - write the MTU and congestion control HW tables * @adap: the adapter * @mtus: the unrestricted values for the MTU table - * @alphs: the values for the congestion control alpha parameter + * @alpha: the values for the congestion control alpha parameter * @beta: the values for the congestion control beta parameter * @mtu_cap: the maximum permitted effective MTU * @@ -2966,7 +2967,7 @@ static void ulp_config(struct adapter *adap, const struct tp_params *p) /** * t3_set_proto_sram - set the contents of the protocol sram - * @adapter: the adapter + * @adap: the adapter * @data: the protocol image * * Write the contents of the protocol SRAM. 
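All of the tasklet conversions above (restart_sched, restart_ctrlq, restart_offloadq, and the cxgb4 equivalents below) follow the same pattern, sketched here in minimal form. The demo_dev/demo_task names are hypothetical; tasklet_setup(), from_tasklet() and the callback signature are the real <linux/interrupt.h> API.

#include <linux/interrupt.h>

struct demo_dev {
	struct tasklet_struct tsk;	/* tasklet embedded in driver state */
	int pending;
};

/* New-style callback: receives the tasklet pointer instead of an opaque
 * unsigned long, and recovers the enclosing structure with
 * from_tasklet(), a type-safe wrapper around container_of().
 */
static void demo_task(struct tasklet_struct *t)
{
	struct demo_dev *d = from_tasklet(d, t, tsk);

	d->pending = 0;
}

static void demo_init(struct demo_dev *d)
{
	/* replaces tasklet_init(&d->tsk, demo_task, (unsigned long)d) */
	tasklet_setup(&d->tsk, demo_task);
}

Because from_tasklet() can only recover the structure the tasklet is embedded in, a callback that used to be handed a pointer to some other object now needs a back-pointer instead; that is why struct sched gained an sge member when restart_sched() was converted above.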
@@ -3483,7 +3484,7 @@ static void get_pci_mode(struct adapter *adapter, struct pci_params *p) /** * init_link_config - initialize a link's SW state * @lc: structure holding the link state - * @ai: information about the current card + * @caps: information about the current card * * Initializes the SW state maintained for each link, including the link's * capabilities and default speed/duplex/flow-control/autonegotiation diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 9cb8b229c1b3..3352dad6ca99 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -146,6 +146,11 @@ enum { CXGB4_ETHTOOL_FLASH_BOOTCFG = 4 }; +enum cxgb4_netdev_tls_ops { + CXGB4_TLSDEV_OPS = 1, + CXGB4_XFRMDEV_OPS +}; + struct cxgb4_bootcfg_data { __le16 signature; __u8 reserved[2]; @@ -1196,6 +1201,12 @@ struct adapter { struct cxgb4_tc_u32_table *tc_u32; struct chcr_ktls chcr_ktls; struct chcr_stats_debug chcr_stats; +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + struct ch_ktls_stats_debug ch_ktls_stats; +#endif +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + struct ch_ipsec_stats_debug ch_ipsec_stats; +#endif /* TC flower offload */ bool tc_flower_initialized; @@ -2100,7 +2111,7 @@ void free_tx_desc(struct adapter *adap, struct sge_txq *q, void cxgb4_eosw_txq_free_desc(struct adapter *adap, struct sge_eosw_txq *txq, u32 ndesc); int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc); -void cxgb4_ethofld_restart(unsigned long data); +void cxgb4_ethofld_restart(struct tasklet_struct *t); int cxgb4_ethofld_rx_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *si); void free_txq(struct adapter *adap, struct sge_txq *q); @@ -2169,7 +2180,7 @@ void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q); void cxgb4_quiesce_rx(struct sge_rspq *q); int cxgb4_port_mirror_alloc(struct net_device *dev); void cxgb4_port_mirror_free(struct net_device *dev); -#ifdef CONFIG_CHELSIO_TLS_DEVICE +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) int cxgb4_set_ktls_feature(struct adapter *adap, bool enable); #endif #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 05f33b7e3677..0273f40b85f7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -3527,6 +3527,10 @@ DEFINE_SHOW_ATTRIBUTE(meminfo); static int chcr_stats_show(struct seq_file *seq, void *v) { +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + struct ch_ktls_port_stats_debug *ktls_port; + int i = 0; +#endif struct adapter *adap = seq->private; seq_puts(seq, "Chelsio Crypto Accelerator Stats \n"); @@ -3542,52 +3546,45 @@ static int chcr_stats_show(struct seq_file *seq, void *v) atomic_read(&adap->chcr_stats.error)); seq_printf(seq, "Fallback: %10u \n", atomic_read(&adap->chcr_stats.fallback)); - seq_printf(seq, "IPSec PDU: %10u\n", - atomic_read(&adap->chcr_stats.ipsec_cnt)); seq_printf(seq, "TLS PDU Tx: %10u\n", atomic_read(&adap->chcr_stats.tls_pdu_tx)); seq_printf(seq, "TLS PDU Rx: %10u\n", atomic_read(&adap->chcr_stats.tls_pdu_rx)); seq_printf(seq, "TLS Keys (DDR) Count: %10u\n", atomic_read(&adap->chcr_stats.tls_key)); -#ifdef CONFIG_CHELSIO_TLS_DEVICE +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + seq_puts(seq, "\nChelsio Inline IPsec Crypto Accelerator Stats\n"); + seq_printf(seq, "IPSec PDU: %10u\n", + atomic_read(&adap->ch_ipsec_stats.ipsec_cnt)); +#endif +#if 
IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) seq_puts(seq, "\nChelsio KTLS Crypto Accelerator Stats\n"); seq_printf(seq, "Tx TLS offload refcount: %20u\n", refcount_read(&adap->chcr_ktls.ktls_refcount)); - seq_printf(seq, "Tx HW offload contexts added: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_ctx)); - seq_printf(seq, "Tx connection created: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_connection_open)); - seq_printf(seq, "Tx connection failed: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_connection_fail)); - seq_printf(seq, "Tx connection closed: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_connection_close)); - seq_printf(seq, "Packets passed for encryption : %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_encrypted_packets)); - seq_printf(seq, "Bytes passed for encryption : %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_encrypted_bytes)); seq_printf(seq, "Tx records send: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_send_records)); + atomic64_read(&adap->ch_ktls_stats.ktls_tx_send_records)); seq_printf(seq, "Tx partial start of records: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_start_pkts)); + atomic64_read(&adap->ch_ktls_stats.ktls_tx_start_pkts)); seq_printf(seq, "Tx partial middle of records: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_middle_pkts)); + atomic64_read(&adap->ch_ktls_stats.ktls_tx_middle_pkts)); seq_printf(seq, "Tx partial end of record: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_end_pkts)); + atomic64_read(&adap->ch_ktls_stats.ktls_tx_end_pkts)); seq_printf(seq, "Tx complete records: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_complete_pkts)); + atomic64_read(&adap->ch_ktls_stats.ktls_tx_complete_pkts)); seq_printf(seq, "TX trim pkts : %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_trimmed_pkts)); - seq_printf(seq, "Tx out of order packets: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_ooo)); - seq_printf(seq, "Tx drop pkts before HW offload: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_skip_no_sync_data)); - seq_printf(seq, "Tx drop not synced packets: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_drop_no_sync_data)); - seq_printf(seq, "Tx drop bypass req: %20llu\n", - atomic64_read(&adap->chcr_stats.ktls_tx_drop_bypass_req)); + atomic64_read(&adap->ch_ktls_stats.ktls_tx_trimmed_pkts)); + while (i < MAX_NPORTS) { + ktls_port = &adap->ch_ktls_stats.ktls_port[i]; + seq_printf(seq, "Port %d\n", i); + seq_printf(seq, "Tx connection created: %20llu\n", + atomic64_read(&ktls_port->ktls_tx_connection_open)); + seq_printf(seq, "Tx connection failed: %20llu\n", + atomic64_read(&ktls_port->ktls_tx_connection_fail)); + seq_printf(seq, "Tx connection closed: %20llu\n", + atomic64_read(&ktls_port->ktls_tx_connection_close)); + i++; + } #endif - return 0; } DEFINE_SHOW_ATTRIBUTE(chcr_stats); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 9f3173f86eed..61ea3ec5c3fc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -117,15 +117,7 @@ static const char stats_strings[][ETH_GSTRING_LEN] = { "vlan_insertions ", "gro_packets ", "gro_merged ", -}; - -static char adapter_stats_strings[][ETH_GSTRING_LEN] = { - "db_drop ", - "db_full ", - "db_empty ", - "write_coal_success ", - "write_coal_fail ", -#ifdef CONFIG_CHELSIO_TLS_DEVICE +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) "tx_tls_encrypted_packets", "tx_tls_encrypted_bytes ", 
"tx_tls_ctx ", @@ -136,6 +128,14 @@ static char adapter_stats_strings[][ETH_GSTRING_LEN] = { #endif }; +static char adapter_stats_strings[][ETH_GSTRING_LEN] = { + "db_drop ", + "db_full ", + "db_empty ", + "write_coal_success ", + "write_coal_fail ", +}; + static char loopback_stats_strings[][ETH_GSTRING_LEN] = { "-------Loopback----------- ", "octets_ok ", @@ -257,15 +257,7 @@ struct queue_port_stats { u64 vlan_ins; u64 gro_pkts; u64 gro_merged; -}; - -struct adapter_stats { - u64 db_drop; - u64 db_full; - u64 db_empty; - u64 wc_success; - u64 wc_fail; -#ifdef CONFIG_CHELSIO_TLS_DEVICE +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) u64 tx_tls_encrypted_packets; u64 tx_tls_encrypted_bytes; u64 tx_tls_ctx; @@ -276,12 +268,23 @@ struct adapter_stats { #endif }; +struct adapter_stats { + u64 db_drop; + u64 db_full; + u64 db_empty; + u64 wc_success; + u64 wc_fail; +}; + static void collect_sge_port_stats(const struct adapter *adap, const struct port_info *p, struct queue_port_stats *s) { const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset]; const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset]; +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + const struct ch_ktls_port_stats_debug *ktls_stats; +#endif struct sge_eohw_txq *eohw_tx; unsigned int i; @@ -306,6 +309,21 @@ static void collect_sge_port_stats(const struct adapter *adap, s->vlan_ins += eohw_tx->vlan_ins; } } +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + ktls_stats = &adap->ch_ktls_stats.ktls_port[p->port_id]; + s->tx_tls_encrypted_packets = + atomic64_read(&ktls_stats->ktls_tx_encrypted_packets); + s->tx_tls_encrypted_bytes = + atomic64_read(&ktls_stats->ktls_tx_encrypted_bytes); + s->tx_tls_ctx = atomic64_read(&ktls_stats->ktls_tx_ctx); + s->tx_tls_ooo = atomic64_read(&ktls_stats->ktls_tx_ooo); + s->tx_tls_skip_no_sync_data = + atomic64_read(&ktls_stats->ktls_tx_skip_no_sync_data); + s->tx_tls_drop_no_sync_data = + atomic64_read(&ktls_stats->ktls_tx_drop_no_sync_data); + s->tx_tls_drop_bypass_req = + atomic64_read(&ktls_stats->ktls_tx_drop_bypass_req); +#endif } static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 481498585ead..6ec5f2f26f05 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -604,17 +604,14 @@ int cxgb4_get_free_ftid(struct net_device *dev, u8 family, bool hash_en, /* If the new rule wants to get inserted into * HPFILTER region, but its prio is greater * than the rule with the highest prio in HASH - * region, then reject the rule. - */ - if (t->tc_hash_tids_max_prio && - tc_prio > t->tc_hash_tids_max_prio) - break; - - /* If there's not enough slots available - * in HPFILTER region, then move on to - * normal FILTER region immediately. + * region, or if there's not enough slots + * available in HPFILTER region, then skip + * trying to insert this rule into HPFILTER + * region and directly go to the next region. 
*/ - if (ftid + n > t->nhpftids) { + if ((t->tc_hash_tids_max_prio && + tc_prio > t->tc_hash_tids_max_prio) || + (ftid + n) > t->nhpftids) { ftid = t->nhpftids; continue; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index de078a5bf23e..a952fe198eb9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -66,7 +66,7 @@ #include <linux/crash_dump.h> #include <net/udp_tunnel.h> #include <net/xfrm.h> -#if defined(CONFIG_CHELSIO_TLS_DEVICE) +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) #include <net/tls.h> #endif @@ -6396,7 +6396,50 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) } #endif /* CONFIG_PCI_IOV */ -#if defined(CONFIG_CHELSIO_TLS_DEVICE) +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) || IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + +static int chcr_offload_state(struct adapter *adap, + enum cxgb4_netdev_tls_ops op_val) +{ + switch (op_val) { +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + case CXGB4_TLSDEV_OPS: + if (!adap->uld[CXGB4_ULD_KTLS].handle) { + dev_dbg(adap->pdev_dev, "ch_ktls driver is not loaded\n"); + return -EOPNOTSUPP; + } + if (!adap->uld[CXGB4_ULD_KTLS].tlsdev_ops) { + dev_dbg(adap->pdev_dev, + "ch_ktls driver has no registered tlsdev_ops\n"); + return -EOPNOTSUPP; + } + break; +#endif /* CONFIG_CHELSIO_TLS_DEVICE */ +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + case CXGB4_XFRMDEV_OPS: + if (!adap->uld[CXGB4_ULD_IPSEC].handle) { + dev_dbg(adap->pdev_dev, "chipsec driver is not loaded\n"); + return -EOPNOTSUPP; + } + if (!adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops) { + dev_dbg(adap->pdev_dev, + "chipsec driver has no registered xfrmdev_ops\n"); + return -EOPNOTSUPP; + } + break; +#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ + default: + dev_dbg(adap->pdev_dev, + "driver has no support for offload %d\n", op_val); + return -EOPNOTSUPP; + } + + return 0; +} + +#endif /* CONFIG_CHELSIO_TLS_DEVICE || CONFIG_CHELSIO_IPSEC_INLINE */ + +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) static int cxgb4_ktls_dev_add(struct net_device *netdev, struct sock *sk, enum tls_offload_ctx_dir direction, @@ -6404,30 +6447,21 @@ static int cxgb4_ktls_dev_add(struct net_device *netdev, struct sock *sk, u32 tcp_sn) { struct adapter *adap = netdev2adap(netdev); - int ret = 0; + int ret; mutex_lock(&uld_mutex); - if (!adap->uld[CXGB4_ULD_CRYPTO].handle) { - dev_err(adap->pdev_dev, "chcr driver is not loaded\n"); - ret = -EOPNOTSUPP; - goto out_unlock; - } - - if (!adap->uld[CXGB4_ULD_CRYPTO].tlsdev_ops) { - dev_err(adap->pdev_dev, - "chcr driver has no registered tlsdev_ops()\n"); - ret = -EOPNOTSUPP; + ret = chcr_offload_state(adap, CXGB4_TLSDEV_OPS); + if (ret) goto out_unlock; - } ret = cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_ENABLE); if (ret) goto out_unlock; - ret = adap->uld[CXGB4_ULD_CRYPTO].tlsdev_ops->tls_dev_add(netdev, sk, - direction, - crypto_info, - tcp_sn); + ret = adap->uld[CXGB4_ULD_KTLS].tlsdev_ops->tls_dev_add(netdev, sk, + direction, + crypto_info, + tcp_sn); /* if there is a failure, clear the refcount */ if (ret) cxgb4_set_ktls_feature(adap, @@ -6444,19 +6478,11 @@ static void cxgb4_ktls_dev_del(struct net_device *netdev, struct adapter *adap = netdev2adap(netdev); mutex_lock(&uld_mutex); - if (!adap->uld[CXGB4_ULD_CRYPTO].handle) { - dev_err(adap->pdev_dev, "chcr driver is not loaded\n"); + if (chcr_offload_state(adap, CXGB4_TLSDEV_OPS)) goto out_unlock; - } - if (!adap->uld[CXGB4_ULD_CRYPTO].tlsdev_ops) { - dev_err(adap->pdev_dev, 
- "chcr driver has no registered tlsdev_ops\n"); - goto out_unlock; - } - - adap->uld[CXGB4_ULD_CRYPTO].tlsdev_ops->tls_dev_del(netdev, tls_ctx, - direction); + adap->uld[CXGB4_ULD_KTLS].tlsdev_ops->tls_dev_del(netdev, tls_ctx, + direction); cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE); out_unlock: @@ -6469,6 +6495,114 @@ static const struct tlsdev_ops cxgb4_ktls_ops = { }; #endif /* CONFIG_CHELSIO_TLS_DEVICE */ +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + +static int cxgb4_xfrm_add_state(struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + int ret; + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return -EBUSY; + } + ret = chcr_offload_state(adap, CXGB4_XFRMDEV_OPS); + if (ret) + goto out_unlock; + + ret = adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_add(x); + +out_unlock: + mutex_unlock(&uld_mutex); + + return ret; +} + +static void cxgb4_xfrm_del_state(struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return; + } + if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) + goto out_unlock; + + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_delete(x); + +out_unlock: + mutex_unlock(&uld_mutex); +} + +static void cxgb4_xfrm_free_state(struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return; + } + if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) + goto out_unlock; + + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_free(x); + +out_unlock: + mutex_unlock(&uld_mutex); +} + +static bool cxgb4_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + bool ret = false; + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return ret; + } + if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) + goto out_unlock; + + ret = adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_offload_ok(skb, x); + +out_unlock: + mutex_unlock(&uld_mutex); + return ret; +} + +static void cxgb4_advance_esn_state(struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return; + } + if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) + goto out_unlock; + + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_advance_esn(x); + +out_unlock: + mutex_unlock(&uld_mutex); +} + +static const struct xfrmdev_ops cxgb4_xfrmdev_ops = { + .xdo_dev_state_add = cxgb4_xfrm_add_state, + .xdo_dev_state_delete = cxgb4_xfrm_del_state, + .xdo_dev_state_free = cxgb4_xfrm_free_state, + .xdo_dev_offload_ok = cxgb4_ipsec_offload_ok, + .xdo_dev_state_advance_esn = cxgb4_advance_esn_state, +}; + +#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ + static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; @@ -6721,14 +6855,22 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; netdev->vlan_features = netdev->features & VLAN_FEAT; -#if defined(CONFIG_CHELSIO_TLS_DEVICE) +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) if (pi->adapter->params.crypto & 
FW_CAPS_CONFIG_TLS_HW) { netdev->hw_features |= NETIF_F_HW_TLS_TX; netdev->tlsdev_ops = &cxgb4_ktls_ops; /* initialize the refcount */ refcount_set(&pi->adapter->chcr_ktls.ktls_refcount, 0); } -#endif +#endif /* CONFIG_CHELSIO_TLS_DEVICE */ +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + if (pi->adapter->params.crypto & FW_CAPS_CONFIG_IPSEC_INLINE) { + netdev->hw_enc_features |= NETIF_F_HW_ESP; + netdev->features |= NETIF_F_HW_ESP; + netdev->xfrmdev_ops = &cxgb4_xfrmdev_ops; + } +#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ + netdev->priv_flags |= IFF_UNICAST_FLT; /* MTU range: 81 - 9600 */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index f642c1b475c4..1b88bd1c2dbe 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -60,6 +60,89 @@ static struct ch_tc_pedit_fields pedits[] = { PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12), }; +static const struct cxgb4_natmode_config cxgb4_natmode_config_array[] = { + /* Default supported NAT modes */ + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_NONE, + .natmode = NAT_MODE_NONE, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP, + .natmode = NAT_MODE_DIP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT, + .natmode = NAT_MODE_DIP_DP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT | + CXGB4_ACTION_NATMODE_SIP, + .natmode = NAT_MODE_DIP_DP_SIP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT | + CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_DIP_DP_SP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_SIP | CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_SIP_SP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP | + CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_DIP_SIP_SP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP | + CXGB4_ACTION_NATMODE_DPORT | + CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_ALL, + }, + /* T6+ can ignore L4 ports when they're disabled. */ + { + .chip = CHELSIO_T6, + .flags = CXGB4_ACTION_NATMODE_SIP, + .natmode = NAT_MODE_SIP_SP, + }, + { + .chip = CHELSIO_T6, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_DIP_DP_SP, + }, + { + .chip = CHELSIO_T6, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP, + .natmode = NAT_MODE_ALL, + }, +}; + +static void cxgb4_action_natmode_tweak(struct ch_filter_specification *fs, + u8 natmode_flags) +{ + u8 i = 0; + + /* Translate the enabled NAT 4-tuple fields to one of the + * hardware supported NAT mode configurations. This ensures + * that we pick a valid combination, where the disabled fields + * do not get overwritten to 0. 
+ */ + for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) { + if (cxgb4_natmode_config_array[i].flags == natmode_flags) { + fs->nat_mode = cxgb4_natmode_config_array[i].natmode; + return; + } + } +} + static struct ch_tc_flower_entry *allocate_flower_entry(void) { struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); @@ -289,7 +372,8 @@ static void offload_pedit(struct ch_filter_specification *fs, u32 val, u32 mask, } static void process_pedit_field(struct ch_filter_specification *fs, u32 val, - u32 mask, u32 offset, u8 htype) + u32 mask, u32 offset, u8 htype, + u8 *natmode_flags) { switch (htype) { case FLOW_ACT_MANGLE_HDR_TYPE_ETH: @@ -314,60 +398,94 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, switch (offset) { case PEDIT_IP4_SRC: offload_pedit(fs, val, mask, IP4_SRC); + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP4_DST: offload_pedit(fs, val, mask, IP4_DST); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; } - fs->nat_mode = NAT_MODE_ALL; break; case FLOW_ACT_MANGLE_HDR_TYPE_IP6: switch (offset) { case PEDIT_IP6_SRC_31_0: offload_pedit(fs, val, mask, IP6_SRC_31_0); + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP6_SRC_63_32: offload_pedit(fs, val, mask, IP6_SRC_63_32); + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP6_SRC_95_64: offload_pedit(fs, val, mask, IP6_SRC_95_64); + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP6_SRC_127_96: offload_pedit(fs, val, mask, IP6_SRC_127_96); + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP6_DST_31_0: offload_pedit(fs, val, mask, IP6_DST_31_0); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; case PEDIT_IP6_DST_63_32: offload_pedit(fs, val, mask, IP6_DST_63_32); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; case PEDIT_IP6_DST_95_64: offload_pedit(fs, val, mask, IP6_DST_95_64); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; case PEDIT_IP6_DST_127_96: offload_pedit(fs, val, mask, IP6_DST_127_96); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; } - fs->nat_mode = NAT_MODE_ALL; break; case FLOW_ACT_MANGLE_HDR_TYPE_TCP: switch (offset) { case PEDIT_TCP_SPORT_DPORT: - if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) { fs->nat_fport = val; - else + *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT; + } else { fs->nat_lport = val >> 16; + *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; + } } - fs->nat_mode = NAT_MODE_ALL; break; case FLOW_ACT_MANGLE_HDR_TYPE_UDP: switch (offset) { case PEDIT_UDP_SPORT_DPORT: - if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) { fs->nat_fport = val; - else + *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT; + } else { fs->nat_lport = val >> 16; + *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; + } } - fs->nat_mode = NAT_MODE_ALL; + break; + } +} + +static int cxgb4_action_natmode_validate(struct adapter *adap, u8 natmode_flags, + struct netlink_ext_ack *extack) +{ + u8 i = 0; + + /* Extract the NAT mode to enable based on what 4-tuple fields + * are enabled to be overwritten. This ensures that the + * disabled fields don't get overwritten to 0. 
+ */ + for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) { + const struct cxgb4_natmode_config *c; + + c = &cxgb4_natmode_config_array[i]; + if (CHELSIO_CHIP_VERSION(adap->params.chip) >= c->chip && + natmode_flags == c->flags) + return 0; } + NL_SET_ERR_MSG_MOD(extack, "Unsupported NAT mode 4-tuple combination"); + return -EOPNOTSUPP; } void cxgb4_process_flow_actions(struct net_device *in, @@ -375,6 +493,7 @@ void cxgb4_process_flow_actions(struct net_device *in, struct ch_filter_specification *fs) { struct flow_action_entry *act; + u8 natmode_flags = 0; int i; flow_action_for_each(i, act, actions) { @@ -426,7 +545,8 @@ void cxgb4_process_flow_actions(struct net_device *in, val = act->mangle.val; offset = act->mangle.offset; - process_pedit_field(fs, val, mask, offset, htype); + process_pedit_field(fs, val, mask, offset, htype, + &natmode_flags); } break; case FLOW_ACTION_QUEUE: @@ -438,6 +558,9 @@ void cxgb4_process_flow_actions(struct net_device *in, break; } } + if (natmode_flags) + cxgb4_action_natmode_tweak(fs, natmode_flags); + } static bool valid_l4_mask(u32 mask) @@ -454,7 +577,8 @@ static bool valid_l4_mask(u32 mask) } static bool valid_pedit_action(struct net_device *dev, - const struct flow_action_entry *act) + const struct flow_action_entry *act, + u8 *natmode_flags) { u32 mask, offset; u8 htype; @@ -479,7 +603,10 @@ static bool valid_pedit_action(struct net_device *dev, case FLOW_ACT_MANGLE_HDR_TYPE_IP4: switch (offset) { case PEDIT_IP4_SRC: + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; + break; case PEDIT_IP4_DST: + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; default: netdev_err(dev, "%s: Unsupported pedit field\n", @@ -493,10 +620,13 @@ static bool valid_pedit_action(struct net_device *dev, case PEDIT_IP6_SRC_63_32: case PEDIT_IP6_SRC_95_64: case PEDIT_IP6_SRC_127_96: + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; + break; case PEDIT_IP6_DST_31_0: case PEDIT_IP6_DST_63_32: case PEDIT_IP6_DST_95_64: case PEDIT_IP6_DST_127_96: + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; default: netdev_err(dev, "%s: Unsupported pedit field\n", @@ -512,6 +642,10 @@ static bool valid_pedit_action(struct net_device *dev, __func__); return false; } + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT; + else + *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; break; default: netdev_err(dev, "%s: Unsupported pedit field\n", @@ -527,6 +661,10 @@ static bool valid_pedit_action(struct net_device *dev, __func__); return false; } + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT; + else + *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; break; default: netdev_err(dev, "%s: Unsupported pedit field\n", @@ -546,10 +684,12 @@ int cxgb4_validate_flow_actions(struct net_device *dev, struct netlink_ext_ack *extack, u8 matchall_filter) { + struct adapter *adap = netdev2adap(dev); struct flow_action_entry *act; bool act_redir = false; bool act_pedit = false; bool act_vlan = false; + u8 natmode_flags = 0; int i; if (!flow_action_basic_hw_stats_check(actions, extack)) @@ -563,7 +703,6 @@ int cxgb4_validate_flow_actions(struct net_device *dev, break; case FLOW_ACTION_MIRRED: case FLOW_ACTION_REDIRECT: { - struct adapter *adap = netdev2adap(dev); struct net_device *n_dev, *target_dev; bool found = false; unsigned int i; @@ -620,7 +759,8 @@ int cxgb4_validate_flow_actions(struct net_device *dev, } break; case FLOW_ACTION_MANGLE: { - bool pedit_valid = valid_pedit_action(dev, act); + bool pedit_valid = valid_pedit_action(dev, 
act, + &natmode_flags); if (!pedit_valid) return -EOPNOTSUPP; @@ -642,6 +782,15 @@ int cxgb4_validate_flow_actions(struct net_device *dev, return -EINVAL; } + if (act_pedit) { + int ret; + + ret = cxgb4_action_natmode_validate(adap, natmode_flags, + extack); + if (ret) + return ret; + } + return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index 6296e1d5a12b..3a2fa00c8cde 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -108,6 +108,21 @@ struct ch_tc_pedit_fields { #define PEDIT_TCP_SPORT_DPORT 0x0 #define PEDIT_UDP_SPORT_DPORT 0x0 +enum cxgb4_action_natmode_flags { + CXGB4_ACTION_NATMODE_NONE = 0, + CXGB4_ACTION_NATMODE_DIP = (1 << 0), + CXGB4_ACTION_NATMODE_SIP = (1 << 1), + CXGB4_ACTION_NATMODE_DPORT = (1 << 2), + CXGB4_ACTION_NATMODE_SPORT = (1 << 3), +}; + +/* TC PEDIT action to NATMODE translation entry */ +struct cxgb4_natmode_config { + enum chip_type chip; + u8 flags; + u8 natmode; +}; + void cxgb4_process_flow_actions(struct net_device *in, struct flow_action *actions, struct ch_filter_specification *fs); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c index ae7123a9de8e..6c259de96f96 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c @@ -114,8 +114,7 @@ static int cxgb4_init_eosw_txq(struct net_device *dev, eosw_txq->cred = adap->params.ofldq_wr_cred; eosw_txq->hwqid = hwqid; eosw_txq->netdev = dev; - tasklet_init(&eosw_txq->qresume_tsk, cxgb4_ethofld_restart, - (unsigned long)eosw_txq); + tasklet_setup(&eosw_txq->qresume_tsk, cxgb4_ethofld_restart); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 08439e215efe..743af9e654aa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -663,7 +663,7 @@ static int uld_attach(struct adapter *adap, unsigned int uld) return 0; } -#ifdef CONFIG_CHELSIO_TLS_DEVICE +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) static bool cxgb4_uld_in_use(struct adapter *adap) { const struct tid_info *t = &adap->tids; @@ -690,8 +690,8 @@ int cxgb4_set_ktls_feature(struct adapter *adap, bool enable) * ULD is/are already active, return failure. */ if (cxgb4_uld_in_use(adap)) { - dev_warn(adap->pdev_dev, - "ULD connections (tid/stid) active. Can't enable kTLS\n"); + dev_dbg(adap->pdev_dev, + "ULD connections (tid/stid) active. Can't enable kTLS\n"); return -EINVAL; } ret = t4_set_params(adap, adap->mbox, adap->pf, @@ -699,7 +699,7 @@ int cxgb4_set_ktls_feature(struct adapter *adap, bool enable) if (ret) return ret; refcount_set(&adap->chcr_ktls.ktls_refcount, 1); - pr_info("kTLS has been enabled. Restrictions placed on ULD support\n"); + pr_debug("kTLS has been enabled. Restrictions placed on ULD support\n"); } else { /* ktls settings already up, just increment refcount. */ refcount_inc(&adap->chcr_ktls.ktls_refcount); @@ -716,7 +716,7 @@ int cxgb4_set_ktls_feature(struct adapter *adap, bool enable) 0, 1, ¶ms, ¶ms); if (ret) return ret; - pr_info("kTLS is disabled. Restrictions on ULD support removed\n"); + pr_debug("kTLS is disabled. 
Restrictions on ULD support removed\n"); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index a963fd0b4540..b169776ab484 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -40,9 +40,11 @@ #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/atomic.h> +#include <net/tls.h> #include "cxgb4.h" #define MAX_ULD_QSETS 16 +#define MAX_ULD_NPORTS 4 /* CPL message priority levels */ enum { @@ -302,7 +304,9 @@ enum cxgb4_uld { CXGB4_ULD_ISCSI, CXGB4_ULD_ISCSIT, CXGB4_ULD_CRYPTO, + CXGB4_ULD_IPSEC, CXGB4_ULD_TLS, + CXGB4_ULD_KTLS, CXGB4_ULD_MAX }; @@ -361,28 +365,11 @@ struct cxgb4_virt_res { /* virtualized HW resources */ struct cxgb4_range ppod_edram; }; -struct chcr_stats_debug { - atomic_t cipher_rqst; - atomic_t digest_rqst; - atomic_t aead_rqst; - atomic_t complete; - atomic_t error; - atomic_t fallback; - atomic_t ipsec_cnt; - atomic_t tls_pdu_tx; - atomic_t tls_pdu_rx; - atomic_t tls_key; -#ifdef CONFIG_CHELSIO_TLS_DEVICE +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) +struct ch_ktls_port_stats_debug { atomic64_t ktls_tx_connection_open; atomic64_t ktls_tx_connection_fail; atomic64_t ktls_tx_connection_close; - atomic64_t ktls_tx_send_records; - atomic64_t ktls_tx_end_pkts; - atomic64_t ktls_tx_start_pkts; - atomic64_t ktls_tx_middle_pkts; - atomic64_t ktls_tx_retransmit_pkts; - atomic64_t ktls_tx_complete_pkts; - atomic64_t ktls_tx_trimmed_pkts; atomic64_t ktls_tx_encrypted_packets; atomic64_t ktls_tx_encrypted_bytes; atomic64_t ktls_tx_ctx; @@ -390,10 +377,38 @@ struct chcr_stats_debug { atomic64_t ktls_tx_skip_no_sync_data; atomic64_t ktls_tx_drop_no_sync_data; atomic64_t ktls_tx_drop_bypass_req; +}; +struct ch_ktls_stats_debug { + struct ch_ktls_port_stats_debug ktls_port[MAX_ULD_NPORTS]; + atomic64_t ktls_tx_send_records; + atomic64_t ktls_tx_end_pkts; + atomic64_t ktls_tx_start_pkts; + atomic64_t ktls_tx_middle_pkts; + atomic64_t ktls_tx_retransmit_pkts; + atomic64_t ktls_tx_complete_pkts; + atomic64_t ktls_tx_trimmed_pkts; +}; #endif + +struct chcr_stats_debug { + atomic_t cipher_rqst; + atomic_t digest_rqst; + atomic_t aead_rqst; + atomic_t complete; + atomic_t error; + atomic_t fallback; + atomic_t tls_pdu_tx; + atomic_t tls_pdu_rx; + atomic_t tls_key; }; +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) +struct ch_ipsec_stats_debug { + atomic_t ipsec_cnt; +}; +#endif + #define OCQ_WIN_OFFSET(pdev, vres) \ (pci_resource_len((pdev), 2) - roundup_pow_of_two((vres)->ocq.size)) @@ -470,9 +485,12 @@ struct cxgb4_uld_info { struct napi_struct *napi); void (*lro_flush)(struct t4_lro_mgr *); int (*tx_handler)(struct sk_buff *skb, struct net_device *dev); -#if IS_ENABLED(CONFIG_TLS_DEVICE) +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) const struct tlsdev_ops *tlsdev_ops; #endif +#if IS_ENABLED(CONFIG_XFRM_OFFLOAD) + const struct xfrmdev_ops *xfrmdev_ops; +#endif }; void cxgb4_uld_enable(struct adapter *adap); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 869431a1eedd..a9e9c7ae565d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1416,14 +1416,14 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) pi = netdev_priv(dev); adap = pi->adapter; ssi = skb_shinfo(skb); -#ifdef CONFIG_CHELSIO_IPSEC_INLINE +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) if (xfrm_offload(skb) && !ssi->gso_size) - return 
adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev); + return adap->uld[CXGB4_ULD_IPSEC].tx_handler(skb, dev); #endif /* CHELSIO_IPSEC_INLINE */ -#ifdef CONFIG_CHELSIO_TLS_DEVICE +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) if (skb->decrypted) - return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev); + return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev); #endif /* CHELSIO_TLS_DEVICE */ qidx = skb_get_queue_mapping(skb); @@ -2660,15 +2660,15 @@ static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb) /** * restart_ctrlq - restart a suspended control queue - * @data: the control queue to restart + * @t: pointer to the tasklet associated with this handler * * Resumes transmission on a suspended Tx control queue. */ -static void restart_ctrlq(unsigned long data) +static void restart_ctrlq(struct tasklet_struct *t) { struct sk_buff *skb; unsigned int written = 0; - struct sge_ctrl_txq *q = (struct sge_ctrl_txq *)data; + struct sge_ctrl_txq *q = from_tasklet(q, t, qresume_tsk); spin_lock(&q->sendq.lock); reclaim_completed_tx_imm(&q->q); @@ -2961,13 +2961,13 @@ static int ofld_xmit(struct sge_uld_txq *q, struct sk_buff *skb) /** * restart_ofldq - restart a suspended offload queue - * @data: the offload queue to restart + * @t: pointer to the tasklet associated with this handler * * Resumes transmission on a suspended Tx offload queue. */ -static void restart_ofldq(unsigned long data) +static void restart_ofldq(struct tasklet_struct *t) { - struct sge_uld_txq *q = (struct sge_uld_txq *)data; + struct sge_uld_txq *q = from_tasklet(q, t, qresume_tsk); spin_lock(&q->sendq.lock); q->full = 0; /* the queue actually is completely empty now */ @@ -3887,9 +3887,10 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) return work_done; } -void cxgb4_ethofld_restart(unsigned long data) +void cxgb4_ethofld_restart(struct tasklet_struct *t) { - struct sge_eosw_txq *eosw_txq = (struct sge_eosw_txq *)data; + struct sge_eosw_txq *eosw_txq = from_tasklet(eosw_txq, t, + qresume_tsk); int pktcount; spin_lock(&eosw_txq->lock); @@ -4580,7 +4581,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid))); txq->adap = adap; skb_queue_head_init(&txq->sendq); - tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq); + tasklet_setup(&txq->qresume_tsk, restart_ctrlq); txq->full = 0; return 0; } @@ -4670,7 +4671,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq, txq->q.q_type = CXGB4_TXQ_ULD; txq->adap = adap; skb_queue_head_init(&txq->sendq); - tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq); + tasklet_setup(&txq->qresume_tsk, restart_ofldq); txq->full = 0; txq->mapping_err = 0; return 0; @@ -4872,9 +4873,6 @@ void t4_sge_stop(struct adapter *adap) int i; struct sge *s = &adap->sge; - if (in_interrupt()) /* actions below require waiting */ - return; - if (s->rx_timer.function) del_timer_sync(&s->rx_timer); if (s->tx_timer.function) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index fa3367966f4b..98d01a7497ec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -4745,9 +4745,11 @@ static void le_intr_handler(struct adapter *adap) static struct intr_info t6_le_intr_info[] = { { T6_LIPMISS_F, "LE LIP miss", -1, 0 }, { T6_LIP0_F, "LE 0 LIP error", -1, 0 }, + { CMDTIDERR_F, "LE cmd tid error", -1, 1 }, { TCAMINTPERR_F, "LE parity error", -1, 1 }, { 
T6_UNKNOWNCMD_F, "LE unknown command", -1, 1 }, { SSRAMINTPERR_F, "LE request queue parity error", -1, 1 }, + { HASHTBLMEMCRCERR_F, "LE hash table mem crc error", -1, 0 }, { 0 } }; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 065c01c654ff..b11a172b5174 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -3017,6 +3017,14 @@ #define REV_V(x) ((x) << REV_S) #define REV_G(x) (((x) >> REV_S) & REV_M) +#define HASHTBLMEMCRCERR_S 27 +#define HASHTBLMEMCRCERR_V(x) ((x) << HASHTBLMEMCRCERR_S) +#define HASHTBLMEMCRCERR_F HASHTBLMEMCRCERR_V(1U) + +#define CMDTIDERR_S 22 +#define CMDTIDERR_V(x) ((x) << CMDTIDERR_S) +#define CMDTIDERR_F CMDTIDERR_V(1U) + #define T6_UNKNOWNCMD_S 3 #define T6_UNKNOWNCMD_V(x) ((x) << T6_UNKNOWNCMD_S) #define T6_UNKNOWNCMD_F T6_UNKNOWNCMD_V(1U) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index e2fe78e2e242..2820a0bb971b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2017,33 +2017,14 @@ static void mboxlog_stop(struct seq_file *seq, void *v) { } -static const struct seq_operations mboxlog_seq_ops = { +static const struct seq_operations mboxlog_sops = { .start = mboxlog_start, .next = mboxlog_next, .stop = mboxlog_stop, .show = mboxlog_show }; -static int mboxlog_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &mboxlog_seq_ops); - - if (!res) { - struct seq_file *seq = file->private_data; - - seq->private = inode->i_private; - } - return res; -} - -static const struct file_operations mboxlog_fops = { - .owner = THIS_MODULE, - .open = mboxlog_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - +DEFINE_SEQ_ATTRIBUTE(mboxlog); /* * Show SGE Queue Set information. We display QPL Queues Sets per line. */ @@ -2171,31 +2152,14 @@ static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos) return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; } -static const struct seq_operations sge_qinfo_seq_ops = { +static const struct seq_operations sge_qinfo_sops = { .start = sge_queue_start, .next = sge_queue_next, .stop = sge_queue_stop, .show = sge_qinfo_show }; -static int sge_qinfo_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &sge_qinfo_seq_ops); - - if (!res) { - struct seq_file *seq = file->private_data; - seq->private = inode->i_private; - } - return res; -} - -static const struct file_operations sge_qinfo_debugfs_fops = { - .owner = THIS_MODULE, - .open = sge_qinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(sge_qinfo); /* * Show SGE Queue Set statistics. We display QPL Queues Sets per line. @@ -2317,31 +2281,14 @@ static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos) return *pos < entries ? 
(void *)((uintptr_t)*pos + 1) : NULL; } -static const struct seq_operations sge_qstats_seq_ops = { +static const struct seq_operations sge_qstats_sops = { .start = sge_qstats_start, .next = sge_qstats_next, .stop = sge_qstats_stop, .show = sge_qstats_show }; -static int sge_qstats_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &sge_qstats_seq_ops); - - if (res == 0) { - struct seq_file *seq = file->private_data; - seq->private = inode->i_private; - } - return res; -} - -static const struct file_operations sge_qstats_proc_fops = { - .owner = THIS_MODULE, - .open = sge_qstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(sge_qstats); /* * Show PCI-E SR-IOV Virtual Function Resource Limits. @@ -2415,31 +2362,14 @@ static void interfaces_stop(struct seq_file *seq, void *v) { } -static const struct seq_operations interfaces_seq_ops = { +static const struct seq_operations interfaces_sops = { .start = interfaces_start, .next = interfaces_next, .stop = interfaces_stop, .show = interfaces_show }; -static int interfaces_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &interfaces_seq_ops); - - if (res == 0) { - struct seq_file *seq = file->private_data; - seq->private = inode->i_private; - } - return res; -} - -static const struct file_operations interfaces_proc_fops = { - .owner = THIS_MODULE, - .open = interfaces_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(interfaces); /* * /sys/kernel/debugfs/cxgb4vf/ files list. @@ -2452,10 +2382,10 @@ struct cxgb4vf_debugfs_entry { static struct cxgb4vf_debugfs_entry debugfs_files[] = { { "mboxlog", 0444, &mboxlog_fops }, - { "sge_qinfo", 0444, &sge_qinfo_debugfs_fops }, - { "sge_qstats", 0444, &sge_qstats_proc_fops }, + { "sge_qinfo", 0444, &sge_qinfo_fops }, + { "sge_qstats", 0444, &sge_qstats_fops }, { "resources", 0444, &resources_fops }, - { "interfaces", 0444, &interfaces_proc_fops }, + { "interfaces", 0444, &interfaces_fops }, }; /* diff --git a/drivers/net/ethernet/chelsio/inline_crypto/Kconfig b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig new file mode 100644 index 000000000000..7dfa57348d54 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Chelsio inline crypto configuration +# + +config CHELSIO_INLINE_CRYPTO + bool "Chelsio Inline Crypto support" + depends on CHELSIO_T4 + default y + help + Enable support for inline crypto. + Allows enabling or disabling the individual inline crypto drivers listed below. + +if CHELSIO_INLINE_CRYPTO + +config CRYPTO_DEV_CHELSIO_TLS + tristate "Chelsio Crypto Inline TLS Driver" + depends on CHELSIO_T4 + depends on TLS_TOE + help + Support Chelsio Inline TLS with Chelsio crypto accelerator. + Enable inline TLS support for Tx and Rx. + + To compile this driver as a module, choose M here: the module + will be called chtls. + +config CHELSIO_IPSEC_INLINE + tristate "Chelsio IPSec XFRM Tx crypto offload" + depends on CHELSIO_T4 + depends on XFRM_OFFLOAD + depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + help + Support Chelsio Inline IPsec with Chelsio crypto accelerator. + Enable inline IPsec support for Tx. + + To compile this driver as a module, choose M here: the module + will be called ch_ipsec.
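The tristate symbols in this new Kconfig file are also why the driver hunks above replace #ifdef CONFIG_CHELSIO_TLS_DEVICE with #if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE): an option set to 'm' defines CONFIG_<name>_MODULE rather than CONFIG_<name>, so a plain #ifdef would silently compile the offload hooks out of a modular build. A minimal sketch of the difference, using a hypothetical CONFIG_DEMO_OFFLOAD symbol (IS_ENABLED() is the real macro from <linux/kconfig.h>):

#include <linux/kconfig.h>

#if IS_ENABLED(CONFIG_DEMO_OFFLOAD)	/* true for both =y and =m */
static inline void demo_offload_hook(void)
{
	/* offload path present for built-in and modular configs */
}
#endif

#ifdef CONFIG_DEMO_OFFLOAD		/* true for =y only */
/* anything here vanishes when the option is built as a module */
#endif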
+ +config CHELSIO_TLS_DEVICE + tristate "Chelsio Inline KTLS Offload" + depends on CHELSIO_T4 + depends on TLS + depends on TLS_DEVICE + help + This flag enables support for kernel tls offload over Chelsio T6 + crypto accelerator. CONFIG_CHELSIO_TLS_DEVICE flag can be enabled + only if CONFIG_TLS and CONFIG_TLS_DEVICE flags are enabled. + + To compile this driver as a module, choose M here: the module + will be called ch_ktls. + +endif # CHELSIO_INLINE_CRYPTO diff --git a/drivers/net/ethernet/chelsio/inline_crypto/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/Makefile new file mode 100644 index 000000000000..27e6d7e2f1eb --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls/ +obj-$(CONFIG_CHELSIO_IPSEC_INLINE) += ch_ipsec/ +obj-$(CONFIG_CHELSIO_TLS_DEVICE) += ch_ktls/ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/Makefile new file mode 100644 index 000000000000..efdcaaebc455 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 \ + -I $(srctree)/drivers/crypto/chelsio + +obj-$(CONFIG_CHELSIO_IPSEC_INLINE) += ch_ipsec.o +ch_ipsec-objs := chcr_ipsec.o + + diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c new file mode 100644 index 000000000000..072299b14b8d --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -0,0 +1,825 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Written and Maintained by: + * Atul Gupta (atul.gupta@chelsio.com) + */ + +#define pr_fmt(fmt) "ch_ipsec: " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/highmem.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/netdevice.h> +#include <net/esp.h> +#include <net/xfrm.h> +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include <crypto/authenc.h> +#include <crypto/internal/aead.h> +#include <crypto/null.h> +#include <crypto/internal/skcipher.h> +#include <crypto/aead.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/hash.h> + +#include "chcr_ipsec.h" + +/* + * Max Tx descriptor space we allow for an Ethernet packet to be inlined + * into a WR. + */ +#define MAX_IMM_TX_PKT_LEN 256 +#define GCM_ESP_IV_SIZE 8 + +static LIST_HEAD(uld_ctx_list); +static DEFINE_MUTEX(dev_mutex); + +static bool ch_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state); +static int ch_ipsec_xmit(struct sk_buff *skb, struct net_device *dev); +static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop); +static void ch_ipsec_advance_esn_state(struct xfrm_state *x); +static void ch_ipsec_xfrm_free_state(struct xfrm_state *x); +static void ch_ipsec_xfrm_del_state(struct xfrm_state *x); +static int ch_ipsec_xfrm_add_state(struct xfrm_state *x); + +static const struct xfrmdev_ops ch_ipsec_xfrmdev_ops = { + .xdo_dev_state_add = ch_ipsec_xfrm_add_state, + .xdo_dev_state_delete = ch_ipsec_xfrm_del_state, + .xdo_dev_state_free = ch_ipsec_xfrm_free_state, + .xdo_dev_offload_ok = ch_ipsec_offload_ok, + .xdo_dev_state_advance_esn = ch_ipsec_advance_esn_state, +}; + +static struct cxgb4_uld_info ch_ipsec_uld_info = { + .name = CHIPSEC_DRV_MODULE_NAME, + .nrxq = MAX_ULD_QSETS, + /* Max ntxq will be derived from fw config file*/ + .rxq_size = 1024, + .add = ch_ipsec_uld_add, + .state_change = ch_ipsec_uld_state_change, + .tx_handler = ch_ipsec_xmit, + .xfrmdev_ops = &ch_ipsec_xfrmdev_ops, +}; + +static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop) +{ + struct ipsec_uld_ctx *u_ctx; + + pr_info_once("%s - version %s\n", CHIPSEC_DRV_DESC, + CHIPSEC_DRV_VERSION); + u_ctx = kzalloc(sizeof(*u_ctx), GFP_KERNEL); + if (!u_ctx) { + u_ctx = ERR_PTR(-ENOMEM); + goto out; + } + u_ctx->lldi = *infop; +out: + return u_ctx; +} + +static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state) +{ + struct ipsec_uld_ctx *u_ctx = handle; + + pr_debug("new_state %u\n", new_state); + switch (new_state) { + case CXGB4_STATE_UP: + pr_info("%s: Up\n", pci_name(u_ctx->lldi.pdev)); + mutex_lock(&dev_mutex); + list_add_tail(&u_ctx->entry, &uld_ctx_list); + mutex_unlock(&dev_mutex); + break; + case CXGB4_STATE_START_RECOVERY: + case CXGB4_STATE_DOWN: + case CXGB4_STATE_DETACH: + pr_info("%s: Down\n", pci_name(u_ctx->lldi.pdev)); + list_del(&u_ctx->entry); + break; + default: + break; + } + + return 0; +} + +static int ch_ipsec_setauthsize(struct xfrm_state *x, + struct ipsec_sa_entry *sa_entry) +{ + int hmac_ctrl; + int authsize = x->aead->alg_icv_len / 8; + + sa_entry->authsize = authsize; + + switch (authsize) { + case ICV_8: + hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + break; + case ICV_12: + hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + break; + case ICV_16: + hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + break; + default: + return 
-EINVAL; + } + return hmac_ctrl; +} + +static int ch_ipsec_setkey(struct xfrm_state *x, + struct ipsec_sa_entry *sa_entry) +{ + int keylen = (x->aead->alg_key_len + 7) / 8; + unsigned char *key = x->aead->alg_key; + int ck_size, key_ctx_size = 0; + unsigned char ghash_h[AEAD_H_SIZE]; + struct crypto_aes_ctx aes; + int ret = 0; + + if (keylen > 3) { + keylen -= 4; /* nonce/salt is present in the last 4 bytes */ + memcpy(sa_entry->salt, key + keylen, 4); + } + + if (keylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + pr_err("GCM: Invalid key length %d\n", keylen); + ret = -EINVAL; + goto out; + } + + memcpy(sa_entry->key, key, keylen); + sa_entry->enckey_len = keylen; + key_ctx_size = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keylen, 16)) << 4) + + AEAD_H_SIZE; + + sa_entry->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, + CHCR_KEYCTX_MAC_KEY_SIZE_128, + 0, 0, + key_ctx_size >> 4); + + /* Calculate the H = CIPH(K, 0 repeated 16 times). + * It will go in key context + */ + ret = aes_expandkey(&aes, key, keylen); + if (ret) { + sa_entry->enckey_len = 0; + goto out; + } + memset(ghash_h, 0, AEAD_H_SIZE); + aes_encrypt(&aes, ghash_h, ghash_h); + memzero_explicit(&aes, sizeof(aes)); + + memcpy(sa_entry->key + (DIV_ROUND_UP(sa_entry->enckey_len, 16) * + 16), ghash_h, AEAD_H_SIZE); + sa_entry->kctx_len = ((DIV_ROUND_UP(sa_entry->enckey_len, 16)) << 4) + + AEAD_H_SIZE; +out: + return ret; +} + +/* + * ch_ipsec_xfrm_add_state + * returns 0 on success, negative error if failed to send message to FPGA + * positive error if FPGA returned a bad response + */ +static int ch_ipsec_xfrm_add_state(struct xfrm_state *x) +{ + struct ipsec_sa_entry *sa_entry; + int res = 0; + + if (x->props.aalgo != SADB_AALG_NONE) { + pr_debug("Cannot offload authenticated xfrm states\n"); + return -EINVAL; + } + if (x->props.calgo != SADB_X_CALG_NONE) { + pr_debug("Cannot offload compressed xfrm states\n"); + return -EINVAL; + } + if (x->props.family != AF_INET && + x->props.family != AF_INET6) { + pr_debug("Only IPv4/6 xfrm state offloaded\n"); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_TRANSPORT && + x->props.mode != XFRM_MODE_TUNNEL) { + pr_debug("Only transport and tunnel xfrm offload\n"); + return -EINVAL; + } + if (x->id.proto != IPPROTO_ESP) { + pr_debug("Only ESP xfrm state offloaded\n"); + return -EINVAL; + } + if (x->encap) { + pr_debug("Encapsulated xfrm state not offloaded\n"); + return -EINVAL; + } + if (!x->aead) { + pr_debug("Cannot offload xfrm states without aead\n"); + return -EINVAL; + } + if (x->aead->alg_icv_len != 128 && + x->aead->alg_icv_len != 96) { + pr_debug("Cannot offload xfrm states with AEAD ICV length other than 96b & 128b\n"); + return -EINVAL; + } + if ((x->aead->alg_key_len != 128 + 32) && + (x->aead->alg_key_len != 256 + 32)) { + pr_debug("cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EINVAL; + } + if (x->tfcpad) { + pr_debug("Cannot offload xfrm states with tfc padding\n"); + return -EINVAL; + } + if (!x->geniv) { + pr_debug("Cannot offload xfrm states without geniv\n"); + return -EINVAL; + } + if (strcmp(x->geniv, "seqiv")) { + pr_debug("Cannot offload xfrm states with geniv other than seqiv\n"); + return -EINVAL; + } + + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!sa_entry) { + res = -ENOMEM; + goto out; + } + + 
sa_entry->hmac_ctrl = ch_ipsec_setauthsize(x, sa_entry); + if (x->props.flags & XFRM_STATE_ESN) + sa_entry->esn = 1; + ch_ipsec_setkey(x, sa_entry); + x->xso.offload_handle = (unsigned long)sa_entry; + try_module_get(THIS_MODULE); +out: + return res; +} + +static void ch_ipsec_xfrm_del_state(struct xfrm_state *x) +{ + /* do nothing */ + if (!x->xso.offload_handle) + return; +} + +static void ch_ipsec_xfrm_free_state(struct xfrm_state *x) +{ + struct ipsec_sa_entry *sa_entry; + + if (!x->xso.offload_handle) + return; + + sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle; + kfree(sa_entry); + module_put(THIS_MODULE); +} + +static bool ch_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + if (x->props.family == AF_INET) { + /* Offload with IP options is not supported yet */ + if (ip_hdr(skb)->ihl > 5) + return false; + } else { + /* Offload with IPv6 extension headers is not support yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } + return true; +} + +static void ch_ipsec_advance_esn_state(struct xfrm_state *x) +{ + /* do nothing */ + if (!x->xso.offload_handle) + return; +} + +static int is_eth_imm(const struct sk_buff *skb, + struct ipsec_sa_entry *sa_entry) +{ + unsigned int kctx_len; + int hdrlen; + + kctx_len = sa_entry->kctx_len; + hdrlen = sizeof(struct fw_ulptx_wr) + + sizeof(struct chcr_ipsec_req) + kctx_len; + + hdrlen += sizeof(struct cpl_tx_pkt); + if (sa_entry->esn) + hdrlen += (DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), 16) + << 4); + if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen) + return hdrlen; + return 0; +} + +static unsigned int calc_tx_sec_flits(const struct sk_buff *skb, + struct ipsec_sa_entry *sa_entry, + bool *immediate) +{ + unsigned int kctx_len; + unsigned int flits; + int aadivlen; + int hdrlen; + + kctx_len = sa_entry->kctx_len; + hdrlen = is_eth_imm(skb, sa_entry); + aadivlen = sa_entry->esn ? DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), + 16) : 0; + aadivlen <<= 4; + + /* If the skb is small enough, we can pump it out as a work request + * with only immediate data. In that case we just have to have the + * TX Packet header plus the skb data in the Work Request. + */ + + if (hdrlen) { + *immediate = true; + return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64)); + } + + flits = sgl_len(skb_shinfo(skb)->nr_frags + 1); + + /* Otherwise, we're going to have to construct a Scatter gather list + * of the skb body and fragments. We also include the flits necessary + * for the TX Packet Work Request and CPL. We always have a firmware + * Write Header (incorporated as part of the cpl_tx_pkt_lso and + * cpl_tx_pkt structures), followed by either a TX Packet Write CPL + * message or, if we're doing a Large Send Offload, an LSO CPL message + * with an embedded TX Packet Write CPL message. 
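The comment above explains the immediate-vs-SGL decision, and since it drives all of the later descriptor accounting, a standalone arithmetic sketch may help. The header sizes below are stand-ins rather than the real fw_ulptx_wr/chcr_ipsec_req sizes; sgl_flits() restates the sgl_len() formula used by the driver (one flit is 8 bytes).

#include <stdio.h>

#define MAX_IMM_TX_PKT_LEN 256   /* same inline limit as the driver */

/* flits needed for an SGL covering nfrags buffer fragments */
static unsigned int sgl_flits(unsigned int nfrags)
{
	unsigned int n = nfrags - 1;

	return (3 * n) / 2 + (n & 1) + 2;
}

int main(void)
{
	/* WR + IPsec request + key context + CPL, illustrative sizes */
	unsigned int hdrlen = 16 + 48 + 64 + 16;
	unsigned int skb_len = 150, nfrags = 3;

	if (skb_len <= MAX_IMM_TX_PKT_LEN - hdrlen)
		printf("inline: %u flits\n", (skb_len + hdrlen + 7) / 8);
	else
		printf("SGL: %u flits plus header flits for %u frags\n",
		       sgl_flits(nfrags + 1), nfrags);
	return 0;
}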
+ */ + flits += (sizeof(struct fw_ulptx_wr) + + sizeof(struct chcr_ipsec_req) + + kctx_len + + sizeof(struct cpl_tx_pkt_core) + + aadivlen) / sizeof(__be64); + return flits; +} + +static void *copy_esn_pktxt(struct sk_buff *skb, + struct net_device *dev, + void *pos, + struct ipsec_sa_entry *sa_entry) +{ + struct chcr_ipsec_aadiv *aadiv; + struct ulptx_idata *sc_imm; + struct ip_esp_hdr *esphdr; + struct xfrm_offload *xo; + struct sge_eth_txq *q; + struct adapter *adap; + struct port_info *pi; + __be64 seqno; + u32 qidx; + u32 seqlo; + u8 *iv; + int eoq; + int len; + + pi = netdev_priv(dev); + adap = pi->adapter; + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + pi->first_qset]; + + /* end of queue, reset pos to start of queue */ + eoq = (void *)q->q.stat - pos; + if (!eoq) + pos = q->q.desc; + + len = DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), 16) << 4; + memset(pos, 0, len); + aadiv = (struct chcr_ipsec_aadiv *)pos; + esphdr = (struct ip_esp_hdr *)skb_transport_header(skb); + iv = skb_transport_header(skb) + sizeof(struct ip_esp_hdr); + xo = xfrm_offload(skb); + + aadiv->spi = (esphdr->spi); + seqlo = ntohl(esphdr->seq_no); + seqno = cpu_to_be64(seqlo + ((u64)xo->seq.hi << 32)); + memcpy(aadiv->seq_no, &seqno, 8); + iv = skb_transport_header(skb) + sizeof(struct ip_esp_hdr); + memcpy(aadiv->iv, iv, 8); + + if (is_eth_imm(skb, sa_entry) && !skb_is_nonlinear(skb)) { + sc_imm = (struct ulptx_idata *)(pos + + (DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), + sizeof(__be64)) << 3)); + sc_imm->cmd_more = FILL_CMD_MORE(0); + sc_imm->len = cpu_to_be32(skb->len); + } + pos += len; + return pos; +} + +static void *copy_cpltx_pktxt(struct sk_buff *skb, + struct net_device *dev, + void *pos, + struct ipsec_sa_entry *sa_entry) +{ + struct cpl_tx_pkt_core *cpl; + struct sge_eth_txq *q; + struct adapter *adap; + struct port_info *pi; + u32 ctrl0, qidx; + u64 cntrl = 0; + int left; + + pi = netdev_priv(dev); + adap = pi->adapter; + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + pi->first_qset]; + + left = (void *)q->q.stat - pos; + if (!left) + pos = q->q.desc; + + cpl = (struct cpl_tx_pkt_core *)pos; + + cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; + ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_INTF_V(pi->tx_chan) | + TXPKT_PF_V(adap->pf); + if (skb_vlan_tag_present(skb)) { + q->vlan_ins++; + cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb)); + } + + cpl->ctrl0 = htonl(ctrl0); + cpl->pack = htons(0); + cpl->len = htons(skb->len); + cpl->ctrl1 = cpu_to_be64(cntrl); + + pos += sizeof(struct cpl_tx_pkt_core); + /* Copy ESN info for HW */ + if (sa_entry->esn) + pos = copy_esn_pktxt(skb, dev, pos, sa_entry); + return pos; +} + +static void *copy_key_cpltx_pktxt(struct sk_buff *skb, + struct net_device *dev, + void *pos, + struct ipsec_sa_entry *sa_entry) +{ + struct _key_ctx *key_ctx; + int left, eoq, key_len; + struct sge_eth_txq *q; + struct adapter *adap; + struct port_info *pi; + unsigned int qidx; + + pi = netdev_priv(dev); + adap = pi->adapter; + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + pi->first_qset]; + key_len = sa_entry->kctx_len; + + /* end of queue, reset pos to start of queue */ + eoq = (void *)q->q.stat - pos; + left = eoq; + if (!eoq) { + pos = q->q.desc; + left = 64 * q->q.size; + } + + /* Copy the Key context header */ + key_ctx = (struct _key_ctx *)pos; + key_ctx->ctx_hdr = sa_entry->key_ctx_hdr; + memcpy(key_ctx->salt, sa_entry->salt, MAX_SALT); + pos += sizeof(struct _key_ctx); + left -= sizeof(struct _key_ctx); + + if 
(likely(key_len <= left)) { + memcpy(key_ctx->key, sa_entry->key, key_len); + pos += key_len; + } else { + memcpy(pos, sa_entry->key, left); + memcpy(q->q.desc, sa_entry->key + left, + key_len - left); + pos = (u8 *)q->q.desc + (key_len - left); + } + /* Copy CPL TX PKT XT */ + pos = copy_cpltx_pktxt(skb, dev, pos, sa_entry); + + return pos; +} + +static void *ch_ipsec_crypto_wreq(struct sk_buff *skb, + struct net_device *dev, + void *pos, + int credits, + struct ipsec_sa_entry *sa_entry) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + unsigned int ivsize = GCM_ESP_IV_SIZE; + struct chcr_ipsec_wr *wr; + bool immediate = false; + u16 immdatalen = 0; + unsigned int flits; + u32 ivinoffset; + u32 aadstart; + u32 aadstop; + u32 ciphstart; + u16 sc_more = 0; + u32 ivdrop = 0; + u32 esnlen = 0; + u32 wr_mid; + u16 ndesc; + int qidx = skb_get_queue_mapping(skb); + struct sge_eth_txq *q = &adap->sge.ethtxq[qidx + pi->first_qset]; + unsigned int kctx_len = sa_entry->kctx_len; + int qid = q->q.cntxt_id; + + atomic_inc(&adap->ch_ipsec_stats.ipsec_cnt); + + flits = calc_tx_sec_flits(skb, sa_entry, &immediate); + ndesc = DIV_ROUND_UP(flits, 2); + if (sa_entry->esn) + ivdrop = 1; + + if (immediate) + immdatalen = skb->len; + + if (sa_entry->esn) { + esnlen = sizeof(struct chcr_ipsec_aadiv); + if (!skb_is_nonlinear(skb)) + sc_more = 1; + } + + /* WR Header */ + wr = (struct chcr_ipsec_wr *)pos; + wr->wreq.op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr_mid = FW_CRYPTO_LOOKASIDE_WR_LEN16_V(ndesc); + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + netif_tx_stop_queue(q->txq); + q->q.stops++; + if (!q->dbqt) + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + wr_mid |= FW_ULPTX_WR_DATA_F; + wr->wreq.flowid_len16 = htonl(wr_mid); + + /* ULPTX */ + wr->req.ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(pi->port_id, qid); + wr->req.ulptx.len = htonl(ndesc - 1); + + /* Sub-command */ + wr->req.sc_imm.cmd_more = FILL_CMD_MORE(!immdatalen || sc_more); + wr->req.sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + + sizeof(wr->req.key_ctx) + + kctx_len + + sizeof(struct cpl_tx_pkt_core) + + esnlen + + (esnlen ? 0 : immdatalen)); + + /* CPL_SEC_PDU */ + ivinoffset = sa_entry->esn ? (ESN_IV_INSERT_OFFSET + 1) : + (skb_transport_offset(skb) + + sizeof(struct ip_esp_hdr) + 1); + wr->req.sec_cpl.op_ivinsrtofst = htonl( + CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | + CPL_TX_SEC_PDU_CPLLEN_V(2) | + CPL_TX_SEC_PDU_PLACEHOLDER_V(1) | + CPL_TX_SEC_PDU_IVINSRTOFST_V( + ivinoffset)); + + wr->req.sec_cpl.pldlen = htonl(skb->len + esnlen); + aadstart = sa_entry->esn ? 1 : (skb_transport_offset(skb) + 1); + aadstop = sa_entry->esn ? ESN_IV_INSERT_OFFSET : + (skb_transport_offset(skb) + + sizeof(struct ip_esp_hdr)); + ciphstart = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr) + + GCM_ESP_IV_SIZE + 1; + ciphstart += sa_entry->esn ? 
esnlen : 0; + + wr->req.sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( + aadstart, + aadstop, + ciphstart, 0); + + wr->req.sec_cpl.cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, ciphstart, + sa_entry->authsize, + sa_entry->authsize); + wr->req.sec_cpl.seqno_numivs = + FILL_SEC_CPL_SCMD0_SEQNO(CHCR_ENCRYPT_OP, 1, + CHCR_SCMD_CIPHER_MODE_AES_GCM, + CHCR_SCMD_AUTH_MODE_GHASH, + sa_entry->hmac_ctrl, + ivsize >> 1); + wr->req.sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, + 0, ivdrop, 0); + + pos += sizeof(struct fw_ulptx_wr) + + sizeof(struct ulp_txpkt) + + sizeof(struct ulptx_idata) + + sizeof(struct cpl_tx_sec_pdu); + + pos = copy_key_cpltx_pktxt(skb, dev, pos, sa_entry); + + return pos; +} + +/** + * flits_to_desc - returns the num of Tx descriptors for the given flits + * @n: the number of flits + * + * Returns the number of Tx descriptors needed for the supplied number + * of flits. + */ +static unsigned int flits_to_desc(unsigned int n) +{ + WARN_ON(n > SGE_MAX_WR_LEN / 8); + return DIV_ROUND_UP(n, 8); +} + +static unsigned int txq_avail(const struct sge_txq *q) +{ + return q->size - 1 - q->in_use; +} + +static void eth_txq_stop(struct sge_eth_txq *q) +{ + netif_tx_stop_queue(q->txq); + q->q.stops++; +} + +static void txq_advance(struct sge_txq *q, unsigned int n) +{ + q->in_use += n; + q->pidx += n; + if (q->pidx >= q->size) + q->pidx -= q->size; +} + +/* + * ch_ipsec_xmit called from ULD Tx handler + */ +int ch_ipsec_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct xfrm_state *x = xfrm_input_state(skb); + unsigned int last_desc, ndesc, flits = 0; + struct ipsec_sa_entry *sa_entry; + u64 *pos, *end, *before, *sgl; + struct tx_sw_desc *sgl_sdesc; + int qidx, left, credits; + bool immediate = false; + struct sge_eth_txq *q; + struct adapter *adap; + struct port_info *pi; + struct sec_path *sp; + + if (!x->xso.offload_handle) + return NETDEV_TX_BUSY; + + sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle; + + sp = skb_sec_path(skb); + if (sp->len != 1) { +out_free: dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + pi = netdev_priv(dev); + adap = pi->adapter; + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + pi->first_qset]; + + cxgb4_reclaim_completed_tx(adap, &q->q, true); + + flits = calc_tx_sec_flits(skb, sa_entry, &immediate); + ndesc = flits_to_desc(flits); + credits = txq_avail(&q->q) - ndesc; + + if (unlikely(credits < 0)) { + eth_txq_stop(q); + dev_err(adap->pdev_dev, + "%s: Tx ring %u full while queue awake! 
cred:%d %d %d flits:%d\n", + dev->name, qidx, credits, ndesc, txq_avail(&q->q), + flits); + return NETDEV_TX_BUSY; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (!immediate && + unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + goto out_free; + } + + pos = (u64 *)&q->q.desc[q->q.pidx]; + before = (u64 *)pos; + end = (u64 *)pos + flits; + /* Setup IPSec CPL */ + pos = (void *)ch_ipsec_crypto_wreq(skb, dev, (void *)pos, + credits, sa_entry); + if (before > (u64 *)pos) { + left = (u8 *)end - (u8 *)q->q.stat; + end = (void *)q->q.desc + left; + } + if (pos == (u64 *)q->q.stat) { + left = (u8 *)end - (u8 *)q->q.stat; + end = (void *)q->q.desc + left; + pos = (void *)q->q.desc; + } + + sgl = (void *)pos; + if (immediate) { + cxgb4_inline_tx_skb(skb, &q->q, sgl); + dev_consume_skb_any(skb); + } else { + cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, + 0, sgl_sdesc->addr); + skb_orphan(skb); + sgl_sdesc->skb = skb; + } + txq_advance(&q->q, ndesc); + + cxgb4_ring_tx_db(adap, &q->q, ndesc); + return NETDEV_TX_OK; +} + +static int __init ch_ipsec_init(void) +{ + cxgb4_register_uld(CXGB4_ULD_IPSEC, &ch_ipsec_uld_info); + + return 0; +} + +static void __exit ch_ipsec_exit(void) +{ + struct ipsec_uld_ctx *u_ctx, *tmp; + struct adapter *adap; + + mutex_lock(&dev_mutex); + list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) { + adap = pci_get_drvdata(u_ctx->lldi.pdev); + atomic_set(&adap->ch_ipsec_stats.ipsec_cnt, 0); + list_del(&u_ctx->entry); + kfree(u_ctx); + } + mutex_unlock(&dev_mutex); + cxgb4_unregister_uld(CXGB4_ULD_IPSEC); +} + +module_init(ch_ipsec_init); +module_exit(ch_ipsec_exit); + +MODULE_DESCRIPTION("Crypto IPSEC for Chelsio Terminator cards."); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_VERSION(CHIPSEC_DRV_VERSION); + diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.h new file mode 100644 index 000000000000..1d110d2edd64 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2018 Chelsio Communications, Inc. 
*/ + +#ifndef __CHCR_IPSEC_H__ +#define __CHCR_IPSEC_H__ + +#include <crypto/algapi.h> +#include "t4_hw.h" +#include "cxgb4.h" +#include "t4_msg.h" +#include "cxgb4_uld.h" + +#include "chcr_core.h" +#include "chcr_algo.h" +#include "chcr_crypto.h" + +#define CHIPSEC_DRV_MODULE_NAME "ch_ipsec" +#define CHIPSEC_DRV_VERSION "1.0.0.0-ko" +#define CHIPSEC_DRV_DESC "Chelsio T6 Crypto Ipsec offload Driver" + +struct ipsec_uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; +}; + +struct chcr_ipsec_req { + struct ulp_txpkt ulptx; + struct ulptx_idata sc_imm; + struct cpl_tx_sec_pdu sec_cpl; + struct _key_ctx key_ctx; +}; + +struct chcr_ipsec_wr { + struct fw_ulptx_wr wreq; + struct chcr_ipsec_req req; +}; + +#define ESN_IV_INSERT_OFFSET 12 +struct chcr_ipsec_aadiv { + __be32 spi; + u8 seq_no[8]; + u8 iv[8]; +}; + +struct ipsec_sa_entry { + int hmac_ctrl; + u16 esn; + u16 resv; + unsigned int enckey_len; + unsigned int kctx_len; + unsigned int authsize; + __be32 key_ctx_hdr; + char salt[MAX_SALT]; + char key[2 * AES_MAX_KEY_SIZE]; +}; + +#endif /* __CHCR_IPSEC_H__ */ + diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/Makefile new file mode 100644 index 000000000000..5e7d161c3199 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 + +obj-$(CONFIG_CHELSIO_TLS_DEVICE) += ch_ktls.o +ch_ktls-objs := chcr_ktls.o diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_common.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_common.h new file mode 100644 index 000000000000..38319f4c3121 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_common.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2020 Chelsio Communications. All rights reserved. 
*/ + +#ifndef __CHCR_COMMON_H__ +#define __CHCR_COMMON_H__ + +#include "cxgb4.h" + +#define CHCR_MAX_SALT 4 +#define CHCR_KEYCTX_MAC_KEY_SIZE_128 0 +#define CHCR_KEYCTX_CIPHER_KEY_SIZE_128 0 +#define CHCR_SCMD_CIPHER_MODE_AES_GCM 2 +#define CHCR_SCMD_CIPHER_MODE_AES_CTR 3 +#define CHCR_CPL_TX_SEC_PDU_LEN_64BIT 2 +#define CHCR_SCMD_SEQ_NO_CTRL_64BIT 3 +#define CHCR_SCMD_PROTO_VERSION_TLS 0 +#define CHCR_SCMD_PROTO_VERSION_GENERIC 4 +#define CHCR_SCMD_AUTH_MODE_GHASH 4 +#define AES_BLOCK_LEN 16 + +struct ktls_key_ctx { + __be32 ctx_hdr; + u8 salt[CHCR_MAX_SALT]; + __be64 iv_to_auth; + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE + + TLS_CIPHER_AES_GCM_256_TAG_SIZE]; +}; + +/* Crypto key context */ +#define KEY_CONTEXT_CTX_LEN_S 24 +#define KEY_CONTEXT_CTX_LEN_V(x) ((x) << KEY_CONTEXT_CTX_LEN_S) + +#define KEY_CONTEXT_SALT_PRESENT_S 10 +#define KEY_CONTEXT_SALT_PRESENT_V(x) ((x) << KEY_CONTEXT_SALT_PRESENT_S) +#define KEY_CONTEXT_SALT_PRESENT_F KEY_CONTEXT_SALT_PRESENT_V(1U) + +#define KEY_CONTEXT_VALID_S 0 +#define KEY_CONTEXT_VALID_V(x) ((x) << KEY_CONTEXT_VALID_S) +#define KEY_CONTEXT_VALID_F KEY_CONTEXT_VALID_V(1U) + +#define KEY_CONTEXT_CK_SIZE_S 6 +#define KEY_CONTEXT_CK_SIZE_V(x) ((x) << KEY_CONTEXT_CK_SIZE_S) + +#define KEY_CONTEXT_MK_SIZE_S 2 +#define KEY_CONTEXT_MK_SIZE_V(x) ((x) << KEY_CONTEXT_MK_SIZE_S) + +#define KEY_CONTEXT_OPAD_PRESENT_S 11 +#define KEY_CONTEXT_OPAD_PRESENT_V(x) ((x) << KEY_CONTEXT_OPAD_PRESENT_S) +#define KEY_CONTEXT_OPAD_PRESENT_F KEY_CONTEXT_OPAD_PRESENT_V(1U) + +#define FILL_KEY_CTX_HDR(ck_size, mk_size, ctx_len) \ + htonl(KEY_CONTEXT_MK_SIZE_V(mk_size) | \ + KEY_CONTEXT_CK_SIZE_V(ck_size) | \ + KEY_CONTEXT_VALID_F | \ + KEY_CONTEXT_SALT_PRESENT_F | \ + KEY_CONTEXT_CTX_LEN_V((ctx_len))) + +static inline void *chcr_copy_to_txd(const void *src, const struct sge_txq *q, + void *pos, int length) +{ + int left = (void *)q->stat - pos; + u64 *p; + + if (likely(length <= left)) { + memcpy(pos, src, length); + pos += length; + } else { + memcpy(pos, src, left); + memcpy(q->desc, src + left, length - left); + pos = (void *)q->desc + (length - left); + } + /* 0-pad to multiple of 16 */ + p = PTR_ALIGN(pos, 8); + if ((uintptr_t)p & 8) { + *p = 0; + return p + 1; + } + return p; +} + +static inline unsigned int chcr_txq_avail(const struct sge_txq *q) +{ + return q->size - 1 - q->in_use; +} + +static inline void chcr_txq_advance(struct sge_txq *q, unsigned int n) +{ + q->in_use += n; + q->pidx += n; + if (q->pidx >= q->size) + q->pidx -= q->size; +} + +static inline void chcr_eth_txq_stop(struct sge_eth_txq *q) +{ + netif_tx_stop_queue(q->txq); + q->q.stops++; +} + +static inline unsigned int chcr_sgl_len(unsigned int n) +{ + n--; + return (3 * n) / 2 + (n & 1) + 2; +} + +static inline unsigned int chcr_flits_to_desc(unsigned int n) +{ + WARN_ON(n > SGE_MAX_WR_LEN / 8); + return DIV_ROUND_UP(n, 8); +} +#endif /* __CHCR_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c new file mode 100644 index 000000000000..5195f692f14d --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -0,0 +1,2154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Chelsio Communications. All rights reserved. 
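chcr_copy_to_txd() above is easiest to see in isolation: a write runs up to the end-of-ring marker (q->stat) and any remainder restarts at the ring base. Below is a minimal userspace model of just that wrap logic; the 64-byte ring is illustrative, and the real helper additionally zero-pads the final position to a 16-byte boundary.

#include <stdio.h>
#include <string.h>

#define RING_BYTES 64

static char ring[RING_BYTES];

static void *copy_to_ring(const void *src, void *pos, int len)
{
	int left = ring + RING_BYTES - (char *)pos; /* room before the end */

	if (len <= left) {
		memcpy(pos, src, len);
		return (char *)pos + len;
	}
	memcpy(pos, src, left);                             /* fill to the end */
	memcpy(ring, (const char *)src + left, len - left); /* wrap to base */
	return ring + (len - left);
}

int main(void)
{
	char key[32] = "0123456789abcdef0123456789abcde";
	void *pos = ring + 48;  /* only 16 bytes left before the wrap */

	pos = copy_to_ring(key, pos, sizeof(key));
	printf("new offset: %td\n", (char *)pos - ring); /* prints 16 */
	return 0;
}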
*/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/skbuff.h> +#include <linux/module.h> +#include <linux/highmem.h> +#include <linux/ip.h> +#include <net/ipv6.h> +#include <linux/netdevice.h> +#include "chcr_ktls.h" + +static LIST_HEAD(uld_ctx_list); +static DEFINE_MUTEX(dev_mutex); + +static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info); +/* + * chcr_ktls_save_keys: calculate and save crypto keys. + * @tx_info - driver specific tls info. + * @crypto_info - tls crypto information. + * @direction - TX/RX direction. + * return - SUCCESS/FAILURE. + */ +static int chcr_ktls_save_keys(struct chcr_ktls_info *tx_info, + struct tls_crypto_info *crypto_info, + enum tls_offload_ctx_dir direction) +{ + int ck_size, key_ctx_size, mac_key_size, keylen, ghash_size, ret; + unsigned char ghash_h[TLS_CIPHER_AES_GCM_256_TAG_SIZE]; + struct tls12_crypto_info_aes_gcm_128 *info_128_gcm; + struct ktls_key_ctx *kctx = &tx_info->key_ctx; + struct crypto_cipher *cipher; + unsigned char *key, *salt; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + info_128_gcm = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + keylen = TLS_CIPHER_AES_GCM_128_KEY_SIZE; + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + tx_info->salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE; + mac_key_size = CHCR_KEYCTX_MAC_KEY_SIZE_128; + tx_info->iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + tx_info->iv = be64_to_cpu(*(__be64 *)info_128_gcm->iv); + + ghash_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; + key = info_128_gcm->key; + salt = info_128_gcm->salt; + tx_info->record_no = *(u64 *)info_128_gcm->rec_seq; + + /* The SCMD fields used when encrypting a full TLS + * record. Its a one time calculation till the + * connection exists. + */ + tx_info->scmd0_seqno_numivs = + SCMD_SEQ_NO_CTRL_V(CHCR_SCMD_SEQ_NO_CTRL_64BIT) | + SCMD_CIPH_AUTH_SEQ_CTRL_F | + SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_TLS) | + SCMD_CIPH_MODE_V(CHCR_SCMD_CIPHER_MODE_AES_GCM) | + SCMD_AUTH_MODE_V(CHCR_SCMD_AUTH_MODE_GHASH) | + SCMD_IV_SIZE_V(TLS_CIPHER_AES_GCM_128_IV_SIZE >> 1) | + SCMD_NUM_IVS_V(1); + + /* keys will be sent inline. */ + tx_info->scmd0_ivgen_hdrlen = SCMD_KEY_CTX_INLINE_F; + + /* The SCMD fields used when encrypting a partial TLS + * record (no trailer and possibly a truncated payload). + */ + tx_info->scmd0_short_seqno_numivs = + SCMD_CIPH_AUTH_SEQ_CTRL_F | + SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_GENERIC) | + SCMD_CIPH_MODE_V(CHCR_SCMD_CIPHER_MODE_AES_CTR) | + SCMD_IV_SIZE_V(AES_BLOCK_LEN >> 1); + + tx_info->scmd0_short_ivgen_hdrlen = + tx_info->scmd0_ivgen_hdrlen | SCMD_AADIVDROP_F; + + break; + + default: + pr_err("GCM: cipher type 0x%x not supported\n", + crypto_info->cipher_type); + ret = -EINVAL; + goto out; + } + + key_ctx_size = CHCR_KTLS_KEY_CTX_LEN + + roundup(keylen, 16) + ghash_size; + /* Calculate the H = CIPH(K, 0 repeated 16 times). 
+ * It will go in key context + */ + cipher = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(cipher)) { + ret = -ENOMEM; + goto out; + } + + ret = crypto_cipher_setkey(cipher, key, keylen); + if (ret) + goto out1; + + memset(ghash_h, 0, ghash_size); + crypto_cipher_encrypt_one(cipher, ghash_h, ghash_h); + + /* fill the Key context */ + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + kctx->ctx_hdr = FILL_KEY_CTX_HDR(ck_size, + mac_key_size, + key_ctx_size >> 4); + } else { + ret = -EINVAL; + goto out1; + } + + memcpy(kctx->salt, salt, tx_info->salt_size); + memcpy(kctx->key, key, keylen); + memcpy(kctx->key + keylen, ghash_h, ghash_size); + tx_info->key_ctx_len = key_ctx_size; + +out1: + crypto_free_cipher(cipher); +out: + return ret; +} + +/* + * chcr_ktls_act_open_req: creates TCB entry for ipv4 connection. + * @sk - tcp socket. + * @tx_info - driver specific tls info. + * @atid - connection active tid. + * return - send success/failure. + */ +static int chcr_ktls_act_open_req(struct sock *sk, + struct chcr_ktls_info *tx_info, + int atid) +{ + struct inet_sock *inet = inet_sk(sk); + struct cpl_t6_act_open_req *cpl6; + struct cpl_act_open_req *cpl; + struct sk_buff *skb; + unsigned int len; + int qid_atid; + u64 options; + + len = sizeof(*cpl6); + skb = alloc_skb(len, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + /* mark it a control pkt */ + set_wr_txq(skb, CPL_PRIORITY_CONTROL, tx_info->port_id); + + cpl6 = __skb_put_zero(skb, len); + cpl = (struct cpl_act_open_req *)cpl6; + INIT_TP_WR(cpl6, 0); + qid_atid = TID_QID_V(tx_info->rx_qid) | + TID_TID_V(atid); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); + cpl->local_port = inet->inet_sport; + cpl->peer_port = inet->inet_dport; + cpl->local_ip = inet->inet_rcv_saddr; + cpl->peer_ip = inet->inet_daddr; + + /* fill first 64 bit option field. */ + options = TCAM_BYPASS_F | ULP_MODE_V(ULP_MODE_NONE) | NON_OFFLOAD_F | + SMAC_SEL_V(tx_info->smt_idx) | TX_CHAN_V(tx_info->tx_chan); + cpl->opt0 = cpu_to_be64(options); + + /* next 64 bit option field. */ + options = + TX_QUEUE_V(tx_info->adap->params.tp.tx_modq[tx_info->tx_chan]); + cpl->opt2 = htonl(options); + + return cxgb4_l2t_send(tx_info->netdev, skb, tx_info->l2te); +} + +#if IS_ENABLED(CONFIG_IPV6) +/* + * chcr_ktls_act_open_req6: creates TCB entry for ipv6 connection. + * @sk - tcp socket. + * @tx_info - driver specific tls info. + * @atid - connection active tid. + * return - send success/failure. 
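Both key-load paths (ch_ipsec_setkey() earlier and chcr_ktls_save_keys() here) precompute the GCM hash subkey as H = AES_K(0^16) and store it after the raw key in the key context, so the hardware never has to derive it per record. A userspace check of the same value, assuming OpenSSL is available (link with -lcrypto):

#include <stdio.h>
#include <openssl/aes.h>

int main(void)
{
	unsigned char key[16] = {0}; /* stand-in for the negotiated AES key */
	unsigned char h[16] = {0};   /* becomes H = CIPH(K, 0^16) */
	AES_KEY enc;

	AES_set_encrypt_key(key, 128, &enc);
	AES_encrypt(h, h, &enc);     /* in place, like aes_encrypt() above */

	for (int i = 0; i < 16; i++)
		printf("%02x", h[i]);
	printf("\n"); /* 66e94bd4ef8a2c3b884cfa59ca342b2e for the zero key */
	return 0;
}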
+ */ +static int chcr_ktls_act_open_req6(struct sock *sk, + struct chcr_ktls_info *tx_info, + int atid) +{ + struct inet_sock *inet = inet_sk(sk); + struct cpl_t6_act_open_req6 *cpl6; + struct cpl_act_open_req6 *cpl; + struct sk_buff *skb; + unsigned int len; + int qid_atid; + u64 options; + + len = sizeof(*cpl6); + skb = alloc_skb(len, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + /* mark it a control pkt */ + set_wr_txq(skb, CPL_PRIORITY_CONTROL, tx_info->port_id); + + cpl6 = __skb_put_zero(skb, len); + cpl = (struct cpl_act_open_req6 *)cpl6; + INIT_TP_WR(cpl6, 0); + qid_atid = TID_QID_V(tx_info->rx_qid) | TID_TID_V(atid); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); + cpl->local_port = inet->inet_sport; + cpl->peer_port = inet->inet_dport; + cpl->local_ip_hi = *(__be64 *)&sk->sk_v6_rcv_saddr.in6_u.u6_addr8[0]; + cpl->local_ip_lo = *(__be64 *)&sk->sk_v6_rcv_saddr.in6_u.u6_addr8[8]; + cpl->peer_ip_hi = *(__be64 *)&sk->sk_v6_daddr.in6_u.u6_addr8[0]; + cpl->peer_ip_lo = *(__be64 *)&sk->sk_v6_daddr.in6_u.u6_addr8[8]; + + /* first 64 bit option field. */ + options = TCAM_BYPASS_F | ULP_MODE_V(ULP_MODE_NONE) | NON_OFFLOAD_F | + SMAC_SEL_V(tx_info->smt_idx) | TX_CHAN_V(tx_info->tx_chan); + cpl->opt0 = cpu_to_be64(options); + /* next 64 bit option field. */ + options = + TX_QUEUE_V(tx_info->adap->params.tp.tx_modq[tx_info->tx_chan]); + cpl->opt2 = htonl(options); + + return cxgb4_l2t_send(tx_info->netdev, skb, tx_info->l2te); +} +#endif /* #if IS_ENABLED(CONFIG_IPV6) */ + +/* + * chcr_setup_connection: create a TCB entry so that TP will form tcp packets. + * @sk - tcp socket. + * @tx_info - driver specific tls info. + * return: NET_TX_OK/NET_XMIT_DROP + */ +static int chcr_setup_connection(struct sock *sk, + struct chcr_ktls_info *tx_info) +{ + struct tid_info *t = &tx_info->adap->tids; + int atid, ret = 0; + + atid = cxgb4_alloc_atid(t, tx_info); + if (atid == -1) + return -EINVAL; + + tx_info->atid = atid; + + if (tx_info->ip_family == AF_INET) { + ret = chcr_ktls_act_open_req(sk, tx_info, atid); +#if IS_ENABLED(CONFIG_IPV6) + } else { + ret = cxgb4_clip_get(tx_info->netdev, (const u32 *) + &sk->sk_v6_rcv_saddr, + 1); + if (ret) + return ret; + ret = chcr_ktls_act_open_req6(sk, tx_info, atid); +#endif + } + + /* if return type is NET_XMIT_CN, msg will be sent but delayed, mark ret + * success, if any other return type clear atid and return that failure. + */ + if (ret) { + if (ret == NET_XMIT_CN) { + ret = 0; + } else { +#if IS_ENABLED(CONFIG_IPV6) + /* clear clip entry */ + if (tx_info->ip_family == AF_INET6) + cxgb4_clip_release(tx_info->netdev, + (const u32 *) + &sk->sk_v6_rcv_saddr, + 1); +#endif + cxgb4_free_atid(t, atid); + } + } + + return ret; +} + +/* + * chcr_set_tcb_field: update tcb fields. + * @tx_info - driver specific tls info. + * @word - TCB word. + * @mask - TCB word related mask. + * @val - TCB word related value. + * @no_reply - set 1 if not looking for TP response. 
+ */ +static int chcr_set_tcb_field(struct chcr_ktls_info *tx_info, u16 word, + u64 mask, u64 val, int no_reply) +{ + struct cpl_set_tcb_field *req; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct cpl_set_tcb_field), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + req = (struct cpl_set_tcb_field *)__skb_put_zero(skb, sizeof(*req)); + INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, tx_info->tid); + req->reply_ctrl = htons(QUEUENO_V(tx_info->rx_qid) | + NO_REPLY_V(no_reply)); + req->word_cookie = htons(TCB_WORD_V(word)); + req->mask = cpu_to_be64(mask); + req->val = cpu_to_be64(val); + + set_wr_txq(skb, CPL_PRIORITY_CONTROL, tx_info->port_id); + return cxgb4_ofld_send(tx_info->netdev, skb); +} + +/* + * chcr_ktls_mark_tcb_close: mark tcb state to CLOSE + * @tx_info - driver specific tls info. + * return: NET_TX_OK/NET_XMIT_DROP. + */ +static int chcr_ktls_mark_tcb_close(struct chcr_ktls_info *tx_info) +{ + return chcr_set_tcb_field(tx_info, TCB_T_STATE_W, + TCB_T_STATE_V(TCB_T_STATE_M), + CHCR_TCB_STATE_CLOSED, 1); +} + +/* + * chcr_ktls_dev_del: call back for tls_dev_del. + * Remove the tid and l2t entry and close the connection. + * it per connection basis. + * @netdev - net device. + * @tls_cts - tls context. + * @direction - TX/RX crypto direction + */ +static void chcr_ktls_dev_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct chcr_ktls_ofld_ctx_tx *tx_ctx = + chcr_get_ktls_tx_context(tls_ctx); + struct chcr_ktls_info *tx_info = tx_ctx->chcr_info; + struct ch_ktls_port_stats_debug *port_stats; + + if (!tx_info) + return; + + /* clear l2t entry */ + if (tx_info->l2te) + cxgb4_l2t_release(tx_info->l2te); + +#if IS_ENABLED(CONFIG_IPV6) + /* clear clip entry */ + if (tx_info->ip_family == AF_INET6) + cxgb4_clip_release(netdev, (const u32 *) + &tx_info->sk->sk_v6_rcv_saddr, + 1); +#endif + + /* clear tid */ + if (tx_info->tid != -1) { + /* clear tcb state and then release tid */ + chcr_ktls_mark_tcb_close(tx_info); + cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, + tx_info->tid, tx_info->ip_family); + } + + port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; + atomic64_inc(&port_stats->ktls_tx_connection_close); + kvfree(tx_info); + tx_ctx->chcr_info = NULL; + /* release module refcount */ + module_put(THIS_MODULE); +} + +/* + * chcr_ktls_dev_add: call back for tls_dev_add. + * Create a tcb entry for TP. Also add l2t entry for the connection. And + * generate keys & save those keys locally. + * @netdev - net device. + * @tls_cts - tls context. + * @direction - TX/RX crypto direction + * return: SUCCESS/FAILURE. 
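chcr_set_tcb_field() always ships a (word, mask, value) triple, so every update is a masked read-modify-write of one 64-bit TCB word on the card. A sketch of the effective operation, with an illustrative mask and value rather than the real TCB_T_STATE_* constants:

#include <stdio.h>
#include <stdint.h>

static uint64_t tcb_update(uint64_t word, uint64_t mask, uint64_t val)
{
	return (word & ~mask) | (val & mask);
}

int main(void)
{
	/* close a connection: write state 3 into an assumed 4-bit field */
	uint64_t word = tcb_update(0xabcd, 0xf, 0x3);

	printf("updated word: %#llx\n", (unsigned long long)word); /* 0xabc3 */
	return 0;
}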
+ */ +static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct ch_ktls_port_stats_debug *port_stats; + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct chcr_ktls_info *tx_info; + struct dst_entry *dst; + struct adapter *adap; + struct port_info *pi; + struct neighbour *n; + u8 daaddr[16]; + int ret = -1; + + tx_ctx = chcr_get_ktls_tx_context(tls_ctx); + + pi = netdev_priv(netdev); + adap = pi->adapter; + port_stats = &adap->ch_ktls_stats.ktls_port[pi->port_id]; + atomic64_inc(&port_stats->ktls_tx_connection_open); + + if (direction == TLS_OFFLOAD_CTX_DIR_RX) { + pr_err("not expecting for RX direction\n"); + goto out; + } + + if (tx_ctx->chcr_info) + goto out; + + tx_info = kvzalloc(sizeof(*tx_info), GFP_KERNEL); + if (!tx_info) + goto out; + + tx_info->sk = sk; + spin_lock_init(&tx_info->lock); + /* initialize tid and atid to -1, 0 is a also a valid id. */ + tx_info->tid = -1; + tx_info->atid = -1; + + tx_info->adap = adap; + tx_info->netdev = netdev; + tx_info->first_qset = pi->first_qset; + tx_info->tx_chan = pi->tx_chan; + tx_info->smt_idx = pi->smt_idx; + tx_info->port_id = pi->port_id; + tx_info->prev_ack = 0; + tx_info->prev_win = 0; + + tx_info->rx_qid = chcr_get_first_rx_qid(adap); + if (unlikely(tx_info->rx_qid < 0)) + goto free_tx_info; + + tx_info->prev_seq = start_offload_tcp_sn; + tx_info->tcp_start_seq_number = start_offload_tcp_sn; + + /* save crypto keys */ + ret = chcr_ktls_save_keys(tx_info, crypto_info, direction); + if (ret < 0) + goto free_tx_info; + + /* get peer ip */ + if (sk->sk_family == AF_INET) { + memcpy(daaddr, &sk->sk_daddr, 4); + tx_info->ip_family = AF_INET; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (!sk->sk_ipv6only && + ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) { + memcpy(daaddr, &sk->sk_daddr, 4); + tx_info->ip_family = AF_INET; + } else { + memcpy(daaddr, sk->sk_v6_daddr.in6_u.u6_addr8, 16); + tx_info->ip_family = AF_INET6; + } +#endif + } + + /* get the l2t index */ + dst = sk_dst_get(sk); + if (!dst) { + pr_err("DST entry not found\n"); + goto free_tx_info; + } + n = dst_neigh_lookup(dst, daaddr); + if (!n || !n->dev) { + pr_err("neighbour not found\n"); + dst_release(dst); + goto free_tx_info; + } + tx_info->l2te = cxgb4_l2t_get(adap->l2t, n, n->dev, 0); + + neigh_release(n); + dst_release(dst); + + if (!tx_info->l2te) { + pr_err("l2t entry not found\n"); + goto free_tx_info; + } + + /* Driver shouldn't be removed until any single connection exists */ + if (!try_module_get(THIS_MODULE)) + goto free_l2t; + + init_completion(&tx_info->completion); + /* create a filter and call cxgb4_l2t_send to send the packet out, which + * will take care of updating l2t entry in hw if not already done. + */ + tx_info->open_state = CH_KTLS_OPEN_PENDING; + + if (chcr_setup_connection(sk, tx_info)) + goto put_module; + + /* Wait for reply */ + wait_for_completion_timeout(&tx_info->completion, 30 * HZ); + spin_lock_bh(&tx_info->lock); + if (tx_info->open_state) { + /* need to wait for hw response, can't free tx_info yet. 
*/ + if (tx_info->open_state == CH_KTLS_OPEN_PENDING) + tx_info->pending_close = true; + /* free the lock after the cleanup */ + goto put_module; + } + spin_unlock_bh(&tx_info->lock); + + /* initialize tcb */ + reinit_completion(&tx_info->completion); + /* mark it pending for hw response */ + tx_info->open_state = CH_KTLS_OPEN_PENDING; + + if (chcr_init_tcb_fields(tx_info)) + goto free_tid; + + /* Wait for reply */ + wait_for_completion_timeout(&tx_info->completion, 30 * HZ); + spin_lock_bh(&tx_info->lock); + if (tx_info->open_state) { + /* need to wait for hw response, can't free tx_info yet. */ + tx_info->pending_close = true; + /* free the lock after cleanup */ + goto free_tid; + } + spin_unlock_bh(&tx_info->lock); + + if (!cxgb4_check_l2t_valid(tx_info->l2te)) + goto free_tid; + + atomic64_inc(&port_stats->ktls_tx_ctx); + tx_ctx->chcr_info = tx_info; + + return 0; + +free_tid: + chcr_ktls_mark_tcb_close(tx_info); +#if IS_ENABLED(CONFIG_IPV6) + /* clear clip entry */ + if (tx_info->ip_family == AF_INET6) + cxgb4_clip_release(netdev, (const u32 *) + &sk->sk_v6_rcv_saddr, + 1); +#endif + cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, + tx_info->tid, tx_info->ip_family); + +put_module: + /* release module refcount */ + module_put(THIS_MODULE); +free_l2t: + cxgb4_l2t_release(tx_info->l2te); +free_tx_info: + if (tx_info->pending_close) + spin_unlock_bh(&tx_info->lock); + else + kvfree(tx_info); +out: + atomic64_inc(&port_stats->ktls_tx_connection_fail); + return -1; +} + +/* + * chcr_init_tcb_fields: Initialize tcb fields to handle TCP seq number + * handling. + * @tx_info - driver specific tls info. + * return: NET_TX_OK/NET_XMIT_DROP + */ +static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info) +{ + int ret = 0; + + /* set tcb in offload and bypass */ + ret = + chcr_set_tcb_field(tx_info, TCB_T_FLAGS_W, + TCB_T_FLAGS_V(TF_CORE_BYPASS_F | TF_NON_OFFLOAD_F), + TCB_T_FLAGS_V(TF_CORE_BYPASS_F), 1); + if (ret) + return ret; + /* reset snd_una and snd_next fields in tcb */ + ret = chcr_set_tcb_field(tx_info, TCB_SND_UNA_RAW_W, + TCB_SND_NXT_RAW_V(TCB_SND_NXT_RAW_M) | + TCB_SND_UNA_RAW_V(TCB_SND_UNA_RAW_M), + 0, 1); + if (ret) + return ret; + + /* reset send max */ + ret = chcr_set_tcb_field(tx_info, TCB_SND_MAX_RAW_W, + TCB_SND_MAX_RAW_V(TCB_SND_MAX_RAW_M), + 0, 1); + if (ret) + return ret; + + /* update l2t index and request for tp reply to confirm tcb is + * initialised to handle tx traffic. + */ + ret = chcr_set_tcb_field(tx_info, TCB_L2T_IX_W, + TCB_L2T_IX_V(TCB_L2T_IX_M), + TCB_L2T_IX_V(tx_info->l2te->idx), 0); + return ret; +} + +/* + * chcr_ktls_cpl_act_open_rpl: connection reply received from TP. + */ +static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, + unsigned char *input) +{ + const struct cpl_act_open_rpl *p = (void *)input; + struct chcr_ktls_info *tx_info = NULL; + unsigned int atid, tid, status; + struct tid_info *t; + + tid = GET_TID(p); + status = AOPEN_STATUS_G(ntohl(p->atid_status)); + atid = TID_TID_G(AOPEN_ATID_G(ntohl(p->atid_status))); + + t = &adap->tids; + tx_info = lookup_atid(t, atid); + + if (!tx_info || tx_info->atid != atid) { + pr_err("%s: incorrect tx_info or atid\n", __func__); + return -1; + } + + cxgb4_free_atid(t, atid); + tx_info->atid = -1; + + spin_lock(&tx_info->lock); + /* HW response is very close, finish pending cleanup */ + if (tx_info->pending_close) { + spin_unlock(&tx_info->lock); + if (!status) { + /* it's a late success, tcb status is establised, + * mark it close. 
+ */ + chcr_ktls_mark_tcb_close(tx_info); + cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, + tid, tx_info->ip_family); + } + kvfree(tx_info); + return 0; + } + + if (!status) { + tx_info->tid = tid; + cxgb4_insert_tid(t, tx_info, tx_info->tid, tx_info->ip_family); + tx_info->open_state = CH_KTLS_OPEN_SUCCESS; + } else { + tx_info->open_state = CH_KTLS_OPEN_FAILURE; + } + spin_unlock(&tx_info->lock); + + complete(&tx_info->completion); + return 0; +} + +/* + * chcr_ktls_cpl_set_tcb_rpl: TCB reply received from TP. + */ +static int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input) +{ + const struct cpl_set_tcb_rpl *p = (void *)input; + struct chcr_ktls_info *tx_info = NULL; + struct tid_info *t; + u32 tid; + + tid = GET_TID(p); + + t = &adap->tids; + tx_info = lookup_tid(t, tid); + + if (!tx_info || tx_info->tid != tid) { + pr_err("%s: incorrect tx_info or tid\n", __func__); + return -1; + } + + spin_lock(&tx_info->lock); + if (tx_info->pending_close) { + spin_unlock(&tx_info->lock); + kvfree(tx_info); + return 0; + } + tx_info->open_state = false; + spin_unlock(&tx_info->lock); + + complete(&tx_info->completion); + return 0; +} + +static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, + u32 tid, void *pos, u16 word, u64 mask, + u64 val, u32 reply) +{ + struct cpl_set_tcb_field_core *cpl; + struct ulptx_idata *idata; + struct ulp_txpkt *txpkt; + + /* ULP_TXPKT */ + txpkt = pos; + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0)); + txpkt->len = htonl(DIV_ROUND_UP(CHCR_SET_TCB_FIELD_LEN, 16)); + + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(txpkt + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + idata->len = htonl(sizeof(*cpl)); + pos = idata + 1; + + cpl = pos; + /* CPL_SET_TCB_FIELD */ + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); + cpl->reply_ctrl = htons(QUEUENO_V(tx_info->rx_qid) | + NO_REPLY_V(!reply)); + cpl->word_cookie = htons(TCB_WORD_V(word)); + cpl->mask = cpu_to_be64(mask); + cpl->val = cpu_to_be64(val); + + /* ULPTX_NOOP */ + idata = (struct ulptx_idata *)(cpl + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); + idata->len = htonl(0); + pos = idata + 1; + + return pos; +} + + +/* + * chcr_write_cpl_set_tcb_ulp: update tcb values. + * TCB is responsible to create tcp headers, so all the related values + * should be correctly updated. + * @tx_info - driver specific tls info. + * @q - tx queue on which packet is going out. + * @tid - TCB identifier. + * @pos - current index where should we start writing. + * @word - TCB word. + * @mask - TCB word related mask. + * @val - TCB word related value. + * @reply - set 1 if looking for TP response. + * return - next position to write. 
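One masked update, as framed by __chcr_write_cpl_set_tcb_ulp() above, is a ULP_TXPKT header, an immediate-data sub-command carrying the CPL_SET_TCB_FIELD, and a trailing NOOP for padding. The accounting below uses assumed struct sizes only to show how the pieces add up to CHCR_SET_TCB_FIELD_LEN and convert to the 16-byte units written into txpkt->len:

#include <stdio.h>

int main(void)
{
	/* assumed sizes: ulp_txpkt, ulptx_idata, the CPL, the noop */
	unsigned int txpkt = 8, idata = 8, cpl = 24, noop = 8;
	unsigned int total = txpkt + idata + cpl + noop;

	printf("%u bytes per update -> len field %u (16-byte units)\n",
	       total, (total + 15) / 16);
	return 0;
}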
+ */ +static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, u32 tid, + void *pos, u16 word, u64 mask, + u64 val, u32 reply) +{ + int left = (void *)q->q.stat - pos; + + if (unlikely(left < CHCR_SET_TCB_FIELD_LEN)) { + if (!left) { + pos = q->q.desc; + } else { + u8 buf[48] = {0}; + + __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word, + mask, val, reply); + + return chcr_copy_to_txd(buf, &q->q, pos, + CHCR_SET_TCB_FIELD_LEN); + } + } + + pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word, + mask, val, reply); + + /* check again if we are at the end of the queue */ + if (left == CHCR_SET_TCB_FIELD_LEN) + pos = q->q.desc; + + return pos; +} + +/* + * chcr_ktls_xmit_tcb_cpls: update tcb entry so that TP will create the header + * with updated values like tcp seq, ack, window etc. + * @tx_info - driver specific tls info. + * @q - TX queue. + * @tcp_seq + * @tcp_ack + * @tcp_win + * return: NETDEV_TX_BUSY/NET_TX_OK. + */ +static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, u64 tcp_seq, + u64 tcp_ack, u64 tcp_win) +{ + bool first_wr = ((tx_info->prev_ack == 0) && (tx_info->prev_win == 0)); + struct ch_ktls_port_stats_debug *port_stats; + u32 len, cpl = 0, ndesc, wr_len; + struct fw_ulptx_wr *wr; + int credits; + void *pos; + + wr_len = sizeof(*wr); + /* there can be max 4 cpls, check if we have enough credits */ + len = wr_len + 4 * roundup(CHCR_SET_TCB_FIELD_LEN, 16); + ndesc = DIV_ROUND_UP(len, 64); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + /* make space for WR, we'll fill it later when we know all the cpls + * being sent out and have complete length. + */ + wr = pos; + pos += wr_len; + /* update tx_max if its a re-transmit or the first wr */ + if (first_wr || tcp_seq != tx_info->prev_seq) { + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_TX_MAX_W, + TCB_TX_MAX_V(TCB_TX_MAX_M), + TCB_TX_MAX_V(tcp_seq), 0); + cpl++; + } + /* reset snd una if it's a re-transmit pkt */ + if (tcp_seq != tx_info->prev_seq) { + /* reset snd_una */ + port_stats = + &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_SND_UNA_RAW_W, + TCB_SND_UNA_RAW_V + (TCB_SND_UNA_RAW_M), + TCB_SND_UNA_RAW_V(0), 0); + atomic64_inc(&port_stats->ktls_tx_ooo); + cpl++; + } + /* update ack */ + if (first_wr || tx_info->prev_ack != tcp_ack) { + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_RCV_NXT_W, + TCB_RCV_NXT_V(TCB_RCV_NXT_M), + TCB_RCV_NXT_V(tcp_ack), 0); + tx_info->prev_ack = tcp_ack; + cpl++; + } + /* update receive window */ + if (first_wr || tx_info->prev_win != tcp_win) { + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_RCV_WND_W, + TCB_RCV_WND_V(TCB_RCV_WND_M), + TCB_RCV_WND_V(tcp_win), 0); + tx_info->prev_win = tcp_win; + cpl++; + } + + if (cpl) { + /* get the actual length */ + len = wr_len + cpl * roundup(CHCR_SET_TCB_FIELD_LEN, 16); + /* ULPTX wr */ + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->cookie = 0; + /* fill len in wr field */ + wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); + + ndesc = DIV_ROUND_UP(len, 64); + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + } + return 0; +} + +/* + * chcr_ktls_skb_copy + * @nskb - new skb where the frags to be added. 
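chcr_ktls_xmit_tcb_cpls() reserves room for the WR header first and fills in its length only after counting how many of the four optional CPLs it actually emitted. A sketch of that length and descriptor math, assuming 48 bytes per CPL slot and using the 64-byte descriptor size from the code:

#include <stdio.h>

int main(void)
{
	unsigned int wr_len = 16, cpl_len = 48; /* assumed sizes */

	for (unsigned int cpls = 0; cpls <= 4; cpls++) {
		unsigned int len = wr_len + cpls * cpl_len;

		printf("%u CPLs -> %3u bytes, len16=%u, ndesc=%u\n",
		       cpls, len, (len + 15) / 16, (len + 63) / 64);
	}
	return 0;
}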
+ * @skb - old skb from which frags will be copied. + */ +static void chcr_ktls_skb_copy(struct sk_buff *skb, struct sk_buff *nskb) +{ + int i; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_shinfo(nskb)->frags[i] = skb_shinfo(skb)->frags[i]; + __skb_frag_ref(&skb_shinfo(nskb)->frags[i]); + } + + skb_shinfo(nskb)->nr_frags = skb_shinfo(skb)->nr_frags; + nskb->len += skb->data_len; + nskb->data_len = skb->data_len; + nskb->truesize += skb->data_len; +} + +/* + * chcr_ktls_get_tx_flits + * returns the number of flits to be sent out; it includes the key context + * length, WR size and skb fragments. + */ +static unsigned int +chcr_ktls_get_tx_flits(const struct sk_buff *skb, unsigned int key_ctx_len) +{ + return chcr_sgl_len(skb_shinfo(skb)->nr_frags) + + DIV_ROUND_UP(key_ctx_len + CHCR_KTLS_WR_SIZE, 8); +} + +/* + * chcr_ktls_check_tcp_options: To check if there is any TCP option available + * other than timestamp. + * @skb - skb contains partial record. + * return: 1 / 0 + */ +static int +chcr_ktls_check_tcp_options(struct tcphdr *tcp) +{ + int cnt, opt, optlen; + u_char *cp; + + cp = (u_char *)(tcp + 1); + cnt = (tcp->doff << 2) - sizeof(struct tcphdr); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) { + optlen = 1; + } else { + if (cnt < 2) + break; + optlen = cp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + switch (opt) { + case TCPOPT_NOP: + break; + default: + return 1; + } + } + return 0; +} + +/* + * chcr_ktls_write_tcp_options : TP can't send out all the options; we need to + * send them out separately. + * @tx_info - driver specific tls info. + * @skb - skb contains partial record. + * @q - TX queue. + * @tx_chan - channel number. + * return: NETDEV_TX_OK/NETDEV_TX_BUSY. + */ +static int +chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, + struct sge_eth_txq *q, uint32_t tx_chan) +{ + struct fw_eth_tx_pkt_wr *wr; + struct cpl_tx_pkt_core *cpl; + u32 ctrl, iplen, maclen; +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *ip6; +#endif + unsigned int ndesc; + struct tcphdr *tcp; + int len16, pktlen; + struct iphdr *ip; + int credits; + u8 buf[150]; + void *pos; + + iplen = skb_network_header_len(skb); + maclen = skb_mac_header_len(skb); + + /* packet length = eth hdr len + ip hdr len + tcp hdr len + * (including options).
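The option walk in chcr_ktls_check_tcp_options() ports directly to a standalone parser over a raw options buffer; as in the driver, anything other than EOL/NOP padding makes it return 1:

#include <stdio.h>

#define TCPOPT_EOL 0
#define TCPOPT_NOP 1

static int has_real_options(const unsigned char *cp, int cnt)
{
	int opt, optlen;

	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP) {
			optlen = 1;
			continue;
		}
		if (cnt < 2)
			break;
		optlen = cp[1];
		if (optlen < 2 || optlen > cnt)
			break;
		return 1; /* e.g. a timestamp option (kind 8) lands here */
	}
	return 0;
}

int main(void)
{
	unsigned char pad_only[] = { 1, 1, 1, 0 };  /* NOP NOP NOP EOL */
	unsigned char with_ts[] = { 1, 1, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0 };

	printf("%d %d\n", has_real_options(pad_only, sizeof(pad_only)),
	       has_real_options(with_ts, sizeof(with_ts))); /* 0 1 */
	return 0;
}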
+ */ + pktlen = skb->len - skb->data_len; + + ctrl = sizeof(*cpl) + pktlen; + len16 = DIV_ROUND_UP(sizeof(*wr) + ctrl, 16); + /* check how many descriptors needed */ + ndesc = DIV_ROUND_UP(len16, 4); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + wr = pos; + + /* Firmware work request header */ + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(ctrl)); + + wr->equiq_to_len16 = htonl(FW_WR_LEN16_V(len16)); + wr->r3 = 0; + + cpl = (void *)(wr + 1); + + /* CPL header */ + cpl->ctrl0 = htonl(TXPKT_OPCODE_V(CPL_TX_PKT) | TXPKT_INTF_V(tx_chan) | + TXPKT_PF_V(tx_info->adap->pf)); + cpl->pack = 0; + cpl->len = htons(pktlen); + /* checksum offload */ + cpl->ctrl1 = 0; + + pos = cpl + 1; + + memcpy(buf, skb->data, pktlen); + if (tx_info->ip_family == AF_INET) { + /* we need to correct ip header len */ + ip = (struct iphdr *)(buf + maclen); + ip->tot_len = htons(pktlen - maclen); +#if IS_ENABLED(CONFIG_IPV6) + } else { + ip6 = (struct ipv6hdr *)(buf + maclen); + ip6->payload_len = htons(pktlen - maclen - iplen); +#endif + } + /* now take care of the tcp header, if fin is not set then clear push + * bit as well, and if fin is set, it will be sent at the last so we + * need to update the tcp sequence number as per the last packet. + */ + tcp = (struct tcphdr *)(buf + maclen + iplen); + + if (!tcp->fin) + tcp->psh = 0; + else + tcp->seq = htonl(tx_info->prev_seq); + + chcr_copy_to_txd(buf, &q->q, pos, pktlen); + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + return 0; +} + +/* chcr_ktls_skb_shift - Shifts request length paged data from skb to another. + * @tgt- buffer into which tail data gets added + * @skb- buffer from which the paged data comes from + * @shiftlen- shift up to this many bytes + */ +static int chcr_ktls_skb_shift(struct sk_buff *tgt, struct sk_buff *skb, + int shiftlen) +{ + skb_frag_t *fragfrom, *fragto; + int from, to, todo; + + WARN_ON(shiftlen > skb->data_len); + + todo = shiftlen; + from = 0; + to = 0; + fragfrom = &skb_shinfo(skb)->frags[from]; + + while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[to]; + + if (todo >= skb_frag_size(fragfrom)) { + *fragto = *fragfrom; + todo -= skb_frag_size(fragfrom); + from++; + to++; + + } else { + __skb_frag_ref(fragfrom); + skb_frag_page_copy(fragto, fragfrom); + skb_frag_off_copy(fragto, fragfrom); + skb_frag_size_set(fragto, todo); + + skb_frag_off_add(fragfrom, todo); + skb_frag_size_sub(fragfrom, todo); + todo = 0; + + to++; + break; + } + } + + /* Ready to "commit" this state change to tgt */ + skb_shinfo(tgt)->nr_frags = to; + + /* Reposition in the original skb */ + to = 0; + while (from < skb_shinfo(skb)->nr_frags) + skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; + + skb_shinfo(skb)->nr_frags = to; + + WARN_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); + + skb->len -= shiftlen; + skb->data_len -= shiftlen; + skb->truesize -= shiftlen; + tgt->len += shiftlen; + tgt->data_len += shiftlen; + tgt->truesize += shiftlen; + + return shiftlen; +} + +/* + * chcr_ktls_xmit_wr_complete: This sends out the complete record. If an skb + * received has partial end part of the record, send out the complete record, so + * that crypto block will be able to generate TAG/HASH. + * @skb - segment which has complete or partial end part. + * @tx_info - driver specific tls info. 
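chcr_ktls_skb_shift() above reduces to (offset, size) bookkeeping: whole fragments move across, at most one fragment is split, and the source list is compacted. A userspace model with plain pairs in place of skb_frag_t:

#include <stdio.h>

struct frag { unsigned int off, size; };

static void shift_frags(struct frag *tgt, int *tgt_n,
			struct frag *src, int *src_n, int shiftlen)
{
	int from = 0, to = *tgt_n, todo = shiftlen, remaining;

	while (todo > 0 && from < *src_n) {
		if (todo >= (int)src[from].size) {   /* take it whole */
			tgt[to++] = src[from];
			todo -= src[from++].size;
		} else {                             /* split: head moves */
			tgt[to] = src[from];
			tgt[to++].size = todo;
			src[from].off += todo;
			src[from].size -= todo;
			todo = 0;
		}
	}
	*tgt_n = to;

	/* compact the source list over the consumed entries */
	remaining = *src_n - from;
	for (int i = 0; i < remaining; i++)
		src[i] = src[from + i];
	*src_n = remaining;
}

int main(void)
{
	struct frag src[] = { {0, 100}, {0, 200}, {0, 300} };
	struct frag tgt[4];
	int sn = 3, tn = 0;

	shift_frags(tgt, &tn, src, &sn, 150); /* frag 0 + 50 bytes of frag 1 */
	printf("tgt=%d frags, src=%d frags, src[0]=%u@%u\n",
	       tn, sn, src[0].size, src[0].off); /* 2, 2, 150@50 */
	return 0;
}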
+ * @q - TX queue. + * @tcp_seq + * @tcp_push - tcp push bit. + * @mss - segment size. + * return: NETDEV_TX_BUSY/NET_TX_OK. + */ +static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, + struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, u32 tcp_seq, + bool tcp_push, u32 mss) +{ + u32 len16, wr_mid = 0, flits = 0, ndesc, cipher_start; + struct adapter *adap = tx_info->adap; + int credits, left, last_desc; + struct tx_sw_desc *sgl_sdesc; + struct cpl_tx_data *tx_data; + struct cpl_tx_sec_pdu *cpl; + struct ulptx_idata *idata; + struct ulp_txpkt *ulptx; + struct fw_ulptx_wr *wr; + void *pos; + u64 *end; + + /* get the number of flits required */ + flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len); + /* number of descriptors */ + ndesc = chcr_flits_to_desc(flits); + /* check if enough credits available */ + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + /* Credits are below the threshold vaues, stop the queue after + * injecting the Work Request for this packet. + */ + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + /* FW_ULPTX_WR */ + wr = pos; + /* WR will need len16 */ + len16 = DIV_ROUND_UP(flits, 2); + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->cookie = 0; + pos += sizeof(*wr); + /* ULP_TXPKT */ + ulptx = pos; + ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | + ULP_TXPKT_RO_F); + ulptx->len = htonl(len16 - 1); + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(ulptx + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) | ULP_TX_SC_MORE_F); + /* idata length will include cpl_tx_sec_pdu + key context size + + * cpl_tx_data header. + */ + idata->len = htonl(sizeof(*cpl) + tx_info->key_ctx_len + + sizeof(*tx_data)); + /* SEC CPL */ + cpl = (struct cpl_tx_sec_pdu *)(idata + 1); + cpl->op_ivinsrtofst = + htonl(CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | + CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) | + CPL_TX_SEC_PDU_PLACEHOLDER_V(1) | + CPL_TX_SEC_PDU_IVINSRTOFST_V(TLS_HEADER_SIZE + 1)); + cpl->pldlen = htonl(skb->data_len); + + /* encryption should start after tls header size + iv size */ + cipher_start = TLS_HEADER_SIZE + tx_info->iv_size + 1; + + cpl->aadstart_cipherstop_hi = + htonl(CPL_TX_SEC_PDU_AADSTART_V(1) | + CPL_TX_SEC_PDU_AADSTOP_V(TLS_HEADER_SIZE) | + CPL_TX_SEC_PDU_CIPHERSTART_V(cipher_start)); + + /* authentication will also start after tls header + iv size */ + cpl->cipherstop_lo_authinsert = + htonl(CPL_TX_SEC_PDU_AUTHSTART_V(cipher_start) | + CPL_TX_SEC_PDU_AUTHSTOP_V(TLS_CIPHER_AES_GCM_128_TAG_SIZE) | + CPL_TX_SEC_PDU_AUTHINSERT_V(TLS_CIPHER_AES_GCM_128_TAG_SIZE)); + + /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. 
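+	 * scmd0_seqno_numivs and scmd0_ivgen_hdrlen are per-connection values
+	 * taken from tx_info; only scmd1, carrying the 64-bit TLS record
+	 * number, varies from record to record.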
+	 */
+	cpl->seqno_numivs = htonl(tx_info->scmd0_seqno_numivs);
+	cpl->ivgen_hdrlen = htonl(tx_info->scmd0_ivgen_hdrlen);
+	cpl->scmd1 = cpu_to_be64(tx_info->record_no);
+
+	pos = cpl + 1;
+	/* check if space left to fill the keys */
+	left = (void *)q->q.stat - pos;
+	if (!left) {
+		left = (void *)end - (void *)q->q.stat;
+		pos = q->q.desc;
+		end = pos + left;
+	}
+
+	pos = chcr_copy_to_txd(&tx_info->key_ctx, &q->q, pos,
+			       tx_info->key_ctx_len);
+	left = (void *)q->q.stat - pos;
+
+	if (!left) {
+		left = (void *)end - (void *)q->q.stat;
+		pos = q->q.desc;
+		end = pos + left;
+	}
+	/* CPL_TX_DATA */
+	tx_data = (void *)pos;
+	OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid));
+	tx_data->len = htonl(TX_DATA_MSS_V(mss) | TX_LENGTH_V(skb->data_len));
+
+	tx_data->rsvd = htonl(tcp_seq);
+
+	tx_data->flags = htonl(TX_BYPASS_F);
+	if (tcp_push)
+		tx_data->flags |= htonl(TX_PUSH_F | TX_SHOVE_F);
+
+	/* check left again, it might go beyond queue limit */
+	pos = tx_data + 1;
+	left = (void *)q->q.stat - pos;
+
+	/* check the position again */
+	if (!left) {
+		left = (void *)end - (void *)q->q.stat;
+		pos = q->q.desc;
+		end = pos + left;
+	}
+
+	/* send the complete packet except the header */
+	cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len,
+			sgl_sdesc->addr);
+	sgl_sdesc->skb = skb;
+
+	chcr_txq_advance(&q->q, ndesc);
+	cxgb4_ring_tx_db(adap, &q->q, ndesc);
+	atomic64_inc(&adap->ch_ktls_stats.ktls_tx_send_records);
+
+	return 0;
+}
+
+/*
+ * chcr_ktls_xmit_wr_short: This is to send out partial records. If it's a
+ * middle part of a record, fetch the prior data to make it 16-byte aligned,
+ * and only then send it out.
+ *
+ * @skb - skb containing a partial record.
+ * @tx_info - driver specific tls info.
+ * @q - TX queue.
+ * @tcp_seq
+ * @tcp_push - tcp push bit.
+ * @mss - segment size.
+ * @tls_rec_offset - offset from start of the tls record.
+ * @prior_data - data before the current segment, required to make this record
+ *		 16-byte aligned.
+ * @prior_data_len - prior_data length (less than 16)
+ * return: NETDEV_TX_BUSY/NET_TX_OK.
+ */
+static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
+				   struct chcr_ktls_info *tx_info,
+				   struct sge_eth_txq *q,
+				   u32 tcp_seq, bool tcp_push, u32 mss,
+				   u32 tls_rec_offset, u8 *prior_data,
+				   u32 prior_data_len)
+{
+	struct adapter *adap = tx_info->adap;
+	u32 len16, wr_mid = 0, cipher_start;
+	unsigned int flits = 0, ndesc;
+	int credits, left, last_desc;
+	struct tx_sw_desc *sgl_sdesc;
+	struct cpl_tx_data *tx_data;
+	struct cpl_tx_sec_pdu *cpl;
+	struct ulptx_idata *idata;
+	struct ulp_txpkt *ulptx;
+	struct fw_ulptx_wr *wr;
+	__be64 iv_record;
+	void *pos;
+	u64 *end;
+
+	/* get the number of flits required; it's a partial record, so 2 flits
+	 * (AES_BLOCK_SIZE) will be added.
+	 */
+	flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len) + 2;
+	/* get the correct 8 byte IV of this record */
+	iv_record = cpu_to_be64(tx_info->iv + tx_info->record_no);
+	/* If it's a middle record and not 16-byte aligned to run AES-CTR, we
+	 * need to make it 16-byte aligned, so at least 2 extra flits of
+	 * immediate data will be added.
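+	 * e.g. with a 5-byte TLS header and an 8-byte explicit IV,
+	 * tls_rec_offset = 69 gives (69 - 13) % AES_BLOCK_LEN = 8 bytes of
+	 * prior data to fetch (see chcr_short_record_handler()).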
+ */ + if (prior_data_len) + flits += 2; + /* number of descriptors */ + ndesc = chcr_flits_to_desc(flits); + /* check if enough credits available */ + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + /* FW_ULPTX_WR */ + wr = pos; + /* WR will need len16 */ + len16 = DIV_ROUND_UP(flits, 2); + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->cookie = 0; + pos += sizeof(*wr); + /* ULP_TXPKT */ + ulptx = pos; + ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | + ULP_TXPKT_RO_F); + ulptx->len = htonl(len16 - 1); + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(ulptx + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) | ULP_TX_SC_MORE_F); + /* idata length will include cpl_tx_sec_pdu + key context size + + * cpl_tx_data header. + */ + idata->len = htonl(sizeof(*cpl) + tx_info->key_ctx_len + + sizeof(*tx_data) + AES_BLOCK_LEN + prior_data_len); + /* SEC CPL */ + cpl = (struct cpl_tx_sec_pdu *)(idata + 1); + /* cipher start will have tls header + iv size extra if its a header + * part of tls record. else only 16 byte IV will be added. + */ + cipher_start = + AES_BLOCK_LEN + 1 + + (!tls_rec_offset ? TLS_HEADER_SIZE + tx_info->iv_size : 0); + + cpl->op_ivinsrtofst = + htonl(CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | + CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) | + CPL_TX_SEC_PDU_IVINSRTOFST_V(1)); + cpl->pldlen = htonl(skb->data_len + AES_BLOCK_LEN + prior_data_len); + cpl->aadstart_cipherstop_hi = + htonl(CPL_TX_SEC_PDU_CIPHERSTART_V(cipher_start)); + cpl->cipherstop_lo_authinsert = 0; + /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. 
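+	 * The scmd0_short_* variants program AES-CTR rather than AES-GCM for
+	 * this partial record, since the TAG cannot be computed without the
+	 * complete record.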
+	 */
+	cpl->seqno_numivs = htonl(tx_info->scmd0_short_seqno_numivs);
+	cpl->ivgen_hdrlen = htonl(tx_info->scmd0_short_ivgen_hdrlen);
+	cpl->scmd1 = 0;
+
+	pos = cpl + 1;
+	/* check if space left to fill the keys */
+	left = (void *)q->q.stat - pos;
+	if (!left) {
+		left = (void *)end - (void *)q->q.stat;
+		pos = q->q.desc;
+		end = pos + left;
+	}
+
+	pos = chcr_copy_to_txd(&tx_info->key_ctx, &q->q, pos,
+			       tx_info->key_ctx_len);
+	left = (void *)q->q.stat - pos;
+
+	if (!left) {
+		left = (void *)end - (void *)q->q.stat;
+		pos = q->q.desc;
+		end = pos + left;
+	}
+	/* CPL_TX_DATA */
+	tx_data = (void *)pos;
+	OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid));
+	tx_data->len = htonl(TX_DATA_MSS_V(mss) |
+			     TX_LENGTH_V(skb->data_len + prior_data_len));
+	tx_data->rsvd = htonl(tcp_seq);
+	tx_data->flags = htonl(TX_BYPASS_F);
+	if (tcp_push)
+		tx_data->flags |= htonl(TX_PUSH_F | TX_SHOVE_F);
+
+	/* check left again, it might go beyond queue limit */
+	pos = tx_data + 1;
+	left = (void *)q->q.stat - pos;
+
+	/* check the position again */
+	if (!left) {
+		left = (void *)end - (void *)q->q.stat;
+		pos = q->q.desc;
+		end = pos + left;
+	}
+	/* copy the 16-byte IV for AES-CTR, which includes 4 bytes of salt,
+	 * 8 bytes of actual IV and a 4-byte block counter.
+	 */
+	memcpy(pos, tx_info->key_ctx.salt, tx_info->salt_size);
+	memcpy(pos + tx_info->salt_size, &iv_record, tx_info->iv_size);
+	*(__be32 *)(pos + tx_info->salt_size + tx_info->iv_size) =
+		htonl(2 + (tls_rec_offset ? ((tls_rec_offset -
+		(TLS_HEADER_SIZE + tx_info->iv_size)) / AES_BLOCK_LEN) : 0));
+
+	pos += 16;
+	/* prior_data_len will always be less than 16 bytes; fill prior_data
+	 * in after the AES-CTR block and clear the remaining length to 0.
+	 */
+	if (prior_data_len)
+		pos = chcr_copy_to_txd(prior_data, &q->q, pos, 16);
+	/* send the complete packet except the header */
+	cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len,
+			sgl_sdesc->addr);
+	sgl_sdesc->skb = skb;
+
+	chcr_txq_advance(&q->q, ndesc);
+	cxgb4_ring_tx_db(adap, &q->q, ndesc);
+
+	return 0;
+}
+
+/*
+ * chcr_ktls_tx_plaintxt: This handler takes care of the records which have
+ * only plain text (only the tls header and iv).
+ * @tx_info - driver specific tls info.
+ * @skb - skb containing a partial record.
+ * @tcp_seq
+ * @mss - segment size.
+ * @tcp_push - tcp push bit.
+ * @q - TX queue.
+ * @port_id : port number
+ * @prior_data - data before the current segment, required to make this record
+ *		 16-byte aligned.
+ * @prior_data_len - prior_data length (less than 16)
+ * return: NETDEV_TX_BUSY/NET_TX_OK.
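+ * Note that no CPL_TX_SEC_PDU is built on this path: a bare header/iv has
+ * nothing to encrypt, so the bytes go out as-is via CPL_TX_DATA.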
+ */ +static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, + struct sk_buff *skb, u32 tcp_seq, u32 mss, + bool tcp_push, struct sge_eth_txq *q, + u32 port_id, u8 *prior_data, + u32 prior_data_len) +{ + int credits, left, len16, last_desc; + unsigned int flits = 0, ndesc; + struct tx_sw_desc *sgl_sdesc; + struct cpl_tx_data *tx_data; + struct ulptx_idata *idata; + struct ulp_txpkt *ulptx; + struct fw_ulptx_wr *wr; + u32 wr_mid = 0; + void *pos; + u64 *end; + + flits = DIV_ROUND_UP(CHCR_PLAIN_TX_DATA_LEN, 8); + flits += chcr_sgl_len(skb_shinfo(skb)->nr_frags); + if (prior_data_len) + flits += 2; + /* WR will need len16 */ + len16 = DIV_ROUND_UP(flits, 2); + /* check how many descriptors needed */ + ndesc = DIV_ROUND_UP(flits, 8); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(tx_info->adap->pdev_dev, skb, + sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + /* FW_ULPTX_WR */ + wr = pos; + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->cookie = 0; + pos += sizeof(*wr); + /* ULP_TXPKT */ + ulptx = (struct ulp_txpkt *)(wr + 1); + ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_DATAMODIFY_V(0) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_DEST_V(0) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | ULP_TXPKT_RO_V(1)); + ulptx->len = htonl(len16 - 1); + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(ulptx + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) | ULP_TX_SC_MORE_F); + idata->len = htonl(sizeof(*tx_data) + prior_data_len); + /* CPL_TX_DATA */ + tx_data = (struct cpl_tx_data *)(idata + 1); + OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); + tx_data->len = htonl(TX_DATA_MSS_V(mss) | + TX_LENGTH_V(skb->data_len + prior_data_len)); + /* set tcp seq number */ + tx_data->rsvd = htonl(tcp_seq); + tx_data->flags = htonl(TX_BYPASS_F); + if (tcp_push) + tx_data->flags |= htonl(TX_PUSH_F | TX_SHOVE_F); + + pos = tx_data + 1; + /* apart from prior_data_len, we should set remaining part of 16 bytes + * to be zero. + */ + if (prior_data_len) + pos = chcr_copy_to_txd(prior_data, &q->q, pos, 16); + + /* check left again, it might go beyond queue limit */ + left = (void *)q->q.stat - pos; + + /* check the position again */ + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + /* send the complete packet including the header */ + cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len, + sgl_sdesc->addr); + sgl_sdesc->skb = skb; + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + return 0; +} + +/* + * chcr_ktls_copy_record_in_skb + * @nskb - new skb where the frags to be added. + * @record - specific record which has complete 16k record in frags. 
+ */
+static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
+					 struct tls_record_info *record)
+{
+	int i = 0;
+
+	for (i = 0; i < record->num_frags; i++) {
+		skb_shinfo(nskb)->frags[i] = record->frags[i];
+		/* increase the frag ref count */
+		__skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
+	}
+
+	skb_shinfo(nskb)->nr_frags = record->num_frags;
+	nskb->data_len = record->len;
+	nskb->len += record->len;
+	nskb->truesize += record->len;
+}
+
+/*
+ * chcr_ktls_update_snd_una: Reset SND_UNA. This is done to avoid sending the
+ * same segment again; it discards the segment which is before the current
+ * tx max.
+ * @tx_info - driver specific tls info.
+ * @q - TX queue.
+ * return: NET_TX_OK/NET_XMIT_DROP.
+ */
+static int chcr_ktls_update_snd_una(struct chcr_ktls_info *tx_info,
+				    struct sge_eth_txq *q)
+{
+	struct fw_ulptx_wr *wr;
+	unsigned int ndesc;
+	int credits;
+	void *pos;
+	u32 len;
+
+	len = sizeof(*wr) + roundup(CHCR_SET_TCB_FIELD_LEN, 16);
+	ndesc = DIV_ROUND_UP(len, 64);
+
+	credits = chcr_txq_avail(&q->q) - ndesc;
+	if (unlikely(credits < 0)) {
+		chcr_eth_txq_stop(q);
+		return NETDEV_TX_BUSY;
+	}
+
+	pos = &q->q.desc[q->q.pidx];
+
+	wr = pos;
+	/* ULPTX wr */
+	wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR));
+	wr->cookie = 0;
+	/* fill len in wr field */
+	wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16)));
+
+	pos += sizeof(*wr);
+
+	pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos,
+					 TCB_SND_UNA_RAW_W,
+					 TCB_SND_UNA_RAW_V(TCB_SND_UNA_RAW_M),
+					 TCB_SND_UNA_RAW_V(0), 0);
+
+	chcr_txq_advance(&q->q, ndesc);
+	cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc);
+
+	return 0;
+}
+
+/*
+ * chcr_end_part_handler: This handler handles a record which is complete, or
+ * whose end part has been received. The T6 adapter has an issue in that it
+ * can't send out a TAG with a partial record; so if this is an end part, we
+ * have to send the TAG as well, for which we need to fetch the complete
+ * record and send it to the crypto module.
+ * @tx_info - driver specific tls info.
+ * @skb - skb contains partial record.
+ * @record - complete record of 16K size.
+ * @tcp_seq
+ * @mss - segment size in which TP needs to chop a packet.
+ * @tcp_push_no_fin - tcp push if fin is not set.
+ * @q - TX queue.
+ * @tls_end_offset - offset from end of the record.
+ * @last_wr - true if this is the last part of the skb going out.
+ * return: NETDEV_TX_OK/NETDEV_TX_BUSY.
+ */
+static int chcr_end_part_handler(struct chcr_ktls_info *tx_info,
+				 struct sk_buff *skb,
+				 struct tls_record_info *record,
+				 u32 tcp_seq, int mss, bool tcp_push_no_fin,
+				 struct sge_eth_txq *q,
+				 u32 tls_end_offset, bool last_wr)
+{
+	struct sk_buff *nskb = NULL;
+	/* check if it is a complete record */
+	if (tls_end_offset == record->len) {
+		nskb = skb;
+		atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_complete_pkts);
+	} else {
+		dev_kfree_skb_any(skb);
+
+		nskb = alloc_skb(0, GFP_KERNEL);
+		if (!nskb)
+			return NETDEV_TX_BUSY;
+		/* copy complete record in skb */
+		chcr_ktls_copy_record_in_skb(nskb, record);
+		/* packet is being sent from the beginning, update the tcp_seq
+		 * accordingly.
+		 */
+		tcp_seq = tls_record_start_seq(record);
+		/* reset snd una, so the middle record won't send the already
+		 * sent part.
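+		 * (chcr_ktls_update_snd_una() below clears TCB_SND_UNA_RAW in
+		 * the hardware TCB.)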
+ */ + if (chcr_ktls_update_snd_una(tx_info, q)) + goto out; + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_end_pkts); + } + + if (chcr_ktls_xmit_wr_complete(nskb, tx_info, q, tcp_seq, + (last_wr && tcp_push_no_fin), + mss)) { + goto out; + } + return 0; +out: + dev_kfree_skb_any(nskb); + return NETDEV_TX_BUSY; +} + +/* + * chcr_short_record_handler: This handler will take care of the records which + * doesn't have end part (1st part or the middle part(/s) of a record). In such + * cases, AES CTR will be used in place of AES GCM to send out partial packet. + * This partial record might be the first part of the record, or the middle + * part. In case of middle record we should fetch the prior data to make it 16 + * byte aligned. If it has a partial tls header or iv then get to the start of + * tls header. And if it has partial TAG, then remove the complete TAG and send + * only the payload. + * There is one more possibility that it gets a partial header, send that + * portion as a plaintext. + * @tx_info - driver specific tls info. + * @skb - skb contains partial record.. + * @record - complete record of 16K size. + * @tcp_seq + * @mss - segment size in which TP needs to chop a packet. + * @tcp_push_no_fin - tcp push if fin is not set. + * @q - TX queue. + * @tls_end_offset - offset from end of the record. + * return: NETDEV_TX_OK/NETDEV_TX_BUSY. + */ +static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, + struct sk_buff *skb, + struct tls_record_info *record, + u32 tcp_seq, int mss, bool tcp_push_no_fin, + struct sge_eth_txq *q, u32 tls_end_offset) +{ + u32 tls_rec_offset = tcp_seq - tls_record_start_seq(record); + u8 prior_data[16] = {0}; + u32 prior_data_len = 0; + u32 data_len; + + /* check if the skb is ending in middle of tag/HASH, its a big + * trouble, send the packet before the HASH. + */ + int remaining_record = tls_end_offset - skb->data_len; + + if (remaining_record > 0 && + remaining_record < TLS_CIPHER_AES_GCM_128_TAG_SIZE) { + int trimmed_len = skb->data_len - + (TLS_CIPHER_AES_GCM_128_TAG_SIZE - remaining_record); + struct sk_buff *tmp_skb = NULL; + /* don't process the pkt if it is only a partial tag */ + if (skb->data_len < TLS_CIPHER_AES_GCM_128_TAG_SIZE) + goto out; + + WARN_ON(trimmed_len > skb->data_len); + + /* shift to those many bytes */ + tmp_skb = alloc_skb(0, GFP_KERNEL); + if (unlikely(!tmp_skb)) + goto out; + + chcr_ktls_skb_shift(tmp_skb, skb, trimmed_len); + /* free the last trimmed portion */ + dev_kfree_skb_any(skb); + skb = tmp_skb; + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_trimmed_pkts); + } + data_len = skb->data_len; + /* check if the middle record's start point is 16 byte aligned. CTR + * needs 16 byte aligned start point to start encryption. + */ + if (tls_rec_offset) { + /* there is an offset from start, means its a middle record */ + int remaining = 0; + + if (tls_rec_offset < (TLS_HEADER_SIZE + tx_info->iv_size)) { + prior_data_len = tls_rec_offset; + tls_rec_offset = 0; + remaining = 0; + } else { + prior_data_len = + (tls_rec_offset - + (TLS_HEADER_SIZE + tx_info->iv_size)) + % AES_BLOCK_LEN; + remaining = tls_rec_offset - prior_data_len; + } + + /* if prior_data_len is not zero, means we need to fetch prior + * data to make this record 16 byte aligned, or we need to reach + * to start offset. 
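+		 * The prior bytes may straddle a page boundary, which is why
+		 * the walk below may need a second kmap_atomic() when
+		 * frag_delta < prior_data_len.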
+ */ + if (prior_data_len) { + int i = 0; + u8 *data = NULL; + skb_frag_t *f; + u8 *vaddr; + int frag_size = 0, frag_delta = 0; + + while (remaining > 0) { + frag_size = skb_frag_size(&record->frags[i]); + if (remaining < frag_size) + break; + + remaining -= frag_size; + i++; + } + f = &record->frags[i]; + vaddr = kmap_atomic(skb_frag_page(f)); + + data = vaddr + skb_frag_off(f) + remaining; + frag_delta = skb_frag_size(f) - remaining; + + if (frag_delta >= prior_data_len) { + memcpy(prior_data, data, prior_data_len); + kunmap_atomic(vaddr); + } else { + memcpy(prior_data, data, frag_delta); + kunmap_atomic(vaddr); + /* get the next page */ + f = &record->frags[i + 1]; + vaddr = kmap_atomic(skb_frag_page(f)); + data = vaddr + skb_frag_off(f); + memcpy(prior_data + frag_delta, + data, (prior_data_len - frag_delta)); + kunmap_atomic(vaddr); + } + /* reset tcp_seq as per the prior_data_required len */ + tcp_seq -= prior_data_len; + /* include prio_data_len for further calculation. + */ + data_len += prior_data_len; + } + /* reset snd una, so the middle record won't send the already + * sent part. + */ + if (chcr_ktls_update_snd_una(tx_info, q)) + goto out; + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_middle_pkts); + } else { + /* Else means, its a partial first part of the record. Check if + * its only the header, don't need to send for encryption then. + */ + if (data_len <= TLS_HEADER_SIZE + tx_info->iv_size) { + if (chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss, + tcp_push_no_fin, q, + tx_info->port_id, + prior_data, + prior_data_len)) { + goto out; + } + return 0; + } + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_start_pkts); + } + + if (chcr_ktls_xmit_wr_short(skb, tx_info, q, tcp_seq, tcp_push_no_fin, + mss, tls_rec_offset, prior_data, + prior_data_len)) { + goto out; + } + + return 0; +out: + dev_kfree_skb_any(skb); + return NETDEV_TX_BUSY; +} + +/* nic tls TX handler */ +static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ch_ktls_port_stats_debug *port_stats; + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct ch_ktls_stats_debug *stats; + struct tcphdr *th = tcp_hdr(skb); + int data_len, qidx, ret = 0, mss; + struct tls_record_info *record; + struct chcr_ktls_info *tx_info; + u32 tls_end_offset, tcp_seq; + struct tls_context *tls_ctx; + struct sk_buff *local_skb; + struct sge_eth_txq *q; + struct adapter *adap; + unsigned long flags; + + tcp_seq = ntohl(th->seq); + + mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : skb->data_len; + + /* check if we haven't set it for ktls offload */ + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + goto out; + + tls_ctx = tls_get_ctx(skb->sk); + if (unlikely(tls_ctx->netdev != dev)) + goto out; + + tx_ctx = chcr_get_ktls_tx_context(tls_ctx); + tx_info = tx_ctx->chcr_info; + + if (unlikely(!tx_info)) + goto out; + + /* don't touch the original skb, make a new skb to extract each records + * and send them separately. 
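+	 * (chcr_ktls_skb_copy() below only takes references on the frags;
+	 * no payload bytes are copied.)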
+	 */
+	local_skb = alloc_skb(0, GFP_KERNEL);
+
+	if (unlikely(!local_skb))
+		return NETDEV_TX_BUSY;
+
+	adap = tx_info->adap;
+	stats = &adap->ch_ktls_stats;
+	port_stats = &stats->ktls_port[tx_info->port_id];
+
+	qidx = skb->queue_mapping;
+	q = &adap->sge.ethtxq[qidx + tx_info->first_qset];
+	cxgb4_reclaim_completed_tx(adap, &q->q, true);
+	/* if TCP options are set but FIN is not, send the options first */
+	if (!th->fin && chcr_ktls_check_tcp_options(th)) {
+		ret = chcr_ktls_write_tcp_options(tx_info, skb, q,
+						  tx_info->tx_chan);
+		if (ret)
+			return NETDEV_TX_BUSY;
+	}
+	/* update tcb */
+	ret = chcr_ktls_xmit_tcb_cpls(tx_info, q, ntohl(th->seq),
+				      ntohl(th->ack_seq),
+				      ntohs(th->window));
+	if (ret) {
+		dev_kfree_skb_any(local_skb);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* copy skb contents into local skb */
+	chcr_ktls_skb_copy(skb, local_skb);
+
+	/* go through the skb and send only one record at a time. */
+	data_len = skb->data_len;
+	/* TCP segments can be received either complete or partial.
+	 * chcr_end_part_handler will handle the cases where a complete record
+	 * or the end part of a record is received. If only a partial end part
+	 * of the record is received, we will send the complete record again.
+	 */
+
+	do {
+		int i;
+
+		cxgb4_reclaim_completed_tx(adap, &q->q, true);
+		/* lock taken */
+		spin_lock_irqsave(&tx_ctx->base.lock, flags);
+		/* fetch the tls record */
+		record = tls_get_record(&tx_ctx->base, tcp_seq,
+					&tx_info->record_no);
+		/* By the time the packet reached us, the ACK may already have
+		 * been received; the record won't be found in that case,
+		 * handle it gracefully.
+		 */
+		if (unlikely(!record)) {
+			spin_unlock_irqrestore(&tx_ctx->base.lock, flags);
+			atomic64_inc(&port_stats->ktls_tx_drop_no_sync_data);
+			goto out;
+		}
+
+		if (unlikely(tls_record_is_start_marker(record))) {
+			spin_unlock_irqrestore(&tx_ctx->base.lock, flags);
+			atomic64_inc(&port_stats->ktls_tx_skip_no_sync_data);
+			goto out;
+		}
+
+		/* increase the page reference count of the record, so that
+		 * there is no chance of a page being freed in the middle if
+		 * the stack receives an ACK and tries to delete the record.
+		 */
+		for (i = 0; i < record->num_frags; i++)
+			__skb_frag_ref(&record->frags[i]);
+		/* lock cleared */
+		spin_unlock_irqrestore(&tx_ctx->base.lock, flags);
+
+		tls_end_offset = record->end_seq - tcp_seq;
+
+		pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n",
+			 tcp_seq, record->end_seq, tx_info->prev_seq, data_len);
+		/* if a tls record is finishing in this SKB */
+		if (tls_end_offset <= data_len) {
+			struct sk_buff *nskb = NULL;
+
+			if (tls_end_offset < data_len) {
+				nskb = alloc_skb(0, GFP_KERNEL);
+				if (unlikely(!nskb)) {
+					ret = -ENOMEM;
+					goto clear_ref;
+				}
+
+				chcr_ktls_skb_shift(nskb, local_skb,
+						    tls_end_offset);
+			} else {
+				/* it's the only record in this skb, point to
+				 * it directly.
+				 */
+				nskb = local_skb;
+			}
+			ret = chcr_end_part_handler(tx_info, nskb, record,
+						    tcp_seq, mss,
+						    (!th->fin && th->psh), q,
+						    tls_end_offset,
+						    (nskb == local_skb));
+
+			if (ret && nskb != local_skb)
+				dev_kfree_skb_any(local_skb);
+
+			data_len -= tls_end_offset;
+			/* tcp_seq increment is required to handle the next
+			 * record.
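+			 * e.g. if this skb carries the last 1K of record N and
+			 * the first 2K of record N + 1, tls_end_offset is 1K:
+			 * the handler above consumes it, and tcp_seq advances
+			 * past it before the loop picks up record N + 1.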
+ */ + tcp_seq += tls_end_offset; + } else { + ret = chcr_short_record_handler(tx_info, local_skb, + record, tcp_seq, mss, + (!th->fin && th->psh), + q, tls_end_offset); + data_len = 0; + } +clear_ref: + /* clear the frag ref count which increased locally before */ + for (i = 0; i < record->num_frags; i++) { + /* clear the frag ref count */ + __skb_frag_unref(&record->frags[i]); + } + /* if any failure, come out from the loop. */ + if (ret) + goto out; + /* length should never be less than 0 */ + WARN_ON(data_len < 0); + + } while (data_len > 0); + + tx_info->prev_seq = ntohl(th->seq) + skb->data_len; + atomic64_inc(&port_stats->ktls_tx_encrypted_packets); + atomic64_add(skb->data_len, &port_stats->ktls_tx_encrypted_bytes); + + /* tcp finish is set, send a separate tcp msg including all the options + * as well. + */ + if (th->fin) + chcr_ktls_write_tcp_options(tx_info, skb, q, tx_info->tx_chan); + +out: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static void *chcr_ktls_uld_add(const struct cxgb4_lld_info *lldi) +{ + struct chcr_ktls_uld_ctx *u_ctx; + + pr_info_once("%s - version %s\n", CHCR_KTLS_DRV_DESC, + CHCR_KTLS_DRV_VERSION); + u_ctx = kzalloc(sizeof(*u_ctx), GFP_KERNEL); + if (!u_ctx) { + u_ctx = ERR_PTR(-ENOMEM); + goto out; + } + u_ctx->lldi = *lldi; +out: + return u_ctx; +} + +static const struct tlsdev_ops chcr_ktls_ops = { + .tls_dev_add = chcr_ktls_dev_add, + .tls_dev_del = chcr_ktls_dev_del, +}; + +static chcr_handler_func work_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_OPEN_RPL] = chcr_ktls_cpl_act_open_rpl, + [CPL_SET_TCB_RPL] = chcr_ktls_cpl_set_tcb_rpl, +}; + +static int chcr_ktls_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *pgl) +{ + const struct cpl_act_open_rpl *rpl = (struct cpl_act_open_rpl *)rsp; + struct chcr_ktls_uld_ctx *u_ctx = handle; + u8 opcode = rpl->ot.opcode; + struct adapter *adap; + + adap = pci_get_drvdata(u_ctx->lldi.pdev); + + if (!work_handlers[opcode]) { + pr_err("Unsupported opcode %d received\n", opcode); + return 0; + } + + work_handlers[opcode](adap, (unsigned char *)&rsp[1]); + return 0; +} + +static int chcr_ktls_uld_state_change(void *handle, enum cxgb4_state new_state) +{ + struct chcr_ktls_uld_ctx *u_ctx = handle; + + switch (new_state) { + case CXGB4_STATE_UP: + pr_info("%s: Up\n", pci_name(u_ctx->lldi.pdev)); + mutex_lock(&dev_mutex); + list_add_tail(&u_ctx->entry, &uld_ctx_list); + mutex_unlock(&dev_mutex); + break; + case CXGB4_STATE_START_RECOVERY: + case CXGB4_STATE_DOWN: + case CXGB4_STATE_DETACH: + pr_info("%s: Down\n", pci_name(u_ctx->lldi.pdev)); + mutex_lock(&dev_mutex); + list_del(&u_ctx->entry); + mutex_unlock(&dev_mutex); + break; + default: + break; + } + + return 0; +} + +static struct cxgb4_uld_info chcr_ktls_uld_info = { + .name = CHCR_KTLS_DRV_MODULE_NAME, + .nrxq = 1, + .rxq_size = 1024, + .add = chcr_ktls_uld_add, + .tx_handler = chcr_ktls_xmit, + .rx_handler = chcr_ktls_uld_rx_handler, + .state_change = chcr_ktls_uld_state_change, + .tlsdev_ops = &chcr_ktls_ops, +}; + +static int __init chcr_ktls_init(void) +{ + cxgb4_register_uld(CXGB4_ULD_KTLS, &chcr_ktls_uld_info); + return 0; +} + +static void __exit chcr_ktls_exit(void) +{ + struct chcr_ktls_uld_ctx *u_ctx, *tmp; + struct adapter *adap; + + pr_info("driver unloaded\n"); + + mutex_lock(&dev_mutex); + list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) { + adap = pci_get_drvdata(u_ctx->lldi.pdev); + memset(&adap->ch_ktls_stats, 0, sizeof(adap->ch_ktls_stats)); + list_del(&u_ctx->entry); + kfree(u_ctx); + } + 
mutex_unlock(&dev_mutex); + cxgb4_unregister_uld(CXGB4_ULD_KTLS); +} + +module_init(chcr_ktls_init); +module_exit(chcr_ktls_exit); + +MODULE_DESCRIPTION("Chelsio NIC TLS ULD driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_VERSION(CHCR_KTLS_DRV_VERSION); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h new file mode 100644 index 000000000000..c1651b1431a0 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2020 Chelsio Communications. All rights reserved. */ + +#ifndef __CHCR_KTLS_H__ +#define __CHCR_KTLS_H__ + +#include "cxgb4.h" +#include "t4_msg.h" +#include "t4_tcb.h" +#include "l2t.h" +#include "chcr_common.h" +#include "cxgb4_uld.h" +#include "clip_tbl.h" + +#define CHCR_KTLS_DRV_MODULE_NAME "ch_ktls" +#define CHCR_KTLS_DRV_VERSION "1.0.0.0-ko" +#define CHCR_KTLS_DRV_DESC "Chelsio NIC TLS ULD Driver" + +#define CHCR_TCB_STATE_CLOSED 0 +#define CHCR_KTLS_KEY_CTX_LEN 16 +#define CHCR_SET_TCB_FIELD_LEN sizeof(struct cpl_set_tcb_field) +#define CHCR_PLAIN_TX_DATA_LEN (sizeof(struct fw_ulptx_wr) +\ + sizeof(struct ulp_txpkt) +\ + sizeof(struct ulptx_idata) +\ + sizeof(struct cpl_tx_data)) + +#define CHCR_KTLS_WR_SIZE (CHCR_PLAIN_TX_DATA_LEN +\ + sizeof(struct cpl_tx_sec_pdu)) + +enum ch_ktls_open_state { + CH_KTLS_OPEN_SUCCESS = 0, + CH_KTLS_OPEN_PENDING = 1, + CH_KTLS_OPEN_FAILURE = 2, +}; + +struct chcr_ktls_info { + struct sock *sk; + spinlock_t lock; /* lock for pending_close */ + struct ktls_key_ctx key_ctx; + struct adapter *adap; + struct l2t_entry *l2te; + struct net_device *netdev; + struct completion completion; + u64 iv; + u64 record_no; + int tid; + int atid; + int rx_qid; + u32 iv_size; + u32 prev_seq; + u32 prev_ack; + u32 salt_size; + u32 key_ctx_len; + u32 scmd0_seqno_numivs; + u32 scmd0_ivgen_hdrlen; + u32 tcp_start_seq_number; + u32 scmd0_short_seqno_numivs; + u32 scmd0_short_ivgen_hdrlen; + u16 prev_win; + u8 tx_chan; + u8 smt_idx; + u8 port_id; + u8 ip_family; + u8 first_qset; + enum ch_ktls_open_state open_state; + bool pending_close; +}; + +struct chcr_ktls_ofld_ctx_tx { + struct tls_offload_context_tx base; + struct chcr_ktls_info *chcr_info; +}; + +struct chcr_ktls_uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; +}; + +static inline struct chcr_ktls_ofld_ctx_tx * +chcr_get_ktls_tx_context(struct tls_context *tls_ctx) +{ + BUILD_BUG_ON(sizeof(struct chcr_ktls_ofld_ctx_tx) > + TLS_OFFLOAD_CONTEXT_SIZE_TX); + return container_of(tls_offload_ctx_tx(tls_ctx), + struct chcr_ktls_ofld_ctx_tx, + base); +} + +static inline int chcr_get_first_rx_qid(struct adapter *adap) +{ + /* u_ctx is saved in adap, fetch it */ + struct chcr_ktls_uld_ctx *u_ctx = adap->uld[CXGB4_ULD_KTLS].handle; + + if (!u_ctx) + return -1; + return u_ctx->lldi.rxq_ids[0]; +} + +typedef int (*chcr_handler_func)(struct adapter *adap, unsigned char *input); +#endif /* __CHCR_KTLS_H__ */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/chtls/Makefile new file mode 100644 index 000000000000..bc11495acdb3 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 \ + -I $(srctree)/drivers/crypto/chelsio + +obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls.o 
+chtls-objs := chtls_main.o chtls_cm.o chtls_io.o chtls_hw.o diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h new file mode 100644 index 000000000000..2d3dfdd2a716 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -0,0 +1,580 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 Chelsio Communications, Inc. + */ + +#ifndef __CHTLS_H__ +#define __CHTLS_H__ + +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include <crypto/authenc.h> +#include <crypto/ctr.h> +#include <crypto/gf128mul.h> +#include <crypto/internal/aead.h> +#include <crypto/null.h> +#include <crypto/internal/skcipher.h> +#include <crypto/aead.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/hash.h> +#include <linux/tls.h> +#include <net/tls.h> +#include <net/tls_toe.h> + +#include "t4fw_api.h" +#include "t4_msg.h" +#include "cxgb4.h" +#include "cxgb4_uld.h" +#include "l2t.h" +#include "chcr_algo.h" +#include "chcr_core.h" +#include "chcr_crypto.h" + +#define CHTLS_DRV_VERSION "1.0.0.0-ko" + +#define TLS_KEYCTX_RXFLIT_CNT_S 24 +#define TLS_KEYCTX_RXFLIT_CNT_V(x) ((x) << TLS_KEYCTX_RXFLIT_CNT_S) + +#define TLS_KEYCTX_RXPROT_VER_S 20 +#define TLS_KEYCTX_RXPROT_VER_M 0xf +#define TLS_KEYCTX_RXPROT_VER_V(x) ((x) << TLS_KEYCTX_RXPROT_VER_S) + +#define TLS_KEYCTX_RXCIPH_MODE_S 16 +#define TLS_KEYCTX_RXCIPH_MODE_M 0xf +#define TLS_KEYCTX_RXCIPH_MODE_V(x) ((x) << TLS_KEYCTX_RXCIPH_MODE_S) + +#define TLS_KEYCTX_RXAUTH_MODE_S 12 +#define TLS_KEYCTX_RXAUTH_MODE_M 0xf +#define TLS_KEYCTX_RXAUTH_MODE_V(x) ((x) << TLS_KEYCTX_RXAUTH_MODE_S) + +#define TLS_KEYCTX_RXCIAU_CTRL_S 11 +#define TLS_KEYCTX_RXCIAU_CTRL_V(x) ((x) << TLS_KEYCTX_RXCIAU_CTRL_S) + +#define TLS_KEYCTX_RX_SEQCTR_S 9 +#define TLS_KEYCTX_RX_SEQCTR_M 0x3 +#define TLS_KEYCTX_RX_SEQCTR_V(x) ((x) << TLS_KEYCTX_RX_SEQCTR_S) + +#define TLS_KEYCTX_RX_VALID_S 8 +#define TLS_KEYCTX_RX_VALID_V(x) ((x) << TLS_KEYCTX_RX_VALID_S) + +#define TLS_KEYCTX_RXCK_SIZE_S 3 +#define TLS_KEYCTX_RXCK_SIZE_M 0x7 +#define TLS_KEYCTX_RXCK_SIZE_V(x) ((x) << TLS_KEYCTX_RXCK_SIZE_S) + +#define TLS_KEYCTX_RXMK_SIZE_S 0 +#define TLS_KEYCTX_RXMK_SIZE_M 0x7 +#define TLS_KEYCTX_RXMK_SIZE_V(x) ((x) << TLS_KEYCTX_RXMK_SIZE_S) + +#define KEYCTX_TX_WR_IV_S 55 +#define KEYCTX_TX_WR_IV_M 0x1ffULL +#define KEYCTX_TX_WR_IV_V(x) ((x) << KEYCTX_TX_WR_IV_S) +#define KEYCTX_TX_WR_IV_G(x) \ + (((x) >> KEYCTX_TX_WR_IV_S) & KEYCTX_TX_WR_IV_M) + +#define KEYCTX_TX_WR_AAD_S 47 +#define KEYCTX_TX_WR_AAD_M 0xffULL +#define KEYCTX_TX_WR_AAD_V(x) ((x) << KEYCTX_TX_WR_AAD_S) +#define KEYCTX_TX_WR_AAD_G(x) (((x) >> KEYCTX_TX_WR_AAD_S) & \ + KEYCTX_TX_WR_AAD_M) + +#define KEYCTX_TX_WR_AADST_S 39 +#define KEYCTX_TX_WR_AADST_M 0xffULL +#define KEYCTX_TX_WR_AADST_V(x) ((x) << KEYCTX_TX_WR_AADST_S) +#define KEYCTX_TX_WR_AADST_G(x) \ + (((x) >> KEYCTX_TX_WR_AADST_S) & KEYCTX_TX_WR_AADST_M) + +#define KEYCTX_TX_WR_CIPHER_S 30 +#define KEYCTX_TX_WR_CIPHER_M 0x1ffULL +#define KEYCTX_TX_WR_CIPHER_V(x) ((x) << KEYCTX_TX_WR_CIPHER_S) +#define KEYCTX_TX_WR_CIPHER_G(x) \ + (((x) >> KEYCTX_TX_WR_CIPHER_S) & KEYCTX_TX_WR_CIPHER_M) + +#define KEYCTX_TX_WR_CIPHERST_S 23 +#define KEYCTX_TX_WR_CIPHERST_M 0x7f +#define KEYCTX_TX_WR_CIPHERST_V(x) ((x) << KEYCTX_TX_WR_CIPHERST_S) +#define KEYCTX_TX_WR_CIPHERST_G(x) \ + (((x) >> KEYCTX_TX_WR_CIPHERST_S) & KEYCTX_TX_WR_CIPHERST_M) + +#define KEYCTX_TX_WR_AUTH_S 14 +#define KEYCTX_TX_WR_AUTH_M 0x1ff +#define 
KEYCTX_TX_WR_AUTH_V(x) ((x) << KEYCTX_TX_WR_AUTH_S) +#define KEYCTX_TX_WR_AUTH_G(x) \ + (((x) >> KEYCTX_TX_WR_AUTH_S) & KEYCTX_TX_WR_AUTH_M) + +#define KEYCTX_TX_WR_AUTHST_S 7 +#define KEYCTX_TX_WR_AUTHST_M 0x7f +#define KEYCTX_TX_WR_AUTHST_V(x) ((x) << KEYCTX_TX_WR_AUTHST_S) +#define KEYCTX_TX_WR_AUTHST_G(x) \ + (((x) >> KEYCTX_TX_WR_AUTHST_S) & KEYCTX_TX_WR_AUTHST_M) + +#define KEYCTX_TX_WR_AUTHIN_S 0 +#define KEYCTX_TX_WR_AUTHIN_M 0x7f +#define KEYCTX_TX_WR_AUTHIN_V(x) ((x) << KEYCTX_TX_WR_AUTHIN_S) +#define KEYCTX_TX_WR_AUTHIN_G(x) \ + (((x) >> KEYCTX_TX_WR_AUTHIN_S) & KEYCTX_TX_WR_AUTHIN_M) + +struct sge_opaque_hdr { + void *dev; + dma_addr_t addr[MAX_SKB_FRAGS + 1]; +}; + +#define MAX_IVS_PAGE 256 +#define TLS_KEY_CONTEXT_SZ 64 +#define CIPHER_BLOCK_SIZE 16 +#define GCM_TAG_SIZE 16 +#define KEY_ON_MEM_SZ 16 +#define AEAD_EXPLICIT_DATA_SIZE 8 +#define TLS_HEADER_LENGTH 5 +#define SCMD_CIPH_MODE_AES_GCM 2 +/* Any MFS size should work and come from openssl */ +#define TLS_MFS 16384 + +#define RSS_HDR sizeof(struct rss_header) +#define TLS_WR_CPL_LEN \ + (sizeof(struct fw_tlstx_data_wr) + sizeof(struct cpl_tx_tls_sfo)) + +enum { + CHTLS_KEY_CONTEXT_DSGL, + CHTLS_KEY_CONTEXT_IMM, + CHTLS_KEY_CONTEXT_DDR, +}; + +enum { + CHTLS_LISTEN_START, + CHTLS_LISTEN_STOP, +}; + +/* Flags for return value of CPL message handlers */ +enum { + CPL_RET_BUF_DONE = 1, /* buffer processing done */ + CPL_RET_BAD_MSG = 2, /* bad CPL message */ + CPL_RET_UNKNOWN_TID = 4 /* unexpected unknown TID */ +}; + +#define LISTEN_INFO_HASH_SIZE 32 +#define RSPQ_HASH_BITS 5 +struct listen_info { + struct listen_info *next; /* Link to next entry */ + struct sock *sk; /* The listening socket */ + unsigned int stid; /* The server TID */ +}; + +enum { + T4_LISTEN_START_PENDING, + T4_LISTEN_STARTED +}; + +enum csk_flags { + CSK_CALLBACKS_CHKD, /* socket callbacks have been sanitized */ + CSK_ABORT_REQ_RCVD, /* received one ABORT_REQ_RSS message */ + CSK_TX_MORE_DATA, /* sending ULP data; don't set SHOVE bit */ + CSK_TX_WAIT_IDLE, /* suspend Tx until in-flight data is ACKed */ + CSK_ABORT_SHUTDOWN, /* shouldn't send more abort requests */ + CSK_ABORT_RPL_PENDING, /* expecting an abort reply */ + CSK_CLOSE_CON_REQUESTED,/* we've sent a close_conn_req */ + CSK_TX_DATA_SENT, /* sent a TX_DATA WR on this connection */ + CSK_TX_FAILOVER, /* Tx traffic failing over */ + CSK_UPDATE_RCV_WND, /* Need to update rcv window */ + CSK_RST_ABORTED, /* outgoing RST was aborted */ + CSK_TLS_HANDSHK, /* TLS Handshake */ + CSK_CONN_INLINE, /* Connection on HW */ +}; + +enum chtls_cdev_state { + CHTLS_CDEV_STATE_UP = 1 +}; + +struct listen_ctx { + struct sock *lsk; + struct chtls_dev *cdev; + struct sk_buff_head synq; + u32 state; +}; + +struct key_map { + unsigned long *addr; + unsigned int start; + unsigned int available; + unsigned int size; + spinlock_t lock; /* lock for key id request from map */ +} __packed; + +struct tls_scmd { + u32 seqno_numivs; + u32 ivgen_hdrlen; +}; + +struct chtls_dev { + struct tls_toe_device tlsdev; + struct list_head list; + struct cxgb4_lld_info *lldi; + struct pci_dev *pdev; + struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE]; + spinlock_t listen_lock; /* lock for listen list */ + struct net_device **ports; + struct tid_info *tids; + unsigned int pfvf; + const unsigned short *mtus; + + struct idr hwtid_idr; + struct idr stid_idr; + + spinlock_t idr_lock ____cacheline_aligned_in_smp; + + struct net_device *egr_dev[NCHAN * 2]; + struct sk_buff *rspq_skb_cache[1 << RSPQ_HASH_BITS]; + struct sk_buff *askb; 
+ + struct sk_buff_head deferq; + struct work_struct deferq_task; + + struct list_head list_node; + struct list_head rcu_node; + struct list_head na_node; + unsigned int send_page_order; + int max_host_sndbuf; + struct key_map kmap; + unsigned int cdev_state; +}; + +struct chtls_listen { + struct chtls_dev *cdev; + struct sock *sk; +}; + +struct chtls_hws { + struct sk_buff_head sk_recv_queue; + u8 txqid; + u8 ofld; + u16 type; + u16 rstate; + u16 keyrpl; + u16 pldlen; + u16 rcvpld; + u16 compute; + u16 expansion; + u16 keylen; + u16 pdus; + u16 adjustlen; + u16 ivsize; + u16 txleft; + u32 mfs; + s32 txkey; + s32 rxkey; + u32 fcplenmax; + u32 copied_seq; + u64 tx_seq_no; + struct tls_scmd scmd; + union { + struct tls12_crypto_info_aes_gcm_128 aes_gcm_128; + struct tls12_crypto_info_aes_gcm_256 aes_gcm_256; + } crypto_info; +}; + +struct chtls_sock { + struct sock *sk; + struct chtls_dev *cdev; + struct l2t_entry *l2t_entry; /* pointer to the L2T entry */ + struct net_device *egress_dev; /* TX_CHAN for act open retry */ + + struct sk_buff_head txq; + struct sk_buff *wr_skb_head; + struct sk_buff *wr_skb_tail; + struct sk_buff *ctrl_skb_cache; + struct sk_buff *txdata_skb_cache; /* abort path messages */ + struct kref kref; + unsigned long flags; + u32 opt2; + u32 wr_credits; + u32 wr_unacked; + u32 wr_max_credits; + u32 wr_nondata; + u32 hwtid; /* TCP Control Block ID */ + u32 txq_idx; + u32 rss_qid; + u32 tid; + u32 idr; + u32 mss; + u32 ulp_mode; + u32 tx_chan; + u32 rx_chan; + u32 sndbuf; + u32 txplen_max; + u32 mtu_idx; /* MTU table index */ + u32 smac_idx; + u8 port_id; + u8 tos; + u16 resv2; + u32 delack_mode; + u32 delack_seq; + u32 snd_win; + u32 rcv_win; + + void *passive_reap_next; /* placeholder for passive */ + struct chtls_hws tlshws; + struct synq { + struct sk_buff *next; + struct sk_buff *prev; + } synq; + struct listen_ctx *listen_ctx; +}; + +struct tls_hdr { + u8 type; + u16 version; + u16 length; +} __packed; + +struct tlsrx_cmp_hdr { + u8 type; + u16 version; + u16 length; + + u64 tls_seq; + u16 reserved1; + u8 res_to_mac_error; +} __packed; + +/* res_to_mac_error fields */ +#define TLSRX_HDR_PKT_INT_ERROR_S 4 +#define TLSRX_HDR_PKT_INT_ERROR_M 0x1 +#define TLSRX_HDR_PKT_INT_ERROR_V(x) \ + ((x) << TLSRX_HDR_PKT_INT_ERROR_S) +#define TLSRX_HDR_PKT_INT_ERROR_G(x) \ + (((x) >> TLSRX_HDR_PKT_INT_ERROR_S) & TLSRX_HDR_PKT_INT_ERROR_M) +#define TLSRX_HDR_PKT_INT_ERROR_F TLSRX_HDR_PKT_INT_ERROR_V(1U) + +#define TLSRX_HDR_PKT_SPP_ERROR_S 3 +#define TLSRX_HDR_PKT_SPP_ERROR_M 0x1 +#define TLSRX_HDR_PKT_SPP_ERROR_V(x) ((x) << TLSRX_HDR_PKT_SPP_ERROR) +#define TLSRX_HDR_PKT_SPP_ERROR_G(x) \ + (((x) >> TLSRX_HDR_PKT_SPP_ERROR_S) & TLSRX_HDR_PKT_SPP_ERROR_M) +#define TLSRX_HDR_PKT_SPP_ERROR_F TLSRX_HDR_PKT_SPP_ERROR_V(1U) + +#define TLSRX_HDR_PKT_CCDX_ERROR_S 2 +#define TLSRX_HDR_PKT_CCDX_ERROR_M 0x1 +#define TLSRX_HDR_PKT_CCDX_ERROR_V(x) ((x) << TLSRX_HDR_PKT_CCDX_ERROR_S) +#define TLSRX_HDR_PKT_CCDX_ERROR_G(x) \ + (((x) >> TLSRX_HDR_PKT_CCDX_ERROR_S) & TLSRX_HDR_PKT_CCDX_ERROR_M) +#define TLSRX_HDR_PKT_CCDX_ERROR_F TLSRX_HDR_PKT_CCDX_ERROR_V(1U) + +#define TLSRX_HDR_PKT_PAD_ERROR_S 1 +#define TLSRX_HDR_PKT_PAD_ERROR_M 0x1 +#define TLSRX_HDR_PKT_PAD_ERROR_V(x) ((x) << TLSRX_HDR_PKT_PAD_ERROR_S) +#define TLSRX_HDR_PKT_PAD_ERROR_G(x) \ + (((x) >> TLSRX_HDR_PKT_PAD_ERROR_S) & TLSRX_HDR_PKT_PAD_ERROR_M) +#define TLSRX_HDR_PKT_PAD_ERROR_F TLSRX_HDR_PKT_PAD_ERROR_V(1U) + +#define TLSRX_HDR_PKT_MAC_ERROR_S 0 +#define TLSRX_HDR_PKT_MAC_ERROR_M 0x1 +#define TLSRX_HDR_PKT_MAC_ERROR_V(x) ((x) << 
TLSRX_HDR_PKT_MAC_ERROR_S)
+#define TLSRX_HDR_PKT_MAC_ERROR_G(x) \
+	(((x) >> TLSRX_HDR_PKT_MAC_ERROR_S) & TLSRX_HDR_PKT_MAC_ERROR_M)
+#define TLSRX_HDR_PKT_MAC_ERROR_F TLSRX_HDR_PKT_MAC_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_ERROR_M 0x1F
+#define CONTENT_TYPE_ERROR 0x7F
+
+struct ulp_mem_rw {
+	__be32 cmd;
+	__be32 len16; /* command length */
+	__be32 dlen; /* data length in 32-byte units */
+	__be32 lock_addr;
+};
+
+struct tls_key_wr {
+	__be32 op_to_compl;
+	__be32 flowid_len16;
+	__be32 ftid;
+	u8 reneg_to_write_rx;
+	u8 protocol;
+	__be16 mfs;
+};
+
+struct tls_key_req {
+	struct tls_key_wr wr;
+	struct ulp_mem_rw req;
+	struct ulptx_idata sc_imm;
+};
+
+/*
+ * This lives in skb->cb and is used to chain WRs in a linked list.
+ */
+struct wr_skb_cb {
+	struct l2t_skb_cb l2t; /* reserve space for l2t CB */
+	struct sk_buff *next_wr; /* next write request */
+};
+
+/* Per-skb backlog handler. Run when a socket's backlog is processed. */
+struct blog_skb_cb {
+	void (*backlog_rcv)(struct sock *sk, struct sk_buff *skb);
+	struct chtls_dev *cdev;
+};
+
+/*
+ * Similar to tcp_skb_cb but with ULP elements added to support TLS,
+ * etc.
+ */
+struct ulp_skb_cb {
+	struct wr_skb_cb wr; /* reserve space for write request */
+	u16 flags; /* TCP-like flags */
+	u8 psh;
+	u8 ulp_mode; /* ULP mode/submode of sk_buff */
+	u32 seq; /* TCP sequence number */
+	union { /* ULP-specific fields */
+		struct {
+			u8 type;
+			u8 ofld;
+			u8 iv;
+		} tls;
+	} ulp;
+};
+
+#define ULP_SKB_CB(skb) ((struct ulp_skb_cb *)&((skb)->cb[0]))
+#define BLOG_SKB_CB(skb) ((struct blog_skb_cb *)(skb)->cb)
+
+/*
+ * Flags for ulp_skb_cb.flags.
+ */
+enum {
+	ULPCB_FLAG_NEED_HDR = 1 << 0, /* packet needs a TX_DATA_WR header */
+	ULPCB_FLAG_NO_APPEND = 1 << 1, /* don't grow this skb */
+	ULPCB_FLAG_BARRIER = 1 << 2, /* set TX_WAIT_IDLE after sending */
+	ULPCB_FLAG_HOLD = 1 << 3, /* skb not ready for Tx yet */
+	ULPCB_FLAG_COMPL = 1 << 4, /* request WR completion */
+	ULPCB_FLAG_URG = 1 << 5, /* urgent data */
+	ULPCB_FLAG_TLS_HDR = 1 << 6, /* payload with tls hdr */
+	ULPCB_FLAG_NO_HDR = 1 << 7, /* not a ofld wr */
+};
+
+/* The ULP mode/submode of an skbuff */
+#define skb_ulp_mode(skb) (ULP_SKB_CB(skb)->ulp_mode)
+#define TCP_PAGE(sk) (sk->sk_frag.page)
+#define TCP_OFF(sk) (sk->sk_frag.offset)
+
+static inline struct chtls_dev *to_chtls_dev(struct tls_toe_device *tlsdev)
+{
+	return container_of(tlsdev, struct chtls_dev, tlsdev);
+}
+
+static inline void csk_set_flag(struct chtls_sock *csk,
+				enum csk_flags flag)
+{
+	__set_bit(flag, &csk->flags);
+}
+
+static inline void csk_reset_flag(struct chtls_sock *csk,
+				  enum csk_flags flag)
+{
+	__clear_bit(flag, &csk->flags);
+}
+
+static inline bool csk_conn_inline(const struct chtls_sock *csk)
+{
+	return test_bit(CSK_CONN_INLINE, &csk->flags);
+}
+
+static inline int csk_flag(const struct sock *sk, enum csk_flags flag)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+
+	if (!csk_conn_inline(csk))
+		return 0;
+	return test_bit(flag, &csk->flags);
+}
+
+static inline int csk_flag_nochk(const struct chtls_sock *csk,
+				 enum csk_flags flag)
+{
+	return test_bit(flag, &csk->flags);
+}
+
+static inline void *cplhdr(struct sk_buff *skb)
+{
+	return skb->data;
+}
+
+static inline int is_neg_adv(unsigned int status)
+{
+	return status == CPL_ERR_RTX_NEG_ADVICE ||
+	       status == CPL_ERR_KEEPALV_NEG_ADVICE ||
+	       status == CPL_ERR_PERSIST_NEG_ADVICE;
+}
+
+static inline void process_cpl_msg(void (*fn)(struct sock *, struct sk_buff *),
+				   struct sock *sk,
+				   struct sk_buff *skb)
+{
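+	/* Run the handler inline when the socket is not owned by user
+	 * context; otherwise park the skb on the socket backlog, where the
+	 * backlog_rcv() saved in the skb's control block replays it once the
+	 * owner releases the lock.
+	 */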
skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + bh_lock_sock(sk); + if (unlikely(sock_owned_by_user(sk))) { + BLOG_SKB_CB(skb)->backlog_rcv = fn; + __sk_add_backlog(sk, skb); + } else { + fn(sk, skb); + } + bh_unlock_sock(sk); +} + +static inline void chtls_sock_free(struct kref *ref) +{ + struct chtls_sock *csk = container_of(ref, struct chtls_sock, + kref); + kfree(csk); +} + +static inline void __chtls_sock_put(const char *fn, struct chtls_sock *csk) +{ + kref_put(&csk->kref, chtls_sock_free); +} + +static inline void __chtls_sock_get(const char *fn, + struct chtls_sock *csk) +{ + kref_get(&csk->kref); +} + +static inline void send_or_defer(struct sock *sk, struct tcp_sock *tp, + struct sk_buff *skb, int through_l2t) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + if (through_l2t) { + /* send through L2T */ + cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); + } else { + /* send directly */ + cxgb4_ofld_send(csk->egress_dev, skb); + } +} + +typedef int (*chtls_handler_func)(struct chtls_dev *, struct sk_buff *); +extern chtls_handler_func chtls_handlers[NUM_CPL_CMDS]; +void chtls_install_cpl_ops(struct sock *sk); +int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi); +void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk); +int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk); +void chtls_close(struct sock *sk, long timeout); +int chtls_disconnect(struct sock *sk, int flags); +void chtls_shutdown(struct sock *sk, int how); +void chtls_destroy_sock(struct sock *sk); +int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int chtls_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags, int *addr_len); +int chtls_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); +int send_tx_flowc_wr(struct sock *sk, int compl, + u32 snd_nxt, u32 rcv_nxt); +void chtls_tcp_push(struct sock *sk, int flags); +int chtls_push_frames(struct chtls_sock *csk, int comp); +int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val); +int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 mode, int cipher_type); +void skb_entail(struct sock *sk, struct sk_buff *skb, int flags); +unsigned int keyid_to_addr(int start_addr, int keyid); +void free_tls_keyid(struct sock *sk); +#endif diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c new file mode 100644 index 000000000000..05520dccd906 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -0,0 +1,2327 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018 Chelsio Communications, Inc. 
+ * + * Written by: Atul Gupta (atul.gupta@chelsio.com) + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/skbuff.h> +#include <linux/timer.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/sched/signal.h> +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/if_vlan.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <net/transp_v6.h> +#include <net/ip6_route.h> +#include <net/inet_common.h> +#include <net/tcp.h> +#include <net/dst.h> +#include <net/tls.h> +#include <net/addrconf.h> +#include <net/secure_seq.h> + +#include "chtls.h" +#include "chtls_cm.h" +#include "clip_tbl.h" + +/* + * State transitions and actions for close. Note that if we are in SYN_SENT + * we remain in that state as we cannot control a connection while it's in + * SYN_SENT; such connections are allowed to establish and are then aborted. + */ +static unsigned char new_state[16] = { + /* current state: new state: action: */ + /* (Invalid) */ TCP_CLOSE, + /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_SYN_SENT */ TCP_SYN_SENT, + /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, + /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, + /* TCP_TIME_WAIT */ TCP_CLOSE, + /* TCP_CLOSE */ TCP_CLOSE, + /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, + /* TCP_LAST_ACK */ TCP_LAST_ACK, + /* TCP_LISTEN */ TCP_CLOSE, + /* TCP_CLOSING */ TCP_CLOSING, +}; + +static struct chtls_sock *chtls_sock_create(struct chtls_dev *cdev) +{ + struct chtls_sock *csk = kzalloc(sizeof(*csk), GFP_ATOMIC); + + if (!csk) + return NULL; + + csk->txdata_skb_cache = alloc_skb(TXDATA_SKB_LEN, GFP_ATOMIC); + if (!csk->txdata_skb_cache) { + kfree(csk); + return NULL; + } + + kref_init(&csk->kref); + csk->cdev = cdev; + skb_queue_head_init(&csk->txq); + csk->wr_skb_head = NULL; + csk->wr_skb_tail = NULL; + csk->mss = MAX_MSS; + csk->tlshws.ofld = 1; + csk->tlshws.txkey = -1; + csk->tlshws.rxkey = -1; + csk->tlshws.mfs = TLS_MFS; + skb_queue_head_init(&csk->tlshws.sk_recv_queue); + return csk; +} + +static void chtls_sock_release(struct kref *ref) +{ + struct chtls_sock *csk = + container_of(ref, struct chtls_sock, kref); + + kfree(csk); +} + +static struct net_device *chtls_find_netdev(struct chtls_dev *cdev, + struct sock *sk) +{ + struct net_device *ndev = cdev->ports[0]; +#if IS_ENABLED(CONFIG_IPV6) + struct net_device *temp; + int addr_type; +#endif + + switch (sk->sk_family) { + case PF_INET: + if (likely(!inet_sk(sk)->inet_rcv_saddr)) + return ndev; + ndev = __ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr, false); + break; +#if IS_ENABLED(CONFIG_IPV6) + case PF_INET6: + addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + if (likely(addr_type == IPV6_ADDR_ANY)) + return ndev; + + for_each_netdev_rcu(&init_net, temp) { + if (ipv6_chk_addr(&init_net, (struct in6_addr *) + &sk->sk_v6_rcv_saddr, temp, 1)) { + ndev = temp; + break; + } + } + break; +#endif + default: + return NULL; + } + + if (!ndev) + return NULL; + + if (is_vlan_dev(ndev)) + return vlan_dev_real_dev(ndev); + return ndev; +} + +static void assign_rxopt(struct sock *sk, unsigned int opt) +{ + const struct chtls_dev *cdev; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tp = tcp_sk(sk); + + cdev = csk->cdev; + tp->tcp_header_len = sizeof(struct tcphdr); + tp->rx_opt.mss_clamp = cdev->mtus[TCPOPT_MSS_G(opt)] - 40; + tp->mss_cache = 
tp->rx_opt.mss_clamp; + tp->rx_opt.tstamp_ok = TCPOPT_TSTAMP_G(opt); + tp->rx_opt.snd_wscale = TCPOPT_SACK_G(opt); + tp->rx_opt.wscale_ok = TCPOPT_WSCALE_OK_G(opt); + SND_WSCALE(tp) = TCPOPT_SND_WSCALE_G(opt); + if (!tp->rx_opt.wscale_ok) + tp->rx_opt.rcv_wscale = 0; + if (tp->rx_opt.tstamp_ok) { + tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; + tp->rx_opt.mss_clamp -= TCPOLEN_TSTAMP_ALIGNED; + } else if (csk->opt2 & TSTAMPS_EN_F) { + csk->opt2 &= ~TSTAMPS_EN_F; + csk->mtu_idx = TCPOPT_MSS_G(opt); + } +} + +static void chtls_purge_receive_queue(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + skb_dst_set(skb, (void *)NULL); + kfree_skb(skb); + } +} + +static void chtls_purge_write_queue(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&csk->txq))) { + sk->sk_wmem_queued -= skb->truesize; + __kfree_skb(skb); + } +} + +static void chtls_purge_recv_queue(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_hws *tlsk = &csk->tlshws; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) { + skb_dst_set(skb, NULL); + kfree_skb(skb); + } +} + +static void abort_arp_failure(void *handle, struct sk_buff *skb) +{ + struct cpl_abort_req *req = cplhdr(skb); + struct chtls_dev *cdev; + + cdev = (struct chtls_dev *)handle; + req->cmd = CPL_ABORT_NO_RST; + cxgb4_ofld_send(cdev->lldi->ports[0], skb); +} + +static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len) +{ + if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) { + __skb_trim(skb, 0); + refcount_add(2, &skb->users); + } else { + skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); + } + return skb; +} + +static void chtls_send_abort(struct sock *sk, int mode, struct sk_buff *skb) +{ + struct cpl_abort_req *req; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tp = tcp_sk(sk); + + if (!skb) + skb = alloc_ctrl_skb(csk->txdata_skb_cache, sizeof(*req)); + + req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req)); + INIT_TP_WR_CPL(req, CPL_ABORT_REQ, csk->tid); + skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); + req->rsvd0 = htonl(tp->snd_nxt); + req->rsvd1 = !csk_flag_nochk(csk, CSK_TX_DATA_SENT); + req->cmd = mode; + t4_set_arp_err_handler(skb, csk->cdev, abort_arp_failure); + send_or_defer(sk, tp, skb, mode == CPL_ABORT_SEND_RST); +} + +static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + if (unlikely(csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) || + !csk->cdev)) { + if (sk->sk_state == TCP_SYN_RECV) + csk_set_flag(csk, CSK_RST_ABORTED); + goto out; + } + + if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { + struct tcp_sock *tp = tcp_sk(sk); + + if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0) + WARN_ONCE(1, "send tx flowc error"); + csk_set_flag(csk, CSK_TX_DATA_SENT); + } + + csk_set_flag(csk, CSK_ABORT_RPL_PENDING); + chtls_purge_write_queue(sk); + + csk_set_flag(csk, CSK_ABORT_SHUTDOWN); + if (sk->sk_state != TCP_SYN_RECV) + chtls_send_abort(sk, mode, skb); + else + goto out; + + return; +out: + kfree_skb(skb); +} + +static void release_tcp_port(struct sock *sk) +{ + if (inet_csk(sk)->icsk_bind_hash) + inet_put_port(sk); +} + +static void tcp_uncork(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->nonagle & TCP_NAGLE_CORK) { + 
tp->nonagle &= ~TCP_NAGLE_CORK;
+		chtls_tcp_push(sk, 0);
+	}
+}
+
+static void chtls_close_conn(struct sock *sk)
+{
+	struct cpl_close_con_req *req;
+	struct chtls_sock *csk;
+	struct sk_buff *skb;
+	unsigned int tid;
+	unsigned int len;
+
+	len = roundup(sizeof(struct cpl_close_con_req), 16);
+	csk = rcu_dereference_sk_user_data(sk);
+	tid = csk->tid;
+
+	skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
+	req = (struct cpl_close_con_req *)__skb_put(skb, len);
+	memset(req, 0, len);
+	req->wr.wr_hi = htonl(FW_WR_OP_V(FW_TP_WR) |
+			      FW_WR_IMMDLEN_V(sizeof(*req) -
+					      sizeof(req->wr)));
+	req->wr.wr_mid = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)) |
+			       FW_WR_FLOWID_V(tid));
+
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
+
+	tcp_uncork(sk);
+	skb_entail(sk, skb, ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
+	if (sk->sk_state != TCP_SYN_SENT)
+		chtls_push_frames(csk, 1);
+}
+
+/*
+ * Perform a state transition during close and return the actions indicated
+ * for the transition. Do not make this function inline, the main reason
+ * it exists at all is to avoid multiple inlining of tcp_set_state.
+ */
+static int make_close_transition(struct sock *sk)
+{
+	int next = (int)new_state[sk->sk_state];
+
+	tcp_set_state(sk, next & TCP_STATE_MASK);
+	return next & TCP_ACTION_FIN;
+}
+
+void chtls_close(struct sock *sk, long timeout)
+{
+	int data_lost, prev_state;
+	struct chtls_sock *csk;
+
+	csk = rcu_dereference_sk_user_data(sk);
+
+	lock_sock(sk);
+	sk->sk_shutdown |= SHUTDOWN_MASK;
+
+	data_lost = skb_queue_len(&sk->sk_receive_queue);
+	data_lost |= skb_queue_len(&csk->tlshws.sk_recv_queue);
+	chtls_purge_recv_queue(sk);
+	chtls_purge_receive_queue(sk);
+
+	if (sk->sk_state == TCP_CLOSE) {
+		goto wait;
+	} else if (data_lost || sk->sk_state == TCP_SYN_SENT) {
+		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
+		release_tcp_port(sk);
+		goto unlock;
+	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+		sk->sk_prot->disconnect(sk, 0);
+	} else if (make_close_transition(sk)) {
+		chtls_close_conn(sk);
+	}
+wait:
+	if (timeout)
+		sk_stream_wait_close(sk, timeout);
+
+unlock:
+	prev_state = sk->sk_state;
+	sock_hold(sk);
+	sock_orphan(sk);
+
+	release_sock(sk);
+
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	if (prev_state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
+		goto out;
+
+	if (sk->sk_state == TCP_FIN_WAIT2 && tcp_sk(sk)->linger2 < 0 &&
+	    !csk_flag(sk, CSK_ABORT_SHUTDOWN)) {
+		struct sk_buff *skb;
+
+		skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
+		if (skb)
+			chtls_send_reset(sk, CPL_ABORT_SEND_RST, skb);
+	}
+
+	if (sk->sk_state == TCP_CLOSE)
+		inet_csk_destroy_sock(sk);
+
+out:
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	sock_put(sk);
+}
+
+/*
+ * Wait until a socket enters one of the given states.
+ */
+static int wait_for_states(struct sock *sk, unsigned int states)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct socket_wq _sk_wq;
+	long current_timeo;
+	int err = 0;
+
+	current_timeo = 200;
+
+	/*
+	 * We want this to work even when there's no associated struct socket.
+	 * In that case we provide a temporary wait_queue_head_t.
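+	 * The stack-allocated socket_wq is published with RCU_INIT_POINTER()
+	 * below and unpublished again before this function returns, so it
+	 * never outlives the wait.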
+ */ + if (!sk->sk_wq) { + init_waitqueue_head(&_sk_wq.wait); + _sk_wq.fasync_list = NULL; + init_rcu_head_on_stack(&_sk_wq.rcu); + RCU_INIT_POINTER(sk->sk_wq, &_sk_wq); + } + + add_wait_queue(sk_sleep(sk), &wait); + while (!sk_in_state(sk, states)) { + if (!current_timeo) { + err = -EBUSY; + break; + } + if (signal_pending(current)) { + err = sock_intr_errno(current_timeo); + break; + } + set_current_state(TASK_UNINTERRUPTIBLE); + release_sock(sk); + if (!sk_in_state(sk, states)) + current_timeo = schedule_timeout(current_timeo); + __set_current_state(TASK_RUNNING); + lock_sock(sk); + } + remove_wait_queue(sk_sleep(sk), &wait); + + if (rcu_dereference(sk->sk_wq) == &_sk_wq) + sk->sk_wq = NULL; + return err; +} + +int chtls_disconnect(struct sock *sk, int flags) +{ + struct tcp_sock *tp; + int err; + + tp = tcp_sk(sk); + chtls_purge_recv_queue(sk); + chtls_purge_receive_queue(sk); + chtls_purge_write_queue(sk); + + if (sk->sk_state != TCP_CLOSE) { + sk->sk_err = ECONNRESET; + chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL); + err = wait_for_states(sk, TCPF_CLOSE); + if (err) + return err; + } + chtls_purge_recv_queue(sk); + chtls_purge_receive_queue(sk); + tp->max_window = 0xFFFF << (tp->rx_opt.snd_wscale); + return tcp_disconnect(sk, flags); +} + +#define SHUTDOWN_ELIGIBLE_STATE (TCPF_ESTABLISHED | \ + TCPF_SYN_RECV | TCPF_CLOSE_WAIT) +void chtls_shutdown(struct sock *sk, int how) +{ + if ((how & SEND_SHUTDOWN) && + sk_in_state(sk, SHUTDOWN_ELIGIBLE_STATE) && + make_close_transition(sk)) + chtls_close_conn(sk); +} + +void chtls_destroy_sock(struct sock *sk) +{ + struct chtls_sock *csk; + + csk = rcu_dereference_sk_user_data(sk); + chtls_purge_recv_queue(sk); + csk->ulp_mode = ULP_MODE_NONE; + chtls_purge_write_queue(sk); + free_tls_keyid(sk); + kref_put(&csk->kref, chtls_sock_release); + csk->cdev = NULL; + if (sk->sk_family == AF_INET) + sk->sk_prot = &tcp_prot; +#if IS_ENABLED(CONFIG_IPV6) + else + sk->sk_prot = &tcpv6_prot; +#endif + sk->sk_prot->destroy(sk); +} + +static void reset_listen_child(struct sock *child) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(child); + struct sk_buff *skb; + + skb = alloc_ctrl_skb(csk->txdata_skb_cache, + sizeof(struct cpl_abort_req)); + + chtls_send_reset(child, CPL_ABORT_SEND_RST, skb); + sock_orphan(child); + INC_ORPHAN_COUNT(child); + if (child->sk_state == TCP_CLOSE) + inet_csk_destroy_sock(child); +} + +static void chtls_disconnect_acceptq(struct sock *listen_sk) +{ + struct request_sock **pprev; + + pprev = ACCEPT_QUEUE(listen_sk); + while (*pprev) { + struct request_sock *req = *pprev; + + if (req->rsk_ops == &chtls_rsk_ops || + req->rsk_ops == &chtls_rsk_opsv6) { + struct sock *child = req->sk; + + *pprev = req->dl_next; + sk_acceptq_removed(listen_sk); + reqsk_put(req); + sock_hold(child); + local_bh_disable(); + bh_lock_sock(child); + release_tcp_port(child); + reset_listen_child(child); + bh_unlock_sock(child); + local_bh_enable(); + sock_put(child); + } else { + pprev = &req->dl_next; + } + } +} + +static int listen_hashfn(const struct sock *sk) +{ + return ((unsigned long)sk >> 10) & (LISTEN_INFO_HASH_SIZE - 1); +} + +static struct listen_info *listen_hash_add(struct chtls_dev *cdev, + struct sock *sk, + unsigned int stid) +{ + struct listen_info *p = kmalloc(sizeof(*p), GFP_KERNEL); + + if (p) { + int key = listen_hashfn(sk); + + p->sk = sk; + p->stid = stid; + spin_lock(&cdev->listen_lock); + p->next = cdev->listen_hash_tab[key]; + cdev->listen_hash_tab[key] = p; + spin_unlock(&cdev->listen_lock); + } + return p; +} + 
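+/*
+ * Worked example for listen_hashfn() above (the address is illustrative
+ * only): a listening struct sock at 0xffff888102345400 is filed in bucket
+ * ((0xffff888102345400 >> 10) & (LISTEN_INFO_HASH_SIZE - 1)).  Shifting by
+ * 10 first discards the low bits, which are nearly constant because of
+ * slab object alignment, before the mask selects the bucket.
+ */
+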
+static int listen_hash_find(struct chtls_dev *cdev, + struct sock *sk) +{ + struct listen_info *p; + int stid = -1; + int key; + + key = listen_hashfn(sk); + + spin_lock(&cdev->listen_lock); + for (p = cdev->listen_hash_tab[key]; p; p = p->next) + if (p->sk == sk) { + stid = p->stid; + break; + } + spin_unlock(&cdev->listen_lock); + return stid; +} + +static int listen_hash_del(struct chtls_dev *cdev, + struct sock *sk) +{ + struct listen_info *p, **prev; + int stid = -1; + int key; + + key = listen_hashfn(sk); + prev = &cdev->listen_hash_tab[key]; + + spin_lock(&cdev->listen_lock); + for (p = *prev; p; prev = &p->next, p = p->next) + if (p->sk == sk) { + stid = p->stid; + *prev = p->next; + kfree(p); + break; + } + spin_unlock(&cdev->listen_lock); + return stid; +} + +static void cleanup_syn_rcv_conn(struct sock *child, struct sock *parent) +{ + struct request_sock *req; + struct chtls_sock *csk; + + csk = rcu_dereference_sk_user_data(child); + req = csk->passive_reap_next; + + reqsk_queue_removed(&inet_csk(parent)->icsk_accept_queue, req); + __skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq); + chtls_reqsk_free(req); + csk->passive_reap_next = NULL; +} + +static void chtls_reset_synq(struct listen_ctx *listen_ctx) +{ + struct sock *listen_sk = listen_ctx->lsk; + + while (!skb_queue_empty(&listen_ctx->synq)) { + struct chtls_sock *csk = + container_of((struct synq *)__skb_dequeue + (&listen_ctx->synq), struct chtls_sock, synq); + struct sock *child = csk->sk; + + cleanup_syn_rcv_conn(child, listen_sk); + sock_hold(child); + local_bh_disable(); + bh_lock_sock(child); + release_tcp_port(child); + reset_listen_child(child); + bh_unlock_sock(child); + local_bh_enable(); + sock_put(child); + } +} + +int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk) +{ + struct net_device *ndev; +#if IS_ENABLED(CONFIG_IPV6) + bool clip_valid = false; +#endif + struct listen_ctx *ctx; + struct adapter *adap; + struct port_info *pi; + int ret = 0; + int stid; + + rcu_read_lock(); + ndev = chtls_find_netdev(cdev, sk); + rcu_read_unlock(); + if (!ndev) + return -EBADF; + + pi = netdev_priv(ndev); + adap = pi->adapter; + if (!(adap->flags & CXGB4_FULL_INIT_DONE)) + return -EBADF; + + if (listen_hash_find(cdev, sk) >= 0) /* already have it */ + return -EADDRINUSE; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + __module_get(THIS_MODULE); + ctx->lsk = sk; + ctx->cdev = cdev; + ctx->state = T4_LISTEN_START_PENDING; + skb_queue_head_init(&ctx->synq); + + stid = cxgb4_alloc_stid(cdev->tids, sk->sk_family, ctx); + if (stid < 0) + goto free_ctx; + + sock_hold(sk); + if (!listen_hash_add(cdev, sk, stid)) + goto free_stid; + + if (sk->sk_family == PF_INET) { + ret = cxgb4_create_server(ndev, stid, + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_sport, 0, + cdev->lldi->rxq_ids[0]); +#if IS_ENABLED(CONFIG_IPV6) + } else { + int addr_type; + + addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + if (addr_type != IPV6_ADDR_ANY) { + ret = cxgb4_clip_get(ndev, (const u32 *) + &sk->sk_v6_rcv_saddr, 1); + if (ret) + goto del_hash; + clip_valid = true; + } + ret = cxgb4_create_server6(ndev, stid, + &sk->sk_v6_rcv_saddr, + inet_sk(sk)->inet_sport, + cdev->lldi->rxq_ids[0]); +#endif + } + if (ret > 0) + ret = net_xmit_errno(ret); + if (ret) + goto del_hash; + return 0; +del_hash: +#if IS_ENABLED(CONFIG_IPV6) + if (clip_valid) + cxgb4_clip_release(ndev, (const u32 *)&sk->sk_v6_rcv_saddr, 1); +#endif + listen_hash_del(cdev, sk); +free_stid: + cxgb4_free_stid(cdev->tids, 
stid, sk->sk_family); + sock_put(sk); +free_ctx: + kfree(ctx); + module_put(THIS_MODULE); + return -EBADF; +} + +void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk) +{ + struct listen_ctx *listen_ctx; + int stid; + + stid = listen_hash_del(cdev, sk); + if (stid < 0) + return; + + listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid); + chtls_reset_synq(listen_ctx); + + cxgb4_remove_server(cdev->lldi->ports[0], stid, + cdev->lldi->rxq_ids[0], sk->sk_family == PF_INET6); + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == PF_INET6) { + struct chtls_sock *csk; + int addr_type = 0; + + csk = rcu_dereference_sk_user_data(sk); + addr_type = ipv6_addr_type((const struct in6_addr *) + &sk->sk_v6_rcv_saddr); + if (addr_type != IPV6_ADDR_ANY) + cxgb4_clip_release(csk->egress_dev, (const u32 *) + &sk->sk_v6_rcv_saddr, 1); + } +#endif + chtls_disconnect_acceptq(sk); +} + +static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_pass_open_rpl *rpl = cplhdr(skb) + RSS_HDR; + unsigned int stid = GET_TID(rpl); + struct listen_ctx *listen_ctx; + + listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid); + if (!listen_ctx) + return CPL_RET_BUF_DONE; + + if (listen_ctx->state == T4_LISTEN_START_PENDING) { + listen_ctx->state = T4_LISTEN_STARTED; + return CPL_RET_BUF_DONE; + } + + if (rpl->status != CPL_ERR_NONE) { + pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n", + rpl->status, stid); + return CPL_RET_BUF_DONE; + } + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); + + return 0; +} + +static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_close_listsvr_rpl *rpl = cplhdr(skb) + RSS_HDR; + struct listen_ctx *listen_ctx; + unsigned int stid; + void *data; + + stid = GET_TID(rpl); + data = lookup_stid(cdev->tids, stid); + listen_ctx = (struct listen_ctx *)data; + + if (rpl->status != CPL_ERR_NONE) { + pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n", + rpl->status, stid); + return CPL_RET_BUF_DONE; + } + + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); + + return 0; +} + +static void chtls_purge_wr_queue(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = dequeue_wr(sk)) != NULL) + kfree_skb(skb); +} + +static void chtls_release_resources(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_dev *cdev = csk->cdev; + unsigned int tid = csk->tid; + struct tid_info *tids; + + if (!cdev) + return; + + tids = cdev->tids; + kfree_skb(csk->txdata_skb_cache); + csk->txdata_skb_cache = NULL; + + if (csk->wr_credits != csk->wr_max_credits) { + chtls_purge_wr_queue(sk); + chtls_reset_wr_list(csk); + } + + if (csk->l2t_entry) { + cxgb4_l2t_release(csk->l2t_entry); + csk->l2t_entry = NULL; + } + + if (sk->sk_state != TCP_SYN_SENT) { + cxgb4_remove_tid(tids, csk->port_id, tid, sk->sk_family); + sock_put(sk); + } +} + +static void chtls_conn_done(struct sock *sk) +{ + if (sock_flag(sk, SOCK_DEAD)) + chtls_purge_receive_queue(sk); + sk_wakeup_sleepers(sk, 0); + tcp_done(sk); +} + +static void do_abort_syn_rcv(struct sock *child, struct sock *parent) +{ + /* + * If the server is still open we clean up the child connection, + * otherwise the server already did the clean up as it was purging + * its SYN queue and the skb was just sitting in its backlog. 
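+	 * In the latter case there is nothing left to free unless the child
+	 * was marked CSK_RST_ABORTED, which is exactly what the else branch
+	 * below checks for.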
+ */ + if (likely(parent->sk_state == TCP_LISTEN)) { + cleanup_syn_rcv_conn(child, parent); + /* Without the below call to sock_orphan, + * we leak the socket resource with syn_flood test + * as inet_csk_destroy_sock will not be called + * in tcp_done since SOCK_DEAD flag is not set. + * Kernel handles this differently where new socket is + * created only after 3 way handshake is done. + */ + sock_orphan(child); + percpu_counter_inc((child)->sk_prot->orphan_count); + chtls_release_resources(child); + chtls_conn_done(child); + } else { + if (csk_flag(child, CSK_RST_ABORTED)) { + chtls_release_resources(child); + chtls_conn_done(child); + } + } +} + +static void pass_open_abort(struct sock *child, struct sock *parent, + struct sk_buff *skb) +{ + do_abort_syn_rcv(child, parent); + kfree_skb(skb); +} + +static void bl_pass_open_abort(struct sock *lsk, struct sk_buff *skb) +{ + pass_open_abort(skb->sk, lsk, skb); +} + +static void chtls_pass_open_arp_failure(struct sock *sk, + struct sk_buff *skb) +{ + const struct request_sock *oreq; + struct chtls_sock *csk; + struct chtls_dev *cdev; + struct sock *parent; + void *data; + + csk = rcu_dereference_sk_user_data(sk); + cdev = csk->cdev; + + /* + * If the connection is being aborted due to the parent listening + * socket going away there's nothing to do, the ABORT_REQ will close + * the connection. + */ + if (csk_flag(sk, CSK_ABORT_RPL_PENDING)) { + kfree_skb(skb); + return; + } + + oreq = csk->passive_reap_next; + data = lookup_stid(cdev->tids, oreq->ts_recent); + parent = ((struct listen_ctx *)data)->lsk; + + bh_lock_sock(parent); + if (!sock_owned_by_user(parent)) { + pass_open_abort(sk, parent, skb); + } else { + BLOG_SKB_CB(skb)->backlog_rcv = bl_pass_open_abort; + __sk_add_backlog(parent, skb); + } + bh_unlock_sock(parent); +} + +static void chtls_accept_rpl_arp_failure(void *handle, + struct sk_buff *skb) +{ + struct sock *sk = (struct sock *)handle; + + sock_hold(sk); + process_cpl_msg(chtls_pass_open_arp_failure, sk, skb); + sock_put(sk); +} + +static unsigned int chtls_select_mss(const struct chtls_sock *csk, + unsigned int pmtu, + struct cpl_pass_accept_req *req) +{ + struct chtls_dev *cdev; + struct dst_entry *dst; + unsigned int tcpoptsz; + unsigned int iphdrsz; + unsigned int mtu_idx; + struct tcp_sock *tp; + unsigned int mss; + struct sock *sk; + + mss = ntohs(req->tcpopt.mss); + sk = csk->sk; + dst = __sk_dst_get(sk); + cdev = csk->cdev; + tp = tcp_sk(sk); + tcpoptsz = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + iphdrsz = sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + else +#endif + iphdrsz = sizeof(struct iphdr) + sizeof(struct tcphdr); + if (req->tcpopt.tstamp) + tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4); + + tp->advmss = dst_metric_advmss(dst); + if (USER_MSS(tp) && tp->advmss > USER_MSS(tp)) + tp->advmss = USER_MSS(tp); + if (tp->advmss > pmtu - iphdrsz) + tp->advmss = pmtu - iphdrsz; + if (mss && tp->advmss > mss) + tp->advmss = mss; + + tp->advmss = cxgb4_best_aligned_mtu(cdev->lldi->mtus, + iphdrsz + tcpoptsz, + tp->advmss - tcpoptsz, + 8, &mtu_idx); + tp->advmss -= iphdrsz; + + inet_csk(sk)->icsk_pmtu_cookie = pmtu; + return mtu_idx; +} + +static unsigned int select_rcv_wscale(int space, int wscale_ok, int win_clamp) +{ + int wscale = 0; + + if (space > MAX_RCV_WND) + space = MAX_RCV_WND; + if (win_clamp && win_clamp < space) + space = win_clamp; + + if (wscale_ok) { + while (wscale < 14 && (65535 << wscale) < space) + wscale++; + } + return wscale; +} + +static void chtls_pass_accept_rpl(struct 
sk_buff *skb, + struct cpl_pass_accept_req *req, + unsigned int tid) + +{ + struct cpl_t5_pass_accept_rpl *rpl5; + struct cxgb4_lld_info *lldi; + const struct tcphdr *tcph; + const struct tcp_sock *tp; + struct chtls_sock *csk; + unsigned int len; + struct sock *sk; + u32 opt2, hlen; + u64 opt0; + + sk = skb->sk; + tp = tcp_sk(sk); + csk = sk->sk_user_data; + csk->tid = tid; + lldi = csk->cdev->lldi; + len = roundup(sizeof(*rpl5), 16); + + rpl5 = __skb_put_zero(skb, len); + INIT_TP_WR(rpl5, tid); + + OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + csk->tid)); + csk->mtu_idx = chtls_select_mss(csk, dst_mtu(__sk_dst_get(sk)), + req); + opt0 = TCAM_BYPASS_F | + WND_SCALE_V(RCV_WSCALE(tp)) | + MSS_IDX_V(csk->mtu_idx) | + L2T_IDX_V(csk->l2t_entry->idx) | + NAGLE_V(!(tp->nonagle & TCP_NAGLE_OFF)) | + TX_CHAN_V(csk->tx_chan) | + SMAC_SEL_V(csk->smac_idx) | + DSCP_V(csk->tos >> 2) | + ULP_MODE_V(ULP_MODE_TLS) | + RCV_BUFSIZ_V(min(tp->rcv_wnd >> 10, RCV_BUFSIZ_M)); + + opt2 = RX_CHANNEL_V(0) | + RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid); + + if (!is_t5(lldi->adapter_type)) + opt2 |= RX_FC_DISABLE_F; + if (req->tcpopt.tstamp) + opt2 |= TSTAMPS_EN_F; + if (req->tcpopt.sack) + opt2 |= SACK_EN_F; + hlen = ntohl(req->hdr_len); + + tcph = (struct tcphdr *)((u8 *)(req + 1) + + T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen)); + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN_V(1); + opt2 |= CONG_CNTRL_V(CONG_ALG_NEWRENO); + opt2 |= T5_ISS_F; + opt2 |= T5_OPT_2_VALID_F; + opt2 |= WND_SCALE_EN_V(WSCALE_OK(tp)); + rpl5->opt0 = cpu_to_be64(opt0); + rpl5->opt2 = cpu_to_be32(opt2); + rpl5->iss = cpu_to_be32((prandom_u32() & ~7UL) - 1); + set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->port_id); + t4_set_arp_err_handler(skb, sk, chtls_accept_rpl_arp_failure); + cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); +} + +static void inet_inherit_port(struct inet_hashinfo *hash_info, + struct sock *lsk, struct sock *newsk) +{ + local_bh_disable(); + __inet_inherit_port(lsk, newsk); + local_bh_enable(); +} + +static int chtls_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + if (skb->protocol) { + kfree_skb(skb); + return 0; + } + BLOG_SKB_CB(skb)->backlog_rcv(sk, skb); + return 0; +} + +static void chtls_set_tcp_window(struct chtls_sock *csk) +{ + struct net_device *ndev = csk->egress_dev; + struct port_info *pi = netdev_priv(ndev); + unsigned int linkspeed; + u8 scale; + + linkspeed = pi->link_cfg.speed; + scale = linkspeed / SPEED_10000; +#define CHTLS_10G_RCVWIN (256 * 1024) + csk->rcv_win = CHTLS_10G_RCVWIN; + if (scale) + csk->rcv_win *= scale; +#define CHTLS_10G_SNDWIN (256 * 1024) + csk->snd_win = CHTLS_10G_SNDWIN; + if (scale) + csk->snd_win *= scale; +} + +static struct sock *chtls_recv_sock(struct sock *lsk, + struct request_sock *oreq, + void *network_hdr, + const struct cpl_pass_accept_req *req, + struct chtls_dev *cdev) +{ + struct neighbour *n = NULL; + struct inet_sock *newinet; + const struct iphdr *iph; + struct tls_context *ctx; + struct net_device *ndev; + struct chtls_sock *csk; + struct dst_entry *dst; + struct tcp_sock *tp; + struct sock *newsk; + u16 port_id; + int rxq_idx; + int step; + + iph = (const struct iphdr *)network_hdr; + newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb); + if (!newsk) + goto free_oreq; + + if (lsk->sk_family == AF_INET) { + dst = inet_csk_route_child_sock(lsk, newsk, oreq); + if (!dst) + goto free_sk; + + n = dst_neigh_lookup(dst, &iph->saddr); +#if IS_ENABLED(CONFIG_IPV6) + } else { + const struct ipv6hdr *ip6h; + struct flowi6 fl6; + + 
ip6h = (const struct ipv6hdr *)network_hdr;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_TCP;
+		fl6.saddr = ip6h->daddr;
+		fl6.daddr = ip6h->saddr;
+		fl6.fl6_dport = inet_rsk(oreq)->ir_rmt_port;
+		fl6.fl6_sport = htons(inet_rsk(oreq)->ir_num);
+		security_req_classify_flow(oreq, flowi6_to_flowi(&fl6));
+		dst = ip6_dst_lookup_flow(sock_net(lsk), lsk, &fl6, NULL);
+		if (IS_ERR(dst))
+			goto free_sk;
+		n = dst_neigh_lookup(dst, &ip6h->saddr);
+#endif
+	}
+	if (!n)
+		goto free_sk;
+
+	ndev = n->dev;
+	if (!ndev)
+		goto free_dst;
+	port_id = cxgb4_port_idx(ndev);
+
+	csk = chtls_sock_create(cdev);
+	if (!csk)
+		goto free_dst;
+
+	csk->l2t_entry = cxgb4_l2t_get(cdev->lldi->l2t, n, ndev, 0);
+	if (!csk->l2t_entry)
+		goto free_csk;
+
+	newsk->sk_user_data = csk;
+	newsk->sk_backlog_rcv = chtls_backlog_rcv;
+
+	tp = tcp_sk(newsk);
+	newinet = inet_sk(newsk);
+
+	if (iph->version == 0x4) {
+		newinet->inet_daddr = iph->saddr;
+		newinet->inet_rcv_saddr = iph->daddr;
+		newinet->inet_saddr = iph->daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		struct tcp6_sock *newtcp6sk = (struct tcp6_sock *)newsk;
+		struct inet_request_sock *treq = inet_rsk(oreq);
+		struct ipv6_pinfo *newnp = inet6_sk(newsk);
+		struct ipv6_pinfo *np = inet6_sk(lsk);
+
+		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+		newsk->sk_v6_daddr = treq->ir_v6_rmt_addr;
+		newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr;
+		inet6_sk(newsk)->saddr = treq->ir_v6_loc_addr;
+		newnp->ipv6_fl_list = NULL;
+		newnp->pktoptions = NULL;
+		newsk->sk_bound_dev_if = treq->ir_iif;
+		newinet->inet_opt = NULL;
+		newinet->inet_daddr = LOOPBACK4_IPV6;
+		newinet->inet_saddr = LOOPBACK4_IPV6;
+#endif
+	}
+
+	oreq->ts_recent = PASS_OPEN_TID_G(ntohl(req->tos_stid));
+	sk_setup_caps(newsk, dst);
+	ctx = tls_get_ctx(lsk);
+	newsk->sk_destruct = ctx->sk_destruct;
+	csk->sk = newsk;
+	csk->passive_reap_next = oreq;
+	csk->tx_chan = cxgb4_port_chan(ndev);
+	csk->port_id = port_id;
+	csk->egress_dev = ndev;
+	csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+	chtls_set_tcp_window(csk);
+	tp->rcv_wnd = csk->rcv_win;
+	csk->sndbuf = csk->snd_win;
+	csk->ulp_mode = ULP_MODE_TLS;
+	step = cdev->lldi->nrxq / cdev->lldi->nchan;
+	csk->rss_qid = cdev->lldi->rxq_ids[port_id * step];
+	rxq_idx = port_id * step;
+	csk->txq_idx = (rxq_idx < cdev->lldi->ntxq) ? rxq_idx :
+			port_id * step;
+	csk->sndbuf = newsk->sk_sndbuf;
+	csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
+	RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
+					   sock_net(newsk)->
+						ipv4.sysctl_tcp_window_scaling,
+					   tp->window_clamp);
+	neigh_release(n);
+	inet_inherit_port(&tcp_hashinfo, lsk, newsk);
+	csk_set_flag(csk, CSK_CONN_INLINE);
+	bh_unlock_sock(newsk); /* tcp_create_openreq_child ->sk_clone_lock */
+
+	return newsk;
+free_csk:
+	chtls_sock_release(&csk->kref);
+free_dst:
+	dst_release(dst);
+free_sk:
+	inet_csk_prepare_forced_close(newsk);
+	tcp_done(newsk);
+free_oreq:
+	chtls_reqsk_free(oreq);
+	return NULL;
+}
+
+/*
+ * Populate a TID_RELEASE WR. The skb must already be properly sized.
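+ * Callers allocate the skb up front (see the reject path of
+ * chtls_pass_accept_request()), so this helper itself cannot fail.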
+ */ +static void mk_tid_release(struct sk_buff *skb, + unsigned int chan, unsigned int tid) +{ + struct cpl_tid_release *req; + unsigned int len; + + len = roundup(sizeof(struct cpl_tid_release), 16); + req = (struct cpl_tid_release *)__skb_put(skb, len); + memset(req, 0, len); + set_wr_txq(skb, CPL_PRIORITY_SETUP, chan); + INIT_TP_WR_CPL(req, CPL_TID_RELEASE, tid); +} + +static int chtls_get_module(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (!try_module_get(icsk->icsk_ulp_ops->owner)) + return -1; + + return 0; +} + +static void chtls_pass_accept_request(struct sock *sk, + struct sk_buff *skb) +{ + struct cpl_t5_pass_accept_rpl *rpl; + struct cpl_pass_accept_req *req; + struct listen_ctx *listen_ctx; + struct vlan_ethhdr *vlan_eh; + struct request_sock *oreq; + struct sk_buff *reply_skb; + struct chtls_sock *csk; + struct chtls_dev *cdev; + struct ipv6hdr *ip6h; + struct tcphdr *tcph; + struct sock *newsk; + struct ethhdr *eh; + struct iphdr *iph; + void *network_hdr; + unsigned int stid; + unsigned int len; + unsigned int tid; + bool th_ecn, ect; + __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ + u16 eth_hdr_len; + bool ecn_ok; + + req = cplhdr(skb) + RSS_HDR; + tid = GET_TID(req); + cdev = BLOG_SKB_CB(skb)->cdev; + newsk = lookup_tid(cdev->tids, tid); + stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); + if (newsk) { + pr_info("tid (%d) already in use\n", tid); + return; + } + + len = roundup(sizeof(*rpl), 16); + reply_skb = alloc_skb(len, GFP_ATOMIC); + if (!reply_skb) { + cxgb4_remove_tid(cdev->tids, 0, tid, sk->sk_family); + kfree_skb(skb); + return; + } + + if (sk->sk_state != TCP_LISTEN) + goto reject; + + if (inet_csk_reqsk_queue_is_full(sk)) + goto reject; + + if (sk_acceptq_is_full(sk)) + goto reject; + + + eth_hdr_len = T6_ETH_HDR_LEN_G(ntohl(req->hdr_len)); + if (eth_hdr_len == ETH_HLEN) { + eh = (struct ethhdr *)(req + 1); + iph = (struct iphdr *)(eh + 1); + ip6h = (struct ipv6hdr *)(eh + 1); + network_hdr = (void *)(eh + 1); + } else { + vlan_eh = (struct vlan_ethhdr *)(req + 1); + iph = (struct iphdr *)(vlan_eh + 1); + ip6h = (struct ipv6hdr *)(vlan_eh + 1); + network_hdr = (void *)(vlan_eh + 1); + } + + if (iph->version == 0x4) { + tcph = (struct tcphdr *)(iph + 1); + skb_set_network_header(skb, (void *)iph - (void *)req); + oreq = inet_reqsk_alloc(&chtls_rsk_ops, sk, true); + } else { + tcph = (struct tcphdr *)(ip6h + 1); + skb_set_network_header(skb, (void *)ip6h - (void *)req); + oreq = inet_reqsk_alloc(&chtls_rsk_opsv6, sk, false); + } + + if (!oreq) + goto reject; + + oreq->rsk_rcv_wnd = 0; + oreq->rsk_window_clamp = 0; + oreq->syncookie = 0; + oreq->mss = 0; + oreq->ts_recent = 0; + + tcp_rsk(oreq)->tfo_listener = false; + tcp_rsk(oreq)->rcv_isn = ntohl(tcph->seq); + chtls_set_req_port(oreq, tcph->source, tcph->dest); + if (iph->version == 0x4) { + chtls_set_req_addr(oreq, iph->daddr, iph->saddr); + ip_dsfield = ipv4_get_dsfield(iph); +#if IS_ENABLED(CONFIG_IPV6) + } else { + inet_rsk(oreq)->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + inet_rsk(oreq)->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + ip_dsfield = ipv6_get_dsfield(ipv6_hdr(skb)); +#endif + } + if (req->tcpopt.wsf <= 14 && + sock_net(sk)->ipv4.sysctl_tcp_window_scaling) { + inet_rsk(oreq)->wscale_ok = 1; + inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf; + } + inet_rsk(oreq)->ir_iif = sk->sk_bound_dev_if; + th_ecn = tcph->ece && tcph->cwr; + if (th_ecn) { + ect = !INET_ECN_is_not_ect(ip_dsfield); + ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn; + if ((!ect && ecn_ok) || 
tcp_ca_needs_ecn(sk)) + inet_rsk(oreq)->ecn_ok = 1; + } + + newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev); + if (!newsk) + goto free_oreq; + + if (chtls_get_module(newsk)) + goto reject; + inet_csk_reqsk_queue_added(sk); + reply_skb->sk = newsk; + chtls_install_cpl_ops(newsk); + cxgb4_insert_tid(cdev->tids, newsk, tid, newsk->sk_family); + csk = rcu_dereference_sk_user_data(newsk); + listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid); + csk->listen_ctx = listen_ctx; + __skb_queue_tail(&listen_ctx->synq, (struct sk_buff *)&csk->synq); + chtls_pass_accept_rpl(reply_skb, req, tid); + kfree_skb(skb); + return; + +free_oreq: + chtls_reqsk_free(oreq); +reject: + mk_tid_release(reply_skb, 0, tid); + cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); + kfree_skb(skb); +} + +/* + * Handle a CPL_PASS_ACCEPT_REQ message. + */ +static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_pass_accept_req *req = cplhdr(skb) + RSS_HDR; + struct listen_ctx *ctx; + unsigned int stid; + unsigned int tid; + struct sock *lsk; + void *data; + + stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); + tid = GET_TID(req); + + data = lookup_stid(cdev->tids, stid); + if (!data) + return 1; + + ctx = (struct listen_ctx *)data; + lsk = ctx->lsk; + + if (unlikely(tid_out_of_range(cdev->tids, tid))) { + pr_info("passive open TID %u too large\n", tid); + return 1; + } + + BLOG_SKB_CB(skb)->cdev = cdev; + process_cpl_msg(chtls_pass_accept_request, lsk, skb); + return 0; +} + +/* + * Completes some final bits of initialization for just established connections + * and changes their state to TCP_ESTABLISHED. + * + * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1. + */ +static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->pushed_seq = snd_isn; + tp->write_seq = snd_isn; + tp->snd_nxt = snd_isn; + tp->snd_una = snd_isn; + inet_sk(sk)->inet_id = prandom_u32(); + assign_rxopt(sk, opt); + + if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10)) + tp->rcv_wup -= tp->rcv_wnd - (RCV_BUFSIZ_M << 10); + + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); +} + +static void chtls_abort_conn(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *abort_skb; + + abort_skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC); + if (abort_skb) + chtls_send_reset(sk, CPL_ABORT_SEND_RST, abort_skb); +} + +static struct sock *reap_list; +static DEFINE_SPINLOCK(reap_list_lock); + +/* + * Process the reap list. 
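+ * Children that cannot be handed to their listener are chained through
+ * csk->passive_reap_next by add_to_reap_list() and destroyed from this
+ * work item, in process context.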
+ */ +DECLARE_TASK_FUNC(process_reap_list, task_param) +{ + spin_lock_bh(&reap_list_lock); + while (reap_list) { + struct sock *sk = reap_list; + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + reap_list = csk->passive_reap_next; + csk->passive_reap_next = NULL; + spin_unlock(&reap_list_lock); + sock_hold(sk); + + bh_lock_sock(sk); + chtls_abort_conn(sk, NULL); + sock_orphan(sk); + if (sk->sk_state == TCP_CLOSE) + inet_csk_destroy_sock(sk); + bh_unlock_sock(sk); + sock_put(sk); + spin_lock(&reap_list_lock); + } + spin_unlock_bh(&reap_list_lock); +} + +static DECLARE_WORK(reap_task, process_reap_list); + +static void add_to_reap_list(struct sock *sk) +{ + struct chtls_sock *csk = sk->sk_user_data; + + local_bh_disable(); + bh_lock_sock(sk); + release_tcp_port(sk); /* release the port immediately */ + + spin_lock(&reap_list_lock); + csk->passive_reap_next = reap_list; + reap_list = sk; + if (!csk->passive_reap_next) + schedule_work(&reap_task); + spin_unlock(&reap_list_lock); + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void add_pass_open_to_parent(struct sock *child, struct sock *lsk, + struct chtls_dev *cdev) +{ + struct request_sock *oreq; + struct chtls_sock *csk; + + if (lsk->sk_state != TCP_LISTEN) + return; + + csk = child->sk_user_data; + oreq = csk->passive_reap_next; + csk->passive_reap_next = NULL; + + reqsk_queue_removed(&inet_csk(lsk)->icsk_accept_queue, oreq); + __skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq); + + if (sk_acceptq_is_full(lsk)) { + chtls_reqsk_free(oreq); + add_to_reap_list(child); + } else { + refcount_set(&oreq->rsk_refcnt, 1); + inet_csk_reqsk_queue_add(lsk, oreq, child); + lsk->sk_data_ready(lsk); + } +} + +static void bl_add_pass_open_to_parent(struct sock *lsk, struct sk_buff *skb) +{ + struct sock *child = skb->sk; + + skb->sk = NULL; + add_pass_open_to_parent(child, lsk, BLOG_SKB_CB(skb)->cdev); + kfree_skb(skb); +} + +static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_pass_establish *req = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk; + struct sock *lsk, *sk; + unsigned int hwtid; + + hwtid = GET_TID(req); + sk = lookup_tid(cdev->tids, hwtid); + if (!sk) + return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE); + + bh_lock_sock(sk); + if (unlikely(sock_owned_by_user(sk))) { + kfree_skb(skb); + } else { + unsigned int stid; + void *data; + + csk = sk->sk_user_data; + csk->wr_max_credits = 64; + csk->wr_credits = 64; + csk->wr_unacked = 0; + make_established(sk, ntohl(req->snd_isn), ntohs(req->tcp_opt)); + stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); + sk->sk_state_change(sk); + if (unlikely(sk->sk_socket)) + sk_wake_async(sk, 0, POLL_OUT); + + data = lookup_stid(cdev->tids, stid); + lsk = ((struct listen_ctx *)data)->lsk; + + bh_lock_sock(lsk); + if (unlikely(skb_queue_empty(&csk->listen_ctx->synq))) { + /* removed from synq */ + bh_unlock_sock(lsk); + kfree_skb(skb); + goto unlock; + } + + if (likely(!sock_owned_by_user(lsk))) { + kfree_skb(skb); + add_pass_open_to_parent(sk, lsk, cdev); + } else { + skb->sk = sk; + BLOG_SKB_CB(skb)->cdev = cdev; + BLOG_SKB_CB(skb)->backlog_rcv = + bl_add_pass_open_to_parent; + __sk_add_backlog(lsk, skb); + } + bh_unlock_sock(lsk); + } +unlock: + bh_unlock_sock(sk); + return 0; +} + +/* + * Handle receipt of an urgent pointer. 
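+ * Broadly mirrors tcp_check_urg(): a pointer that does not advance past
+ * the last one seen is ignored, and a still-unread, non-inline urgent
+ * byte is discarded before the new urgent sequence is recorded.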
+ */ +static void handle_urg_ptr(struct sock *sk, u32 urg_seq) +{ + struct tcp_sock *tp = tcp_sk(sk); + + urg_seq--; + if (tp->urg_data && !after(urg_seq, tp->urg_seq)) + return; /* duplicate pointer */ + + sk_send_sigurg(sk); + if (tp->urg_seq == tp->copied_seq && tp->urg_data && + !sock_flag(sk, SOCK_URGINLINE) && + tp->copied_seq != tp->rcv_nxt) { + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + + tp->copied_seq++; + if (skb && tp->copied_seq - ULP_SKB_CB(skb)->seq >= skb->len) + chtls_free_skb(sk, skb); + } + + tp->urg_data = TCP_URG_NOTYET; + tp->urg_seq = urg_seq; +} + +static void check_sk_callbacks(struct chtls_sock *csk) +{ + struct sock *sk = csk->sk; + + if (unlikely(sk->sk_user_data && + !csk_flag_nochk(csk, CSK_CALLBACKS_CHKD))) + csk_set_flag(csk, CSK_CALLBACKS_CHKD); +} + +/* + * Handles Rx data that arrives in a state where the socket isn't accepting + * new data. + */ +static void handle_excess_rx(struct sock *sk, struct sk_buff *skb) +{ + if (!csk_flag(sk, CSK_ABORT_SHUTDOWN)) + chtls_abort_conn(sk, skb); + + kfree_skb(skb); +} + +static void chtls_recv_data(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_rx_data *hdr = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tp = tcp_sk(sk); + + if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) { + handle_excess_rx(sk, skb); + return; + } + + ULP_SKB_CB(skb)->seq = ntohl(hdr->seq); + ULP_SKB_CB(skb)->psh = hdr->psh; + skb_ulp_mode(skb) = ULP_MODE_NONE; + + skb_reset_transport_header(skb); + __skb_pull(skb, sizeof(*hdr) + RSS_HDR); + if (!skb->data_len) + __skb_trim(skb, ntohs(hdr->len)); + + if (unlikely(hdr->urg)) + handle_urg_ptr(sk, tp->rcv_nxt + ntohs(hdr->urg)); + if (unlikely(tp->urg_data == TCP_URG_NOTYET && + tp->urg_seq - tp->rcv_nxt < skb->len)) + tp->urg_data = TCP_URG_VALID | + skb->data[tp->urg_seq - tp->rcv_nxt]; + + if (unlikely(hdr->dack_mode != csk->delack_mode)) { + csk->delack_mode = hdr->dack_mode; + csk->delack_seq = tp->rcv_nxt; + } + + tcp_hdr(skb)->fin = 0; + tp->rcv_nxt += skb->len; + + __skb_queue_tail(&sk->sk_receive_queue, skb); + + if (!sock_flag(sk, SOCK_DEAD)) { + check_sk_callbacks(csk); + sk->sk_data_ready(sk); + } +} + +static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_rx_data *req = cplhdr(skb) + RSS_HDR; + unsigned int hwtid = GET_TID(req); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + if (unlikely(!sk)) { + pr_err("can't find conn. 
for hwtid %u.\n", hwtid); + return -EINVAL; + } + skb_dst_set(skb, NULL); + process_cpl_msg(chtls_recv_data, sk, skb); + return 0; +} + +static void chtls_recv_pdu(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_tls_data *hdr = cplhdr(skb); + struct chtls_sock *csk; + struct chtls_hws *tlsk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tlsk = &csk->tlshws; + tp = tcp_sk(sk); + + if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) { + handle_excess_rx(sk, skb); + return; + } + + ULP_SKB_CB(skb)->seq = ntohl(hdr->seq); + ULP_SKB_CB(skb)->flags = 0; + skb_ulp_mode(skb) = ULP_MODE_TLS; + + skb_reset_transport_header(skb); + __skb_pull(skb, sizeof(*hdr)); + if (!skb->data_len) + __skb_trim(skb, + CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd))); + + if (unlikely(tp->urg_data == TCP_URG_NOTYET && tp->urg_seq - + tp->rcv_nxt < skb->len)) + tp->urg_data = TCP_URG_VALID | + skb->data[tp->urg_seq - tp->rcv_nxt]; + + tcp_hdr(skb)->fin = 0; + tlsk->pldlen = CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd)); + __skb_queue_tail(&tlsk->sk_recv_queue, skb); +} + +static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_tls_data *req = cplhdr(skb); + unsigned int hwtid = GET_TID(req); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + if (unlikely(!sk)) { + pr_err("can't find conn. for hwtid %u.\n", hwtid); + return -EINVAL; + } + skb_dst_set(skb, NULL); + process_cpl_msg(chtls_recv_pdu, sk, skb); + return 0; +} + +static void chtls_set_hdrlen(struct sk_buff *skb, unsigned int nlen) +{ + struct tlsrx_cmp_hdr *tls_cmp_hdr = cplhdr(skb); + + skb->hdr_len = ntohs((__force __be16)tls_cmp_hdr->length); + tls_cmp_hdr->length = ntohs((__force __be16)nlen); +} + +static void chtls_rx_hdr(struct sock *sk, struct sk_buff *skb) +{ + struct tlsrx_cmp_hdr *tls_hdr_pkt; + struct cpl_rx_tls_cmp *cmp_cpl; + struct sk_buff *skb_rec; + struct chtls_sock *csk; + struct chtls_hws *tlsk; + struct tcp_sock *tp; + + cmp_cpl = cplhdr(skb); + csk = rcu_dereference_sk_user_data(sk); + tlsk = &csk->tlshws; + tp = tcp_sk(sk); + + ULP_SKB_CB(skb)->seq = ntohl(cmp_cpl->seq); + ULP_SKB_CB(skb)->flags = 0; + + skb_reset_transport_header(skb); + __skb_pull(skb, sizeof(*cmp_cpl)); + tls_hdr_pkt = (struct tlsrx_cmp_hdr *)skb->data; + if (tls_hdr_pkt->res_to_mac_error & TLSRX_HDR_PKT_ERROR_M) + tls_hdr_pkt->type = CONTENT_TYPE_ERROR; + if (!skb->data_len) + __skb_trim(skb, TLS_HEADER_LENGTH); + + tp->rcv_nxt += + CPL_RX_TLS_CMP_PDULENGTH_G(ntohl(cmp_cpl->pdulength_length)); + + ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_TLS_HDR; + skb_rec = __skb_dequeue(&tlsk->sk_recv_queue); + if (!skb_rec) { + __skb_queue_tail(&sk->sk_receive_queue, skb); + } else { + chtls_set_hdrlen(skb, tlsk->pldlen); + tlsk->pldlen = 0; + __skb_queue_tail(&sk->sk_receive_queue, skb); + __skb_queue_tail(&sk->sk_receive_queue, skb_rec); + } + + if (!sock_flag(sk, SOCK_DEAD)) { + check_sk_callbacks(csk); + sk->sk_data_ready(sk); + } +} + +static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_rx_tls_cmp *req = cplhdr(skb); + unsigned int hwtid = GET_TID(req); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + if (unlikely(!sk)) { + pr_err("can't find conn. 
for hwtid %u.\n", hwtid); + return -EINVAL; + } + skb_dst_set(skb, NULL); + process_cpl_msg(chtls_rx_hdr, sk, skb); + + return 0; +} + +static void chtls_timewait(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->rcv_nxt++; + tp->rx_opt.ts_recent_stamp = ktime_get_seconds(); + tp->srtt_us = 0; + tcp_time_wait(sk, TCP_TIME_WAIT, 0); +} + +static void chtls_peer_close(struct sock *sk, struct sk_buff *skb) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + goto out; + + sk->sk_shutdown |= RCV_SHUTDOWN; + sock_set_flag(sk, SOCK_DONE); + + switch (sk->sk_state) { + case TCP_SYN_RECV: + case TCP_ESTABLISHED: + tcp_set_state(sk, TCP_CLOSE_WAIT); + break; + case TCP_FIN_WAIT1: + tcp_set_state(sk, TCP_CLOSING); + break; + case TCP_FIN_WAIT2: + chtls_release_resources(sk); + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + chtls_conn_done(sk); + else + chtls_timewait(sk); + break; + default: + pr_info("cpl_peer_close in bad state %d\n", sk->sk_state); + } + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + /* Do not send POLL_HUP for half duplex close. */ + + if ((sk->sk_shutdown & SEND_SHUTDOWN) || + sk->sk_state == TCP_CLOSE) + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); + else + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + } +out: + kfree_skb(skb); +} + +static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_close_con_rpl *rpl = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + goto out; + + tp = tcp_sk(sk); + + tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ + + switch (sk->sk_state) { + case TCP_CLOSING: + chtls_release_resources(sk); + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + chtls_conn_done(sk); + else + chtls_timewait(sk); + break; + case TCP_LAST_ACK: + chtls_release_resources(sk); + chtls_conn_done(sk); + break; + case TCP_FIN_WAIT1: + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + else if (tcp_sk(sk)->linger2 < 0 && + !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN)) + chtls_abort_conn(sk, skb); + break; + default: + pr_info("close_con_rpl in bad state %d\n", sk->sk_state); + } +out: + kfree_skb(skb); +} + +static struct sk_buff *get_cpl_skb(struct sk_buff *skb, + size_t len, gfp_t gfp) +{ + if (likely(!skb_is_nonlinear(skb) && !skb_cloned(skb))) { + WARN_ONCE(skb->len < len, "skb alloc error"); + __skb_trim(skb, len); + skb_get(skb); + } else { + skb = alloc_skb(len, gfp); + if (skb) + __skb_put(skb, len); + } + return skb; +} + +static void set_abort_rpl_wr(struct sk_buff *skb, unsigned int tid, + int cmd) +{ + struct cpl_abort_rpl *rpl = cplhdr(skb); + + INIT_TP_WR_CPL(rpl, CPL_ABORT_RPL, tid); + rpl->cmd = cmd; +} + +static void send_defer_abort_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct sk_buff *reply_skb; + + reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl), + GFP_KERNEL | __GFP_NOFAIL); + __skb_put(reply_skb, sizeof(struct cpl_abort_rpl)); + set_abort_rpl_wr(reply_skb, GET_TID(req), + (req->status & CPL_ABORT_NO_RST)); + set_wr_txq(reply_skb, CPL_PRIORITY_DATA, req->status >> 1); + cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); + kfree_skb(skb); +} + +/* + * Add an skb to the deferred skb queue for processing from process context. 
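+ * The deferq worker is scheduled only on the empty->non-empty transition
+ * (queue length 1 after the add); skbs queued while it is pending are
+ * drained by that same run.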
+ */ +static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev, + defer_handler_t handler) +{ + DEFERRED_SKB_CB(skb)->handler = handler; + spin_lock_bh(&cdev->deferq.lock); + __skb_queue_tail(&cdev->deferq, skb); + if (skb_queue_len(&cdev->deferq) == 1) + schedule_work(&cdev->deferq_task); + spin_unlock_bh(&cdev->deferq.lock); +} + +static void send_abort_rpl(struct sock *sk, struct sk_buff *skb, + struct chtls_dev *cdev, int status, int queue) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct sk_buff *reply_skb; + struct chtls_sock *csk; + + csk = rcu_dereference_sk_user_data(sk); + + reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl), + GFP_KERNEL); + + if (!reply_skb) { + req->status = (queue << 1); + t4_defer_reply(skb, cdev, send_defer_abort_rpl); + return; + } + + set_abort_rpl_wr(reply_skb, GET_TID(req), status); + kfree_skb(skb); + + set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue); + if (csk_conn_inline(csk)) { + struct l2t_entry *e = csk->l2t_entry; + + if (e && sk->sk_state != TCP_SYN_RECV) { + cxgb4_l2t_send(csk->egress_dev, reply_skb, e); + return; + } + } + cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); +} + +static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb, + struct chtls_dev *cdev, + int status, int queue) +{ + struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR; + struct sk_buff *reply_skb; + struct chtls_sock *csk; + unsigned int tid; + + csk = rcu_dereference_sk_user_data(sk); + tid = GET_TID(req); + + reply_skb = get_cpl_skb(skb, sizeof(struct cpl_abort_rpl), gfp_any()); + if (!reply_skb) { + req->status = (queue << 1) | status; + t4_defer_reply(skb, cdev, send_defer_abort_rpl); + return; + } + + set_abort_rpl_wr(reply_skb, tid, status); + kfree_skb(skb); + set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue); + if (csk_conn_inline(csk)) { + struct l2t_entry *e = csk->l2t_entry; + + if (e && sk->sk_state != TCP_SYN_RECV) { + cxgb4_l2t_send(csk->egress_dev, reply_skb, e); + return; + } + } + cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); +} + +/* + * This is run from a listener's backlog to abort a child connection in + * SYN_RCV state (i.e., one on the listener's SYN queue). 
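+ * The child is cleaned up via do_abort_syn_rcv() and the abort is then
+ * acknowledged with a CPL_ABORT_RPL carrying CPL_ABORT_NO_RST.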
+ */ +static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb) +{ + struct chtls_sock *csk; + struct sock *child; + int queue; + + child = skb->sk; + csk = rcu_dereference_sk_user_data(child); + queue = csk->txq_idx; + + skb->sk = NULL; + do_abort_syn_rcv(child, lsk); + send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, + CPL_ABORT_NO_RST, queue); +} + +static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) +{ + const struct request_sock *oreq; + struct listen_ctx *listen_ctx; + struct chtls_sock *csk; + struct chtls_dev *cdev; + struct sock *psk; + void *ctx; + + csk = sk->sk_user_data; + oreq = csk->passive_reap_next; + cdev = csk->cdev; + + if (!oreq) + return -1; + + ctx = lookup_stid(cdev->tids, oreq->ts_recent); + if (!ctx) + return -1; + + listen_ctx = (struct listen_ctx *)ctx; + psk = listen_ctx->lsk; + + bh_lock_sock(psk); + if (!sock_owned_by_user(psk)) { + int queue = csk->txq_idx; + + do_abort_syn_rcv(sk, psk); + send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); + } else { + skb->sk = sk; + BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv; + __sk_add_backlog(psk, skb); + } + bh_unlock_sock(psk); + return 0; +} + +static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) +{ + const struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk = sk->sk_user_data; + int rst_status = CPL_ABORT_NO_RST; + int queue = csk->txq_idx; + + if (is_neg_adv(req->status)) { + if (sk->sk_state == TCP_SYN_RECV) + chtls_set_tcb_tflag(sk, 0, 0); + + kfree_skb(skb); + return; + } + + csk_reset_flag(csk, CSK_ABORT_REQ_RCVD); + + if (!csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) && + !csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { + struct tcp_sock *tp = tcp_sk(sk); + + if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0) + WARN_ONCE(1, "send_tx_flowc error"); + csk_set_flag(csk, CSK_TX_DATA_SENT); + } + + csk_set_flag(csk, CSK_ABORT_SHUTDOWN); + + if (!csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) { + sk->sk_err = ETIMEDOUT; + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_error_report(sk); + + if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb)) + return; + + chtls_release_resources(sk); + chtls_conn_done(sk); + } + + chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev, + rst_status, queue); +} + +static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_abort_rpl_rss *rpl = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk; + struct chtls_dev *cdev; + + csk = rcu_dereference_sk_user_data(sk); + cdev = csk->cdev; + + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) { + csk_reset_flag(csk, CSK_ABORT_RPL_PENDING); + if (!csk_flag_nochk(csk, CSK_ABORT_REQ_RCVD)) { + if (sk->sk_state == TCP_SYN_SENT) { + cxgb4_remove_tid(cdev->tids, + csk->port_id, + GET_TID(rpl), + sk->sk_family); + sock_put(sk); + } + chtls_release_resources(sk); + chtls_conn_done(sk); + } + } + kfree_skb(skb); +} + +static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_peer_close *req = cplhdr(skb) + RSS_HDR; + void (*fn)(struct sock *sk, struct sk_buff *skb); + unsigned int hwtid = GET_TID(req); + struct chtls_sock *csk; + struct sock *sk; + u8 opcode; + + opcode = ((const struct rss_header *)cplhdr(skb))->opcode; + + sk = lookup_tid(cdev->tids, hwtid); + if (!sk) + goto rel_skb; + + csk = sk->sk_user_data; + + switch (opcode) { + case CPL_PEER_CLOSE: + fn = chtls_peer_close; + break; + case CPL_CLOSE_CON_RPL: + fn = chtls_close_con_rpl; + break; + case CPL_ABORT_REQ_RSS: + /* + * Save the offload device in the skb, we may 
process this + * message after the socket has closed. + */ + BLOG_SKB_CB(skb)->cdev = csk->cdev; + fn = chtls_abort_req_rss; + break; + case CPL_ABORT_RPL_RSS: + fn = chtls_abort_rpl_rss; + break; + default: + goto rel_skb; + } + + process_cpl_msg(fn, sk, skb); + return 0; + +rel_skb: + kfree_skb(skb); + return 0; +} + +static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_fw4_ack *hdr = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk = sk->sk_user_data; + struct tcp_sock *tp = tcp_sk(sk); + u32 credits = hdr->credits; + u32 snd_una; + + snd_una = ntohl(hdr->snd_una); + csk->wr_credits += credits; + + if (csk->wr_unacked > csk->wr_max_credits - csk->wr_credits) + csk->wr_unacked = csk->wr_max_credits - csk->wr_credits; + + while (credits) { + struct sk_buff *pskb = csk->wr_skb_head; + u32 csum; + + if (unlikely(!pskb)) { + if (csk->wr_nondata) + csk->wr_nondata -= credits; + break; + } + csum = (__force u32)pskb->csum; + if (unlikely(credits < csum)) { + pskb->csum = (__force __wsum)(csum - credits); + break; + } + dequeue_wr(sk); + credits -= csum; + kfree_skb(pskb); + } + if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_SEQVAL) { + if (unlikely(before(snd_una, tp->snd_una))) { + kfree_skb(skb); + return; + } + + if (tp->snd_una != snd_una) { + tp->snd_una = snd_una; + tp->rcv_tstamp = tcp_time_stamp(tp); + if (tp->snd_una == tp->snd_nxt && + !csk_flag_nochk(csk, CSK_TX_FAILOVER)) + csk_reset_flag(csk, CSK_TX_WAIT_IDLE); + } + } + + if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_CH) { + unsigned int fclen16 = roundup(failover_flowc_wr_len, 16); + + csk->wr_credits -= fclen16; + csk_reset_flag(csk, CSK_TX_WAIT_IDLE); + csk_reset_flag(csk, CSK_TX_FAILOVER); + } + if (skb_queue_len(&csk->txq) && chtls_push_frames(csk, 0)) + sk->sk_write_space(sk); + + kfree_skb(skb); +} + +static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_fw4_ack *rpl = cplhdr(skb) + RSS_HDR; + unsigned int hwtid = GET_TID(rpl); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + if (unlikely(!sk)) { + pr_err("can't find conn. for hwtid %u.\n", hwtid); + return -EINVAL; + } + process_cpl_msg(chtls_rx_ack, sk, skb); + + return 0; +} + +chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { + [CPL_PASS_OPEN_RPL] = chtls_pass_open_rpl, + [CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl, + [CPL_PASS_ACCEPT_REQ] = chtls_pass_accept_req, + [CPL_PASS_ESTABLISH] = chtls_pass_establish, + [CPL_RX_DATA] = chtls_rx_data, + [CPL_TLS_DATA] = chtls_rx_pdu, + [CPL_RX_TLS_CMP] = chtls_rx_cmp, + [CPL_PEER_CLOSE] = chtls_conn_cpl, + [CPL_CLOSE_CON_RPL] = chtls_conn_cpl, + [CPL_ABORT_REQ_RSS] = chtls_conn_cpl, + [CPL_ABORT_RPL_RSS] = chtls_conn_cpl, + [CPL_FW4_ACK] = chtls_wr_ack, +}; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h new file mode 100644 index 000000000000..47ba81e42f5d --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 Chelsio Communications, Inc. 
+ */ + +#ifndef __CHTLS_CM_H__ +#define __CHTLS_CM_H__ + +/* + * TCB settings + */ +/* 3:0 */ +#define TCB_ULP_TYPE_W 0 +#define TCB_ULP_TYPE_S 0 +#define TCB_ULP_TYPE_M 0xfULL +#define TCB_ULP_TYPE_V(x) ((x) << TCB_ULP_TYPE_S) + +/* 11:4 */ +#define TCB_ULP_RAW_W 0 +#define TCB_ULP_RAW_S 4 +#define TCB_ULP_RAW_M 0xffULL +#define TCB_ULP_RAW_V(x) ((x) << TCB_ULP_RAW_S) + +#define TF_TLS_KEY_SIZE_S 7 +#define TF_TLS_KEY_SIZE_V(x) ((x) << TF_TLS_KEY_SIZE_S) + +#define TF_TLS_CONTROL_S 2 +#define TF_TLS_CONTROL_V(x) ((x) << TF_TLS_CONTROL_S) + +#define TF_TLS_ACTIVE_S 1 +#define TF_TLS_ACTIVE_V(x) ((x) << TF_TLS_ACTIVE_S) + +#define TF_TLS_ENABLE_S 0 +#define TF_TLS_ENABLE_V(x) ((x) << TF_TLS_ENABLE_S) + +#define TF_RX_QUIESCE_S 15 +#define TF_RX_QUIESCE_V(x) ((x) << TF_RX_QUIESCE_S) + +/* + * Max receive window supported by HW in bytes. Only a small part of it can + * be set through option0, the rest needs to be set through RX_DATA_ACK. + */ +#define MAX_RCV_WND ((1U << 27) - 1) +#define MAX_MSS 65536 + +/* + * Min receive window. We want it to be large enough to accommodate receive + * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. + */ +#define MIN_RCV_WND (24 * 1024U) +#define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000)) + +/* ulp_mem_io + ulptx_idata + payload + padding */ +#define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8) + +/* for TX: a skb must have a headroom of at least TX_HEADER_LEN bytes */ +#define TX_HEADER_LEN \ + (sizeof(struct fw_ofld_tx_data_wr) + sizeof(struct sge_opaque_hdr)) +#define TX_TLSHDR_LEN \ + (sizeof(struct fw_tlstx_data_wr) + sizeof(struct cpl_tx_tls_sfo) + \ + sizeof(struct sge_opaque_hdr)) +#define TXDATA_SKB_LEN 128 + +enum { + CPL_TX_TLS_SFO_TYPE_CCS, + CPL_TX_TLS_SFO_TYPE_ALERT, + CPL_TX_TLS_SFO_TYPE_HANDSHAKE, + CPL_TX_TLS_SFO_TYPE_DATA, + CPL_TX_TLS_SFO_TYPE_HEARTBEAT, +}; + +enum { + TLS_HDR_TYPE_CCS = 20, + TLS_HDR_TYPE_ALERT, + TLS_HDR_TYPE_HANDSHAKE, + TLS_HDR_TYPE_RECORD, + TLS_HDR_TYPE_HEARTBEAT, +}; + +typedef void (*defer_handler_t)(struct chtls_dev *dev, struct sk_buff *skb); +extern struct request_sock_ops chtls_rsk_ops; +extern struct request_sock_ops chtls_rsk_opsv6; + +struct deferred_skb_cb { + defer_handler_t handler; + struct chtls_dev *dev; +}; + +#define DEFERRED_SKB_CB(skb) ((struct deferred_skb_cb *)(skb)->cb) +#define failover_flowc_wr_len offsetof(struct fw_flowc_wr, mnemval[3]) +#define WR_SKB_CB(skb) ((struct wr_skb_cb *)(skb)->cb) +#define ACCEPT_QUEUE(sk) (&inet_csk(sk)->icsk_accept_queue.rskq_accept_head) + +#define SND_WSCALE(tp) ((tp)->rx_opt.snd_wscale) +#define RCV_WSCALE(tp) ((tp)->rx_opt.rcv_wscale) +#define USER_MSS(tp) ((tp)->rx_opt.user_mss) +#define TS_RECENT_STAMP(tp) ((tp)->rx_opt.ts_recent_stamp) +#define WSCALE_OK(tp) ((tp)->rx_opt.wscale_ok) +#define TSTAMP_OK(tp) ((tp)->rx_opt.tstamp_ok) +#define SACK_OK(tp) ((tp)->rx_opt.sack_ok) +#define INC_ORPHAN_COUNT(sk) percpu_counter_inc((sk)->sk_prot->orphan_count) + +/* TLS SKB */ +#define skb_ulp_tls_inline(skb) (ULP_SKB_CB(skb)->ulp.tls.ofld) +#define skb_ulp_tls_iv_imm(skb) (ULP_SKB_CB(skb)->ulp.tls.iv) + +void chtls_defer_reply(struct sk_buff *skb, struct chtls_dev *dev, + defer_handler_t handler); + +/* + * Returns true if the socket is in one of the supplied states. 
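+ * The states argument is a mask of TCPF_* flags, so a caller can test
+ * several states at once, e.g. sk_in_state(sk, TCPF_CLOSE | TCPF_TIME_WAIT).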
+ */ +static inline unsigned int sk_in_state(const struct sock *sk, + unsigned int states) +{ + return states & (1 << sk->sk_state); +} + +static void chtls_rsk_destructor(struct request_sock *req) +{ + /* do nothing */ +} + +static inline void chtls_init_rsk_ops(struct proto *chtls_tcp_prot, + struct request_sock_ops *chtls_tcp_ops, + struct proto *tcp_prot, int family) +{ + memset(chtls_tcp_ops, 0, sizeof(*chtls_tcp_ops)); + chtls_tcp_ops->family = family; + chtls_tcp_ops->obj_size = sizeof(struct tcp_request_sock); + chtls_tcp_ops->destructor = chtls_rsk_destructor; + chtls_tcp_ops->slab = tcp_prot->rsk_prot->slab; + chtls_tcp_prot->rsk_prot = chtls_tcp_ops; +} + +static inline void chtls_reqsk_free(struct request_sock *req) +{ + if (req->rsk_listener) + sock_put(req->rsk_listener); + kmem_cache_free(req->rsk_ops->slab, req); +} + +#define DECLARE_TASK_FUNC(task, task_param) \ + static void task(struct work_struct *task_param) + +static inline void sk_wakeup_sleepers(struct sock *sk, bool interruptable) +{ + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (skwq_has_sleeper(wq)) { + if (interruptable) + wake_up_interruptible(sk_sleep(sk)); + else + wake_up_all(sk_sleep(sk)); + } + rcu_read_unlock(); +} + +static inline void chtls_set_req_port(struct request_sock *oreq, + __be16 source, __be16 dest) +{ + inet_rsk(oreq)->ir_rmt_port = source; + inet_rsk(oreq)->ir_num = ntohs(dest); +} + +static inline void chtls_set_req_addr(struct request_sock *oreq, + __be32 local_ip, __be32 peer_ip) +{ + inet_rsk(oreq)->ir_loc_addr = local_ip; + inet_rsk(oreq)->ir_rmt_addr = peer_ip; +} + +static inline void chtls_free_skb(struct sock *sk, struct sk_buff *skb) +{ + skb_dst_set(skb, NULL); + __skb_unlink(skb, &sk->sk_receive_queue); + __kfree_skb(skb); +} + +static inline void chtls_kfree_skb(struct sock *sk, struct sk_buff *skb) +{ + skb_dst_set(skb, NULL); + __skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); +} + +static inline void chtls_reset_wr_list(struct chtls_sock *csk) +{ + csk->wr_skb_head = NULL; + csk->wr_skb_tail = NULL; +} + +static inline void enqueue_wr(struct chtls_sock *csk, struct sk_buff *skb) +{ + WR_SKB_CB(skb)->next_wr = NULL; + + skb_get(skb); + + if (!csk->wr_skb_head) + csk->wr_skb_head = skb; + else + WR_SKB_CB(csk->wr_skb_tail)->next_wr = skb; + csk->wr_skb_tail = skb; +} + +static inline struct sk_buff *dequeue_wr(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct sk_buff *skb = NULL; + + skb = csk->wr_skb_head; + + if (likely(skb)) { + /* Don't bother clearing the tail */ + csk->wr_skb_head = WR_SKB_CB(skb)->next_wr; + WR_SKB_CB(skb)->next_wr = NULL; + } + return skb; +} +#endif diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c new file mode 100644 index 000000000000..f1820aca0d33 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c @@ -0,0 +1,426 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018 Chelsio Communications, Inc. 
+ *
+ * Written by: Atul Gupta (atul.gupta@chelsio.com)
+ */
+
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/tls.h>
+#include <net/tls.h>
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+static void __set_tcb_field_direct(struct chtls_sock *csk,
+				   struct cpl_set_tcb_field *req, u16 word,
+				   u64 mask, u64 val, u8 cookie, int no_reply)
+{
+	struct ulptx_idata *sc;
+
+	INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, csk->tid);
+	req->wr.wr_mid |= htonl(FW_WR_FLOWID_V(csk->tid));
+	req->reply_ctrl = htons(NO_REPLY_V(no_reply) |
+				QUEUENO_V(csk->rss_qid));
+	req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(cookie));
+	req->mask = cpu_to_be64(mask);
+	req->val = cpu_to_be64(val);
+	sc = (struct ulptx_idata *)(req + 1);
+	sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
+	sc->len = htonl(0);
+}
+
+static void __set_tcb_field(struct sock *sk, struct sk_buff *skb, u16 word,
+			    u64 mask, u64 val, u8 cookie, int no_reply)
+{
+	struct cpl_set_tcb_field *req;
+	struct chtls_sock *csk;
+	struct ulptx_idata *sc;
+	unsigned int wrlen;
+
+	wrlen = roundup(sizeof(*req) + sizeof(*sc), 16);
+	csk = rcu_dereference_sk_user_data(sk);
+
+	req = (struct cpl_set_tcb_field *)__skb_put(skb, wrlen);
+	__set_tcb_field_direct(csk, req, word, mask, val, cookie, no_reply);
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->port_id);
+}
+
+/*
+ * Send a control message to the HW; the message goes as immediate data
+ * and the packet is freed immediately.
+ */
+static int chtls_set_tcb_field(struct sock *sk, u16 word, u64 mask, u64 val)
+{
+	struct cpl_set_tcb_field *req;
+	unsigned int credits_needed;
+	struct chtls_sock *csk;
+	struct ulptx_idata *sc;
+	struct sk_buff *skb;
+	unsigned int wrlen;
+	int ret;
+
+	wrlen = roundup(sizeof(*req) + sizeof(*sc), 16);
+
+	skb = alloc_skb(wrlen, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	credits_needed = DIV_ROUND_UP(wrlen, 16);
+	csk = rcu_dereference_sk_user_data(sk);
+
+	__set_tcb_field(sk, skb, word, mask, val, 0, 1);
+	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
+	csk->wr_credits -= credits_needed;
+	csk->wr_unacked += credits_needed;
+	enqueue_wr(csk, skb);
+	ret = cxgb4_ofld_send(csk->egress_dev, skb);
+	if (ret < 0)
+		kfree_skb(skb);
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Set one of the t_flags bits in the TCB.
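+ * t_flags lives in TCB word 1, so this builds a single-bit mask/value
+ * pair for chtls_set_tcb_field(); chtls_set_tcb_quiesce() below uses
+ * the same word with TF_RX_QUIESCE_S.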
+ */
+int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val)
+{
+	return chtls_set_tcb_field(sk, 1, 1ULL << bit_pos,
+				   (u64)val << bit_pos);
+}
+
+static int chtls_set_tcb_keyid(struct sock *sk, int keyid)
+{
+	return chtls_set_tcb_field(sk, 31, 0xFFFFFFFFULL, keyid);
+}
+
+static int chtls_set_tcb_seqno(struct sock *sk)
+{
+	return chtls_set_tcb_field(sk, 28, ~0ULL, 0);
+}
+
+static int chtls_set_tcb_quiesce(struct sock *sk, int val)
+{
+	return chtls_set_tcb_field(sk, 1, (1ULL << TF_RX_QUIESCE_S),
+				   TF_RX_QUIESCE_V(val));
+}
+
+/* TLS Key bitmap processing */
+int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi)
+{
+	unsigned int num_key_ctx, bsize;
+	int ksize;
+
+	num_key_ctx = (lldi->vr->key.size / TLS_KEY_CONTEXT_SZ);
+	bsize = BITS_TO_LONGS(num_key_ctx);
+
+	cdev->kmap.size = num_key_ctx;
+	cdev->kmap.available = bsize;
+	ksize = sizeof(*cdev->kmap.addr) * bsize;
+	cdev->kmap.addr = kvzalloc(ksize, GFP_KERNEL);
+	if (!cdev->kmap.addr)
+		return -ENOMEM;
+
+	cdev->kmap.start = lldi->vr->key.start;
+	spin_lock_init(&cdev->kmap.lock);
+	return 0;
+}
+
+static int get_new_keyid(struct chtls_sock *csk, u32 optname)
+{
+	struct net_device *dev = csk->egress_dev;
+	struct chtls_dev *cdev = csk->cdev;
+	struct chtls_hws *hws;
+	struct adapter *adap;
+	int keyid;
+
+	adap = netdev2adap(dev);
+	hws = &csk->tlshws;
+
+	spin_lock_bh(&cdev->kmap.lock);
+	keyid = find_first_zero_bit(cdev->kmap.addr, cdev->kmap.size);
+	if (keyid < cdev->kmap.size) {
+		__set_bit(keyid, cdev->kmap.addr);
+		if (optname == TLS_RX)
+			hws->rxkey = keyid;
+		else
+			hws->txkey = keyid;
+		atomic_inc(&adap->chcr_stats.tls_key);
+	} else {
+		keyid = -1;
+	}
+	spin_unlock_bh(&cdev->kmap.lock);
+	return keyid;
+}
+
+void free_tls_keyid(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct net_device *dev = csk->egress_dev;
+	struct chtls_dev *cdev = csk->cdev;
+	struct chtls_hws *hws;
+	struct adapter *adap;
+
+	if (!cdev->kmap.addr)
+		return;
+
+	adap = netdev2adap(dev);
+	hws = &csk->tlshws;
+
+	spin_lock_bh(&cdev->kmap.lock);
+	if (hws->rxkey >= 0) {
+		__clear_bit(hws->rxkey, cdev->kmap.addr);
+		atomic_dec(&adap->chcr_stats.tls_key);
+		hws->rxkey = -1;
+	}
+	if (hws->txkey >= 0) {
+		__clear_bit(hws->txkey, cdev->kmap.addr);
+		atomic_dec(&adap->chcr_stats.tls_key);
+		hws->txkey = -1;
+	}
+	spin_unlock_bh(&cdev->kmap.lock);
+}
+
+unsigned int keyid_to_addr(int start_addr, int keyid)
+{
+	return (start_addr + (keyid * TLS_KEY_CONTEXT_SZ)) >> 5;
+}
+
+static void chtls_rxkey_ivauth(struct _key_ctx *kctx)
+{
+	kctx->iv_to_auth = cpu_to_be64(KEYCTX_TX_WR_IV_V(6ULL) |
+				       KEYCTX_TX_WR_AAD_V(1ULL) |
+				       KEYCTX_TX_WR_AADST_V(5ULL) |
+				       KEYCTX_TX_WR_CIPHER_V(14ULL) |
+				       KEYCTX_TX_WR_CIPHERST_V(0ULL) |
+				       KEYCTX_TX_WR_AUTH_V(14ULL) |
+				       KEYCTX_TX_WR_AUTHST_V(16ULL) |
+				       KEYCTX_TX_WR_AUTHIN_V(16ULL));
+}
+
+static int chtls_key_info(struct chtls_sock *csk,
+			  struct _key_ctx *kctx,
+			  u32 keylen, u32 optname,
+			  int cipher_type)
+{
+	unsigned char key[AES_MAX_KEY_SIZE];
+	unsigned char *key_p, *salt;
+	unsigned char ghash_h[AEAD_H_SIZE];
+	int ck_size, key_ctx_size, kctx_mackey_size, salt_size;
+	struct crypto_aes_ctx aes;
+	int ret;
+
+	key_ctx_size = sizeof(struct _key_ctx) +
+		       roundup(keylen, 16) + AEAD_H_SIZE;
+
+	/* GCM mode of AES supports 128 and 256 bit encryption, so
+	 * prepare the key context based on the GCM cipher type
+	 */
+	switch (cipher_type) {
+	case TLS_CIPHER_AES_GCM_128: {
+		struct tls12_crypto_info_aes_gcm_128 *gcm_ctx_128 =
+			(struct tls12_crypto_info_aes_gcm_128 *)
+			&csk->tlshws.crypto_info;
+		memcpy(key, gcm_ctx_128->key, keylen);
+
+		key_p = gcm_ctx_128->key;
+		salt = gcm_ctx_128->salt;
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+		salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE;
+		kctx_mackey_size = CHCR_KEYCTX_MAC_KEY_SIZE_128;
+		break;
+	}
+	case TLS_CIPHER_AES_GCM_256: {
+		struct tls12_crypto_info_aes_gcm_256 *gcm_ctx_256 =
+			(struct tls12_crypto_info_aes_gcm_256 *)
+			&csk->tlshws.crypto_info;
+		memcpy(key, gcm_ctx_256->key, keylen);
+
+		key_p = gcm_ctx_256->key;
+		salt = gcm_ctx_256->salt;
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+		salt_size = TLS_CIPHER_AES_GCM_256_SALT_SIZE;
+		kctx_mackey_size = CHCR_KEYCTX_MAC_KEY_SIZE_256;
+		break;
+	}
+	default:
+		pr_err("GCM: Invalid key length %d\n", keylen);
+		return -EINVAL;
+	}
+
+	/* Calculate the H = CIPH(K, 0 repeated 16 times).
+	 * It will go in key context
+	 */
+	ret = aes_expandkey(&aes, key, keylen);
+	if (ret)
+		return ret;
+
+	memset(ghash_h, 0, AEAD_H_SIZE);
+	aes_encrypt(&aes, ghash_h, ghash_h);
+	memzero_explicit(&aes, sizeof(aes));
+	csk->tlshws.keylen = key_ctx_size;
+
+	/* Copy the Key context */
+	if (optname == TLS_RX) {
+		int key_ctx;
+
+		key_ctx = ((key_ctx_size >> 4) << 3);
+		kctx->ctx_hdr = FILL_KEY_CRX_HDR(ck_size,
+						 kctx_mackey_size,
+						 0, 0, key_ctx);
+		chtls_rxkey_ivauth(kctx);
+	} else {
+		kctx->ctx_hdr = FILL_KEY_CTX_HDR(ck_size,
+						 kctx_mackey_size,
+						 0, 0, key_ctx_size >> 4);
+	}
+
+	memcpy(kctx->salt, salt, salt_size);
+	memcpy(kctx->key, key_p, keylen);
+	memcpy(kctx->key + keylen, ghash_h, AEAD_H_SIZE);
+	/* erase key info from driver */
+	memset(key_p, 0, keylen);
+
+	return 0;
+}
+
+static void chtls_set_scmd(struct chtls_sock *csk)
+{
+	struct chtls_hws *hws = &csk->tlshws;
+
+	hws->scmd.seqno_numivs =
+		SCMD_SEQ_NO_CTRL_V(3) |
+		SCMD_PROTO_VERSION_V(0) |
+		SCMD_ENC_DEC_CTRL_V(0) |
+		SCMD_CIPH_AUTH_SEQ_CTRL_V(1) |
+		SCMD_CIPH_MODE_V(2) |
+		SCMD_AUTH_MODE_V(4) |
+		SCMD_HMAC_CTRL_V(0) |
+		SCMD_IV_SIZE_V(4) |
+		SCMD_NUM_IVS_V(1);
+
+	hws->scmd.ivgen_hdrlen =
+		SCMD_IV_GEN_CTRL_V(1) |
+		SCMD_KEY_CTX_INLINE_V(0) |
+		SCMD_TLS_FRAG_ENABLE_V(1);
+}
+
+int chtls_setkey(struct chtls_sock *csk, u32 keylen,
+		 u32 optname, int cipher_type)
+{
+	struct tls_key_req *kwr;
+	struct chtls_dev *cdev;
+	struct _key_ctx *kctx;
+	int wrlen, klen, len;
+	struct sk_buff *skb;
+	struct sock *sk;
+	int keyid;
+	int kaddr;
+	int ret;
+
+	cdev = csk->cdev;
+	sk = csk->sk;
+
+	klen = roundup((keylen + AEAD_H_SIZE) + sizeof(*kctx), 32);
+	wrlen = roundup(sizeof(*kwr), 16);
+	len = klen + wrlen;
+
+	/* Flush outstanding data before the new key takes effect */
+	if (optname == TLS_TX) {
+		lock_sock(sk);
+		if (skb_queue_len(&csk->txq))
+			chtls_push_frames(csk, 0);
+		release_sock(sk);
+	}
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	keyid = get_new_keyid(csk, optname);
+	if (keyid < 0) {
+		ret = -ENOSPC;
+		goto out_nokey;
+	}
+
+	kaddr = keyid_to_addr(cdev->kmap.start, keyid);
+	kwr = (struct tls_key_req *)__skb_put_zero(skb, len);
+	kwr->wr.op_to_compl =
+		cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F |
+			    FW_WR_ATOMIC_V(1U));
+	kwr->wr.flowid_len16 =
+		cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16) |
+					  FW_WR_FLOWID_V(csk->tid)));
+	kwr->wr.protocol = 0;
+	kwr->wr.mfs = htons(TLS_MFS);
+	kwr->wr.reneg_to_write_rx = optname;
+
+	/* ulptx command */
+	kwr->req.cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) |
+				   T5_ULP_MEMIO_ORDER_V(1) |
+				   T5_ULP_MEMIO_IMM_V(1));
+	kwr->req.len16 = cpu_to_be32((csk->tid << 8) |
+				     DIV_ROUND_UP(len - sizeof(kwr->wr), 16));
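+	/* ULP_TX_MEM_WRITE body: dlen below gives the key-context length
+	 * in 32-byte units (klen >> 5) and lock_addr the 32-byte-aligned
+	 * key SRAM slot from keyid_to_addr() above.
+	 */
+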
kwr->req.dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(klen >> 5)); + kwr->req.lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(kaddr)); + + /* sub command */ + kwr->sc_imm.cmd_more = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_IMM)); + kwr->sc_imm.len = cpu_to_be32(klen); + + lock_sock(sk); + /* key info */ + kctx = (struct _key_ctx *)(kwr + 1); + ret = chtls_key_info(csk, kctx, keylen, optname, cipher_type); + if (ret) + goto out_notcb; + + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->tlshws.txqid); + csk->wr_credits -= DIV_ROUND_UP(len, 16); + csk->wr_unacked += DIV_ROUND_UP(len, 16); + enqueue_wr(csk, skb); + cxgb4_ofld_send(csk->egress_dev, skb); + + chtls_set_scmd(csk); + /* Clear quiesce for Rx key */ + if (optname == TLS_RX) { + ret = chtls_set_tcb_keyid(sk, keyid); + if (ret) + goto out_notcb; + ret = chtls_set_tcb_field(sk, 0, + TCB_ULP_RAW_V(TCB_ULP_RAW_M), + TCB_ULP_RAW_V((TF_TLS_KEY_SIZE_V(1) | + TF_TLS_CONTROL_V(1) | + TF_TLS_ACTIVE_V(1) | + TF_TLS_ENABLE_V(1)))); + if (ret) + goto out_notcb; + ret = chtls_set_tcb_seqno(sk); + if (ret) + goto out_notcb; + ret = chtls_set_tcb_quiesce(sk, 0); + if (ret) + goto out_notcb; + csk->tlshws.rxkey = keyid; + } else { + csk->tlshws.tx_seq_no = 0; + csk->tlshws.txkey = keyid; + } + + release_sock(sk); + return ret; +out_notcb: + release_sock(sk); + free_tls_keyid(sk); +out_nokey: + kfree_skb(skb); + return ret; +} diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c new file mode 100644 index 000000000000..2e9acae1cba3 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -0,0 +1,1907 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018 Chelsio Communications, Inc. + * + * Written by: Atul Gupta (atul.gupta@chelsio.com) + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/skbuff.h> +#include <linux/timer.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/sched/signal.h> +#include <net/tcp.h> +#include <net/busy_poll.h> +#include <crypto/aes.h> + +#include "chtls.h" +#include "chtls_cm.h" + +static bool is_tls_tx(struct chtls_sock *csk) +{ + return csk->tlshws.txkey >= 0; +} + +static bool is_tls_rx(struct chtls_sock *csk) +{ + return csk->tlshws.rxkey >= 0; +} + +static int data_sgl_len(const struct sk_buff *skb) +{ + unsigned int cnt; + + cnt = skb_shinfo(skb)->nr_frags; + return sgl_len(cnt) * 8; +} + +static int nos_ivs(struct sock *sk, unsigned int size) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + return DIV_ROUND_UP(size, csk->tlshws.mfs); +} + +static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb) +{ + int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE; + int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb); + + if ((hlen + KEY_ON_MEM_SZ + ivs_size) < + MAX_IMM_OFLD_TX_DATA_WR_LEN) { + ULP_SKB_CB(skb)->ulp.tls.iv = 1; + return 1; + } + ULP_SKB_CB(skb)->ulp.tls.iv = 0; + return 0; +} + +static int max_ivs_size(struct sock *sk, int size) +{ + return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE; +} + +static int ivs_size(struct sock *sk, const struct sk_buff *skb) +{ + return set_ivs_imm(sk, skb) ? 
(nos_ivs(sk, skb->len) * + CIPHER_BLOCK_SIZE) : 0; +} + +static int flowc_wr_credits(int nparams, int *flowclenp) +{ + int flowclen16, flowclen; + + flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); + flowclen16 = DIV_ROUND_UP(flowclen, 16); + flowclen = flowclen16 * 16; + + if (flowclenp) + *flowclenp = flowclen; + + return flowclen16; +} + +static struct sk_buff *create_flowc_wr_skb(struct sock *sk, + struct fw_flowc_wr *flowc, + int flowclen) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct sk_buff *skb; + + skb = alloc_skb(flowclen, GFP_ATOMIC); + if (!skb) + return NULL; + + __skb_put_data(skb, flowc, flowclen); + skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); + + return skb; +} + +static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc, + int flowclen) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int flowclen16; + int ret; + + flowclen16 = flowclen / 16; + + if (csk_flag(sk, CSK_TX_DATA_SENT)) { + skb = create_flowc_wr_skb(sk, flowc, flowclen); + if (!skb) + return -ENOMEM; + + skb_entail(sk, skb, + ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND); + return 0; + } + + ret = cxgb4_immdata_send(csk->egress_dev, + csk->txq_idx, + flowc, flowclen); + if (!ret) + return flowclen16; + skb = create_flowc_wr_skb(sk, flowc, flowclen); + if (!skb) + return -ENOMEM; + send_or_defer(sk, tp, skb, 0); + return flowclen16; +} + +static u8 tcp_state_to_flowc_state(u8 state) +{ + switch (state) { + case TCP_ESTABLISHED: + return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; + case TCP_CLOSE_WAIT: + return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT; + case TCP_FIN_WAIT1: + return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1; + case TCP_CLOSING: + return FW_FLOWC_MNEM_TCPSTATE_CLOSING; + case TCP_LAST_ACK: + return FW_FLOWC_MNEM_TCPSTATE_LASTACK; + case TCP_FIN_WAIT2: + return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2; + } + + return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; +} + +int send_tx_flowc_wr(struct sock *sk, int compl, + u32 snd_nxt, u32 rcv_nxt) +{ + struct flowc_packed { + struct fw_flowc_wr fc; + struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX]; + } __packed sflowc; + int nparams, paramidx, flowclen16, flowclen; + struct fw_flowc_wr *flowc; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tp = tcp_sk(sk); + memset(&sflowc, 0, sizeof(sflowc)); + flowc = &sflowc.fc; + +#define FLOWC_PARAM(__m, __v) \ + do { \ + flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ + flowc->mnemval[paramidx].val = cpu_to_be32(__v); \ + paramidx++; \ + } while (0) + + paramidx = 0; + + FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf)); + FLOWC_PARAM(CH, csk->tx_chan); + FLOWC_PARAM(PORT, csk->tx_chan); + FLOWC_PARAM(IQID, csk->rss_qid); + FLOWC_PARAM(SNDNXT, tp->snd_nxt); + FLOWC_PARAM(RCVNXT, tp->rcv_nxt); + FLOWC_PARAM(SNDBUF, csk->sndbuf); + FLOWC_PARAM(MSS, tp->mss_cache); + FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state)); + + if (SND_WSCALE(tp)) + FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp)); + + if (csk->ulp_mode == ULP_MODE_TLS) + FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS); + + if (csk->tlshws.fcplenmax) + FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax); + + nparams = paramidx; +#undef FLOWC_PARAM + + flowclen16 = flowc_wr_credits(nparams, &flowclen); + flowc->op_to_nparams = + cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | + FW_WR_COMPL_V(compl) | + FW_FLOWC_WR_NPARAMS_V(nparams)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | + 
FW_WR_FLOWID_V(csk->tid)); + + return send_flowc_wr(sk, flowc, flowclen); +} + +/* Copy IVs to WR */ +static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb) + +{ + struct chtls_sock *csk; + unsigned char *iv_loc; + struct chtls_hws *hws; + unsigned char *ivs; + u16 number_of_ivs; + struct page *page; + int err = 0; + + csk = rcu_dereference_sk_user_data(sk); + hws = &csk->tlshws; + number_of_ivs = nos_ivs(sk, skb->len); + + if (number_of_ivs > MAX_IVS_PAGE) { + pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs); + return -ENOMEM; + } + + /* generate the IVs */ + ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC); + if (!ivs) + return -ENOMEM; + get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE); + + if (skb_ulp_tls_iv_imm(skb)) { + /* send the IVs as immediate data in the WR */ + iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs * + CIPHER_BLOCK_SIZE); + if (iv_loc) + memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE); + + hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE; + } else { + /* Send the IVs as sgls */ + /* Already accounted IV DSGL for credits */ + skb_shinfo(skb)->nr_frags--; + page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0); + if (!page) { + pr_info("%s : Page allocation for IVs failed\n", + __func__); + err = -ENOMEM; + goto out; + } + memcpy(page_address(page), ivs, number_of_ivs * + CIPHER_BLOCK_SIZE); + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0, + number_of_ivs * CIPHER_BLOCK_SIZE); + hws->ivsize = 0; + } +out: + kfree(ivs); + return err; +} + +/* Copy Key to WR */ +static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb) +{ + struct ulptx_sc_memrd *sc_memrd; + struct chtls_sock *csk; + struct chtls_dev *cdev; + struct ulptx_idata *sc; + struct chtls_hws *hws; + u32 immdlen; + int kaddr; + + csk = rcu_dereference_sk_user_data(sk); + hws = &csk->tlshws; + cdev = csk->cdev; + + immdlen = sizeof(*sc) + sizeof(*sc_memrd); + kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey); + sc = (struct ulptx_idata *)__skb_push(skb, immdlen); + if (sc) { + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); + sc->len = htonl(0); + sc_memrd = (struct ulptx_sc_memrd *)(sc + 1); + sc_memrd->cmd_to_len = + htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) | + ULP_TX_SC_MORE_V(1) | + ULPTX_LEN16_V(hws->keylen >> 4)); + sc_memrd->addr = htonl(kaddr); + } +} + +static u64 tlstx_incr_seqnum(struct chtls_hws *hws) +{ + return hws->tx_seq_no++; +} + +static bool is_sg_request(const struct sk_buff *skb) +{ + return skb->peeked || + (skb->len > MAX_IMM_ULPTX_WR_LEN); +} + +/* + * Returns true if an sk_buff carries urgent data. 
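+ * (ULPCB_FLAG_URG is set from MSG_OOB in mark_urg() and is mapped to
+ * the TX_URG work-request flag by the WR builders below).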
+ */ +static bool skb_urgent(struct sk_buff *skb) +{ + return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG; +} + +/* TLS content type for CPL SFO */ +static unsigned char tls_content_type(unsigned char content_type) +{ + switch (content_type) { + case TLS_HDR_TYPE_CCS: + return CPL_TX_TLS_SFO_TYPE_CCS; + case TLS_HDR_TYPE_ALERT: + return CPL_TX_TLS_SFO_TYPE_ALERT; + case TLS_HDR_TYPE_HANDSHAKE: + return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; + case TLS_HDR_TYPE_HEARTBEAT: + return CPL_TX_TLS_SFO_TYPE_HEARTBEAT; + } + return CPL_TX_TLS_SFO_TYPE_DATA; +} + +static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb, + int dlen, int tls_immd, u32 credits, + int expn, int pdus) +{ + struct fw_tlstx_data_wr *req_wr; + struct cpl_tx_tls_sfo *req_cpl; + unsigned int wr_ulp_mode_force; + struct tls_scmd *updated_scmd; + unsigned char data_type; + struct chtls_sock *csk; + struct net_device *dev; + struct chtls_hws *hws; + struct tls_scmd *scmd; + struct adapter *adap; + unsigned char *req; + int immd_len; + int iv_imm; + int len; + + csk = rcu_dereference_sk_user_data(sk); + iv_imm = skb_ulp_tls_iv_imm(skb); + dev = csk->egress_dev; + adap = netdev2adap(dev); + hws = &csk->tlshws; + scmd = &hws->scmd; + len = dlen + expn; + + dlen = (dlen < hws->mfs) ? dlen : hws->mfs; + atomic_inc(&adap->chcr_stats.tls_pdu_tx); + + updated_scmd = scmd; + updated_scmd->seqno_numivs &= 0xffffff80; + updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus); + hws->scmd = *updated_scmd; + + req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo)); + req_cpl = (struct cpl_tx_tls_sfo *)req; + req = (unsigned char *)__skb_push(skb, (sizeof(struct + fw_tlstx_data_wr))); + + req_wr = (struct fw_tlstx_data_wr *)req; + immd_len = (tls_immd ? dlen : 0); + req_wr->op_to_immdlen = + htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) | + FW_TLSTX_DATA_WR_COMPL_V(1) | + FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len)); + req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) | + FW_TLSTX_DATA_WR_LEN16_V(credits)); + wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS); + + if (is_sg_request(skb)) + wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | + ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : + FW_OFLD_TX_DATA_WR_SHOVE_F); + + req_wr->lsodisable_to_flags = + htonl(TX_ULP_MODE_V(ULP_MODE_TLS) | + TX_URG_V(skb_urgent(skb)) | + T6_TX_FORCE_F | wr_ulp_mode_force | + TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && + skb_queue_empty(&csk->txq))); + + req_wr->ctxloc_to_exp = + htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) | + FW_TLSTX_DATA_WR_EXP_V(expn) | + FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) | + FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) | + FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4)); + + /* Fill in the length */ + req_wr->plen = htonl(len); + req_wr->mfs = htons(hws->mfs); + req_wr->adjustedplen_pkd = + htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen)); + req_wr->expinplenmax_pkd = + htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion)); + req_wr->pdusinplenmax_pkd = + FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus); + req_wr->r10 = 0; + + data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type); + req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) | + CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) | + CPL_TX_TLS_SFO_CPL_LEN_V(2) | + CPL_TX_TLS_SFO_SEG_LEN_V(dlen)); + req_cpl->pld_len = htonl(len - expn); + + req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V + ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ? 
+ TLS_HDR_TYPE_HEARTBEAT : 0) | + CPL_TX_TLS_SFO_PROTOVER_V(0)); + + /* create the s-command */ + req_cpl->r1_lo = 0; + req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs); + req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen); + req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws)); +} + +/* + * Calculate the TLS data expansion size + */ +static int chtls_expansion_size(struct sock *sk, int data_len, + int fullpdu, + unsigned short *pducnt) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_hws *hws = &csk->tlshws; + struct tls_scmd *scmd = &hws->scmd; + int fragsize = hws->mfs; + int expnsize = 0; + int fragleft; + int fragcnt; + int expppdu; + + if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) == + SCMD_CIPH_MODE_AES_GCM) { + expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE + + TLS_HEADER_LENGTH; + + if (fullpdu) { + *pducnt = data_len / (expppdu + fragsize); + if (*pducnt > 32) + *pducnt = 32; + else if (!*pducnt) + *pducnt = 1; + expnsize = (*pducnt) * expppdu; + return expnsize; + } + fragcnt = (data_len / fragsize); + expnsize = fragcnt * expppdu; + fragleft = data_len % fragsize; + if (fragleft > 0) + expnsize += expppdu; + } + return expnsize; +} + +/* WR with IV, KEY and CPL SFO added */ +static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb, + int tls_tx_imm, int tls_len, u32 credits) +{ + unsigned short pdus_per_ulp = 0; + struct chtls_sock *csk; + struct chtls_hws *hws; + int expn_sz; + int pdus; + + csk = rcu_dereference_sk_user_data(sk); + hws = &csk->tlshws; + pdus = DIV_ROUND_UP(tls_len, hws->mfs); + expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL); + if (!hws->compute) { + hws->expansion = chtls_expansion_size(sk, + hws->fcplenmax, + 1, &pdus_per_ulp); + hws->pdus = pdus_per_ulp; + hws->adjustlen = hws->pdus * + ((hws->expansion / hws->pdus) + hws->mfs); + hws->compute = 1; + } + if (tls_copy_ivs(sk, skb)) + return; + tls_copy_tx_key(sk, skb); + tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus); + hws->tx_seq_no += (pdus - 1); +} + +static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb, + unsigned int immdlen, int len, + u32 credits, u32 compl) +{ + struct fw_ofld_tx_data_wr *req; + unsigned int wr_ulp_mode_force; + struct chtls_sock *csk; + unsigned int opcode; + + csk = rcu_dereference_sk_user_data(sk); + opcode = FW_OFLD_TX_DATA_WR; + + req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req)); + req->op_to_immdlen = htonl(WR_OP_V(opcode) | + FW_WR_COMPL_V(compl) | + FW_WR_IMMDLEN_V(immdlen)); + req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) | + FW_WR_LEN16_V(credits)); + + wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode); + if (is_sg_request(skb)) + wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | + ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 
0 : + FW_OFLD_TX_DATA_WR_SHOVE_F); + + req->tunnel_to_proxy = htonl(wr_ulp_mode_force | + TX_URG_V(skb_urgent(skb)) | + TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && + skb_queue_empty(&csk->txq))); + req->plen = htonl(len); +} + +static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb, + bool size) +{ + int wr_size; + + wr_size = TLS_WR_CPL_LEN; + wr_size += KEY_ON_MEM_SZ; + wr_size += ivs_size(csk->sk, skb); + + if (size) + return wr_size; + + /* frags counted for IV dsgl */ + if (!skb_ulp_tls_iv_imm(skb)) + skb_shinfo(skb)->nr_frags++; + + return wr_size; +} + +static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb) +{ + int length = skb->len; + + if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN) + return false; + + if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { + /* Check TLS header len for Immediate */ + if (csk->ulp_mode == ULP_MODE_TLS && + skb_ulp_tls_inline(skb)) + length += chtls_wr_size(csk, skb, true); + else + length += sizeof(struct fw_ofld_tx_data_wr); + + return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; + } + return true; +} + +static unsigned int calc_tx_flits(const struct sk_buff *skb, + unsigned int immdlen) +{ + unsigned int flits, cnt; + + flits = immdlen / 8; /* headers */ + cnt = skb_shinfo(skb)->nr_frags; + if (skb_tail_pointer(skb) != skb_transport_header(skb)) + cnt++; + return flits + sgl_len(cnt); +} + +static void arp_failure_discard(void *handle, struct sk_buff *skb) +{ + kfree_skb(skb); +} + +int chtls_push_frames(struct chtls_sock *csk, int comp) +{ + struct chtls_hws *hws = &csk->tlshws; + struct tcp_sock *tp; + struct sk_buff *skb; + int total_size = 0; + struct sock *sk; + int wr_size; + + wr_size = sizeof(struct fw_ofld_tx_data_wr); + sk = csk->sk; + tp = tcp_sk(sk); + + if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE))) + return 0; + + if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) + return 0; + + while (csk->wr_credits && (skb = skb_peek(&csk->txq)) && + (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) || + skb_queue_len(&csk->txq) > 1)) { + unsigned int credit_len = skb->len; + unsigned int credits_needed; + unsigned int completion = 0; + int tls_len = skb->len;/* TLS data len before IV/key */ + unsigned int immdlen; + int len = skb->len; /* length [ulp bytes] inserted by hw */ + int flowclen16 = 0; + int tls_tx_imm = 0; + + immdlen = skb->len; + if (!is_ofld_imm(csk, skb)) { + immdlen = skb_transport_offset(skb); + if (skb_ulp_tls_inline(skb)) + wr_size = chtls_wr_size(csk, skb, false); + credit_len = 8 * calc_tx_flits(skb, immdlen); + } else { + if (skb_ulp_tls_inline(skb)) { + wr_size = chtls_wr_size(csk, skb, false); + tls_tx_imm = 1; + } + } + if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) + credit_len += wr_size; + credits_needed = DIV_ROUND_UP(credit_len, 16); + if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { + flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt, + tp->rcv_nxt); + if (flowclen16 <= 0) + break; + csk->wr_credits -= flowclen16; + csk->wr_unacked += flowclen16; + csk->wr_nondata += flowclen16; + csk_set_flag(csk, CSK_TX_DATA_SENT); + } + + if (csk->wr_credits < credits_needed) { + if (skb_ulp_tls_inline(skb) && + !skb_ulp_tls_iv_imm(skb)) + skb_shinfo(skb)->nr_frags--; + break; + } + + __skb_unlink(skb, &csk->txq); + skb_set_queue_mapping(skb, (csk->txq_idx << 1) | + CPL_PRIORITY_DATA); + if (hws->ofld) + hws->txqid = (skb->queue_mapping >> 1); + skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata); + csk->wr_credits -= credits_needed; + csk->wr_unacked += 
credits_needed; + csk->wr_nondata = 0; + enqueue_wr(csk, skb); + + if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { + if ((comp && csk->wr_unacked == credits_needed) || + (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) || + csk->wr_unacked >= csk->wr_max_credits / 2) { + completion = 1; + csk->wr_unacked = 0; + } + if (skb_ulp_tls_inline(skb)) + make_tlstx_data_wr(sk, skb, tls_tx_imm, + tls_len, credits_needed); + else + make_tx_data_wr(sk, skb, immdlen, len, + credits_needed, completion); + tp->snd_nxt += len; + tp->lsndtime = tcp_jiffies32; + if (completion) + ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR; + } else { + struct cpl_close_con_req *req = cplhdr(skb); + unsigned int cmd = CPL_OPCODE_G(ntohl + (OPCODE_TID(req))); + + if (cmd == CPL_CLOSE_CON_REQ) + csk_set_flag(csk, + CSK_CLOSE_CON_REQUESTED); + + if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) && + (csk->wr_unacked >= csk->wr_max_credits / 2)) { + req->wr.wr_hi |= htonl(FW_WR_COMPL_F); + csk->wr_unacked = 0; + } + } + total_size += skb->truesize; + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER) + csk_set_flag(csk, CSK_TX_WAIT_IDLE); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); + } + sk->sk_wmem_queued -= total_size; + return total_size; +} + +static void mark_urg(struct tcp_sock *tp, int flags, + struct sk_buff *skb) +{ + if (unlikely(flags & MSG_OOB)) { + tp->snd_up = tp->write_seq; + ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG | + ULPCB_FLAG_BARRIER | + ULPCB_FLAG_NO_APPEND | + ULPCB_FLAG_NEED_HDR; + } +} + +/* + * Returns true if a connection should send more data to TCP engine + */ +static bool should_push(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_dev *cdev = csk->cdev; + struct tcp_sock *tp = tcp_sk(sk); + + /* + * If we've released our offload resources there's nothing to do ... + */ + if (!cdev) + return false; + + /* + * If there aren't any work requests in flight, or there isn't enough + * data in flight, or Nagle is off then send the current TX_DATA + * otherwise hold it and wait to accumulate more data. + */ + return csk->wr_credits == csk->wr_max_credits || + (tp->nonagle & TCP_NAGLE_OFF); +} + +/* + * Returns true if a TCP socket is corked. + */ +static bool corked(const struct tcp_sock *tp, int flags) +{ + return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK); +} + +/* + * Returns true if a send should try to push new data. + */ +static bool send_should_push(struct sock *sk, int flags) +{ + return should_push(sk) && !corked(tcp_sk(sk), flags); +} + +void chtls_tcp_push(struct sock *sk, int flags) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + int qlen = skb_queue_len(&csk->txq); + + if (likely(qlen)) { + struct sk_buff *skb = skb_peek_tail(&csk->txq); + struct tcp_sock *tp = tcp_sk(sk); + + mark_urg(tp, flags, skb); + + if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) && + corked(tp, flags)) { + ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD; + return; + } + + ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD; + if (qlen == 1 && + ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || + should_push(sk))) + chtls_push_frames(csk, 1); + } +} + +/* + * Calculate the size for a new send sk_buff. It's maximum size so we can + * pack lots of data into it, unless we plan to send it immediately, in which + * case we size it more tightly. 
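+ * (The cutoff is SKB_MAX_HEAD(): payload that would not fit in the
+ * linear area is placed in page fragments instead.)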
+ * + * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't + * arise in normal cases and when it does we are just wasting memory. + */ +static int select_size(struct sock *sk, int io_len, int flags, int len) +{ + const int pgbreak = SKB_MAX_HEAD(len); + + /* + * If the data wouldn't fit in the main body anyway, put only the + * header in the main body so it can use immediate data and place all + * the payload in page fragments. + */ + if (io_len > pgbreak) + return 0; + + /* + * If we will be accumulating payload get a large main body. + */ + if (!send_should_push(sk, flags)) + return pgbreak; + + return io_len; +} + +void skb_entail(struct sock *sk, struct sk_buff *skb, int flags) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct tcp_sock *tp = tcp_sk(sk); + + ULP_SKB_CB(skb)->seq = tp->write_seq; + ULP_SKB_CB(skb)->flags = flags; + __skb_queue_tail(&csk->txq, skb); + sk->sk_wmem_queued += skb->truesize; + + if (TCP_PAGE(sk) && TCP_OFF(sk)) { + put_page(TCP_PAGE(sk)); + TCP_PAGE(sk) = NULL; + TCP_OFF(sk) = 0; + } +} + +static struct sk_buff *get_tx_skb(struct sock *sk, int size) +{ + struct sk_buff *skb; + + skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation); + if (likely(skb)) { + skb_reserve(skb, TX_HEADER_LEN); + skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); + skb_reset_transport_header(skb); + } + return skb; +} + +static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct sk_buff *skb; + + skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN + + KEY_ON_MEM_SZ + max_ivs_size(sk, size)), + sk->sk_allocation); + if (likely(skb)) { + skb_reserve(skb, (TX_TLSHDR_LEN + + KEY_ON_MEM_SZ + max_ivs_size(sk, size))); + skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); + skb_reset_transport_header(skb); + ULP_SKB_CB(skb)->ulp.tls.ofld = 1; + ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type; + } + return skb; +} + +static void tx_skb_finalize(struct sk_buff *skb) +{ + struct ulp_skb_cb *cb = ULP_SKB_CB(skb); + + if (!(cb->flags & ULPCB_FLAG_NO_HDR)) + cb->flags = ULPCB_FLAG_NEED_HDR; + cb->flags |= ULPCB_FLAG_NO_APPEND; +} + +static void push_frames_if_head(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + if (skb_queue_len(&csk->txq) == 1) + chtls_push_frames(csk, 1); +} + +static int chtls_skb_copy_to_page_nocache(struct sock *sk, + struct iov_iter *from, + struct sk_buff *skb, + struct page *page, + int off, int copy) +{ + int err; + + err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + + off, copy, skb->len); + if (err) + return err; + + skb->len += copy; + skb->data_len += copy; + skb->truesize += copy; + sk->sk_wmem_queued += copy; + return 0; +} + +static int csk_mem_free(struct chtls_dev *cdev, struct sock *sk) +{ + return (cdev->max_host_sndbuf - sk->sk_wmem_queued); +} + +static int csk_wait_memory(struct chtls_dev *cdev, + struct sock *sk, long *timeo_p) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int err = 0; + long current_timeo; + long vm_wait = 0; + bool noblock; + + current_timeo = *timeo_p; + noblock = (*timeo_p ? 
false : true);
+	if (csk_mem_free(cdev, sk)) {
+		current_timeo = (prandom_u32() % (HZ / 5)) + 2;
+		vm_wait = (prandom_u32() % (HZ / 5)) + 2;
+	}
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	while (1) {
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+			goto do_error;
+		if (!*timeo_p) {
+			if (noblock)
+				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+			goto do_nonblock;
+		}
+		if (signal_pending(current))
+			goto do_interrupted;
+		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+		if (csk_mem_free(cdev, sk) && !vm_wait)
+			break;
+
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		sk->sk_write_pending++;
+		sk_wait_event(sk, &current_timeo, sk->sk_err ||
+			      (sk->sk_shutdown & SEND_SHUTDOWN) ||
+			      (csk_mem_free(cdev, sk) && !vm_wait), &wait);
+		sk->sk_write_pending--;
+
+		if (vm_wait) {
+			vm_wait -= current_timeo;
+			current_timeo = *timeo_p;
+			if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
+				current_timeo -= vm_wait;
+				if (current_timeo < 0)
+					current_timeo = 0;
+			}
+			vm_wait = 0;
+		}
+		*timeo_p = current_timeo;
+	}
+do_rm_wq:
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return err;
+do_error:
+	err = -EPIPE;
+	goto do_rm_wq;
+do_nonblock:
+	err = -EAGAIN;
+	goto do_rm_wq;
+do_interrupted:
+	err = sock_intr_errno(*timeo_p);
+	goto do_rm_wq;
+}
+
+static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
+			       unsigned char *record_type)
+{
+	struct cmsghdr *cmsg;
+	int rc = -EINVAL;
+
+	for_each_cmsghdr(cmsg, msg) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+		if (cmsg->cmsg_level != SOL_TLS)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case TLS_SET_RECORD_TYPE:
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type)))
+				return -EINVAL;
+
+			if (msg->msg_flags & MSG_MORE)
+				return -EINVAL;
+
+			*record_type = *(unsigned char *)CMSG_DATA(cmsg);
+			rc = 0;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return rc;
+}
+
+int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct chtls_dev *cdev = csk->cdev;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	int mss, flags, err;
+	int recordsz = 0;
+	int copied = 0;
+	long timeo;
+
+	lock_sock(sk);
+	flags = msg->msg_flags;
+	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
+		err = sk_stream_wait_connect(sk, &timeo);
+		if (err)
+			goto out_err;
+	}
+
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+	err = -EPIPE;
+	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		goto out_err;
+
+	mss = csk->mss;
+	csk_set_flag(csk, CSK_TX_MORE_DATA);
+
+	while (msg_data_left(msg)) {
+		int copy = 0;
+
+		skb = skb_peek_tail(&csk->txq);
+		if (skb) {
+			copy = mss - skb->len;
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		}
+		if (!csk_mem_free(cdev, sk))
+			goto wait_for_sndbuf;
+
+		if (is_tls_tx(csk) && !csk->tlshws.txleft) {
+			unsigned char record_type = TLS_RECORD_TYPE_DATA;
+
+			if (unlikely(msg->msg_controllen)) {
+				err = chtls_proccess_cmsg(sk, msg,
+							  &record_type);
+				if (err)
+					goto out_err;
+
+				/* Avoid appending tls handshake, alert to tls data */
+				if (skb)
+					tx_skb_finalize(skb);
+			}
+
+			recordsz = size;
+			csk->tlshws.txleft = recordsz;
+			csk->tlshws.type = record_type;
+		}
+
+		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
+		    copy <= 0) {
+new_buf:
+			if (skb) {
+				tx_skb_finalize(skb);
+				push_frames_if_head(sk);
+			}
+
+			if (is_tls_tx(csk)) {
+				skb = get_record_skb(sk,
+						     select_size(sk,
+								 recordsz,
+								 flags,
+								 TX_TLSHDR_LEN),
+						     false);
+			} else {
+				skb = get_tx_skb(sk,
+
select_size(sk, size, flags, + TX_HEADER_LEN)); + } + if (unlikely(!skb)) + goto wait_for_memory; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + copy = mss; + } + if (copy > size) + copy = size; + + if (skb_tailroom(skb) > 0) { + copy = min(copy, skb_tailroom(skb)); + if (is_tls_tx(csk)) + copy = min_t(int, copy, csk->tlshws.txleft); + err = skb_add_data_nocache(sk, skb, + &msg->msg_iter, copy); + if (err) + goto do_fault; + } else { + int i = skb_shinfo(skb)->nr_frags; + struct page *page = TCP_PAGE(sk); + int pg_size = PAGE_SIZE; + int off = TCP_OFF(sk); + bool merge; + + if (page) + pg_size = page_size(page); + if (off < pg_size && + skb_can_coalesce(skb, i, page, off)) { + merge = true; + goto copy; + } + merge = false; + if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) : + MAX_SKB_FRAGS)) + goto new_buf; + + if (page && off == pg_size) { + put_page(page); + TCP_PAGE(sk) = page = NULL; + pg_size = PAGE_SIZE; + } + + if (!page) { + gfp_t gfp = sk->sk_allocation; + int order = cdev->send_page_order; + + if (order) { + page = alloc_pages(gfp | __GFP_COMP | + __GFP_NOWARN | + __GFP_NORETRY, + order); + if (page) + pg_size <<= order; + } + if (!page) { + page = alloc_page(gfp); + pg_size = PAGE_SIZE; + } + if (!page) + goto wait_for_memory; + off = 0; + } +copy: + if (copy > pg_size - off) + copy = pg_size - off; + if (is_tls_tx(csk)) + copy = min_t(int, copy, csk->tlshws.txleft); + + err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter, + skb, page, + off, copy); + if (unlikely(err)) { + if (!TCP_PAGE(sk)) { + TCP_PAGE(sk) = page; + TCP_OFF(sk) = 0; + } + goto do_fault; + } + /* Update the skb. */ + if (merge) { + skb_frag_size_add( + &skb_shinfo(skb)->frags[i - 1], + copy); + } else { + skb_fill_page_desc(skb, i, page, off, copy); + if (off + copy < pg_size) { + /* space left keep page */ + get_page(page); + TCP_PAGE(sk) = page; + } else { + TCP_PAGE(sk) = NULL; + } + } + TCP_OFF(sk) = off + copy; + } + if (unlikely(skb->len == mss)) + tx_skb_finalize(skb); + tp->write_seq += copy; + copied += copy; + size -= copy; + + if (is_tls_tx(csk)) + csk->tlshws.txleft -= copy; + + if (corked(tp, flags) && + (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) + ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; + + if (size == 0) + goto out; + + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) + push_frames_if_head(sk); + continue; +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +wait_for_memory: + err = csk_wait_memory(cdev, sk, &timeo); + if (err) + goto do_error; + } +out: + csk_reset_flag(csk, CSK_TX_MORE_DATA); + if (copied) + chtls_tcp_push(sk, flags); +done: + release_sock(sk); + return copied; +do_fault: + if (!skb->len) { + __skb_unlink(skb, &csk->txq); + sk->sk_wmem_queued -= skb->truesize; + __kfree_skb(skb); + } +do_error: + if (copied) + goto out; +out_err: + if (csk_conn_inline(csk)) + csk_reset_flag(csk, CSK_TX_MORE_DATA); + copied = sk_stream_error(sk, flags, err); + goto done; +} + +int chtls_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + struct chtls_sock *csk; + struct chtls_dev *cdev; + int mss, err, copied; + struct tcp_sock *tp; + long timeo; + + tp = tcp_sk(sk); + copied = 0; + csk = rcu_dereference_sk_user_data(sk); + cdev = csk->cdev; + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + err = sk_stream_wait_connect(sk, &timeo); + if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && + err != 0) + goto out_err; + + mss = csk->mss; + csk_set_flag(csk, CSK_TX_MORE_DATA); + + while (size > 0) { + struct sk_buff 
*skb = skb_peek_tail(&csk->txq); + int copy, i; + + if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || + (copy = mss - skb->len) <= 0) { +new_buf: + if (!csk_mem_free(cdev, sk)) + goto wait_for_sndbuf; + + if (is_tls_tx(csk)) { + skb = get_record_skb(sk, + select_size(sk, size, + flags, + TX_TLSHDR_LEN), + true); + } else { + skb = get_tx_skb(sk, 0); + } + if (!skb) + goto wait_for_memory; + copy = mss; + } + if (copy > size) + copy = size; + + i = skb_shinfo(skb)->nr_frags; + if (skb_can_coalesce(skb, i, page, offset)) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else if (i < MAX_SKB_FRAGS) { + get_page(page); + skb_fill_page_desc(skb, i, page, offset, copy); + } else { + tx_skb_finalize(skb); + push_frames_if_head(sk); + goto new_buf; + } + + skb->len += copy; + if (skb->len == mss) + tx_skb_finalize(skb); + skb->data_len += copy; + skb->truesize += copy; + sk->sk_wmem_queued += copy; + tp->write_seq += copy; + copied += copy; + offset += copy; + size -= copy; + + if (corked(tp, flags) && + (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) + ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; + + if (!size) + break; + + if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)) + push_frames_if_head(sk); + continue; +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +wait_for_memory: + err = csk_wait_memory(cdev, sk, &timeo); + if (err) + goto do_error; + } +out: + csk_reset_flag(csk, CSK_TX_MORE_DATA); + if (copied) + chtls_tcp_push(sk, flags); +done: + release_sock(sk); + return copied; + +do_error: + if (copied) + goto out; + +out_err: + if (csk_conn_inline(csk)) + csk_reset_flag(csk, CSK_TX_MORE_DATA); + copied = sk_stream_error(sk, flags, err); + goto done; +} + +static void chtls_select_window(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct tcp_sock *tp = tcp_sk(sk); + unsigned int wnd = tp->rcv_wnd; + + wnd = max_t(unsigned int, wnd, tcp_full_space(sk)); + wnd = max_t(unsigned int, MIN_RCV_WND, wnd); + + if (wnd > MAX_RCV_WND) + wnd = MAX_RCV_WND; + +/* + * Check if we need to grow the receive window in response to an increase in + * the socket's receive buffer size. Some applications increase the buffer + * size dynamically and rely on the window to grow accordingly. + */ + + if (wnd > tp->rcv_wnd) { + tp->rcv_wup -= wnd - tp->rcv_wnd; + tp->rcv_wnd = wnd; + /* Mark the receive window as updated */ + csk_reset_flag(csk, CSK_UPDATE_RCV_WND); + } +} + +/* + * Send RX credits through an RX_DATA_ACK CPL message. We are permitted + * to return without sending the message in case we cannot allocate + * an sk_buff. Returns the number of credits sent. + */ +static u32 send_rx_credits(struct chtls_sock *csk, u32 credits) +{ + struct cpl_rx_data_ack *req; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(*req), GFP_ATOMIC); + if (!skb) + return 0; + __skb_put(skb, sizeof(*req)); + req = (struct cpl_rx_data_ack *)skb->head; + + set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id); + INIT_TP_WR(req, csk->tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, + csk->tid)); + req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) | + RX_FORCE_ACK_F); + cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb); + return credits; +} + +#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \ + TCPF_FIN_WAIT1 | \ + TCPF_FIN_WAIT2) + +/* + * Called after some received data has been read. It returns RX credits + * to the HW for the amount of data processed. 
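+ * Credit return is batched: see the 15KB threshold and the 16KB
+ * remaining-window check in the body below.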
+ */ +static void chtls_cleanup_rbuf(struct sock *sk, int copied) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct tcp_sock *tp; + int must_send; + u32 credits; + u32 thres; + + thres = 15 * 1024; + + if (!sk_in_state(sk, CREDIT_RETURN_STATE)) + return; + + chtls_select_window(sk); + tp = tcp_sk(sk); + credits = tp->copied_seq - tp->rcv_wup; + if (unlikely(!credits)) + return; + +/* + * For coalescing to work effectively ensure the receive window has + * at least 16KB left. + */ + must_send = credits + 16384 >= tp->rcv_wnd; + + if (must_send || credits >= thres) + tp->rcv_wup += send_rx_credits(csk, credits); +} + +static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, int *addr_len) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_hws *hws = &csk->tlshws; + struct net_device *dev = csk->egress_dev; + struct adapter *adap = netdev2adap(dev); + struct tcp_sock *tp = tcp_sk(sk); + unsigned long avail; + int buffers_freed; + int copied = 0; + int target; + long timeo; + + buffers_freed = 0; + + timeo = sock_rcvtimeo(sk, nonblock); + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) + chtls_cleanup_rbuf(sk, copied); + + do { + struct sk_buff *skb; + u32 offset = 0; + + if (unlikely(tp->urg_data && + tp->urg_seq == tp->copied_seq)) { + if (copied) + break; + if (signal_pending(current)) { + copied = timeo ? sock_intr_errno(timeo) : + -EAGAIN; + break; + } + } + skb = skb_peek(&sk->sk_receive_queue); + if (skb) + goto found_ok_skb; + if (csk->wr_credits && + skb_queue_len(&csk->txq) && + chtls_push_frames(csk, csk->wr_credits == + csk->wr_max_credits)) + sk->sk_write_space(sk); + + if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) + break; + + if (copied) { + if (sk->sk_err || sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + signal_pending(current)) + break; + + if (!timeo) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + if (sk->sk_state == TCP_CLOSE) { + copied = -ENOTCONN; + break; + } + if (!timeo) { + copied = -EAGAIN; + break; + } + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + } + if (READ_ONCE(sk->sk_backlog.tail)) { + release_sock(sk); + lock_sock(sk); + chtls_cleanup_rbuf(sk, copied); + continue; + } + + if (copied >= target) + break; + chtls_cleanup_rbuf(sk, copied); + sk_wait_data(sk, &timeo, NULL); + continue; +found_ok_skb: + if (!skb->len) { + skb_dst_set(skb, NULL); + __skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); + + if (!copied && !timeo) { + copied = -EAGAIN; + break; + } + + if (copied < target) { + release_sock(sk); + lock_sock(sk); + continue; + } + break; + } + offset = hws->copied_seq; + avail = skb->len - offset; + if (len < avail) + avail = len; + + if (unlikely(tp->urg_data)) { + u32 urg_offset = tp->urg_seq - tp->copied_seq; + + if (urg_offset < avail) { + if (urg_offset) { + avail = urg_offset; + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + /* First byte is urgent, skip */ + tp->copied_seq++; + offset++; + avail--; + if (!avail) + goto skip_copy; + } + } + } + /* Set record type if not already done. For a non-data record, + * do not proceed if record type could not be copied. 
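+	 * The type byte comes from the TLS header the hardware left at
+	 * skb->data and is surfaced to userspace as a TLS_GET_RECORD_TYPE
+	 * cmsg.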
+ */ + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { + struct tls_hdr *thdr = (struct tls_hdr *)skb->data; + int cerr = 0; + + cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, + sizeof(thdr->type), &thdr->type); + + if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) + return -EIO; + /* don't send tls header, skip copy */ + goto skip_copy; + } + + if (skb_copy_datagram_msg(skb, offset, msg, avail)) { + if (!copied) { + copied = -EFAULT; + break; + } + } + + copied += avail; + len -= avail; + hws->copied_seq += avail; +skip_copy: + if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) + tp->urg_data = 0; + + if ((avail + offset) >= skb->len) { + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { + tp->copied_seq += skb->len; + hws->rcvpld = skb->hdr_len; + } else { + atomic_inc(&adap->chcr_stats.tls_pdu_rx); + tp->copied_seq += hws->rcvpld; + } + chtls_free_skb(sk, skb); + buffers_freed++; + hws->copied_seq = 0; + if (copied >= target && + !skb_peek(&sk->sk_receive_queue)) + break; + } + } while (len > 0); + + if (buffers_freed) + chtls_cleanup_rbuf(sk, copied); + release_sock(sk); + return copied; +} + +/* + * Peek at data in a socket's receive buffer. + */ +static int peekmsg(struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 peek_seq, offset; + struct sk_buff *skb; + int copied = 0; + size_t avail; /* amount of available data in current skb */ + long timeo; + + lock_sock(sk); + timeo = sock_rcvtimeo(sk, nonblock); + peek_seq = tp->copied_seq; + + do { + if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) { + if (copied) + break; + if (signal_pending(current)) { + copied = timeo ? sock_intr_errno(timeo) : + -EAGAIN; + break; + } + } + + skb_queue_walk(&sk->sk_receive_queue, skb) { + offset = peek_seq - ULP_SKB_CB(skb)->seq; + if (offset < skb->len) + goto found_ok_skb; + } + + /* empty receive queue */ + if (copied) + break; + if (sock_flag(sk, SOCK_DONE)) + break; + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + if (sk->sk_state == TCP_CLOSE) { + copied = -ENOTCONN; + break; + } + if (!timeo) { + copied = -EAGAIN; + break; + } + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + + if (READ_ONCE(sk->sk_backlog.tail)) { + /* Do not sleep, just process backlog. */ + release_sock(sk); + lock_sock(sk); + } else { + sk_wait_data(sk, &timeo, NULL); + } + + if (unlikely(peek_seq != tp->copied_seq)) { + if (net_ratelimit()) + pr_info("TCP(%s:%d), race in MSG_PEEK.\n", + current->comm, current->pid); + peek_seq = tp->copied_seq; + } + continue; + +found_ok_skb: + avail = skb->len - offset; + if (len < avail) + avail = len; + /* + * Do we have urgent data here? We need to skip over the + * urgent byte. + */ + if (unlikely(tp->urg_data)) { + u32 urg_offset = tp->urg_seq - peek_seq; + + if (urg_offset < avail) { + /* + * The amount of data we are preparing to copy + * contains urgent data. + */ + if (!urg_offset) { /* First byte is urgent */ + if (!sock_flag(sk, SOCK_URGINLINE)) { + peek_seq++; + offset++; + avail--; + } + if (!avail) + continue; + } else { + /* stop short of the urgent data */ + avail = urg_offset; + } + } + } + + /* + * If MSG_TRUNC is specified the data is discarded. 
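+	 * (peek_seq still advances by the peeked amount, so the loop makes
+	 * progress through the receive queue even when discarding).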
+ */ + if (likely(!(flags & MSG_TRUNC))) + if (skb_copy_datagram_msg(skb, offset, msg, len)) { + if (!copied) { + copied = -EFAULT; + break; + } + } + peek_seq += avail; + copied += avail; + len -= avail; + } while (len > 0); + + release_sock(sk); + return copied; +} + +int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, int *addr_len) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct chtls_sock *csk; + unsigned long avail; /* amount of available data in current skb */ + int buffers_freed; + int copied = 0; + long timeo; + int target; /* Read at least this many bytes */ + + buffers_freed = 0; + + if (unlikely(flags & MSG_OOB)) + return tcp_prot.recvmsg(sk, msg, len, nonblock, flags, + addr_len); + + if (unlikely(flags & MSG_PEEK)) + return peekmsg(sk, msg, len, nonblock, flags); + + if (sk_can_busy_loop(sk) && + skb_queue_empty_lockless(&sk->sk_receive_queue) && + sk->sk_state == TCP_ESTABLISHED) + sk_busy_loop(sk, nonblock); + + lock_sock(sk); + csk = rcu_dereference_sk_user_data(sk); + + if (is_tls_rx(csk)) + return chtls_pt_recvmsg(sk, msg, len, nonblock, + flags, addr_len); + + timeo = sock_rcvtimeo(sk, nonblock); + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) + chtls_cleanup_rbuf(sk, copied); + + do { + struct sk_buff *skb; + u32 offset; + + if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) { + if (copied) + break; + if (signal_pending(current)) { + copied = timeo ? sock_intr_errno(timeo) : + -EAGAIN; + break; + } + } + + skb = skb_peek(&sk->sk_receive_queue); + if (skb) + goto found_ok_skb; + + if (csk->wr_credits && + skb_queue_len(&csk->txq) && + chtls_push_frames(csk, csk->wr_credits == + csk->wr_max_credits)) + sk->sk_write_space(sk); + + if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) + break; + + if (copied) { + if (sk->sk_err || sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + signal_pending(current)) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + if (sk->sk_state == TCP_CLOSE) { + copied = -ENOTCONN; + break; + } + if (!timeo) { + copied = -EAGAIN; + break; + } + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + } + + if (READ_ONCE(sk->sk_backlog.tail)) { + release_sock(sk); + lock_sock(sk); + chtls_cleanup_rbuf(sk, copied); + continue; + } + + if (copied >= target) + break; + chtls_cleanup_rbuf(sk, copied); + sk_wait_data(sk, &timeo, NULL); + continue; + +found_ok_skb: + if (!skb->len) { + chtls_kfree_skb(sk, skb); + if (!copied && !timeo) { + copied = -EAGAIN; + break; + } + + if (copied < target) + continue; + + break; + } + + offset = tp->copied_seq - ULP_SKB_CB(skb)->seq; + avail = skb->len - offset; + if (len < avail) + avail = len; + + if (unlikely(tp->urg_data)) { + u32 urg_offset = tp->urg_seq - tp->copied_seq; + + if (urg_offset < avail) { + if (urg_offset) { + avail = urg_offset; + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + tp->copied_seq++; + offset++; + avail--; + if (!avail) + goto skip_copy; + } + } + } + + if (likely(!(flags & MSG_TRUNC))) { + if (skb_copy_datagram_msg(skb, offset, + msg, avail)) { + if (!copied) { + copied = -EFAULT; + break; + } + } + } + + tp->copied_seq += avail; + copied += avail; + len -= avail; + +skip_copy: + if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) + tp->urg_data = 0; + + if (avail + offset >= skb->len) { + chtls_free_skb(sk, 
skb);
+			buffers_freed++;
+
+			if (copied >= target &&
+			    !skb_peek(&sk->sk_receive_queue))
+				break;
+		}
+	} while (len > 0);
+
+	if (buffers_freed)
+		chtls_cleanup_rbuf(sk, copied);
+
+	release_sock(sk);
+	return copied;
+}
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c
new file mode 100644
index 000000000000..9098b3eed4da
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c
@@ -0,0 +1,641 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2018 Chelsio Communications, Inc.
+ *
+ * Written by: Atul Gupta (atul.gupta@chelsio.com)
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/hash.h>
+#include <linux/in.h>
+#include <linux/net.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#include <net/tcp.h>
+#include <net/tls.h>
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+#define DRV_NAME "chtls"
+
+/*
+ * chtls device management
+ * maintains a list of the chtls devices
+ */
+static LIST_HEAD(cdev_list);
+static DEFINE_MUTEX(cdev_mutex);
+
+static DEFINE_MUTEX(notify_mutex);
+static RAW_NOTIFIER_HEAD(listen_notify_list);
+static struct proto chtls_cpl_prot, chtls_cpl_protv6;
+struct request_sock_ops chtls_rsk_ops, chtls_rsk_opsv6;
+static uint send_page_order = (14 - PAGE_SHIFT < 0) ? 0 : 14 - PAGE_SHIFT;
+
+static void register_listen_notifier(struct notifier_block *nb)
+{
+	mutex_lock(&notify_mutex);
+	raw_notifier_chain_register(&listen_notify_list, nb);
+	mutex_unlock(&notify_mutex);
+}
+
+static void unregister_listen_notifier(struct notifier_block *nb)
+{
+	mutex_lock(&notify_mutex);
+	raw_notifier_chain_unregister(&listen_notify_list, nb);
+	mutex_unlock(&notify_mutex);
+}
+
+static int listen_notify_handler(struct notifier_block *this,
+				 unsigned long event, void *data)
+{
+	struct chtls_listen *clisten;
+	int ret = NOTIFY_DONE;
+
+	clisten = (struct chtls_listen *)data;
+
+	switch (event) {
+	case CHTLS_LISTEN_START:
+		ret = chtls_listen_start(clisten->cdev, clisten->sk);
+		kfree(clisten);
+		break;
+	case CHTLS_LISTEN_STOP:
+		chtls_listen_stop(clisten->cdev, clisten->sk);
+		kfree(clisten);
+		break;
+	}
+	return ret;
+}
+
+static struct notifier_block listen_notifier = {
+	.notifier_call = listen_notify_handler
+};
+
+static int listen_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	if (likely(skb_transport_header(skb) != skb_network_header(skb)))
+		return tcp_v4_do_rcv(sk, skb);
+	BLOG_SKB_CB(skb)->backlog_rcv(sk, skb);
+	return 0;
+}
+
+static int chtls_start_listen(struct chtls_dev *cdev, struct sock *sk)
+{
+	struct chtls_listen *clisten;
+
+	if (sk->sk_protocol != IPPROTO_TCP)
+		return -EPROTONOSUPPORT;
+
+	if (sk->sk_family == PF_INET &&
+	    LOOPBACK(inet_sk(sk)->inet_rcv_saddr))
+		return -EADDRNOTAVAIL;
+
+	sk->sk_backlog_rcv = listen_backlog_rcv;
+	clisten = kmalloc(sizeof(*clisten), GFP_KERNEL);
+	if (!clisten)
+		return -ENOMEM;
+	clisten->cdev = cdev;
+	clisten->sk = sk;
+	mutex_lock(&notify_mutex);
+	raw_notifier_call_chain(&listen_notify_list,
+				CHTLS_LISTEN_START, clisten);
+	mutex_unlock(&notify_mutex);
+	return 0;
+}
+
+static void chtls_stop_listen(struct chtls_dev *cdev, struct sock *sk)
+{
+	struct chtls_listen *clisten;
+
+	if (sk->sk_protocol != IPPROTO_TCP)
+		return;
+
+	clisten = kmalloc(sizeof(*clisten), GFP_KERNEL);
+	if (!clisten)
+		return;
+	clisten->cdev = cdev;
+	clisten->sk = sk;
+	mutex_lock(&notify_mutex);
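+	/* Same notifier path as CHTLS_LISTEN_START; listen_notify_handler()
+	 * frees clisten once chtls_listen_stop() has run.
+	 */
+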
raw_notifier_call_chain(&listen_notify_list, + CHTLS_LISTEN_STOP, clisten); + mutex_unlock(¬ify_mutex); +} + +static int chtls_inline_feature(struct tls_toe_device *dev) +{ + struct net_device *netdev; + struct chtls_dev *cdev; + int i; + + cdev = to_chtls_dev(dev); + + for (i = 0; i < cdev->lldi->nports; i++) { + netdev = cdev->ports[i]; + if (netdev->features & NETIF_F_HW_TLS_RECORD) + return 1; + } + return 0; +} + +static int chtls_create_hash(struct tls_toe_device *dev, struct sock *sk) +{ + struct chtls_dev *cdev = to_chtls_dev(dev); + + if (sk->sk_state == TCP_LISTEN) + return chtls_start_listen(cdev, sk); + return 0; +} + +static void chtls_destroy_hash(struct tls_toe_device *dev, struct sock *sk) +{ + struct chtls_dev *cdev = to_chtls_dev(dev); + + if (sk->sk_state == TCP_LISTEN) + chtls_stop_listen(cdev, sk); +} + +static void chtls_free_uld(struct chtls_dev *cdev) +{ + int i; + + tls_toe_unregister_device(&cdev->tlsdev); + kvfree(cdev->kmap.addr); + idr_destroy(&cdev->hwtid_idr); + for (i = 0; i < (1 << RSPQ_HASH_BITS); i++) + kfree_skb(cdev->rspq_skb_cache[i]); + kfree(cdev->lldi); + kfree_skb(cdev->askb); + kfree(cdev); +} + +static inline void chtls_dev_release(struct kref *kref) +{ + struct tls_toe_device *dev; + struct chtls_dev *cdev; + struct adapter *adap; + + dev = container_of(kref, struct tls_toe_device, kref); + cdev = to_chtls_dev(dev); + + /* Reset tls rx/tx stats */ + adap = pci_get_drvdata(cdev->pdev); + atomic_set(&adap->chcr_stats.tls_pdu_tx, 0); + atomic_set(&adap->chcr_stats.tls_pdu_rx, 0); + + chtls_free_uld(cdev); +} + +static void chtls_register_dev(struct chtls_dev *cdev) +{ + struct tls_toe_device *tlsdev = &cdev->tlsdev; + + strlcpy(tlsdev->name, "chtls", TLS_TOE_DEVICE_NAME_MAX); + strlcat(tlsdev->name, cdev->lldi->ports[0]->name, + TLS_TOE_DEVICE_NAME_MAX); + tlsdev->feature = chtls_inline_feature; + tlsdev->hash = chtls_create_hash; + tlsdev->unhash = chtls_destroy_hash; + tlsdev->release = chtls_dev_release; + kref_init(&tlsdev->kref); + tls_toe_register_device(tlsdev); + cdev->cdev_state = CHTLS_CDEV_STATE_UP; +} + +static void process_deferq(struct work_struct *task_param) +{ + struct chtls_dev *cdev = container_of(task_param, + struct chtls_dev, deferq_task); + struct sk_buff *skb; + + spin_lock_bh(&cdev->deferq.lock); + while ((skb = __skb_dequeue(&cdev->deferq)) != NULL) { + spin_unlock_bh(&cdev->deferq.lock); + DEFERRED_SKB_CB(skb)->handler(cdev, skb); + spin_lock_bh(&cdev->deferq.lock); + } + spin_unlock_bh(&cdev->deferq.lock); +} + +static int chtls_get_skb(struct chtls_dev *cdev) +{ + cdev->askb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL); + if (!cdev->askb) + return -ENOMEM; + + skb_put(cdev->askb, sizeof(struct tcphdr)); + skb_reset_transport_header(cdev->askb); + memset(cdev->askb->data, 0, cdev->askb->len); + return 0; +} + +static void *chtls_uld_add(const struct cxgb4_lld_info *info) +{ + struct cxgb4_lld_info *lldi; + struct chtls_dev *cdev; + int i, j; + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) + goto out; + + lldi = kzalloc(sizeof(*lldi), GFP_KERNEL); + if (!lldi) + goto out_lldi; + + if (chtls_get_skb(cdev)) + goto out_skb; + + *lldi = *info; + cdev->lldi = lldi; + cdev->pdev = lldi->pdev; + cdev->tids = lldi->tids; + cdev->ports = lldi->ports; + cdev->mtus = lldi->mtus; + cdev->tids = lldi->tids; + cdev->pfvf = FW_VIID_PFN_G(cxgb4_port_viid(lldi->ports[0])) + << FW_VIID_PFN_S; + + for (i = 0; i < (1 << RSPQ_HASH_BITS); i++) { + unsigned int size = 64 - sizeof(struct rsp_ctrl) - 8; + + cdev->rspq_skb_cache[i] = 
__alloc_skb(size,
+ gfp_any(), 0,
+ lldi->nodeid);
+ if (unlikely(!cdev->rspq_skb_cache[i]))
+ goto out_rspq_skb;
+ }
+
+ idr_init(&cdev->hwtid_idr);
+ INIT_WORK(&cdev->deferq_task, process_deferq);
+ spin_lock_init(&cdev->listen_lock);
+ spin_lock_init(&cdev->idr_lock);
+ cdev->send_page_order = min_t(uint, get_order(32768),
+ send_page_order);
+ cdev->max_host_sndbuf = 48 * 1024;
+
+ if (lldi->vr->key.size)
+ if (chtls_init_kmap(cdev, lldi))
+ goto out_rspq_skb;
+
+ mutex_lock(&cdev_mutex);
+ list_add_tail(&cdev->list, &cdev_list);
+ mutex_unlock(&cdev_mutex);
+
+ return cdev;
+out_rspq_skb:
+ for (j = 0; j < i; j++)
+ kfree_skb(cdev->rspq_skb_cache[j]);
+ kfree_skb(cdev->askb);
+out_skb:
+ kfree(lldi);
+out_lldi:
+ kfree(cdev);
+out:
+ return NULL;
+}
+
+static void chtls_free_all_uld(void)
+{
+ struct chtls_dev *cdev, *tmp;
+
+ mutex_lock(&cdev_mutex);
+ list_for_each_entry_safe(cdev, tmp, &cdev_list, list) {
+ if (cdev->cdev_state == CHTLS_CDEV_STATE_UP) {
+ list_del(&cdev->list);
+ kref_put(&cdev->tlsdev.kref, cdev->tlsdev.release);
+ }
+ }
+ mutex_unlock(&cdev_mutex);
+}
+
+static int chtls_uld_state_change(void *handle, enum cxgb4_state new_state)
+{
+ struct chtls_dev *cdev = handle;
+
+ switch (new_state) {
+ case CXGB4_STATE_UP:
+ chtls_register_dev(cdev);
+ break;
+ case CXGB4_STATE_DOWN:
+ break;
+ case CXGB4_STATE_START_RECOVERY:
+ break;
+ case CXGB4_STATE_DETACH:
+ mutex_lock(&cdev_mutex);
+ list_del(&cdev->list);
+ mutex_unlock(&cdev_mutex);
+ kref_put(&cdev->tlsdev.kref, cdev->tlsdev.release);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
+ const __be64 *rsp,
+ u32 pktshift)
+{
+ struct sk_buff *skb;
+
+ /* Allocate space for cpl_pass_accept_req, which will be synthesized by
+ * the driver. Once the driver synthesizes cpl_pass_accept_req, the skb
+ * will go through the regular cpl_pass_accept_req processing in TOM. 
+ */ + skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) + - pktshift, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) + - pktshift); + /* For now we will copy cpl_rx_pkt in the skb */ + skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_rx_pkt)); + skb_copy_to_linear_data_offset(skb, sizeof(struct cpl_pass_accept_req) + , gl->va + pktshift, + gl->tot_len - pktshift); + + return skb; +} + +static int chtls_recv_packet(struct chtls_dev *cdev, + const struct pkt_gl *gl, const __be64 *rsp) +{ + unsigned int opcode = *(u8 *)rsp; + struct sk_buff *skb; + int ret; + + skb = copy_gl_to_skb_pkt(gl, rsp, cdev->lldi->sge_pktshift); + if (!skb) + return -ENOMEM; + + ret = chtls_handlers[opcode](cdev, skb); + if (ret & CPL_RET_BUF_DONE) + kfree_skb(skb); + + return 0; +} + +static int chtls_recv_rsp(struct chtls_dev *cdev, const __be64 *rsp) +{ + unsigned long rspq_bin; + unsigned int opcode; + struct sk_buff *skb; + unsigned int len; + int ret; + + len = 64 - sizeof(struct rsp_ctrl) - 8; + opcode = *(u8 *)rsp; + + rspq_bin = hash_ptr((void *)rsp, RSPQ_HASH_BITS); + skb = cdev->rspq_skb_cache[rspq_bin]; + if (skb && !skb_is_nonlinear(skb) && + !skb_shared(skb) && !skb_cloned(skb)) { + refcount_inc(&skb->users); + if (refcount_read(&skb->users) == 2) { + __skb_trim(skb, 0); + if (skb_tailroom(skb) >= len) + goto copy_out; + } + refcount_dec(&skb->users); + } + skb = alloc_skb(len, GFP_ATOMIC); + if (unlikely(!skb)) + return -ENOMEM; + +copy_out: + __skb_put(skb, len); + skb_copy_to_linear_data(skb, rsp, len); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + ret = chtls_handlers[opcode](cdev, skb); + + if (ret & CPL_RET_BUF_DONE) + kfree_skb(skb); + return 0; +} + +static void chtls_recv(struct chtls_dev *cdev, + struct sk_buff **skbs, const __be64 *rsp) +{ + struct sk_buff *skb = *skbs; + unsigned int opcode; + int ret; + + opcode = *(u8 *)rsp; + + __skb_push(skb, sizeof(struct rss_header)); + skb_copy_to_linear_data(skb, rsp, sizeof(struct rss_header)); + + ret = chtls_handlers[opcode](cdev, skb); + if (ret & CPL_RET_BUF_DONE) + kfree_skb(skb); +} + +static int chtls_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *gl) +{ + struct chtls_dev *cdev = handle; + unsigned int opcode; + struct sk_buff *skb; + + opcode = *(u8 *)rsp; + + if (unlikely(opcode == CPL_RX_PKT)) { + if (chtls_recv_packet(cdev, gl, rsp) < 0) + goto nomem; + return 0; + } + + if (!gl) + return chtls_recv_rsp(cdev, rsp); + +#define RX_PULL_LEN 128 + skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN); + if (unlikely(!skb)) + goto nomem; + chtls_recv(cdev, &skb, rsp); + return 0; + +nomem: + return -ENOMEM; +} + +static int do_chtls_getsockopt(struct sock *sk, char __user *optval, + int __user *optlen) +{ + struct tls_crypto_info crypto_info = { 0 }; + + crypto_info.version = TLS_1_2_VERSION; + if (copy_to_user(optval, &crypto_info, sizeof(struct tls_crypto_info))) + return -EFAULT; + return 0; +} + +static int chtls_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (level != SOL_TLS) + return ctx->sk_proto->getsockopt(sk, level, + optname, optval, optlen); + + return do_chtls_getsockopt(sk, optval, optlen); +} + +static int do_chtls_setsockopt(struct sock *sk, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct tls_crypto_info *crypto_info, tmp_crypto_info; + struct chtls_sock *csk; + int 
keylen; + int cipher_type; + int rc = 0; + + csk = rcu_dereference_sk_user_data(sk); + + if (sockptr_is_null(optval) || optlen < sizeof(*crypto_info)) { + rc = -EINVAL; + goto out; + } + + rc = copy_from_sockptr(&tmp_crypto_info, optval, sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto out; + } + + /* check version */ + if (tmp_crypto_info.version != TLS_1_2_VERSION) { + rc = -ENOTSUPP; + goto out; + } + + crypto_info = (struct tls_crypto_info *)&csk->tlshws.crypto_info; + + /* GCM mode of AES supports 128 and 256 bit encryption, so + * copy keys from user based on GCM cipher type. + */ + switch (tmp_crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + /* Obtain version and type from previous copy */ + crypto_info[0] = tmp_crypto_info; + /* Now copy the following data */ + rc = copy_from_sockptr_offset((char *)crypto_info + + sizeof(*crypto_info), + optval, sizeof(*crypto_info), + sizeof(struct tls12_crypto_info_aes_gcm_128) + - sizeof(*crypto_info)); + + if (rc) { + rc = -EFAULT; + goto out; + } + + keylen = TLS_CIPHER_AES_GCM_128_KEY_SIZE; + cipher_type = TLS_CIPHER_AES_GCM_128; + break; + } + case TLS_CIPHER_AES_GCM_256: { + crypto_info[0] = tmp_crypto_info; + rc = copy_from_sockptr_offset((char *)crypto_info + + sizeof(*crypto_info), + optval, sizeof(*crypto_info), + sizeof(struct tls12_crypto_info_aes_gcm_256) + - sizeof(*crypto_info)); + + if (rc) { + rc = -EFAULT; + goto out; + } + + keylen = TLS_CIPHER_AES_GCM_256_KEY_SIZE; + cipher_type = TLS_CIPHER_AES_GCM_256; + break; + } + default: + rc = -EINVAL; + goto out; + } + rc = chtls_setkey(csk, keylen, optname, cipher_type); +out: + return rc; +} + +static int chtls_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (level != SOL_TLS) + return ctx->sk_proto->setsockopt(sk, level, + optname, optval, optlen); + + return do_chtls_setsockopt(sk, optname, optval, optlen); +} + +static struct cxgb4_uld_info chtls_uld_info = { + .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .ntxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .add = chtls_uld_add, + .state_change = chtls_uld_state_change, + .rx_handler = chtls_uld_rx_handler, +}; + +void chtls_install_cpl_ops(struct sock *sk) +{ + if (sk->sk_family == AF_INET) + sk->sk_prot = &chtls_cpl_prot; + else + sk->sk_prot = &chtls_cpl_protv6; +} + +static void __init chtls_init_ulp_ops(void) +{ + chtls_cpl_prot = tcp_prot; + chtls_init_rsk_ops(&chtls_cpl_prot, &chtls_rsk_ops, + &tcp_prot, PF_INET); + chtls_cpl_prot.close = chtls_close; + chtls_cpl_prot.disconnect = chtls_disconnect; + chtls_cpl_prot.destroy = chtls_destroy_sock; + chtls_cpl_prot.shutdown = chtls_shutdown; + chtls_cpl_prot.sendmsg = chtls_sendmsg; + chtls_cpl_prot.sendpage = chtls_sendpage; + chtls_cpl_prot.recvmsg = chtls_recvmsg; + chtls_cpl_prot.setsockopt = chtls_setsockopt; + chtls_cpl_prot.getsockopt = chtls_getsockopt; +#if IS_ENABLED(CONFIG_IPV6) + chtls_cpl_protv6 = chtls_cpl_prot; + chtls_init_rsk_ops(&chtls_cpl_protv6, &chtls_rsk_opsv6, + &tcpv6_prot, PF_INET6); +#endif +} + +static int __init chtls_register(void) +{ + chtls_init_ulp_ops(); + register_listen_notifier(&listen_notifier); + cxgb4_register_uld(CXGB4_ULD_TLS, &chtls_uld_info); + return 0; +} + +static void __exit chtls_unregister(void) +{ + unregister_listen_notifier(&listen_notifier); + chtls_free_all_uld(); + cxgb4_unregister_uld(CXGB4_ULD_TLS); +} + +module_init(chtls_register); +module_exit(chtls_unregister); + +MODULE_DESCRIPTION("Chelsio TLS Inline 
driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_VERSION(CHTLS_DRV_VERSION); diff --git a/drivers/net/ethernet/cirrus/cs89x0.h b/drivers/net/ethernet/cirrus/cs89x0.h index 91423b70bb45..210f9ec9af4b 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.h +++ b/drivers/net/ethernet/cirrus/cs89x0.h @@ -459,7 +459,3 @@ #define PNP_CNF_INT 0x70 #define PNP_CNF_DMA 0x74 #define PNP_CNF_MEM 0x48 - -#define BIT0 1 -#define BIT15 0x8000 - diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 18f3aeb88f22..c67a16a48d62 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -169,6 +169,7 @@ struct enic { u16 num_vfs; #endif spinlock_t enic_api_lock; + bool enic_api_busy; struct enic_port_profile *pp; /* work queue cache line section */ diff --git a/drivers/net/ethernet/cisco/enic/enic_api.c b/drivers/net/ethernet/cisco/enic/enic_api.c index b161f24522b8..3bdc74fba1e3 100644 --- a/drivers/net/ethernet/cisco/enic/enic_api.c +++ b/drivers/net/ethernet/cisco/enic/enic_api.c @@ -1,4 +1,4 @@ -/** +/* * Copyright 2013 Cisco Systems, Inc. All rights reserved. * * This program is free software; you may redistribute it and/or modify @@ -34,6 +34,12 @@ int enic_api_devcmd_proxy_by_index(struct net_device *netdev, int vf, struct vnic_dev *vdev = enic->vdev; spin_lock(&enic->enic_api_lock); + while (enic->enic_api_busy) { + spin_unlock(&enic->enic_api_lock); + cpu_relax(); + spin_lock(&enic->enic_api_lock); + } + spin_lock_bh(&enic->devcmd_lock); vnic_dev_cmd_proxy_by_index_start(vdev, vf); diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 4d8e0aa447fb..a4dd52bba2c3 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -1,4 +1,4 @@ -/** +/* * Copyright 2013 Cisco Systems, Inc. All rights reserved. 
* * This program is free software; you may redistribute it and/or modify diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 552d89fdf54a..fb269d587b74 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -326,11 +326,11 @@ static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf) struct enic *enic = vnic_dev_priv(wq->vdev); if (buf->sop) - pci_unmap_single(enic->pdev, buf->dma_addr, - buf->len, PCI_DMA_TODEVICE); + dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, + DMA_TO_DEVICE); else - pci_unmap_page(enic->pdev, buf->dma_addr, - buf->len, PCI_DMA_TODEVICE); + dma_unmap_page(&enic->pdev->dev, buf->dma_addr, buf->len, + DMA_TO_DEVICE); if (buf->os_buf) dev_kfree_skb_any(buf->os_buf); @@ -574,8 +574,8 @@ static int enic_queue_wq_skb_vlan(struct enic *enic, struct vnic_wq *wq, dma_addr_t dma_addr; int err = 0; - dma_addr = pci_map_single(enic->pdev, skb->data, head_len, - PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len, + DMA_TO_DEVICE); if (unlikely(enic_dma_map_check(enic, dma_addr))) return -ENOMEM; @@ -605,8 +605,8 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq, dma_addr_t dma_addr; int err = 0; - dma_addr = pci_map_single(enic->pdev, skb->data, head_len, - PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len, + DMA_TO_DEVICE); if (unlikely(enic_dma_map_check(enic, dma_addr))) return -ENOMEM; @@ -693,8 +693,9 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, */ while (frag_len_left) { len = min(frag_len_left, (unsigned int)WQ_ENET_MAX_DESC_LEN); - dma_addr = pci_map_single(enic->pdev, skb->data + offset, len, - PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&enic->pdev->dev, + skb->data + offset, len, + DMA_TO_DEVICE); if (unlikely(enic_dma_map_check(enic, dma_addr))) return -ENOMEM; enic_queue_wq_desc_tso(wq, skb, dma_addr, len, mss, hdr_len, @@ -752,8 +753,8 @@ static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq, dma_addr_t dma_addr; int err = 0; - dma_addr = pci_map_single(enic->pdev, skb->data, head_len, - PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len, + DMA_TO_DEVICE); if (unlikely(enic_dma_map_check(enic, dma_addr))) return -ENOMEM; @@ -1222,8 +1223,8 @@ static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf) if (!buf->os_buf) return; - pci_unmap_single(enic->pdev, buf->dma_addr, - buf->len, PCI_DMA_FROMDEVICE); + dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, + DMA_FROM_DEVICE); dev_kfree_skb_any(buf->os_buf); buf->os_buf = NULL; } @@ -1248,8 +1249,8 @@ static int enic_rq_alloc_buf(struct vnic_rq *rq) if (!skb) return -ENOMEM; - dma_addr = pci_map_single(enic->pdev, skb->data, len, - PCI_DMA_FROMDEVICE); + dma_addr = dma_map_single(&enic->pdev->dev, skb->data, len, + DMA_FROM_DEVICE); if (unlikely(enic_dma_map_check(enic, dma_addr))) { dev_kfree_skb(skb); return -ENOMEM; @@ -1281,8 +1282,8 @@ static bool enic_rxcopybreak(struct net_device *netdev, struct sk_buff **skb, new_skb = netdev_alloc_skb_ip_align(netdev, len); if (!new_skb) return false; - pci_dma_sync_single_for_cpu(enic->pdev, buf->dma_addr, len, - DMA_FROM_DEVICE); + dma_sync_single_for_cpu(&enic->pdev->dev, buf->dma_addr, len, + DMA_FROM_DEVICE); memcpy(new_skb->data, (*skb)->data, len); *skb = new_skb; @@ -1331,8 +1332,8 @@ static void 
enic_rq_indicate_buf(struct vnic_rq *rq, enic->rq_truncated_pkts++; } - pci_unmap_single(enic->pdev, buf->dma_addr, buf->len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, + DMA_FROM_DEVICE); dev_kfree_skb_any(skb); buf->os_buf = NULL; @@ -1346,8 +1347,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, if (!enic_rxcopybreak(netdev, &skb, buf, bytes_written)) { buf->os_buf = NULL; - pci_unmap_single(enic->pdev, buf->dma_addr, buf->len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&enic->pdev->dev, buf->dma_addr, + buf->len, DMA_FROM_DEVICE); } prefetch(skb->data - NET_IP_ALIGN); @@ -1420,8 +1421,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, /* Buffer overflow */ - pci_unmap_single(enic->pdev, buf->dma_addr, buf->len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, + DMA_FROM_DEVICE); dev_kfree_skb_any(skb); buf->os_buf = NULL; } @@ -2106,8 +2107,6 @@ static int enic_dev_wait(struct vnic_dev *vdev, int done; int err; - BUG_ON(in_interrupt()); - err = start(vdev, arg); if (err) return err; @@ -2178,9 +2177,9 @@ int __enic_set_rsskey(struct enic *enic) dma_addr_t rss_key_buf_pa; int i, kidx, bidx, err; - rss_key_buf_va = pci_zalloc_consistent(enic->pdev, - sizeof(union vnic_rss_key), - &rss_key_buf_pa); + rss_key_buf_va = dma_alloc_coherent(&enic->pdev->dev, + sizeof(union vnic_rss_key), + &rss_key_buf_pa, GFP_ATOMIC); if (!rss_key_buf_va) return -ENOMEM; @@ -2195,8 +2194,8 @@ int __enic_set_rsskey(struct enic *enic) sizeof(union vnic_rss_key)); spin_unlock_bh(&enic->devcmd_lock); - pci_free_consistent(enic->pdev, sizeof(union vnic_rss_key), - rss_key_buf_va, rss_key_buf_pa); + dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_key), + rss_key_buf_va, rss_key_buf_pa); return err; } @@ -2215,8 +2214,9 @@ static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits) unsigned int i; int err; - rss_cpu_buf_va = pci_alloc_consistent(enic->pdev, - sizeof(union vnic_rss_cpu), &rss_cpu_buf_pa); + rss_cpu_buf_va = dma_alloc_coherent(&enic->pdev->dev, + sizeof(union vnic_rss_cpu), + &rss_cpu_buf_pa, GFP_ATOMIC); if (!rss_cpu_buf_va) return -ENOMEM; @@ -2229,8 +2229,8 @@ static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits) sizeof(union vnic_rss_cpu)); spin_unlock_bh(&enic->devcmd_lock); - pci_free_consistent(enic->pdev, sizeof(union vnic_rss_cpu), - rss_cpu_buf_va, rss_cpu_buf_pa); + dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_cpu), + rss_cpu_buf_va, rss_cpu_buf_pa); return err; } @@ -2295,6 +2295,13 @@ static int enic_set_rss_nic_cfg(struct enic *enic) rss_hash_bits, rss_base_cpu, rss_enable); } +static void enic_set_api_busy(struct enic *enic, bool busy) +{ + spin_lock(&enic->enic_api_lock); + enic->enic_api_busy = busy; + spin_unlock(&enic->enic_api_lock); +} + static void enic_reset(struct work_struct *work) { struct enic *enic = container_of(work, struct enic, reset); @@ -2304,7 +2311,9 @@ static void enic_reset(struct work_struct *work) rtnl_lock(); - spin_lock(&enic->enic_api_lock); + /* Stop any activity from infiniband */ + enic_set_api_busy(enic, true); + enic_stop(enic->netdev); enic_dev_soft_reset(enic); enic_reset_addr_lists(enic); @@ -2312,7 +2321,10 @@ static void enic_reset(struct work_struct *work) enic_set_rss_nic_cfg(enic); enic_dev_set_ig_vlan_rewrite_mode(enic); enic_open(enic->netdev); - spin_unlock(&enic->enic_api_lock); + + /* Allow infiniband to fiddle with the device again */ + enic_set_api_busy(enic, false); + call_netdevice_notifiers(NETDEV_REBOOT, 
enic->netdev); rtnl_unlock(); @@ -2324,7 +2336,9 @@ static void enic_tx_hang_reset(struct work_struct *work) rtnl_lock(); - spin_lock(&enic->enic_api_lock); + /* Stop any activity from infiniband */ + enic_set_api_busy(enic, true); + enic_dev_hang_notify(enic); enic_stop(enic->netdev); enic_dev_hang_reset(enic); @@ -2333,7 +2347,10 @@ static void enic_tx_hang_reset(struct work_struct *work) enic_set_rss_nic_cfg(enic); enic_dev_set_ig_vlan_rewrite_mode(enic); enic_open(enic->netdev); - spin_unlock(&enic->enic_api_lock); + + /* Allow infiniband to fiddle with the device again */ + enic_set_api_busy(enic, false); + call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev); rtnl_unlock(); @@ -2527,13 +2544,15 @@ static void enic_dev_deinit(struct enic *enic) { unsigned int i; - for (i = 0; i < enic->rq_count; i++) { - napi_hash_del(&enic->napi[i]); - netif_napi_del(&enic->napi[i]); - } + for (i = 0; i < enic->rq_count; i++) + __netif_napi_del(&enic->napi[i]); + if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) - netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); + __netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); + + /* observe RCU grace period after __netif_napi_del() calls */ + synchronize_net(); enic_free_vnic_resources(enic); enic_clear_intr_mode(enic); @@ -2699,21 +2718,21 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * fail to 32-bit. */ - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(47)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(47)); if (err) { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) { dev_err(dev, "No usable DMA configuration, aborting\n"); goto err_out_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) { dev_err(dev, "Unable to obtain %u-bit DMA " "for consistent allocations, aborting\n", 32); goto err_out_release_regions; } } else { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(47)); + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(47)); if (err) { dev_err(dev, "Unable to obtain %u-bit DMA " "for consistent allocations, aborting\n", 47); diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c index 901e44b0b795..45015931b335 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.c +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c @@ -193,9 +193,10 @@ int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, { vnic_dev_desc_ring_size(ring, desc_count, desc_size); - ring->descs_unaligned = pci_alloc_consistent(vdev->pdev, - ring->size_unaligned, - &ring->base_addr_unaligned); + ring->descs_unaligned = dma_alloc_coherent(&vdev->pdev->dev, + ring->size_unaligned, + &ring->base_addr_unaligned, + GFP_KERNEL); if (!ring->descs_unaligned) { vdev_err(vdev, "Failed to allocate ring (size=%d), aborting\n", @@ -218,10 +219,9 @@ int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, void vnic_dev_free_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring) { if (ring->descs) { - pci_free_consistent(vdev->pdev, - ring->size_unaligned, - ring->descs_unaligned, - ring->base_addr_unaligned); + dma_free_coherent(&vdev->pdev->dev, ring->size_unaligned, + ring->descs_unaligned, + ring->base_addr_unaligned); ring->descs = NULL; } } @@ -551,9 +551,9 @@ int vnic_dev_fw_info(struct vnic_dev *vdev, int err = 0; if (!vdev->fw_info) { - 
vdev->fw_info = pci_zalloc_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_fw_info), - &vdev->fw_info_pa); + vdev->fw_info = dma_alloc_coherent(&vdev->pdev->dev, + sizeof(struct vnic_devcmd_fw_info), + &vdev->fw_info_pa, GFP_ATOMIC); if (!vdev->fw_info) return -ENOMEM; @@ -603,8 +603,9 @@ int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats) int wait = 1000; if (!vdev->stats) { - vdev->stats = pci_alloc_consistent(vdev->pdev, - sizeof(struct vnic_stats), &vdev->stats_pa); + vdev->stats = dma_alloc_coherent(&vdev->pdev->dev, + sizeof(struct vnic_stats), + &vdev->stats_pa, GFP_ATOMIC); if (!vdev->stats) return -ENOMEM; } @@ -852,9 +853,9 @@ int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr) return -EINVAL; } - notify_addr = pci_alloc_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_notify), - ¬ify_pa); + notify_addr = dma_alloc_coherent(&vdev->pdev->dev, + sizeof(struct vnic_devcmd_notify), + ¬ify_pa, GFP_ATOMIC); if (!notify_addr) return -ENOMEM; @@ -882,10 +883,9 @@ static int vnic_dev_notify_unsetcmd(struct vnic_dev *vdev) int vnic_dev_notify_unset(struct vnic_dev *vdev) { if (vdev->notify) { - pci_free_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_notify), - vdev->notify, - vdev->notify_pa); + dma_free_coherent(&vdev->pdev->dev, + sizeof(struct vnic_devcmd_notify), + vdev->notify, vdev->notify_pa); } return vnic_dev_notify_unsetcmd(vdev); @@ -1046,18 +1046,17 @@ void vnic_dev_unregister(struct vnic_dev *vdev) { if (vdev) { if (vdev->notify) - pci_free_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_notify), - vdev->notify, - vdev->notify_pa); + dma_free_coherent(&vdev->pdev->dev, + sizeof(struct vnic_devcmd_notify), + vdev->notify, vdev->notify_pa); if (vdev->stats) - pci_free_consistent(vdev->pdev, - sizeof(struct vnic_stats), - vdev->stats, vdev->stats_pa); + dma_free_coherent(&vdev->pdev->dev, + sizeof(struct vnic_stats), + vdev->stats, vdev->stats_pa); if (vdev->fw_info) - pci_free_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_fw_info), - vdev->fw_info, vdev->fw_info_pa); + dma_free_coherent(&vdev->pdev->dev, + sizeof(struct vnic_devcmd_fw_info), + vdev->fw_info, vdev->fw_info_pa); if (vdev->devcmd2) vnic_dev_deinit_devcmd2(vdev); @@ -1127,7 +1126,7 @@ int vnic_dev_init_prov2(struct vnic_dev *vdev, u8 *buf, u32 len) void *prov_buf; int ret; - prov_buf = pci_alloc_consistent(vdev->pdev, len, &prov_pa); + prov_buf = dma_alloc_coherent(&vdev->pdev->dev, len, &prov_pa, GFP_ATOMIC); if (!prov_buf) return -ENOMEM; @@ -1137,7 +1136,7 @@ int vnic_dev_init_prov2(struct vnic_dev *vdev, u8 *buf, u32 len) ret = vnic_dev_cmd(vdev, CMD_INIT_PROV_INFO2, &a0, &a1, wait); - pci_free_consistent(vdev->pdev, len, prov_buf, prov_pa); + dma_free_coherent(&vdev->pdev->dev, len, prov_buf, prov_pa); return ret; } @@ -1217,7 +1216,8 @@ int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry, tlv_size = sizeof(struct filter) + sizeof(struct filter_action) + 2 * sizeof(struct filter_tlv); - tlv_va = pci_alloc_consistent(vdev->pdev, tlv_size, &tlv_pa); + tlv_va = dma_alloc_coherent(&vdev->pdev->dev, tlv_size, + &tlv_pa, GFP_ATOMIC); if (!tlv_va) return -ENOMEM; tlv = tlv_va; @@ -1240,7 +1240,7 @@ int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry, ret = vnic_dev_cmd(vdev, CMD_ADD_FILTER, &a0, &a1, wait); *entry = (u16)a0; - pci_free_consistent(vdev->pdev, tlv_size, tlv_va, tlv_pa); + dma_free_coherent(&vdev->pdev->dev, tlv_size, tlv_va, tlv_pa); } else if (cmd == CLSF_DEL) { a0 = *entry; ret = vnic_dev_cmd(vdev, CMD_DEL_FILTER, &a0, &a1, wait); 
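The enic and vnic_dev hunks above, like the tulip-family hunks below, apply one mechanical conversion: the legacy PCI DMA wrappers (pci_alloc_consistent()/pci_free_consistent(), pci_map_single()/pci_unmap_single(), PCI_DMA_TODEVICE/PCI_DMA_FROMDEVICE) are replaced by the generic DMA API (dma_alloc_coherent()/dma_free_coherent(), dma_map_single()/dma_unmap_single(), DMA_TO_DEVICE/DMA_FROM_DEVICE), which operates on &pdev->dev rather than the pci_dev and takes an explicit gfp_t for coherent allocations. A minimal sketch of the resulting shape, using a hypothetical "foo" driver rather than code from any driver in this series:

#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>

struct foo_ring {
	void *descs;		/* CPU address of the descriptor ring */
	dma_addr_t descs_dma;	/* bus address handed to the device */
};

/* Coherent allocation: pci_alloc_consistent(pdev, size, &dma) becomes
 * dma_alloc_coherent(&pdev->dev, size, &dma, gfp).
 */
static int foo_alloc_ring(struct pci_dev *pdev, struct foo_ring *ring,
			  size_t size)
{
	ring->descs = dma_alloc_coherent(&pdev->dev, size, &ring->descs_dma,
					 GFP_KERNEL);
	return ring->descs ? 0 : -ENOMEM;
}

static void foo_free_ring(struct pci_dev *pdev, struct foo_ring *ring,
			  size_t size)
{
	dma_free_coherent(&pdev->dev, size, ring->descs, ring->descs_dma);
	ring->descs = NULL;
}

/* Streaming mapping: pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE)
 * becomes dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE); mappings
 * can fail and should be checked with dma_mapping_error().
 */
static int foo_map_tx_buf(struct pci_dev *pdev, void *buf, size_t len,
			  dma_addr_t *addr)
{
	*addr = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, *addr))
		return -ENOMEM;
	return 0;
}

The gfp_t argument is the one place the conversion is not purely textual: pci_alloc_consistent() always allocated with GFP_ATOMIC, so call sites that may sleep can now pass GFP_KERNEL (as the probe-path hunks above do), while paths that may run in atomic context keep GFP_ATOMIC (as in the vnic_dev devcmd allocations).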
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index ffec0f3dd957..8df6f081f244 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -85,6 +85,8 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); /** * struct gmac_queue_page - page buffer per-page info + * @page: the page struct + * @mapping: the dma address handle */ struct gmac_queue_page { struct page *page; @@ -509,7 +511,6 @@ static int gmac_init(struct net_device *netdev) .rel_threshold = 0, } }; union gmac_config0 tmp; - u32 val; config0.bits.max_len = gmac_pick_rx_max_len(netdev->mtu); tmp.bits32 = readl(port->gmac_base + GMAC_CONFIG0); @@ -519,7 +520,7 @@ static int gmac_init(struct net_device *netdev) writel(config2.bits32, port->gmac_base + GMAC_CONFIG2); writel(config3.bits32, port->gmac_base + GMAC_CONFIG3); - val = readl(port->dma_base + GMAC_AHB_WEIGHT_REG); + readl(port->dma_base + GMAC_AHB_WEIGHT_REG); writel(ahb_weight.bits32, port->dma_base + GMAC_AHB_WEIGHT_REG); writel(hw_weigh.bits32, @@ -539,12 +540,6 @@ static int gmac_init(struct net_device *netdev) return 0; } -static void gmac_uninit(struct net_device *netdev) -{ - if (netdev->phydev) - phy_disconnect(netdev->phydev); -} - static int gmac_setup_txqs(struct net_device *netdev) { struct gemini_ethernet_port *port = netdev_priv(netdev); @@ -1768,15 +1763,6 @@ static int gmac_open(struct net_device *netdev) struct gemini_ethernet_port *port = netdev_priv(netdev); int err; - if (!netdev->phydev) { - err = gmac_setup_phy(netdev); - if (err) { - netif_err(port, ifup, netdev, - "PHY init failed: %d\n", err); - return err; - } - } - err = request_irq(netdev->irq, gmac_irq, IRQF_SHARED, netdev->name, netdev); if (err) { @@ -2122,9 +2108,8 @@ static void gmac_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *rp) { struct gemini_ethernet_port *port = netdev_priv(netdev); - union gmac_config0 config0; - config0.bits32 = readl(port->gmac_base + GMAC_CONFIG0); + readl(port->gmac_base + GMAC_CONFIG0); rp->rx_max_pending = 1 << 15; rp->rx_mini_max_pending = 0; @@ -2209,7 +2194,6 @@ static void gmac_get_drvinfo(struct net_device *netdev, static const struct net_device_ops gmac_351x_ops = { .ndo_init = gmac_init, - .ndo_uninit = gmac_uninit, .ndo_open = gmac_open, .ndo_stop = gmac_stop, .ndo_start_xmit = gmac_start_xmit, @@ -2295,8 +2279,10 @@ static irqreturn_t gemini_port_irq(int irq, void *data) static void gemini_port_remove(struct gemini_ethernet_port *port) { - if (port->netdev) + if (port->netdev) { + phy_disconnect(port->netdev->phydev); unregister_netdev(port->netdev); + } clk_disable_unprepare(port->pclk); geth_cleanup_freeq(port->geth); } @@ -2505,6 +2491,13 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) if (ret) goto unprepare; + ret = gmac_setup_phy(netdev); + if (ret) { + netdev_err(netdev, + "PHY init failed\n"); + goto unprepare; + } + ret = register_netdev(netdev); if (ret) goto unprepare; @@ -2513,10 +2506,6 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) "irq %d, DMA @ 0x%pap, GMAC @ 0x%pap\n", port->irq, &dmares->start, &gmacres->start); - ret = gmac_setup_phy(netdev); - if (ret) - netdev_info(netdev, - "PHY init failed, deferring to ifup time\n"); return 0; unprepare: @@ -2529,6 +2518,7 @@ static int gemini_ethernet_port_remove(struct platform_device *pdev) struct gemini_ethernet_port *port = platform_get_drvdata(pdev); gemini_port_remove(port); + return 0; } diff --git 
a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index 2610efe4f873..d9f6c19940ef 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -443,21 +443,23 @@ static void de_rx (struct de_private *de) } if (!copying_skb) { - pci_unmap_single(de->pdev, mapping, - buflen, PCI_DMA_FROMDEVICE); + dma_unmap_single(&de->pdev->dev, mapping, buflen, + DMA_FROM_DEVICE); skb_put(skb, len); mapping = de->rx_skb[rx_tail].mapping = - pci_map_single(de->pdev, copy_skb->data, - buflen, PCI_DMA_FROMDEVICE); + dma_map_single(&de->pdev->dev, copy_skb->data, + buflen, DMA_FROM_DEVICE); de->rx_skb[rx_tail].skb = copy_skb; } else { - pci_dma_sync_single_for_cpu(de->pdev, mapping, len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&de->pdev->dev, mapping, len, + DMA_FROM_DEVICE); skb_reserve(copy_skb, RX_OFFSET); skb_copy_from_linear_data(skb, skb_put(copy_skb, len), len); - pci_dma_sync_single_for_device(de->pdev, mapping, len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&de->pdev->dev, mapping, + len, DMA_FROM_DEVICE); /* We'll reuse the original ring buffer. */ skb = copy_skb; @@ -554,13 +556,15 @@ static void de_tx (struct de_private *de) goto next; if (unlikely(skb == DE_SETUP_SKB)) { - pci_unmap_single(de->pdev, de->tx_skb[tx_tail].mapping, - sizeof(de->setup_frame), PCI_DMA_TODEVICE); + dma_unmap_single(&de->pdev->dev, + de->tx_skb[tx_tail].mapping, + sizeof(de->setup_frame), + DMA_TO_DEVICE); goto next; } - pci_unmap_single(de->pdev, de->tx_skb[tx_tail].mapping, - skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&de->pdev->dev, de->tx_skb[tx_tail].mapping, + skb->len, DMA_TO_DEVICE); if (status & LastFrag) { if (status & TxError) { @@ -620,7 +624,8 @@ static netdev_tx_t de_start_xmit (struct sk_buff *skb, txd = &de->tx_ring[entry]; len = skb->len; - mapping = pci_map_single(de->pdev, skb->data, len, PCI_DMA_TODEVICE); + mapping = dma_map_single(&de->pdev->dev, skb->data, len, + DMA_TO_DEVICE); if (entry == (DE_TX_RING_SIZE - 1)) flags |= RingEnd; if (!tx_free || (tx_free == (DE_TX_RING_SIZE / 2))) @@ -763,8 +768,8 @@ static void __de_set_rx_mode (struct net_device *dev) de->tx_skb[entry].skb = DE_SETUP_SKB; de->tx_skb[entry].mapping = mapping = - pci_map_single (de->pdev, de->setup_frame, - sizeof (de->setup_frame), PCI_DMA_TODEVICE); + dma_map_single(&de->pdev->dev, de->setup_frame, + sizeof(de->setup_frame), DMA_TO_DEVICE); /* Put the setup frame on the Tx list. 
*/ txd = &de->tx_ring[entry]; @@ -1279,8 +1284,10 @@ static int de_refill_rx (struct de_private *de) if (!skb) goto err_out; - de->rx_skb[i].mapping = pci_map_single(de->pdev, - skb->data, de->rx_buf_sz, PCI_DMA_FROMDEVICE); + de->rx_skb[i].mapping = dma_map_single(&de->pdev->dev, + skb->data, + de->rx_buf_sz, + DMA_FROM_DEVICE); de->rx_skb[i].skb = skb; de->rx_ring[i].opts1 = cpu_to_le32(DescOwn); @@ -1313,7 +1320,8 @@ static int de_init_rings (struct de_private *de) static int de_alloc_rings (struct de_private *de) { - de->rx_ring = pci_alloc_consistent(de->pdev, DE_RING_BYTES, &de->ring_dma); + de->rx_ring = dma_alloc_coherent(&de->pdev->dev, DE_RING_BYTES, + &de->ring_dma, GFP_KERNEL); if (!de->rx_ring) return -ENOMEM; de->tx_ring = &de->rx_ring[DE_RX_RING_SIZE]; @@ -1333,8 +1341,9 @@ static void de_clean_rings (struct de_private *de) for (i = 0; i < DE_RX_RING_SIZE; i++) { if (de->rx_skb[i].skb) { - pci_unmap_single(de->pdev, de->rx_skb[i].mapping, - de->rx_buf_sz, PCI_DMA_FROMDEVICE); + dma_unmap_single(&de->pdev->dev, + de->rx_skb[i].mapping, de->rx_buf_sz, + DMA_FROM_DEVICE); dev_kfree_skb(de->rx_skb[i].skb); } } @@ -1344,15 +1353,15 @@ static void de_clean_rings (struct de_private *de) if ((skb) && (skb != DE_DUMMY_SKB)) { if (skb != DE_SETUP_SKB) { de->dev->stats.tx_dropped++; - pci_unmap_single(de->pdev, - de->tx_skb[i].mapping, - skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&de->pdev->dev, + de->tx_skb[i].mapping, + skb->len, DMA_TO_DEVICE); dev_kfree_skb(skb); } else { - pci_unmap_single(de->pdev, - de->tx_skb[i].mapping, - sizeof(de->setup_frame), - PCI_DMA_TODEVICE); + dma_unmap_single(&de->pdev->dev, + de->tx_skb[i].mapping, + sizeof(de->setup_frame), + DMA_TO_DEVICE); } } } @@ -1364,7 +1373,8 @@ static void de_clean_rings (struct de_private *de) static void de_free_rings (struct de_private *de) { de_clean_rings(de); - pci_free_consistent(de->pdev, DE_RING_BYTES, de->rx_ring, de->ring_dma); + dma_free_coherent(&de->pdev->dev, DE_RING_BYTES, de->rx_ring, + de->ring_dma); de->rx_ring = NULL; de->tx_ring = NULL; } diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index f9dd1aa9f2da..683e328b5461 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -4925,11 +4925,11 @@ mii_get_oui(u_char phyaddr, u_long ioaddr) u_char breg[2]; } a; int i, r2, r3, ret=0;*/ - int r2, r3; + int r2; /* Read r2 and r3 */ r2 = mii_rd(MII_ID0, phyaddr, ioaddr); - r3 = mii_rd(MII_ID1, phyaddr, ioaddr); + mii_rd(MII_ID1, phyaddr, ioaddr); /* SEEQ and Cypress way * / / * Shuffle r2 and r3 * / a.reg=0; diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index c3b4abff48b5..87a27fe2992d 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -380,7 +380,7 @@ static int dmfe_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; SET_NETDEV_DEV(dev, &pdev->dev); - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) { pr_warn("32-bit PCI DMA not available\n"); err = -ENODEV; goto err_out_free; @@ -422,15 +422,17 @@ static int dmfe_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) db = netdev_priv(dev); /* Allocate Tx/Rx descriptor memory */ - db->desc_pool_ptr = pci_alloc_consistent(pdev, sizeof(struct tx_desc) * - DESC_ALL_CNT + 0x20, &db->desc_pool_dma_ptr); + db->desc_pool_ptr = dma_alloc_coherent(&pdev->dev, + sizeof(struct tx_desc) * DESC_ALL_CNT + 
0x20, + &db->desc_pool_dma_ptr, GFP_KERNEL); if (!db->desc_pool_ptr) { err = -ENOMEM; goto err_out_res; } - db->buf_pool_ptr = pci_alloc_consistent(pdev, TX_BUF_ALLOC * - TX_DESC_CNT + 4, &db->buf_pool_dma_ptr); + db->buf_pool_ptr = dma_alloc_coherent(&pdev->dev, + TX_BUF_ALLOC * TX_DESC_CNT + 4, + &db->buf_pool_dma_ptr, GFP_KERNEL); if (!db->buf_pool_ptr) { err = -ENOMEM; goto err_out_free_desc; @@ -492,11 +494,12 @@ static int dmfe_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_unmap: pci_iounmap(pdev, db->ioaddr); err_out_free_buf: - pci_free_consistent(pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, - db->buf_pool_ptr, db->buf_pool_dma_ptr); + dma_free_coherent(&pdev->dev, TX_BUF_ALLOC * TX_DESC_CNT + 4, + db->buf_pool_ptr, db->buf_pool_dma_ptr); err_out_free_desc: - pci_free_consistent(pdev, sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, - db->desc_pool_ptr, db->desc_pool_dma_ptr); + dma_free_coherent(&pdev->dev, + sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, + db->desc_pool_ptr, db->desc_pool_dma_ptr); err_out_res: pci_release_regions(pdev); err_out_disable: @@ -519,11 +522,12 @@ static void dmfe_remove_one(struct pci_dev *pdev) unregister_netdev(dev); pci_iounmap(db->pdev, db->ioaddr); - pci_free_consistent(db->pdev, sizeof(struct tx_desc) * - DESC_ALL_CNT + 0x20, db->desc_pool_ptr, - db->desc_pool_dma_ptr); - pci_free_consistent(db->pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, - db->buf_pool_ptr, db->buf_pool_dma_ptr); + dma_free_coherent(&db->pdev->dev, + sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, + db->desc_pool_ptr, db->desc_pool_dma_ptr); + dma_free_coherent(&db->pdev->dev, + TX_BUF_ALLOC * TX_DESC_CNT + 4, + db->buf_pool_ptr, db->buf_pool_dma_ptr); pci_release_regions(pdev); free_netdev(dev); /* free board information */ } @@ -955,8 +959,8 @@ static void dmfe_rx_packet(struct net_device *dev, struct dmfe_board_info *db) db->rx_avail_cnt--; db->interval_rx_cnt++; - pci_unmap_single(db->pdev, le32_to_cpu(rxptr->rdes2), - RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE); + dma_unmap_single(&db->pdev->dev, le32_to_cpu(rxptr->rdes2), + RX_ALLOC_SIZE, DMA_FROM_DEVICE); if ( (rdes0 & 0x300) != 0x300) { /* A packet without First/Last flag */ @@ -1329,8 +1333,8 @@ static void dmfe_reuse_skb(struct dmfe_board_info *db, struct sk_buff * skb) if (!(rxptr->rdes0 & cpu_to_le32(0x80000000))) { rxptr->rx_skb_ptr = skb; - rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, - skb->data, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) ); + rxptr->rdes2 = cpu_to_le32(dma_map_single(&db->pdev->dev, skb->data, + RX_ALLOC_SIZE, DMA_FROM_DEVICE)); wmb(); rxptr->rdes0 = cpu_to_le32(0x80000000); db->rx_avail_cnt++; @@ -1544,8 +1548,8 @@ static void allocate_rx_buffer(struct net_device *dev) if ( ( skb = netdev_alloc_skb(dev, RX_ALLOC_SIZE) ) == NULL ) break; rxptr->rx_skb_ptr = skb; /* FIXME (?) 
*/ - rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->data, - RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) ); + rxptr->rdes2 = cpu_to_le32(dma_map_single(&db->pdev->dev, skb->data, + RX_ALLOC_SIZE, DMA_FROM_DEVICE)); wmb(); rxptr->rdes0 = cpu_to_le32(0x80000000); rxptr = rxptr->next_rx_desc; diff --git a/drivers/net/ethernet/dec/tulip/interrupt.c b/drivers/net/ethernet/dec/tulip/interrupt.c index c1ca0765d56d..54560f9a1651 100644 --- a/drivers/net/ethernet/dec/tulip/interrupt.c +++ b/drivers/net/ethernet/dec/tulip/interrupt.c @@ -74,8 +74,8 @@ int tulip_refill_rx(struct net_device *dev) if (skb == NULL) break; - mapping = pci_map_single(tp->pdev, skb->data, PKT_BUF_SZ, - PCI_DMA_FROMDEVICE); + mapping = dma_map_single(&tp->pdev->dev, skb->data, + PKT_BUF_SZ, DMA_FROM_DEVICE); if (dma_mapping_error(&tp->pdev->dev, mapping)) { dev_kfree_skb(skb); tp->rx_buffers[entry].skb = NULL; @@ -210,9 +210,10 @@ int tulip_poll(struct napi_struct *napi, int budget) if (pkt_len < tulip_rx_copybreak && (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* 16 byte align the IP header */ - pci_dma_sync_single_for_cpu(tp->pdev, - tp->rx_buffers[entry].mapping, - pkt_len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&tp->pdev->dev, + tp->rx_buffers[entry].mapping, + pkt_len, + DMA_FROM_DEVICE); #if ! defined(__alpha__) skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data, pkt_len); @@ -222,9 +223,10 @@ int tulip_poll(struct napi_struct *napi, int budget) tp->rx_buffers[entry].skb->data, pkt_len); #endif - pci_dma_sync_single_for_device(tp->pdev, - tp->rx_buffers[entry].mapping, - pkt_len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&tp->pdev->dev, + tp->rx_buffers[entry].mapping, + pkt_len, + DMA_FROM_DEVICE); } else { /* Pass up the skb already on the Rx ring. */ char *temp = skb_put(skb = tp->rx_buffers[entry].skb, pkt_len); @@ -240,8 +242,10 @@ int tulip_poll(struct napi_struct *napi, int budget) } #endif - pci_unmap_single(tp->pdev, tp->rx_buffers[entry].mapping, - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_unmap_single(&tp->pdev->dev, + tp->rx_buffers[entry].mapping, + PKT_BUF_SZ, + DMA_FROM_DEVICE); tp->rx_buffers[entry].skb = NULL; tp->rx_buffers[entry].mapping = 0; @@ -436,9 +440,10 @@ static int tulip_rx(struct net_device *dev) if (pkt_len < tulip_rx_copybreak && (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* 16 byte align the IP header */ - pci_dma_sync_single_for_cpu(tp->pdev, - tp->rx_buffers[entry].mapping, - pkt_len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&tp->pdev->dev, + tp->rx_buffers[entry].mapping, + pkt_len, + DMA_FROM_DEVICE); #if ! defined(__alpha__) skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data, pkt_len); @@ -448,9 +453,10 @@ static int tulip_rx(struct net_device *dev) tp->rx_buffers[entry].skb->data, pkt_len); #endif - pci_dma_sync_single_for_device(tp->pdev, - tp->rx_buffers[entry].mapping, - pkt_len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&tp->pdev->dev, + tp->rx_buffers[entry].mapping, + pkt_len, + DMA_FROM_DEVICE); } else { /* Pass up the skb already on the Rx ring. 
*/ char *temp = skb_put(skb = tp->rx_buffers[entry].skb, pkt_len); @@ -466,8 +472,9 @@ static int tulip_rx(struct net_device *dev) } #endif - pci_unmap_single(tp->pdev, tp->rx_buffers[entry].mapping, - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_unmap_single(&tp->pdev->dev, + tp->rx_buffers[entry].mapping, + PKT_BUF_SZ, DMA_FROM_DEVICE); tp->rx_buffers[entry].skb = NULL; tp->rx_buffers[entry].mapping = 0; @@ -597,10 +604,10 @@ irqreturn_t tulip_interrupt(int irq, void *dev_instance) if (tp->tx_buffers[entry].skb == NULL) { /* test because dummy frames not mapped */ if (tp->tx_buffers[entry].mapping) - pci_unmap_single(tp->pdev, - tp->tx_buffers[entry].mapping, - sizeof(tp->setup_frame), - PCI_DMA_TODEVICE); + dma_unmap_single(&tp->pdev->dev, + tp->tx_buffers[entry].mapping, + sizeof(tp->setup_frame), + DMA_TO_DEVICE); continue; } @@ -629,9 +636,10 @@ irqreturn_t tulip_interrupt(int irq, void *dev_instance) dev->stats.tx_packets++; } - pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping, + dma_unmap_single(&tp->pdev->dev, + tp->tx_buffers[entry].mapping, tp->tx_buffers[entry].skb->len, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); /* Free the original skb. */ dev_kfree_skb_irq(tp->tx_buffers[entry].skb); diff --git a/drivers/net/ethernet/dec/tulip/media.c b/drivers/net/ethernet/dec/tulip/media.c index dcf21a36a9cf..011604787b8e 100644 --- a/drivers/net/ethernet/dec/tulip/media.c +++ b/drivers/net/ethernet/dec/tulip/media.c @@ -319,13 +319,8 @@ void tulip_select_media(struct net_device *dev, int startup) break; } case 5: case 6: { - u16 setup[5]; - new_csr6 = 0; /* FIXME */ - for (i = 0; i < 5; i++) - setup[i] = get_u16(&p[i*2 + 1]); - if (startup && mtable->has_reset) { struct medialeaf *rleaf = &mtable->mleaf[mtable->has_reset]; unsigned char *rst = rleaf->leafdata; diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 3a8659c5da06..e7b0d7de40fd 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -350,9 +350,9 @@ static void tulip_up(struct net_device *dev) *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2]; - mapping = pci_map_single(tp->pdev, tp->setup_frame, + mapping = dma_map_single(&tp->pdev->dev, tp->setup_frame, sizeof(tp->setup_frame), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); tp->tx_buffers[tp->cur_tx].skb = NULL; tp->tx_buffers[tp->cur_tx].mapping = mapping; @@ -630,8 +630,8 @@ static void tulip_init_ring(struct net_device *dev) tp->rx_buffers[i].skb = skb; if (skb == NULL) break; - mapping = pci_map_single(tp->pdev, skb->data, - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + mapping = dma_map_single(&tp->pdev->dev, skb->data, + PKT_BUF_SZ, DMA_FROM_DEVICE); tp->rx_buffers[i].mapping = mapping; tp->rx_ring[i].status = cpu_to_le32(DescOwned); /* Owned by Tulip chip */ tp->rx_ring[i].buffer1 = cpu_to_le32(mapping); @@ -664,8 +664,8 @@ tulip_start_xmit(struct sk_buff *skb, struct net_device *dev) entry = tp->cur_tx % TX_RING_SIZE; tp->tx_buffers[entry].skb = skb; - mapping = pci_map_single(tp->pdev, skb->data, - skb->len, PCI_DMA_TODEVICE); + mapping = dma_map_single(&tp->pdev->dev, skb->data, skb->len, + DMA_TO_DEVICE); tp->tx_buffers[entry].mapping = mapping; tp->tx_ring[entry].buffer1 = cpu_to_le32(mapping); @@ -716,16 +716,17 @@ static void tulip_clean_tx_ring(struct tulip_private *tp) if (tp->tx_buffers[entry].skb == NULL) { /* test because dummy frames not mapped */ if (tp->tx_buffers[entry].mapping) - pci_unmap_single(tp->pdev, - 
tp->tx_buffers[entry].mapping, - sizeof(tp->setup_frame), - PCI_DMA_TODEVICE); + dma_unmap_single(&tp->pdev->dev, + tp->tx_buffers[entry].mapping, + sizeof(tp->setup_frame), + DMA_TO_DEVICE); continue; } - pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping, - tp->tx_buffers[entry].skb->len, - PCI_DMA_TODEVICE); + dma_unmap_single(&tp->pdev->dev, + tp->tx_buffers[entry].mapping, + tp->tx_buffers[entry].skb->len, + DMA_TO_DEVICE); /* Free the original skb. */ dev_kfree_skb_irq(tp->tx_buffers[entry].skb); @@ -795,8 +796,8 @@ static void tulip_free_ring (struct net_device *dev) /* An invalid address. */ tp->rx_ring[i].buffer1 = cpu_to_le32(0xBADF00D0); if (skb) { - pci_unmap_single(tp->pdev, mapping, PKT_BUF_SZ, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&tp->pdev->dev, mapping, PKT_BUF_SZ, + DMA_FROM_DEVICE); dev_kfree_skb (skb); } } @@ -805,8 +806,9 @@ static void tulip_free_ring (struct net_device *dev) struct sk_buff *skb = tp->tx_buffers[i].skb; if (skb != NULL) { - pci_unmap_single(tp->pdev, tp->tx_buffers[i].mapping, - skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&tp->pdev->dev, + tp->tx_buffers[i].mapping, skb->len, + DMA_TO_DEVICE); dev_kfree_skb (skb); } tp->tx_buffers[i].skb = NULL; @@ -1149,9 +1151,10 @@ static void set_rx_mode(struct net_device *dev) tp->tx_buffers[entry].skb = NULL; tp->tx_buffers[entry].mapping = - pci_map_single(tp->pdev, tp->setup_frame, + dma_map_single(&tp->pdev->dev, + tp->setup_frame, sizeof(tp->setup_frame), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); /* Put the setup frame on the Tx list. */ if (entry == TX_RING_SIZE-1) tx_flags |= DESC_RING_WRAP; /* Wrap ring. */ @@ -1422,10 +1425,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp = netdev_priv(dev); tp->dev = dev; - tp->rx_ring = pci_alloc_consistent(pdev, - sizeof(struct tulip_rx_desc) * RX_RING_SIZE + - sizeof(struct tulip_tx_desc) * TX_RING_SIZE, - &tp->rx_ring_dma); + tp->rx_ring = dma_alloc_coherent(&pdev->dev, + sizeof(struct tulip_rx_desc) * RX_RING_SIZE + + sizeof(struct tulip_tx_desc) * TX_RING_SIZE, + &tp->rx_ring_dma, GFP_KERNEL); if (!tp->rx_ring) goto err_out_mtable; tp->tx_ring = (struct tulip_tx_desc *)(tp->rx_ring + RX_RING_SIZE); @@ -1757,10 +1760,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_out_free_ring: - pci_free_consistent (pdev, - sizeof (struct tulip_rx_desc) * RX_RING_SIZE + - sizeof (struct tulip_tx_desc) * TX_RING_SIZE, - tp->rx_ring, tp->rx_ring_dma); + dma_free_coherent(&pdev->dev, + sizeof(struct tulip_rx_desc) * RX_RING_SIZE + + sizeof(struct tulip_tx_desc) * TX_RING_SIZE, + tp->rx_ring, tp->rx_ring_dma); err_out_mtable: kfree (tp->mtable); @@ -1878,10 +1881,10 @@ static void tulip_remove_one(struct pci_dev *pdev) tp = netdev_priv(dev); unregister_netdev(dev); - pci_free_consistent (pdev, - sizeof (struct tulip_rx_desc) * RX_RING_SIZE + - sizeof (struct tulip_tx_desc) * TX_RING_SIZE, - tp->rx_ring, tp->rx_ring_dma); + dma_free_coherent(&pdev->dev, + sizeof(struct tulip_rx_desc) * RX_RING_SIZE + + sizeof(struct tulip_tx_desc) * TX_RING_SIZE, + tp->rx_ring, tp->rx_ring_dma); kfree (tp->mtable); pci_iounmap(pdev, tp->base_addr); free_netdev (dev); diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index f942399f0f32..13e73ed15ef0 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -282,7 +282,7 @@ static int uli526x_init_one(struct pci_dev *pdev, return -ENOMEM; SET_NETDEV_DEV(dev, 
&pdev->dev); - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) { pr_warn("32-bit PCI DMA not available\n"); err = -ENODEV; goto err_out_free; @@ -317,11 +317,15 @@ static int uli526x_init_one(struct pci_dev *pdev, /* Allocate Tx/Rx descriptor memory */ err = -ENOMEM; - db->desc_pool_ptr = pci_alloc_consistent(pdev, sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, &db->desc_pool_dma_ptr); + db->desc_pool_ptr = dma_alloc_coherent(&pdev->dev, + sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, + &db->desc_pool_dma_ptr, GFP_KERNEL); if (!db->desc_pool_ptr) goto err_out_release; - db->buf_pool_ptr = pci_alloc_consistent(pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, &db->buf_pool_dma_ptr); + db->buf_pool_ptr = dma_alloc_coherent(&pdev->dev, + TX_BUF_ALLOC * TX_DESC_CNT + 4, + &db->buf_pool_dma_ptr, GFP_KERNEL); if (!db->buf_pool_ptr) goto err_out_free_tx_desc; @@ -401,11 +405,12 @@ static int uli526x_init_one(struct pci_dev *pdev, err_out_unmap: pci_iounmap(pdev, db->ioaddr); err_out_free_tx_buf: - pci_free_consistent(pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, - db->buf_pool_ptr, db->buf_pool_dma_ptr); + dma_free_coherent(&pdev->dev, TX_BUF_ALLOC * TX_DESC_CNT + 4, + db->buf_pool_ptr, db->buf_pool_dma_ptr); err_out_free_tx_desc: - pci_free_consistent(pdev, sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, - db->desc_pool_ptr, db->desc_pool_dma_ptr); + dma_free_coherent(&pdev->dev, + sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, + db->desc_pool_ptr, db->desc_pool_dma_ptr); err_out_release: pci_release_regions(pdev); err_out_disable: @@ -424,11 +429,11 @@ static void uli526x_remove_one(struct pci_dev *pdev) unregister_netdev(dev); pci_iounmap(pdev, db->ioaddr); - pci_free_consistent(db->pdev, sizeof(struct tx_desc) * - DESC_ALL_CNT + 0x20, db->desc_pool_ptr, - db->desc_pool_dma_ptr); - pci_free_consistent(db->pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, - db->buf_pool_ptr, db->buf_pool_dma_ptr); + dma_free_coherent(&db->pdev->dev, + sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, + db->desc_pool_ptr, db->desc_pool_dma_ptr); + dma_free_coherent(&db->pdev->dev, TX_BUF_ALLOC * TX_DESC_CNT + 4, + db->buf_pool_ptr, db->buf_pool_dma_ptr); pci_release_regions(pdev); pci_disable_device(pdev); free_netdev(dev); @@ -810,7 +815,8 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info db->rx_avail_cnt--; db->interval_rx_cnt++; - pci_unmap_single(db->pdev, le32_to_cpu(rxptr->rdes2), RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE); + dma_unmap_single(&db->pdev->dev, le32_to_cpu(rxptr->rdes2), + RX_ALLOC_SIZE, DMA_FROM_DEVICE); if ( (rdes0 & 0x300) != 0x300) { /* A packet without First/Last flag */ /* reuse this SKB */ @@ -1234,10 +1240,8 @@ static void uli526x_reuse_skb(struct uli526x_board_info *db, struct sk_buff * sk if (!(rxptr->rdes0 & cpu_to_le32(0x80000000))) { rxptr->rx_skb_ptr = skb; - rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev, - skb_tail_pointer(skb), - RX_ALLOC_SIZE, - PCI_DMA_FROMDEVICE)); + rxptr->rdes2 = cpu_to_le32(dma_map_single(&db->pdev->dev, skb_tail_pointer(skb), + RX_ALLOC_SIZE, DMA_FROM_DEVICE)); wmb(); rxptr->rdes0 = cpu_to_le32(0x80000000); db->rx_avail_cnt++; @@ -1409,10 +1413,8 @@ static void allocate_rx_buffer(struct net_device *dev) if (skb == NULL) break; rxptr->rx_skb_ptr = skb; /* FIXME (?) 
*/ - rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev, - skb_tail_pointer(skb), - RX_ALLOC_SIZE, - PCI_DMA_FROMDEVICE)); + rxptr->rdes2 = cpu_to_le32(dma_map_single(&db->pdev->dev, skb_tail_pointer(skb), + RX_ALLOC_SIZE, DMA_FROM_DEVICE)); wmb(); rxptr->rdes0 = cpu_to_le32(0x80000000); rxptr = rxptr->next_rx_desc; diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 5a43be327f58..89cbdc1f4857 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -364,7 +364,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) irq = pdev->irq; - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) { pr_warn("Device %s disabled due to DMA limitations\n", pci_name(pdev)); return -EIO; @@ -630,9 +630,10 @@ static int netdev_open(struct net_device *dev) goto out_err; if (debug > 1) - netdev_dbg(dev, "w89c840_open() irq %d\n", irq); + netdev_dbg(dev, "%s() irq %d\n", __func__, irq); - if((i=alloc_ringdesc(dev))) + i = alloc_ringdesc(dev); + if (i) goto out_err; spin_lock_irq(&np->lock); @@ -642,7 +643,7 @@ static int netdev_open(struct net_device *dev) netif_start_queue(dev); if (debug > 2) - netdev_dbg(dev, "Done netdev_open()\n"); + netdev_dbg(dev, "Done %s()\n", __func__); /* Set the timer to check for link beat. */ timer_setup(&np->timer, netdev_timer, 0); @@ -802,8 +803,9 @@ static void init_rxtx_rings(struct net_device *dev) np->rx_skbuff[i] = skb; if (skb == NULL) break; - np->rx_addr[i] = pci_map_single(np->pci_dev,skb->data, - np->rx_buf_sz,PCI_DMA_FROMDEVICE); + np->rx_addr[i] = dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, + DMA_FROM_DEVICE); np->rx_ring[i].buffer1 = np->rx_addr[i]; np->rx_ring[i].status = DescOwned; @@ -833,20 +835,17 @@ static void free_rxtx_rings(struct netdev_private* np) for (i = 0; i < RX_RING_SIZE; i++) { np->rx_ring[i].status = 0; if (np->rx_skbuff[i]) { - pci_unmap_single(np->pci_dev, - np->rx_addr[i], - np->rx_skbuff[i]->len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, np->rx_addr[i], + np->rx_skbuff[i]->len, + DMA_FROM_DEVICE); dev_kfree_skb(np->rx_skbuff[i]); } np->rx_skbuff[i] = NULL; } for (i = 0; i < TX_RING_SIZE; i++) { if (np->tx_skbuff[i]) { - pci_unmap_single(np->pci_dev, - np->tx_addr[i], - np->tx_skbuff[i]->len, - PCI_DMA_TODEVICE); + dma_unmap_single(&np->pci_dev->dev, np->tx_addr[i], + np->tx_skbuff[i]->len, DMA_TO_DEVICE); dev_kfree_skb(np->tx_skbuff[i]); } np->tx_skbuff[i] = NULL; @@ -964,10 +963,10 @@ static int alloc_ringdesc(struct net_device *dev) np->rx_buf_sz = (dev->mtu <= 1500 ? 
PKT_BUF_SZ : dev->mtu + 32); - np->rx_ring = pci_alloc_consistent(np->pci_dev, - sizeof(struct w840_rx_desc)*RX_RING_SIZE + - sizeof(struct w840_tx_desc)*TX_RING_SIZE, - &np->ring_dma_addr); + np->rx_ring = dma_alloc_coherent(&np->pci_dev->dev, + sizeof(struct w840_rx_desc) * RX_RING_SIZE + + sizeof(struct w840_tx_desc) * TX_RING_SIZE, + &np->ring_dma_addr, GFP_KERNEL); if(!np->rx_ring) return -ENOMEM; init_rxtx_rings(dev); @@ -976,10 +975,10 @@ static int alloc_ringdesc(struct net_device *dev) static void free_ringdesc(struct netdev_private *np) { - pci_free_consistent(np->pci_dev, - sizeof(struct w840_rx_desc)*RX_RING_SIZE + - sizeof(struct w840_tx_desc)*TX_RING_SIZE, - np->rx_ring, np->ring_dma_addr); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct w840_rx_desc) * RX_RING_SIZE + + sizeof(struct w840_tx_desc) * TX_RING_SIZE, + np->rx_ring, np->ring_dma_addr); } @@ -994,8 +993,8 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) /* Calculate the next Tx descriptor entry. */ entry = np->cur_tx % TX_RING_SIZE; - np->tx_addr[entry] = pci_map_single(np->pci_dev, - skb->data,skb->len, PCI_DMA_TODEVICE); + np->tx_addr[entry] = dma_map_single(&np->pci_dev->dev, skb->data, + skb->len, DMA_TO_DEVICE); np->tx_skbuff[entry] = skb; np->tx_ring[entry].buffer1 = np->tx_addr[entry]; @@ -1078,9 +1077,8 @@ static void netdev_tx_done(struct net_device *dev) np->stats.tx_packets++; } /* Free the original skb. */ - pci_unmap_single(np->pci_dev,np->tx_addr[entry], - np->tx_skbuff[entry]->len, - PCI_DMA_TODEVICE); + dma_unmap_single(&np->pci_dev->dev, np->tx_addr[entry], + np->tx_skbuff[entry]->len, DMA_TO_DEVICE); np->tx_q_bytes -= np->tx_skbuff[entry]->len; dev_kfree_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; @@ -1217,18 +1215,21 @@ static int netdev_rx(struct net_device *dev) if (pkt_len < rx_copybreak && (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* 16 byte align the IP header */ - pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry], - np->rx_skbuff[entry]->len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&np->pci_dev->dev, + np->rx_addr[entry], + np->rx_skbuff[entry]->len, + DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); skb_put(skb, pkt_len); - pci_dma_sync_single_for_device(np->pci_dev,np->rx_addr[entry], - np->rx_skbuff[entry]->len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&np->pci_dev->dev, + np->rx_addr[entry], + np->rx_skbuff[entry]->len, + DMA_FROM_DEVICE); } else { - pci_unmap_single(np->pci_dev,np->rx_addr[entry], - np->rx_skbuff[entry]->len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, + np->rx_addr[entry], + np->rx_skbuff[entry]->len, + DMA_FROM_DEVICE); skb_put(skb = np->rx_skbuff[entry], pkt_len); np->rx_skbuff[entry] = NULL; } @@ -1258,9 +1259,10 @@ static int netdev_rx(struct net_device *dev) np->rx_skbuff[entry] = skb; if (skb == NULL) break; /* Better luck next round. 
*/ - np->rx_addr[entry] = pci_map_single(np->pci_dev, - skb->data, - np->rx_buf_sz, PCI_DMA_FROMDEVICE); + np->rx_addr[entry] = dma_map_single(&np->pci_dev->dev, + skb->data, + np->rx_buf_sz, + DMA_FROM_DEVICE); np->rx_ring[entry].buffer1 = np->rx_addr[entry]; } wmb(); diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index be6d8a9ada27..734acb834c98 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -7,7 +7,6 @@ */ -#define DRV_NAME "DL2000/TC902x-based linux driver" #include "dl2k.h" #include <linux/dma-mapping.h> @@ -223,13 +222,15 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata (pdev, dev); - ring_space = pci_alloc_consistent (pdev, TX_TOTAL_SIZE, &ring_dma); + ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, &ring_dma, + GFP_KERNEL); if (!ring_space) goto err_out_iounmap; np->tx_ring = ring_space; np->tx_ring_dma = ring_dma; - ring_space = pci_alloc_consistent (pdev, RX_TOTAL_SIZE, &ring_dma); + ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE, &ring_dma, + GFP_KERNEL); if (!ring_space) goto err_out_unmap_tx; np->rx_ring = ring_space; @@ -280,9 +281,11 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_out_unmap_rx: - pci_free_consistent (pdev, RX_TOTAL_SIZE, np->rx_ring, np->rx_ring_dma); + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, np->rx_ring, + np->rx_ring_dma); err_out_unmap_tx: - pci_free_consistent (pdev, TX_TOTAL_SIZE, np->tx_ring, np->tx_ring_dma); + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring, + np->tx_ring_dma); err_out_iounmap: #ifdef MEM_MAPPING pci_iounmap(pdev, np->ioaddr); @@ -436,8 +439,9 @@ static void free_list(struct net_device *dev) for (i = 0; i < RX_RING_SIZE; i++) { skb = np->rx_skbuff[i]; if (skb) { - pci_unmap_single(np->pdev, desc_to_dma(&np->rx_ring[i]), - skb->len, PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pdev->dev, + desc_to_dma(&np->rx_ring[i]), + skb->len, DMA_FROM_DEVICE); dev_kfree_skb(skb); np->rx_skbuff[i] = NULL; } @@ -447,8 +451,9 @@ static void free_list(struct net_device *dev) for (i = 0; i < TX_RING_SIZE; i++) { skb = np->tx_skbuff[i]; if (skb) { - pci_unmap_single(np->pdev, desc_to_dma(&np->tx_ring[i]), - skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&np->pdev->dev, + desc_to_dma(&np->tx_ring[i]), + skb->len, DMA_TO_DEVICE); dev_kfree_skb(skb); np->tx_skbuff[i] = NULL; } @@ -505,9 +510,8 @@ static int alloc_list(struct net_device *dev) sizeof(struct netdev_desc)); /* Rubicon now supports 40 bits of addressing space. 
*/ np->rx_ring[i].fraginfo = - cpu_to_le64(pci_map_single( - np->pdev, skb->data, np->rx_buf_sz, - PCI_DMA_FROMDEVICE)); + cpu_to_le64(dma_map_single(&np->pdev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE)); np->rx_ring[i].fraginfo |= cpu_to_le64((u64)np->rx_buf_sz << 48); } @@ -673,9 +677,8 @@ rio_timer (struct timer_list *t) } np->rx_skbuff[entry] = skb; np->rx_ring[entry].fraginfo = - cpu_to_le64 (pci_map_single - (np->pdev, skb->data, np->rx_buf_sz, - PCI_DMA_FROMDEVICE)); + cpu_to_le64 (dma_map_single(&np->pdev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE)); } np->rx_ring[entry].fraginfo |= cpu_to_le64((u64)np->rx_buf_sz << 48); @@ -729,9 +732,8 @@ start_xmit (struct sk_buff *skb, struct net_device *dev) ((u64)np->vlan << 32) | ((u64)skb->priority << 45); } - txdesc->fraginfo = cpu_to_le64 (pci_map_single (np->pdev, skb->data, - skb->len, - PCI_DMA_TODEVICE)); + txdesc->fraginfo = cpu_to_le64 (dma_map_single(&np->pdev->dev, skb->data, + skb->len, DMA_TO_DEVICE)); txdesc->fraginfo |= cpu_to_le64((u64)skb->len << 48); /* DL2K bug: DMA fails to get next descriptor ptr in 10Mbps mode @@ -828,9 +830,9 @@ rio_free_tx (struct net_device *dev, int irq) if (!(np->tx_ring[entry].status & cpu_to_le64(TFDDone))) break; skb = np->tx_skbuff[entry]; - pci_unmap_single (np->pdev, - desc_to_dma(&np->tx_ring[entry]), - skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&np->pdev->dev, + desc_to_dma(&np->tx_ring[entry]), skb->len, + DMA_TO_DEVICE); if (irq) dev_consume_skb_irq(skb); else @@ -950,25 +952,25 @@ receive_packet (struct net_device *dev) /* Small skbuffs for short packets */ if (pkt_len > copy_thresh) { - pci_unmap_single (np->pdev, - desc_to_dma(desc), - np->rx_buf_sz, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pdev->dev, + desc_to_dma(desc), + np->rx_buf_sz, + DMA_FROM_DEVICE); skb_put (skb = np->rx_skbuff[entry], pkt_len); np->rx_skbuff[entry] = NULL; } else if ((skb = netdev_alloc_skb_ip_align(dev, pkt_len))) { - pci_dma_sync_single_for_cpu(np->pdev, - desc_to_dma(desc), - np->rx_buf_sz, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&np->pdev->dev, + desc_to_dma(desc), + np->rx_buf_sz, + DMA_FROM_DEVICE); skb_copy_to_linear_data (skb, np->rx_skbuff[entry]->data, pkt_len); skb_put (skb, pkt_len); - pci_dma_sync_single_for_device(np->pdev, - desc_to_dma(desc), - np->rx_buf_sz, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&np->pdev->dev, + desc_to_dma(desc), + np->rx_buf_sz, + DMA_FROM_DEVICE); } skb->protocol = eth_type_trans (skb, dev); #if 0 @@ -1001,9 +1003,8 @@ receive_packet (struct net_device *dev) } np->rx_skbuff[entry] = skb; np->rx_ring[entry].fraginfo = - cpu_to_le64 (pci_map_single - (np->pdev, skb->data, np->rx_buf_sz, - PCI_DMA_FROMDEVICE)); + cpu_to_le64(dma_map_single(&np->pdev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE)); } np->rx_ring[entry].fraginfo |= cpu_to_le64((u64)np->rx_buf_sz << 48); @@ -1797,10 +1798,10 @@ rio_remove1 (struct pci_dev *pdev) struct netdev_private *np = netdev_priv(dev); unregister_netdev (dev); - pci_free_consistent (pdev, RX_TOTAL_SIZE, np->rx_ring, - np->rx_ring_dma); - pci_free_consistent (pdev, TX_TOTAL_SIZE, np->tx_ring, - np->tx_ring_dma); + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, np->rx_ring, + np->rx_ring_dma); + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring, + np->tx_ring_dma); #ifdef MEM_MAPPING pci_iounmap(pdev, np->ioaddr); #endif diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index b3f8597e77aa..e3a8858915b3 100644 --- 
a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -367,6 +367,7 @@ struct netdev_private { dma_addr_t tx_ring_dma; dma_addr_t rx_ring_dma; struct timer_list timer; /* Media monitoring timer. */ + struct net_device *ndev; /* backpointer */ /* ethtool extra stats */ struct { u64 tx_multiple_collisions; @@ -429,8 +430,8 @@ static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static int reset_tx (struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); -static void rx_poll(unsigned long data); -static void tx_poll(unsigned long data); +static void rx_poll(struct tasklet_struct *t); +static void tx_poll(struct tasklet_struct *t); static void refill_rx (struct net_device *dev); static void netdev_error(struct net_device *dev, int intr_status); static void netdev_error(struct net_device *dev, int intr_status); @@ -531,14 +532,15 @@ static int sundance_probe1(struct pci_dev *pdev, cpu_to_le16(eeprom_read(ioaddr, i + EEPROM_SA_OFFSET)); np = netdev_priv(dev); + np->ndev = dev; np->base = ioaddr; np->pci_dev = pdev; np->chip_id = chip_idx; np->msg_enable = (1 << debug) - 1; spin_lock_init(&np->lock); spin_lock_init(&np->statlock); - tasklet_init(&np->rx_tasklet, rx_poll, (unsigned long)dev); - tasklet_init(&np->tx_tasklet, tx_poll, (unsigned long)dev); + tasklet_setup(&np->rx_tasklet, rx_poll); + tasklet_setup(&np->tx_tasklet, tx_poll); ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, &ring_dma, GFP_KERNEL); @@ -1054,10 +1056,9 @@ static void init_ring(struct net_device *dev) } } -static void tx_poll (unsigned long data) +static void tx_poll(struct tasklet_struct *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_tasklet(np, t, tx_tasklet); unsigned head = np->cur_task % TX_RING_SIZE; struct netdev_desc *txdesc = &np->tx_ring[(np->cur_tx - 1) % TX_RING_SIZE]; @@ -1312,10 +1313,10 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) return IRQ_RETVAL(handled); } -static void rx_poll(unsigned long data) +static void rx_poll(struct tasklet_struct *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_tasklet(np, t, rx_tasklet); + struct net_device *dev = np->ndev; int entry = np->cur_rx % RX_RING_SIZE; int boguscnt = np->budget; void __iomem *ioaddr = np->base; diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index db98274501a0..48c6eb142dcc 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -507,23 +507,20 @@ static netdev_tx_t dnet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct dnet *bp = netdev_priv(dev); - u32 tx_status, irq_enable; - unsigned int len, i, tx_cmd, wrsz; + unsigned int i, tx_cmd, wrsz; unsigned long flags; unsigned int *bufp; + u32 irq_enable; - tx_status = dnet_readl(bp, TX_STATUS); + dnet_readl(bp, TX_STATUS); pr_debug("start_xmit: len %u head %p data %p\n", skb->len, skb->head, skb->data); dnet_print_skb(skb); - /* frame size (words) */ - len = (skb->len + 3) >> 2; - spin_lock_irqsave(&bp->lock, flags); - tx_status = dnet_readl(bp, TX_STATUS); + dnet_readl(bp, TX_STATUS); bufp = (unsigned int *)(((unsigned long) skb->data) & ~0x3UL); wrsz = (u32) skb->len + 3; @@ -545,7 +542,7 @@ static netdev_tx_t dnet_start_xmit(struct sk_buff *skb, struct net_device *dev) if (dnet_readl(bp, 
TX_FIFO_WCNT) > DNET_FIFO_TX_DATA_AF_TH) { netif_stop_queue(dev); - tx_status = dnet_readl(bp, INTR_SRC); + dnet_readl(bp, INTR_SRC); irq_enable = dnet_readl(bp, INTR_ENB); irq_enable |= DNET_INTR_ENB_TX_FIFOAE; dnet_writel(bp, irq_enable, INTR_ENB); diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index a817ca661c1f..0981fe9652e5 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -177,6 +177,7 @@ MODULE_PARM_DESC(buffer_size, "DMA buffer allocation size"); * struct ethoc - driver-private device structure * @iobase: pointer to I/O memory region * @membase: pointer to buffer memory region + * @big_endian: just big or little (endian) * @num_bd: number of buffer descriptors * @num_tx: number of send buffers * @cur_tx: last send buffer written @@ -189,7 +190,10 @@ MODULE_PARM_DESC(buffer_size, "DMA buffer allocation size"); * @msg_enable: device state flags * @lock: device lock * @mdio: MDIO bus for PHY access + * @clk: clock * @phy_id: address of attached PHY + * @old_link: previous link info + * @old_duplex: previous duplex info */ struct ethoc { void __iomem *iobase; @@ -1015,7 +1019,7 @@ static const struct net_device_ops ethoc_netdev_ops = { /** * ethoc_probe - initialize OpenCores ethernet MAC - * pdev: platform device + * @pdev: platform device */ static int ethoc_probe(struct platform_device *pdev) { diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index fdff3b4723ba..06cc863f4dd6 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -87,7 +87,7 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms"); #define DPAA_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \ NETIF_MSG_LINK | NETIF_MSG_IFUP | \ - NETIF_MSG_IFDOWN) + NETIF_MSG_IFDOWN | NETIF_MSG_HW) #define DPAA_INGRESS_CS_THRESHOLD 0x10000000 /* Ingress congestion threshold on FMan ports diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig index feea797cde02..cfd369cf4c8c 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig @@ -3,6 +3,7 @@ config FSL_DPAA2_ETH tristate "Freescale DPAA2 Ethernet" depends on FSL_MC_BUS && FSL_MC_DPIO select PHYLINK + select PCS_LYNX help This is the DPAA2 Ethernet driver supporting Freescale SoCs with DPAA2 (DataPath Acceleration Architecture v2). 
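The uli526x, winbond-840 and dl2k conversions above all follow the same mechanical recipe: pci_map_single()/pci_unmap_single() become dma_map_single()/dma_unmap_single() on &pdev->dev, pci_alloc_consistent()/pci_free_consistent() become dma_alloc_coherent()/dma_free_coherent() with an explicit GFP flag, and the PCI_DMA_* direction constants become DMA_*. A minimal sketch of the streaming-map half of that pattern, including the dma_mapping_error() check a complete conversion pairs with it (the helper and its arguments are illustrative, not taken from these drivers):

	#include <linux/dma-mapping.h>
	#include <linux/pci.h>
	#include <linux/skbuff.h>

	/* Map an Rx buffer for device writes; *out receives the bus address. */
	static int example_map_rx(struct pci_dev *pdev, struct sk_buff *skb,
				  size_t len, dma_addr_t *out)
	{
		/* dma_map_single(&pdev->dev, ...) replaces pci_map_single(pdev, ...),
		 * and DMA_FROM_DEVICE replaces PCI_DMA_FROMDEVICE.
		 */
		*out = dma_map_single(&pdev->dev, skb->data, len, DMA_FROM_DEVICE);
		if (dma_mapping_error(&pdev->dev, *out))
			return -ENOMEM;

		return 0;
	}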
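The sundance hunks above likewise migrate the two tasklets from tasklet_init(), which passed the net_device as an unsigned long cookie, to tasklet_setup()/from_tasklet(), which recover the containing structure from the embedded tasklet member; this is also why the driver grows an ndev backpointer. A sketch of the pattern on a made-up structure (struct foo and its fields are hypothetical):

	#include <linux/interrupt.h>

	struct foo {
		struct tasklet_struct tl;
		int pending;
	};

	static void foo_poll(struct tasklet_struct *t)
	{
		/* container_of-style recovery replaces the old cast of the
		 * "unsigned long data" cookie.
		 */
		struct foo *f = from_tasklet(f, t, tl);

		f->pending = 0;
	}

	static void foo_init(struct foo *f)
	{
		tasklet_setup(&f->tl, foo_poll);	/* no cookie argument */
	}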
diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile index 6e7f33c956bf..146cb3540e61 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Makefile +++ b/drivers/net/ethernet/freescale/dpaa2/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_FSL_DPAA2_ETH) += fsl-dpaa2-eth.o obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK) += fsl-dpaa2-ptp.o -fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o +fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpaa2-eth-devlink.o fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c index 83dee575c2fa..84de0644168d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c @@ -17,12 +17,12 @@ static int dpaa2_eth_dcbnl_ieee_getpfc(struct net_device *net_dev, return 0; } -static inline bool is_prio_enabled(u8 pfc_en, u8 tc) +static inline bool dpaa2_eth_is_prio_enabled(u8 pfc_en, u8 tc) { return !!(pfc_en & (1 << tc)); } -static int set_pfc_cn(struct dpaa2_eth_priv *priv, u8 pfc_en) +static int dpaa2_eth_set_pfc_cn(struct dpaa2_eth_priv *priv, u8 pfc_en) { struct dpni_congestion_notification_cfg cfg = {0}; int i, err; @@ -33,7 +33,7 @@ static int set_pfc_cn(struct dpaa2_eth_priv *priv, u8 pfc_en) cfg.message_ctx = 0ULL; for (i = 0; i < dpaa2_eth_tc_count(priv); i++) { - if (is_prio_enabled(pfc_en, i)) { + if (dpaa2_eth_is_prio_enabled(pfc_en, i)) { cfg.threshold_entry = DPAA2_ETH_CN_THRESH_ENTRY(priv); cfg.threshold_exit = DPAA2_ETH_CN_THRESH_EXIT(priv); } else { @@ -93,7 +93,7 @@ static int dpaa2_eth_dcbnl_ieee_setpfc(struct net_device *net_dev, } /* Configure congestion notifications for the enabled priorities */ - err = set_pfc_cn(priv, pfc->pfc_en); + err = dpaa2_eth_set_pfc_cn(priv, pfc->pfc_en); if (err) return err; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c index 56d9927fbfda..b87db0846e10 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c @@ -42,24 +42,7 @@ static int dpaa2_dbg_cpu_show(struct seq_file *file, void *offset) return 0; } -static int dpaa2_dbg_cpu_open(struct inode *inode, struct file *file) -{ - int err; - struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private; - - err = single_open(file, dpaa2_dbg_cpu_show, priv); - if (err < 0) - netdev_err(priv->net_dev, "single_open() failed\n"); - - return err; -} - -static const struct file_operations dpaa2_dbg_cpu_ops = { - .open = dpaa2_dbg_cpu_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(dpaa2_dbg_cpu); static char *fq_type_to_str(struct dpaa2_eth_fq *fq) { @@ -106,24 +89,7 @@ static int dpaa2_dbg_fqs_show(struct seq_file *file, void *offset) return 0; } -static int dpaa2_dbg_fqs_open(struct inode *inode, struct file *file) -{ - int err; - struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private; - - err = single_open(file, dpaa2_dbg_fqs_show, priv); - if (err < 0) - netdev_err(priv->net_dev, "single_open() failed\n"); - - return err; -} - -static const struct file_operations dpaa2_dbg_fq_ops = { - .open = dpaa2_dbg_fqs_open, - .read = seq_read, - .llseek = seq_lseek, - .release 
= single_release, -}; +DEFINE_SHOW_ATTRIBUTE(dpaa2_dbg_fqs); static int dpaa2_dbg_ch_show(struct seq_file *file, void *offset) { @@ -151,24 +117,7 @@ static int dpaa2_dbg_ch_show(struct seq_file *file, void *offset) return 0; } -static int dpaa2_dbg_ch_open(struct inode *inode, struct file *file) -{ - int err; - struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private; - - err = single_open(file, dpaa2_dbg_ch_show, priv); - if (err < 0) - netdev_err(priv->net_dev, "single_open() failed\n"); - - return err; -} - -static const struct file_operations dpaa2_dbg_ch_ops = { - .open = dpaa2_dbg_ch_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(dpaa2_dbg_ch); void dpaa2_dbg_add(struct dpaa2_eth_priv *priv) { @@ -179,13 +128,13 @@ void dpaa2_dbg_add(struct dpaa2_eth_priv *priv) priv->dbg.dir = dir; /* per-cpu stats file */ - debugfs_create_file("cpu_stats", 0444, dir, priv, &dpaa2_dbg_cpu_ops); + debugfs_create_file("cpu_stats", 0444, dir, priv, &dpaa2_dbg_cpu_fops); /* per-fq stats file */ - debugfs_create_file("fq_stats", 0444, dir, priv, &dpaa2_dbg_fq_ops); + debugfs_create_file("fq_stats", 0444, dir, priv, &dpaa2_dbg_fqs_fops); /* per-fq stats file */ - debugfs_create_file("ch_stats", 0444, dir, priv, &dpaa2_dbg_ch_ops); + debugfs_create_file("ch_stats", 0444, dir, priv, &dpaa2_dbg_ch_fops); } void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c new file mode 100644 index 000000000000..833696245565 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +#include "dpaa2-eth.h" +/* Copyright 2020 NXP + */ + +#define DPAA2_ETH_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, 0) + +static const struct devlink_trap_group dpaa2_eth_trap_groups_arr[] = { + DEVLINK_TRAP_GROUP_GENERIC(PARSER_ERROR_DROPS, 0), +}; + +static const struct devlink_trap dpaa2_eth_traps_arr[] = { + DPAA2_ETH_TRAP_DROP(VXLAN_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(LLC_SNAP_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(VLAN_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(PPPOE_PPP_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(MPLS_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(ARP_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(IP_1_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(IP_N_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(GRE_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(UDP_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(TCP_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(IPSEC_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(SCTP_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(DCCP_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(GTP_PARSING, PARSER_ERROR_DROPS), + DPAA2_ETH_TRAP_DROP(ESP_PARSING, PARSER_ERROR_DROPS), +}; + +static int dpaa2_eth_dl_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct dpaa2_eth_devlink_priv *dl_priv = devlink_priv(devlink); + struct dpaa2_eth_priv *priv = dl_priv->dpaa2_priv; + char buf[10]; + int err; + + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (err) + return err; + + scnprintf(buf, 10, "%d.%d", priv->dpni_ver_major, priv->dpni_ver_minor); + err = 
devlink_info_version_running_put(req, "dpni", buf); + if (err) + return err; + + return 0; +} + +static struct dpaa2_eth_trap_item * +dpaa2_eth_dl_trap_item_lookup(struct dpaa2_eth_priv *priv, u16 trap_id) +{ + struct dpaa2_eth_trap_data *dpaa2_eth_trap_data = priv->trap_data; + int i; + + for (i = 0; i < ARRAY_SIZE(dpaa2_eth_traps_arr); i++) { + if (dpaa2_eth_traps_arr[i].id == trap_id) + return &dpaa2_eth_trap_data->trap_items_arr[i]; + } + + return NULL; +} + +struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv, + struct dpaa2_fapr *fapr) +{ + struct dpaa2_faf_error_bit { + int position; + enum devlink_trap_generic_id trap_id; + } faf_bits[] = { + { .position = 5, .trap_id = DEVLINK_TRAP_GENERIC_ID_VXLAN_PARSING }, + { .position = 20, .trap_id = DEVLINK_TRAP_GENERIC_ID_LLC_SNAP_PARSING }, + { .position = 24, .trap_id = DEVLINK_TRAP_GENERIC_ID_VLAN_PARSING }, + { .position = 26, .trap_id = DEVLINK_TRAP_GENERIC_ID_PPPOE_PPP_PARSING }, + { .position = 29, .trap_id = DEVLINK_TRAP_GENERIC_ID_MPLS_PARSING }, + { .position = 31, .trap_id = DEVLINK_TRAP_GENERIC_ID_ARP_PARSING }, + { .position = 52, .trap_id = DEVLINK_TRAP_GENERIC_ID_IP_1_PARSING }, + { .position = 61, .trap_id = DEVLINK_TRAP_GENERIC_ID_IP_N_PARSING }, + { .position = 67, .trap_id = DEVLINK_TRAP_GENERIC_ID_GRE_PARSING }, + { .position = 71, .trap_id = DEVLINK_TRAP_GENERIC_ID_UDP_PARSING }, + { .position = 76, .trap_id = DEVLINK_TRAP_GENERIC_ID_TCP_PARSING }, + { .position = 80, .trap_id = DEVLINK_TRAP_GENERIC_ID_IPSEC_PARSING }, + { .position = 82, .trap_id = DEVLINK_TRAP_GENERIC_ID_SCTP_PARSING }, + { .position = 84, .trap_id = DEVLINK_TRAP_GENERIC_ID_DCCP_PARSING }, + { .position = 88, .trap_id = DEVLINK_TRAP_GENERIC_ID_GTP_PARSING }, + { .position = 90, .trap_id = DEVLINK_TRAP_GENERIC_ID_ESP_PARSING }, + }; + u64 faf_word; + u64 mask; + int i; + + for (i = 0; i < ARRAY_SIZE(faf_bits); i++) { + if (faf_bits[i].position < 32) { + /* Low part of FAF. + * position ranges from 31 to 0, mask from 0 to 31. + */ + mask = 1ull << (31 - faf_bits[i].position); + faf_word = __le32_to_cpu(fapr->faf_lo); + } else { + /* High part of FAF. + * position ranges from 95 to 32, mask from 0 to 63. 
+ */ + mask = 1ull << (63 - (faf_bits[i].position - 32)); + faf_word = __le64_to_cpu(fapr->faf_hi); + } + if (faf_word & mask) + return dpaa2_eth_dl_trap_item_lookup(priv, faf_bits[i].trap_id); + } + return NULL; +} + +static int dpaa2_eth_dl_trap_init(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) +{ + struct dpaa2_eth_devlink_priv *dl_priv = devlink_priv(devlink); + struct dpaa2_eth_priv *priv = dl_priv->dpaa2_priv; + struct dpaa2_eth_trap_item *dpaa2_eth_trap_item; + + dpaa2_eth_trap_item = dpaa2_eth_dl_trap_item_lookup(priv, trap->id); + if (WARN_ON(!dpaa2_eth_trap_item)) + return -ENOENT; + + dpaa2_eth_trap_item->trap_ctx = trap_ctx; + + return 0; +} + +static int dpaa2_eth_dl_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action, + struct netlink_ext_ack *extack) +{ + /* No support for changing the action of an independent packet trap, + * only per trap group - parser error drops + */ + NL_SET_ERR_MSG_MOD(extack, + "Cannot change trap action independently of group"); + return -EOPNOTSUPP; +} + +static int dpaa2_eth_dl_trap_group_action_set(struct devlink *devlink, + const struct devlink_trap_group *group, + enum devlink_trap_action action, + struct netlink_ext_ack *extack) +{ + struct dpaa2_eth_devlink_priv *dl_priv = devlink_priv(devlink); + struct dpaa2_eth_priv *priv = dl_priv->dpaa2_priv; + struct net_device *net_dev = priv->net_dev; + struct device *dev = net_dev->dev.parent; + struct dpni_error_cfg err_cfg = {0}; + int err; + + if (group->id != DEVLINK_TRAP_GROUP_GENERIC_ID_PARSER_ERROR_DROPS) + return -EOPNOTSUPP; + + /* Configure handling of frames marked as errors from the parser */ + err_cfg.errors = DPAA2_FAS_RX_ERR_MASK; + err_cfg.set_frame_annotation = 1; + + switch (action) { + case DEVLINK_TRAP_ACTION_DROP: + err_cfg.error_action = DPNI_ERROR_ACTION_DISCARD; + break; + case DEVLINK_TRAP_ACTION_TRAP: + err_cfg.error_action = DPNI_ERROR_ACTION_SEND_TO_ERROR_QUEUE; + break; + default: + return -EOPNOTSUPP; + } + + err = dpni_set_errors_behavior(priv->mc_io, 0, priv->mc_token, &err_cfg); + if (err) { + dev_err(dev, "dpni_set_errors_behavior failed\n"); + return err; + } + + return 0; +} + +static const struct devlink_ops dpaa2_eth_devlink_ops = { + .info_get = dpaa2_eth_dl_info_get, + .trap_init = dpaa2_eth_dl_trap_init, + .trap_action_set = dpaa2_eth_dl_trap_action_set, + .trap_group_action_set = dpaa2_eth_dl_trap_group_action_set, +}; + +int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv) +{ + struct net_device *net_dev = priv->net_dev; + struct device *dev = net_dev->dev.parent; + struct dpaa2_eth_devlink_priv *dl_priv; + int err; + + priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv)); + if (!priv->devlink) { + dev_err(dev, "devlink_alloc failed\n"); + return -ENOMEM; + } + dl_priv = devlink_priv(priv->devlink); + dl_priv->dpaa2_priv = priv; + + err = devlink_register(priv->devlink, dev); + if (err) { + dev_err(dev, "devlink_register() = %d\n", err); + goto devlink_free; + } + + return 0; + +devlink_free: + devlink_free(priv->devlink); + + return err; +} + +void dpaa2_eth_dl_unregister(struct dpaa2_eth_priv *priv) +{ + devlink_unregister(priv->devlink); + devlink_free(priv->devlink); +} + +int dpaa2_eth_dl_port_add(struct dpaa2_eth_priv *priv) +{ + struct devlink_port *devlink_port = &priv->devlink_port; + struct devlink_port_attrs attrs = {}; + int err; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + devlink_port_attrs_set(devlink_port, &attrs); + + err = 
devlink_port_register(priv->devlink, devlink_port, 0); + if (err) + return err; + + devlink_port_type_eth_set(devlink_port, priv->net_dev); + + return 0; +} + +void dpaa2_eth_dl_port_del(struct dpaa2_eth_priv *priv) +{ + struct devlink_port *devlink_port = &priv->devlink_port; + + devlink_port_type_clear(devlink_port); + devlink_port_unregister(devlink_port); +} + +int dpaa2_eth_dl_traps_register(struct dpaa2_eth_priv *priv) +{ + struct dpaa2_eth_trap_data *dpaa2_eth_trap_data; + struct net_device *net_dev = priv->net_dev; + struct device *dev = net_dev->dev.parent; + int err; + + dpaa2_eth_trap_data = kzalloc(sizeof(*dpaa2_eth_trap_data), GFP_KERNEL); + if (!dpaa2_eth_trap_data) + return -ENOMEM; + priv->trap_data = dpaa2_eth_trap_data; + + dpaa2_eth_trap_data->trap_items_arr = kcalloc(ARRAY_SIZE(dpaa2_eth_traps_arr), + sizeof(struct dpaa2_eth_trap_item), + GFP_KERNEL); + if (!dpaa2_eth_trap_data->trap_items_arr) { + err = -ENOMEM; + goto trap_data_free; + } + + err = devlink_trap_groups_register(priv->devlink, dpaa2_eth_trap_groups_arr, + ARRAY_SIZE(dpaa2_eth_trap_groups_arr)); + if (err) { + dev_err(dev, "devlink_trap_groups_register() = %d\n", err); + goto trap_items_arr_free; + } + + err = devlink_traps_register(priv->devlink, dpaa2_eth_traps_arr, + ARRAY_SIZE(dpaa2_eth_traps_arr), priv); + if (err) { + dev_err(dev, "devlink_traps_register() = %d\n", err); + goto trap_groups_unregister; + } + + return 0; + +trap_groups_unregister: + devlink_trap_groups_unregister(priv->devlink, dpaa2_eth_trap_groups_arr, + ARRAY_SIZE(dpaa2_eth_trap_groups_arr)); +trap_items_arr_free: + kfree(dpaa2_eth_trap_data->trap_items_arr); +trap_data_free: + kfree(dpaa2_eth_trap_data); + priv->trap_data = NULL; + + return err; +} + +void dpaa2_eth_dl_traps_unregister(struct dpaa2_eth_priv *priv) +{ + devlink_traps_unregister(priv->devlink, dpaa2_eth_traps_arr, + ARRAY_SIZE(dpaa2_eth_traps_arr)); + devlink_trap_groups_unregister(priv->devlink, dpaa2_eth_trap_groups_arr, + ARRAY_SIZE(dpaa2_eth_trap_groups_arr)); + kfree(priv->trap_data->trap_items_arr); + kfree(priv->trap_data); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index cf5383bb8331..cf9400a9886d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -11,10 +11,11 @@ #include <linux/msi.h> #include <linux/kthread.h> #include <linux/iommu.h> -#include <linux/net_tstamp.h> #include <linux/fsl/mc.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> +#include <linux/fsl/ptp_qoriq.h> +#include <linux/ptp_classify.h> #include <net/pkt_cls.h> #include <net/sock.h> @@ -30,6 +31,9 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Freescale Semiconductor, Inc"); MODULE_DESCRIPTION("Freescale DPAA2 Ethernet Driver"); +struct ptp_qoriq *dpaa2_ptp; +EXPORT_SYMBOL(dpaa2_ptp); + static void *dpaa2_iova_to_virt(struct iommu_domain *domain, dma_addr_t iova_addr) { @@ -40,9 +44,9 @@ static void *dpaa2_iova_to_virt(struct iommu_domain *domain, return phys_to_virt(phys_addr); } -static void validate_rx_csum(struct dpaa2_eth_priv *priv, - u32 fd_status, - struct sk_buff *skb) +static void dpaa2_eth_validate_rx_csum(struct dpaa2_eth_priv *priv, + u32 fd_status, + struct sk_buff *skb) { skb_checksum_none_assert(skb); @@ -62,9 +66,9 @@ static void validate_rx_csum(struct dpaa2_eth_priv *priv, /* Free a received FD. * Not to be used for Tx conf FDs or on any other paths.
*/ -static void free_rx_fd(struct dpaa2_eth_priv *priv, - const struct dpaa2_fd *fd, - void *vaddr) +static void dpaa2_eth_free_rx_fd(struct dpaa2_eth_priv *priv, + const struct dpaa2_fd *fd, + void *vaddr) { struct device *dev = priv->net_dev->dev.parent; dma_addr_t addr = dpaa2_fd_get_addr(fd); @@ -100,9 +104,9 @@ free_buf: } /* Build a linear skb based on a single-buffer frame descriptor */ -static struct sk_buff *build_linear_skb(struct dpaa2_eth_channel *ch, - const struct dpaa2_fd *fd, - void *fd_vaddr) +static struct sk_buff *dpaa2_eth_build_linear_skb(struct dpaa2_eth_channel *ch, + const struct dpaa2_fd *fd, + void *fd_vaddr) { struct sk_buff *skb = NULL; u16 fd_offset = dpaa2_fd_get_offset(fd); @@ -121,9 +125,9 @@ static struct sk_buff *build_linear_skb(struct dpaa2_eth_channel *ch, } /* Build a non linear (fragmented) skb based on a S/G table */ -static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *ch, - struct dpaa2_sg_entry *sgt) +static struct sk_buff *dpaa2_eth_build_frag_skb(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + struct dpaa2_sg_entry *sgt) { struct sk_buff *skb = NULL; struct device *dev = priv->net_dev->dev.parent; @@ -204,7 +208,8 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv, /* Free buffers acquired from the buffer pool or which were meant to * be released in the pool */ -static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count) +static void dpaa2_eth_free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, + int count) { struct device *dev = priv->net_dev->dev.parent; void *vaddr; @@ -218,9 +223,9 @@ static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count) } } -static void xdp_release_buf(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *ch, - dma_addr_t addr) +static void dpaa2_eth_xdp_release_buf(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + dma_addr_t addr) { int retries = 0; int err; @@ -238,7 +243,7 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv, } if (err) { - free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt); + dpaa2_eth_free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt); ch->buf_count -= ch->xdp.drop_cnt; } @@ -274,9 +279,9 @@ static int dpaa2_eth_xdp_flush(struct dpaa2_eth_priv *priv, return total_enqueued; } -static void xdp_tx_flush(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *ch, - struct dpaa2_eth_fq *fq) +static void dpaa2_eth_xdp_tx_flush(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + struct dpaa2_eth_fq *fq) { struct rtnl_link_stats64 *percpu_stats; struct dpaa2_fd *fds; @@ -295,17 +300,17 @@ static void xdp_tx_flush(struct dpaa2_eth_priv *priv, ch->stats.xdp_tx++; } for (i = enqueued; i < fq->xdp_tx_fds.num; i++) { - xdp_release_buf(priv, ch, dpaa2_fd_get_addr(&fds[i])); + dpaa2_eth_xdp_release_buf(priv, ch, dpaa2_fd_get_addr(&fds[i])); percpu_stats->tx_errors++; ch->stats.xdp_tx_err++; } fq->xdp_tx_fds.num = 0; } -static void xdp_enqueue(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *ch, - struct dpaa2_fd *fd, - void *buf_start, u16 queue_id) +static void dpaa2_eth_xdp_enqueue(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + struct dpaa2_fd *fd, + void *buf_start, u16 queue_id) { struct dpaa2_faead *faead; struct dpaa2_fd *dest_fd; @@ -333,13 +338,13 @@ static void xdp_enqueue(struct dpaa2_eth_priv *priv, if (fq->xdp_tx_fds.num < DEV_MAP_BULK_SIZE) return; - xdp_tx_flush(priv, ch, fq); + dpaa2_eth_xdp_tx_flush(priv, ch, fq); } 
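The faf_bits table above maps parser status bits to devlink traps: positions 0-31 are stored MSB-first in the 32-bit faf_lo word and positions 32 and up MSB-first in the 64-bit faf_hi word, which is what the two mask expressions encode. The same test as a standalone helper, written in plain C so the arithmetic can be sanity-checked in userspace (names and the layout assumption are taken only from the comments in that hunk):

	#include <stdbool.h>
	#include <stdint.h>

	static bool faf_bit_set(uint32_t faf_lo, uint64_t faf_hi, int position)
	{
		if (position < 32)	/* bit 0 is the MSB of the low word */
			return faf_lo & (1ull << (31 - position));

		/* bit 32 is the MSB of the high word */
		return faf_hi & (1ull << (63 - (position - 32)));
	}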
-static u32 run_xdp(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *ch, - struct dpaa2_eth_fq *rx_fq, - struct dpaa2_fd *fd, void *vaddr) +static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + struct dpaa2_eth_fq *rx_fq, + struct dpaa2_fd *fd, void *vaddr) { dma_addr_t addr = dpaa2_fd_get_addr(fd); struct bpf_prog *xdp_prog; @@ -372,7 +377,7 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv, case XDP_PASS: break; case XDP_TX: - xdp_enqueue(priv, ch, fd, vaddr, rx_fq->flowid); + dpaa2_eth_xdp_enqueue(priv, ch, fd, vaddr, rx_fq->flowid); break; default: bpf_warn_invalid_xdp_action(xdp_act); @@ -381,7 +386,7 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv, trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act); fallthrough; case XDP_DROP: - xdp_release_buf(priv, ch, addr); + dpaa2_eth_xdp_release_buf(priv, ch, addr); ch->stats.xdp_drop++; break; case XDP_REDIRECT: @@ -441,7 +446,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, percpu_extras = this_cpu_ptr(priv->percpu_extras); if (fd_format == dpaa2_fd_single) { - xdp_act = run_xdp(priv, ch, fq, (struct dpaa2_fd *)fd, vaddr); + xdp_act = dpaa2_eth_run_xdp(priv, ch, fq, (struct dpaa2_fd *)fd, vaddr); if (xdp_act != XDP_PASS) { percpu_stats->rx_packets++; percpu_stats->rx_bytes += dpaa2_fd_get_len(fd); @@ -450,13 +455,13 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, dma_unmap_page(dev, addr, priv->rx_buf_size, DMA_BIDIRECTIONAL); - skb = build_linear_skb(ch, fd, vaddr); + skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr); } else if (fd_format == dpaa2_fd_sg) { WARN_ON(priv->xdp_prog); dma_unmap_page(dev, addr, priv->rx_buf_size, DMA_BIDIRECTIONAL); - skb = build_frag_skb(priv, ch, buf_data); + skb = dpaa2_eth_build_frag_skb(priv, ch, buf_data); free_pages((unsigned long)vaddr, 0); percpu_extras->rx_sg_frames++; percpu_extras->rx_sg_bytes += dpaa2_fd_get_len(fd); @@ -485,7 +490,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, /* Check if we need to validate the L4 csum */ if (likely(dpaa2_fd_get_frc(fd) & DPAA2_FD_FRC_FASV)) { status = le32_to_cpu(fas->status); - validate_rx_csum(priv, status, skb); + dpaa2_eth_validate_rx_csum(priv, status, skb); } skb->protocol = eth_type_trans(skb, priv->net_dev); @@ -499,19 +504,71 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, return; err_build_skb: - free_rx_fd(priv, fd, vaddr); + dpaa2_eth_free_rx_fd(priv, fd, vaddr); err_frame_format: percpu_stats->rx_dropped++; } +/* Processing of Rx frames received on the error FQ + * We check and print the error bits and then free the frame + */ +static void dpaa2_eth_rx_err(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + const struct dpaa2_fd *fd, + struct dpaa2_eth_fq *fq __always_unused) +{ + struct device *dev = priv->net_dev->dev.parent; + dma_addr_t addr = dpaa2_fd_get_addr(fd); + u8 fd_format = dpaa2_fd_get_format(fd); + struct rtnl_link_stats64 *percpu_stats; + struct dpaa2_eth_trap_item *trap_item; + struct dpaa2_fapr *fapr; + struct sk_buff *skb; + void *buf_data; + void *vaddr; + + vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr); + dma_sync_single_for_cpu(dev, addr, priv->rx_buf_size, + DMA_BIDIRECTIONAL); + + buf_data = vaddr + dpaa2_fd_get_offset(fd); + + if (fd_format == dpaa2_fd_single) { + dma_unmap_page(dev, addr, priv->rx_buf_size, + DMA_BIDIRECTIONAL); + skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr); + } else if (fd_format == dpaa2_fd_sg) { + dma_unmap_page(dev, addr, priv->rx_buf_size, + DMA_BIDIRECTIONAL); + skb = 
dpaa2_eth_build_frag_skb(priv, ch, buf_data); + free_pages((unsigned long)vaddr, 0); + } else { + /* We don't support any other format */ + dpaa2_eth_free_rx_fd(priv, fd, vaddr); + goto err_frame_format; + } + + fapr = dpaa2_get_fapr(vaddr, false); + trap_item = dpaa2_eth_dl_get_trap(priv, fapr); + if (trap_item) + devlink_trap_report(priv->devlink, skb, trap_item->trap_ctx, + &priv->devlink_port, NULL); + consume_skb(skb); + +err_frame_format: + percpu_stats = this_cpu_ptr(priv->percpu_stats); + percpu_stats->rx_errors++; + ch->buf_count--; +} + /* Consume all frames pull-dequeued into the store. This is the simplest way to * make sure we don't accidentally issue another volatile dequeue which would * overwrite (leak) frames already in the store. * * Observance of NAPI budget is not our concern, leaving that to the caller. */ -static int consume_frames(struct dpaa2_eth_channel *ch, - struct dpaa2_eth_fq **src) +static int dpaa2_eth_consume_frames(struct dpaa2_eth_channel *ch, + struct dpaa2_eth_fq **src) { struct dpaa2_eth_priv *priv = ch->priv; struct dpaa2_eth_fq *fq = NULL; @@ -559,11 +616,57 @@ static int consume_frames(struct dpaa2_eth_channel *ch, return cleaned; } +static int dpaa2_eth_ptp_parse(struct sk_buff *skb, + u8 *msgtype, u8 *twostep, u8 *udp, + u16 *correction_offset, + u16 *origintimestamp_offset) +{ + unsigned int ptp_class; + struct ptp_header *hdr; + unsigned int type; + u8 *base; + + ptp_class = ptp_classify_raw(skb); + if (ptp_class == PTP_CLASS_NONE) + return -EINVAL; + + hdr = ptp_parse_header(skb, ptp_class); + if (!hdr) + return -EINVAL; + + *msgtype = ptp_get_msgtype(hdr, ptp_class); + *twostep = hdr->flag_field[0] & 0x2; + + type = ptp_class & PTP_CLASS_PMASK; + if (type == PTP_CLASS_IPV4 || + type == PTP_CLASS_IPV6) + *udp = 1; + else + *udp = 0; + + base = skb_mac_header(skb); + *correction_offset = (u8 *)&hdr->correction - base; + *origintimestamp_offset = (u8 *)hdr + sizeof(struct ptp_header) - base; + + return 0; +} + /* Configure the egress frame annotation for timestamp update */ -static void enable_tx_tstamp(struct dpaa2_fd *fd, void *buf_start) +static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv, + struct dpaa2_fd *fd, + void *buf_start, + struct sk_buff *skb) { + struct ptp_tstamp origin_timestamp; + struct dpni_single_step_cfg cfg; + u8 msgtype, twostep, udp; struct dpaa2_faead *faead; + struct dpaa2_fas *fas; + struct timespec64 ts; + u16 offset1, offset2; u32 ctrl, frc; + __le64 *ns; + u8 *data; /* Mark the egress frame annotation area as valid */ frc = dpaa2_fd_get_frc(fd); @@ -579,12 +682,52 @@ static void enable_tx_tstamp(struct dpaa2_fd *fd, void *buf_start) ctrl = DPAA2_FAEAD_A2V | DPAA2_FAEAD_UPDV | DPAA2_FAEAD_UPD; faead = dpaa2_get_faead(buf_start, true); faead->ctrl = cpu_to_le32(ctrl); + + if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { + if (dpaa2_eth_ptp_parse(skb, &msgtype, &twostep, &udp, + &offset1, &offset2) || + msgtype != 0 || twostep) { + WARN_ONCE(1, "Bad packet for one-step timestamping\n"); + return; + } + + /* Mark the frame annotation status as valid */ + frc = dpaa2_fd_get_frc(fd); + dpaa2_fd_set_frc(fd, frc | DPAA2_FD_FRC_FASV); + + /* Mark the PTP flag for one step timestamping */ + fas = dpaa2_get_fas(buf_start, true); + fas->status = cpu_to_le32(DPAA2_FAS_PTP); + + dpaa2_ptp->caps.gettime64(&dpaa2_ptp->caps, &ts); + ns = dpaa2_get_ts(buf_start, true); + *ns = cpu_to_le64(timespec64_to_ns(&ts) / + DPAA2_PTP_CLK_PERIOD_NS); + + /* Update current time to PTP message originTimestamp field */ + 
ns_to_ptp_tstamp(&origin_timestamp, le64_to_cpup(ns)); + data = skb_mac_header(skb); + *(__be16 *)(data + offset2) = htons(origin_timestamp.sec_msb); + *(__be32 *)(data + offset2 + 2) = + htonl(origin_timestamp.sec_lsb); + *(__be32 *)(data + offset2 + 6) = htonl(origin_timestamp.nsec); + + cfg.en = 1; + cfg.ch_update = udp; + cfg.offset = offset1; + cfg.peer_delay = 0; + + if (dpni_set_single_step_cfg(priv->mc_io, 0, priv->mc_token, + &cfg)) + WARN_ONCE(1, "Failed to set single step register"); + } } /* Create a frame descriptor based on a fragmented skb */ -static int build_sg_fd(struct dpaa2_eth_priv *priv, - struct sk_buff *skb, - struct dpaa2_fd *fd) +static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, + struct sk_buff *skb, + struct dpaa2_fd *fd, + void **swa_addr) { struct device *dev = priv->net_dev->dev.parent; void *sgt_buf = NULL; @@ -606,7 +749,7 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv, if (unlikely(PAGE_SIZE / sizeof(struct scatterlist) < nr_frags + 1)) return -EINVAL; - scl = kcalloc(nr_frags + 1, sizeof(struct scatterlist), GFP_ATOMIC); + scl = kmalloc_array(nr_frags + 1, sizeof(struct scatterlist), GFP_ATOMIC); if (unlikely(!scl)) return -ENOMEM; @@ -653,6 +796,7 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv, * skb backpointer in the software annotation area. We'll need * all of them on Tx Conf. */ + *swa_addr = (void *)sgt_buf; swa = (struct dpaa2_eth_swa *)sgt_buf; swa->type = DPAA2_ETH_SWA_SG; swa->sg.skb = skb; @@ -672,9 +816,6 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv, dpaa2_fd_set_len(fd, skb->len); dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA); - if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) - enable_tx_tstamp(fd, sgt_buf); - return 0; dma_map_single_failed: @@ -692,9 +833,10 @@ dma_map_sg_failed: * enough for the HW requirements, thus instead of realloc-ing the skb we * create a SG frame descriptor with only one entry. */ -static int build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, - struct sk_buff *skb, - struct dpaa2_fd *fd) +static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, + struct sk_buff *skb, + struct dpaa2_fd *fd, + void **swa_addr) { struct device *dev = priv->net_dev->dev.parent; struct dpaa2_eth_sgt_cache *sgt_cache; @@ -732,6 +874,7 @@ static int build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, dpaa2_sg_set_final(sgt, true); /* Store the skb backpointer in the SGT buffer */ + *swa_addr = (void *)sgt_buf; swa = (struct dpaa2_eth_swa *)sgt_buf; swa->type = DPAA2_ETH_SWA_SINGLE; swa->single.skb = skb; @@ -750,9 +893,6 @@ static int build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, dpaa2_fd_set_len(fd, skb->len); dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA); - if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) - enable_tx_tstamp(fd, sgt_buf); - return 0; sgt_map_failed: @@ -767,16 +907,17 @@ data_map_failed: } /* Create a frame descriptor based on a linear skb */ -static int build_single_fd(struct dpaa2_eth_priv *priv, - struct sk_buff *skb, - struct dpaa2_fd *fd) +static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv, + struct sk_buff *skb, + struct dpaa2_fd *fd, + void **swa_addr) { struct device *dev = priv->net_dev->dev.parent; u8 *buffer_start, *aligned_start; struct dpaa2_eth_swa *swa; dma_addr_t addr; - buffer_start = skb->data - dpaa2_eth_needed_headroom(priv, skb); + buffer_start = skb->data - dpaa2_eth_needed_headroom(skb); /* If there's enough room to align the FD address, do it. * It will help hardware optimize accesses. 
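The swa_addr out-parameter threaded through the dpaa2_eth_build_*_fd() helpers above exposes the software annotation area at the start of the Tx buffer, so dpaa2_eth_enable_tx_tstamp() can stamp timestamp state there and Tx confirmation can recover the skb. A stripped-down sketch of that backpointer idea (struct my_swa is hypothetical; the driver's real struct dpaa2_eth_swa also carries a type discriminator and DMA bookkeeping):

	#include <linux/skbuff.h>

	struct my_swa {
		struct sk_buff *skb;	/* backpointer for Tx confirmation */
	};

	/* Written into the buffer before it is handed to hardware... */
	static void stash_skb(void *buf_start, struct sk_buff *skb)
	{
		struct my_swa *swa = buf_start;

		swa->skb = skb;
	}

	/* ...and read back on Tx confirmation so the frame can be freed. */
	static struct sk_buff *recover_skb(void *buf_start)
	{
		struct my_swa *swa = buf_start;

		return swa->skb;
	}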
@@ -790,6 +931,7 @@ static int build_single_fd(struct dpaa2_eth_priv *priv, * (in the private data area) such that we can release it * on Tx confirm */ + *swa_addr = (void *)buffer_start; swa = (struct dpaa2_eth_swa *)buffer_start; swa->type = DPAA2_ETH_SWA_SINGLE; swa->single.skb = skb; @@ -806,9 +948,6 @@ static int build_single_fd(struct dpaa2_eth_priv *priv, dpaa2_fd_set_format(fd, dpaa2_fd_single); dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA); - if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) - enable_tx_tstamp(fd, buffer_start); - return 0; } @@ -819,9 +958,9 @@ static int build_single_fd(struct dpaa2_eth_priv *priv, * This can be called either from dpaa2_eth_tx_conf() or on the error path of * dpaa2_eth_tx(). */ -static void free_tx_fd(const struct dpaa2_eth_priv *priv, - struct dpaa2_eth_fq *fq, - const struct dpaa2_fd *fd, bool in_napi) +static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_fq *fq, + const struct dpaa2_fd *fd, bool in_napi) { struct device *dev = priv->net_dev->dev.parent; dma_addr_t fd_addr, sg_addr; @@ -892,7 +1031,7 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv, } /* Get the timestamp value */ - if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { + if (skb->cb[0] == TX_TSTAMP) { struct skb_shared_hwtstamps shhwtstamps; __le64 *ts = dpaa2_get_ts(buffer_start, true); u64 ns; @@ -902,6 +1041,8 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv, ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); shhwtstamps.hwtstamp = ns_to_ktime(ns); skb_tstamp_tx(skb, &shhwtstamps); + } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { + mutex_unlock(&priv->onestep_tstamp_lock); } /* Free SGT buffer allocated on tx */ @@ -921,7 +1062,8 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv, napi_consume_skb(skb, in_napi); } -static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) +static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, + struct net_device *net_dev) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); struct dpaa2_fd fd; @@ -934,11 +1076,12 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) u32 fd_len; u8 prio = 0; int err, i; + void *swa; percpu_stats = this_cpu_ptr(priv->percpu_stats); percpu_extras = this_cpu_ptr(priv->percpu_extras); - needed_headroom = dpaa2_eth_needed_headroom(priv, skb); + needed_headroom = dpaa2_eth_needed_headroom(skb); /* We'll be holding a back-reference to the skb until Tx Confirmation; * we don't want that overwritten by a concurrent Tx with a cloned skb. 
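dpaa2_eth_free_tx_fd() above converts the hardware timestamp ticks to nanoseconds and reports them through the stack's standard Tx-timestamp interface. A minimal sketch of just that reporting step (CLK_PERIOD_NS is a placeholder for the device tick length, DPAA2_PTP_CLK_PERIOD_NS in the driver):

	#include <linux/ktime.h>
	#include <linux/skbuff.h>

	#define CLK_PERIOD_NS	4	/* placeholder, not the real value */

	static void report_tx_tstamp(struct sk_buff *skb, u64 hw_ticks)
	{
		struct skb_shared_hwtstamps shhwtstamps;

		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
		shhwtstamps.hwtstamp = ns_to_ktime(hw_ticks * CLK_PERIOD_NS);
		/* Clones the skb to the socket error queue with the stamp */
		skb_tstamp_tx(skb, &shhwtstamps);
	}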
@@ -954,17 +1097,17 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) memset(&fd, 0, sizeof(fd)); if (skb_is_nonlinear(skb)) { - err = build_sg_fd(priv, skb, &fd); + err = dpaa2_eth_build_sg_fd(priv, skb, &fd, &swa); percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; } else if (skb_headroom(skb) < needed_headroom) { - err = build_sg_fd_single_buf(priv, skb, &fd); + err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, &fd, &swa); percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; percpu_extras->tx_converted_sg_frames++; percpu_extras->tx_converted_sg_bytes += skb->len; } else { - err = build_single_fd(priv, skb, &fd); + err = dpaa2_eth_build_single_fd(priv, skb, &fd, &swa); } if (unlikely(err)) { @@ -972,6 +1115,9 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) goto err_build_fd; } + if (skb->cb[0]) + dpaa2_eth_enable_tx_tstamp(priv, &fd, swa, skb); + /* Tracing point */ trace_dpaa2_tx_fd(net_dev, &fd); @@ -1010,7 +1156,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) if (unlikely(err < 0)) { percpu_stats->tx_errors++; /* Clean up everything, including freeing the skb */ - free_tx_fd(priv, fq, &fd, false); + dpaa2_eth_free_tx_fd(priv, fq, &fd, false); netdev_tx_completed_queue(nq, 1, fd_len); } else { percpu_stats->tx_packets++; @@ -1025,6 +1171,63 @@ err_build_fd: return NETDEV_TX_OK; } +static void dpaa2_eth_tx_onestep_tstamp(struct work_struct *work) +{ + struct dpaa2_eth_priv *priv = container_of(work, struct dpaa2_eth_priv, + tx_onestep_tstamp); + struct sk_buff *skb; + + while (true) { + skb = skb_dequeue(&priv->tx_skbs); + if (!skb) + return; + + /* Lock just before transmitting a one-step timestamping + * packet; release the lock in dpaa2_eth_free_tx_fd once the + * hardware confirms that the packet has been sent, or when + * cleaning up after a transmit failure.
+ */ + mutex_lock(&priv->onestep_tstamp_lock); + __dpaa2_eth_tx(skb, priv->net_dev); + } +} + +static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + u8 msgtype, twostep, udp; + u16 offset1, offset2; + + /* Utilize skb->cb[0] for timestamping request per skb */ + skb->cb[0] = 0; + + if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && dpaa2_ptp) { + if (priv->tx_tstamp_type == HWTSTAMP_TX_ON) + skb->cb[0] = TX_TSTAMP; + else if (priv->tx_tstamp_type == HWTSTAMP_TX_ONESTEP_SYNC) + skb->cb[0] = TX_TSTAMP_ONESTEP_SYNC; + } + + /* TX for one-step timestamping PTP Sync packet */ + if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { + if (!dpaa2_eth_ptp_parse(skb, &msgtype, &twostep, &udp, + &offset1, &offset2)) + if (msgtype == 0 && twostep == 0) { + skb_queue_tail(&priv->tx_skbs, skb); + queue_work(priv->dpaa2_ptp_wq, + &priv->tx_onestep_tstamp); + return NETDEV_TX_OK; + } + /* Use two-step timestamping if not one-step timestamping + * PTP Sync packet + */ + skb->cb[0] = TX_TSTAMP; + } + + /* TX for other packets */ + return __dpaa2_eth_tx(skb, net_dev); +} + /* Tx confirmation frame processing routine */ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv, struct dpaa2_eth_channel *ch __always_unused, @@ -1045,7 +1248,7 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv, /* Check frame errors in the FD field */ fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK; - free_tx_fd(priv, fq, fd, true); + dpaa2_eth_free_tx_fd(priv, fq, fd, true); if (likely(!fd_errors)) return; @@ -1059,7 +1262,7 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv, percpu_stats->tx_errors++; } -static int set_rx_csum(struct dpaa2_eth_priv *priv, bool enable) +static int dpaa2_eth_set_rx_csum(struct dpaa2_eth_priv *priv, bool enable) { int err; @@ -1082,7 +1285,7 @@ static int set_rx_csum(struct dpaa2_eth_priv *priv, bool enable) return 0; } -static int set_tx_csum(struct dpaa2_eth_priv *priv, bool enable) +static int dpaa2_eth_set_tx_csum(struct dpaa2_eth_priv *priv, bool enable) { int err; @@ -1106,8 +1309,8 @@ static int set_tx_csum(struct dpaa2_eth_priv *priv, bool enable) /* Perform a single release command to add buffers * to the specified buffer pool */ -static int add_bufs(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *ch, u16 bpid) +static int dpaa2_eth_add_bufs(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, u16 bpid) { struct device *dev = priv->net_dev->dev.parent; u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; @@ -1155,7 +1358,7 @@ release_bufs: * not much else we can do about it */ if (err) { - free_bufs(priv, buf_array, i); + dpaa2_eth_free_bufs(priv, buf_array, i); return 0; } @@ -1173,7 +1376,7 @@ err_alloc: return 0; } -static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) +static int dpaa2_eth_seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) { int i, j; int new_count; @@ -1181,7 +1384,7 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) for (j = 0; j < priv->num_channels; j++) { for (i = 0; i < DPAA2_ETH_NUM_BUFS; i += DPAA2_ETH_BUFS_PER_CMD) { - new_count = add_bufs(priv, priv->channel[j], bpid); + new_count = dpaa2_eth_add_bufs(priv, priv->channel[j], bpid); priv->channel[j]->buf_count += new_count; if (new_count < DPAA2_ETH_BUFS_PER_CMD) { @@ -1193,11 +1396,11 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) return 0; } -/** +/* * Drain the specified number of buffers from the DPNI's private buffer pool. 
* @count must not exceed DPAA2_ETH_BUFS_PER_CMD */ -static void drain_bufs(struct dpaa2_eth_priv *priv, int count) +static void dpaa2_eth_drain_bufs(struct dpaa2_eth_priv *priv, int count) { u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; int retries = 0; @@ -1213,17 +1416,17 @@ static void drain_bufs(struct dpaa2_eth_priv *priv, int count) netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n"); return; } - free_bufs(priv, buf_array, ret); + dpaa2_eth_free_bufs(priv, buf_array, ret); retries = 0; } while (ret); } -static void drain_pool(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_drain_pool(struct dpaa2_eth_priv *priv) { int i; - drain_bufs(priv, DPAA2_ETH_BUFS_PER_CMD); - drain_bufs(priv, 1); + dpaa2_eth_drain_bufs(priv, DPAA2_ETH_BUFS_PER_CMD); + dpaa2_eth_drain_bufs(priv, 1); for (i = 0; i < priv->num_channels; i++) priv->channel[i]->buf_count = 0; @@ -1232,9 +1435,9 @@ static void drain_pool(struct dpaa2_eth_priv *priv) /* Function is called from softirq context only, so we don't need to guard * the access to percpu count */ -static int refill_pool(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *ch, - u16 bpid) +static int dpaa2_eth_refill_pool(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + u16 bpid) { int new_count; @@ -1242,7 +1445,7 @@ static int refill_pool(struct dpaa2_eth_priv *priv, return 0; do { - new_count = add_bufs(priv, ch, bpid); + new_count = dpaa2_eth_add_bufs(priv, ch, bpid); if (unlikely(!new_count)) { /* Out of memory; abort for now, we'll try later on */ break; @@ -1272,7 +1475,7 @@ static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv) } } -static int pull_channel(struct dpaa2_eth_channel *ch) +static int dpaa2_eth_pull_channel(struct dpaa2_eth_channel *ch) { int err; int dequeues = -1; @@ -1319,14 +1522,14 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) ch->rx_list = &rx_list; do { - err = pull_channel(ch); + err = dpaa2_eth_pull_channel(ch); if (unlikely(err)) break; /* Refill pool if appropriate */ - refill_pool(priv, ch, priv->bpid); + dpaa2_eth_refill_pool(priv, ch, priv->bpid); - store_cleaned = consume_frames(ch, &fq); + store_cleaned = dpaa2_eth_consume_frames(ch, &fq); if (store_cleaned <= 0) break; if (fq->type == DPAA2_RX_FQ) { @@ -1375,12 +1578,12 @@ out: if (ch->xdp.res & XDP_REDIRECT) xdp_do_flush_map(); else if (rx_cleaned && ch->xdp.res & XDP_TX) - xdp_tx_flush(priv, ch, &priv->fq[flowid]); + dpaa2_eth_xdp_tx_flush(priv, ch, &priv->fq[flowid]); return work_done; } -static void enable_ch_napi(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_enable_ch_napi(struct dpaa2_eth_priv *priv) { struct dpaa2_eth_channel *ch; int i; @@ -1391,7 +1594,7 @@ static void enable_ch_napi(struct dpaa2_eth_priv *priv) } } -static void disable_ch_napi(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_disable_ch_napi(struct dpaa2_eth_priv *priv) { struct dpaa2_eth_channel *ch; int i; @@ -1465,7 +1668,7 @@ set_cgtd: priv->rx_cgtd_enabled = td.enable; } -static int link_state_update(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_link_state_update(struct dpaa2_eth_priv *priv) { struct dpni_link_state state = {0}; bool tx_pause; @@ -1517,7 +1720,7 @@ static int dpaa2_eth_open(struct net_device *net_dev) struct dpaa2_eth_priv *priv = netdev_priv(net_dev); int err; - err = seed_pool(priv, priv->bpid); + err = dpaa2_eth_seed_pool(priv, priv->bpid); if (err) { /* Not much to do; the buffer pool, though not filled up, * may still contain some buffers which would enable us @@ -1541,7 +1744,7 @@ static int
dpaa2_eth_open(struct net_device *net_dev) */ netif_carrier_off(net_dev); } - enable_ch_napi(priv); + dpaa2_eth_enable_ch_napi(priv); err = dpni_enable(priv->mc_io, 0, priv->mc_token); if (err < 0) { @@ -1549,30 +1752,19 @@ static int dpaa2_eth_open(struct net_device *net_dev) goto enable_err; } - if (!priv->mac) { - /* If the DPMAC object has already processed the link up - * interrupt, we have to learn the link state ourselves. - */ - err = link_state_update(priv); - if (err < 0) { - netdev_err(net_dev, "Can't update link state\n"); - goto link_state_err; - } - } else { + if (priv->mac) phylink_start(priv->mac->phylink); - } return 0; -link_state_err: enable_err: - disable_ch_napi(priv); - drain_pool(priv); + dpaa2_eth_disable_ch_napi(priv); + dpaa2_eth_drain_pool(priv); return err; } /* Total number of in-flight frames on ingress queues */ -static u32 ingress_fq_count(struct dpaa2_eth_priv *priv) +static u32 dpaa2_eth_ingress_fq_count(struct dpaa2_eth_priv *priv) { struct dpaa2_eth_fq *fq; u32 fcnt = 0, bcnt = 0, total = 0; @@ -1591,13 +1783,13 @@ static u32 ingress_fq_count(struct dpaa2_eth_priv *priv) return total; } -static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv) { int retries = 10; u32 pending; do { - pending = ingress_fq_count(priv); + pending = dpaa2_eth_ingress_fq_count(priv); if (pending) msleep(100); } while (pending && --retries); @@ -1605,7 +1797,7 @@ static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv) #define DPNI_TX_PENDING_VER_MAJOR 7 #define DPNI_TX_PENDING_VER_MINOR 13 -static void wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv) { union dpni_statistics stats; int retries = 10; @@ -1651,7 +1843,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev) * on WRIOP. After it finishes, wait until all remaining frames on Rx * and Tx conf queues are consumed on NAPI poll. */ - wait_for_egress_fq_empty(priv); + dpaa2_eth_wait_for_egress_fq_empty(priv); do { dpni_disable(priv->mc_io, 0, priv->mc_token); @@ -1667,11 +1859,11 @@ static int dpaa2_eth_stop(struct net_device *net_dev) */ } - wait_for_ingress_fq_empty(priv); - disable_ch_napi(priv); + dpaa2_eth_wait_for_ingress_fq_empty(priv); + dpaa2_eth_disable_ch_napi(priv); /* Empty the buffer pool */ - drain_pool(priv); + dpaa2_eth_drain_pool(priv); /* Empty the Scatter-Gather Buffer cache */ dpaa2_eth_sgt_cache_drain(priv); @@ -1725,8 +1917,8 @@ static void dpaa2_eth_get_stats(struct net_device *net_dev, /* Copy mac unicast addresses from @net_dev to @priv. * Its sole purpose is to make dpaa2_eth_set_rx_mode() more readable. */ -static void add_uc_hw_addr(const struct net_device *net_dev, - struct dpaa2_eth_priv *priv) +static void dpaa2_eth_add_uc_hw_addr(const struct net_device *net_dev, + struct dpaa2_eth_priv *priv) { struct netdev_hw_addr *ha; int err; @@ -1744,8 +1936,8 @@ static void add_uc_hw_addr(const struct net_device *net_dev, /* Copy mac multicast addresses from @net_dev to @priv * Its sole purpose is to make dpaa2_eth_set_rx_mode() more readable. 
*/ -static void add_mc_hw_addr(const struct net_device *net_dev, - struct dpaa2_eth_priv *priv) +static void dpaa2_eth_add_mc_hw_addr(const struct net_device *net_dev, + struct dpaa2_eth_priv *priv) { struct netdev_hw_addr *ha; int err; @@ -1810,7 +2002,7 @@ static void dpaa2_eth_set_rx_mode(struct net_device *net_dev) err = dpni_clear_mac_filters(mc_io, 0, mc_token, 1, 0); if (err) netdev_warn(net_dev, "Can't clear uc filters\n"); - add_uc_hw_addr(net_dev, priv); + dpaa2_eth_add_uc_hw_addr(net_dev, priv); /* Finally, clear uc promisc and set mc promisc as requested. */ err = dpni_set_unicast_promisc(mc_io, 0, mc_token, 0); @@ -1833,8 +2025,8 @@ static void dpaa2_eth_set_rx_mode(struct net_device *net_dev) err = dpni_clear_mac_filters(mc_io, 0, mc_token, 1, 1); if (err) netdev_warn(net_dev, "Can't clear mac filters\n"); - add_mc_hw_addr(net_dev, priv); - add_uc_hw_addr(net_dev, priv); + dpaa2_eth_add_mc_hw_addr(net_dev, priv); + dpaa2_eth_add_uc_hw_addr(net_dev, priv); /* Now we can clear both ucast and mcast promisc, without risking * to drop legitimate frames anymore. @@ -1868,14 +2060,14 @@ static int dpaa2_eth_set_features(struct net_device *net_dev, if (changed & NETIF_F_RXCSUM) { enable = !!(features & NETIF_F_RXCSUM); - err = set_rx_csum(priv, enable); + err = dpaa2_eth_set_rx_csum(priv, enable); if (err) return err; } if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { enable = !!(features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)); - err = set_tx_csum(priv, enable); + err = dpaa2_eth_set_tx_csum(priv, enable); if (err) return err; } @@ -1888,15 +2080,17 @@ static int dpaa2_eth_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) struct dpaa2_eth_priv *priv = netdev_priv(dev); struct hwtstamp_config config; + if (!dpaa2_ptp) + return -EINVAL; + if (copy_from_user(&config, rq->ifr_data, sizeof(config))) return -EFAULT; switch (config.tx_type) { case HWTSTAMP_TX_OFF: - priv->tx_tstamp = false; - break; case HWTSTAMP_TX_ON: - priv->tx_tstamp = true; + case HWTSTAMP_TX_ONESTEP_SYNC: + priv->tx_tstamp_type = config.tx_type; break; default: return -ERANGE; @@ -1944,7 +2138,7 @@ static bool xdp_mtu_valid(struct dpaa2_eth_priv *priv, int mtu) return true; } -static int set_rx_mfl(struct dpaa2_eth_priv *priv, int mtu, bool has_xdp) +static int dpaa2_eth_set_rx_mfl(struct dpaa2_eth_priv *priv, int mtu, bool has_xdp) { int mfl, err; @@ -1978,7 +2172,7 @@ static int dpaa2_eth_change_mtu(struct net_device *dev, int new_mtu) if (!xdp_mtu_valid(priv, new_mtu)) return -EINVAL; - err = set_rx_mfl(priv, new_mtu, true); + err = dpaa2_eth_set_rx_mfl(priv, new_mtu, true); if (err) return err; @@ -1987,7 +2181,7 @@ out: return 0; } -static int update_rx_buffer_headroom(struct dpaa2_eth_priv *priv, bool has_xdp) +static int dpaa2_eth_update_rx_buffer_headroom(struct dpaa2_eth_priv *priv, bool has_xdp) { struct dpni_buffer_layout buf_layout = {0}; int err; @@ -2013,7 +2207,7 @@ static int update_rx_buffer_headroom(struct dpaa2_eth_priv *priv, bool has_xdp) return 0; } -static int setup_xdp(struct net_device *dev, struct bpf_prog *prog) +static int dpaa2_eth_setup_xdp(struct net_device *dev, struct bpf_prog *prog) { struct dpaa2_eth_priv *priv = netdev_priv(dev); struct dpaa2_eth_channel *ch; @@ -2039,10 +2233,10 @@ static int setup_xdp(struct net_device *dev, struct bpf_prog *prog) * so we are sure no old format buffers will be used from now on. 
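The reason xdp_mtu_valid() caps the MTU is that an XDP frame must fit in a single Rx buffer, headroom included. A rough stand-alone sketch of that arithmetic, with deliberately hypothetical constants (the real values come from the driver's buffer layout, not from this patch):

#include <stdbool.h>

#define EXAMPLE_RX_BUF_SIZE	2048		/* hypothetical Rx buffer size */
#define EXAMPLE_XDP_HEADROOM	256		/* hypothetical reserved headroom */
#define EXAMPLE_ETH_OVERHEAD	(14 + 4 + 4)	/* Ethernet hdr + VLAN tag + FCS */

/* An MTU is XDP-safe only if headroom + headers + payload fit one buffer. */
static bool example_xdp_mtu_valid(int mtu)
{
	return mtu + EXAMPLE_ETH_OVERHEAD + EXAMPLE_XDP_HEADROOM <=
	       EXAMPLE_RX_BUF_SIZE;
}

With these example numbers the largest XDP-safe MTU would be 2048 - 256 - 22 = 1770 bytes; the driver performs the equivalent check against its real buffer-layout constants.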
*/ if (need_update) { - err = set_rx_mfl(priv, dev->mtu, !!prog); + err = dpaa2_eth_set_rx_mfl(priv, dev->mtu, !!prog); if (err) goto out_err; - err = update_rx_buffer_headroom(priv, !!prog); + err = dpaa2_eth_update_rx_buffer_headroom(priv, !!prog); if (err) goto out_err; } @@ -2079,7 +2273,7 @@ static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: - return setup_xdp(dev, xdp->prog); + return dpaa2_eth_setup_xdp(dev, xdp->prog); default: return -EINVAL; } @@ -2091,7 +2285,6 @@ static int dpaa2_eth_xdp_create_fd(struct net_device *net_dev, struct xdp_frame *xdpf, struct dpaa2_fd *fd) { - struct dpaa2_eth_priv *priv = netdev_priv(net_dev); struct device *dev = net_dev->dev.parent; unsigned int needed_headroom; struct dpaa2_eth_swa *swa; @@ -2101,7 +2294,7 @@ static int dpaa2_eth_xdp_create_fd(struct net_device *net_dev, /* We require a minimum headroom to be able to transmit the frame. * Otherwise return an error and let the original net_device handle it */ - needed_headroom = dpaa2_eth_needed_headroom(priv, NULL); + needed_headroom = dpaa2_eth_needed_headroom(NULL); if (xdpf->headroom < needed_headroom) return -EINVAL; @@ -2316,7 +2509,7 @@ static const struct net_device_ops dpaa2_eth_ops = { .ndo_setup_tc = dpaa2_eth_setup_tc, }; -static void cdan_cb(struct dpaa2_io_notification_ctx *ctx) +static void dpaa2_eth_cdan_cb(struct dpaa2_io_notification_ctx *ctx) { struct dpaa2_eth_channel *ch; @@ -2329,7 +2522,7 @@ static void cdan_cb(struct dpaa2_io_notification_ctx *ctx) } /* Allocate and configure a DPCON object */ -static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv) +static struct fsl_mc_device *dpaa2_eth_setup_dpcon(struct dpaa2_eth_priv *priv) { struct fsl_mc_device *dpcon; struct device *dev = priv->net_dev->dev.parent; @@ -2373,16 +2566,15 @@ free: return ERR_PTR(err); } -static void free_dpcon(struct dpaa2_eth_priv *priv, - struct fsl_mc_device *dpcon) +static void dpaa2_eth_free_dpcon(struct dpaa2_eth_priv *priv, + struct fsl_mc_device *dpcon) { dpcon_disable(priv->mc_io, 0, dpcon->mc_handle); dpcon_close(priv->mc_io, 0, dpcon->mc_handle); fsl_mc_object_free(dpcon); } -static struct dpaa2_eth_channel * -alloc_channel(struct dpaa2_eth_priv *priv) +static struct dpaa2_eth_channel *dpaa2_eth_alloc_channel(struct dpaa2_eth_priv *priv) { struct dpaa2_eth_channel *channel; struct dpcon_attr attr; @@ -2393,7 +2585,7 @@ alloc_channel(struct dpaa2_eth_priv *priv) if (!channel) return NULL; - channel->dpcon = setup_dpcon(priv); + channel->dpcon = dpaa2_eth_setup_dpcon(priv); if (IS_ERR(channel->dpcon)) { err = PTR_ERR(channel->dpcon); goto err_setup; @@ -2413,23 +2605,23 @@ alloc_channel(struct dpaa2_eth_priv *priv) return channel; err_get_attr: - free_dpcon(priv, channel->dpcon); + dpaa2_eth_free_dpcon(priv, channel->dpcon); err_setup: kfree(channel); return ERR_PTR(err); } -static void free_channel(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *channel) +static void dpaa2_eth_free_channel(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *channel) { - free_dpcon(priv, channel->dpcon); + dpaa2_eth_free_dpcon(priv, channel->dpcon); kfree(channel); } /* DPIO setup: allocate and configure QBMan channels, setup core affinity * and register data availability notifications */ -static int setup_dpio(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_setup_dpio(struct dpaa2_eth_priv *priv) { struct dpaa2_io_notification_ctx *nctx; struct dpaa2_eth_channel *channel; @@ -2449,7 +2641,7 @@ static int 
setup_dpio(struct dpaa2_eth_priv *priv) cpumask_clear(&priv->dpio_cpumask); for_each_online_cpu(i) { /* Try to allocate a channel */ - channel = alloc_channel(priv); + channel = dpaa2_eth_alloc_channel(priv); if (IS_ERR_OR_NULL(channel)) { err = PTR_ERR_OR_ZERO(channel); if (err != -EPROBE_DEFER) @@ -2462,7 +2654,7 @@ static int setup_dpio(struct dpaa2_eth_priv *priv) nctx = &channel->nctx; nctx->is_cdan = 1; - nctx->cb = cdan_cb; + nctx->cb = dpaa2_eth_cdan_cb; nctx->id = channel->ch_id; nctx->desired_cpu = i; @@ -2510,14 +2702,14 @@ static int setup_dpio(struct dpaa2_eth_priv *priv) err_set_cdan: dpaa2_io_service_deregister(channel->dpio, nctx, dev); err_service_reg: - free_channel(priv, channel); + dpaa2_eth_free_channel(priv, channel); err_alloc_ch: if (err == -EPROBE_DEFER) { for (i = 0; i < priv->num_channels; i++) { channel = priv->channel[i]; nctx = &channel->nctx; dpaa2_io_service_deregister(channel->dpio, nctx, dev); - free_channel(priv, channel); + dpaa2_eth_free_channel(priv, channel); } priv->num_channels = 0; return err; @@ -2534,7 +2726,7 @@ err_alloc_ch: return 0; } -static void free_dpio(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_free_dpio(struct dpaa2_eth_priv *priv) { struct device *dev = priv->net_dev->dev.parent; struct dpaa2_eth_channel *ch; @@ -2544,12 +2736,12 @@ static void free_dpio(struct dpaa2_eth_priv *priv) for (i = 0; i < priv->num_channels; i++) { ch = priv->channel[i]; dpaa2_io_service_deregister(ch->dpio, &ch->nctx, dev); - free_channel(priv, ch); + dpaa2_eth_free_channel(priv, ch); } } -static struct dpaa2_eth_channel *get_affine_channel(struct dpaa2_eth_priv *priv, - int cpu) +static struct dpaa2_eth_channel *dpaa2_eth_get_affine_channel(struct dpaa2_eth_priv *priv, + int cpu) { struct device *dev = priv->net_dev->dev.parent; int i; @@ -2566,7 +2758,7 @@ static struct dpaa2_eth_channel *get_affine_channel(struct dpaa2_eth_priv *priv, return priv->channel[0]; } -static void set_fq_affinity(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_set_fq_affinity(struct dpaa2_eth_priv *priv) { struct device *dev = priv->net_dev->dev.parent; struct dpaa2_eth_fq *fq; @@ -2583,6 +2775,7 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv) fq = &priv->fq[i]; switch (fq->type) { case DPAA2_RX_FQ: + case DPAA2_RX_ERR_FQ: fq->target_cpu = rx_cpu; rx_cpu = cpumask_next(rx_cpu, &priv->dpio_cpumask); if (rx_cpu >= nr_cpu_ids) @@ -2597,13 +2790,13 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv) default: dev_err(dev, "Unknown FQ type: %d\n", fq->type); } - fq->channel = get_affine_channel(priv, fq->target_cpu); + fq->channel = dpaa2_eth_get_affine_channel(priv, fq->target_cpu); } update_xps(priv); } -static void setup_fqs(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_setup_fqs(struct dpaa2_eth_priv *priv) { int i, j; @@ -2626,12 +2819,16 @@ static void setup_fqs(struct dpaa2_eth_priv *priv) } } + /* We have exactly one Rx error queue per DPNI */ + priv->fq[priv->num_fqs].type = DPAA2_RX_ERR_FQ; + priv->fq[priv->num_fqs++].consume = dpaa2_eth_rx_err; + /* For each FQ, decide on which core to process incoming frames */ - set_fq_affinity(priv); + dpaa2_eth_set_fq_affinity(priv); } /* Allocate and configure one buffer pool for each interface */ -static int setup_dpbp(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_setup_dpbp(struct dpaa2_eth_priv *priv) { int err; struct fsl_mc_device *dpbp_dev; @@ -2690,15 +2887,15 @@ err_open: return err; } -static void free_dpbp(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_free_dpbp(struct 
dpaa2_eth_priv *priv) { - drain_pool(priv); + dpaa2_eth_drain_pool(priv); dpbp_disable(priv->mc_io, 0, priv->dpbp_dev->mc_handle); dpbp_close(priv->mc_io, 0, priv->dpbp_dev->mc_handle); fsl_mc_object_free(priv->dpbp_dev); } -static int set_buffer_layout(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_set_buffer_layout(struct dpaa2_eth_priv *priv) { struct device *dev = priv->net_dev->dev.parent; struct dpni_buffer_layout buf_layout = {0}; @@ -2723,8 +2920,10 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv) /* tx buffer */ buf_layout.private_data_size = DPAA2_ETH_SWA_SIZE; buf_layout.pass_timestamp = true; + buf_layout.pass_frame_status = true; buf_layout.options = DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE | - DPNI_BUF_LAYOUT_OPT_TIMESTAMP; + DPNI_BUF_LAYOUT_OPT_TIMESTAMP | + DPNI_BUF_LAYOUT_OPT_FRAME_STATUS; err = dpni_set_buffer_layout(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_TX, &buf_layout); if (err) { @@ -2733,7 +2932,8 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv) } /* tx-confirm buffer */ - buf_layout.options = DPNI_BUF_LAYOUT_OPT_TIMESTAMP; + buf_layout.options = DPNI_BUF_LAYOUT_OPT_TIMESTAMP | + DPNI_BUF_LAYOUT_OPT_FRAME_STATUS; err = dpni_set_buffer_layout(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_TX_CONFIRM, &buf_layout); if (err) { @@ -2815,7 +3015,7 @@ static inline int dpaa2_eth_enqueue_fq_multiple(struct dpaa2_eth_priv *priv, return 0; } -static void set_enqueue_mode(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_set_enqueue_mode(struct dpaa2_eth_priv *priv) { if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR, DPNI_ENQUEUE_FQID_VER_MINOR) < 0) @@ -2824,7 +3024,7 @@ static void set_enqueue_mode(struct dpaa2_eth_priv *priv) priv->enqueue = dpaa2_eth_enqueue_fq_multiple; } -static int set_pause(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_set_pause(struct dpaa2_eth_priv *priv) { struct device *dev = priv->net_dev->dev.parent; struct dpni_link_cfg link_cfg = {0}; @@ -2851,7 +3051,7 @@ static int set_pause(struct dpaa2_eth_priv *priv) return 0; } -static void update_tx_fqids(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_update_tx_fqids(struct dpaa2_eth_priv *priv) { struct dpni_queue_id qid = {0}; struct dpaa2_eth_fq *fq; @@ -2893,7 +3093,7 @@ out_err: } /* Configure ingress classification based on VLAN PCP */ -static int set_vlan_qos(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_set_vlan_qos(struct dpaa2_eth_priv *priv) { struct device *dev = priv->net_dev->dev.parent; struct dpkg_profile_cfg kg_cfg = {0}; @@ -3005,7 +3205,7 @@ out_free_tbl: } /* Configure the DPNI object this interface is associated with */ -static int setup_dpni(struct fsl_mc_device *ls_dev) +static int dpaa2_eth_setup_dpni(struct fsl_mc_device *ls_dev) { struct device *dev = &ls_dev->dev; struct dpaa2_eth_priv *priv; @@ -3053,20 +3253,20 @@ static int setup_dpni(struct fsl_mc_device *ls_dev) goto close; } - err = set_buffer_layout(priv); + err = dpaa2_eth_set_buffer_layout(priv); if (err) goto close; - set_enqueue_mode(priv); + dpaa2_eth_set_enqueue_mode(priv); /* Enable pause frame support */ if (dpaa2_eth_has_pause_support(priv)) { - err = set_pause(priv); + err = dpaa2_eth_set_pause(priv); if (err) goto close; } - err = set_vlan_qos(priv); + err = dpaa2_eth_set_vlan_qos(priv); if (err && err != -EOPNOTSUPP) goto close; @@ -3086,7 +3286,7 @@ close: return err; } -static void free_dpni(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_free_dpni(struct dpaa2_eth_priv *priv) { int err; @@ -3098,8 +3298,8 @@ static void free_dpni(struct 
dpaa2_eth_priv *priv) dpni_close(priv->mc_io, 0, priv->mc_token); } -static int setup_rx_flow(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_fq *fq) +static int dpaa2_eth_setup_rx_flow(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_fq *fq) { struct device *dev = priv->net_dev->dev.parent; struct dpni_queue queue; @@ -3150,8 +3350,8 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv, return 0; } -static int setup_tx_flow(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_fq *fq) +static int dpaa2_eth_setup_tx_flow(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_fq *fq) { struct device *dev = priv->net_dev->dev.parent; struct dpni_queue queue; @@ -3198,6 +3398,38 @@ static int setup_tx_flow(struct dpaa2_eth_priv *priv, return 0; } +static int setup_rx_err_flow(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_fq *fq) +{ + struct device *dev = priv->net_dev->dev.parent; + struct dpni_queue q = { { 0 } }; + struct dpni_queue_id qid; + u8 q_opt = DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST; + int err; + + err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, + DPNI_QUEUE_RX_ERR, 0, 0, &q, &qid); + if (err) { + dev_err(dev, "dpni_get_queue() failed (%d)\n", err); + return err; + } + + fq->fqid = qid.fqid; + + q.destination.id = fq->channel->dpcon_id; + q.destination.type = DPNI_DEST_DPCON; + q.destination.priority = 1; + q.user_context = (u64)(uintptr_t)fq; + err = dpni_set_queue(priv->mc_io, 0, priv->mc_token, + DPNI_QUEUE_RX_ERR, 0, 0, q_opt, &q); + if (err) { + dev_err(dev, "dpni_set_queue() failed (%d)\n", err); + return err; + } + + return 0; +} + /* Supported header fields for Rx hash distribution key */ static const struct dpaa2_eth_dist_fields dist_fields[] = { { @@ -3266,7 +3498,7 @@ static const struct dpaa2_eth_dist_fields dist_fields[] = { }; /* Configure the Rx hash key using the legacy API */ -static int config_legacy_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key) +static int dpaa2_eth_config_legacy_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key) { struct device *dev = priv->net_dev->dev.parent; struct dpni_rx_tc_dist_cfg dist_cfg; @@ -3291,7 +3523,7 @@ static int config_legacy_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key) } /* Configure the Rx hash key using the new API */ -static int config_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key) +static int dpaa2_eth_config_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key) { struct device *dev = priv->net_dev->dev.parent; struct dpni_rx_dist_cfg dist_cfg; @@ -3311,13 +3543,19 @@ static int config_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key) dev_err(dev, "dpni_set_rx_hash_dist failed\n"); break; } + + /* If the flow steering / hashing key is shared between all + * traffic classes, install it just once + */ + if (priv->dpni_attrs.options & DPNI_OPT_SHARED_FS) + break; } return err; } /* Configure the Rx flow classification key */ -static int config_cls_key(struct dpaa2_eth_priv *priv, dma_addr_t key) +static int dpaa2_eth_config_cls_key(struct dpaa2_eth_priv *priv, dma_addr_t key) { struct device *dev = priv->net_dev->dev.parent; struct dpni_rx_dist_cfg dist_cfg; @@ -3337,6 +3575,12 @@ static int config_cls_key(struct dpaa2_eth_priv *priv, dma_addr_t key) dev_err(dev, "dpni_set_rx_fs_dist failed\n"); break; } + + /* If the flow steering / hashing key is shared between all + * traffic classes, install it just once + */ + if (priv->dpni_attrs.options & DPNI_OPT_SHARED_FS) + break; } return err; @@ -3452,11 +3696,11 @@ static int dpaa2_eth_set_dist_key(struct net_device *net_dev, if (type == 
DPAA2_ETH_RX_DIST_HASH) { if (dpaa2_eth_has_legacy_dist(priv)) - err = config_legacy_hash_key(priv, key_iova); + err = dpaa2_eth_config_legacy_hash_key(priv, key_iova); else - err = config_hash_key(priv, key_iova); + err = dpaa2_eth_config_hash_key(priv, key_iova); } else { - err = config_cls_key(priv, key_iova); + err = dpaa2_eth_config_cls_key(priv, key_iova); } dma_unmap_single(dev, key_iova, DPAA2_CLASSIFIER_DMA_SIZE, @@ -3531,7 +3775,7 @@ out: /* Bind the DPNI to its needed objects and resources: buffer pool, DPIOs, * frame queues and channels */ -static int bind_dpni(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_bind_dpni(struct dpaa2_eth_priv *priv) { struct net_device *net_dev = priv->net_dev; struct device *dev = net_dev->dev.parent; @@ -3579,10 +3823,13 @@ static int bind_dpni(struct dpaa2_eth_priv *priv) for (i = 0; i < priv->num_fqs; i++) { switch (priv->fq[i].type) { case DPAA2_RX_FQ: - err = setup_rx_flow(priv, &priv->fq[i]); + err = dpaa2_eth_setup_rx_flow(priv, &priv->fq[i]); break; case DPAA2_TX_CONF_FQ: - err = setup_tx_flow(priv, &priv->fq[i]); + err = dpaa2_eth_setup_tx_flow(priv, &priv->fq[i]); + break; + case DPAA2_RX_ERR_FQ: + err = setup_rx_err_flow(priv, &priv->fq[i]); break; default: dev_err(dev, "Invalid FQ type %d\n", priv->fq[i].type); @@ -3603,7 +3850,7 @@ static int bind_dpni(struct dpaa2_eth_priv *priv) } /* Allocate rings for storing incoming frame descriptors */ -static int alloc_rings(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_alloc_rings(struct dpaa2_eth_priv *priv) { struct net_device *net_dev = priv->net_dev; struct device *dev = net_dev->dev.parent; @@ -3630,7 +3877,7 @@ err_ring: return -ENOMEM; } -static void free_rings(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_free_rings(struct dpaa2_eth_priv *priv) { int i; @@ -3638,7 +3885,7 @@ static void free_rings(struct dpaa2_eth_priv *priv) dpaa2_io_store_destroy(priv->channel[i]->store); } -static int set_mac_addr(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_set_mac_addr(struct dpaa2_eth_priv *priv) { struct net_device *net_dev = priv->net_dev; struct device *dev = net_dev->dev.parent; @@ -3703,7 +3950,7 @@ static int set_mac_addr(struct dpaa2_eth_priv *priv) return 0; } -static int netdev_init(struct net_device *net_dev) +static int dpaa2_eth_netdev_init(struct net_device *net_dev) { struct device *dev = net_dev->dev.parent; struct dpaa2_eth_priv *priv = netdev_priv(net_dev); @@ -3716,7 +3963,7 @@ static int netdev_init(struct net_device *net_dev) net_dev->netdev_ops = &dpaa2_eth_ops; net_dev->ethtool_ops = &dpaa2_ethtool_ops; - err = set_mac_addr(priv); + err = dpaa2_eth_set_mac_addr(priv); if (err) return err; @@ -3771,13 +4018,13 @@ static int netdev_init(struct net_device *net_dev) return 0; } -static int poll_link_state(void *arg) +static int dpaa2_eth_poll_link_state(void *arg) { struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)arg; int err; while (!kthread_should_stop()) { - err = link_state_update(priv); + err = dpaa2_eth_link_state_update(priv); if (unlikely(err)) return err; @@ -3847,11 +4094,11 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg) } if (status & DPNI_IRQ_EVENT_LINK_CHANGED) - link_state_update(netdev_priv(net_dev)); + dpaa2_eth_link_state_update(netdev_priv(net_dev)); if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED) { - set_mac_addr(netdev_priv(net_dev)); - update_tx_fqids(priv); + dpaa2_eth_set_mac_addr(netdev_priv(net_dev)); + dpaa2_eth_update_tx_fqids(priv); rtnl_lock(); if (priv->mac) @@ -3864,7 +4111,7 @@ static irqreturn_t 
dpni_irq0_handler_thread(int irq_num, void *arg) return IRQ_HANDLED; } -static int setup_irqs(struct fsl_mc_device *ls_dev) +static int dpaa2_eth_setup_irqs(struct fsl_mc_device *ls_dev) { int err = 0; struct fsl_mc_device_irq *irq; @@ -3910,7 +4157,7 @@ free_mc_irq: return err; } -static void add_ch_napi(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_add_ch_napi(struct dpaa2_eth_priv *priv) { int i; struct dpaa2_eth_channel *ch; @@ -3923,7 +4170,7 @@ static void add_ch_napi(struct dpaa2_eth_priv *priv) } } -static void del_ch_napi(struct dpaa2_eth_priv *priv) +static void dpaa2_eth_del_ch_napi(struct dpaa2_eth_priv *priv) { int i; struct dpaa2_eth_channel *ch; @@ -3958,6 +4205,19 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) priv->iommu_domain = iommu_get_domain_for_dev(dev); + priv->tx_tstamp_type = HWTSTAMP_TX_OFF; + priv->rx_tstamp = false; + + priv->dpaa2_ptp_wq = alloc_workqueue("dpaa2_ptp_wq", 0, 0); + if (!priv->dpaa2_ptp_wq) { + err = -ENOMEM; + goto err_wq_alloc; + } + + INIT_WORK(&priv->tx_onestep_tstamp, dpaa2_eth_tx_onestep_tstamp); + + skb_queue_head_init(&priv->tx_skbs); + /* Obtain a MC portal */ err = fsl_mc_portal_allocate(dpni_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL, &priv->mc_io); @@ -3970,26 +4230,26 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) } /* MC objects initialization and configuration */ - err = setup_dpni(dpni_dev); + err = dpaa2_eth_setup_dpni(dpni_dev); if (err) goto err_dpni_setup; - err = setup_dpio(priv); + err = dpaa2_eth_setup_dpio(priv); if (err) goto err_dpio_setup; - setup_fqs(priv); + dpaa2_eth_setup_fqs(priv); - err = setup_dpbp(priv); + err = dpaa2_eth_setup_dpbp(priv); if (err) goto err_dpbp_setup; - err = bind_dpni(priv); + err = dpaa2_eth_bind_dpni(priv); if (err) goto err_bind; /* Add a NAPI context for each channel */ - add_ch_napi(priv); + dpaa2_eth_add_ch_napi(priv); /* Percpu statistics */ priv->percpu_stats = alloc_percpu(*priv->percpu_stats); @@ -4012,21 +4272,21 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) goto err_alloc_sgt_cache; } - err = netdev_init(net_dev); + err = dpaa2_eth_netdev_init(net_dev); if (err) goto err_netdev_init; /* Configure checksum offload based on current interface flags */ - err = set_rx_csum(priv, !!(net_dev->features & NETIF_F_RXCSUM)); + err = dpaa2_eth_set_rx_csum(priv, !!(net_dev->features & NETIF_F_RXCSUM)); if (err) goto err_csum; - err = set_tx_csum(priv, !!(net_dev->features & - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))); + err = dpaa2_eth_set_tx_csum(priv, + !!(net_dev->features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))); if (err) goto err_csum; - err = alloc_rings(priv); + err = dpaa2_eth_alloc_rings(priv); if (err) goto err_alloc_rings; @@ -4039,10 +4299,10 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) } #endif - err = setup_irqs(dpni_dev); + err = dpaa2_eth_setup_irqs(dpni_dev); if (err) { netdev_warn(net_dev, "Failed to set link interrupt, fall back to polling\n"); - priv->poll_thread = kthread_run(poll_link_state, priv, + priv->poll_thread = kthread_run(dpaa2_eth_poll_link_state, priv, "%s_poll_link", net_dev->name); if (IS_ERR(priv->poll_thread)) { dev_err(dev, "Error starting polling thread\n"); @@ -4055,6 +4315,18 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) if (err) goto err_connect_mac; + err = dpaa2_eth_dl_register(priv); + if (err) + goto err_dl_register; + + err = dpaa2_eth_dl_traps_register(priv); + if (err) + goto err_dl_trap_register; + + err = dpaa2_eth_dl_port_add(priv); + if (err) + goto err_dl_port_add; 
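Each devlink registration step added above gets its own error label, following the usual kernel probe pattern: a failure at step N jumps to a label that unwinds steps N-1..1 in reverse order (the matching labels appear further down in the error path). A minimal self-contained sketch of the pattern, with purely hypothetical step names:

/* Hypothetical three-step init with reverse-order unwind. */
static int step_a(void) { return 0; }
static int step_b(void) { return 0; }
static int step_c(void) { return 0; }
static void undo_a(void) { }
static void undo_b(void) { }

static int example_probe(void)
{
	int err;

	err = step_a();			/* cf. dpaa2_eth_dl_register() */
	if (err)
		return err;

	err = step_b();			/* cf. dpaa2_eth_dl_traps_register() */
	if (err)
		goto err_undo_a;

	err = step_c();			/* cf. dpaa2_eth_dl_port_add() */
	if (err)
		goto err_undo_b;

	return 0;

err_undo_b:
	undo_b();
err_undo_a:
	undo_a();
	return err;
}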
+ err = register_netdev(net_dev); if (err < 0) { dev_err(dev, "register_netdev() failed\n"); @@ -4069,6 +4341,12 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) return 0; err_netdev_reg: + dpaa2_eth_dl_port_del(priv); +err_dl_port_add: + dpaa2_eth_dl_traps_unregister(priv); +err_dl_trap_register: + dpaa2_eth_dl_unregister(priv); +err_dl_register: dpaa2_eth_disconnect_mac(priv); err_connect_mac: if (priv->do_link_poll) @@ -4076,7 +4354,7 @@ err_connect_mac: else fsl_mc_free_irqs(dpni_dev); err_poll_thread: - free_rings(priv); + dpaa2_eth_free_rings(priv); err_alloc_rings: err_csum: err_netdev_init: @@ -4086,16 +4364,18 @@ err_alloc_sgt_cache: err_alloc_percpu_extras: free_percpu(priv->percpu_stats); err_alloc_percpu_stats: - del_ch_napi(priv); + dpaa2_eth_del_ch_napi(priv); err_bind: - free_dpbp(priv); + dpaa2_eth_free_dpbp(priv); err_dpbp_setup: - free_dpio(priv); + dpaa2_eth_free_dpio(priv); err_dpio_setup: - free_dpni(priv); + dpaa2_eth_free_dpni(priv); err_dpni_setup: fsl_mc_portal_free(priv->mc_io); err_portal_alloc: + destroy_workqueue(priv->dpaa2_ptp_wq); +err_wq_alloc: dev_set_drvdata(dev, NULL); free_netdev(net_dev); @@ -4121,20 +4401,24 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) unregister_netdev(net_dev); + dpaa2_eth_dl_port_del(priv); + dpaa2_eth_dl_traps_unregister(priv); + dpaa2_eth_dl_unregister(priv); + if (priv->do_link_poll) kthread_stop(priv->poll_thread); else fsl_mc_free_irqs(ls_dev); - free_rings(priv); + dpaa2_eth_free_rings(priv); free_percpu(priv->sgt_cache); free_percpu(priv->percpu_stats); free_percpu(priv->percpu_extras); - del_ch_napi(priv); - free_dpbp(priv); - free_dpio(priv); - free_dpni(priv); + dpaa2_eth_del_ch_napi(priv); + dpaa2_eth_free_dpbp(priv); + dpaa2_eth_free_dpio(priv); + dpaa2_eth_free_dpni(priv); fsl_mc_portal_free(priv->mc_io); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 7f3c41dc98f2..d236b8695c39 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -10,6 +10,8 @@ #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/fsl/mc.h> +#include <linux/net_tstamp.h> +#include <net/devlink.h> #include <soc/fsl/dpaa2-io.h> #include <soc/fsl/dpaa2-fd.h> @@ -180,6 +182,49 @@ struct dpaa2_fas { */ #define DPAA2_TS_OFFSET 0x8 +/* Frame annotation parse results */ +struct dpaa2_fapr { + /* 64-bit word 1 */ + __le32 faf_lo; + __le16 faf_ext; + __le16 nxt_hdr; + /* 64-bit word 2 */ + __le64 faf_hi; + /* 64-bit word 3 */ + u8 last_ethertype_offset; + u8 vlan_tci_offset_n; + u8 vlan_tci_offset_1; + u8 llc_snap_offset; + u8 eth_offset; + u8 ip1_pid_offset; + u8 shim_offset_2; + u8 shim_offset_1; + /* 64-bit word 4 */ + u8 l5_offset; + u8 l4_offset; + u8 gre_offset; + u8 l3_offset_n; + u8 l3_offset_1; + u8 mpls_offset_n; + u8 mpls_offset_1; + u8 pppoe_offset; + /* 64-bit word 5 */ + __le16 running_sum; + __le16 gross_running_sum; + u8 ipv6_frag_offset; + u8 nxt_hdr_offset; + u8 routing_hdr_offset_2; + u8 routing_hdr_offset_1; + /* 64-bit word 6 */ + u8 reserved[5]; /* Soft-parsing context */ + u8 ip_proto_offset_n; + u8 nxt_hdr_frag_offset; + u8 parse_error_code; +}; + +#define DPAA2_FAPR_OFFSET 0x10 +#define DPAA2_FAPR_SIZE sizeof((struct dpaa2_fapr)) + /* Frame annotation egress action descriptor */ #define DPAA2_FAEAD_OFFSET 0x58 @@ -194,6 +239,24 @@ struct dpaa2_faead { #define DPAA2_FAEAD_EBDDV 0x00002000 #define DPAA2_FAEAD_UPD 0x00000010 +struct ptp_tstamp { + u16 
sec_msb; + u32 sec_lsb; + u32 nsec; +}; + +static inline void ns_to_ptp_tstamp(struct ptp_tstamp *tstamp, u64 ns) +{ + u64 sec, nsec; + + sec = ns; + nsec = do_div(sec, 1000000000); + + tstamp->sec_lsb = sec & 0xFFFFFFFF; + tstamp->sec_msb = (sec >> 32) & 0xFFFF; + tstamp->nsec = nsec; +} + /* Accessors for the hardware annotation fields that we use */ static inline void *dpaa2_get_hwa(void *buf_addr, bool swa) { @@ -210,6 +273,11 @@ static inline __le64 *dpaa2_get_ts(void *buf_addr, bool swa) return dpaa2_get_hwa(buf_addr, swa) + DPAA2_TS_OFFSET; } +static inline struct dpaa2_fapr *dpaa2_get_fapr(void *buf_addr, bool swa) +{ + return dpaa2_get_hwa(buf_addr, swa) + DPAA2_FAPR_OFFSET; +} + static inline struct dpaa2_faead *dpaa2_get_faead(void *buf_addr, bool swa) { return dpaa2_get_hwa(buf_addr, swa) + DPAA2_FAEAD_OFFSET; @@ -324,8 +392,10 @@ struct dpaa2_eth_ch_stats { #define DPAA2_ETH_MAX_RX_QUEUES \ (DPAA2_ETH_MAX_RX_QUEUES_PER_TC * DPAA2_ETH_MAX_TCS) #define DPAA2_ETH_MAX_TX_QUEUES 16 +#define DPAA2_ETH_MAX_RX_ERR_QUEUES 1 #define DPAA2_ETH_MAX_QUEUES (DPAA2_ETH_MAX_RX_QUEUES + \ - DPAA2_ETH_MAX_TX_QUEUES) + DPAA2_ETH_MAX_TX_QUEUES + \ + DPAA2_ETH_MAX_RX_ERR_QUEUES) #define DPAA2_ETH_MAX_NETDEV_QUEUES \ (DPAA2_ETH_MAX_TX_QUEUES * DPAA2_ETH_MAX_TCS) @@ -334,6 +404,7 @@ struct dpaa2_eth_ch_stats { enum dpaa2_eth_fq_type { DPAA2_RX_FQ = 0, DPAA2_TX_CONF_FQ, + DPAA2_RX_ERR_FQ }; struct dpaa2_eth_priv; @@ -407,6 +478,15 @@ struct dpaa2_eth_sgt_cache { u16 count; }; +struct dpaa2_eth_trap_item { + void *trap_ctx; +}; + +struct dpaa2_eth_trap_data { + struct dpaa2_eth_trap_item *trap_items_arr; + struct dpaa2_eth_priv *priv; +}; + /* Driver private data */ struct dpaa2_eth_priv { struct net_device *net_dev; @@ -433,8 +513,8 @@ struct dpaa2_eth_priv { u16 bpid; struct iommu_domain *iommu_domain; - bool tx_tstamp; /* Tx timestamping enabled */ - bool rx_tstamp; /* Rx timestamping enabled */ + enum hwtstamp_tx_types tx_tstamp_type; /* Tx timestamping type */ + bool rx_tstamp; /* Rx timestamping enabled */ u16 tx_qdid; struct fsl_mc_io *mc_io; @@ -473,8 +553,29 @@ struct dpaa2_eth_priv { #endif struct dpaa2_mac *mac; + struct workqueue_struct *dpaa2_ptp_wq; + struct work_struct tx_onestep_tstamp; + struct sk_buff_head tx_skbs; + /* The one-step timestamping configuration on hardware + * registers could only be done when no one-step + * timestamping frames are in flight. So we use a mutex + * lock here to make sure the lock is released by last + * one-step timestamping packet through TX confirmation + * queue before transmit current packet. 
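A quick worked example of the ns_to_ptp_tstamp() split above, as stand-alone user-space C. In the kernel, do_div(sec, 1000000000) divides in place and returns the remainder; plain / and % do the same job here:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	uint64_t ns = 1600000000123456789ULL;	/* example nanosecond count */
	uint64_t sec = ns / 1000000000ULL;	/* quotient: whole seconds */
	uint32_t nsec = ns % 1000000000ULL;	/* remainder: nanoseconds */
	uint32_t sec_lsb = sec & 0xFFFFFFFF;	/* low 32 bits of seconds */
	uint16_t sec_msb = (sec >> 32) & 0xFFFF;	/* high 16 bits of seconds */

	printf("sec_msb=%u sec_lsb=%" PRIu32 " nsec=%" PRIu32 "\n",
	       sec_msb, sec_lsb, nsec);
	return 0;
}

For this input the output is sec_msb=0 sec_lsb=1600000000 nsec=123456789, since an epoch second count still fits in 32 bits; sec_msb only becomes non-zero once the 48-bit seconds field outgrows the low word.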
+ */ + struct mutex onestep_tstamp_lock; + struct devlink *devlink; + struct dpaa2_eth_trap_data *trap_data; + struct devlink_port devlink_port; }; +struct dpaa2_eth_devlink_priv { + struct dpaa2_eth_priv *dpaa2_priv; +}; + +#define TX_TSTAMP 0x1 +#define TX_TSTAMP_ONESTEP_SYNC 0x2 + #define DPAA2_RXH_SUPPORTED (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \ | RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 \ | RXH_L4_B_2_3) @@ -491,6 +592,7 @@ struct dpaa2_eth_priv { extern const struct ethtool_ops dpaa2_ethtool_ops; extern int dpaa2_phc_index; +extern struct ptp_qoriq *dpaa2_ptp; static inline int dpaa2_eth_cmp_dpni_ver(struct dpaa2_eth_priv *priv, u16 ver_major, u16 ver_minor) @@ -560,9 +662,7 @@ static inline bool dpaa2_eth_rx_pause_enabled(u64 link_options) return !!(link_options & DPNI_LINK_OPT_PAUSE); } -static inline -unsigned int dpaa2_eth_needed_headroom(struct dpaa2_eth_priv *priv, - struct sk_buff *skb) +static inline unsigned int dpaa2_eth_needed_headroom(struct sk_buff *skb) { unsigned int headroom = DPAA2_ETH_SWA_SIZE; @@ -579,7 +679,7 @@ unsigned int dpaa2_eth_needed_headroom(struct dpaa2_eth_priv *priv, return 0; /* If we have Tx timestamping, need 128B hardware annotation */ - if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) + if (skb->cb[0]) headroom += DPAA2_ETH_TX_HWA_SIZE; return headroom; @@ -604,4 +704,15 @@ void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, extern const struct dcbnl_rtnl_ops dpaa2_eth_dcbnl_ops; +int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv); +void dpaa2_eth_dl_unregister(struct dpaa2_eth_priv *priv); + +int dpaa2_eth_dl_port_add(struct dpaa2_eth_priv *priv); +void dpaa2_eth_dl_port_del(struct dpaa2_eth_priv *priv); + +int dpaa2_eth_dl_traps_register(struct dpaa2_eth_priv *priv); +void dpaa2_eth_dl_traps_unregister(struct dpaa2_eth_priv *priv); + +struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv, + struct dpaa2_fapr *fapr); #endif /* __DPAA2_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 8356f1fbbee1..f981a523e13a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2014-2016 Freescale Semiconductor Inc. 
* Copyright 2016 NXP + * Copyright 2020 NXP */ #include <linux/net_tstamp.h> @@ -316,8 +317,8 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev, dpaa2_mac_get_ethtool_stats(priv->mac, data + i); } -static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask, - void *key, void *mask, u64 *fields) +static int dpaa2_eth_prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask, + void *key, void *mask, u64 *fields) { int off; @@ -345,9 +346,9 @@ static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask, return 0; } -static int prep_uip_rule(struct ethtool_usrip4_spec *uip_value, - struct ethtool_usrip4_spec *uip_mask, - void *key, void *mask, u64 *fields) +static int dpaa2_eth_prep_uip_rule(struct ethtool_usrip4_spec *uip_value, + struct ethtool_usrip4_spec *uip_mask, + void *key, void *mask, u64 *fields) { int off; u32 tmp_value, tmp_mask; @@ -400,9 +401,9 @@ static int prep_uip_rule(struct ethtool_usrip4_spec *uip_value, return 0; } -static int prep_l4_rule(struct ethtool_tcpip4_spec *l4_value, - struct ethtool_tcpip4_spec *l4_mask, - void *key, void *mask, u8 l4_proto, u64 *fields) +static int dpaa2_eth_prep_l4_rule(struct ethtool_tcpip4_spec *l4_value, + struct ethtool_tcpip4_spec *l4_mask, + void *key, void *mask, u8 l4_proto, u64 *fields) { int off; @@ -451,9 +452,9 @@ static int prep_l4_rule(struct ethtool_tcpip4_spec *l4_value, return 0; } -static int prep_ext_rule(struct ethtool_flow_ext *ext_value, - struct ethtool_flow_ext *ext_mask, - void *key, void *mask, u64 *fields) +static int dpaa2_eth_prep_ext_rule(struct ethtool_flow_ext *ext_value, + struct ethtool_flow_ext *ext_mask, + void *key, void *mask, u64 *fields) { int off; @@ -470,9 +471,9 @@ static int prep_ext_rule(struct ethtool_flow_ext *ext_value, return 0; } -static int prep_mac_ext_rule(struct ethtool_flow_ext *ext_value, - struct ethtool_flow_ext *ext_mask, - void *key, void *mask, u64 *fields) +static int dpaa2_eth_prep_mac_ext_rule(struct ethtool_flow_ext *ext_value, + struct ethtool_flow_ext *ext_mask, + void *key, void *mask, u64 *fields) { int off; @@ -486,32 +487,32 @@ static int prep_mac_ext_rule(struct ethtool_flow_ext *ext_value, return 0; } -static int prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, void *mask, - u64 *fields) +static int dpaa2_eth_prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, + void *mask, u64 *fields) { int err; switch (fs->flow_type & 0xFF) { case ETHER_FLOW: - err = prep_eth_rule(&fs->h_u.ether_spec, &fs->m_u.ether_spec, - key, mask, fields); + err = dpaa2_eth_prep_eth_rule(&fs->h_u.ether_spec, &fs->m_u.ether_spec, + key, mask, fields); break; case IP_USER_FLOW: - err = prep_uip_rule(&fs->h_u.usr_ip4_spec, - &fs->m_u.usr_ip4_spec, key, mask, fields); + err = dpaa2_eth_prep_uip_rule(&fs->h_u.usr_ip4_spec, + &fs->m_u.usr_ip4_spec, key, mask, fields); break; case TCP_V4_FLOW: - err = prep_l4_rule(&fs->h_u.tcp_ip4_spec, &fs->m_u.tcp_ip4_spec, - key, mask, IPPROTO_TCP, fields); + err = dpaa2_eth_prep_l4_rule(&fs->h_u.tcp_ip4_spec, &fs->m_u.tcp_ip4_spec, + key, mask, IPPROTO_TCP, fields); break; case UDP_V4_FLOW: - err = prep_l4_rule(&fs->h_u.udp_ip4_spec, &fs->m_u.udp_ip4_spec, - key, mask, IPPROTO_UDP, fields); + err = dpaa2_eth_prep_l4_rule(&fs->h_u.udp_ip4_spec, &fs->m_u.udp_ip4_spec, + key, mask, IPPROTO_UDP, fields); break; case SCTP_V4_FLOW: - err = prep_l4_rule(&fs->h_u.sctp_ip4_spec, - &fs->m_u.sctp_ip4_spec, key, mask, - IPPROTO_SCTP, fields); + err = dpaa2_eth_prep_l4_rule(&fs->h_u.sctp_ip4_spec, + 
&fs->m_u.sctp_ip4_spec, key, mask, + IPPROTO_SCTP, fields); break; default: return -EOPNOTSUPP; @@ -521,14 +522,14 @@ static int prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, void *mask, return err; if (fs->flow_type & FLOW_EXT) { - err = prep_ext_rule(&fs->h_ext, &fs->m_ext, key, mask, fields); + err = dpaa2_eth_prep_ext_rule(&fs->h_ext, &fs->m_ext, key, mask, fields); if (err) return err; } if (fs->flow_type & FLOW_MAC_EXT) { - err = prep_mac_ext_rule(&fs->h_ext, &fs->m_ext, key, mask, - fields); + err = dpaa2_eth_prep_mac_ext_rule(&fs->h_ext, &fs->m_ext, key, + mask, fields); if (err) return err; } @@ -536,9 +537,9 @@ static int prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, void *mask, return 0; } -static int do_cls_rule(struct net_device *net_dev, - struct ethtool_rx_flow_spec *fs, - bool add) +static int dpaa2_eth_do_cls_rule(struct net_device *net_dev, + struct ethtool_rx_flow_spec *fs, + bool add) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); struct device *dev = net_dev->dev.parent; @@ -561,7 +562,7 @@ static int do_cls_rule(struct net_device *net_dev, return -ENOMEM; /* Fill the key and mask memory areas */ - err = prep_cls_rule(fs, key_buf, key_buf + rule_cfg.key_size, &fields); + err = dpaa2_eth_prep_cls_rule(fs, key_buf, key_buf + rule_cfg.key_size, &fields); if (err) goto free_mem; @@ -617,7 +618,7 @@ static int do_cls_rule(struct net_device *net_dev, err = dpni_remove_fs_entry(priv->mc_io, 0, priv->mc_token, i, &rule_cfg); - if (err) + if (err || priv->dpni_attrs.options & DPNI_OPT_SHARED_FS) break; } @@ -629,7 +630,7 @@ free_mem: return err; } -static int num_rules(struct dpaa2_eth_priv *priv) +static int dpaa2_eth_num_cls_rules(struct dpaa2_eth_priv *priv) { int i, rules = 0; @@ -640,9 +641,9 @@ static int num_rules(struct dpaa2_eth_priv *priv) return rules; } -static int update_cls_rule(struct net_device *net_dev, - struct ethtool_rx_flow_spec *new_fs, - unsigned int location) +static int dpaa2_eth_update_cls_rule(struct net_device *net_dev, + struct ethtool_rx_flow_spec *new_fs, + unsigned int location) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); struct dpaa2_eth_cls_rule *rule; @@ -658,13 +659,14 @@ static int update_cls_rule(struct net_device *net_dev, /* If a rule is present at the specified location, delete it. 
*/ if (rule->in_use) { - err = do_cls_rule(net_dev, &rule->fs, false); + err = dpaa2_eth_do_cls_rule(net_dev, &rule->fs, false); if (err) return err; rule->in_use = 0; - if (!dpaa2_eth_fs_mask_enabled(priv) && !num_rules(priv)) + if (!dpaa2_eth_fs_mask_enabled(priv) && + !dpaa2_eth_num_cls_rules(priv)) priv->rx_cls_fields = 0; } @@ -672,7 +674,7 @@ static int update_cls_rule(struct net_device *net_dev, if (!new_fs) return err; - err = do_cls_rule(net_dev, new_fs, true); + err = dpaa2_eth_do_cls_rule(net_dev, new_fs, true); if (err) return err; @@ -702,7 +704,7 @@ static int dpaa2_eth_get_rxnfc(struct net_device *net_dev, break; case ETHTOOL_GRXCLSRLCNT: rxnfc->rule_cnt = 0; - rxnfc->rule_cnt = num_rules(priv); + rxnfc->rule_cnt = dpaa2_eth_num_cls_rules(priv); rxnfc->data = max_rules; break; case ETHTOOL_GRXCLSRULE: @@ -744,10 +746,10 @@ static int dpaa2_eth_set_rxnfc(struct net_device *net_dev, err = dpaa2_eth_set_hash(net_dev, rxnfc->data); break; case ETHTOOL_SRXCLSRLINS: - err = update_cls_rule(net_dev, &rxnfc->fs, rxnfc->fs.location); + err = dpaa2_eth_update_cls_rule(net_dev, &rxnfc->fs, rxnfc->fs.location); break; case ETHTOOL_SRXCLSRLDEL: - err = update_cls_rule(net_dev, NULL, rxnfc->fs.location); + err = dpaa2_eth_update_cls_rule(net_dev, NULL, rxnfc->fs.location); break; default: err = -EOPNOTSUPP; @@ -762,6 +764,9 @@ EXPORT_SYMBOL(dpaa2_phc_index); static int dpaa2_eth_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { + if (!dpaa2_ptp) + return ethtool_op_get_ts_info(dev, info); + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -769,7 +774,8 @@ static int dpaa2_eth_get_ts_info(struct net_device *dev, info->phc_index = dpaa2_phc_index; info->tx_types = (1 << HWTSTAMP_TX_OFF) | - (1 << HWTSTAMP_TX_ON); + (1 << HWTSTAMP_TX_ON) | + (1 << HWTSTAMP_TX_ONESTEP_SYNC); info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 3ee236c5fc37..90cd243070d7 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -15,6 +15,18 @@ static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode) case DPMAC_ETH_IF_RGMII: *if_mode = PHY_INTERFACE_MODE_RGMII; break; + case DPMAC_ETH_IF_USXGMII: + *if_mode = PHY_INTERFACE_MODE_USXGMII; + break; + case DPMAC_ETH_IF_QSGMII: + *if_mode = PHY_INTERFACE_MODE_QSGMII; + break; + case DPMAC_ETH_IF_SGMII: + *if_mode = PHY_INTERFACE_MODE_SGMII; + break; + case DPMAC_ETH_IF_XFI: + *if_mode = PHY_INTERFACE_MODE_10GBASER; + break; default: return -EINVAL; } @@ -67,6 +79,10 @@ static bool dpaa2_mac_phy_mode_mismatch(struct dpaa2_mac *mac, phy_interface_t interface) { switch (interface) { + case PHY_INTERFACE_MODE_10GBASER: + case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: @@ -95,6 +111,17 @@ static void dpaa2_mac_validate(struct phylink_config *config, phylink_set(mask, Asym_Pause); switch (state->interface) { + case PHY_INTERFACE_MODE_NA: + case PHY_INTERFACE_MODE_10GBASER: + case PHY_INTERFACE_MODE_USXGMII: + phylink_set(mask, 10000baseT_Full); + if (state->interface == PHY_INTERFACE_MODE_10GBASER) + break; + phylink_set(mask, 5000baseT_Full); + phylink_set(mask, 2500baseT_Full); + fallthrough; + case 
PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: @@ -227,6 +254,51 @@ out: return fixed; } +static int dpaa2_pcs_create(struct dpaa2_mac *mac, + struct device_node *dpmac_node, int id) +{ + struct mdio_device *mdiodev; + struct device_node *node; + + node = of_parse_phandle(dpmac_node, "pcs-handle", 0); + if (!node) { + /* do not error out on old DTS files */ + netdev_warn(mac->net_dev, "pcs-handle node not found\n"); + return 0; + } + + if (!of_device_is_available(node)) { + netdev_err(mac->net_dev, "pcs-handle node not available\n"); + return -ENODEV; + } + + mdiodev = of_mdio_find_device(node); + of_node_put(node); + if (!mdiodev) + return -EPROBE_DEFER; + + mac->pcs = lynx_pcs_create(mdiodev); + if (!mac->pcs) { + netdev_err(mac->net_dev, "lynx_pcs_create() failed\n"); + put_device(&mdiodev->dev); + return -ENOMEM; + } + + return 0; +} + +static void dpaa2_pcs_destroy(struct dpaa2_mac *mac) +{ + struct lynx_pcs *pcs = mac->pcs; + + if (pcs) { + struct device *dev = &pcs->mdio->dev; + lynx_pcs_destroy(pcs); + put_device(dev); + mac->pcs = NULL; + } +} + int dpaa2_mac_connect(struct dpaa2_mac *mac) { struct fsl_mc_device *dpmac_dev = mac->mc_dev; @@ -278,6 +350,13 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) goto err_put_node; } + if (attr.link_type == DPMAC_LINK_TYPE_PHY && + attr.eth_if != DPMAC_ETH_IF_RGMII) { + err = dpaa2_pcs_create(mac, dpmac_node, attr.id); + if (err) + goto err_put_node; + } + mac->phylink_config.dev = &net_dev->dev; mac->phylink_config.type = PHYLINK_NETDEV; @@ -286,10 +365,13 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) &dpaa2_mac_phylink_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); - goto err_put_node; + goto err_pcs_destroy; } mac->phylink = phylink; + if (mac->pcs) + phylink_set_pcs(mac->phylink, &mac->pcs->pcs); + err = phylink_of_phy_connect(mac->phylink, dpmac_node, 0); if (err) { netdev_err(net_dev, "phylink_of_phy_connect() = %d\n", err); @@ -302,6 +384,8 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) err_phylink_destroy: phylink_destroy(mac->phylink); +err_pcs_destroy: + dpaa2_pcs_destroy(mac); err_put_node: of_node_put(dpmac_node); err_close_dpmac: @@ -316,6 +400,8 @@ void dpaa2_mac_disconnect(struct dpaa2_mac *mac) phylink_disconnect_phy(mac->phylink); phylink_destroy(mac->phylink); + dpaa2_pcs_destroy(mac); + dpmac_close(mac->mc_io, 0, mac->mc_dev->mc_handle); } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h index 2130d9c7d40e..955a52856210 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h @@ -7,6 +7,7 @@ #include <linux/of_mdio.h> #include <linux/of_net.h> #include <linux/phylink.h> +#include <linux/pcs-lynx.h> #include "dpmac.h" #include "dpmac-cmd.h" @@ -21,6 +22,7 @@ struct dpaa2_mac { struct phylink *phylink; phy_interface_t if_mode; enum dpmac_link_type if_link_type; + struct lynx_pcs *pcs; }; bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index cc1b7f85e433..32b5faa87bb8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -2,6 +2,7 @@ /* * Copyright 2013-2016 Freescale Semiconductor Inc. 
* Copyright 2016-2018 NXP + * Copyright 2020 NXP */ #include <linux/module.h> @@ -9,7 +10,6 @@ #include <linux/of_address.h> #include <linux/msi.h> #include <linux/fsl/mc.h> -#include <linux/fsl/ptp_qoriq.h> #include "dpaa2-ptp.h" @@ -201,6 +201,7 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) goto err_free_threaded_irq; dpaa2_phc_index = ptp_qoriq->phc_index; + dpaa2_ptp = ptp_qoriq; dev_set_drvdata(dev, ptp_qoriq); return 0; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h index df2458a5e9ef..e1023538b4c3 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h @@ -1,14 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright 2018 NXP + * Copyright 2020 NXP */ #ifndef __RTC_H #define __RTC_H +#include <linux/fsl/ptp_qoriq.h> + #include "dprtc.h" #include "dprtc-cmd.h" extern int dpaa2_phc_index; +extern struct ptp_qoriq *dpaa2_ptp; #endif diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h index 3c06f5fb5759..90453dc7baef 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ /* Copyright 2013-2016 Freescale Semiconductor Inc. * Copyright 2016 NXP + * Copyright 2020 NXP */ #ifndef _FSL_DPNI_CMD_H #define _FSL_DPNI_CMD_H @@ -92,6 +93,9 @@ #define DPNI_CMDID_SET_RX_HASH_DIST DPNI_CMD(0x274) #define DPNI_CMDID_GET_LINK_CFG DPNI_CMD(0x278) +#define DPNI_CMDID_SET_SINGLE_STEP_CFG DPNI_CMD(0x279) +#define DPNI_CMDID_GET_SINGLE_STEP_CFG DPNI_CMD(0x27a) + /* Macros for accessing command fields smaller than 1byte */ #define DPNI_MASK(field) \ GENMASK(DPNI_##field##_SHIFT + DPNI_##field##_SIZE - 1, \ @@ -641,4 +645,21 @@ struct dpni_cmd_set_tx_shaping { u8 coupled; }; +#define DPNI_PTP_ENABLE_SHIFT 0 +#define DPNI_PTP_ENABLE_SIZE 1 +#define DPNI_PTP_CH_UPDATE_SHIFT 1 +#define DPNI_PTP_CH_UPDATE_SIZE 1 + +struct dpni_cmd_single_step_cfg { + __le16 flags; + __le16 offset; + __le32 peer_delay; +}; + +struct dpni_rsp_single_step_cfg { + __le16 flags; + __le16 offset; + __le32 peer_delay; +}; + #endif /* _FSL_DPNI_CMD_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c index 68ed4c41b282..6ea7db66a632 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2013-2016 Freescale Semiconductor Inc. * Copyright 2016 NXP + * Copyright 2020 NXP */ #include <linux/kernel.h> #include <linux/errno.h> @@ -1999,3 +2000,81 @@ int dpni_set_tx_shaping(struct fsl_mc_io *mc_io, /* send command to mc*/ return mc_send_command(mc_io, &cmd); } + +/** + * dpni_get_single_step_cfg() - return current configuration for + * single step PTP + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPNI object + * @ptp_cfg: ptp single step configuration + * + * Return: '0' on Success; Error code otherwise. 
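Together, the get/set pair gives a driver read/modify/write access to the single-step PTP state. A hedged usage sketch — the field values are hypothetical, the call signature is the one declared in dpni.h below:

/* Hypothetical caller: enable single-step PTP with UDP checksum update. */
static int example_enable_single_step(struct fsl_mc_io *mc_io, u16 token)
{
	struct dpni_single_step_cfg cfg = {
		.en = 1,
		.ch_update = 1,		/* hardware fixes up the UDP checksum */
		.offset = 44,		/* hypothetical correctionField offset */
		.peer_delay = 0,	/* no P2P transparent-clock correction */
	};

	return dpni_set_single_step_cfg(mc_io, 0, token, &cfg);
}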
+ * + */ +int dpni_get_single_step_cfg(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + struct dpni_single_step_cfg *ptp_cfg) +{ + struct dpni_rsp_single_step_cfg *rsp_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_SINGLE_STEP_CFG, + cmd_flags, token); + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* read command response */ + rsp_params = (struct dpni_rsp_single_step_cfg *)cmd.params; + ptp_cfg->offset = le16_to_cpu(rsp_params->offset); + ptp_cfg->en = dpni_get_field(le16_to_cpu(rsp_params->flags), + PTP_ENABLE) ? 1 : 0; + ptp_cfg->ch_update = dpni_get_field(le16_to_cpu(rsp_params->flags), + PTP_CH_UPDATE) ? 1 : 0; + ptp_cfg->peer_delay = le32_to_cpu(rsp_params->peer_delay); + + return err; +} + +/** + * dpni_set_single_step_cfg() - enable/disable and configure single step PTP + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPNI object + * @ptp_cfg: ptp single step configuration + * + * Return: '0' on Success; Error code otherwise. + * + * The function has effect only when dpni object is connected to a dpmac + * object. If the dpni is not connected to a dpmac the configuration will + * be stored inside and applied when connection is made. + */ +int dpni_set_single_step_cfg(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + struct dpni_single_step_cfg *ptp_cfg) +{ + struct dpni_cmd_single_step_cfg *cmd_params; + struct fsl_mc_command cmd = { 0 }; + u16 flags; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_SINGLE_STEP_CFG, + cmd_flags, token); + cmd_params = (struct dpni_cmd_single_step_cfg *)cmd.params; + cmd_params->offset = cpu_to_le16(ptp_cfg->offset); + cmd_params->peer_delay = cpu_to_le32(ptp_cfg->peer_delay); + + flags = le16_to_cpu(cmd_params->flags); + dpni_set_field(flags, PTP_ENABLE, !!ptp_cfg->en); + dpni_set_field(flags, PTP_CH_UPDATE, !!ptp_cfg->ch_update); + cmd_params->flags = cpu_to_le16(flags); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h index 39387991a1f9..e7b9e195b534 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ /* Copyright 2013-2016 Freescale Semiconductor Inc. * Copyright 2016 NXP + * Copyright 2020 NXP */ #ifndef __FSL_DPNI_H #define __FSL_DPNI_H @@ -74,6 +75,10 @@ struct fsl_mc_io; * Disables the flow steering table. */ #define DPNI_OPT_NO_FS 0x000020 +/** + * Flow steering table is shared between all traffic classes + */ +#define DPNI_OPT_SHARED_FS 0x001000 int dpni_open(struct fsl_mc_io *mc_io, u32 cmd_flags, @@ -1079,4 +1084,34 @@ int dpni_set_tx_shaping(struct fsl_mc_io *mc_io, const struct dpni_tx_shaping_cfg *tx_er_shaper, int coupled); +/** + * struct dpni_single_step_cfg - configure single step PTP (IEEE 1588) + * @en: enable single step PTP. When enabled the PTPv1 functionality + * will not work. If the field is zero, offset and ch_update + * parameters will be ignored + * @offset: start offset from the beginning of the frame where + * timestamp field is found. 
The offset must respect all MAC + * headers, VLAN tags and other protocol headers + * @ch_update: when set UDP checksum will be updated inside packet + * @peer_delay: For peer-to-peer transparent clocks add this value to the + * correction field in addition to the transient time update. + * The value expresses nanoseconds. + */ +struct dpni_single_step_cfg { + u8 en; + u8 ch_update; + u16 offset; + u32 peer_delay; +}; + +int dpni_set_single_step_cfg(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + struct dpni_single_step_cfg *ptp_cfg); + +int dpni_get_single_step_cfg(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + struct dpni_single_step_cfg *ptp_cfg); + #endif /* __FSL_DPNI_H */ diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index 37b804f8bd76..0fa18b00c49b 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -3,7 +3,8 @@ config FSL_ENETC tristate "ENETC PF driver" depends on PCI && PCI_MSI select FSL_ENETC_MDIO - select PHYLIB + select PHYLINK + select PCS_LYNX select DIMLIB help This driver supports NXP ENETC gigabit ethernet controller PCIe @@ -15,7 +16,7 @@ config FSL_ENETC config FSL_ENETC_VF tristate "ENETC VF driver" depends on PCI && PCI_MSI - select PHYLIB + select PHYLINK select DIMLIB help This driver supports NXP ENETC gigabit ethernet controller PCIe diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index f78ca7b343d2..52be6e315752 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -4,7 +4,6 @@ #include "enetc.h" #include <linux/tcp.h> #include <linux/udp.h> -#include <linux/of_mdio.h> #include <linux/vmalloc.h> /* ENETC overhead: optional extension BD + 1 BD gap */ @@ -1392,38 +1391,24 @@ static void enetc_clear_interrupts(struct enetc_ndev_priv *priv) enetc_rxbdr_wr(&priv->si->hw, i, ENETC_RBIER, 0); } -static void adjust_link(struct net_device *ndev) +static int enetc_phylink_connect(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct phy_device *phydev = ndev->phydev; - - if (priv->active_offloads & ENETC_F_QBV) - enetc_sched_speed_set(ndev); - - phy_print_status(phydev); -} - -static int enetc_phy_connect(struct net_device *ndev) -{ - struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct phy_device *phydev; struct ethtool_eee edata; + int err; - if (!priv->phy_node) + if (!priv->phylink) return 0; /* phy-less mode */ - phydev = of_phy_connect(ndev, priv->phy_node, &adjust_link, - 0, priv->if_mode); - if (!phydev) { + err = phylink_of_phy_connect(priv->phylink, priv->dev->of_node, 0); + if (err) { dev_err(&ndev->dev, "could not attach to PHY\n"); - return -ENODEV; + return err; } - phy_attached_info(phydev); - /* disable EEE autoneg, until ENETC driver supports it */ memset(&edata, 0, sizeof(struct ethtool_eee)); - phy_ethtool_set_eee(phydev, &edata); + phylink_ethtool_set_eee(priv->phylink, &edata); return 0; } @@ -1443,8 +1428,8 @@ void enetc_start(struct net_device *ndev) enable_irq(irq); } - if (ndev->phydev) - phy_start(ndev->phydev); + if (priv->phylink) + phylink_start(priv->phylink); else netif_carrier_on(ndev); @@ -1460,7 +1445,7 @@ int enetc_open(struct net_device *ndev) if (err) return err; - err = enetc_phy_connect(ndev); + err = enetc_phylink_connect(ndev); if (err) goto err_phy_connect; @@ -1490,8 +1475,8 @@ err_set_queues: err_alloc_rx: enetc_free_tx_resources(priv); 
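The enetc changes above follow the canonical phylink call sequence. A condensed sketch of that lifecycle, assuming a priv structure holding the phylink handle (this shows the shape of the pattern, not the actual enetc code):

#include <linux/of.h>
#include <linux/phylink.h>

struct example_priv {
	struct phylink *phylink;
};

/* ndo_open: attach the DT-described PHY, then start link management. */
static int example_open(struct example_priv *priv, struct device_node *node)
{
	int err;

	err = phylink_of_phy_connect(priv->phylink, node, 0);
	if (err)
		return err;

	phylink_start(priv->phylink);
	return 0;
}

/* ndo_stop: mirror image of open. */
static void example_stop(struct example_priv *priv)
{
	phylink_stop(priv->phylink);
	phylink_disconnect_phy(priv->phylink);
}

enetc_open()/enetc_stop() layer the phy-less fallback (netif_carrier_on/off) on top of this skeleton, since priv->phylink may be NULL on fixed-link configurations.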
err_alloc_tx: - if (ndev->phydev) - phy_disconnect(ndev->phydev); + if (priv->phylink) + phylink_disconnect_phy(priv->phylink); err_phy_connect: enetc_free_irqs(priv); @@ -1514,8 +1499,8 @@ void enetc_stop(struct net_device *ndev) napi_disable(&priv->int_vector[i]->napi); } - if (ndev->phydev) - phy_stop(ndev->phydev); + if (priv->phylink) + phylink_stop(priv->phylink); else netif_carrier_off(ndev); @@ -1529,8 +1514,8 @@ int enetc_close(struct net_device *ndev) enetc_stop(ndev); enetc_clear_bdrs(priv); - if (ndev->phydev) - phy_disconnect(ndev->phydev); + if (priv->phylink) + phylink_disconnect_phy(priv->phylink); enetc_free_rxtx_rings(priv); enetc_free_rx_resources(priv); enetc_free_tx_resources(priv); @@ -1780,6 +1765,7 @@ static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr) int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) { + struct enetc_ndev_priv *priv = netdev_priv(ndev); #ifdef CONFIG_FSL_ENETC_PTP_CLOCK if (cmd == SIOCSHWTSTAMP) return enetc_hwtstamp_set(ndev, rq); @@ -1787,9 +1773,10 @@ int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) return enetc_hwtstamp_get(ndev, rq); #endif - if (!ndev->phydev) + if (!priv->phylink) return -EOPNOTSUPP; - return phy_mii_ioctl(ndev->phydev, rq, cmd); + + return phylink_mii_ioctl(priv->phylink, rq, cmd); } int enetc_alloc_msix(struct enetc_ndev_priv *priv) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index d309803cfeb6..dd0fb0c066d7 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -9,7 +9,7 @@ #include <linux/skbuff.h> #include <linux/ethtool.h> #include <linux/if_vlan.h> -#include <linux/phy.h> +#include <linux/phylink.h> #include <linux/dim.h> #include "enetc_hw.h" @@ -264,8 +264,7 @@ struct enetc_ndev_priv { struct psfp_cap psfp_cap; - struct device_node *phy_node; - phy_interface_t if_mode; + struct phylink *phylink; int ic_mode; u32 tx_ictt; }; @@ -323,7 +322,7 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd); #ifdef CONFIG_FSL_ENETC_QOS int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data); -void enetc_sched_speed_set(struct net_device *ndev); +void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed); int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data); int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data); int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data, @@ -388,7 +387,7 @@ static inline int enetc_psfp_disable(struct enetc_ndev_priv *priv) #else #define enetc_setup_tc_taprio(ndev, type_data) -EOPNOTSUPP -#define enetc_sched_speed_set(ndev) (void)0 +#define enetc_sched_speed_set(priv, speed) (void)0 #define enetc_setup_tc_cbs(ndev, type_data) -EOPNOTSUPP #define enetc_setup_tc_txtime(ndev, type_data) -EOPNOTSUPP #define enetc_setup_tc_psfp(ndev, type_data) -EOPNOTSUPP diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 1dab83fbca77..8ed1ebd5a183 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -686,6 +686,28 @@ static int enetc_set_wol(struct net_device *dev, return ret; } +static int enetc_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct enetc_ndev_priv *priv = netdev_priv(dev); + + if (!priv->phylink) + return -EOPNOTSUPP; + + return phylink_ethtool_ksettings_get(priv->phylink, 
cmd); +} + +static int enetc_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct enetc_ndev_priv *priv = netdev_priv(dev); + + if (!priv->phylink) + return -EOPNOTSUPP; + + return phylink_ethtool_ksettings_set(priv->phylink, cmd); +} + static const struct ethtool_ops enetc_pf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | @@ -704,8 +726,8 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = { .get_ringparam = enetc_get_ringparam, .get_coalesce = enetc_get_coalesce, .set_coalesce = enetc_set_coalesce, - .get_link_ksettings = phy_ethtool_get_link_ksettings, - .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_link_ksettings = enetc_get_link_ksettings, + .set_link_ksettings = enetc_set_link_ksettings, .get_link = ethtool_op_get_link, .get_ts_info = enetc_get_ts_info, .get_wol = enetc_get_wol, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 177334f0adb1..419306342ac5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -482,8 +482,7 @@ static void enetc_port_si_configure(struct enetc_si *si) enetc_port_wr(hw, ENETC_PSIVLANFMR, ENETC_PSIVLANFMR_VS); } -static void enetc_configure_port_mac(struct enetc_hw *hw, - phy_interface_t phy_mode) +static void enetc_configure_port_mac(struct enetc_hw *hw) { enetc_port_wr(hw, ENETC_PM0_MAXFRM, ENETC_SET_MAXFRM(ENETC_RX_MAXFRM_SIZE)); @@ -492,12 +491,14 @@ static void enetc_configure_port_mac(struct enetc_hw *hw, enetc_port_wr(hw, ENETC_PTXMBAR, 2 * ENETC_MAC_MAXFRM_SIZE); enetc_port_wr(hw, ENETC_PM0_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN | - ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC | - ENETC_PM0_TX_EN | ENETC_PM0_RX_EN); + ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC); enetc_port_wr(hw, ENETC_PM1_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN | - ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC | - ENETC_PM0_TX_EN | ENETC_PM0_RX_EN); + ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC); +} + +static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode) +{ /* set auto-speed for RGMII */ if (enetc_port_rd(hw, ENETC_PM0_IF_MODE) & ENETC_PMO_IFM_RG || phy_interface_mode_is_rgmii(phy_mode)) @@ -507,6 +508,17 @@ static void enetc_configure_port_mac(struct enetc_hw *hw, enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_XGMII); } +static void enetc_mac_enable(struct enetc_hw *hw, bool en) +{ + u32 val = enetc_port_rd(hw, ENETC_PM0_CMD_CFG); + + val &= ~(ENETC_PM0_TX_EN | ENETC_PM0_RX_EN); + val |= en ? 
(ENETC_PM0_TX_EN | ENETC_PM0_RX_EN) : 0; + + enetc_port_wr(hw, ENETC_PM0_CMD_CFG, val); + enetc_port_wr(hw, ENETC_PM1_CMD_CFG, val); +} + static void enetc_configure_port_pmac(struct enetc_hw *hw) { u32 temp; @@ -527,7 +539,7 @@ static void enetc_configure_port(struct enetc_pf *pf) enetc_configure_port_pmac(hw); - enetc_configure_port_mac(hw, pf->if_mode); + enetc_configure_port_mac(hw); enetc_port_si_configure(pf->si); @@ -733,11 +745,10 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr); } -static int enetc_mdio_probe(struct enetc_pf *pf) +static int enetc_mdio_probe(struct enetc_pf *pf, struct device_node *np) { struct device *dev = &pf->si->pdev->dev; struct enetc_mdio_priv *mdio_priv; - struct device_node *np; struct mii_bus *bus; int err; @@ -754,20 +765,12 @@ static int enetc_mdio_probe(struct enetc_pf *pf) mdio_priv->mdio_base = ENETC_EMDIO_BASE; snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); - np = of_get_child_by_name(dev->of_node, "mdio"); - if (!np) { - dev_err(dev, "MDIO node missing\n"); - return -EINVAL; - } - err = of_mdiobus_register(bus, np); if (err) { - of_node_put(np); dev_err(dev, "cannot register MDIO bus\n"); return err; } - of_node_put(np); pf->mdio = bus; return 0; @@ -779,69 +782,12 @@ static void enetc_mdio_remove(struct enetc_pf *pf) mdiobus_unregister(pf->mdio); } -static int enetc_of_get_phy(struct enetc_pf *pf) -{ - struct device *dev = &pf->si->pdev->dev; - struct device_node *np = dev->of_node; - struct device_node *mdio_np; - int err; - - pf->phy_node = of_parse_phandle(np, "phy-handle", 0); - if (!pf->phy_node) { - if (!of_phy_is_fixed_link(np)) { - dev_err(dev, "PHY not specified\n"); - return -ENODEV; - } - - err = of_phy_register_fixed_link(np); - if (err < 0) { - dev_err(dev, "fixed link registration failed\n"); - return err; - } - - pf->phy_node = of_node_get(np); - } - - mdio_np = of_get_child_by_name(np, "mdio"); - if (mdio_np) { - of_node_put(mdio_np); - err = enetc_mdio_probe(pf); - if (err) { - of_node_put(pf->phy_node); - return err; - } - } - - err = of_get_phy_mode(np, &pf->if_mode); - if (err) { - dev_err(dev, "missing phy type\n"); - of_node_put(pf->phy_node); - if (of_phy_is_fixed_link(np)) - of_phy_deregister_fixed_link(np); - else - enetc_mdio_remove(pf); - - return -EINVAL; - } - - return 0; -} - -static void enetc_of_put_phy(struct enetc_pf *pf) -{ - struct device_node *np = pf->si->pdev->dev.of_node; - - if (np && of_phy_is_fixed_link(np)) - of_phy_deregister_fixed_link(np); - if (pf->phy_node) - of_node_put(pf->phy_node); -} - -static int enetc_imdio_init(struct enetc_pf *pf, bool is_c45) +static int enetc_imdio_create(struct enetc_pf *pf) { struct device *dev = &pf->si->pdev->dev; struct enetc_mdio_priv *mdio_priv; - struct phy_device *pcs; + struct lynx_pcs *pcs_lynx; + struct mdio_device *pcs; struct mii_bus *bus; int err; @@ -865,15 +811,23 @@ static int enetc_imdio_init(struct enetc_pf *pf, bool is_c45) goto free_mdio_bus; } - pcs = get_phy_device(bus, 0, is_c45); + pcs = mdio_device_create(bus, 0); if (IS_ERR(pcs)) { err = PTR_ERR(pcs); - dev_err(dev, "cannot get internal PCS PHY (%d)\n", err); + dev_err(dev, "cannot create pcs (%d)\n", err); + goto unregister_mdiobus; + } + + pcs_lynx = lynx_pcs_create(pcs); + if (!pcs_lynx) { + mdio_device_free(pcs); + err = -ENOMEM; + dev_err(dev, "cannot create lynx pcs (%d)\n", err); goto unregister_mdiobus; } pf->imdio = bus; - pf->pcs = pcs; + pf->pcs = pcs_lynx; return 0; @@ -886,91 +840,168 
@@ free_mdio_bus: static void enetc_imdio_remove(struct enetc_pf *pf) { - if (pf->pcs) - put_device(&pf->pcs->mdio.dev); + if (pf->pcs) { + mdio_device_free(pf->pcs->mdio); + lynx_pcs_destroy(pf->pcs); + } if (pf->imdio) { mdiobus_unregister(pf->imdio); mdiobus_free(pf->imdio); } } -static void enetc_configure_sgmii(struct phy_device *pcs) +static bool enetc_port_has_pcs(struct enetc_pf *pf) +{ + return (pf->if_mode == PHY_INTERFACE_MODE_SGMII || + pf->if_mode == PHY_INTERFACE_MODE_2500BASEX || + pf->if_mode == PHY_INTERFACE_MODE_USXGMII); +} + +static int enetc_mdiobus_create(struct enetc_pf *pf) +{ + struct device *dev = &pf->si->pdev->dev; + struct device_node *mdio_np; + int err; + + mdio_np = of_get_child_by_name(dev->of_node, "mdio"); + if (mdio_np) { + err = enetc_mdio_probe(pf, mdio_np); + + of_node_put(mdio_np); + if (err) + return err; + } + + if (enetc_port_has_pcs(pf)) { + err = enetc_imdio_create(pf); + if (err) { + enetc_mdio_remove(pf); + return err; + } + } + + return 0; +} + +static void enetc_mdiobus_destroy(struct enetc_pf *pf) { - /* SGMII spec requires tx_config_Reg[15:0] to be exactly 0x4001 - * for the MAC PCS in order to acknowledge the AN. - */ - phy_write(pcs, MII_ADVERTISE, ADVERTISE_SGMII | ADVERTISE_LPACK); + enetc_mdio_remove(pf); + enetc_imdio_remove(pf); +} - phy_write(pcs, ENETC_PCS_IF_MODE, - ENETC_PCS_IF_MODE_SGMII_EN | - ENETC_PCS_IF_MODE_USE_SGMII_AN); +static void enetc_pl_mac_validate(struct phylink_config *config, + unsigned long *supported, + struct phylink_link_state *state) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + + if (state->interface != PHY_INTERFACE_MODE_NA && + state->interface != PHY_INTERFACE_MODE_INTERNAL && + state->interface != PHY_INTERFACE_MODE_SGMII && + state->interface != PHY_INTERFACE_MODE_2500BASEX && + state->interface != PHY_INTERFACE_MODE_USXGMII && + !phy_interface_mode_is_rgmii(state->interface)) { + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + return; + } - /* Adjust link timer for SGMII */ - phy_write(pcs, ENETC_PCS_LINK_TIMER1, ENETC_PCS_LINK_TIMER1_VAL); - phy_write(pcs, ENETC_PCS_LINK_TIMER2, ENETC_PCS_LINK_TIMER2_VAL); + phylink_set_port_modes(mask); + phylink_set(mask, Autoneg); + phylink_set(mask, Pause); + phylink_set(mask, Asym_Pause); + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 10baseT_Full); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 100baseT_Full); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 1000baseT_Half); + phylink_set(mask, 1000baseT_Full); + + if (state->interface == PHY_INTERFACE_MODE_INTERNAL || + state->interface == PHY_INTERFACE_MODE_2500BASEX || + state->interface == PHY_INTERFACE_MODE_USXGMII) { + phylink_set(mask, 2500baseT_Full); + phylink_set(mask, 2500baseX_Full); + } - phy_write(pcs, MII_BMCR, BMCR_ANRESTART | BMCR_ANENABLE); + bitmap_and(supported, supported, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); } -static void enetc_configure_2500basex(struct phy_device *pcs) +static void enetc_pl_mac_config(struct phylink_config *config, + unsigned int mode, + const struct phylink_link_state *state) { - phy_write(pcs, ENETC_PCS_IF_MODE, - ENETC_PCS_IF_MODE_SGMII_EN | - ENETC_PCS_IF_MODE_SGMII_SPEED(ENETC_PCS_SPEED_2500)); + struct enetc_pf *pf = phylink_to_enetc_pf(config); + struct enetc_ndev_priv *priv; + + enetc_mac_config(&pf->si->hw, state->interface); - phy_write(pcs, MII_BMCR, BMCR_SPEED1000 | BMCR_FULLDPLX | BMCR_RESET); + priv = 
netdev_priv(pf->si->ndev); + if (pf->pcs) + phylink_set_pcs(priv->phylink, &pf->pcs->pcs); } -static void enetc_configure_usxgmii(struct phy_device *pcs) +static void enetc_pl_mac_link_up(struct phylink_config *config, + struct phy_device *phy, unsigned int mode, + phy_interface_t interface, int speed, + int duplex, bool tx_pause, bool rx_pause) { - /* Configure device ability for the USXGMII Replicator */ - phy_write_mmd(pcs, MDIO_MMD_VEND2, MII_ADVERTISE, - ADVERTISE_SGMII | ADVERTISE_LPACK | - MDIO_USXGMII_FULL_DUPLEX); - - /* Restart PCS AN */ - phy_write_mmd(pcs, MDIO_MMD_VEND2, MII_BMCR, - BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART); + struct enetc_pf *pf = phylink_to_enetc_pf(config); + struct enetc_ndev_priv *priv; + + priv = netdev_priv(pf->si->ndev); + if (priv->active_offloads & ENETC_F_QBV) + enetc_sched_speed_set(priv, speed); + + enetc_mac_enable(&pf->si->hw, true); } -static int enetc_configure_serdes(struct enetc_ndev_priv *priv) +static void enetc_pl_mac_link_down(struct phylink_config *config, + unsigned int mode, + phy_interface_t interface) +{ + struct enetc_pf *pf = phylink_to_enetc_pf(config); + + enetc_mac_enable(&pf->si->hw, false); +} + +static const struct phylink_mac_ops enetc_mac_phylink_ops = { + .validate = enetc_pl_mac_validate, + .mac_config = enetc_pl_mac_config, + .mac_link_up = enetc_pl_mac_link_up, + .mac_link_down = enetc_pl_mac_link_down, +}; + +static int enetc_phylink_create(struct enetc_ndev_priv *priv) { - bool is_c45 = priv->if_mode == PHY_INTERFACE_MODE_USXGMII; struct enetc_pf *pf = enetc_si_priv(priv->si); + struct device *dev = &pf->si->pdev->dev; + struct phylink *phylink; int err; - if (priv->if_mode != PHY_INTERFACE_MODE_SGMII && - priv->if_mode != PHY_INTERFACE_MODE_2500BASEX && - priv->if_mode != PHY_INTERFACE_MODE_USXGMII) - return 0; + pf->phylink_config.dev = &priv->ndev->dev; + pf->phylink_config.type = PHYLINK_NETDEV; - err = enetc_imdio_init(pf, is_c45); - if (err) + phylink = phylink_create(&pf->phylink_config, + of_fwnode_handle(dev->of_node), + pf->if_mode, &enetc_mac_phylink_ops); + if (IS_ERR(phylink)) { + err = PTR_ERR(phylink); return err; - - switch (priv->if_mode) { - case PHY_INTERFACE_MODE_SGMII: - enetc_configure_sgmii(pf->pcs); - break; - case PHY_INTERFACE_MODE_2500BASEX: - enetc_configure_2500basex(pf->pcs); - break; - case PHY_INTERFACE_MODE_USXGMII: - enetc_configure_usxgmii(pf->pcs); - break; - default: - dev_err(&pf->si->pdev->dev, "Unsupported link mode %s\n", - phy_modes(priv->if_mode)); } + priv->phylink = phylink; + return 0; } -static void enetc_teardown_serdes(struct enetc_ndev_priv *priv) +static void enetc_phylink_destroy(struct enetc_ndev_priv *priv) { - struct enetc_pf *pf = enetc_si_priv(priv->si); - - enetc_imdio_remove(pf); + if (priv->phylink) + phylink_destroy(priv->phylink); } static int enetc_pf_probe(struct pci_dev *pdev, @@ -1004,10 +1035,6 @@ static int enetc_pf_probe(struct pci_dev *pdev, pf->si = si; pf->total_vfs = pci_sriov_get_totalvfs(pdev); - err = enetc_of_get_phy(pf); - if (err) - dev_warn(&pdev->dev, "Fallback to PHY-less operation\n"); - enetc_configure_port(pf); enetc_get_si_caps(si); @@ -1022,8 +1049,6 @@ static int enetc_pf_probe(struct pci_dev *pdev, enetc_pf_netdev_setup(si, ndev, &enetc_ndev_ops); priv = netdev_priv(ndev); - priv->phy_node = pf->phy_node; - priv->if_mode = pf->if_mode; enetc_init_si_rings_params(priv); @@ -1039,20 +1064,27 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_alloc_msix; } - err = enetc_configure_serdes(priv); - if (err) - 
dev_warn(&pdev->dev, "Attempted SerDes config but failed\n"); + if (!of_get_phy_mode(pdev->dev.of_node, &pf->if_mode)) { + err = enetc_mdiobus_create(pf); + if (err) + goto err_mdiobus_create; + + err = enetc_phylink_create(priv); + if (err) + goto err_phylink_create; + } err = register_netdev(ndev); if (err) goto err_reg_netdev; - netif_carrier_off(ndev); - return 0; err_reg_netdev: - enetc_teardown_serdes(priv); + enetc_phylink_destroy(priv); +err_phylink_create: + enetc_mdiobus_destroy(pf); +err_mdiobus_create: enetc_free_msix(priv); err_alloc_msix: enetc_free_si_resources(priv); @@ -1060,8 +1092,6 @@ err_alloc_si_res: si->ndev = NULL; free_netdev(ndev); err_alloc_netdev: - enetc_mdio_remove(pf); - enetc_of_put_phy(pf); err_map_pf_space: enetc_pci_remove(pdev); @@ -1074,16 +1104,15 @@ static void enetc_pf_remove(struct pci_dev *pdev) struct enetc_pf *pf = enetc_si_priv(si); struct enetc_ndev_priv *priv; + priv = netdev_priv(si->ndev); + enetc_phylink_destroy(priv); + enetc_mdiobus_destroy(pf); + if (pf->num_vfs) enetc_sriov_configure(pdev, 0); - priv = netdev_priv(si->ndev); unregister_netdev(si->ndev); - enetc_teardown_serdes(priv); - enetc_mdio_remove(pf); - enetc_of_put_phy(pf); - enetc_free_msix(priv); enetc_free_si_resources(priv); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h index 0d0ee91282a5..263946c51e37 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h @@ -2,6 +2,7 @@ /* Copyright 2017-2019 NXP */ #include "enetc.h" +#include <linux/pcs-lynx.h> #define ENETC_PF_NUM_RINGS 8 @@ -45,12 +46,15 @@ struct enetc_pf { struct mii_bus *mdio; /* saved for cleanup */ struct mii_bus *imdio; - struct phy_device *pcs; + struct lynx_pcs *pcs; - struct device_node *phy_node; phy_interface_t if_mode; + struct phylink_config phylink_config; }; +#define phylink_to_enetc_pf(config) \ + container_of((config), struct enetc_pf, phylink_config) + int enetc_msg_psi_init(struct enetc_pf *pf); void enetc_msg_psi_free(struct enetc_pf *pf); void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int mbox_id, u16 *status); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 1c4a535890da..c81be32bcedf 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -15,17 +15,14 @@ static u16 enetc_get_max_gcl_len(struct enetc_hw *hw) & ENETC_QBV_MAX_GCL_LEN_MASK; } -void enetc_sched_speed_set(struct net_device *ndev) +void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) { - struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct phy_device *phydev = ndev->phydev; u32 old_speed = priv->speed; - u32 speed, pspeed; + u32 pspeed; - if (phydev->speed == old_speed) + if (speed == old_speed) return; - speed = phydev->speed; switch (speed) { case SPEED_1000: pspeed = ENETC_PMR_PSPEED_1000M; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index f14576212a0e..7b5c82c7e4e5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -78,16 +78,11 @@ static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct sockaddr *saddr = addr; - int err; if (!is_valid_ether_addr(saddr->sa_data)) return -EADDRNOTAVAIL; - err = enetc_msg_vsi_set_primary_mac_addr(priv, saddr); - if (err) 
- return err; - - return 0; + return enetc_msg_vsi_set_primary_mac_addr(priv, saddr); } static int enetc_vf_set_features(struct net_device *ndev, diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index fb37816a74db..8f7eca1e7716 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1912,6 +1912,27 @@ out: return ret; } +static void fec_enet_phy_reset_after_clk_enable(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct phy_device *phy_dev = ndev->phydev; + + if (phy_dev) { + phy_reset_after_clk_enable(phy_dev); + } else if (fep->phy_node) { + /* + * If the PHY still is not bound to the MAC, but there is + * OF PHY node and a matching PHY device instance already, + * use the OF PHY node to obtain the PHY device instance, + * and then use that PHY device instance when triggering + * the PHY reset. + */ + phy_dev = of_phy_find_device(fep->phy_node); + phy_reset_after_clk_enable(phy_dev); + put_device(&phy_dev->mdio.dev); + } +} + static int fec_enet_clk_enable(struct net_device *ndev, bool enable) { struct fec_enet_private *fep = netdev_priv(ndev); @@ -1938,7 +1959,7 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) if (ret) goto failed_clk_ref; - phy_reset_after_clk_enable(ndev->phydev); + fec_enet_phy_reset_after_clk_enable(ndev); } else { clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { @@ -1960,8 +1981,7 @@ failed_clk_ref: mutex_unlock(&fep->ptp_clk_mutex); } failed_clk_ptp: - if (fep->clk_enet_out) - clk_disable_unprepare(fep->clk_enet_out); + clk_disable_unprepare(fep->clk_enet_out); return ret; } @@ -2984,16 +3004,16 @@ fec_enet_open(struct net_device *ndev) /* Init MAC prior to mii bus probe */ fec_restart(ndev); - /* Probe and connect to PHY when open the interface */ - ret = fec_enet_mii_probe(ndev); - if (ret) - goto err_enet_mii_probe; - /* Call phy_reset_after_clk_enable() again if it failed during * phy_reset_after_clk_enable() before because the PHY wasn't probed. 
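The new
	 * fec_enet_phy_reset_after_clk_enable() wrapper also covers the
	 * case where the PHY is described in the device tree but not yet
	 * attached to the netdev.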
*/ if (reset_again) - phy_reset_after_clk_enable(ndev->phydev); + fec_enet_phy_reset_after_clk_enable(ndev); + + /* Probe and connect to PHY when open the interface */ + ret = fec_enet_mii_probe(ndev); + if (ret) + goto err_enet_mii_probe; if (fep->quirks & FEC_QUIRK_ERR006687) imx6q_cpuidle_fec_irqs_used(); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 7a3f066e611d..b3bad429e03b 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -74,7 +74,7 @@ struct mpc52xx_fec_priv { static irqreturn_t mpc52xx_fec_interrupt(int, void *); static irqreturn_t mpc52xx_fec_rx_interrupt(int, void *); static irqreturn_t mpc52xx_fec_tx_interrupt(int, void *); -static void mpc52xx_fec_stop(struct net_device *dev); +static void mpc52xx_fec_stop(struct net_device *dev, bool may_sleep); static void mpc52xx_fec_start(struct net_device *dev); static void mpc52xx_fec_reset(struct net_device *dev); @@ -283,7 +283,7 @@ static int mpc52xx_fec_close(struct net_device *dev) netif_stop_queue(dev); - mpc52xx_fec_stop(dev); + mpc52xx_fec_stop(dev, true); mpc52xx_fec_free_rx_buffers(dev, priv->rx_dmatsk); @@ -693,7 +693,7 @@ static void mpc52xx_fec_start(struct net_device *dev) * * stop all activity on fec and empty dma buffers */ -static void mpc52xx_fec_stop(struct net_device *dev) +static void mpc52xx_fec_stop(struct net_device *dev, bool may_sleep) { struct mpc52xx_fec_priv *priv = netdev_priv(dev); struct mpc52xx_fec __iomem *fec = priv->fec; @@ -706,7 +706,7 @@ static void mpc52xx_fec_stop(struct net_device *dev) bcom_disable(priv->rx_dmatsk); /* Wait for tx queue to drain, but only if we're in process context */ - if (!in_interrupt()) { + if (may_sleep) { timeout = jiffies + msecs_to_jiffies(2000); while (time_before(jiffies, timeout) && !bcom_queue_empty(priv->tx_dmatsk)) @@ -738,7 +738,7 @@ static void mpc52xx_fec_reset(struct net_device *dev) struct mpc52xx_fec_priv *priv = netdev_priv(dev); struct mpc52xx_fec __iomem *fec = priv->fec; - mpc52xx_fec_stop(dev); + mpc52xx_fec_stop(dev, false); out_be32(&fec->rfifo_status, in_be32(&fec->rfifo_status)); out_be32(&fec->reset_cntrl, FEC_RESET_CNTRL_RESET_FIFO); diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index a0c1f4410306..2e344aada4c6 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -512,7 +512,7 @@ int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr) -EFAULT : 0; } -/** +/* * fec_time_keep - call timecounter_read every second to avoid timer overrun * because ENET just support 32bit counter, will timeout in 4s */ @@ -520,13 +520,12 @@ static void fec_time_keep(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct fec_enet_private *fep = container_of(dwork, struct fec_enet_private, time_keep); - u64 ns; unsigned long flags; mutex_lock(&fep->ptp_clk_mutex); if (fep->ptp_clk_on) { spin_lock_irqsave(&fep->tmreg_lock, flags); - ns = timecounter_read(&fep->tc); + timecounter_read(&fep->tc); spin_unlock_irqrestore(&fep->tmreg_lock, flags); } mutex_unlock(&fep->ptp_clk_mutex); @@ -567,7 +566,8 @@ static irqreturn_t fec_pps_interrupt(int irq, void *dev_id) /** * fec_ptp_init - * @ndev: The FEC network adapter + * @pdev: The FEC network adapter + * @irq_idx: the interrupt index * * This function performs the required steps for enabling ptp * support. 
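The mpc52xx hunk above is one instance of the tree-wide in_interrupt() removal: the caller now states whether it may sleep instead of the callee guessing from context. A sketch of the pattern, with hypothetical names:

#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/netdevice.h>

bool sketch_tx_queue_empty(struct net_device *dev);	/* hypothetical */

static void sketch_stop_hw(struct net_device *dev, bool may_sleep)
{
	/* Only poll for the TX queue to drain when the caller may sleep;
	 * atomic callers skip straight to the hard stop.
	 */
	if (may_sleep) {
		unsigned long timeout = jiffies + msecs_to_jiffies(2000);

		while (time_before(jiffies, timeout) &&
		       !sketch_tx_queue_empty(dev))
			msleep(100);
	}
	/* ...disable DMA and reset the MAC as before... */
}

Here ndo_stop() would call sketch_stop_hw(dev, true), while an error reset running in atomic context would call sketch_stop_hw(dev, false), which is the split mpc52xx_fec_stop() adopts.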
If ptp support has already been loaded it simply calls the diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index ef67e8599b39..ce0a121580f6 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2063,11 +2063,11 @@ static int fman_set_exception(struct fman *fman, /** * fman_register_intr * @fman: A Pointer to FMan device - * @mod: Calling module + * @module: Calling module * @mod_id: Module id (if more than 1 exists, '0' if not) * @intr_type: Interrupt type (error/normal) selection. - * @f_isr: The interrupt service routine. - * @h_src_arg: Argument to be passed to f_isr. + * @isr_cb: The interrupt service routine. + * @src_arg: Argument to be passed to isr_cb. * * Used to register an event handler to be processed by FMan * @@ -2091,7 +2091,7 @@ EXPORT_SYMBOL(fman_register_intr); /** * fman_unregister_intr * @fman: A Pointer to FMan device - * @mod: Calling module + * @module: Calling module * @mod_id: Module id (if more than 1 exists, '0' if not) * @intr_type: Interrupt type (error/normal) selection. * @@ -2342,8 +2342,8 @@ EXPORT_SYMBOL(fman_get_bmi_max_fifo_size); /** * fman_get_revision - * @fman - Pointer to the FMan module - * @rev_info - A structure of revision information parameters. + * @fman: - Pointer to the FMan module + * @rev_info: - A structure of revision information parameters. * * Returns the FM revision * @@ -2508,7 +2508,7 @@ EXPORT_SYMBOL(fman_get_rx_extra_headroom); /** * fman_bind - * @dev: FMan OF device pointer + * @fm_dev: FMan OF device pointer * * Bind to a specific FMan device. * diff --git a/drivers/net/ethernet/freescale/fman/fman_muram.c b/drivers/net/ethernet/freescale/fman/fman_muram.c index 5ec94d243da0..7ad317e622bc 100644 --- a/drivers/net/ethernet/freescale/fman/fman_muram.c +++ b/drivers/net/ethernet/freescale/fman/fman_muram.c @@ -144,9 +144,9 @@ unsigned long fman_muram_alloc(struct muram_info *muram, size_t size) /** * fman_muram_free_mem - * muram: FM-MURAM module pointer. - * offset: offset of the memory region to be freed. - * size: size of the memory to be freed. + * @muram: FM-MURAM module pointer. + * @offset: offset of the memory region to be freed. + * @size: size of the memory to be freed. * * Free an allocated memory from FM-MURAM partition. */ diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 624b2eb6f01d..d9baac0dbc7d 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1410,9 +1410,11 @@ err_port_cfg: } EXPORT_SYMBOL(fman_port_config); -/** +/* * fman_port_use_kg_hash - * port: A pointer to a FM Port module. + * @port: A pointer to a FM Port module. + * @enable: enable or disable + * * Sets the HW KeyGen or the BMI as HW Parser next engine, enabling * or bypassing the KeyGen hashing of Rx traffic */ @@ -1430,7 +1432,8 @@ EXPORT_SYMBOL(fman_port_use_kg_hash); /** * fman_port_init - * port: A pointer to a FM Port module. + * @port: A pointer to a FM Port module. + * * Initializes the FM PORT module by defining the software structure and * configuring the hardware registers. * @@ -1524,8 +1527,8 @@ EXPORT_SYMBOL(fman_port_init); /** * fman_port_cfg_buf_prefix_content - * @port A pointer to a FM Port module. - * @buffer_prefix_content A structure of parameters describing + * @port: A pointer to a FM Port module. 
+ * @buffer_prefix_content: A structure of parameters describing * the structure of the buffer. * Out parameter: * Start margin - offset of data from @@ -1570,7 +1573,7 @@ EXPORT_SYMBOL(fman_port_cfg_buf_prefix_content); /** * fman_port_disable - * port: A pointer to a FM Port module. + * @port: A pointer to a FM Port module. * * Gracefully disable an FM port. The port will not start new tasks after all * tasks associated with the port are terminated. @@ -1651,7 +1654,7 @@ EXPORT_SYMBOL(fman_port_disable); /** * fman_port_enable - * port: A pointer to a FM Port module. + * @port: A pointer to a FM Port module. * * A runtime routine provided to allow disable/enable of port. * @@ -1697,7 +1700,7 @@ EXPORT_SYMBOL(fman_port_enable); /** * fman_port_bind - * dev: FMan Port OF device pointer + * @dev: FMan Port OF device pointer * * Bind to a specific FMan Port. * @@ -1713,7 +1716,7 @@ EXPORT_SYMBOL(fman_port_bind); /** * fman_port_get_qman_channel_id - * port: Pointer to the FMan port devuce + * @port: Pointer to the FMan port devuce * * Get the QMan channel ID for the specific port * @@ -1727,7 +1730,7 @@ EXPORT_SYMBOL(fman_port_get_qman_channel_id); /** * fman_port_get_device - * port: Pointer to the FMan port device + * @port: Pointer to the FMan port device * * Get the 'struct device' associated to the specified FMan port device * diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 43427c5b9396..901749a7a318 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -359,8 +359,8 @@ EXPORT_SYMBOL(fman_set_mac_active_pause); /** * fman_get_pause_cfg * @mac_dev: A pointer to the MAC device - * @rx: Return value for RX setting - * @tx: Return value for TX setting + * @rx_pause: Return value for RX setting + * @tx_pause: Return value for TX setting * * Determine the MAC RX/TX PAUSE frames settings based on PHY * autonegotiation or values set by eththool. diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index bf846b42bc74..78e008b81374 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -562,10 +562,13 @@ fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) BD_ENET_TX_TC); CBDS_SC(bdp, BD_ENET_TX_READY); - if ((CBDR_SC(bdp) & BD_ENET_TX_WRAP) == 0) - bdp++, curidx++; - else - bdp = fep->tx_bd_base, curidx = 0; + if ((CBDR_SC(bdp) & BD_ENET_TX_WRAP) == 0) { + bdp++; + curidx++; + } else { + bdp = fep->tx_bd_base; + curidx = 0; + } len = skb_frag_size(frag); CBDW_BUFADDR(bdp, skb_frag_dma_map(fep->dev, frag, 0, len, diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index ebc37e256922..f5c80229ea96 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -27,6 +27,17 @@ /* 1 for management, 1 for rx, 1 for tx */ #define GVE_MIN_MSIX 3 +/* Numbers of gve tx/rx stats in stats report. */ +#define GVE_TX_STATS_REPORT_NUM 5 +#define GVE_RX_STATS_REPORT_NUM 2 + +/* Interval to schedule a stats report update, 20000ms. */ +#define GVE_STATS_REPORT_TIMER_PERIOD 20000 + +/* Numbers of NIC tx/rx stats in stats report. 
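These are written by the
+ * device itself and, within the report, they follow the per-queue entries
+ * the driver writes for itself (GVE_TX_STATS_REPORT_NUM and
+ * GVE_RX_STATS_REPORT_NUM above).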
*/ +#define NIC_TX_STATS_REPORT_NUM 0 +#define NIC_RX_STATS_REPORT_NUM 4 + /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ struct gve_rx_desc_queue { struct gve_rx_desc *desc_ring; /* the descriptor ring */ @@ -71,6 +82,11 @@ struct gve_rx_ring { u32 cnt; /* free-running total number of completed packets */ u32 fill_cnt; /* free-running total number of descs and buffs posted */ u32 mask; /* masks the cnt and fill_cnt to the size of the ring */ + u64 rx_copybreak_pkt; /* free-running count of copybreak packets */ + u64 rx_copied_pkt; /* free-running total number of copied packets */ + u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */ + u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */ + u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */ u32 q_num; /* queue index */ u32 ntfy_id; /* notification block index */ struct gve_queue_resources *q_resources; /* head and tail pointer idx */ @@ -202,24 +218,63 @@ struct gve_priv { dma_addr_t adminq_bus_addr; u32 adminq_mask; /* masks prod_cnt to adminq size */ u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */ - + u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */ + u32 adminq_timeouts; /* free-running count of AQ cmds timeouts */ + /* free-running count of per AQ cmd executed */ + u32 adminq_describe_device_cnt; + u32 adminq_cfg_device_resources_cnt; + u32 adminq_register_page_list_cnt; + u32 adminq_unregister_page_list_cnt; + u32 adminq_create_tx_queue_cnt; + u32 adminq_create_rx_queue_cnt; + u32 adminq_destroy_tx_queue_cnt; + u32 adminq_destroy_rx_queue_cnt; + u32 adminq_dcfg_device_resources_cnt; + u32 adminq_set_driver_parameter_cnt; + u32 adminq_report_stats_cnt; + u32 adminq_report_link_speed_cnt; + + /* Global stats */ + u32 interface_up_cnt; /* count of times interface turned up since last reset */ + u32 interface_down_cnt; /* count of times interface turned down since last reset */ + u32 reset_cnt; /* count of reset */ + u32 page_alloc_fail; /* count of page alloc fails */ + u32 dma_mapping_error; /* count of dma mapping errors */ + u32 stats_report_trigger_cnt; /* count of device-requested stats-reports since last reset */ struct workqueue_struct *gve_wq; struct work_struct service_task; + struct work_struct stats_report_task; unsigned long service_task_flags; unsigned long state_flags; + + struct gve_stats_report *stats_report; + u64 stats_report_len; + dma_addr_t stats_report_bus; /* dma address for the stats report */ + unsigned long ethtool_flags; + + unsigned long stats_report_timer_period; + struct timer_list stats_report_timer; + + /* Gvnic device link speed from hypervisor. 
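Refreshed on demand
	 * via the GVE_ADMINQ_REPORT_LINK_SPEED admin command
	 * (gve_adminq_report_link_speed()).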
*/ + u64 link_speed; }; -enum gve_service_task_flags { - GVE_PRIV_FLAGS_DO_RESET = BIT(1), - GVE_PRIV_FLAGS_RESET_IN_PROGRESS = BIT(2), - GVE_PRIV_FLAGS_PROBE_IN_PROGRESS = BIT(3), +enum gve_service_task_flags_bit { + GVE_PRIV_FLAGS_DO_RESET = 1, + GVE_PRIV_FLAGS_RESET_IN_PROGRESS = 2, + GVE_PRIV_FLAGS_PROBE_IN_PROGRESS = 3, + GVE_PRIV_FLAGS_DO_REPORT_STATS = 4, }; -enum gve_state_flags { - GVE_PRIV_FLAGS_ADMIN_QUEUE_OK = BIT(1), - GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK = BIT(2), - GVE_PRIV_FLAGS_DEVICE_RINGS_OK = BIT(3), - GVE_PRIV_FLAGS_NAPI_ENABLED = BIT(4), +enum gve_state_flags_bit { + GVE_PRIV_FLAGS_ADMIN_QUEUE_OK = 1, + GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK = 2, + GVE_PRIV_FLAGS_DEVICE_RINGS_OK = 3, + GVE_PRIV_FLAGS_NAPI_ENABLED = 4, +}; + +enum gve_ethtool_flags_bit { + GVE_PRIV_FLAGS_REPORT_STATS = 0, }; static inline bool gve_get_do_reset(struct gve_priv *priv) @@ -269,6 +324,22 @@ static inline void gve_clear_probe_in_progress(struct gve_priv *priv) clear_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags); } +static inline bool gve_get_do_report_stats(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_DO_REPORT_STATS, + &priv->service_task_flags); +} + +static inline void gve_set_do_report_stats(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_DO_REPORT_STATS, &priv->service_task_flags); +} + +static inline void gve_clear_do_report_stats(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_DO_REPORT_STATS, &priv->service_task_flags); +} + static inline bool gve_get_admin_queue_ok(struct gve_priv *priv) { return test_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags); @@ -329,6 +400,16 @@ static inline void gve_clear_napi_enabled(struct gve_priv *priv) clear_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags); } +static inline bool gve_get_report_stats(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_REPORT_STATS, &priv->ethtool_flags); +} + +static inline void gve_clear_report_stats(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_REPORT_STATS, &priv->ethtool_flags); +} + /* Returns the address of the ntfy_blocks irq doorbell */ static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv, @@ -426,7 +507,8 @@ static inline bool gve_can_recycle_pages(struct net_device *dev) } /* buffers */ -int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma, +int gve_alloc_page(struct gve_priv *priv, struct device *dev, + struct page **page, dma_addr_t *dma, enum dma_data_direction); void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); @@ -450,6 +532,8 @@ int gve_reset(struct gve_priv *priv, bool attempt_teardown); int gve_adjust_queues(struct gve_priv *priv, struct gve_queue_config new_rx_config, struct gve_queue_config new_tx_config); +/* report stats handling */ +void gve_handle_report_stats(struct gve_priv *priv); /* exported by ethtool.c */ extern const struct ethtool_ops gve_ethtool_ops; /* needed by ethtool */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index c3ba7baf0107..24ae6a28a806 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -23,6 +23,20 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1; priv->adminq_prod_cnt = 0; + priv->adminq_cmd_fail = 0; + priv->adminq_timeouts = 0; + priv->adminq_describe_device_cnt = 0; + priv->adminq_cfg_device_resources_cnt = 0; + 
priv->adminq_register_page_list_cnt = 0; + priv->adminq_unregister_page_list_cnt = 0; + priv->adminq_create_tx_queue_cnt = 0; + priv->adminq_create_rx_queue_cnt = 0; + priv->adminq_destroy_tx_queue_cnt = 0; + priv->adminq_destroy_rx_queue_cnt = 0; + priv->adminq_dcfg_device_resources_cnt = 0; + priv->adminq_set_driver_parameter_cnt = 0; + priv->adminq_report_stats_cnt = 0; + priv->adminq_report_link_speed_cnt = 0; /* Setup Admin queue with the device */ iowrite32be(priv->adminq_bus_addr / PAGE_SIZE, @@ -81,17 +95,18 @@ static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, u32 prod_cnt) return false; } -static int gve_adminq_parse_err(struct device *dev, u32 status) +static int gve_adminq_parse_err(struct gve_priv *priv, u32 status) { if (status != GVE_ADMINQ_COMMAND_PASSED && - status != GVE_ADMINQ_COMMAND_UNSET) - dev_err(dev, "AQ command failed with status %d\n", status); - + status != GVE_ADMINQ_COMMAND_UNSET) { + dev_err(&priv->pdev->dev, "AQ command failed with status %d\n", status); + priv->adminq_cmd_fail++; + } switch (status) { case GVE_ADMINQ_COMMAND_PASSED: return 0; case GVE_ADMINQ_COMMAND_UNSET: - dev_err(dev, "parse_aq_err: err and status both unset, this should not be possible.\n"); + dev_err(&priv->pdev->dev, "parse_aq_err: err and status both unset, this should not be possible.\n"); return -EINVAL; case GVE_ADMINQ_COMMAND_ERROR_ABORTED: case GVE_ADMINQ_COMMAND_ERROR_CANCELLED: @@ -116,36 +131,145 @@ static int gve_adminq_parse_err(struct device *dev, u32 status) case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED: return -ENOTSUPP; default: - dev_err(dev, "parse_aq_err: unknown status code %d\n", status); + dev_err(&priv->pdev->dev, "parse_aq_err: unknown status code %d\n", status); return -EINVAL; } } +/* Flushes all AQ commands currently queued and waits for them to complete. + * If there are failures, it will return the first error. + */ +static int gve_adminq_kick_and_wait(struct gve_priv *priv) +{ + u32 tail, head; + int i; + + tail = ioread32be(&priv->reg_bar0->adminq_event_counter); + head = priv->adminq_prod_cnt; + + gve_adminq_kick_cmd(priv, head); + if (!gve_adminq_wait_for_cmd(priv, head)) { + dev_err(&priv->pdev->dev, "AQ commands timed out, need to reset AQ\n"); + priv->adminq_timeouts++; + return -ENOTRECOVERABLE; + } + + for (i = tail; i < head; i++) { + union gve_adminq_command *cmd; + u32 status, err; + + cmd = &priv->adminq[i & priv->adminq_mask]; + status = be32_to_cpu(READ_ONCE(cmd->status)); + err = gve_adminq_parse_err(priv, status); + if (err) + // Return the first error if we failed. + return err; + } + + return 0; +} + /* This function is not threadsafe - the caller is responsible for any * necessary locks. */ -int gve_adminq_execute_cmd(struct gve_priv *priv, - union gve_adminq_command *cmd_orig) +static int gve_adminq_issue_cmd(struct gve_priv *priv, + union gve_adminq_command *cmd_orig) { union gve_adminq_command *cmd; - u32 status = 0; - u32 prod_cnt; + u32 opcode; + u32 tail; + + tail = ioread32be(&priv->reg_bar0->adminq_event_counter); + + // Check if next command will overflow the buffer. + if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) { + int err; + + // Flush existing commands to make room. + err = gve_adminq_kick_and_wait(priv); + if (err) + return err; + + // Retry. + tail = ioread32be(&priv->reg_bar0->adminq_event_counter); + if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) { + // This should never happen. We just flushed the + // command queue so there should be enough space. 
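+			// With 4 KiB pages the ring holds
+			// PAGE_SIZE / 64 == 64 commands (adminq_mask == 63);
+			// e.g. tail == 5 with adminq_prod_cnt == 68 gives
+			// ((68 + 1) & 63) == 5, so the ring is still full.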
+ return -ENOMEM; + } + } cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask]; priv->adminq_prod_cnt++; - prod_cnt = priv->adminq_prod_cnt; memcpy(cmd, cmd_orig, sizeof(*cmd_orig)); - - gve_adminq_kick_cmd(priv, prod_cnt); - if (!gve_adminq_wait_for_cmd(priv, prod_cnt)) { - dev_err(&priv->pdev->dev, "AQ command timed out, need to reset AQ\n"); - return -ENOTRECOVERABLE; + opcode = be32_to_cpu(READ_ONCE(cmd->opcode)); + + switch (opcode) { + case GVE_ADMINQ_DESCRIBE_DEVICE: + priv->adminq_describe_device_cnt++; + break; + case GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES: + priv->adminq_cfg_device_resources_cnt++; + break; + case GVE_ADMINQ_REGISTER_PAGE_LIST: + priv->adminq_register_page_list_cnt++; + break; + case GVE_ADMINQ_UNREGISTER_PAGE_LIST: + priv->adminq_unregister_page_list_cnt++; + break; + case GVE_ADMINQ_CREATE_TX_QUEUE: + priv->adminq_create_tx_queue_cnt++; + break; + case GVE_ADMINQ_CREATE_RX_QUEUE: + priv->adminq_create_rx_queue_cnt++; + break; + case GVE_ADMINQ_DESTROY_TX_QUEUE: + priv->adminq_destroy_tx_queue_cnt++; + break; + case GVE_ADMINQ_DESTROY_RX_QUEUE: + priv->adminq_destroy_rx_queue_cnt++; + break; + case GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES: + priv->adminq_dcfg_device_resources_cnt++; + break; + case GVE_ADMINQ_SET_DRIVER_PARAMETER: + priv->adminq_set_driver_parameter_cnt++; + break; + case GVE_ADMINQ_REPORT_STATS: + priv->adminq_report_stats_cnt++; + break; + case GVE_ADMINQ_REPORT_LINK_SPEED: + priv->adminq_report_link_speed_cnt++; + break; + default: + dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode); } - memcpy(cmd_orig, cmd, sizeof(*cmd)); - status = be32_to_cpu(READ_ONCE(cmd->status)); - return gve_adminq_parse_err(&priv->pdev->dev, status); + return 0; +} + +/* This function is not threadsafe - the caller is responsible for any + * necessary locks. + * The caller is also responsible for making sure there are no commands + * waiting to be executed. 
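+ * Because it requires the queue to be idle (tail == head), it must not be
+ * mixed with batched gve_adminq_issue_cmd() calls that have not yet been
+ * flushed out with gve_adminq_kick_and_wait().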
+ */ +static int gve_adminq_execute_cmd(struct gve_priv *priv, union gve_adminq_command *cmd_orig) +{ + u32 tail, head; + int err; + + tail = ioread32be(&priv->reg_bar0->adminq_event_counter); + head = priv->adminq_prod_cnt; + if (tail != head) + // This is not a valid path + return -EINVAL; + + err = gve_adminq_issue_cmd(priv, cmd_orig); + if (err) + return err; + + return gve_adminq_kick_and_wait(priv); } /* The device specifies that the management vector can either be the first irq @@ -190,29 +314,50 @@ int gve_adminq_deconfigure_device_resources(struct gve_priv *priv) return gve_adminq_execute_cmd(priv, &cmd); } -int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) +static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) { struct gve_tx_ring *tx = &priv->tx[queue_index]; union gve_adminq_command cmd; + int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE); cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) { .queue_id = cpu_to_be32(queue_index), .reserved = 0, - .queue_resources_addr = cpu_to_be64(tx->q_resources_bus), + .queue_resources_addr = + cpu_to_be64(tx->q_resources_bus), .tx_ring_addr = cpu_to_be64(tx->bus), .queue_page_list_id = cpu_to_be32(tx->tx_fifo.qpl->id), .ntfy_id = cpu_to_be32(tx->ntfy_id), }; - return gve_adminq_execute_cmd(priv, &cmd); + err = gve_adminq_issue_cmd(priv, &cmd); + if (err) + return err; + + return 0; +} + +int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues) +{ + int err; + int i; + + for (i = 0; i < num_queues; i++) { + err = gve_adminq_create_tx_queue(priv, i); + if (err) + return err; + } + + return gve_adminq_kick_and_wait(priv); } -int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) { struct gve_rx_ring *rx = &priv->rx[queue_index]; union gve_adminq_command cmd; + int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); @@ -227,12 +372,31 @@ int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) .queue_page_list_id = cpu_to_be32(rx->data.qpl->id), }; - return gve_adminq_execute_cmd(priv, &cmd); + err = gve_adminq_issue_cmd(priv, &cmd); + if (err) + return err; + + return 0; } -int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index) +int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues) +{ + int err; + int i; + + for (i = 0; i < num_queues; i++) { + err = gve_adminq_create_rx_queue(priv, i); + if (err) + return err; + } + + return gve_adminq_kick_and_wait(priv); +} + +static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index) { union gve_adminq_command cmd; + int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE); @@ -240,12 +404,31 @@ int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index) .queue_id = cpu_to_be32(queue_index), }; - return gve_adminq_execute_cmd(priv, &cmd); + err = gve_adminq_issue_cmd(priv, &cmd); + if (err) + return err; + + return 0; +} + +int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues) +{ + int err; + int i; + + for (i = 0; i < num_queues; i++) { + err = gve_adminq_destroy_tx_queue(priv, i); + if (err) + return err; + } + + return gve_adminq_kick_and_wait(priv); } -int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) +static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) { union gve_adminq_command cmd; + 
int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); @@ -253,7 +436,25 @@ int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) .queue_id = cpu_to_be32(queue_index), }; - return gve_adminq_execute_cmd(priv, &cmd); + err = gve_adminq_issue_cmd(priv, &cmd); + if (err) + return err; + + return 0; +} + +int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) +{ + int err; + int i; + + for (i = 0; i < num_queues; i++) { + err = gve_adminq_destroy_rx_queue(priv, i); + if (err) + return err; + } + + return gve_adminq_kick_and_wait(priv); } int gve_adminq_describe_device(struct gve_priv *priv) @@ -283,8 +484,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) { - netif_err(priv, drv, priv->dev, "Tx desc count %d too low\n", - priv->tx_desc_cnt); + dev_err(&priv->pdev->dev, "Tx desc count %d too low\n", priv->tx_desc_cnt); err = -EINVAL; goto free_device_descriptor; } @@ -293,8 +493,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) < PAGE_SIZE || priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0]) < PAGE_SIZE) { - netif_err(priv, drv, priv->dev, "Rx desc count %d too low\n", - priv->rx_desc_cnt); + dev_err(&priv->pdev->dev, "Rx desc count %d too low\n", priv->rx_desc_cnt); err = -EINVAL; goto free_device_descriptor; } @@ -302,8 +501,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) be64_to_cpu(descriptor->max_registered_pages); mtu = be16_to_cpu(descriptor->mtu); if (mtu < ETH_MIN_MTU) { - netif_err(priv, drv, priv->dev, "MTU %d below minimum MTU\n", - mtu); + dev_err(&priv->pdev->dev, "MTU %d below minimum MTU\n", mtu); err = -EINVAL; goto free_device_descriptor; } @@ -311,12 +509,12 @@ int gve_adminq_describe_device(struct gve_priv *priv) priv->num_event_counters = be16_to_cpu(descriptor->counters); ether_addr_copy(priv->dev->dev_addr, descriptor->mac); mac = descriptor->mac; - netif_info(priv, drv, priv->dev, "MAC addr: %pM\n", mac); + dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac); priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl); priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl); if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) { - netif_err(priv, drv, priv->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n", - priv->rx_pages_per_qpl); + dev_err(&priv->pdev->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n", + priv->rx_pages_per_qpl); priv->rx_desc_cnt = priv->rx_pages_per_qpl; } priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); @@ -385,3 +583,46 @@ int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu) return gve_adminq_execute_cmd(priv, &cmd); } + +int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, + dma_addr_t stats_report_addr, u64 interval) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_STATS); + cmd.report_stats = (struct gve_adminq_report_stats) { + .stats_report_len = cpu_to_be64(stats_report_len), + .stats_report_addr = cpu_to_be64(stats_report_addr), + .interval = cpu_to_be64(interval), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_report_link_speed(struct gve_priv *priv) +{ + union gve_adminq_command gvnic_cmd; + dma_addr_t link_speed_region_bus; + __be64 *link_speed_region; + int err; + + 
link_speed_region = + dma_alloc_coherent(&priv->pdev->dev, sizeof(*link_speed_region), + &link_speed_region_bus, GFP_KERNEL); + + if (!link_speed_region) + return -ENOMEM; + + memset(&gvnic_cmd, 0, sizeof(gvnic_cmd)); + gvnic_cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_LINK_SPEED); + gvnic_cmd.report_link_speed.link_speed_address = + cpu_to_be64(link_speed_region_bus); + + err = gve_adminq_execute_cmd(priv, &gvnic_cmd); + + priv->link_speed = be64_to_cpu(*link_speed_region); + dma_free_coherent(&priv->pdev->dev, sizeof(*link_speed_region), link_speed_region, + link_speed_region_bus); + return err; +} diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 4dfa06edc0f8..281de8326bc5 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -21,6 +21,8 @@ enum gve_adminq_opcodes { GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8, GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9, GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB, + GVE_ADMINQ_REPORT_STATS = 0xC, + GVE_ADMINQ_REPORT_LINK_SPEED = 0xD }; /* Admin queue status codes */ @@ -172,6 +174,51 @@ struct gve_adminq_set_driver_parameter { static_assert(sizeof(struct gve_adminq_set_driver_parameter) == 16); +struct gve_adminq_report_stats { + __be64 stats_report_len; + __be64 stats_report_addr; + __be64 interval; +}; + +static_assert(sizeof(struct gve_adminq_report_stats) == 24); + +struct gve_adminq_report_link_speed { + __be64 link_speed_address; +}; + +static_assert(sizeof(struct gve_adminq_report_link_speed) == 8); + +struct stats { + __be32 stat_name; + __be32 queue_id; + __be64 value; +}; + +static_assert(sizeof(struct stats) == 16); + +struct gve_stats_report { + __be64 written_count; + struct stats stats[0]; +}; + +static_assert(sizeof(struct gve_stats_report) == 8); + +enum gve_stat_names { + // stats from gve + TX_WAKE_CNT = 1, + TX_STOP_CNT = 2, + TX_FRAMES_SENT = 3, + TX_BYTES_SENT = 4, + TX_LAST_COMPLETION_PROCESSED = 5, + RX_NEXT_EXPECTED_SEQUENCE = 6, + RX_BUFFERS_POSTED = 7, + // stats from NIC + RX_QUEUE_DROP_CNT = 65, + RX_NO_BUFFERS_POSTED = 66, + RX_DROPS_PACKET_OVER_MRU = 67, + RX_DROPS_INVALID_CHECKSUM = 68, +}; + union gve_adminq_command { struct { __be32 opcode; @@ -187,6 +234,8 @@ union gve_adminq_command { struct gve_adminq_register_page_list reg_page_list; struct gve_adminq_unregister_page_list unreg_page_list; struct gve_adminq_set_driver_parameter set_driver_param; + struct gve_adminq_report_stats report_stats; + struct gve_adminq_report_link_speed report_link_speed; }; }; u8 reserved[64]; @@ -197,8 +246,6 @@ static_assert(sizeof(union gve_adminq_command) == 64); int gve_adminq_alloc(struct device *dev, struct gve_priv *priv); void gve_adminq_free(struct device *dev, struct gve_priv *priv); void gve_adminq_release(struct gve_priv *priv); -int gve_adminq_execute_cmd(struct gve_priv *priv, - union gve_adminq_command *cmd_orig); int gve_adminq_describe_device(struct gve_priv *priv); int gve_adminq_configure_device_resources(struct gve_priv *priv, dma_addr_t counter_array_bus_addr, @@ -206,12 +253,15 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv, dma_addr_t db_array_bus_addr, u32 num_ntfy_blks); int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); -int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_id); -int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_id); -int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_id); -int gve_adminq_destroy_rx_queue(struct gve_priv 
*priv, u32 queue_id);
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues);
 int gve_adminq_register_page_list(struct gve_priv *priv,
				  struct gve_queue_page_list *qpl);
 int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
 int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
+int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
+			    dma_addr_t stats_report_addr, u64 interval);
+int gve_adminq_report_link_speed(struct gve_priv *priv);
 
 #endif /* _GVE_ADMINQ_H */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index d8fa816f4473..7b44769bd87c 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -6,6 +6,7 @@
 #include <linux/rtnetlink.h>
 
 #include "gve.h"
+#include "gve_adminq.h"
 
 static void gve_get_drvinfo(struct net_device *netdev,
			    struct ethtool_drvinfo *info)
@@ -34,41 +35,84 @@ static u32 gve_get_msglevel(struct net_device *netdev)
 static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
	"rx_dropped", "tx_dropped", "tx_timeouts",
+	"rx_skb_alloc_fail", "rx_buf_alloc_fail", "rx_desc_err_dropped_pkt",
+	"interface_up_cnt", "interface_down_cnt", "reset_cnt",
+	"page_alloc_fail", "dma_mapping_error", "stats_report_trigger_cnt",
+};
+
+static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
+	"rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_bytes[%u]",
+	"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
+	"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
+	"rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]",
+};
+
+static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
+	"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_bytes[%u]",
+	"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
+};
+
+static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
+	"adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts",
+	"adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt",
+	"adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt",
+	"adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt",
+	"adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt",
+	"adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt",
+	"adminq_report_stats_cnt", "adminq_report_link_speed_cnt"
+};
+
+static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = {
+	"report-stats",
+};
 
 #define GVE_MAIN_STATS_LEN  ARRAY_SIZE(gve_gstrings_main_stats)
-#define NUM_GVE_TX_CNTS	5
-#define NUM_GVE_RX_CNTS	2
+#define GVE_ADMINQ_STATS_LEN  ARRAY_SIZE(gve_gstrings_adminq_stats)
+#define NUM_GVE_TX_CNTS	ARRAY_SIZE(gve_gstrings_tx_stats)
+#define NUM_GVE_RX_CNTS	ARRAY_SIZE(gve_gstrings_rx_stats)
+#define GVE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(gve_gstrings_priv_flags)
 
 static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
	struct gve_priv *priv = netdev_priv(netdev);
	char *s = (char *)data;
-	int i;
+	int i, j;
 
-	if (stringset != ETH_SS_STATS)
-		return;
+	switch (stringset) {
+	case ETH_SS_STATS:
+		memcpy(s, *gve_gstrings_main_stats,
+		       sizeof(gve_gstrings_main_stats));
+		s += sizeof(gve_gstrings_main_stats);
+
+		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+			for (j = 0; j < 
NUM_GVE_RX_CNTS; j++) { + snprintf(s, ETH_GSTRING_LEN, + gve_gstrings_rx_stats[j], i); + s += ETH_GSTRING_LEN; + } + } - memcpy(s, *gve_gstrings_main_stats, - sizeof(gve_gstrings_main_stats)); - s += sizeof(gve_gstrings_main_stats); - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - snprintf(s, ETH_GSTRING_LEN, "rx_desc_cnt[%u]", i); - s += ETH_GSTRING_LEN; - snprintf(s, ETH_GSTRING_LEN, "rx_desc_fill_cnt[%u]", i); - s += ETH_GSTRING_LEN; - } - for (i = 0; i < priv->tx_cfg.num_queues; i++) { - snprintf(s, ETH_GSTRING_LEN, "tx_req[%u]", i); - s += ETH_GSTRING_LEN; - snprintf(s, ETH_GSTRING_LEN, "tx_done[%u]", i); - s += ETH_GSTRING_LEN; - snprintf(s, ETH_GSTRING_LEN, "tx_wake[%u]", i); - s += ETH_GSTRING_LEN; - snprintf(s, ETH_GSTRING_LEN, "tx_stop[%u]", i); - s += ETH_GSTRING_LEN; - snprintf(s, ETH_GSTRING_LEN, "tx_event_counter[%u]", i); - s += ETH_GSTRING_LEN; + for (i = 0; i < priv->tx_cfg.num_queues; i++) { + for (j = 0; j < NUM_GVE_TX_CNTS; j++) { + snprintf(s, ETH_GSTRING_LEN, + gve_gstrings_tx_stats[j], i); + s += ETH_GSTRING_LEN; + } + } + + memcpy(s, *gve_gstrings_adminq_stats, + sizeof(gve_gstrings_adminq_stats)); + s += sizeof(gve_gstrings_adminq_stats); + break; + + case ETH_SS_PRIV_FLAGS: + memcpy(s, *gve_gstrings_priv_flags, + sizeof(gve_gstrings_priv_flags)); + s += sizeof(gve_gstrings_priv_flags); + break; + + default: + break; } } @@ -78,9 +122,11 @@ static int gve_get_sset_count(struct net_device *netdev, int sset) switch (sset) { case ETH_SS_STATS: - return GVE_MAIN_STATS_LEN + + return GVE_MAIN_STATS_LEN + GVE_ADMINQ_STATS_LEN + (priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) + (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS); + case ETH_SS_PRIV_FLAGS: + return GVE_PRIV_FLAGS_STR_LEN; default: return -EOPNOTSUPP; } @@ -90,24 +136,56 @@ static void gve_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { - struct gve_priv *priv = netdev_priv(netdev); - u64 rx_pkts, rx_bytes, tx_pkts, tx_bytes; + u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail, + tmp_rx_desc_err_dropped_pkt, tmp_tx_pkts, tmp_tx_bytes; + u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts, + rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes; + int stats_idx, base_stats_idx, max_stats_idx; + struct stats *report_stats; + int *rx_qid_to_stats_idx; + int *tx_qid_to_stats_idx; + struct gve_priv *priv; + bool skip_nic_stats; unsigned int start; int ring; - int i; + int i, j; ASSERT_RTNL(); - for (rx_pkts = 0, rx_bytes = 0, ring = 0; + priv = netdev_priv(netdev); + report_stats = priv->stats_report->stats; + rx_qid_to_stats_idx = kmalloc_array(priv->rx_cfg.num_queues, + sizeof(int), GFP_KERNEL); + if (!rx_qid_to_stats_idx) + return; + tx_qid_to_stats_idx = kmalloc_array(priv->tx_cfg.num_queues, + sizeof(int), GFP_KERNEL); + if (!tx_qid_to_stats_idx) { + kfree(rx_qid_to_stats_idx); + return; + } + for (rx_pkts = 0, rx_bytes = 0, rx_skb_alloc_fail = 0, + rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, ring = 0; ring < priv->rx_cfg.num_queues; ring++) { if (priv->rx) { do { + struct gve_rx_ring *rx = &priv->rx[ring]; + start = u64_stats_fetch_begin(&priv->rx[ring].statss); - rx_pkts += priv->rx[ring].rpackets; - rx_bytes += priv->rx[ring].rbytes; + tmp_rx_pkts = rx->rpackets; + tmp_rx_bytes = rx->rbytes; + tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; + tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; + tmp_rx_desc_err_dropped_pkt = + rx->rx_desc_err_dropped_pkt; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); + rx_pkts += tmp_rx_pkts; + 
rx_bytes += tmp_rx_bytes; + rx_skb_alloc_fail += tmp_rx_skb_alloc_fail; + rx_buf_alloc_fail += tmp_rx_buf_alloc_fail; + rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt; } } for (tx_pkts = 0, tx_bytes = 0, ring = 0; @@ -116,10 +194,12 @@ gve_get_ethtool_stats(struct net_device *netdev, do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); - tx_pkts += priv->tx[ring].pkt_done; - tx_bytes += priv->tx[ring].bytes_done; + tmp_tx_pkts = priv->tx[ring].pkt_done; + tmp_tx_bytes = priv->tx[ring].bytes_done; } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); + tx_pkts += tmp_tx_pkts; + tx_bytes += tmp_tx_bytes; } } @@ -128,22 +208,102 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = tx_pkts; data[i++] = rx_bytes; data[i++] = tx_bytes; - /* Skip rx_dropped and tx_dropped */ - i += 2; + /* total rx dropped packets */ + data[i++] = rx_skb_alloc_fail + rx_buf_alloc_fail + + rx_desc_err_dropped_pkt; + /* Skip tx_dropped */ + i++; + data[i++] = priv->tx_timeo_cnt; + data[i++] = rx_skb_alloc_fail; + data[i++] = rx_buf_alloc_fail; + data[i++] = rx_desc_err_dropped_pkt; + data[i++] = priv->interface_up_cnt; + data[i++] = priv->interface_down_cnt; + data[i++] = priv->reset_cnt; + data[i++] = priv->page_alloc_fail; + data[i++] = priv->dma_mapping_error; + data[i++] = priv->stats_report_trigger_cnt; i = GVE_MAIN_STATS_LEN; + /* For rx cross-reporting stats, start from nic rx stats in report */ + base_stats_idx = GVE_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues + + GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues; + max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues + + base_stats_idx; + /* Preprocess the stats report for rx, map queue id to start index */ + skip_nic_stats = false; + for (stats_idx = base_stats_idx; stats_idx < max_stats_idx; + stats_idx += NIC_RX_STATS_REPORT_NUM) { + u32 stat_name = be32_to_cpu(report_stats[stats_idx].stat_name); + u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id); + + if (stat_name == 0) { + /* no stats written by NIC yet */ + skip_nic_stats = true; + break; + } + rx_qid_to_stats_idx[queue_id] = stats_idx; + } /* walk RX rings */ if (priv->rx) { for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { struct gve_rx_ring *rx = &priv->rx[ring]; - data[i++] = rx->cnt; data[i++] = rx->fill_cnt; + data[i++] = rx->cnt; + do { + start = + u64_stats_fetch_begin(&priv->rx[ring].statss); + tmp_rx_bytes = rx->rbytes; + tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; + tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; + tmp_rx_desc_err_dropped_pkt = + rx->rx_desc_err_dropped_pkt; + } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + data[i++] = tmp_rx_bytes; + /* rx dropped packets */ + data[i++] = tmp_rx_skb_alloc_fail + + tmp_rx_buf_alloc_fail + + tmp_rx_desc_err_dropped_pkt; + data[i++] = rx->rx_copybreak_pkt; + data[i++] = rx->rx_copied_pkt; + /* stats from NIC */ + if (skip_nic_stats) { + /* skip NIC rx stats */ + i += NIC_RX_STATS_REPORT_NUM; + continue; + } + for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) { + u64 value = + be64_to_cpu(report_stats[rx_qid_to_stats_idx[ring] + j].value); + + data[i++] = value; + } } } else { i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS; } + + /* For tx cross-reporting stats, start from nic tx stats in report */ + base_stats_idx = max_stats_idx; + max_stats_idx = NIC_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues + + max_stats_idx; + /* Preprocess the stats report for tx, map queue id to start index */ + skip_nic_stats = false; + for (stats_idx = base_stats_idx; 
stats_idx < max_stats_idx; + stats_idx += NIC_TX_STATS_REPORT_NUM) { + u32 stat_name = be32_to_cpu(report_stats[stats_idx].stat_name); + u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id); + + if (stat_name == 0) { + /* no stats written by NIC yet */ + skip_nic_stats = true; + break; + } + tx_qid_to_stats_idx[queue_id] = stats_idx; + } /* walk TX rings */ if (priv->tx) { for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { @@ -151,14 +311,51 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = tx->req; data[i++] = tx->done; + do { + start = + u64_stats_fetch_begin(&priv->tx[ring].statss); + tmp_tx_bytes = tx->bytes_done; + } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + data[i++] = tmp_tx_bytes; data[i++] = tx->wake_queue; data[i++] = tx->stop_queue; data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv, tx)); + /* stats from NIC */ + if (skip_nic_stats) { + /* skip NIC tx stats */ + i += NIC_TX_STATS_REPORT_NUM; + continue; + } + for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) { + u64 value = + be64_to_cpu(report_stats[tx_qid_to_stats_idx[ring] + j].value); + data[i++] = value; + } } } else { i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS; } + + kfree(rx_qid_to_stats_idx); + kfree(tx_qid_to_stats_idx); + /* AQ Stats */ + data[i++] = priv->adminq_prod_cnt; + data[i++] = priv->adminq_cmd_fail; + data[i++] = priv->adminq_timeouts; + data[i++] = priv->adminq_describe_device_cnt; + data[i++] = priv->adminq_cfg_device_resources_cnt; + data[i++] = priv->adminq_register_page_list_cnt; + data[i++] = priv->adminq_unregister_page_list_cnt; + data[i++] = priv->adminq_create_tx_queue_cnt; + data[i++] = priv->adminq_create_rx_queue_cnt; + data[i++] = priv->adminq_destroy_tx_queue_cnt; + data[i++] = priv->adminq_destroy_rx_queue_cnt; + data[i++] = priv->adminq_dcfg_device_resources_cnt; + data[i++] = priv->adminq_set_driver_parameter_cnt; + data[i++] = priv->adminq_report_stats_cnt; + data[i++] = priv->adminq_report_link_speed_cnt; } static void gve_get_channels(struct net_device *netdev, @@ -230,6 +427,95 @@ static int gve_user_reset(struct net_device *netdev, u32 *flags) return -EOPNOTSUPP; } +static int gve_get_tunable(struct net_device *netdev, + const struct ethtool_tunable *etuna, void *value) +{ + struct gve_priv *priv = netdev_priv(netdev); + + switch (etuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)value = priv->rx_copybreak; + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int gve_set_tunable(struct net_device *netdev, + const struct ethtool_tunable *etuna, + const void *value) +{ + struct gve_priv *priv = netdev_priv(netdev); + u32 len; + + switch (etuna->id) { + case ETHTOOL_RX_COPYBREAK: + len = *(u32 *)value; + if (len > PAGE_SIZE / 2) + return -EINVAL; + priv->rx_copybreak = len; + return 0; + default: + return -EOPNOTSUPP; + } +} + +static u32 gve_get_priv_flags(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + u32 ret_flags = 0; + + /* Only 1 flag exists currently: report-stats (BIT(0)), so set that flag.
*/ + if (priv->ethtool_flags & BIT(0)) + ret_flags |= BIT(0); + return ret_flags; +} + +static int gve_set_priv_flags(struct net_device *netdev, u32 flags) +{ + struct gve_priv *priv = netdev_priv(netdev); + u64 ori_flags, new_flags; + + ori_flags = READ_ONCE(priv->ethtool_flags); + new_flags = ori_flags; + + /* Only one priv flag exists: report-stats (BIT(0))*/ + if (flags & BIT(0)) + new_flags |= BIT(0); + else + new_flags &= ~(BIT(0)); + priv->ethtool_flags = new_flags; + /* start report-stats timer when user turns report stats on. */ + if (flags & BIT(0)) { + mod_timer(&priv->stats_report_timer, + round_jiffies(jiffies + + msecs_to_jiffies(priv->stats_report_timer_period))); + } + /* Zero off gve stats when report-stats turned off and */ + /* delete report stats timer. */ + if (!(flags & BIT(0)) && (ori_flags & BIT(0))) { + int tx_stats_num = GVE_TX_STATS_REPORT_NUM * + priv->tx_cfg.num_queues; + int rx_stats_num = GVE_RX_STATS_REPORT_NUM * + priv->rx_cfg.num_queues; + + memset(priv->stats_report->stats, 0, (tx_stats_num + rx_stats_num) * + sizeof(struct stats)); + del_timer_sync(&priv->stats_report_timer); + } + return 0; +} + +static int gve_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct gve_priv *priv = netdev_priv(netdev); + int err = gve_adminq_report_link_speed(priv); + + cmd->base.speed = priv->link_speed; + return err; +} + const struct ethtool_ops gve_ethtool_ops = { .get_drvinfo = gve_get_drvinfo, .get_strings = gve_get_strings, @@ -242,4 +528,9 @@ const struct ethtool_ops gve_ethtool_ops = { .get_link = ethtool_op_get_link, .get_ringparam = gve_get_ringparam, .reset = gve_user_reset, + .get_tunable = gve_get_tunable, + .set_tunable = gve_set_tunable, + .get_priv_flags = gve_get_priv_flags, + .set_priv_flags = gve_set_priv_flags, + .get_link_ksettings = gve_get_link_ksettings }; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index e032563ceefd..48a433154ce0 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -78,6 +78,66 @@ static void gve_free_counter_array(struct gve_priv *priv) priv->counter_array = NULL; } +/* NIC requests to report stats */ +static void gve_stats_report_task(struct work_struct *work) +{ + struct gve_priv *priv = container_of(work, struct gve_priv, + stats_report_task); + if (gve_get_do_report_stats(priv)) { + gve_handle_report_stats(priv); + gve_clear_do_report_stats(priv); + } +} + +static void gve_stats_report_schedule(struct gve_priv *priv) +{ + if (!gve_get_probe_in_progress(priv) && + !gve_get_reset_in_progress(priv)) { + gve_set_do_report_stats(priv); + queue_work(priv->gve_wq, &priv->stats_report_task); + } +} + +static void gve_stats_report_timer(struct timer_list *t) +{ + struct gve_priv *priv = from_timer(priv, t, stats_report_timer); + + mod_timer(&priv->stats_report_timer, + round_jiffies(jiffies + + msecs_to_jiffies(priv->stats_report_timer_period))); + gve_stats_report_schedule(priv); +} + +static int gve_alloc_stats_report(struct gve_priv *priv) +{ + int tx_stats_num, rx_stats_num; + + tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * + priv->tx_cfg.num_queues; + rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * + priv->rx_cfg.num_queues; + priv->stats_report_len = sizeof(struct gve_stats_report) + + (tx_stats_num + rx_stats_num) * + sizeof(struct stats); + priv->stats_report = + dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, + 
&priv->stats_report_bus, GFP_KERNEL); + if (!priv->stats_report) + return -ENOMEM; + /* Set up timer for the report-stats task */ + timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); + priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; + return 0; +} + +static void gve_free_stats_report(struct gve_priv *priv) +{ + del_timer_sync(&priv->stats_report_timer); + dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, + priv->stats_report, priv->stats_report_bus); + priv->stats_report = NULL; +} + static irqreturn_t gve_mgmnt_intr(int irq, void *arg) { struct gve_priv *priv = arg; @@ -270,6 +330,9 @@ static int gve_setup_device_resources(struct gve_priv *priv) err = gve_alloc_notify_blocks(priv); if (err) goto abort_with_counter; + err = gve_alloc_stats_report(priv); + if (err) + goto abort_with_ntfy_blocks; err = gve_adminq_configure_device_resources(priv, priv->counter_array_bus, priv->num_event_counters, @@ -279,10 +342,18 @@ static int gve_setup_device_resources(struct gve_priv *priv) dev_err(&priv->pdev->dev, "could not setup device_resources: err=%d\n", err); err = -ENXIO; - goto abort_with_ntfy_blocks; + goto abort_with_stats_report; } + err = gve_adminq_report_stats(priv, priv->stats_report_len, + priv->stats_report_bus, + GVE_STATS_REPORT_TIMER_PERIOD); + if (err) + dev_err(&priv->pdev->dev, + "Failed to report stats: err=%d\n", err); gve_set_device_resources_ok(priv); return 0; +abort_with_stats_report: + gve_free_stats_report(priv); abort_with_ntfy_blocks: gve_free_notify_blocks(priv); abort_with_counter: @@ -298,6 +369,13 @@ static void gve_teardown_device_resources(struct gve_priv *priv) /* Tell device its resources are being freed */ if (gve_get_device_resources_ok(priv)) { + /* detach the stats report */ + err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); + if (err) { + dev_err(&priv->pdev->dev, + "Failed to detach stats report: err=%d\n", err); + gve_trigger_reset(priv); + } err = gve_adminq_deconfigure_device_resources(priv); if (err) { dev_err(&priv->pdev->dev, @@ -308,6 +386,7 @@ static void gve_teardown_device_resources(struct gve_priv *priv) } gve_free_counter_array(priv); gve_free_notify_blocks(priv); + gve_free_stats_report(priv); gve_clear_device_resources_ok(priv); } @@ -371,36 +450,37 @@ static int gve_create_rings(struct gve_priv *priv) int err; int i; - for (i = 0; i < priv->tx_cfg.num_queues; i++) { - err = gve_adminq_create_tx_queue(priv, i); - if (err) { - netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n", - i); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; - } - netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i); + err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues); + if (err) { + netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", + priv->tx_cfg.num_queues); + /* This failure will trigger a reset - no need to clean + * up + */ + return err; } - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - err = gve_adminq_create_rx_queue(priv, i); - if (err) { - netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n", - i); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; - } - /* Rx data ring has been prefilled with packet buffers at - * queue allocation time. - * Write the doorbell to provide descriptor slots and packet - * buffers to the NIC. 
+ netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", + priv->tx_cfg.num_queues); + + err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); + if (err) { + netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", + priv->rx_cfg.num_queues); + /* This failure will trigger a reset - no need to clean + * up */ - gve_rx_write_doorbell(priv, &priv->rx[i]); - netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i); + return err; } + netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", + priv->rx_cfg.num_queues); + + /* Rx data ring has been prefilled with packet buffers at queue + * allocation time. + * Write the doorbell to provide descriptor slots and packet buffers + * to the NIC. + */ + for (i = 0; i < priv->rx_cfg.num_queues; i++) + gve_rx_write_doorbell(priv, &priv->rx[i]); return 0; } @@ -458,34 +538,23 @@ free_tx: static int gve_destroy_rings(struct gve_priv *priv) { int err; - int i; - for (i = 0; i < priv->tx_cfg.num_queues; i++) { - err = gve_adminq_destroy_tx_queue(priv, i); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to destroy tx queue %d\n", - i); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; - } - netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i); + err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues); + if (err) { + netif_err(priv, drv, priv->dev, + "failed to destroy tx queues\n"); + /* This failure will trigger a reset - no need to clean up */ + return err; } - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - err = gve_adminq_destroy_rx_queue(priv, i); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to destroy rx queue %d\n", - i); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; - } - netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i); + netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); + err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); + if (err) { + netif_err(priv, drv, priv->dev, + "failed to destroy rx queues\n"); + /* This failure will trigger a reset - no need to clean up */ + return err; } + netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); return 0; } @@ -514,14 +583,18 @@ static void gve_free_rings(struct gve_priv *priv) } } -int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma, +int gve_alloc_page(struct gve_priv *priv, struct device *dev, + struct page **page, dma_addr_t *dma, enum dma_data_direction dir) { *page = alloc_page(GFP_KERNEL); - if (!*page) + if (!*page) { + priv->page_alloc_fail++; return -ENOMEM; + } *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); if (dma_mapping_error(dev, *dma)) { + priv->dma_mapping_error++; put_page(*page); return -ENOMEM; } @@ -556,7 +629,7 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, return -ENOMEM; for (i = 0; i < pages; i++) { - err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i], + err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); /* caller handles clean up */ @@ -695,8 +768,14 @@ static int gve_open(struct net_device *dev) goto reset; gve_set_device_rings_ok(priv); + if (gve_get_report_stats(priv)) + mod_timer(&priv->stats_report_timer, + round_jiffies(jiffies + + msecs_to_jiffies(priv->stats_report_timer_period))); + gve_turnup(priv); - netif_carrier_on(dev); + queue_work(priv->gve_wq, &priv->service_task); + priv->interface_up_cnt++; return 0; free_rings: @@ -735,9 +814,11 @@ static int gve_close(struct 
net_device *dev) goto err; gve_clear_device_rings_ok(priv); } + del_timer_sync(&priv->stats_report_timer); gve_free_rings(priv); gve_free_qpls(priv); + priv->interface_down_cnt++; return 0; err: @@ -817,6 +898,7 @@ static void gve_turndown(struct gve_priv *priv) netif_tx_disable(priv->dev); gve_clear_napi_enabled(priv); + gve_clear_report_stats(priv); } static void gve_turnup(struct gve_priv *priv) @@ -867,6 +949,10 @@ static void gve_handle_status(struct gve_priv *priv, u32 status) dev_info(&priv->pdev->dev, "Device requested reset.\n"); gve_set_do_reset(priv); } + if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { + priv->stats_report_trigger_cnt++; + gve_set_do_report_stats(priv); + } } static void gve_handle_reset(struct gve_priv *priv) @@ -885,16 +971,95 @@ } } -/* Handle NIC status register changes and reset requests */ +void gve_handle_report_stats(struct gve_priv *priv) +{ + int idx, stats_idx = 0, tx_bytes; + unsigned int start = 0; + struct stats *stats = priv->stats_report->stats; + + if (!gve_get_report_stats(priv)) + return; + + be64_add_cpu(&priv->stats_report->written_count, 1); + /* tx stats */ + if (priv->tx) { + for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + do { + start = u64_stats_fetch_begin(&priv->tx[idx].statss); + tx_bytes = priv->tx[idx].bytes_done; + } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_WAKE_CNT), + .value = cpu_to_be64(priv->tx[idx].wake_queue), + .queue_id = cpu_to_be32(idx), + }; + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_STOP_CNT), + .value = cpu_to_be64(priv->tx[idx].stop_queue), + .queue_id = cpu_to_be32(idx), + }; + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_FRAMES_SENT), + .value = cpu_to_be64(priv->tx[idx].req), + .queue_id = cpu_to_be32(idx), + }; + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_BYTES_SENT), + .value = cpu_to_be64(tx_bytes), + .queue_id = cpu_to_be32(idx), + }; + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), + .value = cpu_to_be64(priv->tx[idx].done), + .queue_id = cpu_to_be32(idx), + }; + } + } + /* rx stats */ + if (priv->rx) { + for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), + .value = cpu_to_be64(priv->rx[idx].desc.seqno), + .queue_id = cpu_to_be32(idx), + }; + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), + .value = cpu_to_be64(priv->rx[idx].fill_cnt), + .queue_id = cpu_to_be32(idx), + }; + } + } +} + +static void gve_handle_link_status(struct gve_priv *priv, bool link_status) +{ + if (!gve_get_napi_enabled(priv)) + return; + + if (link_status == netif_carrier_ok(priv->dev)) + return; + + if (link_status) { + netdev_info(priv->dev, "Device link is up.\n"); + netif_carrier_on(priv->dev); + } else { + netdev_info(priv->dev, "Device link is down.\n"); + netif_carrier_off(priv->dev); + } +} + +/* Handle NIC status register changes, reset requests and report stats */ static void gve_service_task(struct work_struct *work) { struct gve_priv *priv = container_of(work, struct gve_priv, service_task); + u32 status = ioread32be(&priv->reg_bar0->device_status); - gve_handle_status(priv, - ioread32be(&priv->reg_bar0->device_status)); + gve_handle_status(priv, status); gve_handle_reset(priv); + gve_handle_link_status(priv,
GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); } static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) @@ -924,7 +1089,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) priv->dev->max_mtu = PAGE_SIZE; err = gve_adminq_set_mtu(priv, priv->dev->mtu); if (err) { - netif_err(priv, drv, priv->dev, "Could not set mtu"); + dev_err(&priv->pdev->dev, "Could not set mtu"); goto err; } } @@ -964,10 +1129,10 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) priv->rx_cfg.num_queues); } - netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n", - priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); - netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n", - priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); + dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", + priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); + dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", + priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); setup_device: err = gve_setup_device_resources(priv); @@ -1047,6 +1212,10 @@ int gve_reset(struct gve_priv *priv, bool attempt_teardown) /* Set it all back up */ err = gve_reset_recovery(priv, was_up); gve_clear_reset_in_progress(priv); + priv->reset_cnt++; + priv->interface_up_cnt = 0; + priv->interface_down_cnt = 0; + priv->stats_report_trigger_cnt = 0; return err; } @@ -1149,6 +1318,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) priv->db_bar2 = db_bar; priv->service_task_flags = 0x0; priv->state_flags = 0x0; + priv->ethtool_flags = 0x0; gve_set_probe_in_progress(priv); priv->gve_wq = alloc_ordered_workqueue("gve", 0); @@ -1158,6 +1328,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto abort_with_netdev; } INIT_WORK(&priv->service_task, gve_service_task); + INIT_WORK(&priv->stats_report_task, gve_stats_report_task); priv->tx_cfg.max_queues = max_tx_queues; priv->rx_cfg.max_queues = max_rx_queues; diff --git a/drivers/net/ethernet/google/gve/gve_register.h b/drivers/net/ethernet/google/gve/gve_register.h index 84ab8893aadd..fb655463c357 100644 --- a/drivers/net/ethernet/google/gve/gve_register.h +++ b/drivers/net/ethernet/google/gve/gve_register.h @@ -23,5 +23,6 @@ struct gve_registers { enum gve_device_status_flags { GVE_DEVICE_STATUS_RESET_MASK = BIT(1), GVE_DEVICE_STATUS_LINK_STATUS_MASK = BIT(2), + GVE_DEVICE_STATUS_REPORT_STATS_MASK = BIT(3), }; #endif /* _GVE_REGISTER_H_ */ diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 9f52e72ff641..008fa897a3e6 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -225,7 +225,8 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags) return PKT_HASH_TYPE_L2; } -static struct sk_buff *gve_rx_copy(struct net_device *dev, +static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx, + struct net_device *dev, struct napi_struct *napi, struct gve_rx_slot_page_info *page_info, u16 len) @@ -242,6 +243,11 @@ static struct sk_buff *gve_rx_copy(struct net_device *dev, skb_copy_to_linear_data(skb, va, len); skb->protocol = eth_type_trans(skb, dev); + + u64_stats_update_begin(&rx->statss); + rx->rx_copied_pkt++; + u64_stats_update_end(&rx->statss); + return skb; } @@ -284,8 +290,12 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, u16 len; /* drop this packet */ - if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) + if 
(unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) { + u64_stats_update_begin(&rx->statss); + rx->rx_desc_err_dropped_pkt++; + u64_stats_update_end(&rx->statss); return true; + } len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD; page_info = &rx->data.page_info[idx]; @@ -300,11 +310,14 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, if (PAGE_SIZE == 4096) { if (len <= priv->rx_copybreak) { /* Just copy small packets */ - skb = gve_rx_copy(dev, napi, page_info, len); + skb = gve_rx_copy(rx, dev, napi, page_info, len); + u64_stats_update_begin(&rx->statss); + rx->rx_copybreak_pkt++; + u64_stats_update_end(&rx->statss); goto have_skb; } if (unlikely(!gve_can_recycle_pages(dev))) { - skb = gve_rx_copy(dev, napi, page_info, len); + skb = gve_rx_copy(rx, dev, napi, page_info, len); goto have_skb; } pagecount = page_count(page_info->page); @@ -314,8 +327,12 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, * stack. */ skb = gve_rx_add_frags(dev, napi, page_info, len); - if (!skb) + if (!skb) { + u64_stats_update_begin(&rx->statss); + rx->rx_skb_alloc_fail++; + u64_stats_update_end(&rx->statss); return true; + } /* Make sure the kernel stack can't release the page */ get_page(page_info->page); /* "flip" to other packet buffer on this page */ @@ -324,21 +341,25 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, /* We have previously passed the other half of this * page up the stack, but it has not yet been freed. */ - skb = gve_rx_copy(dev, napi, page_info, len); + skb = gve_rx_copy(rx, dev, napi, page_info, len); } else { WARN(pagecount < 1, "Pagecount should never be < 1"); return false; } } else { - skb = gve_rx_copy(dev, napi, page_info, len); + skb = gve_rx_copy(rx, dev, napi, page_info, len); } have_skb: /* We didn't manage to allocate an skb but we haven't had any * reset worthy failures. 
*/ - if (!skb) + if (!skb) { + u64_stats_update_begin(&rx->statss); + rx->rx_skb_alloc_fail++; + u64_stats_update_end(&rx->statss); return true; + } if (likely(feat & NETIF_F_RXCSUM)) { /* NIC passes up the partial sum */ diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 08339278c722..00fafc0f8512 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -270,7 +270,7 @@ static void hnae_fini_queue(struct hnae_queue *q) hnae_fini_ring(&q->rx_ring); } -/** +/* * ae_chain - define ae chain head */ static RAW_NOTIFIER_HEAD(ae_chain); @@ -438,7 +438,7 @@ EXPORT_SYMBOL(hnae_ae_register); /** * hnae_ae_unregister - unregisters a HNAE AE engine - * @cdev: the device to unregister + * @hdev: the device to unregister */ void hnae_ae_unregister(struct hnae_ae_dev *hdev) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index b43dec0560a8..b98244f75ab9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -13,8 +13,6 @@ #include "hns_dsaf_ppe.h" #include "hns_dsaf_rcb.h" -#define AE_NAME_PORT_ID_IDX 6 - static struct hns_mac_cb *hns_get_mac_cb(struct hnae_handle *handle) { struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 9a907947ba19..4a448138b4ec 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -374,11 +374,12 @@ static void hns_mac_param_get(struct mac_params *param, } /** - *hns_mac_queue_config_bc_en - set broadcast rx&tx enable - *@mac_cb: mac device - *@queue: queue number - *@en:enable - *retuen 0 - success , negative --fail + * hns_mac_queue_config_bc_en - set broadcast rx&tx enable + * @mac_cb: mac device + * @port_num: queue number + * @vlan_id: vlan id + * @enable: enable + * return 0 - success , negative --fail */ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, u32 port_num, u16 vlan_id, bool enable) @@ -408,11 +409,11 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, } /** - *hns_mac_vm_config_bc_en - set broadcast rx&tx enable - *@mac_cb: mac device - *@vmid: vm id - *@en:enable - *retuen 0 - success , negative --fail + * hns_mac_vm_config_bc_en - set broadcast rx&tx enable + * @mac_cb: mac device + * @vmid: vm id + * @enable: enable + * return 0 - success , negative --fail */ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable) { @@ -542,8 +543,8 @@ void hns_mac_stop(struct hns_mac_cb *mac_cb) /** * hns_mac_get_autoneg - get auto autonegotiation * @mac_cb: mac control block - * @enable: enable or not - * retuen 0 - success , negative --fail + * @auto_neg: output pointer to autoneg result + * return 0 - success , negative --fail */ void hns_mac_get_autoneg(struct hns_mac_cb *mac_cb, u32 *auto_neg) { @@ -560,7 +561,7 @@ void hns_mac_get_autoneg(struct hns_mac_cb *mac_cb, u32 *auto_neg) * @mac_cb: mac control block * @rx_en: rx enable status * @tx_en: tx enable status - * retuen 0 - success , negative --fail + * return 0 - success , negative --fail */ void hns_mac_get_pauseparam(struct hns_mac_cb *mac_cb, u32 *rx_en, u32 *tx_en) { @@ -578,7 +579,7 @@ void hns_mac_get_pauseparam(struct hns_mac_cb *mac_cb, u32 *rx_en, u32 *tx_en) * hns_mac_set_autoneg - set auto autonegotiation * @mac_cb: mac control
block * @enable: enable or not - * retuen 0 - success , negative --fail + * return 0 - success , negative --fail */ int hns_mac_set_autoneg(struct hns_mac_cb *mac_cb, u8 enable) { @@ -623,7 +624,7 @@ int hns_mac_set_pauseparam(struct hns_mac_cb *mac_cb, u32 rx_en, u32 tx_en) /** * hns_mac_init_ex - mac init * @mac_cb: mac control block - * retuen 0 - success , negative --fail + * return 0 - success , negative --fail */ static int hns_mac_init_ex(struct hns_mac_cb *mac_cb) { @@ -800,7 +801,6 @@ static const struct { /** *hns_mac_get_info - get mac information from device node *@mac_cb: mac device - *@np:device node * return: 0 --success, negative --fail */ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) @@ -951,7 +951,7 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) /** * hns_mac_get_mode - get mac mode * @phy_if: phy interface - * retuen 0 - gmac, 1 - xgmac , negative --fail + * return 0 - gmac, 1 - xgmac , negative --fail */ static int hns_mac_get_mode(phy_interface_t phy_if) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index acfa86e5296f..87d3db4666df 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -207,7 +207,7 @@ static int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) /** * hns_dsaf_sbm_link_sram_init_en - config dsaf_sbm_init_en - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_sbm_link_sram_init_en(struct dsaf_device *dsaf_dev) { @@ -216,8 +216,8 @@ static void hns_dsaf_sbm_link_sram_init_en(struct dsaf_device *dsaf_dev) /** * hns_dsaf_reg_cnt_clr_ce - config hns_dsaf_reg_cnt_clr_ce - * @dsaf_id: dsa fabric id - * @hns_dsaf_reg_cnt_clr_ce: config value + * @dsaf_dev: dsa fabric id + * @reg_cnt_clr_ce: config value */ static void hns_dsaf_reg_cnt_clr_ce(struct dsaf_device *dsaf_dev, u32 reg_cnt_clr_ce) @@ -228,8 +228,8 @@ hns_dsaf_reg_cnt_clr_ce(struct dsaf_device *dsaf_dev, u32 reg_cnt_clr_ce) /** * hns_ppe_qid_cfg - config ppe qid - * @dsaf_id: dsa fabric id - * @pppe_qid_cfg: value array + * @dsaf_dev: dsa fabric id + * @qid_cfg: value array */ static void hns_dsaf_ppe_qid_cfg(struct dsaf_device *dsaf_dev, u32 qid_cfg) @@ -285,8 +285,8 @@ static void hns_dsaf_inner_qid_cfg(struct dsaf_device *dsaf_dev) /** * hns_dsaf_sw_port_type_cfg - cfg sw type - * @dsaf_id: dsa fabric id - * @psw_port_type: array + * @dsaf_dev: dsa fabric id + * @port_type: array */ static void hns_dsaf_sw_port_type_cfg(struct dsaf_device *dsaf_dev, enum dsaf_sw_port_type port_type) @@ -303,8 +303,8 @@ static void hns_dsaf_sw_port_type_cfg(struct dsaf_device *dsaf_dev, /** * hns_dsaf_stp_port_type_cfg - cfg stp type - * @dsaf_id: dsa fabric id - * @pstp_port_type: array + * @dsaf_dev: dsa fabric id + * @port_type: array */ static void hns_dsaf_stp_port_type_cfg(struct dsaf_device *dsaf_dev, enum dsaf_stp_port_type port_type) @@ -323,7 +323,7 @@ static void hns_dsaf_stp_port_type_cfg(struct dsaf_device *dsaf_dev, (AE_IS_VER1((dev)->dsaf_ver) ? 
DSAF_SBM_NUM : DSAFV2_SBM_NUM) /** * hns_dsaf_sbm_cfg - config sbm - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_sbm_cfg(struct dsaf_device *dsaf_dev) { @@ -342,7 +342,7 @@ static void hns_dsaf_sbm_cfg(struct dsaf_device *dsaf_dev) /** * hns_dsaf_sbm_cfg_mib_en - config sbm - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static int hns_dsaf_sbm_cfg_mib_en(struct dsaf_device *dsaf_dev) { @@ -387,7 +387,7 @@ static int hns_dsaf_sbm_cfg_mib_en(struct dsaf_device *dsaf_dev) /** * hns_dsaf_sbm_bp_wl_cfg - config sbm - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev) { @@ -556,7 +556,7 @@ static void hns_dsafv2_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev) /** * hns_dsaf_voq_bp_all_thrd_cfg - voq - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_voq_bp_all_thrd_cfg(struct dsaf_device *dsaf_dev) { @@ -599,7 +599,7 @@ static void hns_dsaf_tbl_tcam_match_cfg( /** * hns_dsaf_tbl_tcam_data_cfg - tbl - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id * @ptbl_tcam_data: addr */ static void hns_dsaf_tbl_tcam_data_cfg( @@ -614,8 +614,8 @@ static void hns_dsaf_tbl_tcam_data_cfg( /** * dsaf_tbl_tcam_mcast_cfg - tbl - * @dsaf_id: dsa fabric id - * @ptbl_tcam_mcast: addr + * @dsaf_dev: dsa fabric id + * @mcast: addr */ static void hns_dsaf_tbl_tcam_mcast_cfg( struct dsaf_device *dsaf_dev, @@ -648,8 +648,8 @@ static void hns_dsaf_tbl_tcam_mcast_cfg( /** * hns_dsaf_tbl_tcam_ucast_cfg - tbl - * @dsaf_id: dsa fabric id - * @ptbl_tcam_ucast: addr + * @dsaf_dev: dsa fabric id + * @tbl_tcam_ucast: addr */ static void hns_dsaf_tbl_tcam_ucast_cfg( struct dsaf_device *dsaf_dev, @@ -674,8 +674,8 @@ static void hns_dsaf_tbl_tcam_ucast_cfg( /** * hns_dsaf_tbl_line_cfg - tbl - * @dsaf_id: dsa fabric id - * @ptbl_lin: addr + * @dsaf_dev: dsa fabric id + * @tbl_lin: addr */ static void hns_dsaf_tbl_line_cfg(struct dsaf_device *dsaf_dev, struct dsaf_tbl_line_cfg *tbl_lin) @@ -695,7 +695,7 @@ static void hns_dsaf_tbl_line_cfg(struct dsaf_device *dsaf_dev, /** * hns_dsaf_tbl_tcam_mcast_pul - tbl - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_tbl_tcam_mcast_pul(struct dsaf_device *dsaf_dev) { @@ -710,7 +710,7 @@ static void hns_dsaf_tbl_tcam_mcast_pul(struct dsaf_device *dsaf_dev) /** * hns_dsaf_tbl_line_pul - tbl - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_tbl_line_pul(struct dsaf_device *dsaf_dev) { @@ -725,7 +725,7 @@ static void hns_dsaf_tbl_line_pul(struct dsaf_device *dsaf_dev) /** * hns_dsaf_tbl_tcam_data_mcast_pul - tbl - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_tbl_tcam_data_mcast_pul( struct dsaf_device *dsaf_dev) @@ -743,7 +743,7 @@ static void hns_dsaf_tbl_tcam_data_mcast_pul( /** * hns_dsaf_tbl_tcam_data_ucast_pul - tbl - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_tbl_tcam_data_ucast_pul( struct dsaf_device *dsaf_dev) @@ -768,8 +768,7 @@ void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en) /** * hns_dsaf_tbl_stat_en - tbl - * @dsaf_id: dsa fabric id - * @ptbl_stat_en: addr + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_tbl_stat_en(struct dsaf_device *dsaf_dev) { @@ -785,7 +784,7 @@ static void hns_dsaf_tbl_stat_en(struct dsaf_device *dsaf_dev) /** * hns_dsaf_rocee_bp_en - rocee back press enable - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_rocee_bp_en(struct 
dsaf_device *dsaf_dev) { @@ -852,9 +851,9 @@ static void hns_dsaf_int_tbl_src_clr(struct dsaf_device *dsaf_dev, /** * hns_dsaf_single_line_tbl_cfg - INT - * @dsaf_id: dsa fabric id - * @address: - * @ptbl_line: + * @dsaf_dev: dsa fabric id + * @address: the address + * @ptbl_line: the line */ static void hns_dsaf_single_line_tbl_cfg( struct dsaf_device *dsaf_dev, @@ -876,9 +875,10 @@ static void hns_dsaf_single_line_tbl_cfg( /** * hns_dsaf_tcam_uc_cfg - INT - * @dsaf_id: dsa fabric id - * @address, - * @ptbl_tcam_data, + * @dsaf_dev: dsa fabric id + * @address: the address + * @ptbl_tcam_data: the data + * @ptbl_tcam_ucast: unicast */ static void hns_dsaf_tcam_uc_cfg( struct dsaf_device *dsaf_dev, u32 address, @@ -904,7 +904,8 @@ static void hns_dsaf_tcam_uc_cfg( * @dsaf_dev: dsa fabric device struct pointer * @address: tcam index * @ptbl_tcam_data: tcam data struct pointer - * @ptbl_tcam_mcast: tcam mask struct pointer, it must be null for HNSv1 + * @ptbl_tcam_mask: tcam mask struct pointer, it must be null for HNSv1 + * @ptbl_tcam_mcast: tcam data struct pointer */ static void hns_dsaf_tcam_mc_cfg( struct dsaf_device *dsaf_dev, u32 address, @@ -933,8 +934,10 @@ static void hns_dsaf_tcam_mc_cfg( /** * hns_dsaf_tcam_uc_cfg_vague - INT * @dsaf_dev: dsa fabric device struct pointer - * @address, - * @ptbl_tcam_data, + * @address: the address + * @tcam_data: the data + * @tcam_mask: the mask + * @tcam_uc: the unicast data */ static void hns_dsaf_tcam_uc_cfg_vague(struct dsaf_device *dsaf_dev, u32 address, @@ -960,10 +963,10 @@ static void hns_dsaf_tcam_uc_cfg_vague(struct dsaf_device *dsaf_dev, /** * hns_dsaf_tcam_mc_cfg_vague - INT * @dsaf_dev: dsa fabric device struct pointer - * @address, - * @ptbl_tcam_data, - * @ptbl_tcam_mask - * @ptbl_tcam_mcast + * @address: the address + * @tcam_data: the data + * @tcam_mask: the mask + * @tcam_mc: the multicast data */ static void hns_dsaf_tcam_mc_cfg_vague(struct dsaf_device *dsaf_dev, u32 address, @@ -988,8 +991,8 @@ static void hns_dsaf_tcam_mc_cfg_vague(struct dsaf_device *dsaf_dev, /** * hns_dsaf_tcam_mc_invld - INT - * @dsaf_id: dsa fabric id - * @address + * @dsaf_dev: dsa fabric id + * @address: the address */ static void hns_dsaf_tcam_mc_invld(struct dsaf_device *dsaf_dev, u32 address) { @@ -1024,10 +1027,10 @@ hns_dsaf_tcam_addr_get(struct dsaf_drv_tbl_tcam_key *mac_key, u8 *addr) /** * hns_dsaf_tcam_uc_get - INT - * @dsaf_id: dsa fabric id - * @address - * @ptbl_tcam_data - * @ptbl_tcam_ucast + * @dsaf_dev: dsa fabric id + * @address: the address + * @ptbl_tcam_data: the data + * @ptbl_tcam_ucast: unicast */ static void hns_dsaf_tcam_uc_get( struct dsaf_device *dsaf_dev, u32 address, @@ -1077,10 +1080,10 @@ static void hns_dsaf_tcam_uc_get( /** * hns_dsaf_tcam_mc_get - INT - * @dsaf_id: dsa fabric id - * @address - * @ptbl_tcam_data - * @ptbl_tcam_ucast + * @dsaf_dev: dsa fabric id + * @address: the address + * @ptbl_tcam_data: the data + * @ptbl_tcam_mcast: tcam multicast data */ static void hns_dsaf_tcam_mc_get( struct dsaf_device *dsaf_dev, u32 address, @@ -1127,7 +1130,7 @@ static void hns_dsaf_tcam_mc_get( /** * hns_dsaf_tbl_line_init - INT - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_tbl_line_init(struct dsaf_device *dsaf_dev) { @@ -1141,7 +1144,7 @@ static void hns_dsaf_tbl_line_init(struct dsaf_device *dsaf_dev) /** * hns_dsaf_tbl_tcam_init - INT - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_tbl_tcam_init(struct dsaf_device *dsaf_dev) { @@ -1156,7 +1159,9 @@ 
static void hns_dsaf_tbl_tcam_init(struct dsaf_device *dsaf_dev) /** * hns_dsaf_pfc_en_cfg - dsaf pfc pause cfg - * @mac_cb: mac contrl block + * @dsaf_dev: dsa fabric id + * @mac_id: mac control block + * @tc_en: traffic class */ static void hns_dsaf_pfc_en_cfg(struct dsaf_device *dsaf_dev, int mac_id, int tc_en) @@ -1209,8 +1214,7 @@ void hns_dsaf_get_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id, /** * hns_dsaf_tbl_tcam_init - INT - * @dsaf_id: dsa fabric id - * @dsaf_mode + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_comm_init(struct dsaf_device *dsaf_dev) { @@ -1263,7 +1267,7 @@ static void hns_dsaf_comm_init(struct dsaf_device *dsaf_dev) /** * hns_dsaf_inode_init - INT - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_inode_init(struct dsaf_device *dsaf_dev) { @@ -1315,7 +1319,7 @@ static void hns_dsaf_inode_init(struct dsaf_device *dsaf_dev) /** * hns_dsaf_sbm_init - INT - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static int hns_dsaf_sbm_init(struct dsaf_device *dsaf_dev) { @@ -1369,7 +1373,7 @@ static int hns_dsaf_sbm_init(struct dsaf_device *dsaf_dev) /** * hns_dsaf_tbl_init - INT - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_tbl_init(struct dsaf_device *dsaf_dev) { @@ -1381,7 +1385,7 @@ static void hns_dsaf_tbl_init(struct dsaf_device *dsaf_dev) /** * hns_dsaf_voq_init - INT - * @dsaf_id: dsa fabric id + * @dsaf_dev: dsa fabric id */ static void hns_dsaf_voq_init(struct dsaf_device *dsaf_dev) { @@ -1435,7 +1439,7 @@ static void hns_dsaf_remove_hw(struct dsaf_device *dsaf_dev) /** * hns_dsaf_init - init dsa fabric * @dsaf_dev: dsa fabric device struct pointer - * retuen 0 - success , negative --fail + * return 0 - success , negative --fail */ static int hns_dsaf_init(struct dsaf_device *dsaf_dev) { @@ -2099,7 +2103,7 @@ static struct dsaf_device *hns_dsaf_alloc_dev(struct device *dev, /** * hns_dsaf_free_dev - free dev mem - * @dev: struct device pointer + * @dsaf_dev: struct device pointer */ static void hns_dsaf_free_dev(struct dsaf_device *dsaf_dev) { @@ -2108,9 +2112,9 @@ static void hns_dsaf_free_dev(struct dsaf_device *dsaf_dev) /** * dsaf_pfc_unit_cnt - set pfc unit count - * @dsaf_id: dsa fabric id - * @pport_rate: value array - * @pdsaf_pfc_unit_cnt: value array + * @dsaf_dev: dsa fabric id + * @mac_id: id in use + * @rate: value array */ static void hns_dsaf_pfc_unit_cnt(struct dsaf_device *dsaf_dev, int mac_id, enum dsaf_port_rate_mode rate) @@ -2139,8 +2143,9 @@ static void hns_dsaf_pfc_unit_cnt(struct dsaf_device *dsaf_dev, int mac_id, /** * dsaf_port_work_rate_cfg - fifo - * @dsaf_id: dsa fabric id - * @xge_ge_work_mode + * @dsaf_dev: dsa fabric id + * @mac_id: mac control block + * @rate_mode: value array */ static void hns_dsaf_port_work_rate_cfg(struct dsaf_device *dsaf_dev, int mac_id, @@ -2253,7 +2258,8 @@ void hns_dsaf_update_stats(struct dsaf_device *dsaf_dev, u32 node_num) /** *hns_dsaf_get_regs - dump dsaf regs - *@dsaf_dev: dsaf device + *@ddev: dsaf device + *@port: port *@data:data for value of regs */ void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data) @@ -2690,6 +2696,7 @@ void hns_dsaf_get_stats(struct dsaf_device *ddev, u64 *data, int port) /** *hns_dsaf_get_sset_count - get dsaf string set count + *@dsaf_dev: dsaf device *@stringset: type of values in data *return dsaf string name count */ @@ -2711,6 +2718,7 @@ int hns_dsaf_get_sset_count(struct dsaf_device *dsaf_dev, int stringset) *@stringset:srting set index *@data:strings name
value *@port:port index + *@dsaf_dev: dsaf device */ void hns_dsaf_get_strings(int stringset, u8 *data, int port, struct dsaf_device *dsaf_dev) @@ -2943,7 +2951,7 @@ int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port) /** * dsaf_probe - probo dsaf dev * @pdev: dasf platform device - * retuen 0 - success , negative --fail + * return 0 - success , negative --fail */ static int hns_dsaf_probe(struct platform_device *pdev) { @@ -3038,8 +3046,8 @@ module_platform_driver(g_dsaf_driver); /** * hns_dsaf_roce_reset - reset dsaf and roce * @dsaf_fwnode: Pointer to framework node for the dasf - * @enable: false - request reset , true - drop reset - * retuen 0 - success , negative -fail + * @dereset: false - request reset , true - drop reset + * return 0 - success , negative -fail */ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index a769273b36f7..a9aca8c24e90 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -330,11 +330,12 @@ static void hns_dsaf_xge_srst_by_port_acpi(struct dsaf_device *dsaf_dev, * hns_dsaf_srst_chns - reset dsaf channels * @dsaf_dev: dsaf device struct pointer * @msk: xbar channels mask value: + * @dereset: false - request reset , true - drop reset + * * bit0-5 for xge0-5 * bit6-11 for ppe0-5 * bit12-17 for roce0-5 * bit18-19 for com/dfx - * @dereset: false - request reset , true - drop reset */ static void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) @@ -353,11 +354,12 @@ hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) * hns_dsaf_srst_chns - reset dsaf channels * @dsaf_dev: dsaf device struct pointer * @msk: xbar channels mask value: + * @dereset: false - request reset , true - drop reset + * * bit0-5 for xge0-5 * bit6-11 for ppe0-5 * bit12-17 for roce0-5 * bit18-19 for com/dfx - * @dereset: false - request reset , true - drop reset */ static void hns_dsaf_srst_chns_acpi(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) @@ -612,7 +614,8 @@ static int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt) /** * hns_mac_config_sds_loopback - set loop back for serdes * @mac_cb: mac control block - * retuen 0 == success + * @en: enable or disable + * return 0 == success */ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index 2b34b553acf3..d0f8b1fff333 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -66,8 +66,8 @@ hns_ppe_common_get_ioaddr(struct ppe_common_cb *ppe_common) /** * hns_ppe_common_get_cfg - get ppe common config * @dsaf_dev: dasf device - * comm_index: common index - * retuen 0 - success , negative --fail + * @comm_index: common index + * return 0 - success , negative --fail */ static int hns_ppe_common_get_cfg(struct dsaf_device *dsaf_dev, int comm_index) { @@ -143,7 +143,7 @@ static void hns_ppe_set_vlan_strip(struct hns_ppe_cb *ppe_cb, int en) /** * hns_ppe_checksum_hw - set ppe checksum caculate - * @ppe_device: ppe device + * @ppe_cb: ppe device * @value: value */ static void hns_ppe_checksum_hw(struct hns_ppe_cb *ppe_cb, u32 value) @@ -179,7 +179,7 @@ static void hns_ppe_set_qid(struct ppe_common_cb *ppe_common, u32 qid) /** * hns_ppe_set_port_mode - 
set port mode - * @ppe_device: ppe device + * @ppe_cb: ppe device * @mode: port mode */ static void hns_ppe_set_port_mode(struct hns_ppe_cb *ppe_cb, @@ -344,7 +344,7 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) /** * ppe_uninit_hw - uninit ppe - * @ppe_device: ppe device + * @ppe_cb: ppe device */ static void hns_ppe_uninit_hw(struct hns_ppe_cb *ppe_cb) { @@ -384,7 +384,8 @@ void hns_ppe_uninit(struct dsaf_device *dsaf_dev) /** * hns_ppe_reset - reinit ppe/rcb hw * @dsaf_dev: dasf device - * retuen void + * @ppe_common_index: the index + * return void */ void hns_ppe_reset_common(struct dsaf_device *dsaf_dev, u8 ppe_common_index) { @@ -455,7 +456,7 @@ int hns_ppe_get_regs_count(void) /** * ppe_get_strings - get ppe srting - * @ppe_device: ppe device + * @ppe_cb: ppe device * @stringset: string set type * @data: output string */ @@ -513,7 +514,7 @@ void hns_ppe_get_stats(struct hns_ppe_cb *ppe_cb, u64 *data) /** * hns_ppe_init - init ppe device * @dsaf_dev: dasf device - * retuen 0 - success , negative --fail + * return 0 - success , negative --fail */ int hns_ppe_init(struct dsaf_device *dsaf_dev) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 5453597ec629..b6c8910cf7ba 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -34,7 +34,7 @@ /** *hns_rcb_wait_fbd_clean - clean fbd *@qs: ring struct pointer array - *@qnum: num of array + *@q_num: num of array *@flag: tx or rx flag */ void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag) @@ -191,7 +191,8 @@ void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag) /** *hns_rcb_ring_enable_hw - enable ring - *@ring: rcb ring + *@q: rcb ring + *@val: value to write */ void hns_rcb_ring_enable_hw(struct hnae_queue *q, u32 val) { @@ -844,7 +845,7 @@ void hns_rcb_update_stats(struct hnae_queue *queue) /** *hns_rcb_get_stats - get rcb statistic - *@ring: rcb ring + *@queue: rcb ring *@data:statistic value */ void hns_rcb_get_stats(struct hnae_queue *queue, u64 *data) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c index 0a3dbab2dfc9..7e3609ce112a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c @@ -130,7 +130,7 @@ static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv) /** *hns_xgmac_enable - enable xgmac port - *@drv: mac driver + *@mac_drv: mac driver *@mode: mode of mac port */ static void hns_xgmac_enable(void *mac_drv, enum mac_commom_mode mode) @@ -242,7 +242,8 @@ static void hns_xgmac_config_pad_and_crc(void *mac_drv, u8 newval) /** *hns_xgmac_pausefrm_cfg - set pause param about xgmac *@mac_drv: mac driver - *@newval:enable of pad and crc + *@rx_en: enable receive + *@tx_en: enable transmit */ static void hns_xgmac_pausefrm_cfg(void *mac_drv, u32 rx_en, u32 tx_en) { @@ -490,7 +491,6 @@ static void hns_xgmac_get_link_status(void *mac_drv, u32 *link_stat) /** *hns_xgmac_get_regs - dump xgmac regs *@mac_drv: mac driver - *@cmd:ethtool cmd *@data:data for value of regs */ static void hns_xgmac_get_regs(void *mac_drv, void *data) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 22522f8a5299..858cb293152a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -11,6 +11,7 @@ #include <linux/io.h> 
#include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/phy.h> #include <linux/platform_device.h> @@ -557,10 +558,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, va = (unsigned char *)desc_cb->buf + desc_cb->page_offset; /* prefetch first cache line of first page */ - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif + net_prefetch(va); skb = *out_skb = napi_alloc_skb(&ring_data->napi, HNS_RX_HEAD_SIZE); @@ -754,6 +752,8 @@ static void hns_update_rx_rate(struct hnae_ring *ring) /** * smooth_alg - smoothing algrithm for adjusting coalesce parameter + * @new_param: new value + * @old_param: old value **/ static u32 smooth_alg(u32 new_param, u32 old_param) { @@ -1293,6 +1293,7 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv) rd->ring->ring_name[RCB_RING_NAME_LEN - 1] = '\0'; + irq_set_status_flags(rd->ring->irq, IRQ_NOAUTOEN); ret = request_irq(rd->ring->irq, hns_irq_handle, 0, rd->ring->ring_name, rd); if (ret) { @@ -1300,7 +1301,6 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv) rd->ring->irq); goto out_free_irq; } - disable_irq(rd->ring->irq); cpu = hns_nic_init_affinity_mask(h->q_num, i, rd->ring, &rd->mask); @@ -1831,9 +1831,8 @@ static int hns_nic_uc_unsync(struct net_device *netdev, } /** - * nic_set_multicast_list - set mutl mac address - * @netdev: net device - * @p: mac address + * hns_set_multicast_list - set multicast mac address + * @ndev: net device * * return void */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 14e60c9e491d..7165da0ee9aa 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -462,7 +462,7 @@ static int __lb_clean_rings(struct hns_nic_priv *priv, } /** - * nic_run_loopback_test - run loopback test + * __lb_run_test - run loopback test * @ndev: net device * @loop_mode: loopback mode */ @@ -971,7 +971,7 @@ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } /** - * nic_get_sset_count - get string set count witch returned by nic_get_strings. + * hns_get_sset_count - get string set count returned by nic_get_strings * @netdev: net device * @stringset: string set index, 0: self test string; 1: statistics string. * @@ -1027,7 +1027,7 @@ static int hns_phy_led_set(struct net_device *netdev, int value) } /** - * nic_set_phys_id - set phy identify LED. + * hns_set_phys_id - set phy identify LED. * @netdev: net device * @state: LED state. * @@ -1125,7 +1125,7 @@ static void hns_get_regs(struct net_device *net_dev, struct ethtool_regs *cmd, } /** - * nic_get_regs_len - get total register len. + * hns_get_regs_len - get total register len. * @net_dev: net device * * Return total register len.
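The hns hunks above all repair kernel-doc headers the same way: each '@' tag is renamed to match the actual parameter in the C signature, tags for parameters that no longer exist are dropped, and the misspelled "retuen" becomes "return". For reference, a minimal sketch of the comment shape these fixes converge on; the function and parameter names here are illustrative placeholders, not taken from the patch:

/**
 * hns_example_bc_en - set broadcast rx&tx enable (illustrative placeholder)
 * @mac_cb: mac control block to operate on
 * @port_num: port number the setting applies to
 * @enable: true to enable broadcast, false to disable
 *
 * Every '@' tag must name a parameter exactly as it appears in the
 * signature below; the return convention is spelled out in the body:
 * return 0 - success , negative --fail
 */
static int hns_example_bc_en(struct hns_mac_cb *mac_cb, u32 port_num, bool enable);

scripts/kernel-doc warns whenever a tag and a parameter name diverge, which is the class of warning most of these hunks silence.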
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 088550db2de7..912c51e327d6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -34,6 +34,13 @@ #define HNAE3_MIN_VECTOR_NUM 2 /* first one for misc, another for IO */ +/* Device version */ +#define HNAE3_DEVICE_VERSION_V1 0x00020 +#define HNAE3_DEVICE_VERSION_V2 0x00021 +#define HNAE3_DEVICE_VERSION_V3 0x00030 + +#define HNAE3_PCI_REVISION_BIT_SIZE 8 + /* Device IDs */ #define HNAE3_DEV_ID_GE 0xA220 #define HNAE3_DEV_ID_25GE 0xA221 @@ -42,8 +49,9 @@ #define HNAE3_DEV_ID_50GE_RDMA 0xA224 #define HNAE3_DEV_ID_50GE_RDMA_MACSEC 0xA225 #define HNAE3_DEV_ID_100G_RDMA_MACSEC 0xA226 -#define HNAE3_DEV_ID_100G_VF 0xA22E -#define HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF 0xA22F +#define HNAE3_DEV_ID_200G_RDMA 0xA228 +#define HNAE3_DEV_ID_VF 0xA22E +#define HNAE3_DEV_ID_RDMA_DCB_PFC_VF 0xA22F #define HNAE3_CLASS_NAME_SIZE 16 @@ -53,8 +61,6 @@ #define HNAE3_KNIC_CLIENT_INITED_B 0x3 #define HNAE3_UNIC_CLIENT_INITED_B 0x4 #define HNAE3_ROCE_CLIENT_INITED_B 0x5 -#define HNAE3_DEV_SUPPORT_FD_B 0x6 -#define HNAE3_DEV_SUPPORT_GRO_B 0x7 #define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) |\ BIT(HNAE3_DEV_SUPPORT_ROCE_B)) @@ -65,11 +71,67 @@ #define hnae3_dev_dcb_supported(hdev) \ hnae3_get_bit((hdev)->ae_dev->flag, HNAE3_DEV_SUPPORT_DCB_B) +enum HNAE3_DEV_CAP_BITS { + HNAE3_DEV_SUPPORT_FD_B, + HNAE3_DEV_SUPPORT_GRO_B, + HNAE3_DEV_SUPPORT_FEC_B, + HNAE3_DEV_SUPPORT_UDP_GSO_B, + HNAE3_DEV_SUPPORT_QB_B, + HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, + HNAE3_DEV_SUPPORT_PTP_B, + HNAE3_DEV_SUPPORT_INT_QL_B, + HNAE3_DEV_SUPPORT_SIMPLE_BD_B, + HNAE3_DEV_SUPPORT_TX_PUSH_B, + HNAE3_DEV_SUPPORT_PHY_IMP_B, + HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, + HNAE3_DEV_SUPPORT_HW_PAD_B, + HNAE3_DEV_SUPPORT_STASH_B, +}; + #define hnae3_dev_fd_supported(hdev) \ - hnae3_get_bit((hdev)->ae_dev->flag, HNAE3_DEV_SUPPORT_FD_B) + test_bit(HNAE3_DEV_SUPPORT_FD_B, (hdev)->ae_dev->caps) #define hnae3_dev_gro_supported(hdev) \ - hnae3_get_bit((hdev)->ae_dev->flag, HNAE3_DEV_SUPPORT_GRO_B) + test_bit(HNAE3_DEV_SUPPORT_GRO_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_fec_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_FEC_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_udp_gso_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_qb_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_QB_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_fd_forward_tc_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_ptp_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_PTP_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_int_ql_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_INT_QL_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_simple_bd_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_SIMPLE_BD_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_tx_push_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_phy_imp_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_tqp_txrx_indep_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_hw_pad_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_HW_PAD_B, (hdev)->ae_dev->caps) + +#define hnae3_dev_stash_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_STASH_B, (hdev)->ae_dev->caps) + +#define hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev) 
\ + test_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, (ae_dev)->caps) #define ring_ptr_move_fw(ring, p) \ ((ring)->p = ((ring)->p + 1) % (ring)->desc_num) @@ -152,6 +214,7 @@ enum hnae3_hw_error_type { HNAE3_PPU_POISON_ERROR, HNAE3_CMDQ_ECC_ERROR, HNAE3_IMP_RD_POISON_ERROR, + HNAE3_ROCEE_AXI_RESP_ERROR, }; enum hnae3_reset_type { @@ -207,6 +270,17 @@ struct hnae3_ring_chain_node { #define HNAE3_IS_TX_RING(node) \ (((node)->flag & (1 << HNAE3_RING_TYPE_B)) == HNAE3_RING_TYPE_TX) +/* device specification info from firmware */ +struct hnae3_dev_specs { + u32 mac_entry_num; /* number of mac-vlan table entry */ + u32 mng_entry_num; /* number of manager table entry */ + u32 max_tm_rate; + u16 rss_ind_tbl_size; + u16 rss_key_size; + u16 int_ql_max; /* max value of interrupt coalesce based on INT_QL */ + u8 max_non_tso_bd_num; /* max BD number of one non-TSO packet */ +}; + struct hnae3_client_ops { int (*init_instance)(struct hnae3_handle *handle); void (*uninit_instance)(struct hnae3_handle *handle, bool reset); @@ -227,12 +301,16 @@ struct hnae3_client { struct list_head node; }; +#define HNAE3_DEV_CAPS_MAX_NUM 96 struct hnae3_ae_dev { struct pci_dev *pdev; const struct hnae3_ae_ops *ops; struct list_head node; u32 flag; unsigned long hw_err_reset_req; + struct hnae3_dev_specs dev_specs; + u32 dev_version; + unsigned long caps[BITS_TO_LONGS(HNAE3_DEV_CAPS_MAX_NUM)]; void *priv; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index fe7fb565da19..dc9a85745e62 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -15,11 +15,12 @@ static struct dentry *hns3_dbgfs_root; static int hns3_dbg_queue_info(struct hnae3_handle *h, const char *cmd_buf) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); struct hns3_nic_priv *priv = h->priv; struct hns3_enet_ring *ring; u32 base_add_l, base_add_h; u32 queue_num, queue_max; - u32 value, i = 0; + u32 value, i; int cnt; if (!priv->ring) { @@ -118,8 +119,25 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h, value = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_PKTNUM_RECORD_REG); - dev_info(&h->pdev->dev, "TX(%u) RING PKTNUM: %u\n\n", i, - value); + dev_info(&h->pdev->dev, "TX(%u) RING PKTNUM: %u\n", i, value); + + value = readl_relaxed(ring->tqp->io_base + HNS3_RING_EN_REG); + dev_info(&h->pdev->dev, "TX/RX(%u) RING EN: %s\n", i, + value ? "enable" : "disable"); + + if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev)) { + value = readl_relaxed(ring->tqp->io_base + + HNS3_RING_TX_EN_REG); + dev_info(&h->pdev->dev, "TX(%u) RING EN: %s\n", i, + value ? "enable" : "disable"); + + value = readl_relaxed(ring->tqp->io_base + + HNS3_RING_RX_EN_REG); + dev_info(&h->pdev->dev, "RX(%u) RING EN: %s\n", i, + value ? 
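Every *_supported() helper above reduces to test_bit() on the new ae_dev->caps bitmap, an unsigned-long array sized by BITS_TO_LONGS(HNAE3_DEV_CAPS_MAX_NUM). A userspace sketch of the same bit arithmetic (not the kernel implementation):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)
#define CAPS_MAX 96 /* mirrors HNAE3_DEV_CAPS_MAX_NUM */

static void set_cap(unsigned long *caps, unsigned int bit)
{
	caps[bit / BITS_PER_LONG] |= 1UL << (bit % BITS_PER_LONG);
}

static bool test_cap(const unsigned long *caps, unsigned int bit)
{
	return caps[bit / BITS_PER_LONG] & (1UL << (bit % BITS_PER_LONG));
}

int main(void)
{
	unsigned long caps[BITS_TO_LONGS(CAPS_MAX)] = { 0 };

	set_cap(caps, 1); /* e.g. HNAE3_DEV_SUPPORT_GRO_B */
	printf("GRO: %s\n", test_cap(caps, 1) ? "yes" : "no");
	return 0;
}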
"enable" : "disable"); + } + + dev_info(&h->pdev->dev, "\n"); } return 0; @@ -244,6 +262,8 @@ static void hns3_dbg_help(struct hnae3_handle *h) dev_info(&h->pdev->dev, "queue info <number>\n"); dev_info(&h->pdev->dev, "queue map\n"); dev_info(&h->pdev->dev, "bd info <q_num> <bd index>\n"); + dev_info(&h->pdev->dev, "dev capability\n"); + dev_info(&h->pdev->dev, "dev spec\n"); if (!hns3_is_phys_func(h->pdev)) return; @@ -264,6 +284,7 @@ static void hns3_dbg_help(struct hnae3_handle *h) dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n"); dev_info(&h->pdev->dev, "dump uc mac list <func id>\n"); dev_info(&h->pdev->dev, "dump mc mac list <func id>\n"); + dev_info(&h->pdev->dev, "dump intr\n"); memset(printf_buf, 0, HNS3_DBG_BUF_LEN); strncat(printf_buf, "dump reg [[bios common] [ssu <port_id>]", @@ -284,6 +305,52 @@ static void hns3_dbg_help(struct hnae3_handle *h) dev_info(&h->pdev->dev, "%s", printf_buf); } +static void hns3_dbg_dev_caps(struct hnae3_handle *h) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); + unsigned long *caps; + + caps = ae_dev->caps; + + dev_info(&h->pdev->dev, "support FD: %s\n", + test_bit(HNAE3_DEV_SUPPORT_FD_B, caps) ? "yes" : "no"); + dev_info(&h->pdev->dev, "support GRO: %s\n", + test_bit(HNAE3_DEV_SUPPORT_GRO_B, caps) ? "yes" : "no"); + dev_info(&h->pdev->dev, "support FEC: %s\n", + test_bit(HNAE3_DEV_SUPPORT_FEC_B, caps) ? "yes" : "no"); + dev_info(&h->pdev->dev, "support UDP GSO: %s\n", + test_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, caps) ? "yes" : "no"); + dev_info(&h->pdev->dev, "support PTP: %s\n", + test_bit(HNAE3_DEV_SUPPORT_PTP_B, caps) ? "yes" : "no"); + dev_info(&h->pdev->dev, "support INT QL: %s\n", + test_bit(HNAE3_DEV_SUPPORT_INT_QL_B, caps) ? "yes" : "no"); +} + +static void hns3_dbg_dev_specs(struct hnae3_handle *h) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); + struct hnae3_dev_specs *dev_specs = &ae_dev->dev_specs; + struct hnae3_knic_private_info *kinfo = &h->kinfo; + struct hns3_nic_priv *priv = h->priv; + + dev_info(priv->dev, "MAC entry num: %u\n", dev_specs->mac_entry_num); + dev_info(priv->dev, "MNG entry num: %u\n", dev_specs->mng_entry_num); + dev_info(priv->dev, "MAX non tso bd num: %u\n", + dev_specs->max_non_tso_bd_num); + dev_info(priv->dev, "RSS ind tbl size: %u\n", + dev_specs->rss_ind_tbl_size); + dev_info(priv->dev, "RSS key size: %u\n", dev_specs->rss_key_size); + dev_info(priv->dev, "RSS size: %u\n", kinfo->rss_size); + dev_info(priv->dev, "Allocated RSS size: %u\n", kinfo->req_rss_size); + dev_info(priv->dev, "Task queue pairs numbers: %u\n", kinfo->num_tqps); + + dev_info(priv->dev, "RX buffer length: %u\n", kinfo->rx_buf_len); + dev_info(priv->dev, "Desc num per TX queue: %u\n", kinfo->num_tx_desc); + dev_info(priv->dev, "Desc num per RX queue: %u\n", kinfo->num_rx_desc); + dev_info(priv->dev, "Total number of enabled TCs: %u\n", kinfo->num_tc); + dev_info(priv->dev, "MAX INT QL: %u\n", dev_specs->int_ql_max); +} + static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { @@ -359,6 +426,10 @@ static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer, ret = hns3_dbg_queue_map(handle); else if (strncmp(cmd_buf, "bd info", 7) == 0) ret = hns3_dbg_bd_info(handle, cmd_buf); + else if (strncmp(cmd_buf, "dev capability", 14) == 0) + hns3_dbg_dev_caps(handle); + else if (strncmp(cmd_buf, "dev spec", 8) == 0) + hns3_dbg_dev_specs(handle); else if (handle->ae_algo->ops->dbg_run_cmd) ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf); else diff 
--git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a4f1d515e5e0..a362516a3185 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -81,8 +81,10 @@ static const struct pci_device_id hns3_pci_tbl[] = { HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), + HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_VF), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_RDMA_DCB_PFC_VF), HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, /* required last entry */ {0, } @@ -623,27 +625,15 @@ void hns3_request_update_promisc_mode(struct hnae3_handle *handle) ops->request_update_promisc_mode(handle); } -int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags) -{ - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; - - if (h->ae_algo->ops->set_promisc_mode) { - return h->ae_algo->ops->set_promisc_mode(h, - promisc_flags & HNAE3_UPE, - promisc_flags & HNAE3_MPE); - } - - return 0; -} - void hns3_enable_vlan_filter(struct net_device *netdev, bool enable) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); bool last_state; - if (h->pdev->revision >= 0x21 && h->ae_algo->ops->enable_vlan_filter) { + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 && + h->ae_algo->ops->enable_vlan_filter) { last_state = h->netdev_flags & HNAE3_VLAN_FLTR ? true : false; if (enable != last_state) { netdev_info(netdev, @@ -706,12 +696,19 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, /* normal or tunnel packet */ l4_offset = l4.hdr - skb->data; - hdr_len = (l4.tcp->doff << 2) + l4_offset; /* remove payload length from inner pseudo checksum when tso */ l4_paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, - (__force __wsum)htonl(l4_paylen)); + + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + hdr_len = sizeof(*l4.udp) + l4_offset; + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(l4_paylen)); + } else { + hdr_len = (l4.tcp->doff << 2) + l4_offset; + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(l4_paylen)); + } /* find the txbd field values */ *paylen = skb->len - hdr_len; @@ -1194,21 +1191,23 @@ static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size, return bd_num; } -static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size) +static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size, + u8 max_non_tso_bd_num) { struct sk_buff *frag_skb; unsigned int bd_num = 0; /* If the total len is within the max bd limit */ if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) && - skb_shinfo(skb)->nr_frags < HNS3_MAX_NON_TSO_BD_NUM)) + skb_shinfo(skb)->nr_frags < max_non_tso_bd_num)) return skb_shinfo(skb)->nr_frags + 1U; /* The below case will always be linearized, return * HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized. 
*/ if (unlikely(skb->len > HNS3_MAX_TSO_SIZE || - (!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE))) + (!skb_is_gso(skb) && skb->len > + HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num)))) return HNS3_MAX_TSO_BD_NUM + 1U; bd_num = hns3_skb_bd_num(skb, bd_size, bd_num); @@ -1233,31 +1232,34 @@ static unsigned int hns3_gso_hdr_len(struct sk_buff *skb) return skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); } -/* HW need every continuous 8 buffer data to be larger than MSS, - * we simplify it by ensuring skb_headlen + the first continuous - * 7 frags to to be larger than gso header len + mss, and the remaining - * continuous 7 frags to be larger than MSS except the last 7 frags. +/* HW need every continuous max_non_tso_bd_num buffer data to be larger + * than MSS, we simplify it by ensuring skb_headlen + the first continuous + * max_non_tso_bd_num - 1 frags to be larger than gso header len + mss, + * and the remaining continuous max_non_tso_bd_num - 1 frags to be larger + * than MSS except the last max_non_tso_bd_num - 1 frags. */ static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size, - unsigned int bd_num) + unsigned int bd_num, u8 max_non_tso_bd_num) { unsigned int tot_len = 0; int i; - for (i = 0; i < HNS3_MAX_NON_TSO_BD_NUM - 1U; i++) + for (i = 0; i < max_non_tso_bd_num - 1U; i++) tot_len += bd_size[i]; - /* ensure the first 8 frags is greater than mss + header */ - if (tot_len + bd_size[HNS3_MAX_NON_TSO_BD_NUM - 1U] < + /* ensure the first max_non_tso_bd_num frags is greater than + * mss + header + */ + if (tot_len + bd_size[max_non_tso_bd_num - 1U] < skb_shinfo(skb)->gso_size + hns3_gso_hdr_len(skb)) return true; - /* ensure every continuous 7 buffer is greater than mss - * except the last one. + /* ensure every continuous max_non_tso_bd_num - 1 buffer is greater + * than mss except the last one. 
*/ - for (i = 0; i < bd_num - HNS3_MAX_NON_TSO_BD_NUM; i++) { + for (i = 0; i < bd_num - max_non_tso_bd_num; i++) { tot_len -= bd_size[i]; - tot_len += bd_size[i + HNS3_MAX_NON_TSO_BD_NUM - 1U]; + tot_len += bd_size[i + max_non_tso_bd_num - 1U]; if (tot_len < skb_shinfo(skb)->gso_size) return true; @@ -1268,7 +1270,7 @@ static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size, void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size) { - int i = 0; + int i; for (i = 0; i < MAX_SKB_FRAGS; i++) size[i] = skb_frag_size(&shinfo->frags[i]); @@ -1279,14 +1281,16 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, struct sk_buff *skb) { struct hns3_nic_priv *priv = netdev_priv(netdev); + u8 max_non_tso_bd_num = priv->max_non_tso_bd_num; unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U]; unsigned int bd_num; - bd_num = hns3_tx_bd_num(skb, bd_size); - if (unlikely(bd_num > HNS3_MAX_NON_TSO_BD_NUM)) { + bd_num = hns3_tx_bd_num(skb, bd_size, max_non_tso_bd_num); + if (unlikely(bd_num > max_non_tso_bd_num)) { if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) && - !hns3_skb_need_linearized(skb, bd_size, bd_num)) { - trace_hns3_over_8bd(skb); + !hns3_skb_need_linearized(skb, bd_size, bd_num, + max_non_tso_bd_num)) { + trace_hns3_over_max_bd(skb); goto out; } @@ -1296,8 +1300,8 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, bd_num = hns3_tx_bd_count(skb->len); if ((skb_is_gso(skb) && bd_num > HNS3_MAX_TSO_BD_NUM) || (!skb_is_gso(skb) && - bd_num > HNS3_MAX_NON_TSO_BD_NUM)) { - trace_hns3_over_8bd(skb); + bd_num > max_non_tso_bd_num)) { + trace_hns3_over_max_bd(skb); return -ENOMEM; } @@ -1397,6 +1401,27 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, return bd_num; } +static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, + bool doorbell) +{ + ring->pending_buf += num; + + if (!doorbell) { + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_more++; + u64_stats_update_end(&ring->syncp); + return; + } + + if (!ring->pending_buf) + return; + + writel(ring->pending_buf, + ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); + ring->pending_buf = 0; + WRITE_ONCE(ring->last_to_use, ring->next_to_use); +} + netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -1405,11 +1430,14 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) int pre_ntu, next_to_use_head; struct sk_buff *frag_skb; int bd_num = 0; + bool doorbell; int ret; /* Hardware can only handle short frames above 32 bytes */ - if (skb_put_padto(skb, HNS3_MIN_TX_LEN)) + if (skb_put_padto(skb, HNS3_MIN_TX_LEN)) { + hns3_tx_doorbell(ring, 0, !netdev_xmit_more()); return NETDEV_TX_OK; + } /* Prefetch the data used later */ prefetch(skb->data); @@ -1420,6 +1448,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) u64_stats_update_begin(&ring->syncp); ring->stats.tx_busy++; u64_stats_update_end(&ring->syncp); + hns3_tx_doorbell(ring, 0, true); return NETDEV_TX_BUSY; } else if (ret == -ENOMEM) { u64_stats_update_begin(&ring->syncp); @@ -1460,11 +1489,9 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) /* Complete translate all packets */ dev_queue = netdev_get_tx_queue(netdev, ring->queue_index); - netdev_tx_sent_queue(dev_queue, skb->len); - - wmb(); /* Commit all data before submit */ - - hnae3_queue_xmit(ring->tqp, bd_num); + doorbell = __netdev_tx_sent_queue(dev_queue, skb->len, + 
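hns3_skb_need_linearized() above enforces the windowed MSS rule from the rewritten comment: the first max_non_tso_bd_num buffers must cover the GSO header plus one MSS, and every later run of max_non_tso_bd_num - 1 buffers must cover one MSS, with the final run exempt. A standalone sketch of the check over plain arrays (assumes bd_num > max_bd >= 2):

#include <stdbool.h>
#include <stdio.h>

static bool need_linearized(const unsigned int *bd_size, unsigned int bd_num,
			    unsigned int max_bd, unsigned int mss,
			    unsigned int hdr_len)
{
	unsigned int tot_len = 0;
	unsigned int i;

	for (i = 0; i + 1 < max_bd; i++)
		tot_len += bd_size[i];

	/* first max_bd buffers must cover gso header + one MSS */
	if (tot_len + bd_size[max_bd - 1] < mss + hdr_len)
		return true;

	/* each later run of max_bd - 1 buffers must cover one MSS,
	 * except the last run
	 */
	for (i = 0; i + max_bd < bd_num; i++) {
		tot_len -= bd_size[i];
		tot_len += bd_size[i + max_bd - 1];
		if (tot_len < mss)
			return true;
	}
	return false;
}

int main(void)
{
	unsigned int bds[9] = { 256, 256, 256, 256, 256, 256, 256, 256, 256 };

	/* prints 0: every window carries enough bytes */
	printf("%d\n", need_linearized(bds, 9, 8, 1460, 54));
	return 0;
}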
netdev_xmit_more()); + hns3_tx_doorbell(ring, bd_num, doorbell); return NETDEV_TX_OK; @@ -1473,6 +1500,7 @@ fill_err: out_err_tx_ok: dev_kfree_skb_any(skb); + hns3_tx_doorbell(ring, 0, !netdev_xmit_more()); return NETDEV_TX_OK; } @@ -1853,13 +1881,13 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev) tx_ring->next_to_clean, napi->state); netdev_info(ndev, - "tx_pkts: %llu, tx_bytes: %llu, io_err_cnt: %llu, sw_err_cnt: %llu\n", + "tx_pkts: %llu, tx_bytes: %llu, sw_err_cnt: %llu, tx_pending: %d\n", tx_ring->stats.tx_pkts, tx_ring->stats.tx_bytes, - tx_ring->stats.io_err_cnt, tx_ring->stats.sw_err_cnt); + tx_ring->stats.sw_err_cnt, tx_ring->pending_buf); netdev_info(ndev, - "seg_pkt_cnt: %llu, tx_err_cnt: %llu, restart_queue: %llu, tx_busy: %llu\n", - tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_err_cnt, + "seg_pkt_cnt: %llu, tx_more: %llu, restart_queue: %llu, tx_busy: %llu\n", + tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_more, tx_ring->stats.restart_queue, tx_ring->stats.tx_busy); /* When mac received many pause frames continuous, it's unable to send @@ -2034,9 +2062,10 @@ bool hns3_is_phys_func(struct pci_dev *pdev) case HNAE3_DEV_ID_50GE_RDMA: case HNAE3_DEV_ID_50GE_RDMA_MACSEC: case HNAE3_DEV_ID_100G_RDMA_MACSEC: + case HNAE3_DEV_ID_200G_RDMA: return true; - case HNAE3_DEV_ID_100G_VF: - case HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF: + case HNAE3_DEV_ID_VF: + case HNAE3_DEV_ID_RDMA_DCB_PFC_VF: return false; default: dev_warn(&pdev->dev, "un-recognized pci device-id %u", @@ -2061,15 +2090,6 @@ static void hns3_disable_sriov(struct pci_dev *pdev) pci_disable_sriov(pdev); } -static void hns3_get_dev_capability(struct pci_dev *pdev, - struct hnae3_ae_dev *ae_dev) -{ - if (pdev->revision >= 0x21) { - hnae3_set_bit(ae_dev->flag, HNAE3_DEV_SUPPORT_FD_B, 1); - hnae3_set_bit(ae_dev->flag, HNAE3_DEV_SUPPORT_GRO_B, 1); - } -} - /* hns3_probe - Device initialization routine * @pdev: PCI device information struct * @ent: entry in hns3_pci_tbl @@ -2091,7 +2111,6 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ae_dev->pdev = pdev; ae_dev->flag = ent->driver_data; - hns3_get_dev_capability(pdev, ae_dev); pci_set_drvdata(pdev, ae_dev); ret = hnae3_register_ae_dev(ae_dev); @@ -2252,6 +2271,7 @@ static void hns3_set_default_feature(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); struct pci_dev *pdev = h->pdev; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); netdev->priv_flags |= IFF_UNICAST_FLT; @@ -2289,7 +2309,7 @@ static void hns3_set_default_feature(struct net_device *netdev) NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | NETIF_F_FRAGLIST; - if (pdev->revision >= 0x21) { + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { netdev->hw_features |= NETIF_F_GRO_HW; netdev->features |= NETIF_F_GRO_HW; @@ -2298,6 +2318,13 @@ static void hns3_set_default_feature(struct net_device *netdev) netdev->features |= NETIF_F_NTUPLE; } } + + if (test_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps)) { + netdev->hw_features |= NETIF_F_GSO_UDP_L4; + netdev->features |= NETIF_F_GSO_UDP_L4; + netdev->vlan_features |= NETIF_F_GSO_UDP_L4; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_L4; + } } static int hns3_alloc_buffer(struct hns3_enet_ring *ring, @@ -2316,17 +2343,19 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring, cb->buf = page_address(p); cb->length = hns3_page_size(ring); cb->type = DESC_TYPE_PAGE; + page_ref_add(p, USHRT_MAX - 1); + cb->pagecnt_bias = USHRT_MAX; return 0; } static void hns3_free_buffer(struct 
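hns3_tx_doorbell() defers the tail-register write while the stack signals more packets (netdev_xmit_more()/__netdev_tx_sent_queue()), counting deferrals as tx_more and ringing once per burst. An illustrative model of the batching, with a counter standing in for the MMIO write:

#include <stdbool.h>
#include <stdio.h>

struct ring {
	int pending_buf; /* BDs queued but not yet announced to hw */
	int doorbells;   /* stands in for writes to ...TX_RING_TAIL_REG */
	int tx_more;     /* deferred-doorbell stat */
};

static void tx_doorbell(struct ring *r, int num, bool kick)
{
	r->pending_buf += num;

	if (!kick) {         /* more packets coming: defer the MMIO */
		r->tx_more++;
		return;
	}
	if (!r->pending_buf) /* nothing accumulated */
		return;

	r->doorbells++;      /* writel(pending_buf, tail register) */
	r->pending_buf = 0;
}

int main(void)
{
	struct ring r = { 0, 0, 0 };

	tx_doorbell(&r, 2, false); /* xmit_more set: batch */
	tx_doorbell(&r, 3, false);
	tx_doorbell(&r, 1, true);  /* end of burst: single doorbell */
	printf("doorbells=%d tx_more=%d\n", r.doorbells, r.tx_more);
	return 0;
}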
hns3_enet_ring *ring, - struct hns3_desc_cb *cb) + struct hns3_desc_cb *cb, int budget) { if (cb->type == DESC_TYPE_SKB) - dev_kfree_skb_any((struct sk_buff *)cb->priv); - else if (!HNAE3_IS_TX_RING(ring)) - put_page((struct page *)cb->priv); + napi_consume_skb(cb->priv, budget); + else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias) + __page_frag_cache_drain(cb->priv, cb->pagecnt_bias); memset(cb, 0, sizeof(*cb)); } @@ -2358,7 +2387,8 @@ static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i) ring->desc[i].addr = 0; } -static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i) +static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i, + int budget) { struct hns3_desc_cb *cb = &ring->desc_cb[i]; @@ -2366,7 +2396,7 @@ static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i) return; hns3_buffer_detach(ring, i); - hns3_free_buffer(ring, cb); + hns3_free_buffer(ring, cb, budget); } static void hns3_free_buffers(struct hns3_enet_ring *ring) @@ -2374,7 +2404,7 @@ static void hns3_free_buffers(struct hns3_enet_ring *ring) int i; for (i = 0; i < ring->desc_num; i++) - hns3_free_buffer_detach(ring, i); + hns3_free_buffer_detach(ring, i, 0); } /* free desc along with its attached buffer */ @@ -2419,7 +2449,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring, return 0; out_with_buf: - hns3_free_buffer(ring, cb); + hns3_free_buffer(ring, cb, 0); out: return ret; } @@ -2451,7 +2481,7 @@ static int hns3_alloc_ring_buffers(struct hns3_enet_ring *ring) out_buffer_fail: for (j = i - 1; j >= 0; j--) - hns3_free_buffer_detach(ring, j); + hns3_free_buffer_detach(ring, j, 0); return ret; } @@ -2478,71 +2508,62 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) DMA_FROM_DEVICE); } -static void hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, int head, - int *bytes, int *pkts) +static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, + int *bytes, int *pkts, int budget) { + /* pair with ring->last_to_use update in hns3_tx_doorbell(), + * smp_store_release() is not used in hns3_tx_doorbell() because + * the doorbell operation already have the needed barrier operation. + */ + int ltu = smp_load_acquire(&ring->last_to_use); int ntc = ring->next_to_clean; struct hns3_desc_cb *desc_cb; + bool reclaimed = false; + struct hns3_desc *desc; + + while (ltu != ntc) { + desc = &ring->desc[ntc]; + + if (le16_to_cpu(desc->tx.bdtp_fe_sc_vld_ra_ri) & + BIT(HNS3_TXD_VLD_B)) + break; - while (head != ntc) { desc_cb = &ring->desc_cb[ntc]; (*pkts) += (desc_cb->type == DESC_TYPE_SKB); (*bytes) += desc_cb->length; /* desc_cb will be cleaned, after hnae3_free_buffer_detach */ - hns3_free_buffer_detach(ring, ntc); + hns3_free_buffer_detach(ring, ntc, budget); if (++ntc == ring->desc_num) ntc = 0; /* Issue prefetch for next Tx descriptor */ prefetch(&ring->desc_cb[ntc]); + reclaimed = true; } + if (unlikely(!reclaimed)) + return false; + /* This smp_store_release() pairs with smp_load_acquire() in * ring_space called by hns3_nic_net_xmit. */ smp_store_release(&ring->next_to_clean, ntc); + return true; } -static int is_valid_clean_head(struct hns3_enet_ring *ring, int h) -{ - int u = ring->next_to_use; - int c = ring->next_to_clean; - - if (unlikely(h > ring->desc_num)) - return 0; - - return u > c ? 
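With the TX head register no longer read, reclaim walks from next_to_clean toward last_to_use and stops at the first descriptor whose VLD bit is still set; the acquire load of last_to_use pairs with the doorbell-side publish, and the release store of next_to_clean pairs with ring_space() on the xmit side. A C11-atomics sketch of that ownership handshake (simplified model, not the kernel primitives):

#include <stdatomic.h>
#include <stdio.h>

#define DESC_NUM 8
#define VLD_BIT  0x1

struct ring {
	unsigned int desc_flags[DESC_NUM]; /* bit 0 models HNS3_TXD_VLD_B */
	atomic_int last_to_use;
	atomic_int next_to_clean;
};

static int reclaim(struct ring *r)
{
	int ltu = atomic_load_explicit(&r->last_to_use, memory_order_acquire);
	int ntc = atomic_load_explicit(&r->next_to_clean, memory_order_relaxed);
	int n = 0;

	while (ltu != ntc) {
		if (r->desc_flags[ntc] & VLD_BIT) /* hw still owns it */
			break;
		ntc = (ntc + 1) % DESC_NUM;
		n++;
	}
	/* publish the new clean index to the xmit side */
	atomic_store_explicit(&r->next_to_clean, ntc, memory_order_release);
	return n;
}

int main(void)
{
	struct ring r = { .desc_flags = { 0, 0, VLD_BIT } };

	atomic_store_explicit(&r.last_to_use, 3, memory_order_release);
	printf("reclaimed %d\n", reclaim(&r)); /* 2: stops at the VLD desc */
	return 0;
}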
(h > c && h <= u) : (h > c || h <= u); -} - -void hns3_clean_tx_ring(struct hns3_enet_ring *ring) +void hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget) { struct net_device *netdev = ring_to_netdev(ring); struct hns3_nic_priv *priv = netdev_priv(netdev); struct netdev_queue *dev_queue; int bytes, pkts; - int head; - - head = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_HEAD_REG); - - if (is_ring_empty(ring) || head == ring->next_to_clean) - return; /* no data to poll */ - - rmb(); /* Make sure head is ready before touch any data */ - - if (unlikely(!is_valid_clean_head(ring, head))) { - hns3_rl_err(netdev, "wrong head (%d, %d-%d)\n", head, - ring->next_to_use, ring->next_to_clean); - - u64_stats_update_begin(&ring->syncp); - ring->stats.io_err_cnt++; - u64_stats_update_end(&ring->syncp); - return; - } bytes = 0; pkts = 0; - hns3_nic_reclaim_desc(ring, head, &bytes, &pkts); + + if (unlikely(!hns3_nic_reclaim_desc(ring, &bytes, &pkts, budget))) + return; ring->tqp_vector->tx_group.total_bytes += bytes; ring->tqp_vector->tx_group.total_packets += pkts; @@ -2614,8 +2635,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, ring_ptr_move_fw(ring, next_to_use); } - wmb(); /* Make all data has been write before submit */ - writel_relaxed(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG); + writel(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG); } static bool hns3_page_is_reusable(struct page *page) @@ -2624,6 +2644,11 @@ static bool hns3_page_is_reusable(struct page *page) !page_is_pfmemalloc(page); } +static bool hns3_can_reuse_page(struct hns3_desc_cb *cb) +{ + return (page_count(cb->priv) - cb->pagecnt_bias) == 1; +} + static void hns3_nic_reuse_page(struct sk_buff *skb, int i, struct hns3_enet_ring *ring, int pull_len, struct hns3_desc_cb *desc_cb) @@ -2632,6 +2657,7 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, int size = le16_to_cpu(desc->rx.size); u32 truesize = hns3_buf_size(ring); + desc_cb->pagecnt_bias--; skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, size - pull_len, truesize); @@ -2639,20 +2665,27 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, * when page_offset rollback to zero, flag default unreuse */ if (unlikely(!hns3_page_is_reusable(desc_cb->priv)) || - (!desc_cb->page_offset && page_count(desc_cb->priv) > 1)) + (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb))) { + __page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias); return; + } /* Move offset up to the next cache line */ desc_cb->page_offset += truesize; if (desc_cb->page_offset + truesize <= hns3_page_size(ring)) { desc_cb->reuse_flag = 1; - /* Bump ref count on page before it is given */ - get_page(desc_cb->priv); - } else if (page_count(desc_cb->priv) == 1) { + } else if (hns3_can_reuse_page(desc_cb)) { desc_cb->reuse_flag = 1; desc_cb->page_offset = 0; - get_page(desc_cb->priv); + } else if (desc_cb->pagecnt_bias) { + __page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias); + return; + } + + if (unlikely(!desc_cb->pagecnt_bias)) { + page_ref_add(desc_cb->priv, USHRT_MAX); + desc_cb->pagecnt_bias = USHRT_MAX; } } @@ -2782,8 +2815,9 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring, { struct hnae3_handle *handle = ring->tqp->handle; struct pci_dev *pdev = ring->tqp->handle->pdev; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); - if (pdev->revision == 0x20) { + if (unlikely(ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)) { *vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag); if 
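The pagecnt_bias scheme in the rx-reuse hunks replaces per-fragment get_page()/put_page(): USHRT_MAX page references are taken once, one is handed out per fragment by decrementing the bias, and the page is reusable when the only non-bias reference is the fragment just given out (page_count - bias == 1). An illustrative counter model, with plain integers standing in for page_ref_add() and __page_frag_cache_drain():

#include <stdbool.h>
#include <stdio.h>

#define BIAS_MAX 65535 /* USHRT_MAX */

struct buf {
	unsigned int page_refs;    /* models page_count() */
	unsigned int pagecnt_bias; /* references still held by the driver */
};

static void buf_init(struct buf *b)
{
	/* page starts with one ref; page_ref_add(p, USHRT_MAX - 1) */
	b->page_refs = BIAS_MAX;
	b->pagecnt_bias = BIAS_MAX;
}

static void give_frag_to_stack(struct buf *b)
{
	b->pagecnt_bias--; /* one pre-taken ref now travels with the skb */
}

static bool can_reuse(const struct buf *b)
{
	return b->page_refs - b->pagecnt_bias == 1;
}

int main(void)
{
	struct buf b;

	buf_init(&b);
	give_frag_to_stack(&b);
	printf("%d\n", can_reuse(&b)); /* 1: only this frag holds a ref */
	give_frag_to_stack(&b);
	printf("%d\n", can_reuse(&b)); /* 0: an earlier frag is still out */
	b.page_refs--;                 /* stack freed the first frag */
	printf("%d\n", can_reuse(&b)); /* 1 again */
	return 0;
}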
(!(*vlan_tag & VLAN_VID_MASK)) *vlan_tag = le16_to_cpu(desc->rx.vlan_tag); @@ -2828,6 +2862,16 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring, } } +static void hns3_rx_ring_move_fw(struct hns3_enet_ring *ring) +{ + ring->desc[ring->next_to_clean].rx.bd_base_info &= + cpu_to_le32(~BIT(HNS3_RXD_VLD_B)); + ring->next_to_clean += 1; + + if (unlikely(ring->next_to_clean == ring->desc_num)) + ring->next_to_clean = 0; +} + static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, unsigned char *va) { @@ -2860,9 +2904,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, if (likely(hns3_page_is_reusable(desc_cb->priv))) desc_cb->reuse_flag = 1; else /* This page cannot be reused so discard it */ - put_page(desc_cb->priv); + __page_frag_cache_drain(desc_cb->priv, + desc_cb->pagecnt_bias); - ring_ptr_move_fw(ring, next_to_clean); + hns3_rx_ring_move_fw(ring); return 0; } u64_stats_update_begin(&ring->syncp); @@ -2873,7 +2918,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, __skb_put(skb, ring->pull_len); hns3_nic_reuse_page(skb, ring->frag_num++, ring, ring->pull_len, desc_cb); - ring_ptr_move_fw(ring, next_to_clean); + hns3_rx_ring_move_fw(ring); return 0; } @@ -2928,7 +2973,7 @@ static int hns3_add_frag(struct hns3_enet_ring *ring) hns3_nic_reuse_page(skb, ring->frag_num++, ring, 0, desc_cb); trace_hns3_rx_desc(ring); - ring_ptr_move_fw(ring, next_to_clean); + hns3_rx_ring_move_fw(ring); ring->pending_buf++; } while (!(bd_base_info & BIT(HNS3_RXD_FE_B))); @@ -3070,35 +3115,32 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring) prefetch(desc); - length = le16_to_cpu(desc->rx.size); - bd_base_info = le32_to_cpu(desc->rx.bd_base_info); + if (!skb) { + bd_base_info = le32_to_cpu(desc->rx.bd_base_info); - /* Check valid BD */ - if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) - return -ENXIO; + /* Check valid BD */ + if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) + return -ENXIO; + + dma_rmb(); + length = le16_to_cpu(desc->rx.size); - if (!skb) { ring->va = desc_cb->buf + desc_cb->page_offset; dma_sync_single_for_cpu(ring_to_dev(ring), desc_cb->dma + desc_cb->page_offset, hns3_buf_size(ring), DMA_FROM_DEVICE); - } - /* Prefetch first cache line of first page - * Idea is to cache few bytes of the header of the packet. Our L1 Cache - * line size is 64B so need to prefetch twice to make it 128B. But in - * actual we can have greater size of caches with 128B Level 1 cache - * lines. In such a case, single fetch would suffice to cache in the - * relevant part of the header. - */ - prefetch(ring->va); -#if L1_CACHE_BYTES < 128 - prefetch(ring->va + L1_CACHE_BYTES); -#endif + /* Prefetch first cache line of first page. + * Idea is to cache few bytes of the header of the packet. + * Our L1 Cache line size is 64B so need to prefetch twice to make + * it 128B. But in actual we can have greater size of caches with + * 128B Level 1 cache lines. In such a case, single fetch would + * suffice to cache in the relevant part of the header. 
+ */ + net_prefetch(ring->va); - if (!skb) { ret = hns3_alloc_skb(ring, length, ring->va); skb = ring->skb; @@ -3138,19 +3180,11 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16 int unused_count = hns3_desc_unused(ring); int recv_pkts = 0; - int recv_bds = 0; - int err, num; + int err; - num = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_FBDNUM_REG); - num -= unused_count; unused_count -= ring->pending_buf; - if (num <= 0) - goto out; - - rmb(); /* Make sure num taken effect before the other data is touched */ - - while (recv_pkts < budget && recv_bds < num) { + while (recv_pkts < budget) { /* Reuse or realloc buffers */ if (unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) { hns3_nic_alloc_rx_buffers(ring, unused_count); @@ -3168,7 +3202,6 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, recv_pkts++; } - recv_bds += ring->pending_buf; unused_count += ring->pending_buf; ring->skb = NULL; ring->pending_buf = 0; @@ -3337,7 +3370,7 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ hns3_for_each_ring(ring, tqp_vector->tx_group) - hns3_clean_tx_ring(ring); + hns3_clean_tx_ring(ring, budget); /* make sure rx ring budget not smaller than 1 */ if (tqp_vector->num_tqps > 1) @@ -3496,7 +3529,7 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv) struct hnae3_ring_chain_node vector_ring_chain; struct hnae3_handle *h = priv->ae_handle; struct hns3_enet_tqp_vector *tqp_vector; - int ret = 0; + int ret; int i; hns3_nic_set_cpumask(priv); @@ -3673,12 +3706,10 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, ring = &priv->ring[q->tqp_index]; desc_num = priv->ae_handle->kinfo.num_tx_desc; ring->queue_index = q->tqp_index; - ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET; } else { ring = &priv->ring[q->tqp_index + queue_num]; desc_num = priv->ae_handle->kinfo.num_rx_desc; ring->queue_index = q->tqp_index; - ring->io_base = q->io_base; } hnae3_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type); @@ -3692,6 +3723,7 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, ring->desc_num = desc_num; ring->next_to_use = 0; ring->next_to_clean = 0; + ring->last_to_use = 0; } static void hns3_queue_to_ring(struct hnae3_queue *tqp, @@ -3771,6 +3803,7 @@ void hns3_fini_ring(struct hns3_enet_ring *ring) ring->desc_cb = NULL; ring->next_to_clean = 0; ring->next_to_use = 0; + ring->last_to_use = 0; ring->pending_buf = 0; if (ring->skb) { dev_kfree_skb_any(ring->skb); @@ -3979,6 +4012,7 @@ static void hns3_info_show(struct hns3_nic_priv *priv) static int hns3_client_init(struct hnae3_handle *handle) { struct pci_dev *pdev = handle->pdev; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); u16 alloc_tqps, max_rss_size; struct hns3_nic_priv *priv; struct net_device *netdev; @@ -3995,6 +4029,7 @@ static int hns3_client_init(struct hnae3_handle *handle) priv->netdev = netdev; priv->ae_handle = handle; priv->tx_timeout_count = 0; + priv->max_non_tso_bd_num = ae_dev->dev_specs.max_non_tso_bd_num; set_bit(HNS3_NIC_STATE_DOWN, &priv->state); handle->msg_enable = netif_msg_init(debug, DEFAULT_MSG_LEVEL); @@ -4181,9 +4216,11 @@ static void hns3_clear_tx_ring(struct hns3_enet_ring *ring) { while (ring->next_to_clean != ring->next_to_use) { ring->desc[ring->next_to_clean].tx.bdtp_fe_sc_vld_ra_ri = 0; - hns3_free_buffer_detach(ring, ring->next_to_clean); + 
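In the reworked hns3_handle_rx_bd() above, the VLD bit is tested first and dma_rmb() is issued before any other descriptor field (such as rx.size) is read, so a stale length can never be paired with a freshly valid descriptor. A sketch of that ordering, with C11 atomics standing in for the barrier:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct rx_desc {
	atomic_uint bd_base_info; /* bit 0 models HNS3_RXD_VLD_B */
	unsigned int size;        /* written by "hardware" before VLD is set */
};

static bool poll_desc(struct rx_desc *d, unsigned int *len)
{
	if (!(atomic_load_explicit(&d->bd_base_info,
				   memory_order_relaxed) & 0x1))
		return false; /* descriptor not ours yet */

	atomic_thread_fence(memory_order_acquire); /* plays dma_rmb() */
	*len = d->size; /* only now safe to read the other fields */
	return true;
}

int main(void)
{
	struct rx_desc d = { 0 };
	unsigned int len = 0;

	d.size = 1500; /* "device" fills the descriptor... */
	atomic_store_explicit(&d.bd_base_info, 1,
			      memory_order_release); /* ...then sets VLD */
	printf("%u\n", poll_desc(&d, &len) ? len : 0);
	return 0;
}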
hns3_free_buffer_detach(ring, ring->next_to_clean, 0); ring_ptr_move_fw(ring, next_to_clean); } + + ring->pending_buf = 0; } static int hns3_clear_rx_ring(struct hns3_enet_ring *ring) @@ -4286,6 +4323,7 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h) hns3_clear_tx_ring(&priv->ring[i]); priv->ring[i].next_to_clean = 0; priv->ring[i].next_to_use = 0; + priv->ring[i].last_to_use = 0; rx_ring = &priv->ring[i + h->kinfo.num_tqps]; hns3_init_ring_hw(rx_ring); @@ -4582,6 +4620,8 @@ static const struct hns3_hw_error_info hns3_hw_err[] = { .msg = "IMP CMDQ error" }, { .type = HNAE3_IMP_RD_POISON_ERROR, .msg = "IMP RD poison" }, + { .type = HNAE3_ROCEE_AXI_RESP_ERROR, + .msg = "ROCEE AXI RESP error" }, }; static void hns3_process_hw_error(struct hnae3_handle *handle, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 9922c5fd7f94..1c81dea0da1e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -42,12 +42,9 @@ enum hns3_nic_state { #define HNS3_RING_TX_RING_PKTNUM_RECORD_REG 0x0006C #define HNS3_RING_TX_RING_EBD_OFFSET_REG 0x00070 #define HNS3_RING_TX_RING_BD_ERR_REG 0x00074 -#define HNS3_RING_PREFETCH_EN_REG 0x0007C -#define HNS3_RING_CFG_VF_NUM_REG 0x00080 -#define HNS3_RING_ASID_REG 0x0008C #define HNS3_RING_EN_REG 0x00090 - -#define HNS3_TX_REG_OFFSET 0x40 +#define HNS3_RING_RX_EN_REG 0x00098 +#define HNS3_RING_TX_EN_REG 0x000D4 #define HNS3_RX_HEAD_SIZE 256 @@ -172,13 +169,12 @@ enum hns3_nic_state { #define HNS3_VECTOR_INITED 1 #define HNS3_MAX_BD_SIZE 65535 -#define HNS3_MAX_NON_TSO_BD_NUM 8U #define HNS3_MAX_TSO_BD_NUM 63U #define HNS3_MAX_TSO_SIZE \ (HNS3_MAX_BD_SIZE * HNS3_MAX_TSO_BD_NUM) -#define HNS3_MAX_NON_TSO_SIZE \ - (HNS3_MAX_BD_SIZE * HNS3_MAX_NON_TSO_BD_NUM) +#define HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num) \ + (HNS3_MAX_BD_SIZE * (max_non_tso_bd_num)) #define HNS3_VECTOR_GL0_OFFSET 0x100 #define HNS3_VECTOR_GL1_OFFSET 0x200 @@ -292,6 +288,7 @@ struct hns3_desc_cb { /* desc type, used by the ring user to mark the type of the priv data */ u16 type; + u16 pagecnt_bias; }; enum hns3_pkt_l3type { @@ -348,14 +345,13 @@ enum hns3_pkt_ol4type { }; struct ring_stats { - u64 io_err_cnt; u64 sw_err_cnt; u64 seg_pkt_cnt; union { struct { u64 tx_pkts; u64 tx_bytes; - u64 tx_err_cnt; + u64 tx_more; u64 restart_queue; u64 tx_busy; u64 tx_copy; @@ -380,7 +376,6 @@ struct ring_stats { }; struct hns3_enet_ring { - u8 __iomem *io_base; /* base io address for the ring */ struct hns3_desc *desc; /* dma map address space */ struct hns3_desc_cb *desc_cb; struct hns3_enet_ring *next; @@ -402,8 +397,10 @@ struct hns3_enet_ring { * next_to_use */ int next_to_clean; - - u32 pull_len; /* head length for current packet */ + union { + int last_to_use; /* last idx used by xmit */ + u32 pull_len; /* memcpy len for current rx packet */ + }; u32 frag_num; void *va; /* first buffer address for current packet */ @@ -479,6 +476,7 @@ struct hns3_nic_priv { struct hns3_enet_ring *ring; struct hns3_enet_tqp_vector *tqp_vector; u16 vector_num; + u8 max_non_tso_bd_num; u64 tx_timeout_count; @@ -518,11 +516,6 @@ static inline int ring_space(struct hns3_enet_ring *ring) (begin - end)) - 1; } -static inline int is_ring_empty(struct hns3_enet_ring *ring) -{ - return ring->next_to_use == ring->next_to_clean; -} - static inline u32 hns3_read_reg(void __iomem *base, u32 reg) { return readl(base + reg); @@ -548,9 +541,6 @@ static inline bool hns3_nic_resetting(struct net_device *netdev) 
#define hns3_write_dev(a, reg, value) \ hns3_write_reg((a)->io_base, (reg), (value)) -#define hnae3_queue_xmit(tqp, buf_num) writel_relaxed(buf_num, \ - (tqp)->io_base + HNS3_RING_TX_RING_TAIL_REG) - #define ring_to_dev(ring) ((ring)->dev) #define ring_to_netdev(ring) ((ring)->tqp_vector->napi.dev) @@ -588,7 +578,7 @@ void hns3_ethtool_set_ops(struct net_device *netdev); int hns3_set_channels(struct net_device *netdev, struct ethtool_channels *ch); -void hns3_clean_tx_ring(struct hns3_enet_ring *ring); +void hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); int hns3_init_all_ring(struct hns3_nic_priv *priv); int hns3_uninit_all_ring(struct hns3_nic_priv *priv); int hns3_nic_reset_all_ring(struct hnae3_handle *h); @@ -607,7 +597,6 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector, u32 rl_value); void hns3_enable_vlan_filter(struct net_device *netdev, bool enable); -int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags); void hns3_request_update_promisc_mode(struct hnae3_handle *handle); #ifdef CONFIG_HNS3_DCB diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 2622e04e8eed..6b07b2771172 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -27,12 +27,11 @@ struct hns3_sfp_type { static const struct hns3_stats hns3_txq_stats[] = { /* Tx per-queue statistics */ - HNS3_TQP_STAT("io_err_cnt", io_err_cnt), HNS3_TQP_STAT("dropped", sw_err_cnt), HNS3_TQP_STAT("seg_pkt_cnt", seg_pkt_cnt), HNS3_TQP_STAT("packets", tx_pkts), HNS3_TQP_STAT("bytes", tx_bytes), - HNS3_TQP_STAT("errors", tx_err_cnt), + HNS3_TQP_STAT("more", tx_more), HNS3_TQP_STAT("wake", restart_queue), HNS3_TQP_STAT("busy", tx_busy), HNS3_TQP_STAT("copy", tx_copy), @@ -46,7 +45,6 @@ static const struct hns3_stats hns3_txq_stats[] = { static const struct hns3_stats hns3_rxq_stats[] = { /* Rx per-queue statistics */ - HNS3_TQP_STAT("io_err_cnt", io_err_cnt), HNS3_TQP_STAT("dropped", sw_err_cnt), HNS3_TQP_STAT("seg_pkt_cnt", seg_pkt_cnt), HNS3_TQP_STAT("packets", rx_pkts), @@ -79,6 +77,7 @@ static const struct hns3_stats hns3_rxq_stats[] = { static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) { struct hnae3_handle *h = hns3_get_handle(ndev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); bool vlan_filter_enable; int ret; @@ -98,7 +97,7 @@ static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) break; } - if (ret || h->pdev->revision >= 0x21) + if (ret || ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) return ret; if (en) { @@ -149,6 +148,7 @@ static void hns3_lp_setup_skb(struct sk_buff *skb) struct net_device *ndev = skb->dev; struct hnae3_handle *handle; + struct hnae3_ae_dev *ae_dev; unsigned char *packet; struct ethhdr *ethh; unsigned int i; @@ -165,7 +165,8 @@ static void hns3_lp_setup_skb(struct sk_buff *skb) * the purpose of mac or serdes selftest. 
*/ handle = hns3_get_handle(ndev); - if (handle->pdev->revision == 0x20) + ae_dev = pci_get_drvdata(handle->pdev); + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) ethh->h_dest[5] += HNS3_NIC_LB_DST_MAC_ADDR; eth_zero_addr(ethh->h_source); ethh->h_proto = htons(ETH_P_ARP); @@ -232,7 +233,7 @@ static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid, for (i = start_ringid; i <= end_ringid; i++) { struct hns3_enet_ring *ring = &priv->ring[i]; - hns3_clean_tx_ring(ring); + hns3_clean_tx_ring(ring, 0); } } @@ -310,9 +311,6 @@ static void hns3_self_test(struct net_device *ndev, struct hnae3_handle *h = priv->ae_handle; int st_param[HNS3_SELF_TEST_TYPE_NUM][2]; bool if_running = netif_running(ndev); -#if IS_ENABLED(CONFIG_VLAN_8021Q) - bool dis_vlan_filter; -#endif int test_index = 0; u32 i; @@ -349,9 +347,7 @@ static void hns3_self_test(struct net_device *ndev, #if IS_ENABLED(CONFIG_VLAN_8021Q) /* Disable the vlan filter for selftest does not support it */ - dis_vlan_filter = (ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) && - h->ae_algo->ops->enable_vlan_filter; - if (dis_vlan_filter) + if (h->ae_algo->ops->enable_vlan_filter) h->ae_algo->ops->enable_vlan_filter(h, false); #endif @@ -388,7 +384,7 @@ static void hns3_self_test(struct net_device *ndev, h->ae_algo->ops->halt_autoneg(h, false); #if IS_ENABLED(CONFIG_VLAN_8021Q) - if (dis_vlan_filter) + if (h->ae_algo->ops->enable_vlan_filter) h->ae_algo->ops->enable_vlan_filter(h, true); #endif @@ -763,6 +759,7 @@ static int hns3_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { struct hnae3_handle *handle = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); const struct hnae3_ae_ops *ops = handle->ae_algo->ops; int ret; @@ -784,7 +781,7 @@ static int hns3_set_link_ksettings(struct net_device *netdev, return phy_ethtool_ksettings_set(netdev->phydev, cmd); } - if (handle->pdev->revision == 0x20) + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; ret = hns3_check_ksettings_param(netdev, cmd); @@ -848,11 +845,12 @@ static int hns3_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key, const u8 hfunc) { struct hnae3_handle *h = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); if (!h->ae_algo->ops->set_rss) return -EOPNOTSUPP; - if ((h->pdev->revision == 0x20 && + if ((ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 && hfunc != ETH_RSS_HASH_TOP) || (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP && hfunc != ETH_RSS_HASH_XOR)) { netdev_err(netdev, "hash func not supported\n"); @@ -1073,9 +1071,6 @@ static int hns3_nway_reset(struct net_device *netdev) if (phy) return genphy_restart_aneg(phy); - if (handle->pdev->revision == 0x20) - return -EOPNOTSUPP; - return ops->restart_autoneg(handle); } @@ -1363,11 +1358,12 @@ static int hns3_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fec) { struct hnae3_handle *handle = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); const struct hnae3_ae_ops *ops = handle->ae_algo->ops; u8 fec_ability; u8 fec_mode; - if (handle->pdev->revision == 0x20) + if (!test_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps)) return -EOPNOTSUPP; if (!ops->get_fec) @@ -1385,10 +1381,11 @@ static int hns3_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fec) { struct hnae3_handle *handle = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); const struct 
hnae3_ae_ops *ops = handle->ae_algo->ops; u32 fec_mode; - if (handle->pdev->revision == 0x20) + if (!test_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps)) return -EOPNOTSUPP; if (!ops->set_fec) @@ -1406,11 +1403,13 @@ static int hns3_get_module_info(struct net_device *netdev, #define HNS3_SFF_8636_V1_3 0x03 struct hnae3_handle *handle = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); const struct hnae3_ae_ops *ops = handle->ae_algo->ops; struct hns3_sfp_type sfp_type; int ret; - if (handle->pdev->revision == 0x20 || !ops->get_module_eeprom) + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 || + !ops->get_module_eeprom) return -EOPNOTSUPP; memset(&sfp_type, 0, sizeof(sfp_type)); @@ -1454,9 +1453,11 @@ static int hns3_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { struct hnae3_handle *handle = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); const struct hnae3_ae_ops *ops = handle->ae_algo->ops; - if (handle->pdev->revision == 0x20 || !ops->get_module_eeprom) + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 || + !ops->get_module_eeprom) return -EOPNOTSUPP; if (!ee->len) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h index 7bddcca148a5..5153e5d41bbd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h @@ -53,7 +53,7 @@ DECLARE_EVENT_CLASS(hns3_skb_template, ) ); -DEFINE_EVENT(hns3_skb_template, hns3_over_8bd, +DEFINE_EVENT(hns3_skb_template, hns3_over_max_bd, TP_PROTO(struct sk_buff *skb), TP_ARGS(skb)); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 1d6c328bd9fb..e6321dda0f3f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -261,7 +261,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) bool complete = false; u32 timeout = 0; int handle = 0; - int retval = 0; + int retval; int ntc; spin_lock_bh(&hw->cmq.csq.lock); @@ -330,9 +330,37 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) return retval; } -static enum hclge_cmd_status hclge_cmd_query_firmware_version( - struct hclge_hw *hw, u32 *version) +static void hclge_set_default_capability(struct hclge_dev *hdev) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + + set_bit(HNAE3_DEV_SUPPORT_FD_B, ae_dev->caps); + set_bit(HNAE3_DEV_SUPPORT_GRO_B, ae_dev->caps); + set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps); +} + +static void hclge_parse_capability(struct hclge_dev *hdev, + struct hclge_query_version_cmd *cmd) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + u32 caps; + + caps = __le32_to_cpu(cmd->caps[0]); + + if (hnae3_get_bit(caps, HCLGE_CAP_UDP_GSO_B)) + set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps); + if (hnae3_get_bit(caps, HCLGE_CAP_PTP_B)) + set_bit(HNAE3_DEV_SUPPORT_PTP_B, ae_dev->caps); + if (hnae3_get_bit(caps, HCLGE_CAP_INT_QL_B)) + set_bit(HNAE3_DEV_SUPPORT_INT_QL_B, ae_dev->caps); + if (hnae3_get_bit(caps, HCLGE_CAP_TQP_TXRX_INDEP_B)) + set_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, ae_dev->caps); +} + +static enum hclge_cmd_status +hclge_cmd_query_version_and_capability(struct hclge_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); struct hclge_query_version_cmd *resp; struct hclge_desc desc; int ret; @@ -340,9 
+368,20 @@ static enum hclge_cmd_status hclge_cmd_query_firmware_version( hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FW_VER, 1); resp = (struct hclge_query_version_cmd *)desc.data; - ret = hclge_cmd_send(hw, &desc, 1); - if (!ret) - *version = le32_to_cpu(resp->firmware); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + return ret; + + hdev->fw_version = le32_to_cpu(resp->firmware); + + ae_dev->dev_version = le32_to_cpu(resp->hardware) << + HNAE3_PCI_REVISION_BIT_SIZE; + ae_dev->dev_version |= hdev->pdev->revision; + + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) + hclge_set_default_capability(hdev); + + hclge_parse_capability(hdev, resp); return ret; } @@ -402,7 +441,6 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev) int hclge_cmd_init(struct hclge_dev *hdev) { - u32 version; int ret; spin_lock_bh(&hdev->hw.cmq.csq.lock); @@ -431,22 +469,23 @@ int hclge_cmd_init(struct hclge_dev *hdev) goto err_cmd_init; } - ret = hclge_cmd_query_firmware_version(&hdev->hw, &version); + /* get version and device capabilities */ + ret = hclge_cmd_query_version_and_capability(hdev); if (ret) { dev_err(&hdev->pdev->dev, - "firmware version query failed %d\n", ret); + "failed to query version and capabilities, ret = %d\n", + ret); goto err_cmd_init; } - hdev->fw_version = version; dev_info(&hdev->pdev->dev, "The firmware version is %lu.%lu.%lu.%lu\n", - hnae3_get_field(version, HNAE3_FW_VERSION_BYTE3_MASK, + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, HNAE3_FW_VERSION_BYTE3_SHIFT), - hnae3_get_field(version, HNAE3_FW_VERSION_BYTE2_MASK, + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK, HNAE3_FW_VERSION_BYTE2_SHIFT), - hnae3_get_field(version, HNAE3_FW_VERSION_BYTE1_MASK, + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK, HNAE3_FW_VERSION_BYTE1_SHIFT), - hnae3_get_field(version, HNAE3_FW_VERSION_BYTE0_MASK, + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK, HNAE3_FW_VERSION_BYTE0_SHIFT)); /* ask the firmware to enable some features, driver can work without diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 463f29151ef0..096e26a2e16b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -115,7 +115,8 @@ enum hclge_opcode_type { HCLGE_OPC_DFX_RCB_REG = 0x004D, HCLGE_OPC_DFX_TQP_REG = 0x004E, HCLGE_OPC_DFX_SSU_REG_2 = 0x004F, - HCLGE_OPC_DFX_QUERY_CHIP_CAP = 0x0050, + + HCLGE_OPC_QUERY_DEV_SPECS = 0x0050, /* MAC command */ HCLGE_OPC_CONFIG_MAC_MODE = 0x0301, @@ -362,9 +363,26 @@ struct hclge_rx_priv_buff_cmd { u8 rsv[6]; }; +enum HCLGE_CAP_BITS { + HCLGE_CAP_UDP_GSO_B, + HCLGE_CAP_QB_B, + HCLGE_CAP_FD_FORWARD_TC_B, + HCLGE_CAP_PTP_B, + HCLGE_CAP_INT_QL_B, + HCLGE_CAP_SIMPLE_BD_B, + HCLGE_CAP_TX_PUSH_B, + HCLGE_CAP_PHY_IMP_B, + HCLGE_CAP_TQP_TXRX_INDEP_B, + HCLGE_CAP_HW_PAD_B, + HCLGE_CAP_STASH_B, +}; + +#define HCLGE_QUERY_CAP_LENGTH 3 struct hclge_query_version_cmd { __le32 firmware; - __le32 firmware_rsv[5]; + __le32 hardware; + __le32 rsv; + __le32 caps[HCLGE_QUERY_CAP_LENGTH]; /* capabilities of device */ }; #define HCLGE_RX_PRIV_EN_B 15 @@ -491,6 +509,8 @@ struct hclge_pf_res_cmd { #define HCLGE_CFG_RSS_SIZE_M GENMASK(31, 24) #define HCLGE_CFG_SPEED_ABILITY_S 0 #define HCLGE_CFG_SPEED_ABILITY_M GENMASK(7, 0) +#define HCLGE_CFG_SPEED_ABILITY_EXT_S 10 +#define HCLGE_CFG_SPEED_ABILITY_EXT_M GENMASK(15, 10) #define HCLGE_CFG_UMV_TBL_SPACE_S 16 #define 
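hclge_parse_capability() above turns each set bit of the little-endian capability words in the query-version response into the matching HNAE3_DEV_SUPPORT_* bit. A standalone sketch of that translation (illustrative bit names; a big-endian host would byte-swap the word first, as __le32_to_cpu() does in the driver):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum fw_cap_bits  { FW_CAP_UDP_GSO, FW_CAP_QB, FW_CAP_FD_FWD_TC, FW_CAP_PTP };
enum drv_cap_bits { DRV_CAP_UDP_GSO, DRV_CAP_PTP };

static bool fw_bit(uint32_t word, unsigned int bit)
{
	return word & (1U << bit);
}

static void parse_caps(uint32_t caps_word, uint32_t *drv_caps)
{
	if (fw_bit(caps_word, FW_CAP_UDP_GSO))
		*drv_caps |= 1U << DRV_CAP_UDP_GSO;
	if (fw_bit(caps_word, FW_CAP_PTP))
		*drv_caps |= 1U << DRV_CAP_PTP;
}

int main(void)
{
	uint32_t drv_caps = 0;

	parse_caps((1U << FW_CAP_UDP_GSO) | (1U << FW_CAP_PTP), &drv_caps);
	printf("caps=0x%x\n", drv_caps);
	return 0;
}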
HCLGE_CFG_UMV_TBL_SPACE_M GENMASK(31, 16) @@ -1069,6 +1089,20 @@ struct hclge_sfp_info_bd0_cmd { u8 data[HCLGE_SFP_INFO_BD0_LEN]; }; +#define HCLGE_QUERY_DEV_SPECS_BD_NUM 4 + +struct hclge_dev_specs_0_cmd { + __le32 rsv0; + __le32 mac_entry_num; + __le32 mng_entry_num; + __le16 rss_ind_tbl_size; + __le16 rss_key_size; + __le16 int_ql_max; + u8 max_non_tso_bd_num; + u8 rsv1; + __le32 max_tm_rate; +}; + int hclge_cmd_init(struct hclge_dev *hdev); static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index d6c3952aba04..3606240025a8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -2,7 +2,9 @@ // Copyright (c) 2016-2017 Hisilicon Limited. #include "hclge_main.h" +#include "hclge_dcb.h" #include "hclge_tm.h" +#include "hclge_dcb.h" #include "hnae3.h" #define BW_PERCENT 100 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 26f6f068b01d..16df050e72cf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -8,7 +8,7 @@ #include "hclge_tm.h" #include "hnae3.h" -static struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = { +static const struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = { { .reg_type = "bios common", .dfx_msg = &hclge_dbg_bios_common_reg[0], .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_bios_common_reg), @@ -115,14 +115,14 @@ static int hclge_dbg_cmd_send(struct hclge_dev *hdev, } static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev, - struct hclge_dbg_reg_type_info *reg_info, + const struct hclge_dbg_reg_type_info *reg_info, const char *cmd_buf) { #define IDX_OFFSET 1 const char *s = &cmd_buf[strlen(reg_info->reg_type) + IDX_OFFSET]; - struct hclge_dbg_dfx_message *dfx_message = reg_info->dfx_msg; - struct hclge_dbg_reg_common_msg *reg_msg = ®_info->reg_msg; + const struct hclge_dbg_dfx_message *dfx_message = reg_info->dfx_msg; + const struct hclge_dbg_reg_common_msg *reg_msg = ®_info->reg_msg; struct hclge_desc *desc_src; struct hclge_desc *desc; int entries_per_desc; @@ -399,7 +399,7 @@ err_dcb_cmd_send: static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf) { - struct hclge_dbg_reg_type_info *reg_info; + const struct hclge_dbg_reg_type_info *reg_info; bool has_dump = false; int i; @@ -428,17 +428,13 @@ static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf) } } -static void hclge_title_idx_print(struct hclge_dev *hdev, bool flag, int index, - char *title_buf, char *true_buf, - char *false_buf) +static void hclge_print_tc_info(struct hclge_dev *hdev, bool flag, int index) { if (flag) - dev_info(&hdev->pdev->dev, "%s(%d): %s weight: %u\n", - title_buf, index, true_buf, - hdev->tm_info.pg_info[0].tc_dwrr[index]); + dev_info(&hdev->pdev->dev, "tc(%d): no sp mode weight: %u\n", + index, hdev->tm_info.pg_info[0].tc_dwrr[index]); else - dev_info(&hdev->pdev->dev, "%s(%d): %s\n", title_buf, index, - false_buf); + dev_info(&hdev->pdev->dev, "tc(%d): sp mode\n", index); } static void hclge_dbg_dump_tc(struct hclge_dev *hdev) @@ -469,8 +465,7 @@ static void hclge_dbg_dump_tc(struct hclge_dev *hdev) ets_weight->weight_offset); for (i = 0; i < HNAE3_MAX_TC; i++) - hclge_title_idx_print(hdev, ets_weight->tc_weight[i], i, - "tc", "no sp 
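The new HCLGE_OPC_QUERY_DEV_SPECS response above is a fixed wire layout (hclge_dev_specs_0_cmd) whose little-endian fields get converted into the hnae3_dev_specs the upper layer reads. A userspace sketch of the unpacking, with illustrative values and le*_to_cpu() shown as identity on a little-endian host:

#include <stdint.h>
#include <stdio.h>

struct dev_specs_0_cmd {   /* wire layout, as in the hunk above */
	uint32_t rsv0;
	uint32_t mac_entry_num;
	uint32_t mng_entry_num;
	uint16_t rss_ind_tbl_size;
	uint16_t rss_key_size;
	uint16_t int_ql_max;
	uint8_t  max_non_tso_bd_num;
	uint8_t  rsv1;
	uint32_t max_tm_rate;
};

struct dev_specs {         /* parsed driver state */
	uint32_t mac_entry_num;
	uint16_t rss_ind_tbl_size;
	uint8_t  max_non_tso_bd_num;
};

static void parse_dev_specs(const struct dev_specs_0_cmd *cmd,
			    struct dev_specs *specs)
{
	specs->mac_entry_num = cmd->mac_entry_num;       /* le32_to_cpu() */
	specs->rss_ind_tbl_size = cmd->rss_ind_tbl_size; /* le16_to_cpu() */
	specs->max_non_tso_bd_num = cmd->max_non_tso_bd_num;
}

int main(void)
{
	/* illustrative numbers, not taken from any specific chip */
	struct dev_specs_0_cmd cmd = { .mac_entry_num = 12288,
				       .rss_ind_tbl_size = 512,
				       .max_non_tso_bd_num = 8 };
	struct dev_specs specs;

	parse_dev_specs(&cmd, &specs);
	printf("mac=%u rss_tbl=%u bd=%u\n", specs.mac_entry_num,
	       specs.rss_ind_tbl_size, specs.max_non_tso_bd_num);
	return 0;
}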
mode", "sp mode"); + hclge_print_tc_info(hdev, ets_weight->tc_weight[i], i); } static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev) @@ -1170,6 +1165,14 @@ static void hclge_dbg_dump_serv_info(struct hclge_dev *hdev) hdev->serv_processed_cnt); } +static void hclge_dbg_dump_interrupt(struct hclge_dev *hdev) +{ + dev_info(&hdev->pdev->dev, "num_nic_msi: %u\n", hdev->num_nic_msi); + dev_info(&hdev->pdev->dev, "num_roce_msi: %u\n", hdev->num_roce_msi); + dev_info(&hdev->pdev->dev, "num_msi_used: %u\n", hdev->num_msi_used); + dev_info(&hdev->pdev->dev, "num_msi_left: %u\n", hdev->num_msi_left); +} + static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev) { struct hclge_desc *desc_src, *desc_tmp; @@ -1494,6 +1497,7 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) #define DUMP_REG "dump reg" #define DUMP_TM_MAP "dump tm map" #define DUMP_LOOPBACK "dump loopback" +#define DUMP_INTERRUPT "dump intr" struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; @@ -1541,6 +1545,9 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) hclge_dbg_dump_mac_list(hdev, &cmd_buf[sizeof("dump mc mac list")], false); + } else if (strncmp(cmd_buf, DUMP_INTERRUPT, + strlen(DUMP_INTERRUPT)) == 0) { + hclge_dbg_dump_interrupt(hdev); } else { dev_info(&hdev->pdev->dev, "unknown command\n"); return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h index 38b79321c4c4..a9066e6ff697 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h @@ -81,13 +81,13 @@ struct hclge_dbg_dfx_message { #define HCLGE_DBG_MAC_REG_TYPE_LEN 32 struct hclge_dbg_reg_type_info { const char *reg_type; - struct hclge_dbg_dfx_message *dfx_msg; + const struct hclge_dbg_dfx_message *dfx_msg; struct hclge_dbg_reg_common_msg reg_msg; }; #pragma pack() -static struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = { {false, "Reserved"}, {true, "BP_CPU_STATE"}, {true, "DFX_MSIX_INFO_NIC_0"}, @@ -103,7 +103,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = { {false, "Reserved"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = { {false, "Reserved"}, {true, "SSU_ETS_PORT_STATUS"}, {true, "SSU_ETS_TCG_STATUS"}, @@ -175,7 +175,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = { {false, "Reserved"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = { {true, "prt_id"}, {true, "PACKET_TC_CURR_BUFFER_CNT_0"}, {true, "PACKET_TC_CURR_BUFFER_CNT_1"}, @@ -282,7 +282,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = { {false, "Reserved"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = { {true, "OQ_INDEX"}, {true, "QUEUE_CNT"}, {false, "Reserved"}, @@ -291,7 +291,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = { {false, "Reserved"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_igu_egu_reg[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_igu_egu_reg[] = { {true, "prt_id"}, {true, "IGU_RX_ERR_PKT"}, {true, "IGU_RX_NO_SOF_PKT"}, @@ -356,7 +356,7 @@ static struct hclge_dbg_dfx_message 
hclge_dbg_igu_egu_reg[] = { {false, "Reserved"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = { {true, "tc_queue_num"}, {true, "FSM_DFX_ST0"}, {true, "FSM_DFX_ST1"}, @@ -365,7 +365,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = { {true, "BUF_WAIT_TIMEOUT_QID"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = { {false, "Reserved"}, {true, "FIFO_DFX_ST0"}, {true, "FIFO_DFX_ST1"}, @@ -381,7 +381,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = { {false, "Reserved"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = { {false, "Reserved"}, {true, "NCSI_EGU_TX_FIFO_STS"}, {true, "NCSI_PAUSE_STATUS"}, @@ -453,7 +453,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = { {true, "NCSI_MAC_RX_PAUSE_FRAMES"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = { {false, "Reserved"}, {true, "LGE_IGU_AFIFO_DFX_0"}, {true, "LGE_IGU_AFIFO_DFX_1"}, @@ -483,7 +483,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = { {false, "Reserved"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = { {false, "Reserved"}, {true, "DROP_FROM_PRT_PKT_CNT"}, {true, "DROP_FROM_HOST_PKT_CNT"}, @@ -639,7 +639,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = { {false, "Reserved"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = { {false, "Reserved"}, {true, "FSM_DFX_ST0"}, {true, "FSM_DFX_ST1"}, @@ -711,7 +711,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = { {false, "Reserved"}, }; -static struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = { +static const struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = { {true, "q_num"}, {true, "RCB_CFG_RX_RING_TAIL"}, {true, "RCB_CFG_RX_RING_HEAD"}, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 50d5ef71756b..9ee55ee0487d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -729,7 +729,7 @@ static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en) struct hclge_desc desc; int ret; - if (hdev->pdev->revision < 0x21) + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return 0; /* configure NCSI error interrupts */ @@ -808,7 +808,7 @@ static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK); desc[1].data[1] = cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK); - if (hdev->pdev->revision >= 0x21) + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) desc[1].data[2] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK); } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) { @@ -1041,7 +1041,7 @@ static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en) hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false); if (en) { - if (hdev->pdev->revision >= 0x21) + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) desc[0].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN); else @@ -1507,6 +1507,8 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) reset_type = 
HNAE3_FUNC_RESET; + hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR); + ret = hclge_log_rocee_axi_error(hdev); if (ret) return HNAE3_GLOBAL_RESET; @@ -1548,7 +1550,8 @@ int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) struct hclge_desc desc; int ret; - if (hdev->pdev->revision < 0x21 || !hnae3_dev_roce_supported(hdev)) + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 || + !hnae3_dev_roce_supported(hdev)) return 0; hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false); @@ -1574,8 +1577,7 @@ static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev) struct hclge_dev *hdev = ae_dev->priv; enum hnae3_reset_type reset_type; - if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || - hdev->pdev->revision < 0x21) + if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) return; reset_type = hclge_log_and_clear_rocee_ras_error(hdev); @@ -1661,7 +1663,7 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) } /* Handling Non-fatal Rocee RAS errors */ - if (hdev->pdev->revision >= 0x21 && + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 && status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { dev_err(dev, "ROCEE Non-Fatal RAS error identified\n"); hclge_handle_rocee_ras_error(ae_dev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d553ed7ee64c..1f026408ad38 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -84,6 +84,7 @@ static const struct pci_device_id ae_algo_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0}, /* required last entry */ {0, } }; @@ -622,7 +623,7 @@ static u8 *hclge_tqps_get_strings(struct hnae3_handle *handle, u8 *data) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; u8 *buff = data; - int i = 0; + int i; for (i = 0; i < kinfo->num_tqps; i++) { struct hclge_tqp *tqp = container_of(handle->kinfo.tqp[i], @@ -739,7 +740,7 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset) if (stringset == ETH_SS_TEST) { /* clear loopback bit flags at first */ handle->flags = (handle->flags & (~HCLGE_LOOPBACK_TEST_FLAGS)); - if (hdev->pdev->revision >= 0x21 || + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 || hdev->hw.mac.speed == HCLGE_MAC_SPEED_10M || hdev->hw.mac.speed == HCLGE_MAC_SPEED_100M || hdev->hw.mac.speed == HCLGE_MAC_SPEED_1G) { @@ -965,6 +966,9 @@ static int hclge_parse_speed(int speed_cmd, int *speed) case 5: *speed = HCLGE_MAC_SPEED_100G; break; + case 8: + *speed = HCLGE_MAC_SPEED_200G; + break; default: return -EINVAL; } @@ -1004,6 +1008,9 @@ static int hclge_check_port_speed(struct hnae3_handle *handle, u32 speed) case HCLGE_MAC_SPEED_100G: speed_bit = HCLGE_SUPPORT_100G_BIT; break; + case HCLGE_MAC_SPEED_200G: + speed_bit = HCLGE_SUPPORT_200G_BIT; + break; default: return -EINVAL; } @@ -1014,7 +1021,7 @@ static int hclge_check_port_speed(struct hnae3_handle *handle, u32 speed) return -EINVAL; } -static void hclge_convert_setting_sr(struct hclge_mac *mac, u8 speed_ability) +static void hclge_convert_setting_sr(struct hclge_mac *mac, u16 speed_ability) { if (speed_ability & HCLGE_SUPPORT_10G_BIT) linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, @@ -1031,9 +1038,12 @@ static void 
hclge_convert_setting_sr(struct hclge_mac *mac, u8 speed_ability) if (speed_ability & HCLGE_SUPPORT_100G_BIT) linkmode_set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, mac->supported); + if (speed_ability & HCLGE_SUPPORT_200G_BIT) + linkmode_set_bit(ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, + mac->supported); } -static void hclge_convert_setting_lr(struct hclge_mac *mac, u8 speed_ability) +static void hclge_convert_setting_lr(struct hclge_mac *mac, u16 speed_ability) { if (speed_ability & HCLGE_SUPPORT_10G_BIT) linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, @@ -1050,9 +1060,13 @@ static void hclge_convert_setting_lr(struct hclge_mac *mac, u8 speed_ability) if (speed_ability & HCLGE_SUPPORT_100G_BIT) linkmode_set_bit(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, mac->supported); + if (speed_ability & HCLGE_SUPPORT_200G_BIT) + linkmode_set_bit( + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, + mac->supported); } -static void hclge_convert_setting_cr(struct hclge_mac *mac, u8 speed_ability) +static void hclge_convert_setting_cr(struct hclge_mac *mac, u16 speed_ability) { if (speed_ability & HCLGE_SUPPORT_10G_BIT) linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, @@ -1069,9 +1083,12 @@ static void hclge_convert_setting_cr(struct hclge_mac *mac, u8 speed_ability) if (speed_ability & HCLGE_SUPPORT_100G_BIT) linkmode_set_bit(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, mac->supported); + if (speed_ability & HCLGE_SUPPORT_200G_BIT) + linkmode_set_bit(ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT, + mac->supported); } -static void hclge_convert_setting_kr(struct hclge_mac *mac, u8 speed_ability) +static void hclge_convert_setting_kr(struct hclge_mac *mac, u16 speed_ability) { if (speed_ability & HCLGE_SUPPORT_1G_BIT) linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, @@ -1091,6 +1108,9 @@ static void hclge_convert_setting_kr(struct hclge_mac *mac, u8 speed_ability) if (speed_ability & HCLGE_SUPPORT_100G_BIT) linkmode_set_bit(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, mac->supported); + if (speed_ability & HCLGE_SUPPORT_200G_BIT) + linkmode_set_bit(ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, + mac->supported); } static void hclge_convert_setting_fec(struct hclge_mac *mac) @@ -1115,6 +1135,7 @@ static void hclge_convert_setting_fec(struct hclge_mac *mac) BIT(HNAE3_FEC_AUTO); break; case HCLGE_MAC_SPEED_100G: + case HCLGE_MAC_SPEED_200G: linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, mac->supported); mac->fec_ability = BIT(HNAE3_FEC_RS) | BIT(HNAE3_FEC_AUTO); break; @@ -1125,7 +1146,7 @@ static void hclge_convert_setting_fec(struct hclge_mac *mac) } static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev, - u8 speed_ability) + u16 speed_ability) { struct hclge_mac *mac = &hdev->hw.mac; @@ -1136,7 +1157,7 @@ static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev, hclge_convert_setting_sr(mac, speed_ability); hclge_convert_setting_lr(mac, speed_ability); hclge_convert_setting_cr(mac, speed_ability); - if (hdev->pdev->revision >= 0x21) + if (hnae3_dev_fec_supported(hdev)) hclge_convert_setting_fec(mac); linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mac->supported); @@ -1145,12 +1166,12 @@ static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev, } static void hclge_parse_backplane_link_mode(struct hclge_dev *hdev, - u8 speed_ability) + u16 speed_ability) { struct hclge_mac *mac = &hdev->hw.mac; hclge_convert_setting_kr(mac, speed_ability); - if (hdev->pdev->revision >= 0x21) + if (hnae3_dev_fec_supported(hdev)) hclge_convert_setting_fec(mac); 
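	/* Worked example (illustrative only, built from this patch's defines):
	 * with speed_ability widened to u16, the new HCLGE_SUPPORT_200G_BIT
	 * (BIT(8)) can be combined with the legacy bits, e.g.
	 *
	 *	u16 ability = HCLGE_SUPPORT_100G_BIT | HCLGE_SUPPORT_200G_BIT;
	 *
	 * hclge_convert_setting_kr(mac, ability) then advertises both the
	 * 100000baseKR4 and 200000baseKR4 link modes, and
	 * hclge_get_max_speed(ability) returns HCLGE_MAC_SPEED_200G
	 * (200000 Mbps) because the 200G bit is tested first.
	 */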
linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mac->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); @@ -1158,7 +1179,7 @@ static void hclge_parse_backplane_link_mode(struct hclge_dev *hdev, } static void hclge_parse_copper_link_mode(struct hclge_dev *hdev, - u8 speed_ability) + u16 speed_ability) { unsigned long *supported = hdev->hw.mac.supported; @@ -1188,7 +1209,7 @@ static void hclge_parse_copper_link_mode(struct hclge_dev *hdev, linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); } -static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability) +static void hclge_parse_link_mode(struct hclge_dev *hdev, u16 speed_ability) { u8 media_type = hdev->hw.mac.media_type; @@ -1200,8 +1221,11 @@ static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability) hclge_parse_backplane_link_mode(hdev, speed_ability); } -static u32 hclge_get_max_speed(u8 speed_ability) +static u32 hclge_get_max_speed(u16 speed_ability) { + if (speed_ability & HCLGE_SUPPORT_200G_BIT) + return HCLGE_MAC_SPEED_200G; + if (speed_ability & HCLGE_SUPPORT_100G_BIT) return HCLGE_MAC_SPEED_100G; @@ -1231,8 +1255,11 @@ static u32 hclge_get_max_speed(u8 speed_ability) static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) { +#define SPEED_ABILITY_EXT_SHIFT 8 + struct hclge_cfg_param_cmd *req; u64 mac_addr_tmp_high; + u16 speed_ability_ext; u64 mac_addr_tmp; unsigned int i; @@ -1281,6 +1308,11 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) cfg->speed_ability = hnae3_get_field(__le32_to_cpu(req->param[1]), HCLGE_CFG_SPEED_ABILITY_M, HCLGE_CFG_SPEED_ABILITY_S); + speed_ability_ext = hnae3_get_field(__le32_to_cpu(req->param[1]), + HCLGE_CFG_SPEED_ABILITY_EXT_M, + HCLGE_CFG_SPEED_ABILITY_EXT_S); + cfg->speed_ability |= speed_ability_ext << SPEED_ABILITY_EXT_SHIFT; + cfg->umv_space = hnae3_get_field(__le32_to_cpu(req->param[1]), HCLGE_CFG_UMV_TBL_SPACE_M, HCLGE_CFG_UMV_TBL_SPACE_S); @@ -1324,6 +1356,78 @@ static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg) return 0; } +static void hclge_set_default_dev_specs(struct hclge_dev *hdev) +{ +#define HCLGE_MAX_NON_TSO_BD_NUM 8U + + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + + ae_dev->dev_specs.max_non_tso_bd_num = HCLGE_MAX_NON_TSO_BD_NUM; + ae_dev->dev_specs.rss_ind_tbl_size = HCLGE_RSS_IND_TBL_SIZE; + ae_dev->dev_specs.rss_key_size = HCLGE_RSS_KEY_SIZE; + ae_dev->dev_specs.max_tm_rate = HCLGE_ETHER_MAX_RATE; +} + +static void hclge_parse_dev_specs(struct hclge_dev *hdev, + struct hclge_desc *desc) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + struct hclge_dev_specs_0_cmd *req0; + + req0 = (struct hclge_dev_specs_0_cmd *)desc[0].data; + + ae_dev->dev_specs.max_non_tso_bd_num = req0->max_non_tso_bd_num; + ae_dev->dev_specs.rss_ind_tbl_size = + le16_to_cpu(req0->rss_ind_tbl_size); + ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size); + ae_dev->dev_specs.max_tm_rate = le32_to_cpu(req0->max_tm_rate); +} + +static void hclge_check_dev_specs(struct hclge_dev *hdev) +{ + struct hnae3_dev_specs *dev_specs = &hdev->ae_dev->dev_specs; + + if (!dev_specs->max_non_tso_bd_num) + dev_specs->max_non_tso_bd_num = HCLGE_MAX_NON_TSO_BD_NUM; + if (!dev_specs->rss_ind_tbl_size) + dev_specs->rss_ind_tbl_size = HCLGE_RSS_IND_TBL_SIZE; + if (!dev_specs->rss_key_size) + dev_specs->rss_key_size = HCLGE_RSS_KEY_SIZE; + if (!dev_specs->max_tm_rate) + dev_specs->max_tm_rate = HCLGE_ETHER_MAX_RATE; +} + +static int 
hclge_query_dev_specs(struct hclge_dev *hdev) +{ + struct hclge_desc desc[HCLGE_QUERY_DEV_SPECS_BD_NUM]; + int ret; + int i; + + /* set default specifications as devices lower than version V3 do not + * support querying specifications from firmware. + */ + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) { + hclge_set_default_dev_specs(hdev); + return 0; + } + + for (i = 0; i < HCLGE_QUERY_DEV_SPECS_BD_NUM - 1; i++) { + hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_QUERY_DEV_SPECS, + true); + desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + } + hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_QUERY_DEV_SPECS, true); + + ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_QUERY_DEV_SPECS_BD_NUM); + if (ret) + return ret; + + hclge_parse_dev_specs(hdev, desc); + hclge_check_dev_specs(hdev); + + return 0; +} + static int hclge_get_cap(struct hclge_dev *hdev) { int ret; @@ -2422,6 +2526,10 @@ static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed, hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M, HCLGE_CFG_SPEED_S, 5); break; + case HCLGE_MAC_SPEED_200G: + hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M, + HCLGE_CFG_SPEED_S, 8); + break; default: dev_err(&hdev->pdev->dev, "invalid speed (%d)\n", speed); return -EINVAL; @@ -2856,7 +2964,7 @@ static int hclge_update_port_info(struct hclge_dev *hdev) if (!hdev->support_sfp_query) return 0; - if (hdev->pdev->revision >= 0x21) + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) ret = hclge_get_sfp_info(hdev, mac); else ret = hclge_get_sfp_speed(hdev, &speed); @@ -2868,7 +2976,7 @@ static int hclge_update_port_info(struct hclge_dev *hdev) return ret; } - if (hdev->pdev->revision >= 0x21) { + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { if (mac->speed_type == QUERY_ACTIVE_SPEED) { hclge_update_port_capability(mac); return 0; @@ -3211,7 +3319,7 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { struct hnae3_client *client = hdev->roce_client; - int ret = 0; + int ret; u16 i; if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client) @@ -3533,7 +3641,7 @@ static void hclge_clear_reset_cause(struct hclge_dev *hdev) /* For revision 0x20, the reset interrupt source * can only be cleared after hardware reset done */ - if (hdev->pdev->revision == 0x20) + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, clearval); @@ -3944,6 +4052,9 @@ static void hclge_periodic_service_task(struct hclge_dev *hdev) { unsigned long delta = round_jiffies_relative(HZ); + if (test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) + return; + /* Always handle the link updating to make sure link state is * updated when it is triggered by mbx. */ @@ -4537,7 +4648,7 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev) int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; struct hclge_vport *vport = hdev->vport; - if (hdev->pdev->revision >= 0x21) + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE; for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { @@ -4737,13 +4848,14 @@ static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, bool en_mc_pmc) { struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; bool en_bc_pmc = true; - /* For revision 0x20, if broadcast promisc enabled, vlan filter is - * always bypassed. 
So broadcast promisc should be disabled until - * user enable promisc mode + /* For devices whose version is below V2, if broadcast promisc is enabled, + * the vlan filter is always bypassed. So broadcast promisc should be + * disabled until the user enables promisc mode */ - if (handle->pdev->revision == 0x20) + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) en_bc_pmc = handle->netdev_flags & HNAE3_BPE ? true : false; return hclge_set_vport_promisc_mode(vport, en_uc_pmc, en_mc_pmc, @@ -6758,7 +6870,7 @@ static int hclge_set_loopback(struct hnae3_handle *handle, * the same, the packets are looped back in the SSU. If SSU loopback * is disabled, packets can reach MAC even if SMAC is the same as DMAC. */ - if (hdev->pdev->revision >= 0x21) { + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { u8 switch_param = en ? 0 : BIT(HCLGE_SWITCH_ALW_LPBK_B); ret = hclge_config_switch_param(hdev, PF_VPORT_ID, switch_param, @@ -8260,7 +8372,7 @@ static void hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable) struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - if (hdev->pdev->revision >= 0x21) { + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, HCLGE_FILTER_FE_EGRESS, enable, 0); hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, @@ -8620,7 +8732,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev) int ret; int i; - if (hdev->pdev->revision >= 0x21) { + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { /* for revision 0x21, vf vlan filter is per function */ for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; @@ -8975,7 +9087,7 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, u16 state; int ret; - if (hdev->pdev->revision == 0x20) + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; vport = hclge_get_vf_vport(hdev, vfid); @@ -9950,6 +10062,13 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_cmd_uninit; + ret = hclge_query_dev_specs(hdev); + if (ret) { + dev_err(&pdev->dev, "failed to query dev specifications, ret = %d.\n", + ret); + goto err_cmd_uninit; + } + ret = hclge_configure(hdev); if (ret) { dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret); @@ -10147,7 +10266,7 @@ static int hclge_set_vf_spoofchk(struct hnae3_handle *handle, int vf, u32 new_spoofchk = enable ? 1 : 0; int ret; - if (hdev->pdev->revision == 0x20) + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; vport = hclge_get_vf_vport(hdev, vf); @@ -10180,7 +10299,7 @@ static int hclge_reset_vport_spoofchk(struct hclge_dev *hdev) int ret; int i; - if (hdev->pdev->revision == 0x20) + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return 0; /* resume the vf spoof check state after reset */ @@ -10200,6 +10319,7 @@ static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + struct hnae3_ae_dev *ae_dev = hdev->ae_dev; u32 new_trusted = enable ? 1 : 0; bool en_bc_pmc; int ret; @@ -10213,7 +10333,7 @@ static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable) /* Disable promisc mode for VF if it is not trusted any more. 
*/ if (!enable && vport->vf_info.promisc_enable) { - en_bc_pmc = hdev->pdev->revision != 0x20; + en_bc_pmc = ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2; ret = hclge_set_vport_promisc_mode(vport, false, false, en_bc_pmc); if (ret) @@ -11090,7 +11210,7 @@ static void hclge_sync_promisc_mode(struct hclge_dev *hdev) { struct hclge_vport *vport = &hdev->vport[0]; struct hnae3_handle *handle = &vport->nic; - u8 tmp_flags = 0; + u8 tmp_flags; int ret; if (vport->last_promisc_flags != vport->overflow_promisc_flags) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 9bbdd4557c27..64e6afdb61b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -199,6 +199,7 @@ enum HLCGE_PORT_TYPE { #define HCLGE_SUPPORT_40G_BIT BIT(5) #define HCLGE_SUPPORT_100M_BIT BIT(6) #define HCLGE_SUPPORT_10M_BIT BIT(7) +#define HCLGE_SUPPORT_200G_BIT BIT(8) #define HCLGE_SUPPORT_GE \ (HCLGE_SUPPORT_1G_BIT | HCLGE_SUPPORT_100M_BIT | HCLGE_SUPPORT_10M_BIT) @@ -238,7 +239,8 @@ enum HCLGE_MAC_SPEED { HCLGE_MAC_SPEED_25G = 25000, /* 25000 Mbps = 25 Gbps */ HCLGE_MAC_SPEED_40G = 40000, /* 40000 Mbps = 40 Gbps */ HCLGE_MAC_SPEED_50G = 50000, /* 50000 Mbps = 50 Gbps */ - HCLGE_MAC_SPEED_100G = 100000 /* 100000 Mbps = 100 Gbps */ + HCLGE_MAC_SPEED_100G = 100000, /* 100000 Mbps = 100 Gbps */ + HCLGE_MAC_SPEED_200G = 200000 /* 200000 Mbps = 200 Gbps */ }; enum HCLGE_MAC_DUPLEX { @@ -266,7 +268,7 @@ struct hclge_mac { u32 fec_mode; /* active fec mode */ u32 user_fec_mode; u32 fec_ability; - int link; /* store the link status of mac & phy (if phy exit) */ + int link; /* store the link status of mac & phy (if phy exists) */ struct phy_device *phydev; struct mii_bus *mdio_bus; phy_interface_t phy_if; @@ -349,7 +351,7 @@ struct hclge_cfg { u8 mac_addr[ETH_ALEN]; u8 default_speed; u32 numa_node_map; - u8 speed_ability; + u16 speed_ability; u16 umv_space; }; @@ -749,7 +751,6 @@ struct hclge_dev { u16 num_tx_desc; /* desc num of per tx queue */ u16 num_rx_desc; /* desc num of per rx queue */ u8 hw_tc_map; - u8 tc_num_last_time; enum hclge_fc_mode fc_mode_last_time; u8 support_sfp_query; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 28db13253a5e..15f69fa86323 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -23,14 +23,11 @@ enum hclge_shaper_level { #define HCLGE_SHAPER_BS_U_DEF 5 #define HCLGE_SHAPER_BS_S_DEF 20 -#define HCLGE_ETHER_MAX_RATE 100000 - /* hclge_shaper_para_calc: calculate ir parameter for the shaper * @ir: Rate to be config, its unit is Mbps * @shaper_level: the shaper level. 
eg: port, pg, priority, queueset - * @ir_b: IR_B parameter of IR shaper - * @ir_u: IR_U parameter of IR shaper - * @ir_s: IR_S parameter of IR shaper + * @ir_para: parameters of IR shaper + * @max_tm_rate: max tm rate available to config * * the formula: * * @return: 0: calculate successful, negative: fail */ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, - u8 *ir_b, u8 *ir_u, u8 *ir_s) + struct hclge_shaper_ir_para *ir_para, + u32 max_tm_rate) { #define DIVISOR_CLK (1000 * 8) #define DIVISOR_IR_B_126 (126 * DIVISOR_CLK) @@ -59,7 +57,7 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, /* Calc tick */ if (shaper_level >= HCLGE_SHAPER_LVL_CNT || - ir > HCLGE_ETHER_MAX_RATE) + ir > max_tm_rate) return -EINVAL; tick = tick_array[shaper_level]; @@ -74,9 +72,9 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, ir_calc = (DIVISOR_IR_B_126 + (tick >> 1) - 1) / tick; if (ir_calc == ir) { - *ir_b = 126; - *ir_u = 0; - *ir_s = 0; + ir_para->ir_b = 126; + ir_para->ir_u = 0; + ir_para->ir_s = 0; return 0; } else if (ir_calc > ir) { @@ -86,8 +84,8 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, ir_calc = DIVISOR_IR_B_126 / (tick * (1 << ir_s_calc)); } - *ir_b = (ir * tick * (1 << ir_s_calc) + (DIVISOR_CLK >> 1)) / - DIVISOR_CLK; + ir_para->ir_b = (ir * tick * (1 << ir_s_calc) + + (DIVISOR_CLK >> 1)) / DIVISOR_CLK; } else { /* Increasing the numerator to select ir_u value */ u32 numerator; @@ -99,15 +97,16 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, } if (ir_calc == ir) { - *ir_b = 126; + ir_para->ir_b = 126; } else { u32 denominator = DIVISOR_CLK * (1 << --ir_u_calc); - *ir_b = (ir * tick + (denominator >> 1)) / denominator; + ir_para->ir_b = (ir * tick + (denominator >> 1)) / + denominator; } } - *ir_u = ir_u_calc; - *ir_s = ir_s_calc; + ir_para->ir_u = ir_u_calc; + ir_para->ir_s = ir_s_calc; return 0; } @@ -400,21 +399,22 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) { struct hclge_port_shapping_cmd *shap_cfg_cmd; + struct hclge_shaper_ir_para ir_para; struct hclge_desc desc; - u8 ir_u, ir_b, ir_s; u32 shapping_para; int ret; - ret = hclge_shaper_para_calc(hdev->hw.mac.speed, - HCLGE_SHAPER_LVL_PORT, - &ir_b, &ir_u, &ir_s); + ret = hclge_shaper_para_calc(hdev->hw.mac.speed, HCLGE_SHAPER_LVL_PORT, + &ir_para, + hdev->ae_dev->dev_specs.max_tm_rate); if (ret) return ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false); shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data; - shapping_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + shapping_para = hclge_tm_get_shapping_para(ir_para.ir_b, ir_para.ir_u, + ir_para.ir_s, HCLGE_SHAPER_BS_U_DEF, HCLGE_SHAPER_BS_S_DEF); @@ -515,21 +515,23 @@ int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; struct hclge_qs_shapping_cmd *shap_cfg_cmd; + struct hclge_shaper_ir_para ir_para; struct hclge_dev *hdev = vport->back; struct hclge_desc desc; - u8 ir_b, ir_u, ir_s; u32 shaper_para; int ret, i; if (!max_tx_rate) - max_tx_rate = HCLGE_ETHER_MAX_RATE; + max_tx_rate = hdev->ae_dev->dev_specs.max_tm_rate; ret = hclge_shaper_para_calc(max_tx_rate, HCLGE_SHAPER_LVL_QSET, - &ir_b, &ir_u, &ir_s); + &ir_para, + hdev->ae_dev->dev_specs.max_tm_rate); if (ret) return ret; - shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + shaper_para = 
hclge_tm_get_shapping_para(ir_para.ir_b, ir_para.ir_u, + ir_para.ir_s, HCLGE_SHAPER_BS_U_DEF, HCLGE_SHAPER_BS_S_DEF); @@ -668,7 +670,8 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) hdev->tm_info.pg_info[i].pg_id = i; hdev->tm_info.pg_info[i].pg_sch_mode = HCLGE_SCH_MODE_DWRR; - hdev->tm_info.pg_info[i].bw_limit = HCLGE_ETHER_MAX_RATE; + hdev->tm_info.pg_info[i].bw_limit = + hdev->ae_dev->dev_specs.max_tm_rate; if (i != 0) continue; @@ -729,7 +732,8 @@ static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev) static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) { - u8 ir_u, ir_b, ir_s; + u32 max_tm_rate = hdev->ae_dev->dev_specs.max_tm_rate; + struct hclge_shaper_ir_para ir_para; u32 shaper_para; int ret; u32 i; @@ -741,10 +745,9 @@ static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) /* Pg to pri */ for (i = 0; i < hdev->tm_info.num_pg; i++) { /* Calc shaper para */ - ret = hclge_shaper_para_calc( - hdev->tm_info.pg_info[i].bw_limit, - HCLGE_SHAPER_LVL_PG, - &ir_b, &ir_u, &ir_s); + ret = hclge_shaper_para_calc(hdev->tm_info.pg_info[i].bw_limit, + HCLGE_SHAPER_LVL_PG, + &ir_para, max_tm_rate); if (ret) return ret; @@ -757,7 +760,9 @@ static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) if (ret) return ret; - shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + shaper_para = hclge_tm_get_shapping_para(ir_para.ir_b, + ir_para.ir_u, + ir_para.ir_s, HCLGE_SHAPER_BS_U_DEF, HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pg_shapping_cfg(hdev, @@ -861,16 +866,16 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev) static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) { - u8 ir_u, ir_b, ir_s; + u32 max_tm_rate = hdev->ae_dev->dev_specs.max_tm_rate; + struct hclge_shaper_ir_para ir_para; u32 shaper_para; int ret; u32 i; for (i = 0; i < hdev->tm_info.num_tc; i++) { - ret = hclge_shaper_para_calc( - hdev->tm_info.tc_info[i].bw_limit, - HCLGE_SHAPER_LVL_PRI, - &ir_b, &ir_u, &ir_s); + ret = hclge_shaper_para_calc(hdev->tm_info.tc_info[i].bw_limit, + HCLGE_SHAPER_LVL_PRI, + &ir_para, max_tm_rate); if (ret) return ret; @@ -882,7 +887,9 @@ static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) if (ret) return ret; - shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + shaper_para = hclge_tm_get_shapping_para(ir_para.ir_b, + ir_para.ir_u, + ir_para.ir_s, HCLGE_SHAPER_BS_U_DEF, HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i, @@ -897,12 +904,13 @@ static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; - u8 ir_u, ir_b, ir_s; + struct hclge_shaper_ir_para ir_para; u32 shaper_para; int ret; ret = hclge_shaper_para_calc(vport->bw_limit, HCLGE_SHAPER_LVL_VF, - &ir_b, &ir_u, &ir_s); + &ir_para, + hdev->ae_dev->dev_specs.max_tm_rate); if (ret) return ret; @@ -914,7 +922,8 @@ static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport) if (ret) return ret; - shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + shaper_para = hclge_tm_get_shapping_para(ir_para.ir_b, ir_para.ir_u, + ir_para.ir_s, HCLGE_SHAPER_BS_U_DEF, HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, @@ -929,15 +938,15 @@ static int hclge_tm_pri_vnet_base_shaper_qs_cfg(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; struct hclge_dev *hdev = vport->back; - u8 ir_u, ir_b, ir_s; + u32 max_tm_rate = 
hdev->ae_dev->dev_specs.max_tm_rate; + struct hclge_shaper_ir_para ir_para; u32 i; int ret; for (i = 0; i < kinfo->num_tc; i++) { - ret = hclge_shaper_para_calc( - hdev->tm_info.tc_info[i].bw_limit, - HCLGE_SHAPER_LVL_QSET, - &ir_b, &ir_u, &ir_s); + ret = hclge_shaper_para_calc(hdev->tm_info.tc_info[i].bw_limit, + HCLGE_SHAPER_LVL_QSET, + &ir_para, max_tm_rate); if (ret) return ret; } @@ -1355,7 +1364,7 @@ static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) static int hclge_tm_bp_setup(struct hclge_dev *hdev) { - int ret = 0; + int ret; int i; for (i = 0; i < hdev->tm_info.num_tc; i++) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 45bcb67f90fd..bb2a2d8e9259 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -19,6 +19,8 @@ #define HCLGE_TM_TX_SCHD_DWRR_MSK BIT(0) #define HCLGE_TM_TX_SCHD_SP_MSK (0xFE) +#define HCLGE_ETHER_MAX_RATE 100000 + struct hclge_pg_to_pri_link_cmd { u8 pg_id; u8 rsvd1[3]; @@ -139,6 +141,12 @@ struct hclge_port_shapping_cmd { __le32 port_shapping_para; }; +struct hclge_shaper_ir_para { + u8 ir_b; /* IR_B parameter of IR shaper */ + u8 ir_u; /* IR_U parameter of IR shaper */ + u8 ir_s; /* IR_S parameter of IR shaper */ +}; + #define hclge_tm_set_field(dest, string, val) \ hnae3_set_field((dest), \ (HCLGE_TM_SHAP_##string##_MSK), \ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index fec65239a3c8..66866c1cfb12 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -313,9 +313,34 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) return status; } -static int hclgevf_cmd_query_firmware_version(struct hclgevf_hw *hw, - u32 *version) +static void hclgevf_set_default_capability(struct hclgevf_dev *hdev) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + + set_bit(HNAE3_DEV_SUPPORT_FD_B, ae_dev->caps); + set_bit(HNAE3_DEV_SUPPORT_GRO_B, ae_dev->caps); + set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps); +} + +static void hclgevf_parse_capability(struct hclgevf_dev *hdev, + struct hclgevf_query_version_cmd *cmd) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + u32 caps; + + caps = __le32_to_cpu(cmd->caps[0]); + + if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_GSO_B)) + set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps); + if (hnae3_get_bit(caps, HCLGEVF_CAP_INT_QL_B)) + set_bit(HNAE3_DEV_SUPPORT_INT_QL_B, ae_dev->caps); + if (hnae3_get_bit(caps, HCLGEVF_CAP_TQP_TXRX_INDEP_B)) + set_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, ae_dev->caps); +} + +static int hclgevf_cmd_query_version_and_capability(struct hclgevf_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); struct hclgevf_query_version_cmd *resp; struct hclgevf_desc desc; int status; @@ -323,9 +348,20 @@ static int hclgevf_cmd_query_firmware_version(struct hclgevf_hw *hw, resp = (struct hclgevf_query_version_cmd *)desc.data; hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_QUERY_FW_VER, 1); - status = hclgevf_cmd_send(hw, &desc, 1); - if (!status) - *version = le32_to_cpu(resp->firmware); + status = hclgevf_cmd_send(&hdev->hw, &desc, 1); + if (status) + return status; + + hdev->fw_version = le32_to_cpu(resp->firmware); + + ae_dev->dev_version = le32_to_cpu(resp->hardware) << + HNAE3_PCI_REVISION_BIT_SIZE; + 
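	/* Hedged sketch of the packing above (example values only): assuming
	 * HNAE3_PCI_REVISION_BIT_SIZE is 8, a firmware-reported hardware
	 * version of 0x1 combined with a PCI revision of 0x21 gives
	 *
	 *	dev_version = (0x1 << 8) | 0x21 = 0x121
	 *
	 * and feature gating then compares dev_version against the
	 * HNAE3_DEVICE_VERSION_* thresholds instead of reading
	 * pdev->revision directly.
	 */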
ae_dev->dev_version |= hdev->pdev->revision; + + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) + hclgevf_set_default_capability(hdev); + + hclgevf_parse_capability(hdev, resp); return status; } @@ -364,7 +400,6 @@ err_csq: int hclgevf_cmd_init(struct hclgevf_dev *hdev) { - u32 version; int ret; spin_lock_bh(&hdev->hw.cmq.csq.lock); @@ -395,23 +430,22 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev) goto err_cmd_init; } - /* get firmware version */ - ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version); + /* get version and device capabilities */ + ret = hclgevf_cmd_query_version_and_capability(hdev); if (ret) { dev_err(&hdev->pdev->dev, - "failed(%d) to query firmware version\n", ret); + "failed to query version and capabilities, ret = %d\n", ret); goto err_cmd_init; } - hdev->fw_version = version; dev_info(&hdev->pdev->dev, "The firmware version is %lu.%lu.%lu.%lu\n", - hnae3_get_field(version, HNAE3_FW_VERSION_BYTE3_MASK, + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, HNAE3_FW_VERSION_BYTE3_SHIFT), - hnae3_get_field(version, HNAE3_FW_VERSION_BYTE2_MASK, + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK, HNAE3_FW_VERSION_BYTE2_SHIFT), - hnae3_get_field(version, HNAE3_FW_VERSION_BYTE1_MASK, + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK, HNAE3_FW_VERSION_BYTE1_SHIFT), - hnae3_get_field(version, HNAE3_FW_VERSION_BYTE0_MASK, + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK, HNAE3_FW_VERSION_BYTE0_SHIFT)); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index 40d6e602ab51..9460c128c095 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -91,6 +91,8 @@ enum hclgevf_opcode_type { /* Generic command */ HCLGEVF_OPC_QUERY_FW_VER = 0x0001, HCLGEVF_OPC_QUERY_VF_RSRC = 0x0024, + HCLGEVF_OPC_QUERY_DEV_SPECS = 0x0050, + /* TQP command */ HCLGEVF_OPC_QUERY_TX_STATUS = 0x0B03, HCLGEVF_OPC_QUERY_RX_STATUS = 0x0B13, @@ -141,9 +143,26 @@ struct hclgevf_ctrl_vector_chain { u8 resv; }; +enum HCLGEVF_CAP_BITS { + HCLGEVF_CAP_UDP_GSO_B, + HCLGEVF_CAP_QB_B, + HCLGEVF_CAP_FD_FORWARD_TC_B, + HCLGEVF_CAP_PTP_B, + HCLGEVF_CAP_INT_QL_B, + HCLGEVF_CAP_SIMPLE_BD_B, + HCLGEVF_CAP_TX_PUSH_B, + HCLGEVF_CAP_PHY_IMP_B, + HCLGEVF_CAP_TQP_TXRX_INDEP_B, + HCLGEVF_CAP_HW_PAD_B, + HCLGEVF_CAP_STASH_B, +}; + +#define HCLGEVF_QUERY_CAP_LENGTH 3 struct hclgevf_query_version_cmd { __le32 firmware; - __le32 firmware_rsv[5]; + __le32 hardware; + __le32 rsv; + __le32 caps[HCLGEVF_QUERY_CAP_LENGTH]; /* capabilities of device */ }; #define HCLGEVF_MSIX_OFT_ROCEE_S 0 @@ -253,6 +272,19 @@ struct hclgevf_cfg_tx_queue_pointer_cmd { #define HCLGEVF_NIC_CMQ_DESC_NUM_S 3 #define HCLGEVF_NIC_CMDQ_INT_SRC_REG 0x27100 +#define HCLGEVF_QUERY_DEV_SPECS_BD_NUM 4 + +struct hclgevf_dev_specs_0_cmd { + __le32 rsv0; + __le32 mac_entry_num; + __le32 mng_entry_num; + __le16 rss_ind_tbl_size; + __le16 rss_key_size; + __le16 int_ql_max; + u8 max_non_tso_bd_num; + u8 rsv1[5]; +}; + static inline void hclgevf_write_reg(void __iomem *base, u32 reg, u32 value) { writel(value, base + reg); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index e972138a14ad..50c84c5e65d2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -19,8 +19,9 @@ static 
struct hnae3_ae_algo ae_algovf; static struct workqueue_struct *hclgevf_wq; static const struct pci_device_id ae_algovf_pci_tbl[] = { - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_VF), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_RDMA_DCB_PFC_VF), + HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, /* required last entry */ {0, } }; @@ -171,7 +172,7 @@ static u8 *hclgevf_tqps_get_strings(struct hnae3_handle *handle, u8 *data) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; u8 *buff = data; - int i = 0; + int i; for (i = 0; i < kinfo->num_tqps; i++) { struct hclgevf_tqp *tqp = container_of(kinfo->tqp[i], @@ -745,7 +746,7 @@ static int hclgevf_get_rss(struct hnae3_handle *handle, u32 *indir, u8 *key, struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; int i, ret; - if (handle->pdev->revision >= 0x21) { + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { /* Get hash algorithm */ if (hfunc) { switch (rss_cfg->hash_algo) { @@ -791,7 +792,7 @@ static int hclgevf_set_rss(struct hnae3_handle *handle, const u32 *indir, struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; int ret, i; - if (handle->pdev->revision >= 0x21) { + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { /* Set the RSS Hash Key if specified by the user */ if (key) { switch (hfunc) { @@ -863,7 +864,7 @@ static int hclgevf_set_rss_tuple(struct hnae3_handle *handle, u8 tuple_sets; int ret; - if (handle->pdev->revision == 0x20) + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; if (nfc->data & @@ -941,7 +942,7 @@ static int hclgevf_get_rss_tuple(struct hnae3_handle *handle, struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; u8 tuple_sets; - if (handle->pdev->revision == 0x20) + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; nfc->data = 0; @@ -1154,10 +1155,9 @@ static int hclgevf_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, bool en_mc_pmc) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - struct pci_dev *pdev = hdev->pdev; bool en_bc_pmc; - en_bc_pmc = pdev->revision != 0x20; + en_bc_pmc = hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2; return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc, en_bc_pmc); @@ -1702,6 +1702,26 @@ static int hclgevf_notify_client(struct hclgevf_dev *hdev, return ret; } +static int hclgevf_notify_roce_client(struct hclgevf_dev *hdev, + enum hnae3_reset_notify_type type) +{ + struct hnae3_client *client = hdev->roce_client; + struct hnae3_handle *handle = &hdev->roce; + int ret; + + if (!test_bit(HCLGEVF_STATE_ROCE_REGISTERED, &hdev->state) || !client) + return 0; + + if (!client->ops->reset_notify) + return -EOPNOTSUPP; + + ret = client->ops->reset_notify(handle, type); + if (ret) + dev_err(&hdev->pdev->dev, "notify roce client failed %d(%d)", + type, ret); + return ret; +} + static int hclgevf_reset_wait(struct hclgevf_dev *hdev) { #define HCLGEVF_RESET_WAIT_US 20000 @@ -1788,10 +1808,10 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) { #define HCLGEVF_RESET_SYNC_TIME 100 - struct hclge_vf_to_pf_msg send_msg; - int ret = 0; - if (hdev->reset_type == HNAE3_VF_FUNC_RESET) { + struct hclge_vf_to_pf_msg send_msg; + int ret; + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_RESET, 0); ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); if (ret) { @@ -1806,10 +1826,10 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) /* inform hardware that preparatory work is done */ 
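	/* Flow sketch: the HCLGE_MBX_RESET mailbox message above is only sent
	 * for HNAE3_VF_FUNC_RESET, while every reset type falls through to
	 * the HCLGEVF_RESET_SYNC_TIME sleep and the handshake below, which
	 * reports the VF's preparatory work as finished.
	 */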
msleep(HCLGEVF_RESET_SYNC_TIME); hclgevf_reset_handshake(hdev, true); - dev_info(&hdev->pdev->dev, "prepare reset(%d) wait done, ret:%d\n", - hdev->reset_type, ret); + dev_info(&hdev->pdev->dev, "prepare reset(%d) wait done\n", + hdev->reset_type); - return ret; + return 0; } static void hclgevf_dump_rst_info(struct hclgevf_dev *hdev) @@ -1865,6 +1885,11 @@ static int hclgevf_reset_prepare(struct hclgevf_dev *hdev) hdev->rst_stats.rst_cnt++; + /* perform reset of the stack & ae device for a client */ + ret = hclgevf_notify_roce_client(hdev, HNAE3_DOWN_CLIENT); + if (ret) + return ret; + rtnl_lock(); /* bring down the nic to stop any ongoing TX/RX */ ret = hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT); @@ -1880,6 +1905,9 @@ static int hclgevf_reset_rebuild(struct hclgevf_dev *hdev) int ret; hdev->rst_stats.hw_rst_done_cnt++; + ret = hclgevf_notify_roce_client(hdev, HNAE3_UNINIT_CLIENT); + if (ret) + return ret; rtnl_lock(); /* now, re-initialize the nic client and ae device */ @@ -1890,6 +1918,18 @@ static int hclgevf_reset_rebuild(struct hclgevf_dev *hdev) return ret; } + ret = hclgevf_notify_roce_client(hdev, HNAE3_INIT_CLIENT); + /* ignore RoCE notify error if it fails HCLGEVF_RESET_MAX_FAIL_CNT - 1 + * times + */ + if (ret && + hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT - 1) + return ret; + + ret = hclgevf_notify_roce_client(hdev, HNAE3_UP_CLIENT); + if (ret) + return ret; + hdev->last_reset_time = jiffies; hdev->rst_stats.rst_done_cnt++; hdev->rst_stats.rst_fail_cnt = 0; @@ -2186,6 +2226,9 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev) unsigned long delta = round_jiffies_relative(HZ); struct hnae3_handle *handle = &hdev->nic; + if (test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) + return; + if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { delta = jiffies - hdev->last_serv_processed; @@ -2284,7 +2327,7 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, * register, so we should just write 0 to the bit we are * handling, and keep other bits as cmdq_stat_reg. 
*/ - if (hdev->pdev->revision >= 0x21) + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) *clearval = ~(1U << HCLGEVF_VECTOR0_RX_CMDQ_INT_B); else *clearval = cmdq_stat_reg & @@ -2427,7 +2470,7 @@ static void hclgevf_rss_init_cfg(struct hclgevf_dev *hdev) rss_cfg->hash_algo = HCLGEVF_RSS_HASH_ALGO_TOEPLITZ; rss_cfg->rss_size = hdev->nic.kinfo.rss_size; tuple_sets = &rss_cfg->rss_tuple_sets; - if (hdev->pdev->revision >= 0x21) { + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { rss_cfg->hash_algo = HCLGEVF_RSS_HASH_ALGO_SIMPLE; memcpy(rss_cfg->rss_hash_key, hclgevf_hash_key, HCLGEVF_RSS_KEY_SIZE); @@ -2452,7 +2495,7 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; int ret; - if (hdev->pdev->revision >= 0x21) { + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { ret = hclgevf_set_rss_algo_key(hdev, rss_cfg->hash_algo, rss_cfg->rss_hash_key); if (ret) @@ -2551,13 +2594,7 @@ static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive) static int hclgevf_client_start(struct hnae3_handle *handle) { - int ret; - - ret = hclgevf_set_alive(handle, true); - if (ret) - return ret; - - return 0; + return hclgevf_set_alive(handle, true); } static void hclgevf_client_stop(struct hnae3_handle *handle) @@ -2760,6 +2797,7 @@ static int hclgevf_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, if (ret) return ret; + set_bit(HCLGEVF_STATE_ROCE_REGISTERED, &hdev->state); hnae3_set_client_init_flag(client, ae_dev, 1); return 0; @@ -2820,6 +2858,7 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, /* un-init roce, if it exists */ if (hdev->roce_client) { + clear_bit(HCLGEVF_STATE_ROCE_REGISTERED, &hdev->state); hdev->roce_client->ops->uninit_instance(&hdev->roce, 0); hdev->roce_client = NULL; hdev->roce.client = NULL; @@ -2942,6 +2981,76 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev) return 0; } +static void hclgevf_set_default_dev_specs(struct hclgevf_dev *hdev) +{ +#define HCLGEVF_MAX_NON_TSO_BD_NUM 8U + + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + + ae_dev->dev_specs.max_non_tso_bd_num = + HCLGEVF_MAX_NON_TSO_BD_NUM; + ae_dev->dev_specs.rss_ind_tbl_size = HCLGEVF_RSS_IND_TBL_SIZE; + ae_dev->dev_specs.rss_key_size = HCLGEVF_RSS_KEY_SIZE; +} + +static void hclgevf_parse_dev_specs(struct hclgevf_dev *hdev, + struct hclgevf_desc *desc) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + struct hclgevf_dev_specs_0_cmd *req0; + + req0 = (struct hclgevf_dev_specs_0_cmd *)desc[0].data; + + ae_dev->dev_specs.max_non_tso_bd_num = req0->max_non_tso_bd_num; + ae_dev->dev_specs.rss_ind_tbl_size = + le16_to_cpu(req0->rss_ind_tbl_size); + ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size); +} + +static void hclgevf_check_dev_specs(struct hclgevf_dev *hdev) +{ + struct hnae3_dev_specs *dev_specs = &hdev->ae_dev->dev_specs; + + if (!dev_specs->max_non_tso_bd_num) + dev_specs->max_non_tso_bd_num = HCLGEVF_MAX_NON_TSO_BD_NUM; + if (!dev_specs->rss_ind_tbl_size) + dev_specs->rss_ind_tbl_size = HCLGEVF_RSS_IND_TBL_SIZE; + if (!dev_specs->rss_key_size) + dev_specs->rss_key_size = HCLGEVF_RSS_KEY_SIZE; +} + +static int hclgevf_query_dev_specs(struct hclgevf_dev *hdev) +{ + struct hclgevf_desc desc[HCLGEVF_QUERY_DEV_SPECS_BD_NUM]; + int ret; + int i; + + /* set default specifications as devices lower than version V3 do not + * support querying specifications from firmware. 
+ */ + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) { + hclgevf_set_default_dev_specs(hdev); + return 0; + } + + for (i = 0; i < HCLGEVF_QUERY_DEV_SPECS_BD_NUM - 1; i++) { + hclgevf_cmd_setup_basic_desc(&desc[i], + HCLGEVF_OPC_QUERY_DEV_SPECS, true); + desc[i].flag |= cpu_to_le16(HCLGEVF_CMD_FLAG_NEXT); + } + hclgevf_cmd_setup_basic_desc(&desc[i], HCLGEVF_OPC_QUERY_DEV_SPECS, + true); + + ret = hclgevf_cmd_send(&hdev->hw, desc, HCLGEVF_QUERY_DEV_SPECS_BD_NUM); + if (ret) + return ret; + + hclgevf_parse_dev_specs(hdev, desc); + hclgevf_check_dev_specs(hdev); + + return 0; +} + static int hclgevf_pci_reset(struct hclgevf_dev *hdev) { struct pci_dev *pdev = hdev->pdev; @@ -3050,6 +3159,13 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) if (ret) goto err_cmd_init; + ret = hclgevf_query_dev_specs(hdev); + if (ret) { + dev_err(&pdev->dev, + "failed to query dev specifications, ret = %d\n", ret); + goto err_cmd_init; + } + ret = hclgevf_init_msi(hdev); if (ret) { dev_err(&pdev->dev, "failed(%d) to init MSI/MSI-X\n", ret); @@ -3345,6 +3461,13 @@ static bool hclgevf_get_hw_reset_stat(struct hnae3_handle *handle) return !!hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING); } +static bool hclgevf_get_cmdq_stat(struct hnae3_handle *handle) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + + return test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); +} + static bool hclgevf_ae_dev_resetting(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); @@ -3530,6 +3653,7 @@ static const struct hnae3_ae_ops hclgevf_ops = { .get_link_mode = hclgevf_get_link_mode, .set_promisc_mode = hclgevf_set_promisc_mode, .request_update_promisc_mode = hclgevf_request_update_promisc_mode, + .get_cmdq_stat = hclgevf_get_cmdq_stat, }; static struct hnae3_ae_algo ae_algovf = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index c1fac8920ae3..c5bcc3894fd5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -139,6 +139,7 @@ enum hclgevf_states { HCLGEVF_STATE_IRQ_INITED, HCLGEVF_STATE_REMOVING, HCLGEVF_STATE_NIC_REGISTERED, + HCLGEVF_STATE_ROCE_REGISTERED, /* task states */ HCLGEVF_STATE_RST_SERVICE_SCHED, HCLGEVF_STATE_RST_HANDLING, diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 7df5d7d211d4..883d0d7c6858 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -210,7 +210,7 @@ static void hns_mdio_cmd_write(struct hns_mdio_device *mdio_dev, * @bus: mdio bus * @phy_id: phy id * @regnum: register num - * @value: register value + * @data: register value * * Return 0 on success, negative on failure */ @@ -273,7 +273,6 @@ static int hns_mdio_write(struct mii_bus *bus, * @bus: mdio bus * @phy_id: phy id * @regnum: register num - * @value: register value * * Return phy register value */ diff --git a/drivers/net/ethernet/huawei/hinic/Makefile b/drivers/net/ethernet/huawei/hinic/Makefile index 67b59d0ba769..2f89119c9b69 100644 --- a/drivers/net/ethernet/huawei/hinic/Makefile +++ b/drivers/net/ethernet/huawei/hinic/Makefile @@ -4,4 +4,5 @@ obj-$(CONFIG_HINIC) += hinic.o hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \ hinic_hw_io.o hinic_hw_qp.o hinic_hw_cmdq.o hinic_hw_wq.o \ hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o \ - hinic_common.o hinic_ethtool.o 
hinic_devlink.o hinic_hw_mbox.o hinic_sriov.o + hinic_common.o hinic_ethtool.o hinic_devlink.o hinic_hw_mbox.o \ + hinic_sriov.o hinic_debugfs.o diff --git a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c new file mode 100644 index 000000000000..19eb839177ec --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Huawei HiNIC PCI Express Linux driver + * Copyright(c) 2017 Huawei Technologies Co., Ltd + */ + +#include <linux/debugfs.h> +#include <linux/device.h> + +#include "hinic_debugfs.h" + +static struct dentry *hinic_dbgfs_root; + +enum sq_dbg_info { + GLB_SQ_ID, + SQ_PI, + SQ_CI, + SQ_FI, + SQ_MSIX_ENTRY, +}; + +static char *sq_fields[] = {"glb_sq_id", "sq_pi", "sq_ci", "sq_fi", "sq_msix_entry"}; + +static u64 hinic_dbg_get_sq_info(struct hinic_dev *nic_dev, struct hinic_sq *sq, int idx) +{ + struct hinic_wq *wq = sq->wq; + + switch (idx) { + case GLB_SQ_ID: + return nic_dev->hwdev->func_to_io.global_qpn + sq->qid; + case SQ_PI: + return atomic_read(&wq->prod_idx) & wq->mask; + case SQ_CI: + return atomic_read(&wq->cons_idx) & wq->mask; + case SQ_FI: + return be16_to_cpu(*(__be16 *)(sq->hw_ci_addr)) & wq->mask; + case SQ_MSIX_ENTRY: + return sq->msix_entry; + } + + return 0; +} + +enum rq_dbg_info { + GLB_RQ_ID, + RQ_HW_PI, + RQ_SW_CI, + RQ_SW_PI, + RQ_MSIX_ENTRY, +}; + +static char *rq_fields[] = {"glb_rq_id", "rq_hw_pi", "rq_sw_ci", "rq_sw_pi", "rq_msix_entry"}; + +static u64 hinic_dbg_get_rq_info(struct hinic_dev *nic_dev, struct hinic_rq *rq, int idx) +{ + struct hinic_wq *wq = rq->wq; + + switch (idx) { + case GLB_RQ_ID: + return nic_dev->hwdev->func_to_io.global_qpn + rq->qid; + case RQ_HW_PI: + return be16_to_cpu(*(__be16 *)(rq->pi_virt_addr)) & wq->mask; + case RQ_SW_CI: + return atomic_read(&wq->cons_idx) & wq->mask; + case RQ_SW_PI: + return atomic_read(&wq->prod_idx) & wq->mask; + case RQ_MSIX_ENTRY: + return rq->msix_entry; + } + + return 0; +} + +enum func_tbl_info { + VALID, + RX_MODE, + MTU, + RQ_DEPTH, + QUEUE_NUM, +}; + +static char *func_table_fields[] = {"valid", "rx_mode", "mtu", "rq_depth", "cfg_q_num"}; + +static int hinic_dbg_get_func_table(struct hinic_dev *nic_dev, int idx) +{ + struct tag_sml_funcfg_tbl *funcfg_table_elem; + struct hinic_cmd_lt_rd *read_data; + u16 out_size = sizeof(*read_data); + int ret = ~0; + int err; + + read_data = kzalloc(sizeof(*read_data), GFP_KERNEL); + if (!read_data) + return ~0; + + read_data->node = TBL_ID_FUNC_CFG_SM_NODE; + read_data->inst = TBL_ID_FUNC_CFG_SM_INST; + read_data->entry_size = HINIC_FUNCTION_CONFIGURE_TABLE_SIZE; + read_data->lt_index = HINIC_HWIF_FUNC_IDX(nic_dev->hwdev->hwif); + read_data->len = HINIC_FUNCTION_CONFIGURE_TABLE_SIZE; + + err = hinic_port_msg_cmd(nic_dev->hwdev, HINIC_PORT_CMD_RD_LINE_TBL, read_data, + sizeof(*read_data), read_data, &out_size); + if (err || out_size != sizeof(*read_data) || read_data->status) { + netif_err(nic_dev, drv, nic_dev->netdev, + "Failed to get func table, err: %d, status: 0x%x, out size: 0x%x\n", + err, read_data->status, out_size); + kfree(read_data); + return ~0; + } + + funcfg_table_elem = (struct tag_sml_funcfg_tbl *)read_data->data; + + /* assign via ret and break (instead of returning directly) so that + * read_data is always freed below + */ + switch (idx) { + case VALID: + ret = funcfg_table_elem->dw0.bs.valid; + break; + case RX_MODE: + ret = funcfg_table_elem->dw0.bs.nic_rx_mode; + break; + case MTU: + ret = funcfg_table_elem->dw1.bs.mtu; + break; + case RQ_DEPTH: + ret = funcfg_table_elem->dw13.bs.cfg_rq_depth; + break; + case QUEUE_NUM: + ret = funcfg_table_elem->dw13.bs.cfg_q_num; + break;
+ } + + kfree(read_data); + + return ret; +} + +static ssize_t hinic_dbg_cmd_read(struct file *filp, char __user *buffer, size_t count, + loff_t *ppos) +{ + struct hinic_debug_priv *dbg; + char ret_buf[20]; + int *desc; + u64 out; + int ret; + + desc = filp->private_data; + dbg = container_of(desc, struct hinic_debug_priv, field_id[*desc]); + + switch (dbg->type) { + case HINIC_DBG_SQ_INFO: + out = hinic_dbg_get_sq_info(dbg->dev, dbg->object, *desc); + break; + + case HINIC_DBG_RQ_INFO: + out = hinic_dbg_get_rq_info(dbg->dev, dbg->object, *desc); + break; + + case HINIC_DBG_FUNC_TABLE: + out = hinic_dbg_get_func_table(dbg->dev, *desc); + break; + + default: + netif_warn(dbg->dev, drv, dbg->dev->netdev, "Invalid hinic debug cmd: %d\n", + dbg->type); + return -EINVAL; + } + + ret = snprintf(ret_buf, sizeof(ret_buf), "0x%llx\n", out); + + return simple_read_from_buffer(buffer, count, ppos, ret_buf, ret); +} + +static const struct file_operations hinic_dbg_cmd_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = hinic_dbg_cmd_read, +}; + +static int create_dbg_files(struct hinic_dev *dev, enum hinic_dbg_type type, void *data, + struct dentry *root, struct hinic_debug_priv **dbg, char **field, + int nfile) +{ + struct hinic_debug_priv *tmp; + int i; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + tmp->dev = dev; + tmp->object = data; + tmp->type = type; + tmp->root = root; + + for (i = 0; i < nfile; i++) { + tmp->field_id[i] = i; + debugfs_create_file(field[i], 0400, root, &tmp->field_id[i], &hinic_dbg_cmd_fops); + } + + *dbg = tmp; + + return 0; +} + +static void rem_dbg_files(struct hinic_debug_priv *dbg) +{ + if (dbg->type != HINIC_DBG_FUNC_TABLE) + debugfs_remove_recursive(dbg->root); + + kfree(dbg); +} + +int hinic_sq_debug_add(struct hinic_dev *dev, u16 sq_id) +{ + struct hinic_sq *sq; + struct dentry *root; + char sub_dir[16]; + + sq = dev->txqs[sq_id].sq; + + sprintf(sub_dir, "0x%x", sq_id); + + root = debugfs_create_dir(sub_dir, dev->sq_dbgfs); + + return create_dbg_files(dev, HINIC_DBG_SQ_INFO, sq, root, &sq->dbg, sq_fields, + ARRAY_SIZE(sq_fields)); +} + +void hinic_sq_debug_rem(struct hinic_sq *sq) +{ + if (sq->dbg) + rem_dbg_files(sq->dbg); +} + +int hinic_rq_debug_add(struct hinic_dev *dev, u16 rq_id) +{ + struct hinic_rq *rq; + struct dentry *root; + char sub_dir[16]; + + rq = dev->rxqs[rq_id].rq; + + sprintf(sub_dir, "0x%x", rq_id); + + root = debugfs_create_dir(sub_dir, dev->rq_dbgfs); + + return create_dbg_files(dev, HINIC_DBG_RQ_INFO, rq, root, &rq->dbg, rq_fields, + ARRAY_SIZE(rq_fields)); +} + +void hinic_rq_debug_rem(struct hinic_rq *rq) +{ + if (rq->dbg) + rem_dbg_files(rq->dbg); +} + +int hinic_func_table_debug_add(struct hinic_dev *dev) +{ + if (HINIC_IS_VF(dev->hwdev->hwif)) + return 0; + + return create_dbg_files(dev, HINIC_DBG_FUNC_TABLE, dev, dev->func_tbl_dbgfs, &dev->dbg, + func_table_fields, ARRAY_SIZE(func_table_fields)); +} + +void hinic_func_table_debug_rem(struct hinic_dev *dev) +{ + if (!HINIC_IS_VF(dev->hwdev->hwif) && dev->dbg) + rem_dbg_files(dev->dbg); +} + +void hinic_sq_dbgfs_init(struct hinic_dev *nic_dev) +{ + nic_dev->sq_dbgfs = debugfs_create_dir("SQs", nic_dev->dbgfs_root); +} + +void hinic_sq_dbgfs_uninit(struct hinic_dev *nic_dev) +{ + debugfs_remove_recursive(nic_dev->sq_dbgfs); +} + +void hinic_rq_dbgfs_init(struct hinic_dev *nic_dev) +{ + nic_dev->rq_dbgfs = debugfs_create_dir("RQs", nic_dev->dbgfs_root); +} + +void hinic_rq_dbgfs_uninit(struct hinic_dev *nic_dev) +{ + 
debugfs_remove_recursive(nic_dev->rq_dbgfs); +} + +void hinic_func_tbl_dbgfs_init(struct hinic_dev *nic_dev) +{ + if (!HINIC_IS_VF(nic_dev->hwdev->hwif)) + nic_dev->func_tbl_dbgfs = debugfs_create_dir("func_table", nic_dev->dbgfs_root); +} + +void hinic_func_tbl_dbgfs_uninit(struct hinic_dev *nic_dev) +{ + if (!HINIC_IS_VF(nic_dev->hwdev->hwif)) + debugfs_remove_recursive(nic_dev->func_tbl_dbgfs); +} + +void hinic_dbg_init(struct hinic_dev *nic_dev) +{ + nic_dev->dbgfs_root = debugfs_create_dir(pci_name(nic_dev->hwdev->hwif->pdev), + hinic_dbgfs_root); +} + +void hinic_dbg_uninit(struct hinic_dev *nic_dev) +{ + debugfs_remove_recursive(nic_dev->dbgfs_root); + nic_dev->dbgfs_root = NULL; +} + +void hinic_dbg_register_debugfs(const char *debugfs_dir_name) +{ + hinic_dbgfs_root = debugfs_create_dir(debugfs_dir_name, NULL); +} + +void hinic_dbg_unregister_debugfs(void) +{ + debugfs_remove_recursive(hinic_dbgfs_root); + hinic_dbgfs_root = NULL; +} diff --git a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.h b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.h new file mode 100644 index 000000000000..e9e00cfa1329 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Huawei HiNIC PCI Express Linux driver + * Copyright(c) 2017 Huawei Technologies Co., Ltd + */ + +#ifndef HINIC_DEBUGFS_H +#define HINIC_DEBUGFS_H + +#include "hinic_dev.h" + +#define TBL_ID_FUNC_CFG_SM_NODE 11 +#define TBL_ID_FUNC_CFG_SM_INST 1 + +#define HINIC_FUNCTION_CONFIGURE_TABLE_SIZE 64 +#define HINIC_FUNCTION_CONFIGURE_TABLE 1 + +struct hinic_cmd_lt_rd { + u8 status; + u8 version; + u8 rsvd0[6]; + + unsigned char node; + unsigned char inst; + unsigned char entry_size; + unsigned char rsvd; + unsigned int lt_index; + unsigned int offset; + unsigned int len; + unsigned char data[100]; +}; + +struct tag_sml_funcfg_tbl { + union { + struct { + u32 rsvd0 :8; + u32 nic_rx_mode :5; + u32 rsvd1 :18; + u32 valid :1; + } bs; + + u32 value; + } dw0; + + union { + struct { + u32 vlan_id :12; + u32 vlan_mode :3; + u32 fast_recycled_mode :1; + u32 mtu :16; + } bs; + + u32 value; + } dw1; + + u32 dw2; + u32 dw3; + u32 dw4; + u32 dw5; + u32 dw6; + u32 dw7; + u32 dw8; + u32 dw9; + u32 dw10; + u32 dw11; + u32 dw12; + + union { + struct { + u32 rsvd2 :15; + u32 cfg_q_num :9; + u32 cfg_rq_depth :6; + u32 vhd_type :2; + } bs; + + u32 value; + } dw13; + + u32 dw14; + u32 dw15; +}; + +int hinic_sq_debug_add(struct hinic_dev *dev, u16 sq_id); + +void hinic_sq_debug_rem(struct hinic_sq *sq); + +int hinic_rq_debug_add(struct hinic_dev *dev, u16 rq_id); + +void hinic_rq_debug_rem(struct hinic_rq *rq); + +int hinic_func_table_debug_add(struct hinic_dev *dev); + +void hinic_func_table_debug_rem(struct hinic_dev *dev); + +void hinic_sq_dbgfs_init(struct hinic_dev *nic_dev); + +void hinic_sq_dbgfs_uninit(struct hinic_dev *nic_dev); + +void hinic_rq_dbgfs_init(struct hinic_dev *nic_dev); + +void hinic_rq_dbgfs_uninit(struct hinic_dev *nic_dev); + +void hinic_func_tbl_dbgfs_init(struct hinic_dev *nic_dev); + +void hinic_func_tbl_dbgfs_uninit(struct hinic_dev *nic_dev); + +void hinic_dbg_init(struct hinic_dev *nic_dev); + +void hinic_dbg_uninit(struct hinic_dev *nic_dev); + +void hinic_dbg_register_debugfs(const char *debugfs_dir_name); + +void hinic_dbg_unregister_debugfs(void); + +#endif diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h index 0a1e20edf7cf..fb3e89141a0d 100644 --- 
a/drivers/net/ethernet/huawei/hinic/hinic_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h @@ -58,6 +58,20 @@ struct hinic_intr_coal_info { u8 resend_timer_cfg; }; +enum hinic_dbg_type { + HINIC_DBG_SQ_INFO, + HINIC_DBG_RQ_INFO, + HINIC_DBG_FUNC_TABLE, +}; + +struct hinic_debug_priv { + struct hinic_dev *dev; + void *object; + enum hinic_dbg_type type; + struct dentry *root; + int field_id[64]; +}; + struct hinic_dev { struct net_device *netdev; struct hinic_hwdev *hwdev; @@ -97,6 +111,12 @@ struct hinic_dev { int lb_test_rx_idx; int lb_pkt_len; u8 *lb_test_rx_buf; + + struct dentry *dbgfs_root; + struct dentry *sq_dbgfs; + struct dentry *rq_dbgfs; + struct dentry *func_tbl_dbgfs; + struct hinic_debug_priv *dbg; struct devlink *devlink; bool cable_unplugged; bool module_unrecognized; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index 16bda7381ba0..2630d667f393 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -281,18 +281,14 @@ static int hinic_firmware_update(struct hinic_devlink_priv *priv, } static int hinic_devlink_flash_update(struct devlink *devlink, - const char *file_name, - const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct hinic_devlink_priv *priv = devlink_priv(devlink); const struct firmware *fw; int err; - if (component) - return -EOPNOTSUPP; - - err = request_firmware_direct(&fw, file_name, + err = request_firmware_direct(&fw, params->file_name, &priv->hwdev->hwif->pdev->dev); if (err) return err; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c index 29e88e25a4a4..4e4029d5c8e1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c @@ -373,7 +373,7 @@ static int wait_for_api_cmd_completion(struct hinic_api_cmd_chain *chain) * @chain: chain for the command * @dest: destination node on the card that will receive the command * @cmd: command data - * @size: the command size + * @cmd_size: the command size * * Return 0 - Success, negative - Failure **/ diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c index e0eb294779ec..5a6bbee819cd 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c @@ -784,7 +784,7 @@ static void free_cmdq(struct hinic_cmdq *cmdq) * init_cmdqs_ctxt - write the cmdq ctxt to HW after init all cmdq * @hwdev: the NIC HW device * @cmdqs: cmdqs to write the ctxts for - * &db_area: db_area for all the cmdqs + * @db_area: db_area for all the cmdqs * * Return 0 - Success, negative - Failure **/ diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 0c737765d113..0c74f6674634 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -437,6 +437,8 @@ static int get_base_qpn(struct hinic_hwdev *hwdev, u16 *base_qpn) /** * hinic_hwdev_ifup - Preparing the HW for passing IO * @hwdev: the NIC HW device + * @sq_depth: the send queue depth + * @rq_depth: the receive queue depth * * Return 0 - Success, negative - Failure **/ @@ -465,6 +467,7 @@ int hinic_hwdev_ifup(struct hinic_hwdev *hwdev, u16 sq_depth, u16 rq_depth) func_to_io->hwdev = hwdev; func_to_io->sq_depth = sq_depth; 
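Two separate cleanups meet in the hinic_devlink.c hunk above: the devlink ->flash_update() callback now receives a struct devlink_flash_update_params instead of bare string arguments, and the component check has moved into the devlink core, so the driver-side -EOPNOTSUPP branch disappears. A sketch of the resulting callback shape, with hypothetical demo_* names (the firmware-write helper is a placeholder, not a real API):

#include <linux/firmware.h>
#include <net/devlink.h>

struct demo_priv {
	struct device *dev;
};

/* placeholder: push fw->data / fw->size to the device */
static int demo_write_firmware(struct demo_priv *priv,
			       const struct firmware *fw)
{
	return 0;
}

static int demo_flash_update(struct devlink *devlink,
			     struct devlink_flash_update_params *params,
			     struct netlink_ext_ack *extack)
{
	struct demo_priv *priv = devlink_priv(devlink);
	const struct firmware *fw;
	int err;

	err = request_firmware_direct(&fw, params->file_name, priv->dev);
	if (err)
		return err;

	err = demo_write_firmware(priv, fw);
	release_firmware(fw);
	return err;
}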
func_to_io->rq_depth = rq_depth; + func_to_io->global_qpn = base_qpn; err = hinic_io_init(func_to_io, hwif, nic_cap->max_qps, num_ceqs, ceq_msix_entries); @@ -581,6 +584,7 @@ void hinic_hwdev_cb_unregister(struct hinic_hwdev *hwdev, /** * nic_mgmt_msg_handler - nic mgmt event handler * @handle: private data for the handler + * @cmd: message command * @buf_in: input buffer * @in_size: input size * @buf_out: output buffer @@ -908,6 +912,7 @@ int hinic_set_interrupt_cfg(struct hinic_hwdev *hwdev, /** * hinic_init_hwdev - Initialize the NIC HW * @pdev: the NIC pci device + * @devlink: the pointer of hinic devlink * * Return initialized NIC HW device * @@ -1120,7 +1125,7 @@ int hinic_hwdev_msix_cnt_set(struct hinic_hwdev *hwdev, u16 msix_index) * @msix_index: msix_index * @pending_limit: the maximum pending interrupt events (unit 8) * @coalesc_timer: coalesc period for interrupt (unit 8 us) - * @lli_timer: replenishing period for low latency credit (unit 8 us) + * @lli_timer_cfg: replenishing period for low latency credit (unit 8 us) * @lli_credit_limit: maximum credits for low latency msix messages (unit 8) * @resend_timer: maximum wait for resending msix (unit coalesc period) * diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index 701eb81e09a7..416492e48274 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -96,6 +96,8 @@ enum hinic_port_cmd { HINIC_PORT_CMD_RSS_TEMP_MGR = 49, + HINIC_PORT_CMD_RD_LINE_TBL = 57, + HINIC_PORT_CMD_RSS_CFG = 66, HINIC_PORT_CMD_FWCTXT_INIT = 69, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c index ca8cb68a8d20..19942fef99d9 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c @@ -106,7 +106,7 @@ enum eq_arm_state { * @aeqs: pointer to Async eqs of the chip * @event: aeq event to register callback for it * @handle: private data will be used by the callback - * @hw_handler: callback function + * @hwe_handler: callback function **/ void hinic_aeq_register_hw_cb(struct hinic_aeqs *aeqs, enum hinic_aeq_type event, void *handle, @@ -188,6 +188,7 @@ static u8 eq_cons_idx_checksum_set(u32 val) /** * eq_update_ci - update the HW cons idx of event queue * @eq: the event queue to update the cons idx for + * @arm_state: the arm bit value of eq's interrupt **/ static void eq_update_ci(struct hinic_eq *eq, u32 arm_state) { @@ -368,11 +369,11 @@ static void eq_irq_work(struct work_struct *work) /** * ceq_tasklet - the tasklet of the EQ that received the event - * @ceq_data: the eq + * @t: the tasklet struct pointer **/ -static void ceq_tasklet(unsigned long ceq_data) +static void ceq_tasklet(struct tasklet_struct *t) { - struct hinic_eq *ceq = (struct hinic_eq *)ceq_data; + struct hinic_eq *ceq = from_tasklet(ceq, t, ceq_tasklet); eq_irq_handler(ceq); } @@ -782,8 +783,7 @@ static int init_eq(struct hinic_eq *eq, struct hinic_hwif *hwif, INIT_WORK(&aeq_work->work, eq_irq_work); } else if (type == HINIC_CEQ) { - tasklet_init(&eq->ceq_tasklet, ceq_tasklet, - (unsigned long)eq); + tasklet_setup(&eq->ceq_tasklet, ceq_tasklet); } /* set the attributes of the msix entry */ @@ -794,12 +794,15 @@ static int init_eq(struct hinic_eq *eq, struct hinic_hwif *hwif, HINIC_EQ_MSIX_LLI_CREDIT_LIMIT_DEFAULT, HINIC_EQ_MSIX_RESEND_TIMER_DEFAULT); - if (type == HINIC_AEQ) - err = request_irq(entry.vector, aeq_interrupt, 0, - "hinic_aeq",
eq); - else if (type == HINIC_CEQ) - err = request_irq(entry.vector, ceq_interrupt, 0, - "hinic_ceq", eq); + if (type == HINIC_AEQ) { + snprintf(eq->irq_name, sizeof(eq->irq_name), "hinic_aeq%d@pci:%s", eq->q_id, + pci_name(pdev)); + err = request_irq(entry.vector, aeq_interrupt, 0, eq->irq_name, eq); + } else if (type == HINIC_CEQ) { + snprintf(eq->irq_name, sizeof(eq->irq_name), "hinic_ceq%d@pci:%s", eq->q_id, + pci_name(pdev)); + err = request_irq(entry.vector, ceq_interrupt, 0, eq->irq_name, eq); + } if (err) { dev_err(&pdev->dev, "Failed to request irq for the EQ\n"); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h index 43065fc70869..2f3222174fc7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h @@ -186,6 +186,7 @@ struct hinic_eq { int num_elem_in_pg; struct msix_entry msix_entry; + char irq_name[64]; dma_addr_t *dma_addr; void **virt_addr; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c index bc8925c0c982..efbaed389440 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c @@ -230,6 +230,7 @@ static int wait_hwif_ready(struct hinic_hwif *hwif) * @hwif: the HW interface of a pci function device * @attr0: the first attribute that was read from the hw * @attr1: the second attribute that was read from the hw + * @attr2: the third attribute that was read from the hw **/ static void set_hwif_attr(struct hinic_hwif *hwif, u32 attr0, u32 attr1, u32 attr2) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c index 3e3fa742e476..4ef4008e65bd 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c @@ -305,6 +305,7 @@ static int init_qp(struct hinic_func_to_io *func_to_io, func_to_io->sq_db[q_id] = db_base; + qp->sq.qid = q_id; err = hinic_init_sq(&qp->sq, hwif, &func_to_io->sq_wq[q_id], sq_msix_entry, CI_ADDR(func_to_io->ci_addr_base, q_id), @@ -314,6 +315,7 @@ static int init_qp(struct hinic_func_to_io *func_to_io, goto err_sq_init; } + qp->rq.qid = q_id; err = hinic_init_rq(&qp->rq, hwif, &func_to_io->rq_wq[q_id], rq_msix_entry); if (err) { @@ -361,8 +363,8 @@ static void destroy_qp(struct hinic_func_to_io *func_to_io, * @func_to_io: func to io channel that holds the IO components * @base_qpn: base qp number * @num_qps: number queue pairs to create - * @sq_msix_entry: msix entries for sq - * @rq_msix_entry: msix entries for rq + * @sq_msix_entries: msix entries for sq + * @rq_msix_entries: msix entries for rq * * Return 0 - Success, negative - Failure **/ diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h index ee6d60762d84..52159a90278a 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h @@ -59,6 +59,7 @@ struct hinic_nic_cfg { struct hinic_func_to_io { struct hinic_hwif *hwif; struct hinic_hwdev *hwdev; + u16 global_qpn; struct hinic_ceqs ceqs; struct hinic_wqs wqs; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index 2ebae6cb5db5..819fa13034c0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -238,6 +238,7 @@ static int send_msg_to_mgmt(struct hinic_pf_to_mgmt 
*pf_to_mgmt, * @out_size: response length * @direction: the direction of the original message * @resp_msg_id: msg id to response for + * @timeout: time-out period of waiting for response * * Return 0 - Success, negative - Failure **/ diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h index ca3e2d060284..0dfa51ad5855 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h @@ -81,6 +81,8 @@ struct hinic_sq { struct hinic_wq *wq; + u16 qid; + u32 irq; u16 msix_entry; @@ -90,6 +92,7 @@ struct hinic_sq { void __iomem *db_base; struct sk_buff **saved_skb; + struct hinic_debug_priv *dbg; }; struct hinic_rq { @@ -97,6 +100,8 @@ struct hinic_rq { struct hinic_wq *wq; + u16 qid; + struct cpumask affinity_mask; u32 irq; u16 msix_entry; @@ -110,6 +115,7 @@ struct hinic_rq { u16 *pi_virt_addr; dma_addr_t pi_dma_addr; + struct hinic_debug_priv *dbg; }; struct hinic_qp { diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 28581bd8ce07..350225bbe0be 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -24,6 +24,7 @@ #include <linux/delay.h> #include <linux/err.h> +#include "hinic_debugfs.h" #include "hinic_hw_qp.h" #include "hinic_hw_dev.h" #include "hinic_devlink.h" @@ -153,6 +154,8 @@ static int create_txqs(struct hinic_dev *nic_dev) if (!nic_dev->txqs) return -ENOMEM; + hinic_sq_dbgfs_init(nic_dev); + for (i = 0; i < num_txqs; i++) { struct hinic_sq *sq = hinic_hwdev_get_sq(nic_dev->hwdev, i); @@ -162,13 +165,27 @@ static int create_txqs(struct hinic_dev *nic_dev) "Failed to init Txq\n"); goto err_init_txq; } + + err = hinic_sq_debug_add(nic_dev, i); + if (err) { + netif_err(nic_dev, drv, netdev, + "Failed to add SQ%d debug\n", i); + goto err_add_sq_dbg; + } + } return 0; +err_add_sq_dbg: + hinic_clean_txq(&nic_dev->txqs[i]); err_init_txq: - for (j = 0; j < i; j++) + for (j = 0; j < i; j++) { + hinic_sq_debug_rem(nic_dev->txqs[j].sq); hinic_clean_txq(&nic_dev->txqs[j]); + } + + hinic_sq_dbgfs_uninit(nic_dev); devm_kfree(&netdev->dev, nic_dev->txqs); return err; @@ -204,8 +221,12 @@ static void free_txqs(struct hinic_dev *nic_dev) if (!nic_dev->txqs) return; - for (i = 0; i < num_txqs; i++) + for (i = 0; i < num_txqs; i++) { + hinic_sq_debug_rem(nic_dev->txqs[i].sq); hinic_clean_txq(&nic_dev->txqs[i]); + } + + hinic_sq_dbgfs_uninit(nic_dev); devm_kfree(&netdev->dev, nic_dev->txqs); nic_dev->txqs = NULL; @@ -231,6 +252,8 @@ static int create_rxqs(struct hinic_dev *nic_dev) if (!nic_dev->rxqs) return -ENOMEM; + hinic_rq_dbgfs_init(nic_dev); + for (i = 0; i < num_rxqs; i++) { struct hinic_rq *rq = hinic_hwdev_get_rq(nic_dev->hwdev, i); @@ -240,13 +263,26 @@ static int create_rxqs(struct hinic_dev *nic_dev) "Failed to init rxq\n"); goto err_init_rxq; } + + err = hinic_rq_debug_add(nic_dev, i); + if (err) { + netif_err(nic_dev, drv, netdev, + "Failed to add RQ%d debug\n", i); + goto err_add_rq_dbg; + } } return 0; +err_add_rq_dbg: + hinic_clean_rxq(&nic_dev->rxqs[i]); err_init_rxq: - for (j = 0; j < i; j++) + for (j = 0; j < i; j++) { + hinic_rq_debug_rem(nic_dev->rxqs[j].rq); hinic_clean_rxq(&nic_dev->rxqs[j]); + } + + hinic_rq_dbgfs_uninit(nic_dev); devm_kfree(&netdev->dev, nic_dev->rxqs); return err; @@ -264,8 +300,12 @@ static void free_rxqs(struct hinic_dev *nic_dev) if (!nic_dev->rxqs) return; - for (i = 0; i < num_rxqs; i++) + for (i = 0; i < num_rxqs; i++) { + 
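The error unwinding added to create_txqs()/create_rxqs() above is the standard two-level goto ladder: the failing iteration undoes only the sub-step it completed, the loop under the lower label tears down iterations that finished both steps, and shared state (the parent debugfs directory) goes last, in reverse order of setup. Schematically, with placeholder demo_* helpers:

struct demo_dev;

int demo_init_queue(struct demo_dev *dev, int i);
int demo_debug_add(struct demo_dev *dev, int i);
void demo_debug_rem(struct demo_dev *dev, int i);
void demo_clean_queue(struct demo_dev *dev, int i);
void demo_dbgfs_init(struct demo_dev *dev);
void demo_dbgfs_uninit(struct demo_dev *dev);

int demo_create_queues(struct demo_dev *dev, int num)
{
	int i, j, err;

	demo_dbgfs_init(dev);		/* parent debugfs dir first */

	for (i = 0; i < num; i++) {
		err = demo_init_queue(dev, i);
		if (err)
			goto err_init_queue;

		err = demo_debug_add(dev, i);
		if (err)
			goto err_add_dbg;
	}

	return 0;

err_add_dbg:
	demo_clean_queue(dev, i);	/* queue i exists, its debugfs entry does not */
err_init_queue:
	for (j = 0; j < i; j++) {	/* queues 0..i-1 completed both steps */
		demo_debug_rem(dev, j);
		demo_clean_queue(dev, j);
	}

	demo_dbgfs_uninit(dev);
	return err;
}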
hinic_rq_debug_rem(nic_dev->rxqs[i].rq); hinic_clean_rxq(&nic_dev->rxqs[i]); + } + + hinic_rq_dbgfs_uninit(nic_dev); devm_kfree(&netdev->dev, nic_dev->rxqs); nic_dev->rxqs = NULL; @@ -913,11 +953,16 @@ static void netdev_features_init(struct net_device *netdev) netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXCSUM | NETIF_F_LRO | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->vlan_features = netdev->hw_features; netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; + + netdev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_UDP_TUNNEL; } static void hinic_refresh_nic_cfg(struct hinic_dev *nic_dev) @@ -945,7 +990,7 @@ static void hinic_refresh_nic_cfg(struct hinic_dev *nic_dev) * @handle: nic device for the handler * @buf_in: input buffer * @in_size: input size - * @buf_in: output buffer + * @buf_out: output buffer * @out_size: returned output size * * Return 0 - Success, negative - Failure @@ -1284,6 +1329,16 @@ static int nic_dev_init(struct pci_dev *pdev) goto err_init_intr; } + hinic_dbg_init(nic_dev); + + hinic_func_tbl_dbgfs_init(nic_dev); + + err = hinic_func_table_debug_add(nic_dev); + if (err) { + dev_err(&pdev->dev, "Failed to add func_table debug\n"); + goto err_add_func_table_dbg; + } + err = register_netdev(netdev); if (err) { dev_err(&pdev->dev, "Failed to register netdev\n"); @@ -1293,6 +1348,10 @@ static int nic_dev_init(struct pci_dev *pdev) return 0; err_reg_netdev: + hinic_func_table_debug_rem(nic_dev); +err_add_func_table_dbg: + hinic_func_tbl_dbgfs_uninit(nic_dev); + hinic_dbg_uninit(nic_dev); hinic_free_intr_coalesce(nic_dev); err_init_intr: err_set_pfc: @@ -1415,6 +1474,12 @@ static void hinic_remove(struct pci_dev *pdev) unregister_netdev(netdev); + hinic_func_table_debug_rem(nic_dev); + + hinic_func_tbl_dbgfs_uninit(nic_dev); + + hinic_dbg_uninit(nic_dev); + hinic_free_intr_coalesce(nic_dev); hinic_port_del_mac(nic_dev, netdev->dev_addr, 0); @@ -1469,4 +1534,17 @@ static struct pci_driver hinic_driver = { .sriov_configure = hinic_pci_sriov_configure, }; -module_pci_driver(hinic_driver); +static int __init hinic_module_init(void) +{ + hinic_dbg_register_debugfs(HINIC_DRV_NAME); + return pci_register_driver(&hinic_driver); +} + +static void __exit hinic_module_exit(void) +{ + pci_unregister_driver(&hinic_driver); + hinic_dbg_unregister_debugfs(); +} + +module_init(hinic_module_init); +module_exit(hinic_module_exit); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index d0072f5e7efc..070a7cc6392e 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -595,7 +595,7 @@ int hinic_init_rxq(struct hinic_rxq *rxq, struct hinic_rq *rq, rxq_stats_init(rxq); rxq->irq_name = devm_kasprintf(&netdev->dev, GFP_KERNEL, - "hinic_rxq%d", qp->q_id); + "%s_rxq%d", netdev->name, qp->q_id); if (!rxq->irq_name) return -ENOMEM; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index c1f81e9144a1..8da7d46363b2 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -357,6 +357,7 @@ static int offload_csum(struct 
hinic_sq_task *task, u32 *queue_info, enum hinic_l4_offload_type l4_offload; u32 offset, l4_len, network_hdr_len; enum hinic_l3_offload_type l3_type; + u32 tunnel_type = NOT_TUNNEL; union hinic_l3 ip; union hinic_l4 l4; u8 l4_proto; @@ -367,27 +368,55 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info, if (skb->encapsulation) { u32 l4_tunnel_len; + tunnel_type = TUNNEL_UDP_NO_CSUM; ip.hdr = skb_network_header(skb); - if (ip.v4->version == 4) + if (ip.v4->version == 4) { l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD; - else if (ip.v4->version == 6) + l4_proto = ip.v4->protocol; + } else if (ip.v4->version == 6) { + unsigned char *exthdr; + __be16 frag_off; l3_type = IPV6_PKT; - else + tunnel_type = TUNNEL_UDP_CSUM; + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + l4.hdr = skb_transport_header(skb); + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } else { l3_type = L3TYPE_UNKNOWN; + l4_proto = IPPROTO_RAW; + } hinic_task_set_outter_l3(task, l3_type, skb_network_header_len(skb)); - l4_tunnel_len = skb_inner_network_offset(skb) - - skb_transport_offset(skb); - - hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM, - l4_tunnel_len); + switch (l4_proto) { + case IPPROTO_UDP: + l4_tunnel_len = skb_inner_network_offset(skb) - + skb_transport_offset(skb); + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + network_hdr_len = skb_inner_network_header_len(skb); + break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + tunnel_type = NOT_TUNNEL; + l4_tunnel_len = 0; + + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_transport_header(skb); + network_hdr_len = skb_network_header_len(skb); + break; + default: + /* Unsupported tunnel packet, disable csum offload */ + skb_checksum_help(skb); + return 0; + } - ip.hdr = skb_inner_network_header(skb); - l4.hdr = skb_inner_transport_header(skb); - network_hdr_len = skb_inner_network_header_len(skb); + hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len); } else { ip.hdr = skb_network_header(skb); l4.hdr = skb_transport_header(skb); @@ -853,14 +882,14 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq, goto err_alloc_free_sges; } - irqname_len = snprintf(NULL, 0, "hinic_txq%d", qp->q_id) + 1; + irqname_len = snprintf(NULL, 0, "%s_txq%d", netdev->name, qp->q_id) + 1; txq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL); if (!txq->irq_name) { err = -ENOMEM; goto err_alloc_irqname; } - sprintf(txq->irq_name, "hinic_txq%d", qp->q_id); + sprintf(txq->irq_name, "%s_txq%d", netdev->name, qp->q_id); err = hinic_hwdev_hw_ci_addr_set(hwdev, sq, CI_UPDATE_NO_PENDING, CI_UPDATE_NO_COALESC); diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 3153d62cc73e..c2e740475786 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -1212,9 +1212,9 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) } } -static void ehea_neq_tasklet(unsigned long data) +static void ehea_neq_tasklet(struct tasklet_struct *t) { - struct ehea_adapter *adapter = (struct ehea_adapter *)data; + struct ehea_adapter *adapter = from_tasklet(adapter, t, neq_tasklet); struct ehea_eqe *eqe; u64 event_mask; @@ -3417,8 +3417,7 @@ static int ehea_probe_adapter(struct platform_device *dev) goto out_free_ad; } - tasklet_init(&adapter->neq_tasklet, ehea_neq_tasklet, - (unsigned long)adapter); + tasklet_setup(&adapter->neq_tasklet, ehea_neq_tasklet); ret = 
ehea_create_device_sysfs(dev); if (ret) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index c5c732601e35..7ef3369953b6 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1349,6 +1349,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) int offset = ibmveth_rxq_frame_offset(adapter); int csum_good = ibmveth_rxq_csum_good(adapter); int lrg_pkt = ibmveth_rxq_large_packet(adapter); + __sum16 iph_check = 0; skb = ibmveth_rxq_get_buffer(adapter); @@ -1385,16 +1386,26 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) skb_put(skb, length); skb->protocol = eth_type_trans(skb, netdev); - if (csum_good) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - ibmveth_rx_csum_helper(skb, adapter); + /* PHYP without PLSO support places a -1 in the ip + * checksum for large send frames. + */ + if (skb->protocol == cpu_to_be16(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + iph_check = iph->check; } - if (length > netdev->mtu + ETH_HLEN) { + if ((length > netdev->mtu + ETH_HLEN) || + lrg_pkt || iph_check == 0xffff) { ibmveth_rx_mss_helper(skb, mss, lrg_pkt); adapter->rx_large_packets++; } + if (csum_good) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + ibmveth_rx_csum_helper(skb, adapter); + } + napi_gro_receive(napi, skb); /* send it up */ netdev->stats.rx_packets++; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 1b702a43a5d0..1f7fe6b3dd5a 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -97,15 +97,14 @@ static int pending_scrq(struct ibmvnic_adapter *, static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *, struct ibmvnic_sub_crq_queue *); static int ibmvnic_poll(struct napi_struct *napi, int data); -static void send_map_query(struct ibmvnic_adapter *adapter); +static void send_query_map(struct ibmvnic_adapter *adapter); static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8); static int send_request_unmap(struct ibmvnic_adapter *, u8); static int send_login(struct ibmvnic_adapter *adapter); -static void send_cap_queries(struct ibmvnic_adapter *adapter); +static void send_query_cap(struct ibmvnic_adapter *adapter); static int init_sub_crqs(struct ibmvnic_adapter *); static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); -static int ibmvnic_init(struct ibmvnic_adapter *); -static int ibmvnic_reset_init(struct ibmvnic_adapter *); +static int ibmvnic_reset_init(struct ibmvnic_adapter *, bool reset); static void release_crq_queue(struct ibmvnic_adapter *); static int __ibmvnic_set_mac(struct net_device *, u8 *); static int init_crq_queue(struct ibmvnic_adapter *adapter); @@ -297,8 +296,7 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) { int i; - for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - i++) + for (i = 0; i < adapter->num_active_rx_pools; i++) adapter->rx_pool[i].active = 0; } @@ -306,6 +304,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, struct ibmvnic_rx_pool *pool) { int count = pool->size - atomic_read(&pool->available); + u64 handle = adapter->rx_scrq[pool->index]->handle; struct device *dev = &adapter->vdev->dev; int buffers_added = 0; unsigned long lpar_rc; @@ -314,7 +313,6 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, unsigned int offset; dma_addr_t dma_addr; unsigned char *dst; - u64 *handle_array; int shift = 0; int index; int i; @@ -322,10 +320,6 @@ static void 
replenish_rx_pool(struct ibmvnic_adapter *adapter, if (!pool->active) return; - handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf-> - off_rxadd_subcrqs)); - for (i = 0; i < count; ++i) { skb = alloc_skb(pool->buff_size, GFP_ATOMIC); if (!skb) { @@ -369,8 +363,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, #endif sub_crq.rx_add.len = cpu_to_be32(pool->buff_size << shift); - lpar_rc = send_subcrq(adapter, handle_array[pool->index], - &sub_crq); + lpar_rc = send_subcrq(adapter, handle, &sub_crq); if (lpar_rc != H_SUCCESS) goto failure; @@ -407,8 +400,7 @@ static void replenish_pools(struct ibmvnic_adapter *adapter) int i; adapter->replenish_task_cycles++; - for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - i++) { + for (i = 0; i < adapter->num_active_rx_pools; i++) { if (adapter->rx_pool[i].active) replenish_rx_pool(adapter, &adapter->rx_pool[i]); } @@ -475,25 +467,23 @@ static int init_stats_token(struct ibmvnic_adapter *adapter) static int reset_rx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_rx_pool *rx_pool; + u64 buff_size; int rx_scrqs; int i, j, rc; - u64 *size_array; if (!adapter->rx_pool) return -1; - size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); - - rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); + buff_size = adapter->cur_rx_buf_sz; + rx_scrqs = adapter->num_active_rx_pools; for (i = 0; i < rx_scrqs; i++) { rx_pool = &adapter->rx_pool[i]; netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i); - if (rx_pool->buff_size != be64_to_cpu(size_array[i])) { + if (rx_pool->buff_size != buff_size) { free_long_term_buff(adapter, &rx_pool->long_term_buff); - rx_pool->buff_size = be64_to_cpu(size_array[i]); + rx_pool->buff_size = buff_size; rc = alloc_long_term_buff(adapter, &rx_pool->long_term_buff, rx_pool->size * @@ -561,13 +551,11 @@ static int init_rx_pools(struct net_device *netdev) struct device *dev = &adapter->vdev->dev; struct ibmvnic_rx_pool *rx_pool; int rxadd_subcrqs; - u64 *size_array; + u64 buff_size; int i, j; - rxadd_subcrqs = - be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); + rxadd_subcrqs = adapter->num_active_rx_scrqs; + buff_size = adapter->cur_rx_buf_sz; adapter->rx_pool = kcalloc(rxadd_subcrqs, sizeof(struct ibmvnic_rx_pool), @@ -585,11 +573,11 @@ static int init_rx_pools(struct net_device *netdev) netdev_dbg(adapter->netdev, "Initializing rx_pool[%d], %lld buffs, %lld bytes each\n", i, adapter->req_rx_add_entries_per_subcrq, - be64_to_cpu(size_array[i])); + buff_size); rx_pool->size = adapter->req_rx_add_entries_per_subcrq; rx_pool->index = i; - rx_pool->buff_size = be64_to_cpu(size_array[i]); + rx_pool->buff_size = buff_size; rx_pool->active = 1; rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int), @@ -655,7 +643,7 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) if (!adapter->tx_pool) return -1; - tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); + tx_scrqs = adapter->num_active_tx_pools; for (i = 0; i < tx_scrqs; i++) { rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]); if (rc) @@ -744,7 +732,7 @@ static int init_tx_pools(struct net_device *netdev) int tx_subcrqs; int i, rc; - tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); + tx_subcrqs = adapter->num_active_tx_scrqs; adapter->tx_pool = 
kcalloc(tx_subcrqs, sizeof(struct ibmvnic_tx_pool), GFP_KERNEL); if (!adapter->tx_pool) @@ -894,7 +882,7 @@ static int ibmvnic_login(struct net_device *netdev) "Received partial success, retrying...\n"); adapter->init_done_rc = 0; reinit_completion(&adapter->init_done); - send_cap_queries(adapter); + send_query_cap(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { netdev_warn(netdev, @@ -980,7 +968,7 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state) return -1; } - if (adapter->init_done_rc == 1) { + if (adapter->init_done_rc == PARTIALSUCCESS) { /* Partuial success, delay and re-send */ mdelay(1000); resend = true; @@ -1125,7 +1113,7 @@ static int init_resources(struct ibmvnic_adapter *adapter) if (rc) return rc; - send_map_query(adapter); + send_query_map(adapter); rc = init_rx_pools(netdev); if (rc) @@ -1530,9 +1518,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int offset; int num_entries = 1; unsigned char *dst; - u64 *handle_array; int index = 0; u8 proto = 0; + u64 handle; netdev_tx_t ret = NETDEV_TX_OK; if (test_bit(0, &adapter->resetting)) { @@ -1559,8 +1547,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_scrq = adapter->tx_scrq[queue_num]; txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); - handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); + handle = tx_scrq->handle; index = tx_pool->free_map[tx_pool->consumer_index]; @@ -1672,14 +1659,14 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_OK; goto tx_err_out; } - lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num], + lpar_rc = send_subcrq_indirect(adapter, handle, (u64)tx_buff->indir_dma, (u64)num_entries); dma_unmap_single(dev, tx_buff->indir_dma, sizeof(tx_buff->indir_arr), DMA_TO_DEVICE); } else { tx_buff->num_entries = num_entries; - lpar_rc = send_subcrq(adapter, handle_array[queue_num], + lpar_rc = send_subcrq(adapter, handle, &tx_crq); } if (lpar_rc != H_SUCCESS) { @@ -1874,7 +1861,7 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter, return rc; } - rc = ibmvnic_reset_init(adapter); + rc = ibmvnic_reset_init(adapter, true); if (rc) return IBMVNIC_INIT_FAILED; @@ -1992,7 +1979,7 @@ static int do_reset(struct ibmvnic_adapter *adapter, goto out; } - rc = ibmvnic_reset_init(adapter); + rc = ibmvnic_reset_init(adapter, true); if (rc) { rc = IBMVNIC_INIT_FAILED; goto out; @@ -2108,7 +2095,7 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, return rc; } - rc = ibmvnic_init(adapter); + rc = ibmvnic_reset_init(adapter, false); if (rc) return rc; @@ -3312,7 +3299,7 @@ tx_failed: return -1; } -static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) +static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) { struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; @@ -3583,8 +3570,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, if (rc) { if (rc == H_CLOSED) { dev_warn(dev, "CRQ Queue closed\n"); - if (test_bit(0, &adapter->resetting)) - ibmvnic_reset(adapter, VNIC_RESET_FATAL); + /* do not reset, report the fail, wait for passive init from server */ } dev_warn(dev, "Send error (rc=%d)\n", rc); @@ -3595,14 +3581,31 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter) { + struct device *dev = 
&adapter->vdev->dev; union ibmvnic_crq crq; + int retries = 100; + int rc; memset(&crq, 0, sizeof(crq)); crq.generic.first = IBMVNIC_CRQ_INIT_CMD; crq.generic.cmd = IBMVNIC_CRQ_INIT; netdev_dbg(adapter->netdev, "Sending CRQ init\n"); - return ibmvnic_send_crq(adapter, &crq); + do { + rc = ibmvnic_send_crq(adapter, &crq); + if (rc != H_CLOSED) + break; + retries--; + msleep(50); + + } while (retries > 0); + + if (rc) { + dev_err(dev, "Failed to send init request, rc = %d\n", rc); + return rc; + } + + return 0; } static int send_version_xchg(struct ibmvnic_adapter *adapter) @@ -3822,7 +3825,7 @@ static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id) return ibmvnic_send_crq(adapter, &crq); } -static void send_map_query(struct ibmvnic_adapter *adapter) +static void send_query_map(struct ibmvnic_adapter *adapter) { union ibmvnic_crq crq; @@ -3833,7 +3836,7 @@ static void send_map_query(struct ibmvnic_adapter *adapter) } /* Send a series of CRQs requesting various capabilities of the VNIC server */ -static void send_cap_queries(struct ibmvnic_adapter *adapter) +static void send_query_cap(struct ibmvnic_adapter *adapter) { union ibmvnic_crq crq; @@ -3950,6 +3953,113 @@ static void send_cap_queries(struct ibmvnic_adapter *adapter) ibmvnic_send_crq(adapter, &crq); } +static void send_query_ip_offload(struct ibmvnic_adapter *adapter) +{ + int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer); + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + + adapter->ip_offload_tok = + dma_map_single(dev, + &adapter->ip_offload_buf, + buf_sz, + DMA_FROM_DEVICE); + + if (dma_mapping_error(dev, adapter->ip_offload_tok)) { + if (!firmware_has_feature(FW_FEATURE_CMO)) + dev_err(dev, "Couldn't map offload buffer\n"); + return; + } + + memset(&crq, 0, sizeof(crq)); + crq.query_ip_offload.first = IBMVNIC_CRQ_CMD; + crq.query_ip_offload.cmd = QUERY_IP_OFFLOAD; + crq.query_ip_offload.len = cpu_to_be32(buf_sz); + crq.query_ip_offload.ioba = + cpu_to_be32(adapter->ip_offload_tok); + + ibmvnic_send_crq(adapter, &crq); +} + +static void send_control_ip_offload(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_control_ip_offload_buffer *ctrl_buf = &adapter->ip_offload_ctrl; + struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + struct device *dev = &adapter->vdev->dev; + netdev_features_t old_hw_features = 0; + union ibmvnic_crq crq; + + adapter->ip_offload_ctrl_tok = + dma_map_single(dev, + ctrl_buf, + sizeof(adapter->ip_offload_ctrl), + DMA_TO_DEVICE); + + if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) { + dev_err(dev, "Couldn't map ip offload control buffer\n"); + return; + } + + ctrl_buf->len = cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + ctrl_buf->version = cpu_to_be32(INITIAL_VERSION_IOB); + ctrl_buf->ipv4_chksum = buf->ipv4_chksum; + ctrl_buf->ipv6_chksum = buf->ipv6_chksum; + ctrl_buf->tcp_ipv4_chksum = buf->tcp_ipv4_chksum; + ctrl_buf->udp_ipv4_chksum = buf->udp_ipv4_chksum; + ctrl_buf->tcp_ipv6_chksum = buf->tcp_ipv6_chksum; + ctrl_buf->udp_ipv6_chksum = buf->udp_ipv6_chksum; + ctrl_buf->large_tx_ipv4 = buf->large_tx_ipv4; + ctrl_buf->large_tx_ipv6 = buf->large_tx_ipv6; + + /* large_rx disabled for now, additional features needed */ + ctrl_buf->large_rx_ipv4 = 0; + ctrl_buf->large_rx_ipv6 = 0; + + if (adapter->state != VNIC_PROBING) { + old_hw_features = adapter->netdev->hw_features; + adapter->netdev->hw_features = 0; + } + + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; + + if (buf->tcp_ipv4_chksum || 
buf->udp_ipv4_chksum) + adapter->netdev->hw_features |= NETIF_F_IP_CSUM; + + if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) + adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; + + if ((adapter->netdev->features & + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) + adapter->netdev->hw_features |= NETIF_F_RXCSUM; + + if (buf->large_tx_ipv4) + adapter->netdev->hw_features |= NETIF_F_TSO; + if (buf->large_tx_ipv6) + adapter->netdev->hw_features |= NETIF_F_TSO6; + + if (adapter->state == VNIC_PROBING) { + adapter->netdev->features |= adapter->netdev->hw_features; + } else if (old_hw_features != adapter->netdev->hw_features) { + netdev_features_t tmp = 0; + + /* disable features no longer supported */ + adapter->netdev->features &= adapter->netdev->hw_features; + /* turn on features now supported if previously enabled */ + tmp = (old_hw_features ^ adapter->netdev->hw_features) & + adapter->netdev->hw_features; + adapter->netdev->features |= + tmp & adapter->netdev->wanted_features; + } + + memset(&crq, 0, sizeof(crq)); + crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; + crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; + crq.control_ip_offload.len = + cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok); + ibmvnic_send_crq(adapter, &crq); +} + static void handle_vpd_size_rsp(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { @@ -4019,8 +4129,6 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; - netdev_features_t old_hw_features = 0; - union ibmvnic_crq crq; int i; dma_unmap_single(dev, adapter->ip_offload_tok, @@ -4070,74 +4178,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "off_ipv6_ext_hd = %d\n", buf->off_ipv6_ext_headers); - adapter->ip_offload_ctrl_tok = - dma_map_single(dev, &adapter->ip_offload_ctrl, - sizeof(adapter->ip_offload_ctrl), DMA_TO_DEVICE); - - if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) { - dev_err(dev, "Couldn't map ip offload control buffer\n"); - return; - } - - adapter->ip_offload_ctrl.len = - cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); - adapter->ip_offload_ctrl.version = cpu_to_be32(INITIAL_VERSION_IOB); - adapter->ip_offload_ctrl.ipv4_chksum = buf->ipv4_chksum; - adapter->ip_offload_ctrl.ipv6_chksum = buf->ipv6_chksum; - adapter->ip_offload_ctrl.tcp_ipv4_chksum = buf->tcp_ipv4_chksum; - adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum; - adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum; - adapter->ip_offload_ctrl.udp_ipv6_chksum = buf->udp_ipv6_chksum; - adapter->ip_offload_ctrl.large_tx_ipv4 = buf->large_tx_ipv4; - adapter->ip_offload_ctrl.large_tx_ipv6 = buf->large_tx_ipv6; - - /* large_rx disabled for now, additional features needed */ - adapter->ip_offload_ctrl.large_rx_ipv4 = 0; - adapter->ip_offload_ctrl.large_rx_ipv6 = 0; - - if (adapter->state != VNIC_PROBING) { - old_hw_features = adapter->netdev->hw_features; - adapter->netdev->hw_features = 0; - } - - adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; - - if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) - adapter->netdev->hw_features |= NETIF_F_IP_CSUM; - - if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) - adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; - - if ((adapter->netdev->features & - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) - adapter->netdev->hw_features |= 
NETIF_F_RXCSUM; - - if (buf->large_tx_ipv4) - adapter->netdev->hw_features |= NETIF_F_TSO; - if (buf->large_tx_ipv6) - adapter->netdev->hw_features |= NETIF_F_TSO6; - - if (adapter->state == VNIC_PROBING) { - adapter->netdev->features |= adapter->netdev->hw_features; - } else if (old_hw_features != adapter->netdev->hw_features) { - netdev_features_t tmp = 0; - - /* disable features no longer supported */ - adapter->netdev->features &= adapter->netdev->hw_features; - /* turn on features now supported if previously enabled */ - tmp = (old_hw_features ^ adapter->netdev->hw_features) & - adapter->netdev->hw_features; - adapter->netdev->features |= - tmp & adapter->netdev->wanted_features; - } - - memset(&crq, 0, sizeof(crq)); - crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; - crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; - crq.control_ip_offload.len = - cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); - crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok); - ibmvnic_send_crq(adapter, &crq); + send_control_ip_offload(adapter); } static const char *ibmvnic_fw_err_cause(u16 cause) @@ -4263,7 +4304,7 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, be64_to_cpu(crq->request_capability_rsp.number); } - ibmvnic_send_req_caps(adapter, 1); + send_request_cap(adapter, 1); return; default: dev_err(dev, "Error %d in request cap rsp\n", @@ -4273,30 +4314,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, /* Done receiving requested capabilities, query IP offload support */ if (atomic_read(&adapter->running_cap_crqs) == 0) { - union ibmvnic_crq newcrq; - int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer); - struct ibmvnic_query_ip_offload_buffer *ip_offload_buf = - &adapter->ip_offload_buf; - adapter->wait_capability = false; - adapter->ip_offload_tok = dma_map_single(dev, ip_offload_buf, - buf_sz, - DMA_FROM_DEVICE); - - if (dma_mapping_error(dev, adapter->ip_offload_tok)) { - if (!firmware_has_feature(FW_FEATURE_CMO)) - dev_err(dev, "Couldn't map offload buffer\n"); - return; - } - - memset(&newcrq, 0, sizeof(newcrq)); - newcrq.query_ip_offload.first = IBMVNIC_CRQ_CMD; - newcrq.query_ip_offload.cmd = QUERY_IP_OFFLOAD; - newcrq.query_ip_offload.len = cpu_to_be32(buf_sz); - newcrq.query_ip_offload.ioba = - cpu_to_be32(adapter->ip_offload_tok); - - ibmvnic_send_crq(adapter, &newcrq); + send_query_ip_offload(adapter); } } @@ -4307,6 +4326,11 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, struct net_device *netdev = adapter->netdev; struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf; struct ibmvnic_login_buffer *login = adapter->login_buf; + u64 *tx_handle_array; + u64 *rx_handle_array; + int num_tx_pools; + int num_rx_pools; + u64 *size_array; int i; dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, @@ -4341,6 +4365,30 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, ibmvnic_remove(adapter->vdev); return -EIO; } + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); + /* variable buffer sizes are not supported, so just read the + * first entry. 
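The login-response parsing here is the counterpart of the earlier ibmvnic hunks: instead of every caller re-deriving the u64 arrays hidden inside login_rsp_buf from byte offsets, the response is walked once and the results cached (adapter->cur_rx_buf_sz here, and the per-queue scrq->handle values just below). The offset-array idiom itself, reduced to a schematic with made-up field names:

#include <linux/kernel.h>
#include <linux/types.h>
#include <asm/byteorder.h>

struct demo_login_rsp {
	__be32 num_rx;
	__be32 off_rx_handles;	/* byte offset from the start of this buffer */
	/* ... fixed header, then variable-length arrays ... */
};

static void demo_cache_handles(struct demo_login_rsp *rsp,
			       u64 *cache, int max)
{
	/* the array lives in the same buffer, at a be32 byte offset */
	u64 *handles = (u64 *)((u8 *)rsp +
			       be32_to_cpu(rsp->off_rx_handles));
	int i, n = min_t(int, be32_to_cpu(rsp->num_rx), max);

	for (i = 0; i < n; i++)
		cache[i] = handles[i];	/* handles stay in device byte order */
}

Caching once also means the response buffer can be released immediately after login instead of being kept around as a lookup table.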
+ */ + adapter->cur_rx_buf_sz = be64_to_cpu(size_array[0]); + + num_tx_pools = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); + num_rx_pools = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); + + tx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); + rx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_subcrqs)); + + for (i = 0; i < num_tx_pools; i++) + adapter->tx_scrq[i]->handle = tx_handle_array[i]; + + for (i = 0; i < num_rx_pools; i++) + adapter->rx_scrq[i]->handle = rx_handle_array[i]; + + adapter->num_active_tx_scrqs = num_tx_pools; + adapter->num_active_rx_scrqs = num_rx_pools; + release_login_rsp_buffer(adapter); release_login_buffer(adapter); complete(&adapter->init_done); @@ -4550,7 +4598,7 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, out: if (atomic_read(&adapter->running_cap_crqs) == 0) { adapter->wait_capability = false; - ibmvnic_send_req_caps(adapter, 0); + send_request_cap(adapter, 0); } } @@ -4605,7 +4653,7 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, case IBMVNIC_1GBPS: adapter->speed = SPEED_1000; break; - case IBMVNIC_10GBP: + case IBMVNIC_10GBPS: adapter->speed = SPEED_10000; break; case IBMVNIC_25GBPS: @@ -4620,6 +4668,9 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, case IBMVNIC_100GBPS: adapter->speed = SPEED_100000; break; + case IBMVNIC_200GBPS: + adapter->speed = SPEED_200000; + break; default: if (netif_carrier_ok(netdev)) netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed); @@ -4715,7 +4766,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, be16_to_cpu(crq->version_exchange_rsp.version); dev_info(dev, "Partner protocol version is %d\n", ibmvnic_version); - send_cap_queries(adapter); + send_query_cap(adapter); break; case QUERY_CAPABILITY_RSP: handle_query_cap_rsp(crq, adapter); @@ -4812,9 +4863,9 @@ static irqreturn_t ibmvnic_interrupt(int irq, void *instance) return IRQ_HANDLED; } -static void ibmvnic_tasklet(void *data) +static void ibmvnic_tasklet(struct tasklet_struct *t) { - struct ibmvnic_adapter *adapter = data; + struct ibmvnic_adapter *adapter = from_tasklet(adapter, t, tasklet); struct ibmvnic_crq_queue *queue = &adapter->crq; union ibmvnic_crq *crq; unsigned long flags; @@ -4949,8 +5000,7 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter) retrc = 0; - tasklet_init(&adapter->tasklet, (void *)ibmvnic_tasklet, - (unsigned long)adapter); + tasklet_setup(&adapter->tasklet, (void *)ibmvnic_tasklet); netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq); snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x", @@ -4986,7 +5036,7 @@ map_failed: return retrc; } -static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) +static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) { struct device *dev = &adapter->vdev->dev; unsigned long timeout = msecs_to_jiffies(30000); @@ -4995,12 +5045,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) adapter->from_passive_init = false; - old_num_rx_queues = adapter->req_rx_queues; - old_num_tx_queues = adapter->req_tx_queues; + if (reset) { + old_num_rx_queues = adapter->req_rx_queues; + old_num_tx_queues = adapter->req_tx_queues; + reinit_completion(&adapter->init_done); + } - reinit_completion(&adapter->init_done); adapter->init_done_rc = 0; - ibmvnic_send_crq_init(adapter); + rc = ibmvnic_send_crq_init(adapter); + if (rc) { + dev_err(dev, "Send crq init failed 
with error %d\n", rc); + return rc; + } + if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { dev_err(dev, "Initialization sequence timed out\n"); return -1; @@ -5017,7 +5074,8 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) return -1; } - if (test_bit(0, &adapter->resetting) && !adapter->wait_for_reset && + if (reset && + test_bit(0, &adapter->resetting) && !adapter->wait_for_reset && adapter->reset_reason != VNIC_RESET_MOBILITY) { if (adapter->req_rx_queues != old_num_rx_queues || adapter->req_tx_queues != old_num_tx_queues) { @@ -5045,48 +5103,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) return rc; } -static int ibmvnic_init(struct ibmvnic_adapter *adapter) -{ - struct device *dev = &adapter->vdev->dev; - unsigned long timeout = msecs_to_jiffies(30000); - int rc; - - adapter->from_passive_init = false; - - adapter->init_done_rc = 0; - ibmvnic_send_crq_init(adapter); - if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - dev_err(dev, "Initialization sequence timed out\n"); - return -1; - } - - if (adapter->init_done_rc) { - release_crq_queue(adapter); - return adapter->init_done_rc; - } - - if (adapter->from_passive_init) { - adapter->state = VNIC_OPEN; - adapter->from_passive_init = false; - return -1; - } - - rc = init_sub_crqs(adapter); - if (rc) { - dev_err(dev, "Initialization of sub crqs failed\n"); - release_crq_queue(adapter); - return rc; - } - - rc = init_sub_crq_irqs(adapter); - if (rc) { - dev_err(dev, "Failed to initialize sub crq irqs\n"); - release_crq_queue(adapter); - } - - return rc; -} - static struct device_attribute dev_attr_failover; static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) @@ -5149,7 +5165,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) goto ibmvnic_init_fail; } - rc = ibmvnic_init(adapter); + rc = ibmvnic_reset_init(adapter, false); if (rc && rc != EAGAIN) goto ibmvnic_init_fail; } while (rc == EAGAIN); @@ -5299,8 +5315,7 @@ static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) for (i = 0; i < adapter->req_tx_queues + adapter->req_rx_queues; i++) ret += 4 * PAGE_SIZE; /* the scrq message queue */ - for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - i++) + for (i = 0; i < adapter->num_active_rx_pools; i++) ret += adapter->rx_pool[i].size * IOMMU_PAGE_ALIGN(adapter->rx_pool[i].buff_size, tbl); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index f8416e1d4cf0..217dcc7ded70 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -373,7 +373,7 @@ struct ibmvnic_phys_parms { #define IBMVNIC_10MBPS 0x40000000 #define IBMVNIC_100MBPS 0x20000000 #define IBMVNIC_1GBPS 0x10000000 -#define IBMVNIC_10GBP 0x08000000 +#define IBMVNIC_10GBPS 0x08000000 #define IBMVNIC_40GBPS 0x04000000 #define IBMVNIC_100GBPS 0x02000000 #define IBMVNIC_25GBPS 0x01000000 @@ -875,6 +875,7 @@ struct ibmvnic_sub_crq_queue { struct ibmvnic_adapter *adapter; atomic_t used; char name[32]; + u64 handle; }; struct ibmvnic_long_term_buff { @@ -1075,6 +1076,7 @@ struct ibmvnic_adapter { u32 num_active_rx_napi; u32 num_active_tx_scrqs; u32 num_active_tx_pools; + u32 cur_rx_buf_sz; struct tasklet_struct tasklet; enum vnic_state state; diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 36da059388dc..8cc651d37a7f 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -384,7 +384,7 @@ enum 
cb_status { cb_ok = 0x2000, }; -/** +/* * cb_command - Command Block flags * @cb_tx_nc: 0: controller does CRC (normal), 1: CRC from skb memory */ @@ -1531,7 +1531,7 @@ static int e100_hw_init(struct nic *nic) e100_hw_reset(nic); netif_err(nic, hw, nic->netdev, "e100_hw_init\n"); - if (!in_interrupt() && (err = e100_self_test(nic))) + if ((err = e100_self_test(nic))) return err; if ((err = e100_phy_init(nic))) @@ -2155,7 +2155,7 @@ static int e100_rx_alloc_list(struct nic *nic) nic->rx_to_use = nic->rx_to_clean = NULL; nic->ru_running = RU_UNINITIALIZED; - if (!(nic->rxs = kcalloc(count, sizeof(struct rx), GFP_ATOMIC))) + if (!(nic->rxs = kcalloc(count, sizeof(struct rx), GFP_KERNEL))) return -ENOMEM; for (rx = nic->rxs, i = 0; i < count; rx++, i++) { @@ -2593,7 +2593,7 @@ static void e100_diag_test(struct net_device *netdev, { struct ethtool_cmd cmd; struct nic *nic = netdev_priv(netdev); - int i, err; + int i; memset(data, 0, E100_TEST_LEN * sizeof(u64)); data[0] = !mii_link_ok(&nic->mii); @@ -2601,7 +2601,7 @@ static void e100_diag_test(struct net_device *netdev, if (test->flags & ETH_TEST_FL_OFFLINE) { /* save speed, duplex & autoneg settings */ - err = mii_ethtool_gset(&nic->mii, &cmd); + mii_ethtool_gset(&nic->mii, &cmd); if (netif_running(netdev)) e100_down(nic); @@ -2610,7 +2610,7 @@ static void e100_diag_test(struct net_device *netdev, data[4] = e100_loopback_test(nic, lb_phy); /* restore speed, duplex & autoneg settings */ - err = mii_ethtool_sset(&nic->mii, &cmd); + mii_ethtool_sset(&nic->mii, &cmd); if (netif_running(netdev)) e100_up(nic); diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 4e7a0810eaeb..4c0c9433bd60 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -129,7 +129,6 @@ static s32 e1000_set_phy_type(struct e1000_hw *hw) */ static void e1000_phy_init_script(struct e1000_hw *hw) { - u32 ret_val; u16 phy_saved_data; if (hw->phy_init_script) { @@ -138,7 +137,7 @@ static void e1000_phy_init_script(struct e1000_hw *hw) /* Save off the current value of register 0x2F5B to be restored * at the end of this routine. */ - ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); + e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); /* Disabled the PHY transmitter */ e1000_write_phy_reg(hw, 0x2F5B, 0x0003); @@ -377,7 +376,6 @@ s32 e1000_reset_hw(struct e1000_hw *hw) { u32 ctrl; u32 ctrl_ext; - u32 icr; u32 manc; u32 led_ctrl; s32 ret_val; @@ -502,7 +500,7 @@ s32 e1000_reset_hw(struct e1000_hw *hw) ew32(IMC, 0xffffffff); /* Clear any pending interrupt events. */ - icr = er32(ICR); + er32(ICR); /* If MWI was previously enabled, reenable it. 
*/ if (hw->mac_type == e1000_82542_rev2_0) { @@ -1897,7 +1895,6 @@ void e1000_config_collision_dist(struct e1000_hw *hw) /** * e1000_config_mac_to_phy - sync phy and mac settings * @hw: Struct containing variables accessed by shared code - * @mii_reg: data to write to the MII control register * * Sets MAC speed and duplex settings to reflect the those in the PHY * The contents of the PHY register containing the needed information need to @@ -2370,16 +2367,13 @@ static s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw) */ s32 e1000_check_for_link(struct e1000_hw *hw) { - u32 rxcw = 0; - u32 ctrl; u32 status; u32 rctl; u32 icr; - u32 signal = 0; s32 ret_val; u16 phy_data; - ctrl = er32(CTRL); + er32(CTRL); status = er32(STATUS); /* On adapters with a MAC newer than 82544, SW Definable pin 1 will be @@ -2388,12 +2382,9 @@ s32 e1000_check_for_link(struct e1000_hw *hw) */ if ((hw->media_type == e1000_media_type_fiber) || (hw->media_type == e1000_media_type_internal_serdes)) { - rxcw = er32(RXCW); + er32(RXCW); if (hw->media_type == e1000_media_type_fiber) { - signal = - (hw->mac_type > - e1000_82544) ? E1000_CTRL_SWDPIN1 : 0; if (status & E1000_STATUS_LU) hw->get_link_status = false; } @@ -2922,7 +2913,7 @@ static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, * * @hw: Struct containing variables accessed by shared code * @reg_addr: address of the PHY register to write - * @data: data to write to the PHY + * @phy_data: data to write to the PHY * * Writes a value to a PHY register */ @@ -4410,17 +4401,9 @@ void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) static void e1000_clear_vfta(struct e1000_hw *hw) { u32 offset; - u32 vfta_value = 0; - u32 vfta_offset = 0; - u32 vfta_bit_in_reg = 0; for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { - /* If the offset we want to clear is the same offset of the - * manageability VLAN ID, then clear all bits except that of the - * manageability unit - */ - vfta_value = (offset == vfta_offset) ? 
vfta_bit_in_reg : 0; - E1000_WRITE_REG_ARRAY(hw, VFTA, offset, vfta_value); + E1000_WRITE_REG_ARRAY(hw, VFTA, offset, 0); E1000_WRITE_FLUSH(); } } @@ -4675,78 +4658,76 @@ s32 e1000_led_off(struct e1000_hw *hw) */ static void e1000_clear_hw_cntrs(struct e1000_hw *hw) { - volatile u32 temp; - - temp = er32(CRCERRS); - temp = er32(SYMERRS); - temp = er32(MPC); - temp = er32(SCC); - temp = er32(ECOL); - temp = er32(MCC); - temp = er32(LATECOL); - temp = er32(COLC); - temp = er32(DC); - temp = er32(SEC); - temp = er32(RLEC); - temp = er32(XONRXC); - temp = er32(XONTXC); - temp = er32(XOFFRXC); - temp = er32(XOFFTXC); - temp = er32(FCRUC); - - temp = er32(PRC64); - temp = er32(PRC127); - temp = er32(PRC255); - temp = er32(PRC511); - temp = er32(PRC1023); - temp = er32(PRC1522); - - temp = er32(GPRC); - temp = er32(BPRC); - temp = er32(MPRC); - temp = er32(GPTC); - temp = er32(GORCL); - temp = er32(GORCH); - temp = er32(GOTCL); - temp = er32(GOTCH); - temp = er32(RNBC); - temp = er32(RUC); - temp = er32(RFC); - temp = er32(ROC); - temp = er32(RJC); - temp = er32(TORL); - temp = er32(TORH); - temp = er32(TOTL); - temp = er32(TOTH); - temp = er32(TPR); - temp = er32(TPT); - - temp = er32(PTC64); - temp = er32(PTC127); - temp = er32(PTC255); - temp = er32(PTC511); - temp = er32(PTC1023); - temp = er32(PTC1522); - - temp = er32(MPTC); - temp = er32(BPTC); + er32(CRCERRS); + er32(SYMERRS); + er32(MPC); + er32(SCC); + er32(ECOL); + er32(MCC); + er32(LATECOL); + er32(COLC); + er32(DC); + er32(SEC); + er32(RLEC); + er32(XONRXC); + er32(XONTXC); + er32(XOFFRXC); + er32(XOFFTXC); + er32(FCRUC); + + er32(PRC64); + er32(PRC127); + er32(PRC255); + er32(PRC511); + er32(PRC1023); + er32(PRC1522); + + er32(GPRC); + er32(BPRC); + er32(MPRC); + er32(GPTC); + er32(GORCL); + er32(GORCH); + er32(GOTCL); + er32(GOTCH); + er32(RNBC); + er32(RUC); + er32(RFC); + er32(ROC); + er32(RJC); + er32(TORL); + er32(TORH); + er32(TOTL); + er32(TOTH); + er32(TPR); + er32(TPT); + + er32(PTC64); + er32(PTC127); + er32(PTC255); + er32(PTC511); + er32(PTC1023); + er32(PTC1522); + + er32(MPTC); + er32(BPTC); if (hw->mac_type < e1000_82543) return; - temp = er32(ALGNERRC); - temp = er32(RXERRC); - temp = er32(TNCRS); - temp = er32(CEXTERR); - temp = er32(TSCTC); - temp = er32(TSCTFC); + er32(ALGNERRC); + er32(RXERRC); + er32(TNCRS); + er32(CEXTERR); + er32(TSCTC); + er32(TSCTFC); if (hw->mac_type <= e1000_82544) return; - temp = er32(MGTPRC); - temp = er32(MGTPDC); - temp = er32(MGTPTC); + er32(MGTPRC); + er32(MGTPDC); + er32(MGTPTC); } /** @@ -4778,8 +4759,6 @@ void e1000_reset_adaptive(struct e1000_hw *hw) /** * e1000_update_adaptive - update adaptive IFS * @hw: Struct containing variables accessed by shared code - * @tx_packets: Number of transmits since last callback - * @total_collisions: Number of collisions since last callback * * Called during the callback/watchdog routine to update IFS value based on * the ratio of transmits to collisions. @@ -5064,8 +5043,6 @@ static s32 e1000_check_polarity(struct e1000_hw *hw, /** * e1000_check_downshift - Check if Downshift occurred * @hw: Struct containing variables accessed by shared code - * @downshift: output parameter : 0 - No Downshift occurred. - * 1 - Downshift occurred. 
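Background for the e1000_clear_hw_cntrs() rewrite above: these statistics registers are clear-on-read, so the MMIO read is the whole operation and its value is deliberately discarded. Assigning it to a dummy variable only provokes -Wunused-but-set-variable under W=1 builds, which is the class of warning these cleanups target. Reduced to a minimal sketch (the register offset is made up; er32() in this driver is essentially such a readl() on the mapped BAR):

#include <linux/io.h>

#define DEMO_REG_CRCERRS 0x4000	/* hypothetical clear-on-read counter */

static void demo_clear_counters(void __iomem *hw_addr)
{
	/* read purely for the side effect; nothing is stored */
	readl(hw_addr + DEMO_REG_CRCERRS);
}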
* * returns: - E1000_ERR_XXX * E1000_SUCCESS diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 1e6ec081fd9d..5e28cf4fa2cd 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -199,8 +199,10 @@ module_param(debug, int, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); /** - * e1000_get_hw_dev - return device - * used by hardware layer to print debugging information + * e1000_get_hw_dev - helper function for getting netdev + * @hw: pointer to HW struct + * + * return device used by hardware layer to print debugging information * **/ struct net_device *e1000_get_hw_dev(struct e1000_hw *hw) @@ -354,7 +356,7 @@ static void e1000_release_manageability(struct e1000_adapter *adapter) /** * e1000_configure - configure the hardware for RX and TX - * @adapter = private board structure + * @adapter: private board structure **/ static void e1000_configure(struct e1000_adapter *adapter) { @@ -534,7 +536,6 @@ void e1000_down(struct e1000_adapter *adapter) void e1000_reinit_locked(struct e1000_adapter *adapter) { - WARN_ON(in_interrupt()); while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) msleep(1); @@ -3489,8 +3490,9 @@ exit: /** * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: number of the Tx queue that hung (unused) **/ -static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); @@ -3787,7 +3789,8 @@ static irqreturn_t e1000_intr(int irq, void *data) /** * e1000_clean - NAPI Rx polling callback - * @adapter: board private structure + * @napi: napi struct containing references to driver info + * @budget: budget given to driver for receive packets **/ static int e1000_clean(struct napi_struct *napi, int budget) { @@ -3818,6 +3821,7 @@ static int e1000_clean(struct napi_struct *napi, int budget) /** * e1000_clean_tx_irq - Reclaim resources after transmit completes * @adapter: board private structure + * @tx_ring: ring to clean **/ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring) @@ -3933,7 +3937,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, * @adapter: board private structure * @status_err: receive descriptor status and error fields * @csum: receive descriptor csum field - * @sk_buff: socket buffer with received data + * @skb: socket buffer with received data **/ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, u32 csum, struct sk_buff *skb) @@ -3970,6 +3974,9 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, /** * e1000_consume_page - helper function for jumbo Rx path + * @bi: software descriptor shadow data + * @skb: skb being modified + * @length: length of data being added **/ static void e1000_consume_page(struct e1000_rx_buffer *bi, struct sk_buff *skb, u16 length) @@ -4003,6 +4010,7 @@ static void e1000_receive_skb(struct e1000_adapter *adapter, u8 status, /** * e1000_tbi_adjust_stats * @hw: Struct containing variables accessed by shared code + * @stats: point to stats struct * @frame_len: The length of the frame in question * @mac_addr: The Ethernet destination address of the frame in question * @@ -4548,6 +4556,8 @@ e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter, /** * e1000_alloc_rx_buffers - 
Replace used receive buffers; legacy & extended * @adapter: address of board private structure + * @rx_ring: pointer to ring struct + * @cleaned_count: number of new Rx buffers to try to allocate **/ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, @@ -4662,7 +4672,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, /** * e1000_smartspeed - Workaround for SmartSpeed on 82541 and 82547 controllers. - * @adapter: + * @adapter: address of board private structure **/ static void e1000_smartspeed(struct e1000_adapter *adapter) { @@ -4718,10 +4728,10 @@ static void e1000_smartspeed(struct e1000_adapter *adapter) } /** - * e1000_ioctl - - * @netdev: - * @ifreq: - * @cmd: + * e1000_ioctl - handle ioctl calls + * @netdev: pointer to our netdev + * @ifr: pointer to interface request structure + * @cmd: ioctl data **/ static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { @@ -4737,9 +4747,9 @@ static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) /** * e1000_mii_ioctl - - * @netdev: - * @ifreq: - * @cmd: + * @netdev: pointer to our netdev + * @ifr: pointer to interface request structure + * @cmd: ioctl data **/ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index 4b103cca8a39..be9c695dde12 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -1072,7 +1072,6 @@ static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw) /** * e1000_cfg_on_link_up_80003es2lan - es2 link configuration after link-up * @hw: pointer to the HW structure - * @duplex: current duplex setting * * Configure the KMRN interface by applying last minute quirks for * 10/100 operation. 
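Most of the e1000/e1000e hunks above and below are kernel-doc repairs of the kind `make W=1` reports: stale parameters are dropped, undocumented ones gain `@name:` lines, and slips like `@adapter =` become `@adapter:`. The same warning level motivates the unused-variable cleanups, e.g. e1000_clear_hw_cntrs() losing its write-only `temp`; that is safe because er32() boils down to a readl() on the device's MMIO window, so the clear-on-read side effect of each statistics register survives even when the returned value is discarded. For reference, a hypothetical helper (names are illustrative, not from the patch) in the canonical kernel-doc form these fixes converge on:

	/**
	 * foo_clean_rx_irq - reclaim completed Rx descriptors
	 * @rx_ring: Rx ring being cleaned
	 * @budget: maximum number of packets to process
	 *
	 * Walks the ring until @budget packets have been handled or no
	 * completed descriptors remain.
	 *
	 * Return: number of packets cleaned.
	 **/
	static int foo_clean_rx_irq(struct foo_ring *rx_ring, int budget);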
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index a8fc9208382c..03215b0aee4b 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -895,6 +895,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: mask |= BIT(18); break; default: @@ -1560,6 +1561,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: fext_nvm11 = er32(FEXTNVM11); fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX; ew32(FEXTNVM11, fext_nvm11); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index b1447221669e..69a2329ea463 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -102,6 +102,10 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_ADP_I219_V16 0x1A1F #define E1000_DEV_ID_PCH_ADP_I219_LM17 0x1A1C #define E1000_DEV_ID_PCH_ADP_I219_V17 0x1A1D +#define E1000_DEV_ID_PCH_MTP_I219_LM18 0x550A +#define E1000_DEV_ID_PCH_MTP_I219_V18 0x550B +#define E1000_DEV_ID_PCH_MTP_I219_LM19 0x550C +#define E1000_DEV_ID_PCH_MTP_I219_V19 0x550D #define E1000_REVISION_4 4 @@ -127,6 +131,7 @@ enum e1000_mac_type { e1000_pch_cnp, e1000_pch_tgp, e1000_pch_adp, + e1000_pch_mtp, }; enum e1000_media_type { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index b2f2fcfdf732..9aa6fad8ed47 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -320,6 +320,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: if (e1000_phy_is_accessible_pchlan(hw)) break; @@ -464,6 +465,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. */ @@ -708,6 +710,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: case e1000_pchlan: /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; @@ -743,7 +746,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) /** * __e1000_access_emi_reg_locked - Read/write EMI register * @hw: pointer to the HW structure - * @addr: EMI address to program + * @address: EMI address to program * @data: pointer to value to read/write from/to the EMI address * @read: boolean flag to indicate read or write * @@ -1648,6 +1651,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: rc = e1000_init_phy_params_pchlan(hw); break; default: @@ -2102,6 +2106,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: @@ -2266,7 +2271,7 @@ release: /** * e1000_configure_k1_ich8lan - Configure K1 power state * @hw: pointer to the HW structure - * @enable: K1 state to configure + * @k1_enable: K1 state to configure * * Configure the K1 power state based on the provided parameter. 
* Assumes semaphore already acquired. @@ -2405,8 +2410,10 @@ static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw) } /** - * e1000_hv_phy_workarounds_ich8lan - A series of Phy workarounds to be - * done after every PHY reset. + * e1000_hv_phy_workarounds_ich8lan - apply PHY workarounds + * @hw: pointer to the HW structure + * + * A series of PHY workarounds to be done after every PHY reset. **/ static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw) { @@ -2694,8 +2701,10 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) } /** - * e1000_lv_phy_workarounds_ich8lan - A series of Phy workarounds to be - * done after every PHY reset. + * e1000_lv_phy_workarounds_ich8lan - apply ich8 specific workarounds + * @hw: pointer to the HW structure + * + * A series of PHY workarounds to be done after every PHY reset. **/ static s32 e1000_lv_phy_workarounds_ich8lan(struct e1000_hw *hw) { @@ -3141,6 +3150,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: bank1_offset = nvm->flash_bank_size; act_offset = E1000_ICH_NVM_SIG_WORD; @@ -4086,6 +4096,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 664e8ccc88d2..b30f00891c03 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -501,6 +501,7 @@ rx_ring_summary: /** * e1000_desc_unused - calculate if we have unused descriptors + * @ring: pointer to ring struct to perform calculation on **/ static int e1000_desc_unused(struct e1000_ring *ring) { @@ -577,6 +578,7 @@ static void e1000e_rx_hwtstamp(struct e1000_adapter *adapter, u32 status, /** * e1000_receive_skb - helper function to handle Rx indications * @adapter: board private structure + * @netdev: pointer to netdev struct * @staterr: descriptor extended error and status field as written by hardware * @vlan: descriptor vlan field as written by hardware (no le/be conversion) * @skb: pointer to sk_buff to be indicated to stack @@ -601,8 +603,7 @@ static void e1000_receive_skb(struct e1000_adapter *adapter, * e1000_rx_checksum - Receive Checksum Offload * @adapter: board private structure * @status_err: receive descriptor status and error fields - * @csum: receive descriptor csum field - * @sk_buff: socket buffer with received data + * @skb: socket buffer with received data **/ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, struct sk_buff *skb) @@ -673,6 +674,8 @@ static void e1000e_update_tdt_wa(struct e1000_ring *tx_ring, unsigned int i) /** * e1000_alloc_rx_buffers - Replace used receive buffers * @rx_ring: Rx descriptor ring + * @cleaned_count: number to reallocate + * @gfp: flags for allocation **/ static void e1000_alloc_rx_buffers(struct e1000_ring *rx_ring, int cleaned_count, gfp_t gfp) @@ -741,6 +744,8 @@ map_skb: /** * e1000_alloc_rx_buffers_ps - Replace used receive buffers; packet split * @rx_ring: Rx descriptor ring + * @cleaned_count: number to reallocate + * @gfp: flags for allocation **/ static void e1000_alloc_rx_buffers_ps(struct e1000_ring *rx_ring, int cleaned_count, gfp_t gfp) @@ -844,6 +849,7 @@ no_buffers: * e1000_alloc_jumbo_rx_buffers - Replace used jumbo receive buffers * 
@rx_ring: Rx descriptor ring * @cleaned_count: number of buffers to allocate this pass + * @gfp: flags for allocation **/ static void e1000_alloc_jumbo_rx_buffers(struct e1000_ring *rx_ring, @@ -933,6 +939,8 @@ static inline void e1000_rx_hash(struct net_device *netdev, __le32 rss, /** * e1000_clean_rx_irq - Send received data up the network stack * @rx_ring: Rx descriptor ring + * @work_done: output parameter for indicating completed work + * @work_to_do: how many packets we can clean * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned @@ -1327,6 +1335,8 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring) /** * e1000_clean_rx_irq_ps - Send received data up the network stack; packet split * @rx_ring: Rx descriptor ring + * @work_done: output parameter for indicating completed work + * @work_to_do: how many packets we can clean * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned @@ -1517,9 +1527,6 @@ next_desc: return cleaned; } -/** - * e1000_consume_page - helper function - **/ static void e1000_consume_page(struct e1000_buffer *bi, struct sk_buff *skb, u16 length) { @@ -1531,7 +1538,9 @@ static void e1000_consume_page(struct e1000_buffer *bi, struct sk_buff *skb, /** * e1000_clean_jumbo_rx_irq - Send received data up the network stack; legacy - * @adapter: board private structure + * @rx_ring: Rx descriptor ring + * @work_done: output parameter for indicating completed work + * @work_to_do: how many packets we can clean * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned @@ -1994,6 +2003,7 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused irq, void *data) /** * e1000_configure_msix - Configure MSI-X hardware + * @adapter: board private structure * * e1000_configure_msix sets up the hardware to properly * generate MSI-X interrupts. @@ -2072,6 +2082,7 @@ void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter) /** * e1000e_set_interrupt_capability - set MSI or MSI-X if supported + * @adapter: board private structure * * Attempt to configure interrupts using the best available * capabilities of the hardware and kernel. @@ -2127,6 +2138,7 @@ void e1000e_set_interrupt_capability(struct e1000_adapter *adapter) /** * e1000_request_msix - Initialize MSI-X interrupts + * @adapter: board private structure * * e1000_request_msix allocates MSI-X vectors and requests interrupts from the * kernel. @@ -2180,6 +2192,7 @@ static int e1000_request_msix(struct e1000_adapter *adapter) /** * e1000_request_irq - initialize interrupts + * @adapter: board private structure * * Attempts to configure interrupts using the best available * capabilities of the hardware and kernel. 
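The WARN_ON(in_interrupt()) dropped from e1000_reinit_locked() above (and from similar reset paths later in this series) was never a reliable context check, and these functions already carry their own proof of process context: they serialize on a driver state bit and sleep while waiting for it. A minimal sketch of that idiom, with hypothetical names:

	/* Only one reset may run at a time; sleeping here already
	 * implies we cannot be in interrupt context.
	 */
	while (test_and_set_bit(__FOO_RESETTING, &adapter->flags))
		msleep(1);

	foo_down(adapter);
	foo_up(adapter);

	clear_bit(__FOO_RESETTING, &adapter->flags);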
@@ -2240,6 +2253,7 @@ static void e1000_free_irq(struct e1000_adapter *adapter) /** * e1000_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure **/ static void e1000_irq_disable(struct e1000_adapter *adapter) { @@ -2262,6 +2276,7 @@ static void e1000_irq_disable(struct e1000_adapter *adapter) /** * e1000_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure **/ static void e1000_irq_enable(struct e1000_adapter *adapter) { @@ -2332,6 +2347,8 @@ void e1000e_release_hw_control(struct e1000_adapter *adapter) /** * e1000_alloc_ring_dma - allocate memory for a ring structure + * @adapter: board private structure + * @ring: ring struct for which to allocate dma **/ static int e1000_alloc_ring_dma(struct e1000_adapter *adapter, struct e1000_ring *ring) @@ -2507,7 +2524,6 @@ void e1000e_free_rx_resources(struct e1000_ring *rx_ring) /** * e1000_update_itr - update the dynamic ITR value based on statistics - * @adapter: pointer to adapter * @itr_setting: current adapter->itr * @packets: the number of packets during this measurement interval * @bytes: the number of bytes during this measurement interval @@ -3049,12 +3065,13 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) } } +#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ + (((S) & (PAGE_SIZE - 1)) ? 1 : 0)) + /** * e1000_setup_rctl - configure the receive control registers * @adapter: Board private structure **/ -#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ - (((S) & (PAGE_SIZE - 1)) ? 1 : 0)) static void e1000_setup_rctl(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; @@ -3570,6 +3587,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ incperiod = INCPERIOD_24MHZ; @@ -3605,6 +3623,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) /** * e1000e_config_hwtstamp - configure the hwtstamp registers and enable/disable * @adapter: board private structure + * @config: timestamp configuration * * Outgoing time stamping can be enabled and disabled. Play nice and * disable it when requested, although it shouldn't cause any overhead @@ -3808,6 +3827,7 @@ void e1000e_power_up_phy(struct e1000_adapter *adapter) /** * e1000_power_down_phy - Power down the PHY + * @adapter: board private structure * * Power down the PHY so no link is implied when interface is down. * The PHY cannot be powered down if management or WoL is active. @@ -3820,6 +3840,7 @@ static void e1000_power_down_phy(struct e1000_adapter *adapter) /** * e1000_flush_tx_ring - remove all descriptors from the tx_ring + * @adapter: board private structure * * We want to clear all pending descriptors from the TX ring. * zeroing happens when the HW reads the regs. 
We assign the ring itself as @@ -3854,6 +3875,7 @@ static void e1000_flush_tx_ring(struct e1000_adapter *adapter) /** * e1000_flush_rx_ring - remove all descriptors from the rx_ring + * @adapter: board private structure * * Mark all descriptors in the RX ring as consumed and disable the rx ring */ @@ -3886,6 +3908,7 @@ static void e1000_flush_rx_ring(struct e1000_adapter *adapter) /** * e1000_flush_desc_rings - remove all descriptors from the descriptor rings + * @adapter: board private structure * * In i219, the descriptor rings must be emptied before resetting the HW * or before changing the device state to D3 during runtime (runtime PM). @@ -3968,6 +3991,7 @@ static void e1000e_systim_reset(struct e1000_adapter *adapter) /** * e1000e_reset - bring the hardware into a known good state + * @adapter: board private structure * * This function boots the hardware and enables some settings that * require a configuration cycle of the hardware - those cannot be @@ -4081,6 +4105,7 @@ void e1000e_reset(struct e1000_adapter *adapter) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: fc->refresh_time = 0xFFFF; fc->pause_time = 0xFFFF; @@ -4847,7 +4872,7 @@ static void e1000e_update_phy_task(struct work_struct *work) /** * e1000_update_phy_info - timre call-back to update PHY info - * @data: pointer to adapter cast into an unsigned long + * @t: pointer to timer_list containing private info adapter * * Need to wait a few seconds after link up to get diagnostic information from * the phy @@ -5187,7 +5212,7 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter) /** * e1000_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * @t: pointer to timer_list containing private info adapter **/ static void e1000_watchdog(struct timer_list *t) { @@ -5972,8 +5997,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, /** * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: index of the hung queue (unused) **/ -static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); @@ -6174,7 +6200,7 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, /** * e1000e_hwtstamp_ioctl - control hardware time stamping * @netdev: network interface device structure - * @ifreq: interface request + * @ifr: interface request * * Outgoing time stamping can be enabled and disabled. 
Play nice and * disable it when requested, although it shouldn't cause any overhead @@ -7853,6 +7879,10 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_cnp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index e11c877595fb..bdd9dc163f15 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -2311,6 +2311,7 @@ s32 e1000e_determine_phy_address(struct e1000_hw *hw) /** * e1000_get_phy_addr_for_bm_page - Retrieve PHY page address * @page: page to access + * @reg: register to check * * Returns the phy address for the page requested. **/ @@ -2728,6 +2729,7 @@ void e1000_power_down_phy_copper(struct e1000_hw *hw) * @offset: register offset to be read * @data: pointer to the read data * @locked: semaphore has already been acquired or not + * @page_set: BM_WUC_PAGE already set and access enabled * * Acquires semaphore, if necessary, then reads the PHY register at offset * and stores the retrieved information in data. Release any acquired @@ -2836,6 +2838,7 @@ s32 e1000_read_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 *data) * @offset: register offset to write to * @data: data to write at register offset * @locked: semaphore has already been acquired or not + * @page_set: BM_WUC_PAGE already set and access enabled * * Acquires semaphore, if necessary, then writes the data to PHY register * at the offset. Release any acquired semaphores before exiting. diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 34b988d70488..f3f671311855 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -144,7 +144,7 @@ static int e1000e_phc_get_syncdevicetime(ktime_t *device, /** * e1000e_phc_getsynctime - Reads the current system/device cross timestamp * @ptp: ptp clock structure - * @cts: structure containing timestamp + * @xtstamp: structure containing timestamp * * Read device and system (ART) clock simultaneously and return the scaled * clock values in ns. 
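The "@t: pointer to timer_list" fixups above (e1000_update_phy_info(), e1000_watchdog()) document the timer_setup()-era callback signature, in which the callback receives the timer itself and recovers its container via from_timer() instead of being handed a pointer cast through unsigned long. A sketch of the pattern, with hypothetical names:

	static void foo_watchdog(struct timer_list *t)
	{
		/* recover the embedding structure from its timer member */
		struct foo_adapter *adapter = from_timer(adapter, t,
							 watchdog_timer);

		/* ... check link, kick stats collection ... */
		mod_timer(&adapter->watchdog_timer,
			  round_jiffies(jiffies + 2 * HZ));
	}

	/* at init time: */
	timer_setup(&adapter->watchdog_timer, foo_watchdog, 0);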
@@ -297,6 +297,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: + case e1000_pch_mtp: if ((hw->mac.type < e1000_pch_lpt) || (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { adapter->ptp_clock_info.max_adj = 24000000 - 1; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index d88dd41a9442..99b8252eb969 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -310,10 +310,7 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring, rx_buffer->page_offset; /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch((void *)((u8 *)page_addr + L1_CACHE_BYTES)); -#endif + net_prefetch(page_addr); /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 140212bfe08b..9e3103fae723 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -221,8 +221,6 @@ static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface) { struct net_device *netdev = interface->netdev; - WARN_ON(in_interrupt()); - /* put off any impending NetWatchDogTimeout */ netif_trans_update(netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index a7e212d1caa2..537300e762f0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -35,6 +35,7 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> +#include <net/udp_tunnel.h> #include <net/xdp_sock.h> #include "i40e_type.h" #include "i40e_prototype.h" @@ -90,7 +91,7 @@ #define I40E_OEM_RELEASE_MASK 0x0000ffff #define I40E_RX_DESC(R, i) \ - (&(((union i40e_32byte_rx_desc *)((R)->desc))[i])) + (&(((union i40e_rx_desc *)((R)->desc))[i])) #define I40E_TX_DESC(R, i) \ (&(((struct i40e_tx_desc *)((R)->desc))[i])) #define I40E_TX_CTXTDESC(R, i) \ @@ -133,7 +134,6 @@ enum i40e_state_t { __I40E_PORT_SUSPENDED, __I40E_VF_DISABLE, __I40E_MACVLAN_SYNC_PENDING, - __I40E_UDP_FILTER_SYNC_PENDING, __I40E_TEMP_LINK_POLLING, __I40E_CLIENT_SERVICE_REQUESTED, __I40E_CLIENT_L2_CHANGE, @@ -478,8 +478,8 @@ struct i40e_pf { struct list_head l3_flex_pit_list; struct list_head l4_flex_pit_list; - struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; - u16 pending_udp_bitmap; + struct udp_tunnel_nic_shared udp_tunnel_shared; + struct udp_tunnel_nic_info udp_tunnel_nic; struct hlist_head cloud_filter_list; u16 num_cloud_filters; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index c897a2863e4f..593912b17609 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -541,6 +541,12 @@ static void i40e_set_hw_flags(struct i40e_hw *hw) (aq->api_maj_ver == 1 && aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_X722)) hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; + + if (aq->api_maj_ver > 1 || + (aq->api_maj_ver == 1 && + aq->api_min_ver >= I40E_MINOR_VER_FW_REQUEST_FEC_X722)) + hw->flags |= I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE; + fallthrough; default: break; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h index edec3df78971..ee394aacef4d 100644 --- 
a/drivers/net/ethernet/intel/i40e/i40e_adminq.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h @@ -85,8 +85,8 @@ struct i40e_adminq_info { /** * i40e_aq_rc_to_posix - convert errors to user-land codes - * aq_ret: AdminQ handler error code can override aq_rc - * aq_rc: AdminQ firmware error code to convert + * @aq_ret: AdminQ handler error code can override aq_rc + * @aq_rc: AdminQ firmware error code to convert **/ static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index c0c8efe42fce..1e960c3c7ef0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -24,6 +24,8 @@ #define I40E_MINOR_VER_GET_LINK_INFO_X722 0x0009 /* API version 1.6 for X722 devices adds ability to stop FW LLDP agent */ #define I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722 0x0006 +/* API version 1.10 for X722 devices adds ability to request FEC encoding */ +#define I40E_MINOR_VER_FW_REQUEST_FEC_X722 0x000A struct i40e_aq_desc { __le16 flags; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index befd3018183f..a2dba32383f6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -278,8 +278,6 @@ void i40e_client_update_msix_info(struct i40e_pf *pf) /** * i40e_client_add_instance - add a client instance struct to the instance list * @pf: pointer to the board struct - * @client: pointer to a client struct in the client list. - * @existing: if there was already an existing instance * **/ static void i40e_client_add_instance(struct i40e_pf *pf) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 6ab52cbd697a..adc9e4fa4789 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -3766,9 +3766,7 @@ i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent, /** * i40e_aq_start_lldp * @hw: pointer to the hw struct - * @buff: buffer for result * @persist: True if start of LLDP should be persistent across power cycles - * @buff_size: buffer size * @cmd_details: pointer to command details structure or NULL * * Start the embedded LLDP Agent on all ports. 
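The fm10k hunk above, like the i40e_txrx.c changes further down, folds the open-coded pair of prefetches into net_prefetch(). The helper lives in <linux/netdevice.h> and is roughly:

	static inline void net_prefetch(void *p)
	{
		prefetch(p);
	#if L1_CACHE_BYTES < 128
		prefetch((u8 *)p + L1_CACHE_BYTES);
	#endif
	}

The rationale is that a typical packet-header region can span two cache lines on machines with 64-byte lines, so many drivers repeated the same #if; centralizing it keeps the heuristic in one place.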
@@ -5395,6 +5393,7 @@ static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio, * @hw: pointer to the hw struct * @phy_select: select which phy should be accessed * @dev_addr: PHY device address + * @page_change: flag to indicate if phy page should be updated * @set_mdio: use MDIO I/F number specified by mdio_num * @mdio_num: MDIO I/F number * @reg_addr: PHY register address @@ -5439,6 +5438,7 @@ enum i40e_status_code i40e_aq_set_phy_register_ext(struct i40e_hw *hw, * @hw: pointer to the hw struct * @phy_select: select which phy should be accessed * @dev_addr: PHY device address + * @page_change: flag to indicate if phy page should be updated * @set_mdio: use MDIO I/F number specified by mdio_num * @mdio_num: MDIO I/F number * @reg_addr: PHY register address diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d3ad2e3aa838..d7c13ca9be7d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -604,10 +604,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, } else { rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, - rxd->read.hdr_addr, - rxd->read.rsvd1, rxd->read.rsvd2); + rxd->read.hdr_addr); } } } else if (cnt == 3) { @@ -625,10 +624,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, } else { rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, - rxd->read.pkt_addr, rxd->read.hdr_addr, - rxd->read.rsvd1, rxd->read.rsvd2); + rxd->read.pkt_addr, rxd->read.hdr_addr); } } else { dev_info(&pf->pdev->dev, "dump desc rx/tx/xdp <vsi_seid> <ring_id> [<desc_n>]\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 825c104ecba1..26ba1f3eb2d8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -891,6 +891,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 10000baseT_Full); + i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks); break; case I40E_PHY_TYPE_SGMII: ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); @@ -1481,12 +1482,16 @@ static int i40e_set_fec_param(struct net_device *netdev, struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; u8 fec_cfg = 0; - int err = 0; if (hw->device_id != I40E_DEV_ID_25G_SFP28 && - hw->device_id != I40E_DEV_ID_25G_B) { - err = -EPERM; - goto done; + hw->device_id != I40E_DEV_ID_25G_B && + hw->device_id != I40E_DEV_ID_KX_X722) + return -EPERM; + + if (hw->mac.type == I40E_MAC_X722 && + !(hw->flags & I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE)) { + netdev_err(netdev, "Setting FEC encoding not supported by firmware. 
Please update the NVM image.\n"); + return -EOPNOTSUPP; } switch (fecparam->fec) { @@ -1508,14 +1513,10 @@ static int i40e_set_fec_param(struct net_device *netdev, default: dev_warn(&pf->pdev->dev, "Unsupported FEC mode: %d", fecparam->fec); - err = -EINVAL; - goto done; + return -EINVAL; } - err = i40e_set_fec_cfg(netdev, fec_cfg); - -done: - return err; + return i40e_set_fec_cfg(netdev, fec_cfg); } static int i40e_nway_reset(struct net_device *netdev) @@ -1967,7 +1968,7 @@ static int i40e_set_ringparam(struct net_device *netdev, (new_rx_count == vsi->rx_rings[0]->count)) return 0; - /* If there is a AF_XDP UMEM attached to any of Rx rings, + /* If there is a AF_XDP page pool attached to any of Rx rings, * disallow changing the number of descriptors -- regardless * if the netdev is running or not. */ @@ -4951,8 +4952,7 @@ flags_complete: } } - if (((changed_flags & I40E_FLAG_RS_FEC) || - (changed_flags & I40E_FLAG_BASE_R_FEC)) && + if (changed_flags & I40E_FLAG_RS_FEC && pf->hw.device_id != I40E_DEV_ID_25G_SFP28 && pf->hw.device_id != I40E_DEV_ID_25G_B) { dev_warn(&pf->pdev->dev, @@ -4960,6 +4960,15 @@ flags_complete: return -EOPNOTSUPP; } + if (changed_flags & I40E_FLAG_BASE_R_FEC && + pf->hw.device_id != I40E_DEV_ID_25G_SFP28 && + pf->hw.device_id != I40E_DEV_ID_25G_B && + pf->hw.device_id != I40E_DEV_ID_KX_X722) { + dev_warn(&pf->pdev->dev, + "Device does not support changing FEC configuration\n"); + return -EOPNOTSUPP; + } + /* Process any additional changes needed as a result of flag changes. * The changed_flags value reflects the list of bits that were * changed in the code above. diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2e433fdbf2c3..4f8a2154b93f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -287,6 +287,7 @@ void i40e_service_event_schedule(struct i40e_pf *pf) /** * i40e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: queue number timing out * * If any port has noticed a Tx timeout, it is likely that the whole * device is munged, not just the one netdev port, so go for the full @@ -1609,6 +1610,8 @@ static int i40e_set_mac(struct net_device *netdev, void *p) * i40e_config_rss_aq - Prepare for RSS using AQ commands * @vsi: vsi structure * @seed: RSS hash seed + * @lut: pointer to lookup table of lut_size + * @lut_size: size of the lookup table **/ static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) @@ -3122,12 +3125,12 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) } /** - * i40e_xsk_umem - Retrieve the AF_XDP ZC if XDP and ZC is enabled + * i40e_xsk_pool - Retrieve the AF_XDP buffer pool if XDP and ZC is enabled * @ring: The Tx or Rx ring * - * Returns the UMEM or NULL. + * Returns the AF_XDP buffer pool or NULL. 
**/ -static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) +static struct xsk_buff_pool *i40e_xsk_pool(struct i40e_ring *ring) { bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); int qid = ring->queue_index; @@ -3138,7 +3141,7 @@ static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps)) return NULL; - return xdp_get_umem_from_qid(ring->vsi->netdev, qid); + return xsk_get_pool_from_qid(ring->vsi->netdev, qid); } /** @@ -3157,7 +3160,7 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) u32 qtx_ctl = 0; if (ring_is_xdp(ring)) - ring->xsk_umem = i40e_xsk_umem(ring); + ring->xsk_pool = i40e_xsk_pool(ring); /* some ATR related tx ring init */ if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) { @@ -3280,12 +3283,13 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); kfree(ring->rx_bi); - ring->xsk_umem = i40e_xsk_umem(ring); - if (ring->xsk_umem) { + ring->xsk_pool = i40e_xsk_pool(ring); + if (ring->xsk_pool) { ret = i40e_alloc_rx_bi_zc(ring); if (ret) return ret; - ring->rx_buf_len = xsk_umem_get_rx_frame_size(ring->xsk_umem); + ring->rx_buf_len = + xsk_pool_get_rx_frame_size(ring->xsk_pool); /* For AF_XDP ZC, we disallow packets to span on * multiple buffers, thus letting us skip that * handling in the fast-path. @@ -3320,8 +3324,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) rx_ctx.base = (ring->dma / 128); rx_ctx.qlen = ring->count; - /* use 32 byte descriptors */ - rx_ctx.dsize = 1; + /* use 16 byte descriptors */ + rx_ctx.dsize = 0; /* descriptor type is always zero * rx_ctx.dtype = 0; @@ -3368,8 +3372,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q); writel(0, ring->tail); - if (ring->xsk_umem) { - xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq); + if (ring->xsk_pool) { + xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)); } else { ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); @@ -3380,7 +3384,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) */ dev_info(&vsi->back->pdev->dev, "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n", - ring->xsk_umem ? "UMEM enabled " : "", + ring->xsk_pool ? 
"AF_XDP ZC enabled " : "", ring->queue_index, pf_q); } @@ -5814,7 +5818,6 @@ static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size) /** * i40e_channel_setup_queue_map - Setup a channel queue map * @pf: ptr to PF device - * @vsi: the VSI being setup * @ctxt: VSI context structure * @ch: ptr to channel structure * @@ -6057,8 +6060,7 @@ static inline int i40e_setup_hw_channel(struct i40e_pf *pf, /** * i40e_setup_channel - setup new channel using uplink element * @pf: ptr to PF device - * @type: type of channel to be created (VMDq2/VF) - * @uplink_seid: underlying HW switching element (VEB) ID + * @vsi: pointer to the VSI to set up the channel within * @ch: ptr to channel structure * * Setup new channel (VSI) based on specified type (VMDq2/VF) @@ -6623,6 +6625,25 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", speed, req_fec, fec, an, fc); + } else if (pf->hw.device_id == I40E_DEV_ID_KX_X722) { + req_fec = "None"; + fec = "None"; + an = "False"; + + if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) + an = "True"; + + if (pf->hw.phy.link_info.fec_info & + I40E_AQ_CONFIG_FEC_KR_ENA) + fec = "CL74 FC-FEC/BASE-R"; + + if (pf->hw.phy.link_info.req_fec_info & + I40E_AQ_REQUEST_FEC_KR) + req_fec = "CL74 FC-FEC/BASE-R"; + + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", + speed, req_fec, fec, an, fc); } else { netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n", @@ -6689,7 +6710,6 @@ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - WARN_ON(in_interrupt()); while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) usleep_range(1000, 2000); i40e_down(vsi); @@ -7779,7 +7799,7 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, /** * i40e_parse_cls_flower - Parse tc flower filters provided by kernel * @vsi: Pointer to VSI - * @cls_flower: Pointer to struct flow_cls_offload + * @f: Pointer to struct flow_cls_offload * @filter: Pointer to cloud filter structure * **/ @@ -8160,8 +8180,8 @@ static int i40e_delete_clsflower(struct i40e_vsi *vsi, /** * i40e_setup_tc_cls_flower - flower classifier offloads - * @netdev: net device to configure - * @type_data: offload data + * @np: net device to configure + * @cls_flower: offload data **/ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, struct flow_cls_offload *cls_flower) @@ -8462,9 +8482,6 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) { u32 val; - WARN_ON(in_interrupt()); - - /* do the biggest reset indicated */ if (reset_flags & BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED)) { @@ -9585,6 +9602,7 @@ end_reconstitute: /** * i40e_get_capabilities - get info about the HW * @pf: the PF struct + * @list_type: AQ capability to be queried **/ static int i40e_get_capabilities(struct i40e_pf *pf, enum i40e_admin_queue_opc list_type) @@ -10383,106 +10401,6 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) i40e_flush(hw); } -static const char *i40e_tunnel_name(u8 type) -{ - switch (type) { - case UDP_TUNNEL_TYPE_VXLAN: - return "vxlan"; - case UDP_TUNNEL_TYPE_GENEVE: - return "geneve"; - default: - return "unknown"; - } -} - -/** - * i40e_sync_udp_filters - Trigger a sync event for existing UDP filters - * @pf: board private structure - **/ -static void i40e_sync_udp_filters(struct 
i40e_pf *pf) -{ - int i; - - /* loop through and set pending bit for all active UDP filters */ - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->udp_ports[i].port) - pf->pending_udp_bitmap |= BIT_ULL(i); - } - - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); -} - -/** - * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW - * @pf: board private structure - **/ -static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) -{ - struct i40e_hw *hw = &pf->hw; - u8 filter_index, type; - u16 port; - int i; - - if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state)) - return; - - /* acquire RTNL to maintain state of flags and port requests */ - rtnl_lock(); - - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->pending_udp_bitmap & BIT_ULL(i)) { - struct i40e_udp_port_config *udp_port; - i40e_status ret = 0; - - udp_port = &pf->udp_ports[i]; - pf->pending_udp_bitmap &= ~BIT_ULL(i); - - port = READ_ONCE(udp_port->port); - type = READ_ONCE(udp_port->type); - filter_index = READ_ONCE(udp_port->filter_index); - - /* release RTNL while we wait on AQ command */ - rtnl_unlock(); - - if (port) - ret = i40e_aq_add_udp_tunnel(hw, port, - type, - &filter_index, - NULL); - else if (filter_index != I40E_UDP_PORT_INDEX_UNUSED) - ret = i40e_aq_del_udp_tunnel(hw, filter_index, - NULL); - - /* reacquire RTNL so we can update filter_index */ - rtnl_lock(); - - if (ret) { - dev_info(&pf->pdev->dev, - "%s %s port %d, index %d failed, err %s aq_err %s\n", - i40e_tunnel_name(type), - port ? "add" : "delete", - port, - filter_index, - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); - if (port) { - /* failed to add, just reset port, - * drop pending bit for any deletion - */ - udp_port->port = 0; - pf->pending_udp_bitmap &= ~BIT_ULL(i); - } - } else if (port) { - /* record filter index on success */ - udp_port->filter_index = filter_index; - } - } - } - - rtnl_unlock(); -} - /** * i40e_service_task - Run the driver's async subtasks * @work: pointer to work_struct containing our data @@ -10522,7 +10440,6 @@ static void i40e_service_task(struct work_struct *work) pf->vsi[pf->lan_vsi]); } i40e_sync_filters_subtask(pf); - i40e_sync_udp_filters_subtask(pf); } else { i40e_reset_subtask(pf); } @@ -10546,7 +10463,7 @@ static void i40e_service_task(struct work_struct *work) /** * i40e_service_timer - timer callback - * @data: pointer to PF struct + * @t: timer list pointer **/ static void i40e_service_timer(struct timer_list *t) { @@ -11185,11 +11102,10 @@ static int i40e_init_msix(struct i40e_pf *pf) * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector * @vsi: the VSI being configured * @v_idx: index of the vector in the vsi struct - * @cpu: cpu to be used on affinity_mask * * We allocate one q_vector. If allocation fails we return -ENOMEM. 
**/ -static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) +static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx) { struct i40e_q_vector *q_vector; @@ -11222,7 +11138,7 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - int err, v_idx, num_q_vectors, current_cpu; + int err, v_idx, num_q_vectors; /* if not MSIX, give the one vector only to the LAN VSI */ if (pf->flags & I40E_FLAG_MSIX_ENABLED) @@ -11232,15 +11148,10 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) else return -EINVAL; - current_cpu = cpumask_first(cpu_online_mask); - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - err = i40e_vsi_alloc_q_vector(vsi, v_idx, current_cpu); + err = i40e_vsi_alloc_q_vector(vsi, v_idx); if (err) goto err_out; - current_cpu = cpumask_next(current_cpu, cpu_online_mask); - if (unlikely(current_cpu >= nr_cpu_ids)) - current_cpu = cpumask_first(cpu_online_mask); } return 0; @@ -12228,131 +12139,48 @@ static int i40e_set_features(struct net_device *netdev, return 0; } -/** - * i40e_get_udp_port_idx - Lookup a possibly offloaded for Rx UDP port - * @pf: board private structure - * @port: The UDP port to look up - * - * Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found - **/ -static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port) -{ - u8 i; - - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - /* Do not report ports with pending deletions as - * being available. - */ - if (!port && (pf->pending_udp_bitmap & BIT_ULL(i))) - continue; - if (pf->udp_ports[i].port == port) - return i; - } - - return i; -} - -/** - * i40e_udp_tunnel_add - Get notifications about UDP tunnel ports that come up - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - **/ -static void i40e_udp_tunnel_add(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int i40e_udp_tunnel_set_port(struct net_device *netdev, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - u8 next_idx; - u8 idx; - - idx = i40e_get_udp_port_idx(pf, port); - - /* Check if port already exists */ - if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) { - netdev_info(netdev, "port %d already offloaded\n", port); - return; - } - - /* Now check if there is space to add the new port */ - next_idx = i40e_get_udp_port_idx(pf, 0); + struct i40e_hw *hw = &np->vsi->back->hw; + u8 type, filter_index; + i40e_status ret; - if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) { - netdev_info(netdev, "maximum number of offloaded UDP ports reached, not adding port %d\n", - port); - return; - } + type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? 
I40E_AQC_TUNNEL_TYPE_VXLAN : + I40E_AQC_TUNNEL_TYPE_NGE; - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (!(pf->hw_features & I40E_HW_GENEVE_OFFLOAD_CAPABLE)) - return; - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE; - break; - default: - return; + ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index, + NULL); + if (ret) { + netdev_info(netdev, "add UDP port failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return -EIO; } - /* New port: add it and mark its index in the bitmap */ - pf->udp_ports[next_idx].port = port; - pf->udp_ports[next_idx].filter_index = I40E_UDP_PORT_INDEX_UNUSED; - pf->pending_udp_bitmap |= BIT_ULL(next_idx); - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); + udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index); + return 0; } -/** - * i40e_udp_tunnel_del - Get notifications about UDP tunnel ports that go away - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - **/ -static void i40e_udp_tunnel_del(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int i40e_udp_tunnel_unset_port(struct net_device *netdev, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - u8 idx; - - idx = i40e_get_udp_port_idx(pf, port); - - /* Check if port already exists */ - if (idx >= I40E_MAX_PF_UDP_OFFLOAD_PORTS) - goto not_found; + struct i40e_hw *hw = &np->vsi->back->hw; + i40e_status ret; - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_VXLAN) - goto not_found; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_NGE) - goto not_found; - break; - default: - goto not_found; + ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL); + if (ret) { + netdev_info(netdev, "delete UDP port failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return -EIO; } - /* if port exists, set it to 0 (mark for deletion) - * and make it pending - */ - pf->udp_ports[idx].port = 0; - - /* Toggle pending bit instead of setting it. This way if we are - * deleting a port that has yet to be added we just clear the pending - * bit and don't have to worry about it. 
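This removal is the heart of the udp_tunnel_nic conversion: the driver no longer keeps its own port table, pending bitmap, or RTNL choreography. The core now deduplicates offloaded ports, replays them after a reset (via udp_tunnel_nic_reset_ntf(), used later in this patch), and calls back into set_port()/unset_port(), which for i40e reduce to single AdminQ commands. Most drivers wire this up with a static info struct; a sketch with hypothetical names (i40e instead fills in a per-PF copy at probe time because its filter table is shared across ports):

	static const struct udp_tunnel_nic_info foo_udp_tunnels = {
		.set_port	= foo_udp_tunnel_set_port,
		.unset_port	= foo_udp_tunnel_unset_port,
		.flags		= UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
		.tables		= {
			{
				.n_entries	= 16,
				.tunnel_types	= UDP_TUNNEL_TYPE_VXLAN |
						  UDP_TUNNEL_TYPE_GENEVE,
			},
		},
	};

	/* in probe, before register_netdev(): */
	netdev->udp_tunnel_nic_info = &foo_udp_tunnels;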
- */ - pf->pending_udp_bitmap ^= BIT_ULL(idx); - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); - - return; -not_found: - netdev_warn(netdev, "UDP port %d was not found, not deleting\n", - port); + return 0; } static int i40e_get_phys_port_id(struct net_device *netdev, @@ -12379,6 +12207,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev, * @addr: the MAC address entry being added * @vid: VLAN ID * @flags: instructions from stack about fdb operation + * @extack: netlink extended ack, unused currently */ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -12644,7 +12473,7 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, */ if (need_reset && prog) for (i = 0; i < vsi->num_queue_pairs; i++) - if (vsi->xdp_rings[i]->xsk_umem) + if (vsi->xdp_rings[i]->xsk_pool) (void)i40e_xsk_wakeup(vsi->netdev, i, XDP_WAKEUP_RX); @@ -12923,8 +12752,8 @@ static int i40e_xdp(struct net_device *dev, switch (xdp->command) { case XDP_SETUP_PROG: return i40e_xdp_setup(vsi, xdp->prog); - case XDP_SETUP_XSK_UMEM: - return i40e_xsk_umem_setup(vsi, xdp->xsk.umem, + case XDP_SETUP_XSK_POOL: + return i40e_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id); default: return -EINVAL; @@ -12957,8 +12786,8 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, .ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk, .ndo_set_vf_trust = i40e_ndo_set_vf_trust, - .ndo_udp_tunnel_add = i40e_udp_tunnel_add, - .ndo_udp_tunnel_del = i40e_udp_tunnel_del, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, .ndo_get_phys_port_id = i40e_get_phys_port_id, .ndo_fdb_add = i40e_ndo_fdb_add, .ndo_features_check = i40e_features_check, @@ -13022,6 +12851,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE)) netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic; + netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; netdev->hw_enc_features |= hw_enc_features; @@ -14422,7 +14253,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) i40e_ptp_init(pf); /* repopulate tunnel port filters */ - i40e_sync_udp_filters(pf); + udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev); return ret; } @@ -15151,6 +14982,14 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_switch_setup; + pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port; + pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port; + pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; + pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared; + pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS; + pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN | + UDP_TUNNEL_TYPE_GENEVE; + /* The number of VSIs reported by the FW is the minimum guaranteed * to us; HW supports far more and we share the remaining pool with * the other PFs. 
We allocate space for more than the guarantee with @@ -15160,6 +14999,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; else pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; + if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { + dev_warn(&pf->pdev->dev, + "limiting the VSI count due to UDP tunnel limitation %d > %d\n", + pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); + pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; + } /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */ pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *), diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ff7b19c6bc73..7a879614ca55 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -259,7 +259,6 @@ static u32 i40e_ptp_get_rx_events(struct i40e_pf *pf) /** * i40e_ptp_rx_hang - Detect error case when Rx timestamp registers are hung * @pf: The PF private data structure - * @vsi: The VSI with the rings relevant to 1588 * * This watchdog task is scheduled to detect error case where hardware has * dropped an Rx packet that was timestamped when the ring is full. The diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h index 424f02077e2e..b5b12299931f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_trace.h +++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h @@ -22,7 +22,7 @@ #include <linux/tracepoint.h> -/** +/* * i40e_trace() macro enables shared code to refer to trace points * like: * @@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS( i40e_rx_template, TP_PROTO(struct i40e_ring *ring, - union i40e_32byte_rx_desc *desc, + union i40e_16byte_rx_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb), @@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS( DEFINE_EVENT( i40e_rx_template, i40e_clean_rx_irq, TP_PROTO(struct i40e_ring *ring, - union i40e_32byte_rx_desc *desc, + union i40e_16byte_rx_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb)); @@ -148,7 +148,7 @@ DEFINE_EVENT( DEFINE_EVENT( i40e_rx_template, i40e_clean_rx_irq_rx, TP_PROTO(struct i40e_ring *ring, - union i40e_32byte_rx_desc *desc, + union i40e_16byte_rx_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 3e5c566ceb01..d43ce13a93c9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -533,11 +533,11 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw, { struct i40e_pf *pf = rx_ring->vsi->back; struct pci_dev *pdev = pf->pdev; - struct i40e_32b_rx_wb_qw0 *qw0; + struct i40e_16b_rx_wb_qw0 *qw0; u32 fcnt_prog, fcnt_avail; u32 error; - qw0 = (struct i40e_32b_rx_wb_qw0 *)&qword0_raw; + qw0 = (struct i40e_16b_rx_wb_qw0 *)&qword0_raw; error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >> I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT; @@ -636,7 +636,7 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) unsigned long bi_size; u16 i; - if (ring_is_xdp(tx_ring) && tx_ring->xsk_umem) { + if (ring_is_xdp(tx_ring) && tx_ring->xsk_pool) { i40e_xsk_clean_tx_ring(tx_ring); } else { /* ring already cleared, nothing to do */ @@ -1335,7 +1335,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) rx_ring->skb = NULL; } - if (rx_ring->xsk_umem) { + if (rx_ring->xsk_pool) { 
i40e_xsk_clean_rx_ring(rx_ring); goto skip_free; } @@ -1369,7 +1369,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) } skip_free: - if (rx_ring->xsk_umem) + if (rx_ring->xsk_pool) i40e_clear_rx_bi_zc(rx_ring); else i40e_clear_rx_bi(rx_ring); @@ -1418,7 +1418,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc); + rx_ring->size = rx_ring->count * sizeof(union i40e_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); @@ -1755,7 +1755,6 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * @rx_ring: rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor * @skb: pointer to current skb being populated - * @rx_ptype: the packet type decoded by hardware * * This function checks the ring, descriptor, and packet information in * order to populate the hash, checksum, VLAN, protocol, and @@ -1953,7 +1952,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer; rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); - prefetchw(rx_buffer->page); + prefetch_page_address(rx_buffer->page); /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, @@ -1992,10 +1991,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(xdp->data); -#if L1_CACHE_BYTES < 128 - prefetch(xdp->data + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data); + /* Note, we get here by enabling legacy-rx via: * * ethtool --set-priv-flags <dev> legacy-rx on @@ -2078,10 +2075,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * likely have a consumer accessing first few bytes of meta * data, and then actual data. */ - prefetch(xdp->data_meta); -#if L1_CACHE_BYTES < 128 - prefetch(xdp->data_meta + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data_meta); + /* build an skb around the page buffer */ skb = build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) @@ -2300,6 +2295,19 @@ void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res) } /** + * i40e_inc_ntc: Advance the next_to_clean index + * @rx_ring: Rx ring + **/ +static void i40e_inc_ntc(struct i40e_ring *rx_ring) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + prefetch(I40E_RX_DESC(rx_ring, ntc)); +} + +/** * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process @@ -2579,7 +2587,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { - bool wd = ring->xsk_umem ? + bool wd = ring->xsk_pool ? i40e_clean_xdp_tx_irq(vsi, ring) : i40e_clean_tx_irq(vsi, ring, budget); @@ -2607,7 +2615,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) budget_per_ring = budget; i40e_for_each_ring(ring, q_vector->rx) { - int cleaned = ring->xsk_umem ? + int cleaned = ring->xsk_pool ? 
i40e_clean_rx_irq_zc(ring, budget_per_ring) : i40e_clean_rx_irq(ring, budget_per_ring); @@ -3503,7 +3511,7 @@ dma_error: /** * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring - * @xdp: data to transmit + * @xdpf: data to transmit * @xdp_ring: XDP Tx ring **/ static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, @@ -3698,7 +3706,9 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /** * i40e_xdp_xmit - Implements ndo_xdp_xmit * @dev: netdev - * @xdp: XDP buffer + * @n: number of frames + * @frames: array of XDP buffer pointers + * @flags: XDP extra info * * Returns number of frames successfully sent. Frames that fail are * free'ed via XDP return API. diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 4036893d6825..2feed920ef8a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -110,7 +110,7 @@ enum i40e_dyn_idx_t { */ #define I40E_RX_HDR_SIZE I40E_RXBUFFER_256 #define I40E_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) -#define i40e_rx_desc i40e_32byte_rx_desc +#define i40e_rx_desc i40e_16byte_rx_desc #define I40E_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) @@ -388,7 +388,7 @@ struct i40e_ring { struct i40e_channel *ch; struct xdp_rxq_info xdp_rxq; - struct xdp_umem *xsk_umem; + struct xsk_buff_pool *xsk_pool; } ____cacheline_internodealigned_in_smp; static inline bool ring_uses_build_skb(struct i40e_ring *ring) @@ -482,7 +482,6 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) /** * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed * @skb: send buffer - * @tx_ring: ring to send buffer on * * Returns number of data descriptors needed for this skb. Returns 0 to indicate * there is not enough descriptors available in this ring since we need at least diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h index 667c4dc4b39f..19da3b22160f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h @@ -21,9 +21,9 @@ void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val); #define I40E_XDP_TX BIT(1) #define I40E_XDP_REDIR BIT(2) -/** +/* * build_ctob - Builds the Tx descriptor (cmd, offset and type) qword - **/ + */ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, u32 td_tag) { @@ -37,7 +37,7 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, /** * i40e_update_tx_stats - Update the egress statistics for the Tx ring * @tx_ring: Tx ring to update - * @total_packet: total packets sent + * @total_packets: total packets sent * @total_bytes: total bytes sent **/ static inline void i40e_update_tx_stats(struct i40e_ring *tx_ring, @@ -99,19 +99,6 @@ static inline bool i40e_rx_is_programming_status(u64 qword1) return qword1 & I40E_RXD_QW1_LENGTH_SPH_MASK; } -/** - * i40e_inc_ntc: Advance the next_to_clean index - * @rx_ring: Rx ring - **/ -static inline void i40e_inc_ntc(struct i40e_ring *rx_ring) -{ - u32 ntc = rx_ring->next_to_clean + 1; - - ntc = (ntc < rx_ring->count) ? 
ntc : 0; - rx_ring->next_to_clean = ntc; - prefetch(I40E_RX_DESC(rx_ring, ntc)); -} - void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring); void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring); bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 52410d609ba1..c0bdc666f557 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -595,6 +595,7 @@ struct i40e_hw { #define I40E_HW_FLAG_FW_LLDP_PERSISTENT BIT_ULL(5) #define I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED BIT_ULL(6) #define I40E_HW_FLAG_DROP_MODE BIT_ULL(7) +#define I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE BIT_ULL(8) u64 flags; /* Used in set switch config AQ command */ @@ -628,7 +629,7 @@ union i40e_16byte_rx_desc { __le64 hdr_addr; /* Header buffer address */ } read; struct { - struct { + struct i40e_16b_rx_wb_qw0 { struct { union { __le16 mirroring_status; @@ -647,6 +648,9 @@ union i40e_16byte_rx_desc { __le64 status_error_len; } qword1; } wb; /* writeback */ + struct { + u64 qword[2]; + } raw; }; union i40e_32byte_rx_desc { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 47bfb2e95e2d..c96e2f2d4cba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2244,7 +2244,8 @@ error_param: } /** - * i40e_validate_queue_map + * i40e_validate_queue_map - check queue map is valid + * @vf: the VF structure pointer * @vsi_id: vsi id * @queuemap: Tx or Rx queue map * @@ -3160,8 +3161,8 @@ err: /** * i40e_validate_cloud_filter - * @mask: mask for TC filter - * @data: data for TC filter + * @vf: pointer to VF structure + * @tc_filter: pointer to filter requested * * This function validates cloud filter programmed as TC filter for ADq **/ @@ -3294,7 +3295,7 @@ err: /** * i40e_find_vsi_from_seid - searches for the vsi with the given seid * @vf: pointer to the VF info - * @seid - seid of the vsi it is searching for + * @seid: seid of the vsi it is searching for **/ static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 8ce57b507a21..6acede0acdca 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -29,14 +29,16 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx) } /** - * i40e_xsk_umem_enable - Enable/associate a UMEM to a certain ring/qid + * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a + * certain ring/qid * @vsi: Current VSI - * @umem: UMEM - * @qid: Rx ring to associate UMEM to + * @pool: buffer pool + * @qid: Rx ring to associate buffer pool with * * Returns 0 on success, <0 on failure **/ -static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, +static int i40e_xsk_pool_enable(struct i40e_vsi *vsi, + struct xsk_buff_pool *pool, u16 qid) { struct net_device *netdev = vsi->netdev; @@ -53,7 +55,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, qid >= netdev->real_num_tx_queues) return -EINVAL; - err = xsk_buff_dma_map(umem, &vsi->back->pdev->dev, I40E_RX_DMA_ATTR); + err = xsk_pool_dma_map(pool, &vsi->back->pdev->dev, I40E_RX_DMA_ATTR); if (err) return err; @@ -80,21 +82,22 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, } /** - * 
i40e_xsk_umem_disable - Disassociate a UMEM from a certain ring/qid + * i40e_xsk_pool_disable - Disassociate an AF_XDP buffer pool from a + * certain ring/qid * @vsi: Current VSI - * @qid: Rx ring to associate UMEM to + * @qid: Rx ring to associate buffer pool with * * Returns 0 on success, <0 on failure **/ -static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) +static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid) { struct net_device *netdev = vsi->netdev; - struct xdp_umem *umem; + struct xsk_buff_pool *pool; bool if_running; int err; - umem = xdp_get_umem_from_qid(netdev, qid); - if (!umem) + pool = xsk_get_pool_from_qid(netdev, qid); + if (!pool) return -EINVAL; if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi); @@ -106,7 +109,7 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) } clear_bit(qid, vsi->af_xdp_zc_qps); - xsk_buff_dma_unmap(umem, I40E_RX_DMA_ATTR); + xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR); if (if_running) { err = i40e_queue_pair_enable(vsi, qid); @@ -118,20 +121,21 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) } /** - * i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid + * i40e_xsk_pool_setup - Enable/disassociate an AF_XDP buffer pool to/from + * a ring/qid * @vsi: Current VSI - * @umem: UMEM to enable/associate to a ring, or NULL to disable - * @qid: Rx ring to (dis)associate UMEM (from)to + * @pool: Buffer pool to enable/associate to a ring, or NULL to disable + * @qid: Rx ring to (dis)associate buffer pool (from)to * - * This function enables or disables a UMEM to a certain ring. + * This function enables or disables a buffer pool to a certain ring. * * Returns 0 on success, <0 on failure **/ -int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem, +int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) { - return umem ? i40e_xsk_umem_enable(vsi, umem, qid) : - i40e_xsk_umem_disable(vsi, qid); + return pool ? i40e_xsk_pool_enable(vsi, pool, qid) : + i40e_xsk_pool_disable(vsi, qid); } /** @@ -191,7 +195,7 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) rx_desc = I40E_RX_DESC(rx_ring, ntu); bi = i40e_rx_bi(rx_ring, ntu); do { - xdp = xsk_buff_alloc(rx_ring->xsk_umem); + xdp = xsk_buff_alloc(rx_ring->xsk_pool); if (!xdp) { ok = false; goto no_buffers; @@ -254,6 +258,18 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, } /** + * i40e_inc_ntc: Advance the next_to_clean index + * @rx_ring: Rx ring + **/ +static void i40e_inc_ntc(struct i40e_ring *rx_ring) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + ntc = (ntc < rx_ring->count) ? 
ntc : 0; + rx_ring->next_to_clean = ntc; +} + +/** * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring * @rx_ring: Rx ring * @budget: NAPI budget @@ -265,8 +281,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); unsigned int xdp_res, xdp_xmit = 0; - bool failure = false; struct sk_buff *skb; + bool failure; while (likely(total_rx_packets < (unsigned int)budget)) { union i40e_rx_desc *rx_desc; @@ -274,13 +290,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) unsigned int size; u64 qword; - if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - failure = failure || - !i40e_alloc_rx_buffers_zc(rx_ring, - cleaned_count); - cleaned_count = 0; - } - rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); @@ -310,7 +319,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); (*bi)->data_end = (*bi)->data + size; - xsk_buff_dma_sync_for_cpu(*bi); + xsk_buff_dma_sync_for_cpu(*bi, rx_ring->xsk_pool); xdp_res = i40e_run_xdp_zc(rx_ring, *bi); if (xdp_res) { @@ -355,14 +364,17 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) napi_gro_receive(&rx_ring->q_vector->napi, skb); } + if (cleaned_count >= I40E_RX_BUFFER_WRITE) + failure = !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); + i40e_finalize_xdp_rx(rx_ring, xdp_xmit); i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); - if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) { + if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) - xsk_set_rx_need_wakeup(rx_ring->xsk_umem); + xsk_set_rx_need_wakeup(rx_ring->xsk_pool); else - xsk_clear_rx_need_wakeup(rx_ring->xsk_umem); + xsk_clear_rx_need_wakeup(rx_ring->xsk_pool); return (int)total_rx_packets; } @@ -385,11 +397,11 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) dma_addr_t dma; while (budget-- > 0) { - if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) + if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc)) break; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma, + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc.len); tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use]; @@ -416,7 +428,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) I40E_TXD_QW1_CMD_SHIFT); i40e_xdp_ring_update_tail(xdp_ring); - xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + xsk_tx_release(xdp_ring->xsk_pool); i40e_update_tx_stats(xdp_ring, sent_frames, total_bytes); } @@ -448,7 +460,7 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring, **/ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) { - struct xdp_umem *umem = tx_ring->xsk_umem; + struct xsk_buff_pool *bp = tx_ring->xsk_pool; u32 i, completed_frames, xsk_frames = 0; u32 head_idx = i40e_get_head(tx_ring); struct i40e_tx_buffer *tx_bi; @@ -488,13 +500,13 @@ skip: tx_ring->next_to_clean -= tx_ring->count; if (xsk_frames) - xsk_umem_complete_tx(umem, xsk_frames); + xsk_tx_completed(bp, xsk_frames); i40e_arm_wb(tx_ring, vsi, completed_frames); out_xmit: - if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) - xsk_set_tx_need_wakeup(tx_ring->xsk_umem); + if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) 
+ xsk_set_tx_need_wakeup(tx_ring->xsk_pool); return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring)); } @@ -526,7 +538,7 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) if (queue_id >= vsi->num_queue_pairs) return -ENXIO; - if (!vsi->xdp_rings[queue_id]->xsk_umem) + if (!vsi->xdp_rings[queue_id]->xsk_pool) return -ENXIO; ring = vsi->xdp_rings[queue_id]; @@ -565,7 +577,7 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring) void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring) { u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use; - struct xdp_umem *umem = tx_ring->xsk_umem; + struct xsk_buff_pool *bp = tx_ring->xsk_pool; struct i40e_tx_buffer *tx_bi; u32 xsk_frames = 0; @@ -585,14 +597,15 @@ void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring) } if (xsk_frames) - xsk_umem_complete_tx(umem, xsk_frames); + xsk_tx_completed(bp, xsk_frames); } /** - * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have AF_XDP UMEM attached + * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have an AF_XDP + * buffer pool attached * @vsi: vsi * - * Returns true if any of the Rx rings has an AF_XDP UMEM attached + * Returns true if any of the Rx rings has an AF_XDP buffer pool attached **/ bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi) { @@ -600,7 +613,7 @@ bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi) int i; for (i = 0; i < vsi->num_queue_pairs; i++) { - if (xdp_get_umem_from_qid(netdev, i)) + if (xsk_get_pool_from_qid(netdev, i)) return true; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index c524c142127f..7adfd8539247 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -5,12 +5,12 @@ #define _I40E_XSK_H_ struct i40e_vsi; -struct xdp_umem; +struct xsk_buff_pool; struct zero_copy_allocator; int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair); int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair); -int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem, +int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count); int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq.h b/drivers/net/ethernet/intel/iavf/iavf_adminq.h index baf2fe26f302..1f60518eb0e5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_adminq.h +++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.h @@ -85,8 +85,8 @@ struct iavf_adminq_info { /** * iavf_aq_rc_to_posix - convert errors to user-land codes - * aq_ret: AdminQ handler error code can override aq_rc - * aq_rc: AdminQ firmware error code to convert + * @aq_ret: AdminQ handler error code can override aq_rc + * @aq_rc: AdminQ firmware error code to convert **/ static inline int iavf_aq_rc_to_posix(int aq_ret, int aq_rc) { diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index cf539db79af9..95543dfd4fe7 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -147,6 +147,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) /** * iavf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: queue number that is timing out **/ static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { @@ -2572,8 +2573,8 @@ static int iavf_validate_ch_config(struct 
iavf_adapter *adapter, } /** - * iavf_del_all_cloud_filters - delete all cloud filters - * on the traffic classes + * iavf_del_all_cloud_filters - delete all cloud filters on the traffic classes + * @adapter: board private structure **/ static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter) { @@ -2592,7 +2593,7 @@ static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter) /** * __iavf_setup_tc - configure multiple traffic classes * @netdev: network interface device structure - * @type_date: tc offload data + * @type_data: tc offload data * * This function processes the config information provided by the * user to configure traffic classes/queue channels and packages the @@ -2690,7 +2691,7 @@ exit: /** * iavf_parse_cls_flower - Parse tc flower filters provided by kernel * @adapter: board private structure - * @cls_flower: pointer to struct flow_cls_offload + * @f: pointer to struct flow_cls_offload * @filter: pointer to cloud filter structure */ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, @@ -3064,8 +3065,8 @@ static int iavf_delete_clsflower(struct iavf_adapter *adapter, /** * iavf_setup_tc_cls_flower - flower classifier offloads - * @netdev: net device to configure - * @type_data: offload data + * @adapter: board private structure + * @cls_flower: pointer to flow_cls_offload struct with flow info */ static int iavf_setup_tc_cls_flower(struct iavf_adapter *adapter, struct flow_cls_offload *cls_flower) @@ -3112,7 +3113,7 @@ static LIST_HEAD(iavf_block_cb_list); * iavf_setup_tc - configure multiple traffic classes * @netdev: network interface device structure * @type: type of offload - * @type_date: tc offload data + * @type_data: tc offload data * * This function is the callback to ndo_setup_tc in the * netdev_ops. @@ -3768,8 +3769,7 @@ err_dma: /** * iavf_suspend - Power management suspend routine - * @pdev: PCI device information struct - * @state: unused + * @dev_d: device info pointer * * Called when the system (VM) is entering sleep/suspend. **/ @@ -3799,7 +3799,7 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) /** * iavf_resume - Power management resume routine - * @pdev: PCI device information struct + * @dev_d: device info pointer * * Called when the system (VM) is resumed from sleep/suspend. 
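These iavf kernel-doc fixes all restore the same rule: each @name: line must match a parameter in the prototype exactly (the PM callbacks now take a struct device, hence @pdev becoming @dev_d). A minimal illustration of the convention, not part of the patch:

/**
 * example_resume - Power management resume routine
 * @dev_d: device info pointer
 *
 * Every @name above must be spelled as in the signature below, or
 * scripts/kernel-doc warns "Function parameter or member ... not described".
 */
static int __maybe_unused example_resume(struct device *dev_d);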
**/ diff --git a/drivers/net/ethernet/intel/iavf/iavf_trace.h b/drivers/net/ethernet/intel/iavf/iavf_trace.h index 1058e68a02b4..82fda6f5abf0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_trace.h +++ b/drivers/net/ethernet/intel/iavf/iavf_trace.h @@ -22,7 +22,7 @@ #include <linux/tracepoint.h> -/** +/* * iavf_trace() macro enables shared code to refer to trace points * like: * diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index ca041b39ffda..256fa07d54d5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1309,10 +1309,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, return NULL; /* prefetch first cache line of first page */ va = page_address(rx_buffer->page) + rx_buffer->page_offset; - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif + net_prefetch(va); /* allocate a skb to store the frags */ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, @@ -1376,10 +1373,8 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, return NULL; /* prefetch first cache line of first page */ va = page_address(rx_buffer->page) + rx_buffer->page_offset; - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif + net_prefetch(va); + /* build an skb around the page buffer */ skb = build_skb(va - IAVF_SKB_PAD, truesize); if (unlikely(!skb)) diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index dd3348f9da9d..e5b9ba42dd00 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -454,7 +454,6 @@ bool __iavf_chk_linearize(struct sk_buff *skb); /** * iavf_xmit_descriptor_count - calculate number of Tx descriptors needed * @skb: send buffer - * @tx_ring: ring to send buffer on * * Returns number of data descriptors needed for this skb. 
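The prefetch hunks in iavf_txrx.c mirror the i40e ones earlier in this series: the open-coded pair guarded by L1_CACHE_BYTES is folded into net_prefetch(), which hides the cache-line-size check. As a reference sketch, the helper in include/linux/netdevice.h is approximately:

static inline void net_prefetch(void *p)
{
	prefetch(p);
#if L1_CACHE_BYTES < 128
	/* small packet headers can straddle two 64-byte cache lines */
	prefetch((u8 *)p + L1_CACHE_BYTES);
#endif
}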
Returns 0 to indicate * there is not enough descriptors available in this ring since we need at least @@ -514,6 +513,7 @@ static inline bool iavf_chk_linearize(struct sk_buff *skb, int count) return count != IAVF_MAX_BUFFER_TXD; } /** + * txring_txq - helper to convert from a ring to a queue * @ring: Tx ring to find the netdev equivalent of **/ static inline struct netdev_queue *txring_txq(const struct iavf_ring *ring) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index fe140ff38f74..a0723831c4e4 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -284,6 +284,10 @@ struct ice_vsi { spinlock_t arfs_lock; /* protects aRFS hash table and filter state */ atomic_t *arfs_last_fltr_id; + /* devlink port data */ + struct devlink_port devlink_port; + bool devlink_port_registered; + u16 max_frame; u16 rx_buf_len; @@ -321,9 +325,9 @@ struct ice_vsi { struct ice_ring **xdp_rings; /* XDP ring array */ u16 num_xdp_txq; /* Used XDP queues */ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ - struct xdp_umem **xsk_umems; - u16 num_xsk_umems_used; - u16 num_xsk_umems; + struct xsk_buff_pool **xsk_pools; + u16 num_xsk_pools_used; + u16 num_xsk_pools; } ____cacheline_internodealigned_in_smp; /* struct that defines an interrupt vector */ @@ -375,9 +379,6 @@ enum ice_pf_flags { struct ice_pf { struct pci_dev *pdev; - /* devlink port data */ - struct devlink_port devlink_port; - struct devlink_region *nvm_region; struct devlink_region *devcaps_region; @@ -507,25 +508,25 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring) } /** - * ice_xsk_umem - get XDP UMEM bound to a ring - * @ring - ring to use + * ice_xsk_pool - get XSK buffer pool bound to a ring + * @ring: ring to use * - * Returns a pointer to xdp_umem structure if there is an UMEM present, + * Returns a pointer to xdp_umem structure if there is a buffer pool present, * NULL otherwise. 
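The new txring_txq() kernel-doc in iavf_txrx.h gains the summary line the tooling requires; the helper body itself (unchanged by this patch) is just a lookup, roughly:

static inline struct netdev_queue *txring_txq(const struct iavf_ring *ring)
{
	/* map a driver Tx ring back to its stack netdev_queue */
	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
}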
*/ -static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring) +static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring) { - struct xdp_umem **umems = ring->vsi->xsk_umems; + struct xsk_buff_pool **pools = ring->vsi->xsk_pools; u16 qid = ring->q_index; if (ice_ring_is_xdp(ring)) qid -= ring->vsi->num_xdp_txq; - if (qid >= ring->vsi->num_xsk_umems || !umems || !umems[qid] || + if (qid >= ring->vsi->num_xsk_pools || !pools || !pools[qid] || !ice_is_xdp_ena_vsi(ring->vsi)) return NULL; - return umems[qid]; + return pools[qid]; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index ba9375218fef..b06fbe99d8e9 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1422,7 +1422,7 @@ struct ice_aqc_nvm_comp_tbl { u8 cvs[]; /* Component Version String */ } __packed; -/** +/* * Send to PF command (indirect 0x0801) ID is only used by PF * * Send to VF command (indirect 0x0802) ID is only used by PF @@ -1826,8 +1826,8 @@ struct ice_aqc_event_lan_overflow { * @opcode: AQ command opcode * @datalen: length in bytes of indirect/external data buffer * @retval: return value from firmware - * @cookie_h: opaque data high-half - * @cookie_l: opaque data low-half + * @cookie_high: opaque data high-half + * @cookie_low: opaque data low-half * @params: command-specific parameters * * Descriptor format for commands the driver posts on the Admin Transmit Queue diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 87008476d8fe..fe4320e2d1f2 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -308,12 +308,12 @@ int ice_setup_rx_ctx(struct ice_ring *ring) xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index); - ring->xsk_umem = ice_xsk_umem(ring); - if (ring->xsk_umem) { + ring->xsk_pool = ice_xsk_pool(ring); + if (ring->xsk_pool) { xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); ring->rx_buf_len = - xsk_umem_get_rx_frame_size(ring->xsk_umem); + xsk_pool_get_rx_frame_size(ring->xsk_pool); /* For AF_XDP ZC, we disallow packets to span on * multiple buffers, thus letting us skip that * handling in the fast-path. 
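A worked example for the qid arithmetic in ice_xsk_pool() above, using illustrative numbers: XDP Tx rings are stacked after the regular Tx rings, so an XDP ring's q_index carries an offset of num_xdp_txq that must be subtracted to find its queue pair.

/* Assume 8 queue pairs, so num_xdp_txq == 8 (illustrative only):
 *   Rx ring,  q_index 2  -> qid 2      -> xsk_pools[2]
 *   XDP ring, q_index 10 -> 10 - 8 = 2 -> xsk_pools[2], same pool
 * i.e. the XDP Tx ring shares the buffer pool of the Rx queue it serves.
 */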
@@ -324,7 +324,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) NULL); if (err) return err; - xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq); + xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); @@ -417,9 +417,9 @@ int ice_setup_rx_ctx(struct ice_ring *ring) ring->tail = hw->hw_addr + QRX_TAIL(pf_q); writel(0, ring->tail); - if (ring->xsk_umem) { - if (!xsk_buff_can_alloc(ring->xsk_umem, num_bufs)) { - dev_warn(dev, "UMEM does not provide enough addresses to fill %d buffers on Rx ring %d\n", + if (ring->xsk_pool) { + if (!xsk_buff_can_alloc(ring->xsk_pool, num_bufs)) { + dev_warn(dev, "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n", num_bufs, ring->q_index); dev_warn(dev, "Change Rx ring/fill queue size to avoid performance issues\n"); @@ -428,7 +428,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) err = ice_alloc_rx_bufs_zc(ring, num_bufs); if (err) - dev_info(dev, "Failed to allocate some buffers on UMEM enabled Rx ring %d (pf_q %d)\n", + dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n", ring->q_index, pf_q); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 111d6bfe4222..511da59bd6f2 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -6,18 +6,14 @@ #include "ice_devlink.h" #include "ice_fw_update.h" -static int ice_info_get_dsn(struct ice_pf *pf, char *buf, size_t len) +static void ice_info_get_dsn(struct ice_pf *pf, char *buf, size_t len) { u8 dsn[8]; /* Copy the DSN into an array in Big Endian format */ put_unaligned_be64(pci_get_dsn(pf->pdev), dsn); - snprintf(buf, len, "%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x", - dsn[0], dsn[1], dsn[2], dsn[3], - dsn[4], dsn[5], dsn[6], dsn[7]); - - return 0; + snprintf(buf, len, "%8phD", dsn); } static int ice_info_pba(struct ice_pf *pf, char *buf, size_t len) @@ -106,6 +102,13 @@ static int ice_info_ddp_pkg_version(struct ice_pf *pf, char *buf, size_t len) return 0; } +static int ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, char *buf, size_t len) +{ + snprintf(buf, len, "0x%08x", pf->hw.active_track_id); + + return 0; +} + static int ice_info_netlist_ver(struct ice_pf *pf, char *buf, size_t len) { struct ice_netlist_ver_info *netlist = &pf->hw.netlist_ver; @@ -150,6 +153,7 @@ static const struct ice_devlink_version { running(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack), running("fw.app.name", ice_info_ddp_pkg_name), running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version), + running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id), running("fw.netlist", ice_info_netlist_ver), running("fw.netlist.build", ice_info_netlist_build), }; @@ -180,11 +184,7 @@ static int ice_devlink_info_get(struct devlink *devlink, return err; } - err = ice_info_get_dsn(pf, buf, sizeof(buf)); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to obtain serial number"); - return err; - } + ice_info_get_dsn(pf, buf, sizeof(buf)); err = devlink_info_serial_number_put(req, buf); if (err) { @@ -233,8 +233,7 @@ static int ice_devlink_info_get(struct devlink *devlink, /** * ice_devlink_flash_update - Update firmware stored in flash on the device * @devlink: pointer to devlink associated with device to update - * @path: the path of the firmware file to use via request_firmware - * @component: name of the component to update, or 
NULL + * @params: flash update parameters * @extack: netlink extended ACK structure * * Perform a device flash update. The bulk of the update logic is contained @@ -243,38 +242,52 @@ static int ice_devlink_info_get(struct devlink *devlink, * Returns: zero on success, or an error code on failure. */ static int -ice_devlink_flash_update(struct devlink *devlink, const char *path, - const char *component, struct netlink_ext_ack *extack) +ice_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); struct device *dev = &pf->pdev->dev; struct ice_hw *hw = &pf->hw; const struct firmware *fw; + u8 preservation; int err; - /* individual component update is not yet supported */ - if (component) + if (!params->overwrite_mask) { + /* preserve all settings and identifiers */ + preservation = ICE_AQC_NVM_PRESERVE_ALL; + } else if (params->overwrite_mask == DEVLINK_FLASH_OVERWRITE_SETTINGS) { + /* overwrite settings, but preserve the vital device identifiers */ + preservation = ICE_AQC_NVM_PRESERVE_SELECTED; + } else if (params->overwrite_mask == (DEVLINK_FLASH_OVERWRITE_SETTINGS | + DEVLINK_FLASH_OVERWRITE_IDENTIFIERS)) { + /* overwrite both settings and identifiers, preserve nothing */ + preservation = ICE_AQC_NVM_NO_PRESERVATION; + } else { + NL_SET_ERR_MSG_MOD(extack, "Requested overwrite mask is not supported"); return -EOPNOTSUPP; + } if (!hw->dev_caps.common_cap.nvm_unified_update) { NL_SET_ERR_MSG_MOD(extack, "Current firmware does not support unified update"); return -EOPNOTSUPP; } - err = ice_check_for_pending_update(pf, component, extack); + err = ice_check_for_pending_update(pf, NULL, extack); if (err) return err; - err = request_firmware(&fw, path, dev); + err = request_firmware(&fw, params->file_name, dev); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to read file from disk"); return err; } + dev_dbg(dev, "Beginning flash update with file '%s'\n", params->file_name); + devlink_flash_update_begin_notify(devlink); - devlink_flash_update_status_notify(devlink, "Preparing to flash", - component, 0, 0); - err = ice_flash_pldm_image(pf, fw, extack); + devlink_flash_update_status_notify(devlink, "Preparing to flash", NULL, 0, 0); + err = ice_flash_pldm_image(pf, fw, preservation, extack); devlink_flash_update_end_notify(devlink); release_firmware(fw); @@ -283,6 +296,7 @@ ice_devlink_flash_update(struct devlink *devlink, const char *path, } static const struct devlink_ops ice_devlink_ops = { + .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, .info_get = ice_devlink_info_get, .flash_update = ice_devlink_flash_update, }; @@ -352,55 +366,66 @@ void ice_devlink_unregister(struct ice_pf *pf) } /** - * ice_devlink_create_port - Create a devlink port for this PF - * @pf: the PF to create a port for + * ice_devlink_create_port - Create a devlink port for this VSI + * @vsi: the VSI to create a port for * - * Create and register a devlink_port for this PF. Note that although each - * physical function is connected to a separate devlink instance, the port - * will still be numbered according to the physical function ID. + * Create and register a devlink_port for this VSI. * * Return: zero on success or an error code on failure. 
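Moving the devlink_port into the VSI gives one port per PF netdev rather than per devlink instance. Registration here is only half of the plumbing; a hedged sketch of the other half, which binds the port to its netdev so `devlink port` can report the interface name (the actual call site is elsewhere in the driver):

/* after register_netdev() succeeds for the PF VSI: */
devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev);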
*/ -int ice_devlink_create_port(struct ice_pf *pf) +int ice_devlink_create_port(struct ice_vsi *vsi) { - struct devlink *devlink = priv_to_devlink(pf); - struct ice_vsi *vsi = ice_get_main_vsi(pf); - struct device *dev = ice_pf_to_dev(pf); struct devlink_port_attrs attrs = {}; + struct ice_port_info *pi; + struct devlink *devlink; + struct device *dev; + struct ice_pf *pf; int err; - if (!vsi) { - dev_err(dev, "%s: unable to find main VSI\n", __func__); - return -EIO; - } + /* Currently we only create devlink_port instances for PF VSIs */ + if (vsi->type != ICE_VSI_PF) + return -EINVAL; + + pf = vsi->back; + devlink = priv_to_devlink(pf); + dev = ice_pf_to_dev(pf); + pi = pf->hw.port_info; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pf->hw.pf_id; - devlink_port_attrs_set(&pf->devlink_port, &attrs); - err = devlink_port_register(devlink, &pf->devlink_port, pf->hw.pf_id); + attrs.phys.port_number = pi->lport; + devlink_port_attrs_set(&vsi->devlink_port, &attrs); + err = devlink_port_register(devlink, &vsi->devlink_port, vsi->idx); if (err) { dev_err(dev, "devlink_port_register failed: %d\n", err); return err; } + vsi->devlink_port_registered = true; + return 0; } /** - * ice_devlink_destroy_port - Destroy the devlink_port for this PF - * @pf: the PF to cleanup + * ice_devlink_destroy_port - Destroy the devlink_port for this VSI + * @vsi: the VSI to cleanup * - * Unregisters the devlink_port structure associated with this PF. + * Unregisters the devlink_port structure associated with this VSI. */ -void ice_devlink_destroy_port(struct ice_pf *pf) +void ice_devlink_destroy_port(struct ice_vsi *vsi) { - devlink_port_type_clear(&pf->devlink_port); - devlink_port_unregister(&pf->devlink_port); + if (!vsi->devlink_port_registered) + return; + + devlink_port_type_clear(&vsi->devlink_port); + devlink_port_unregister(&vsi->devlink_port); + + vsi->devlink_port_registered = false; } /** * ice_devlink_nvm_snapshot - Capture a snapshot of the Shadow RAM contents * @devlink: the devlink instance + * @ops: the devlink region being snapshotted * @extack: extended ACK response structure * @data: on exit points to snapshot data buffer * @@ -413,6 +438,7 @@ void ice_devlink_destroy_port(struct ice_pf *pf) * error code on failure. 
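The extra const struct devlink_region_ops *ops argument lets one snapshot routine serve several regions. For context, the wiring elsewhere in this file looks approximately like:

static const struct devlink_region_ops ice_nvm_region_ops = {
	.name = "nvm-flash",
	.destructor = vfree,	/* frees the snapshot buffer on deletion */
	.snapshot = ice_devlink_nvm_snapshot,
};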
*/ static int ice_devlink_nvm_snapshot(struct devlink *devlink, + const struct devlink_region_ops *ops, struct netlink_ext_ack *extack, u8 **data) { struct ice_pf *pf = devlink_priv(devlink); @@ -456,6 +482,7 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, /** * ice_devlink_devcaps_snapshot - Capture snapshot of device capabilities * @devlink: the devlink instance + * @ops: the devlink region being snapshotted * @extack: extended ACK response structure * @data: on exit points to snapshot data buffer * @@ -468,6 +495,7 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, */ static int ice_devlink_devcaps_snapshot(struct devlink *devlink, + const struct devlink_region_ops *ops, struct netlink_ext_ack *extack, u8 **data) { struct ice_pf *pf = devlink_priv(devlink); diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h index 6e806a08dc23..e07e74426bde 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.h +++ b/drivers/net/ethernet/intel/ice/ice_devlink.h @@ -8,8 +8,8 @@ struct ice_pf *ice_allocate_pf(struct device *dev); int ice_devlink_register(struct ice_pf *pf); void ice_devlink_unregister(struct ice_pf *pf); -int ice_devlink_create_port(struct ice_pf *pf); -void ice_devlink_destroy_port(struct ice_pf *pf); +int ice_devlink_create_port(struct ice_vsi *vsi); +void ice_devlink_destroy_port(struct ice_vsi *vsi); void ice_devlink_init_regions(struct ice_pf *pf); void ice_devlink_destroy_regions(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index d7430ce6af26..2d27f66ac853 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1268,8 +1268,7 @@ ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool is_tun = tun == ICE_FD_HW_SEG_TUN; int err; - if (is_tun && !ice_get_open_tunnel_port(&pf->hw, TNL_ALL, - &port_num)) + if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num)) continue; err = ice_fdir_write_fltr(pf, input, add, is_tun); if (err) @@ -1647,8 +1646,7 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) } /* return error if not an update and no available filters */ - fltrs_needed = ice_get_open_tunnel_port(hw, TNL_ALL, &tunnel_port) ? - 2 : 1; + fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port) ? 2 : 1; if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) && ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) { dev_err(dev, "Failed to add filter. 
The maximum number of flow director filters has been reached.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index 6834df14332f..59c0c6a0f8c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -556,7 +556,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len); loc = pkt; } else { - if (!ice_get_open_tunnel_port(hw, TNL_ALL, &tnl_port)) + if (!ice_get_open_tunnel_port(hw, &tnl_port)) return ICE_ERR_DOES_NOT_EXIST; if (!ice_fdir_pkt[idx].tun_pkt) return ICE_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index b17ae3e20157..9095b4d274ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -489,8 +489,6 @@ static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg) if ((label_name[len] - '0') == hw->pf_id) { hw->tnl.tbl[hw->tnl.count].type = tnls[i].type; hw->tnl.tbl[hw->tnl.count].valid = false; - hw->tnl.tbl[hw->tnl.count].in_use = false; - hw->tnl.tbl[hw->tnl.count].marked = false; hw->tnl.tbl[hw->tnl.count].boost_addr = val; hw->tnl.tbl[hw->tnl.count].port = 0; hw->tnl.count++; @@ -505,8 +503,11 @@ static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg) for (i = 0; i < hw->tnl.count; i++) { ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr, &hw->tnl.tbl[i].boost_entry); - if (hw->tnl.tbl[i].boost_entry) + if (hw->tnl.tbl[i].boost_entry) { hw->tnl.tbl[i].valid = true; + if (hw->tnl.tbl[i].type < __TNL_TYPE_CNT) + hw->tnl.valid_count[hw->tnl.tbl[i].type]++; + } } } @@ -1626,104 +1627,59 @@ static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld) } /** - * ice_tunnel_port_in_use_hlpr - helper function to determine tunnel usage + * ice_get_open_tunnel_port - retrieve an open tunnel port * @hw: pointer to the HW structure - * @port: port to search for - * @index: optionally returns index - * - * Returns whether a port is already in use as a tunnel, and optionally its - * index + * @port: returns open port */ -static bool ice_tunnel_port_in_use_hlpr(struct ice_hw *hw, u16 port, u16 *index) +bool +ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port) { + bool res = false; u16 i; + mutex_lock(&hw->tnl_lock); + for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].in_use && hw->tnl.tbl[i].port == port) { - if (index) - *index = i; - return true; + if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port) { + *port = hw->tnl.tbl[i].port; + res = true; + break; } - return false; -} - -/** - * ice_tunnel_port_in_use - * @hw: pointer to the HW structure - * @port: port to search for - * @index: optionally returns index - * - * Returns whether a port is already in use as a tunnel, and optionally its - * index - */ -bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index) -{ - bool res; - - mutex_lock(&hw->tnl_lock); - res = ice_tunnel_port_in_use_hlpr(hw, port, index); mutex_unlock(&hw->tnl_lock); return res; } /** - * ice_find_free_tunnel_entry + * ice_tunnel_idx_to_entry - convert linear index to the sparse one * @hw: pointer to the HW structure - * @type: tunnel type - * @index: optionally returns index + * @type: type of tunnel + * @idx: linear index * - * Returns whether there is a free tunnel entry, and optionally its index + * Stack assumes we have 2 linear tables with indexes [0, 
count_valid), + * but really the port table may be sparse, and types are mixed, so convert + * the stack index into the device index. */ -static bool -ice_find_free_tunnel_entry(struct ice_hw *hw, enum ice_tunnel_type type, - u16 *index) +static u16 ice_tunnel_idx_to_entry(struct ice_hw *hw, enum ice_tunnel_type type, + u16 idx) { u16 i; for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && !hw->tnl.tbl[i].in_use && - hw->tnl.tbl[i].type == type) { - if (index) - *index = i; - return true; - } + if (hw->tnl.tbl[i].valid && + hw->tnl.tbl[i].type == type && + idx--) + return i; - return false; -} - -/** - * ice_get_open_tunnel_port - retrieve an open tunnel port - * @hw: pointer to the HW structure - * @type: tunnel type (TNL_ALL will return any open port) - * @port: returns open port - */ -bool -ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, - u16 *port) -{ - bool res = false; - u16 i; - - mutex_lock(&hw->tnl_lock); - - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && - (type == TNL_ALL || hw->tnl.tbl[i].type == type)) { - *port = hw->tnl.tbl[i].port; - res = true; - break; - } - - mutex_unlock(&hw->tnl_lock); - - return res; + WARN_ON_ONCE(1); + return 0; } /** * ice_create_tunnel * @hw: pointer to the HW structure + * @index: device table entry * @type: type of tunnel * @port: port of tunnel to create * @@ -1731,27 +1687,16 @@ ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, * creating a package buffer with the tunnel info and issuing an update package * command. */ -enum ice_status -ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port) +static enum ice_status +ice_create_tunnel(struct ice_hw *hw, u16 index, + enum ice_tunnel_type type, u16 port) { struct ice_boost_tcam_section *sect_rx, *sect_tx; enum ice_status status = ICE_ERR_MAX_LIMIT; struct ice_buf_build *bld; - u16 index; mutex_lock(&hw->tnl_lock); - if (ice_tunnel_port_in_use_hlpr(hw, port, &index)) { - hw->tnl.tbl[index].ref++; - status = 0; - goto ice_create_tunnel_end; - } - - if (!ice_find_free_tunnel_entry(hw, type, &index)) { - status = ICE_ERR_OUT_OF_RANGE; - goto ice_create_tunnel_end; - } - bld = ice_pkg_buf_alloc(hw); if (!bld) { status = ICE_ERR_NO_MEMORY; @@ -1790,11 +1735,8 @@ ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port) memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam)); status = ice_update_pkg(hw, ice_pkg_buf(bld), 1); - if (!status) { + if (!status) hw->tnl.tbl[index].port = port; - hw->tnl.tbl[index].in_use = true; - hw->tnl.tbl[index].ref = 1; - } ice_create_tunnel_err: ice_pkg_buf_free(hw, bld); @@ -1808,46 +1750,31 @@ ice_create_tunnel_end: /** * ice_destroy_tunnel * @hw: pointer to the HW structure + * @index: device table entry + * @type: type of tunnel * @port: port of tunnel to destroy (ignored if the all parameter is true) - * @all: flag that states to destroy all tunnels * * Destroys a tunnel or all tunnels by creating an update package buffer * targeting the specific updates requested and then performing an update * package. 
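The per-type valid_count[] totals gathered in ice_init_pkg_hints() are what size the udp_tunnel_nic tables; the set/unset callbacks below are then driven entirely by the core. A hedged sketch of the hookup, which lives in ice_main.c outside these hunks (field layout assumed from the udp_tunnel_nic API):

struct udp_tunnel_nic_info *info = &pf->hw.udp_tunnel_nic;

info->set_port = ice_udp_tunnel_set_port;
info->unset_port = ice_udp_tunnel_unset_port;
info->flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
info->tables[0].n_entries = pf->hw.tnl.valid_count[TNL_VXLAN];
info->tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
info->tables[1].n_entries = pf->hw.tnl.valid_count[TNL_GENEVE];
info->tables[1].tunnel_types = UDP_TUNNEL_TYPE_GENEVE;
netdev->udp_tunnel_nic_info = info;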
*/ -enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) +static enum ice_status +ice_destroy_tunnel(struct ice_hw *hw, u16 index, enum ice_tunnel_type type, + u16 port) { struct ice_boost_tcam_section *sect_rx, *sect_tx; enum ice_status status = ICE_ERR_MAX_LIMIT; struct ice_buf_build *bld; - u16 count = 0; - u16 index; - u16 size; - u16 i; mutex_lock(&hw->tnl_lock); - if (!all && ice_tunnel_port_in_use_hlpr(hw, port, &index)) - if (hw->tnl.tbl[index].ref > 1) { - hw->tnl.tbl[index].ref--; - status = 0; - goto ice_destroy_tunnel_end; - } - - /* determine count */ - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && - (all || hw->tnl.tbl[i].port == port)) - count++; - - if (!count) { - status = ICE_ERR_PARAM; + if (WARN_ON(!hw->tnl.tbl[index].valid || + hw->tnl.tbl[index].type != type || + hw->tnl.tbl[index].port != port)) { + status = ICE_ERR_OUT_OF_RANGE; goto ice_destroy_tunnel_end; } - /* size of section - there is at least one entry */ - size = struct_size(sect_rx, tcam, count); - bld = ice_pkg_buf_alloc(hw); if (!bld) { status = ICE_ERR_NO_MEMORY; @@ -1859,13 +1786,13 @@ enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) goto ice_destroy_tunnel_err; sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM, - size); + struct_size(sect_rx, tcam, 1)); if (!sect_rx) goto ice_destroy_tunnel_err; sect_rx->count = cpu_to_le16(1); sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM, - size); + struct_size(sect_tx, tcam, 1)); if (!sect_tx) goto ice_destroy_tunnel_err; sect_tx->count = cpu_to_le16(1); @@ -1873,26 +1800,14 @@ enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) /* copy original boost entry to update package buffer, one copy to Rx * section, another copy to the Tx section */ - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && - (all || hw->tnl.tbl[i].port == port)) { - memcpy(sect_rx->tcam + i, hw->tnl.tbl[i].boost_entry, - sizeof(*sect_rx->tcam)); - memcpy(sect_tx->tcam + i, hw->tnl.tbl[i].boost_entry, - sizeof(*sect_tx->tcam)); - hw->tnl.tbl[i].marked = true; - } + memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry, + sizeof(*sect_rx->tcam)); + memcpy(sect_tx->tcam, hw->tnl.tbl[index].boost_entry, + sizeof(*sect_tx->tcam)); status = ice_update_pkg(hw, ice_pkg_buf(bld), 1); if (!status) - for (i = 0; i < hw->tnl.count && - i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].marked) { - hw->tnl.tbl[i].ref = 0; - hw->tnl.tbl[i].port = 0; - hw->tnl.tbl[i].in_use = false; - hw->tnl.tbl[i].marked = false; - } + hw->tnl.tbl[index].port = 0; ice_destroy_tunnel_err: ice_pkg_buf_free(hw, bld); @@ -1903,6 +1818,52 @@ ice_destroy_tunnel_end: return status; } +int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + enum ice_tunnel_type tnl_type; + enum ice_status status; + u16 index; + + tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? 
TNL_VXLAN : TNL_GENEVE; + index = ice_tunnel_idx_to_entry(&pf->hw, idx, tnl_type); + + status = ice_create_tunnel(&pf->hw, index, tnl_type, ntohs(ti->port)); + if (status) { + netdev_err(netdev, "Error adding UDP tunnel - %s\n", + ice_stat_str(status)); + return -EIO; + } + + udp_tunnel_nic_set_port_priv(netdev, table, idx, index); + return 0; +} + +int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + enum ice_tunnel_type tnl_type; + enum ice_status status; + + tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE; + + status = ice_destroy_tunnel(&pf->hw, ti->hw_priv, tnl_type, + ntohs(ti->port)); + if (status) { + netdev_err(netdev, "Error removing UDP tunnel - %s\n", + ice_stat_str(status)); + return -EIO; + } + + return 0; +} + /* PTG Management */ /** @@ -4915,7 +4876,7 @@ ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl) if (last_profile) { /* If there are no profiles left for this VSIG, - * then simply remove the the VSIG. + * then simply remove the VSIG. */ status = ice_rem_vsig(hw, blk, vsig, &chg); if (status) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 568ea519af51..20deddb807c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -19,12 +19,11 @@ #define ICE_PKG_CNT 4 bool -ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, - u16 *port); -enum ice_status -ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port); -enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all); -bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index); +ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port); +int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti); +int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti); enum ice_status ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index c1c99a267a98..24063c1351b2 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -298,6 +298,7 @@ struct ice_pkg_enum { enum ice_tunnel_type { TNL_VXLAN = 0, TNL_GENEVE, + __TNL_TYPE_CNT, TNL_LAST = 0xFF, TNL_ALL = 0xFF, }; @@ -311,11 +312,8 @@ struct ice_tunnel_entry { enum ice_tunnel_type type; u16 boost_addr; u16 port; - u16 ref; struct ice_boost_tcam_entry *boost_entry; u8 valid; - u8 in_use; - u8 marked; }; #define ICE_TUNNEL_MAX_ENTRIES 16 @@ -323,6 +321,7 @@ struct ice_tunnel_entry { struct ice_tunnel_table { struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES]; u16 count; + u16 valid_count[__TNL_TYPE_CNT]; }; struct ice_pkg_es { diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index fe677621dd51..eadc85aee389 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -99,6 +99,54 @@ static const u32 ice_ptypes_ipv6_il[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; +/* Packet types for packets with an Outer/First/Single IPv4 header - no L4 */ +static 
const u32 ice_ipv4_ofos_no_l4[] = { + 0x10C00000, 0x04000800, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last IPv4 header - no L4 */ +static const u32 ice_ipv4_il_no_l4[] = { + 0x60000000, 0x18043008, 0x80000002, 0x6010c021, + 0x00000008, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Outer/First/Single IPv6 header - no L4 */ +static const u32 ice_ipv6_ofos_no_l4[] = { + 0x00000000, 0x00000000, 0x43000000, 0x10002000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last IPv6 header - no L4 */ +static const u32 ice_ipv6_il_no_l4[] = { + 0x00000000, 0x02180430, 0x0000010c, 0x086010c0, + 0x00000430, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + /* UDP Packet types for non-tunneled packets or tunneled * packets with inner UDP. */ @@ -250,11 +298,23 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) hdrs = prof->segs[i].hdrs; - if (hdrs & ICE_FLOW_SEG_HDR_IPV4) { + if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) && + !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) { + src = !i ? (const unsigned long *)ice_ipv4_ofos_no_l4 : + (const unsigned long *)ice_ipv4_il_no_l4; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_IPV4) { src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos : (const unsigned long *)ice_ptypes_ipv4_il; bitmap_and(params->ptypes, params->ptypes, src, ICE_FLOW_PTYPE_MAX); + } else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) && + !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) { + src = !i ? (const unsigned long *)ice_ipv6_ofos_no_l4 : + (const unsigned long *)ice_ipv6_il_no_l4; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); } else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) { src = !i ? 
(const unsigned long *)ice_ptypes_ipv6_ofos : (const unsigned long *)ice_ptypes_ipv6_il; @@ -385,7 +445,7 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params, * ice_flow_xtract_raws - Create extract sequence entries for raw bytes * @hw: pointer to the HW struct * @params: information about the flow to be processed - * @seg: index of packet segment whose raw fields are to be be extracted + * @seg: index of packet segment whose raw fields are to be extracted */ static enum ice_status ice_flow_xtract_raws(struct ice_hw *hw, struct ice_flow_prof_params *params, @@ -999,7 +1059,7 @@ enum ice_status ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk, * * This helper function stores information of a field being matched, including * the type of the field and the locations of the value to match, the mask, and - * and the upper-bound value in the start of the input buffer for a flow entry. + * the upper-bound value in the start of the input buffer for a flow entry. * This function should only be used for fixed-size data structures. * * This function also opportunistically determines the protocol headers to be diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h index 3913da2116d2..829f90b1e998 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.h +++ b/drivers/net/ethernet/intel/ice/ice_flow.h @@ -194,8 +194,8 @@ struct ice_flow_entry { u16 entry_sz; }; -#define ICE_FLOW_ENTRY_HNDL(e) ((u64)e) -#define ICE_FLOW_ENTRY_PTR(h) ((struct ice_flow_entry *)(h)) +#define ICE_FLOW_ENTRY_HNDL(e) ((u64)(uintptr_t)e) +#define ICE_FLOW_ENTRY_PTR(h) ((struct ice_flow_entry *)(uintptr_t)(h)) struct ice_flow_prof { struct list_head l_entry; diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index 8968fdd4816b..8f81b95e679c 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -43,6 +43,8 @@ ice_send_package_data(struct pldmfw *context, const u8 *data, u16 length) enum ice_status status; u8 *package_data; + dev_dbg(dev, "Sending PLDM record package data to firmware\n"); + package_data = kmemdup(data, length, GFP_KERNEL); if (!package_data) return -ENOMEM; @@ -229,6 +231,8 @@ ice_send_component_table(struct pldmfw *context, struct pldmfw_component *compon comp_tbl->cvs_len = component->version_len; memcpy(comp_tbl->cvs, component->version_string, component->version_len); + dev_dbg(dev, "Sending component table to firmware:\n"); + status = ice_nvm_pass_component_tbl(hw, (u8 *)comp_tbl, length, transfer_flag, &comp_response, &comp_response_code, NULL); @@ -279,11 +283,14 @@ ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset, memset(&event, 0, sizeof(event)); + dev_dbg(dev, "Writing block of %u bytes for module 0x%02x at offset %u\n", + block_size, module, offset); + status = ice_aq_update_nvm(hw, module, offset, block_size, block, last_cmd, 0, NULL); if (status) { - dev_err(dev, "Failed to program flash module 0x%02x at offset %u, err %s aq_err %s\n", - module, offset, ice_stat_str(status), + dev_err(dev, "Failed to flash module 0x%02x with block of size %u at offset %u, err %s aq_err %s\n", + module, block_size, offset, ice_stat_str(status), ice_aq_str(hw->adminq.sq_last_status)); NL_SET_ERR_MSG_MOD(extack, "Failed to program flash module"); return -EIO; @@ -297,8 +304,8 @@ ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset, */ err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, 15 * HZ, 
&event); if (err) { - dev_err(dev, "Timed out waiting for firmware write completion for module 0x%02x, err %d\n", - module, err); + dev_err(dev, "Timed out while trying to flash module 0x%02x with block of size %u at offset %u, err %d\n", + module, block_size, offset, err); NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware"); return -EIO; } @@ -324,8 +331,8 @@ ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset, } if (completion_retval) { - dev_err(dev, "Firmware failed to program flash module 0x%02x at offset %u, completion err %s\n", - module, offset, + dev_err(dev, "Firmware failed to flash module 0x%02x with block of size %u at offset %u, err %s\n", + module, block_size, offset, ice_aq_str((enum ice_aq_err)completion_retval)); NL_SET_ERR_MSG_MOD(extack, "Firmware failed to program flash module"); return -EIO; @@ -356,12 +363,15 @@ ice_write_nvm_module(struct ice_pf *pf, u16 module, const char *component, const u8 *image, u32 length, struct netlink_ext_ack *extack) { + struct device *dev = ice_pf_to_dev(pf); struct devlink *devlink; u32 offset = 0; bool last_cmd; u8 *block; int err; + dev_dbg(dev, "Beginning write of flash component '%s', module 0x%02x\n", component, module); + devlink = priv_to_devlink(pf); devlink_flash_update_status_notify(devlink, "Flashing", @@ -394,6 +404,8 @@ ice_write_nvm_module(struct ice_pf *pf, u16 module, const char *component, component, offset, length); } while (!last_cmd); + dev_dbg(dev, "Completed write of flash component '%s', module 0x%02x\n", component, module); + if (err) devlink_flash_update_status_notify(devlink, "Flashing failed", component, length, length); @@ -431,6 +443,8 @@ ice_erase_nvm_module(struct ice_pf *pf, u16 module, const char *component, enum ice_status status; int err; + dev_dbg(dev, "Beginning erase of flash component '%s', module 0x%02x\n", component, module); + memset(&event, 0, sizeof(event)); devlink = priv_to_devlink(pf); @@ -476,6 +490,8 @@ ice_erase_nvm_module(struct ice_pf *pf, u16 module, const char *component, goto out_notify_devlink; } + dev_dbg(dev, "Completed erase of flash component '%s', module 0x%02x\n", component, module); + out_notify_devlink: if (err) devlink_flash_update_status_notify(devlink, "Erasing failed", @@ -614,14 +630,9 @@ static int ice_finalize_update(struct pldmfw *context) struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context); struct netlink_ext_ack *extack = priv->extack; struct ice_pf *pf = priv->pf; - int err; /* Finally, notify firmware to activate the written NVM banks */ - err = ice_switch_flash_banks(pf, priv->activate_flags, extack); - if (err) - return err; - - return 0; + return ice_switch_flash_banks(pf, priv->activate_flags, extack); } static const struct pldmfw_ops ice_fwu_ops = { @@ -636,6 +647,7 @@ static const struct pldmfw_ops ice_fwu_ops = { * ice_flash_pldm_image - Write a PLDM-formatted firmware image to the device * @pf: private device driver structure * @fw: firmware object pointing to the relevant firmware file + * @preservation: preservation level to request from firmware * @extack: netlink extended ACK structure * * Parse the data for a given firmware file, verifying that it is a valid PLDM @@ -649,7 +661,7 @@ static const struct pldmfw_ops ice_fwu_ops = { * Returns: zero on success or a negative error code on failure. 
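The ice_fw_update.c hunks above add per-block debug and error reporting around a loop that pushes the image to a flash module one block at a time, flagging the final command so firmware knows the module is complete. Below is a minimal userspace sketch of that loop shape; BLOCK_SZ, write_block() and write_module() are illustrative stand-ins, not the driver's actual constants or helpers.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define BLOCK_SZ 4096 /* hypothetical per-command payload limit */

/* pretend to send one block to the device; returns 0 on success */
static int write_block(unsigned int module, size_t offset,
		       const unsigned char *block, size_t len, bool last)
{
	printf("module 0x%02x: %zu bytes at offset %zu%s\n",
	       module, len, offset, last ? " (last)" : "");
	return 0;
}

static int write_module(unsigned int module, const unsigned char *image,
			size_t length)
{
	size_t offset = 0;
	bool last;

	do {
		size_t len = length - offset;

		if (len > BLOCK_SZ)
			len = BLOCK_SZ;
		last = (offset + len == length);

		if (write_block(module, offset, image + offset, len, last))
			return -1;
		offset += len;
	} while (!last);

	return 0;
}

int main(void)
{
	unsigned char image[10000];

	memset(image, 0xff, sizeof(image));
	return write_module(0x42, image, sizeof(image));
}

As in ice_write_one_nvm_block() above, the real driver also waits for a firmware completion event after each block and reports block_size and offset on any failure.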
*/ int ice_flash_pldm_image(struct ice_pf *pf, const struct firmware *fw, - struct netlink_ext_ack *extack) + u8 preservation, struct netlink_ext_ack *extack) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; @@ -657,13 +669,24 @@ int ice_flash_pldm_image(struct ice_pf *pf, const struct firmware *fw, enum ice_status status; int err; + switch (preservation) { + case ICE_AQC_NVM_PRESERVE_ALL: + case ICE_AQC_NVM_PRESERVE_SELECTED: + case ICE_AQC_NVM_NO_PRESERVATION: + case ICE_AQC_NVM_FACTORY_DEFAULT: + break; + default: + WARN(1, "Unexpected preservation level request %u", preservation); + return -EINVAL; + } + memset(&priv, 0, sizeof(priv)); priv.context.ops = &ice_fwu_ops; priv.context.dev = dev; priv.extack = extack; priv.pf = pf; - priv.activate_flags = ICE_AQC_NVM_PRESERVE_ALL; + priv.activate_flags = preservation; status = ice_acquire_nvm(hw, ICE_RES_WRITE); if (status) { diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.h b/drivers/net/ethernet/intel/ice/ice_fw_update.h index 79472cc618b4..c6390f6851ff 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.h +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.h @@ -5,7 +5,7 @@ #define _ICE_FW_UPDATE_H_ int ice_flash_pldm_image(struct ice_pf *pf, const struct firmware *fw, - struct netlink_ext_ack *extack); + u8 preservation, struct netlink_ext_ack *extack); int ice_check_for_pending_update(struct ice_pf *pf, const char *component, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index ebbb8f54871c..3df67486d42d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -7,6 +7,7 @@ #include "ice_lib.h" #include "ice_fltr.h" #include "ice_dcb_lib.h" +#include "ice_devlink.h" /** * ice_vsi_type_str - maps VSI type enum to string equivalents @@ -1755,7 +1756,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) return ret; for (i = 0; i < vsi->num_xdp_txq; i++) - vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]); + vsi->xdp_rings[i]->xsk_pool = ice_xsk_pool(vsi->xdp_rings[i]); return ret; } @@ -2616,8 +2617,10 @@ int ice_vsi_release(struct ice_vsi *vsi) * PF that is running the work queue items currently. 
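The umem-to-pool rename in the ice_lib.c hunk above also changes how each XDP Tx ring caches its AF_XDP buffer pool (vsi->xdp_rings[i]->xsk_pool = ice_xsk_pool(...)), so the hot path can test ring->xsk_pool directly. Here is a simplified model of that per-queue lookup, with the bounds-and-NULL checks suggested by the ice_xsk.c hunks later in this diff; the struct is a reduced stand-in, not the driver's ice_vsi.

#include <stddef.h>

struct xsk_buff_pool; /* opaque for this sketch */

struct vsi_model {
	struct xsk_buff_pool **xsk_pools;
	unsigned short num_xsk_pools;
};

static struct xsk_buff_pool *queue_pool(const struct vsi_model *vsi,
					unsigned short qid)
{
	if (!vsi->xsk_pools || qid >= vsi->num_xsk_pools)
		return NULL;
	return vsi->xsk_pools[qid]; /* NULL when no socket is bound */
}

Caching the result on the ring means the NAPI poll loop only tests ring->xsk_pool instead of re-deriving the lookup per packet.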
This is done to * avoid check_flush_dependency() warning on this wq */ - if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) + if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) { unregister_netdev(vsi->netdev); + ice_devlink_destroy_port(vsi); + } if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 54a7f55eb8c1..2dea4d0e9415 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -486,7 +486,6 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) struct ice_hw *hw = &pf->hw; dev_dbg(dev, "reset_type 0x%x requested\n", reset_type); - WARN_ON(in_interrupt()); ice_prepare_for_reset(pf); @@ -1057,7 +1056,9 @@ struct ice_aq_task { int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, struct ice_rq_event_info *event) { + struct device *dev = ice_pf_to_dev(pf); struct ice_aq_task *task; + unsigned long start; long ret; int err; @@ -1074,6 +1075,8 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, hlist_add_head(&task->entry, &pf->aq_wait_list); spin_unlock_bh(&pf->aq_wait_lock); + start = jiffies; + ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state, timeout); switch (task->state) { @@ -1092,6 +1095,11 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, break; } + dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n", + jiffies_to_msecs(jiffies - start), + jiffies_to_msecs(timeout), + opcode); + spin_lock_bh(&pf->aq_wait_lock); hlist_del(&task->entry); spin_unlock_bh(&pf->aq_wait_lock); @@ -2273,7 +2281,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring); + xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring); } return 0; @@ -2417,7 +2425,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi) int i, v_idx; /* q_vectors are freed in reset path so there's no point in detaching - * rings; in case of rebuild being triggered not from reset reset bits + * rings; in case of rebuild being triggered not from reset bits * in pf->state won't be set, so additionally check first q_vector * against NULL */ @@ -2517,13 +2525,13 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, if (if_running) ret = ice_up(vsi); - if (!ret && prog && vsi->xsk_umems) { + if (!ret && prog && vsi->xsk_pools) { int i; ice_for_each_rxq(vsi, i) { struct ice_ring *rx_ring = vsi->rx_rings[i]; - if (rx_ring->xsk_umem) + if (rx_ring->xsk_pool) napi_schedule(&rx_ring->q_vector->napi); } } @@ -2549,8 +2557,8 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); - case XDP_SETUP_XSK_UMEM: - return ice_xsk_umem_setup(vsi, xdp->xsk.umem, + case XDP_SETUP_XSK_POOL: + return ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id); default: return -EINVAL; @@ -2873,6 +2881,7 @@ static void ice_set_ops(struct net_device *netdev) } netdev->netdev_ops = &ice_netdev_ops; + netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic; ice_set_ethtool_ops(netdev); } @@ -2953,7 +2962,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) u8 mac_addr[ETH_ALEN]; int err; - err = ice_devlink_create_port(pf); + err = ice_devlink_create_port(vsi); if (err) return err; @@ -2994,7 
+3003,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) if (err) goto err_free_netdev; - devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev); + devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); netif_carrier_off(vsi->netdev); @@ -3007,7 +3016,7 @@ err_free_netdev: free_netdev(vsi->netdev); vsi->netdev = NULL; err_destroy_devlink_port: - ice_devlink_destroy_port(pf); + ice_devlink_destroy_port(vsi); return err; } @@ -3971,7 +3980,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) struct device *dev = &pdev->dev; struct ice_pf *pf; struct ice_hw *hw; - int err; + int i, err; /* this driver uses devres, see * Documentation/driver-api/driver-model/devres.rst @@ -4066,11 +4075,37 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_devlink_init_regions(pf); + pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port; + pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port; + pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; + pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared; + i = 0; + if (pf->hw.tnl.valid_count[TNL_VXLAN]) { + pf->hw.udp_tunnel_nic.tables[i].n_entries = + pf->hw.tnl.valid_count[TNL_VXLAN]; + pf->hw.udp_tunnel_nic.tables[i].tunnel_types = + UDP_TUNNEL_TYPE_VXLAN; + i++; + } + if (pf->hw.tnl.valid_count[TNL_GENEVE]) { + pf->hw.udp_tunnel_nic.tables[i].n_entries = + pf->hw.tnl.valid_count[TNL_GENEVE]; + pf->hw.udp_tunnel_nic.tables[i].tunnel_types = + UDP_TUNNEL_TYPE_GENEVE; + i++; + } + pf->num_alloc_vsi = hw->func_caps.guar_num_vsi; if (!pf->num_alloc_vsi) { err = -EIO; goto err_init_pf_unroll; } + if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { + dev_warn(&pf->pdev->dev, + "limiting the VSI count due to UDP tunnel limitation %d > %d\n", + pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); + pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; + } pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi), GFP_KERNEL); @@ -4216,7 +4251,6 @@ probe_done: err_send_version_unroll: ice_vsi_release_all(pf); err_alloc_sw_unroll: - ice_devlink_destroy_port(pf); set_bit(__ICE_SERVICE_DIS, pf->state); set_bit(__ICE_DOWN, pf->state); devm_kfree(dev, pf->first_sw); @@ -4331,7 +4365,6 @@ static void ice_remove(struct pci_dev *pdev) if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); - ice_devlink_destroy_port(pf); ice_vsi_release_all(pf); ice_set_wake(pf); ice_free_irq_msix_misc(pf); @@ -6569,70 +6602,6 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) } /** - * ice_udp_tunnel_add - Get notifications about UDP tunnel ports that come up - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - */ -static void -ice_udp_tunnel_add(struct net_device *netdev, struct udp_tunnel_info *ti) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; - enum ice_tunnel_type tnl_type; - u16 port = ntohs(ti->port); - enum ice_status status; - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - tnl_type = TNL_VXLAN; - break; - case UDP_TUNNEL_TYPE_GENEVE: - tnl_type = TNL_GENEVE; - break; - default: - netdev_err(netdev, "Unknown tunnel type\n"); - return; - } - - status = ice_create_tunnel(&pf->hw, tnl_type, port); - if (status == ICE_ERR_OUT_OF_RANGE) - netdev_info(netdev, "Max tunneled UDP ports reached, port %d not added\n", - port); - else if (status) - netdev_err(netdev, "Error adding UDP tunnel - %s\n", - 
ice_stat_str(status)); -} - -/** - * ice_udp_tunnel_del - Get notifications about UDP tunnel ports that go away - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - */ -static void -ice_udp_tunnel_del(struct net_device *netdev, struct udp_tunnel_info *ti) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - enum ice_status status; - bool retval; - - retval = ice_tunnel_port_in_use(&pf->hw, port, NULL); - if (!retval) { - netdev_info(netdev, "port %d not found in UDP tunnels list\n", - port); - return; - } - - status = ice_destroy_tunnel(&pf->hw, port, false); - if (status) - netdev_err(netdev, "error deleting port %d from UDP tunnels list\n", - port); -} - -/** * ice_open - Called when a network interface becomes active * @netdev: network interface device structure * @@ -6824,6 +6793,6 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_bpf = ice_xdp, .ndo_xdp_xmit = ice_xdp_xmit, .ndo_xsk_wakeup = ice_xsk_wakeup, - .ndo_udp_tunnel_add = ice_udp_tunnel_add, - .ndo_udp_tunnel_del = ice_udp_tunnel_del, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 9d0d6b0025cf..eae75260fe20 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -145,7 +145,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring) { u16 i; - if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) { + if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) { ice_xsk_clean_xdp_ring(tx_ring); goto tx_skip_free; } @@ -375,7 +375,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) if (!rx_ring->rx_buf) return; - if (rx_ring->xsk_umem) { + if (rx_ring->xsk_pool) { ice_xsk_clean_rx_ring(rx_ring); goto rx_skip_free; } @@ -919,10 +919,7 @@ ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * likely have a consumer accessing first few bytes of meta * data, and then actual data. */ - prefetch(xdp->data_meta); -#if L1_CACHE_BYTES < 128 - prefetch((void *)(xdp->data + L1_CACHE_BYTES)); -#endif + net_prefetch(xdp->data_meta); /* build an skb around the page buffer */ skb = build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) @@ -964,10 +961,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(xdp->data); -#if L1_CACHE_BYTES < 128 - prefetch((void *)(xdp->data + L1_CACHE_BYTES)); -#endif /* L1_CACHE_BYTES */ + net_prefetch(xdp->data); /* allocate a skb to store the frags */ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, @@ -1616,7 +1610,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ ice_for_each_ring(ring, q_vector->tx) { - bool wd = ring->xsk_umem ? + bool wd = ring->xsk_pool ? ice_clean_tx_irq_zc(ring, budget) : ice_clean_tx_irq(ring, budget); @@ -1646,7 +1640,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * comparison in the irq context instead of many inside the * ice_clean_rx_irq function and makes the codebase cleaner. */ - cleaned = ring->xsk_umem ? + cleaned = ring->xsk_pool ? 
ice_clean_rx_irq_zc(ring, budget_per_ring) : ice_clean_rx_irq(ring, budget_per_ring); work_done += cleaned; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 51b4df7a59d2..ff1a1cbd078e 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -43,7 +43,7 @@ /** * ice_compute_pad - compute the padding - * rx_buf_len: buffer length + * @rx_buf_len: buffer length * * Figure out the size of half page based on given buffer length and * then subtract the skb_shared_info followed by subtraction of the @@ -295,7 +295,7 @@ struct ice_ring { struct rcu_head rcu; /* to avoid race on free */ struct bpf_prog *xdp_prog; - struct xdp_umem *xsk_umem; + struct xsk_buff_pool *xsk_pool; /* CL3 - 3rd cacheline starts here */ struct xdp_rxq_info xdp_rxq; /* CLX - the below items are only accessed infrequently and should be diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 4cdccfadf274..2226a291a394 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -676,6 +676,9 @@ struct ice_hw { struct mutex tnl_lock; struct ice_tunnel_table tnl; + struct udp_tunnel_nic_shared udp_tunnel_shared; + struct udp_tunnel_nic_info udp_tunnel_nic; + /* HW block tables */ struct ice_blk_info blk[ICE_BLK_COUNT]; struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 71497776ac62..ec7f6c64132e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -871,7 +871,7 @@ static int ice_get_max_valid_res_idx(struct ice_res_tracker *res) * If there are not enough resources available, return an error. This should * always be caught by ice_set_per_vf_res(). * - * Return 0 on success, and -EINVAL when there are not enough MSIX vectors in + * Return 0 on success, and -EINVAL when there are not enough MSIX vectors * in the PF's space available for SR-IOV. 
*/ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 20ac5fca68c6..797886524054 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -236,7 +236,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring); + xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring); } err = ice_setup_rx_ctx(rx_ring); @@ -260,21 +260,21 @@ free_buf: } /** - * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket - * @vsi: VSI to allocate the UMEM on + * ice_xsk_alloc_pools - allocate a buffer pool for an XDP socket + * @vsi: VSI to allocate the buffer pool on * * Returns 0 on success, negative on error */ -static int ice_xsk_alloc_umems(struct ice_vsi *vsi) +static int ice_xsk_alloc_pools(struct ice_vsi *vsi) { - if (vsi->xsk_umems) + if (vsi->xsk_pools) return 0; - vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems), + vsi->xsk_pools = kcalloc(vsi->num_xsk_pools, sizeof(*vsi->xsk_pools), GFP_KERNEL); - if (!vsi->xsk_umems) { - vsi->num_xsk_umems = 0; + if (!vsi->xsk_pools) { + vsi->num_xsk_pools = 0; return -ENOMEM; } @@ -282,73 +282,73 @@ static int ice_xsk_alloc_umems(struct ice_vsi *vsi) } /** - * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid + * ice_xsk_remove_pool - Remove an buffer pool for a certain ring/qid * @vsi: VSI from which the VSI will be removed - * @qid: Ring/qid associated with the UMEM + * @qid: Ring/qid associated with the buffer pool */ -static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid) +static void ice_xsk_remove_pool(struct ice_vsi *vsi, u16 qid) { - vsi->xsk_umems[qid] = NULL; - vsi->num_xsk_umems_used--; + vsi->xsk_pools[qid] = NULL; + vsi->num_xsk_pools_used--; - if (vsi->num_xsk_umems_used == 0) { - kfree(vsi->xsk_umems); - vsi->xsk_umems = NULL; - vsi->num_xsk_umems = 0; + if (vsi->num_xsk_pools_used == 0) { + kfree(vsi->xsk_pools); + vsi->xsk_pools = NULL; + vsi->num_xsk_pools = 0; } } /** - * ice_xsk_umem_disable - disable a UMEM region + * ice_xsk_pool_disable - disable a buffer pool region * @vsi: Current VSI * @qid: queue ID * * Returns 0 on success, negative on failure */ -static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid) +static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid) { - if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems || - !vsi->xsk_umems[qid]) + if (!vsi->xsk_pools || qid >= vsi->num_xsk_pools || + !vsi->xsk_pools[qid]) return -EINVAL; - xsk_buff_dma_unmap(vsi->xsk_umems[qid], ICE_RX_DMA_ATTR); - ice_xsk_remove_umem(vsi, qid); + xsk_pool_dma_unmap(vsi->xsk_pools[qid], ICE_RX_DMA_ATTR); + ice_xsk_remove_pool(vsi, qid); return 0; } /** - * ice_xsk_umem_enable - enable a UMEM region + * ice_xsk_pool_enable - enable a buffer pool region * @vsi: Current VSI - * @umem: pointer to a requested UMEM region + * @pool: pointer to a requested buffer pool region * @qid: queue ID * * Returns 0 on success, negative on failure */ static int -ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) +ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) { int err; if (vsi->type != ICE_VSI_PF) return -EINVAL; - if (!vsi->num_xsk_umems) - vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq); - if (qid >= vsi->num_xsk_umems) + if (!vsi->num_xsk_pools) + vsi->num_xsk_pools = min_t(u16, vsi->num_rxq, 
vsi->num_txq); + if (qid >= vsi->num_xsk_pools) return -EINVAL; - err = ice_xsk_alloc_umems(vsi); + err = ice_xsk_alloc_pools(vsi); if (err) return err; - if (vsi->xsk_umems && vsi->xsk_umems[qid]) + if (vsi->xsk_pools && vsi->xsk_pools[qid]) return -EBUSY; - vsi->xsk_umems[qid] = umem; - vsi->num_xsk_umems_used++; + vsi->xsk_pools[qid] = pool; + vsi->num_xsk_pools_used++; - err = xsk_buff_dma_map(vsi->xsk_umems[qid], ice_pf_to_dev(vsi->back), + err = xsk_pool_dma_map(vsi->xsk_pools[qid], ice_pf_to_dev(vsi->back), ICE_RX_DMA_ATTR); if (err) return err; @@ -357,17 +357,17 @@ ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) } /** - * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state + * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state * @vsi: Current VSI - * @umem: UMEM to enable/associate to a ring, NULL to disable + * @pool: buffer pool to enable/associate to a ring, NULL to disable * @qid: queue ID * * Returns 0 on success, negative on failure */ -int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) +int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) { - bool if_running, umem_present = !!umem; - int ret = 0, umem_failure = 0; + bool if_running, pool_present = !!pool; + int ret = 0, pool_failure = 0; if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); @@ -375,26 +375,26 @@ int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) ret = ice_qp_dis(vsi, qid); if (ret) { netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret); - goto xsk_umem_if_up; + goto xsk_pool_if_up; } } - umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) : - ice_xsk_umem_disable(vsi, qid); + pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) : + ice_xsk_pool_disable(vsi, qid); -xsk_umem_if_up: +xsk_pool_if_up: if (if_running) { ret = ice_qp_ena(vsi, qid); - if (!ret && umem_present) + if (!ret && pool_present) napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi); else if (ret) netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); } - if (umem_failure) { - netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d\n", - umem_present ? "en" : "dis", umem_failure); - return umem_failure; + if (pool_failure) { + netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n", + pool_present ? 
"en" : "dis", pool_failure); + return pool_failure; } return ret; @@ -425,7 +425,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) rx_buf = &rx_ring->rx_buf[ntu]; do { - rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_umem); + rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool); if (!rx_buf->xdp) { ret = true; break; @@ -595,7 +595,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; rx_buf->xdp->data_end = rx_buf->xdp->data + size; - xsk_buff_dma_sync_for_cpu(rx_buf->xdp); + xsk_buff_dma_sync_for_cpu(rx_buf->xdp, rx_ring->xsk_pool); xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp); if (xdp_res) { @@ -645,11 +645,11 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) ice_finalize_xdp_rx(rx_ring, xdp_xmit); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); - if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) { + if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) - xsk_set_rx_need_wakeup(rx_ring->xsk_umem); + xsk_set_rx_need_wakeup(rx_ring->xsk_pool); else - xsk_clear_rx_need_wakeup(rx_ring->xsk_umem); + xsk_clear_rx_need_wakeup(rx_ring->xsk_pool); return (int)total_rx_packets; } @@ -682,11 +682,11 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget) tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; - if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) + if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc)) break; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma, + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc.len); tx_buf->bytecount = desc.len; @@ -703,7 +703,7 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget) if (tx_desc) { ice_xdp_ring_update_tail(xdp_ring); - xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + xsk_tx_release(xdp_ring->xsk_pool); } return budget > 0 && work_done; @@ -777,10 +777,10 @@ bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget) xdp_ring->next_to_clean = ntc; if (xsk_frames) - xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames); + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); - if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem)) - xsk_set_tx_need_wakeup(xdp_ring->xsk_umem); + if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) + xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes); xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK); @@ -814,7 +814,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, if (queue_id >= vsi->num_txq) return -ENXIO; - if (!vsi->xdp_rings[queue_id]->xsk_umem) + if (!vsi->xdp_rings[queue_id]->xsk_pool) return -ENXIO; ring = vsi->xdp_rings[queue_id]; @@ -833,20 +833,20 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, } /** - * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached + * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP buff pool attached * @vsi: VSI to be checked * - * Returns true if any of the Rx rings has an AF_XDP UMEM attached + * Returns true if any of the Rx rings has an AF_XDP buff pool attached */ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) { int i; - if (!vsi->xsk_umems) + if (!vsi->xsk_pools) return false; - for (i = 0; i < vsi->num_xsk_umems; i++) { - if (vsi->xsk_umems[i]) + for (i = 0; i < vsi->num_xsk_pools; i++) { + if (vsi->xsk_pools[i]) return true; } @@ -854,7 +854,7 @@ 
bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) } /** - * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring + * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring * @rx_ring: ring to be cleaned */ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) @@ -872,7 +872,7 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) } /** - * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues + * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues * @xdp_ring: XDP_Tx ring */ void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) @@ -896,5 +896,5 @@ void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) } if (xsk_frames) - xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames); + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); } diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index fc1a06b4df36..fad783690134 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -9,7 +9,8 @@ struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS -int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid); +int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, + u16 qid); int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget); bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); @@ -19,8 +20,8 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring); #else static inline int -ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi, - struct xdp_umem __always_unused *umem, +ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, + struct xsk_buff_pool __always_unused *pool, u16 __always_unused qid) { return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index a32391e82762..50863fd87d53 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -2554,7 +2554,7 @@ out: /** * __igb_access_emi_reg - Read/write EMI register * @hw: pointer to the HW structure - * @addr: EMI address to program + * @address: EMI address to program * @data: pointer to value to read/write from/to the EMI address * @read: boolean flag to indicate read or write **/ @@ -2590,7 +2590,7 @@ s32 igb_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data) * igb_set_eee_i350 - Enable/disable EEE support * @hw: pointer to the HW structure * @adv1G: boolean flag enabling 1G EEE advertisement - * @adv100m: boolean flag enabling 100M EEE advertisement + * @adv100M: boolean flag enabling 100M EEE advertisement * * Enable/disable EEE based on setting in dev_spec structure. * @@ -2646,7 +2646,7 @@ out: * igb_set_eee_i354 - Enable/disable EEE support * @hw: pointer to the HW structure * @adv1G: boolean flag enabling 1G EEE advertisement - * @adv100m: boolean flag enabling 100M EEE advertisement + * @adv100M: boolean flag enabling 100M EEE advertisement * * Enable/disable EEE legacy mode based on setting in dev_spec structure. 
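The e1000_82575.c fixes above (@addr to @address, @adv100m to @adv100M) exist because kernel-doc @-tags must match the parameter names in the signature exactly, including case; scripts/kernel-doc warns about any mismatch. A self-contained illustration of a conforming comment; the function itself is hypothetical.

#include <stdbool.h>

struct hw { int dummy; };

/**
 * sample_set_eee - enable/disable EEE advertisement (illustrative only)
 * @hw: pointer to the HW structure
 * @adv1G: boolean flag enabling 1G EEE advertisement
 * @adv100M: boolean flag enabling 100M EEE advertisement
 *
 * The @-tags above match the parameters below exactly; writing
 * @adv100m instead would trigger a kernel-doc warning.
 */
int sample_set_eee(struct hw *hw, bool adv1G, bool adv100M)
{
	(void)hw;
	return adv1G || adv100M; /* placeholder body */
}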
* diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index c393cb2c0f16..9265901455cd 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -357,13 +357,14 @@ static s32 igb_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data) /** * igb_read_invm_i210 - Read invm wrapper function for I210/I211 * @hw: pointer to the HW structure - * @words: number of words to read + * @offset: offset to read from + * @words: number of words to read (unused) * @data: pointer to the data read * * Wrapper function to return data formerly found in the NVM. **/ static s32 igb_read_invm_i210(struct e1000_hw *hw, u16 offset, - u16 words __always_unused, u16 *data) + u16 __always_unused words, u16 *data) { s32 ret_val = 0; diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 3254737c07a3..fd8eb2f9ab9d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -166,6 +166,7 @@ static s32 igb_find_vlvf_slot(struct e1000_hw *hw, u32 vlan, bool vlvf_bypass) * @vlan: VLAN id to add or remove * @vind: VMDq output index that maps queue to VLAN id * @vlan_on: if true add filter, if false remove + * @vlvf_bypass: skip VLVF if no match is found * * Sets or clears a bit in the VLAN filter table array based on VLAN id * and if we are adding or removing the filter diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c index 46debd991bfe..33cceb77e960 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.c +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c @@ -9,6 +9,7 @@ * @msg: The message buffer * @size: Length of buffer * @mbx_id: id of mailbox to read + * @unlock: skip locking or not * * returns SUCCESS if it successfully read message from buffer **/ diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 2f015b60a995..0286d2fceee4 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -19,6 +19,8 @@ #include <linux/pci.h> #include <linux/mdio.h> +#include <net/xdp.h> + struct igb_adapter; #define E1000_PCS_CFG_IGN_SD 1 @@ -79,6 +81,12 @@ struct igb_adapter; #define IGB_I210_RX_LATENCY_100 2213 #define IGB_I210_RX_LATENCY_1000 448 +/* XDP */ +#define IGB_XDP_PASS 0 +#define IGB_XDP_CONSUMED BIT(0) +#define IGB_XDP_TX BIT(1) +#define IGB_XDP_REDIR BIT(2) + struct vf_data_storage { unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; @@ -132,17 +140,62 @@ struct vf_mac_filter { /* Supported Rx Buffer Sizes */ #define IGB_RXBUFFER_256 256 +#define IGB_RXBUFFER_1536 1536 #define IGB_RXBUFFER_2048 2048 #define IGB_RXBUFFER_3072 3072 #define IGB_RX_HDR_LEN IGB_RXBUFFER_256 #define IGB_TS_HDR_LEN 16 -#define IGB_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +/* Attempt to maximize the headroom available for incoming frames. We + * use a 2K buffer for receives and need 1536/1534 to store the data for + * the frame. This leaves us with 512 bytes of room. From that we need + * to deduct the space needed for the shared info and the padding needed + * to IP align the frame. + * + * Note: For cache line sizes 256 or larger this value is going to end + * up negative. In these cases we should fall back to the 3K + * buffers. 
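The igb_compute_pad()/igb_skb_pad() logic introduced above is plain arithmetic over half-page buffers. A worked userspace example with assumed x86-64-ish constants (4 KB pages, 64-byte cache lines, ~320-byte struct skb_shared_info); the real values depend on the kernel configuration, which is why a 256-byte-or-larger cache line can push the result negative as the comment notes.

#include <stdio.h>

#define PAGE_SIZE	4096
#define CACHE_BYTES	64
#define SHINFO_SZ	320	/* assumed sizeof(struct skb_shared_info) */
#define NET_IP_ALIGN	2

#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((a) - 1))
#define SKB_DATA_ALIGN(x)	ALIGN_UP(x, CACHE_BYTES)
#define SKB_WITH_OVERHEAD(x)	((x) - SKB_DATA_ALIGN(SHINFO_SZ))

static int compute_pad(int rx_buf_len)
{
	int page_size = ALIGN_UP(rx_buf_len, PAGE_SIZE / 2);

	return SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
}

int main(void)
{
	/* prints 194: 2048 - 320 - (1536 - 2), i.e. the headroom left in
	 * a half page after the shared info and the IP-aligned frame
	 */
	printf("pad for 1536-byte buffer: %d\n",
	       compute_pad(1536 - NET_IP_ALIGN));
	return 0;
}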
+ */ #if (PAGE_SIZE < 8192) -#define IGB_MAX_FRAME_BUILD_SKB \ - (SKB_WITH_OVERHEAD(IGB_RXBUFFER_2048) - IGB_SKB_PAD - IGB_TS_HDR_LEN) +#define IGB_MAX_FRAME_BUILD_SKB (IGB_RXBUFFER_1536 - NET_IP_ALIGN) +#define IGB_2K_TOO_SMALL_WITH_PADDING \ +((NET_SKB_PAD + IGB_TS_HDR_LEN + IGB_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IGB_RXBUFFER_2048)) + +static inline int igb_compute_pad(int rx_buf_len) +{ + int page_size, pad_size; + + page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); + pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; + + return pad_size; +} + +static inline int igb_skb_pad(void) +{ + int rx_buf_len; + + /* If a 2K buffer cannot handle a standard Ethernet frame then + * optimize padding for a 3K buffer instead of a 1.5K buffer. + * + * For a 3K buffer we need to add enough padding to allow for + * tailroom due to NET_IP_ALIGN possibly shifting us out of + * cache-line alignment. + */ + if (IGB_2K_TOO_SMALL_WITH_PADDING) + rx_buf_len = IGB_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); + else + rx_buf_len = IGB_RXBUFFER_1536; + + /* if needed make room for NET_IP_ALIGN */ + rx_buf_len -= NET_IP_ALIGN; + + return igb_compute_pad(rx_buf_len); +} + +#define IGB_SKB_PAD igb_skb_pad() #else -#define IGB_MAX_FRAME_BUILD_SKB (IGB_RXBUFFER_2048 - IGB_TS_HDR_LEN) +#define IGB_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) #endif /* How many Rx Buffers do we bundle into one write to the hardware ? */ @@ -194,13 +247,22 @@ enum igb_tx_flags { #define IGB_SFF_ADDRESSING_MODE 0x4 #define IGB_SFF_8472_UNSUP 0x00 +enum igb_tx_buf_type { + IGB_TYPE_SKB = 0, + IGB_TYPE_XDP, +}; + /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer */ struct igb_tx_buffer { union e1000_adv_tx_desc *next_to_watch; unsigned long time_stamp; - struct sk_buff *skb; + enum igb_tx_buf_type type; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; unsigned int bytecount; u16 gso_segs; __be16 protocol; @@ -248,6 +310,7 @@ struct igb_ring_container { struct igb_ring { struct igb_q_vector *q_vector; /* backlink to q_vector */ struct net_device *netdev; /* back pointer to net_device */ + struct bpf_prog *xdp_prog; struct device *dev; /* device pointer for dma mapping */ union { /* array of buffer info structs */ struct igb_tx_buffer *tx_buffer_info; @@ -288,6 +351,7 @@ struct igb_ring { struct u64_stats_sync rx_syncp; }; }; + struct xdp_rxq_info xdp_rxq; } ____cacheline_internodealigned_in_smp; struct igb_q_vector { @@ -339,7 +403,7 @@ static inline unsigned int igb_rx_bufsz(struct igb_ring *ring) return IGB_RXBUFFER_3072; if (ring_uses_build_skb(ring)) - return IGB_MAX_FRAME_BUILD_SKB + IGB_TS_HDR_LEN; + return IGB_MAX_FRAME_BUILD_SKB; #endif return IGB_RXBUFFER_2048; } @@ -467,6 +531,7 @@ struct igb_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct net_device *netdev; + struct bpf_prog *xdp_prog; unsigned long state; unsigned int flags; @@ -643,6 +708,9 @@ enum igb_boards { extern char igb_driver_name[]; +int igb_xmit_xdp_ring(struct igb_adapter *adapter, + struct igb_ring *ring, + struct xdp_frame *xdpf); int igb_open(struct net_device *netdev); int igb_close(struct net_device *netdev); int igb_up(struct igb_adapter *); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 6e8231c1ddf0..28baf203459a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -961,6 +961,10 @@ static int igb_set_ringparam(struct net_device *netdev, memcpy(&temp_ring[i], 
adapter->rx_ring[i], sizeof(struct igb_ring)); + /* Clear copied XDP RX-queue info */ + memset(&temp_ring[i].xdp_rxq, 0, + sizeof(temp_ring[i].xdp_rxq)); + temp_ring[i].count = new_rx_count; err = igb_setup_rx_resources(&temp_ring[i]); if (err) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d9c3a6b169f9..5fc2c381da55 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -30,6 +30,8 @@ #include <linux/if_ether.h> #include <linux/aer.h> #include <linux/prefetch.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/pm_runtime.h> #include <linux/etherdevice.h> #ifdef CONFIG_IGB_DCA @@ -549,8 +551,7 @@ exit: /** * igb_get_i2c_data - Reads the I2C SDA data bit - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register + * @data: opaque pointer to adapter struct * * Returns the I2C data bit value **/ @@ -2220,7 +2221,6 @@ void igb_down(struct igb_adapter *adapter) void igb_reinit_locked(struct igb_adapter *adapter) { - WARN_ON(in_interrupt()); while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) usleep_range(1000, 2000); igb_down(adapter); @@ -2824,6 +2824,147 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +static int igb_xdp_setup(struct net_device *dev, struct bpf_prog *prog) +{ + int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct igb_adapter *adapter = netdev_priv(dev); + bool running = netif_running(dev); + struct bpf_prog *old_prog; + bool need_reset; + + /* verify igb ring attributes are sufficient for XDP */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + + if (frame_size > igb_rx_bufsz(ring)) + return -EINVAL; + } + + old_prog = xchg(&adapter->xdp_prog, prog); + need_reset = (!!prog != !!old_prog); + + /* device is up and bpf is added/removed, must setup the RX queues */ + if (need_reset && running) { + igb_close(dev); + } else { + for (i = 0; i < adapter->num_rx_queues; i++) + (void)xchg(&adapter->rx_ring[i]->xdp_prog, + adapter->xdp_prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + /* bpf is just replaced, RXQ and MTU are already setup */ + if (!need_reset) + return 0; + + if (running) + igb_open(dev); + + return 0; +} + +static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return igb_xdp_setup(dev, xdp->prog); + default: + return -EINVAL; + } +} + +static void igb_xdp_ring_update_tail(struct igb_ring *ring) +{ + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. + */ + wmb(); + writel(ring->next_to_use, ring->tail); +} + +static struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter) +{ + unsigned int r_idx = smp_processor_id(); + + if (r_idx >= adapter->num_tx_queues) + r_idx = r_idx % adapter->num_tx_queues; + + return adapter->tx_ring[r_idx]; +} + +static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp) +{ + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); + int cpu = smp_processor_id(); + struct igb_ring *tx_ring; + struct netdev_queue *nq; + u32 ret; + + if (unlikely(!xdpf)) + return IGB_XDP_CONSUMED; + + /* During program transitions its possible adapter->xdp_prog is assigned + * but ring has not been configured yet. In this case simply abort xmit. + */ + tx_ring = adapter->xdp_prog ? 
igb_xdp_tx_queue_mapping(adapter) : NULL; + if (unlikely(!tx_ring)) + return -ENXIO; + + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); + ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); + __netif_tx_unlock(nq); + + return ret; +} + +static int igb_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct igb_adapter *adapter = netdev_priv(dev); + int cpu = smp_processor_id(); + struct igb_ring *tx_ring; + struct netdev_queue *nq; + int drops = 0; + int i; + + if (unlikely(test_bit(__IGB_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + /* During program transitions its possible adapter->xdp_prog is assigned + * but ring has not been configured yet. In this case simply abort xmit. + */ + tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL; + if (unlikely(!tx_ring)) + return -ENXIO; + + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); + + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; + + err = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); + if (err != IGB_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + __netif_tx_unlock(nq); + + if (unlikely(flags & XDP_XMIT_FLUSH)) + igb_xdp_ring_update_tail(tx_ring); + + return n - drops; +} + static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, @@ -2848,6 +2989,8 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_fdb_add = igb_ndo_fdb_add, .ndo_features_check = igb_features_check, .ndo_setup_tc = igb_setup_tc, + .ndo_bpf = igb_xdp, + .ndo_xdp_xmit = igb_xdp_xmit, }; /** @@ -3388,7 +3531,9 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Width x1" : "unknown"), netdev->dev_addr); } - if ((hw->mac.type >= e1000_i210 || + if ((hw->mac.type == e1000_82576 && + rd32(E1000_EECD) & E1000_EECD_PRES) || + (hw->mac.type >= e1000_i210 || igb_get_flash_presence_i210(hw))) { ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH); @@ -3868,6 +4013,7 @@ static int igb_sw_init(struct igb_adapter *adapter) /** * igb_open - Called when a network interface is made active * @netdev: network interface device structure + * @resuming: indicates whether we are in a resume call * * Returns 0 on success, negative value on failure * @@ -3985,6 +4131,7 @@ int igb_open(struct net_device *netdev) /** * igb_close - Disables a network interface * @netdev: network interface device structure + * @suspending: indicates we are in a suspend call * * Returns 0, this is not allowed to fail * @@ -4178,6 +4325,7 @@ static void igb_configure_tx(struct igb_adapter *adapter) **/ int igb_setup_rx_resources(struct igb_ring *rx_ring) { + struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; int size; @@ -4200,6 +4348,13 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + rx_ring->xdp_prog = adapter->xdp_prog; + + /* XDP RX-queue info */ + if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index) < 0) + goto err; + return 0; err: @@ -4504,6 +4659,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, int reg_idx = ring->reg_idx; u32 rxdctl = 0; + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL)); + /* disable the queue */ wr32(E1000_RXDCTL(reg_idx), 0); @@ -4708,6 +4867,8 @@ void igb_free_rx_resources(struct igb_ring 
*rx_ring) { igb_clean_rx_ring(rx_ring); + rx_ring->xdp_prog = NULL; + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; @@ -5219,7 +5380,7 @@ static void igb_check_lvmmc(struct igb_adapter *adapter) /** * igb_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * @t: pointer to timer_list containing our private info pointer **/ static void igb_watchdog(struct timer_list *t) { @@ -6077,6 +6238,80 @@ dma_error: return -1; } +int igb_xmit_xdp_ring(struct igb_adapter *adapter, + struct igb_ring *tx_ring, + struct xdp_frame *xdpf) +{ + union e1000_adv_tx_desc *tx_desc; + u32 len, cmd_type, olinfo_status; + struct igb_tx_buffer *tx_buffer; + dma_addr_t dma; + u16 i; + + len = xdpf->len; + + if (unlikely(!igb_desc_unused(tx_ring))) + return IGB_XDP_CONSUMED; + + dma = dma_map_single(tx_ring->dev, xdpf->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + return IGB_XDP_CONSUMED; + + /* record the location of the first descriptor for this packet */ + tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + tx_buffer->bytecount = len; + tx_buffer->gso_segs = 1; + tx_buffer->protocol = 0; + + i = tx_ring->next_to_use; + tx_desc = IGB_TX_DESC(tx_ring, i); + + dma_unmap_len_set(tx_buffer, len, len); + dma_unmap_addr_set(tx_buffer, dma, dma); + tx_buffer->type = IGB_TYPE_XDP; + tx_buffer->xdpf = xdpf; + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + /* put descriptor type bits */ + cmd_type = E1000_ADVTXD_DTYP_DATA | + E1000_ADVTXD_DCMD_DEXT | + E1000_ADVTXD_DCMD_IFCS; + cmd_type |= len | IGB_TXD_DCMD; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + olinfo_status = cpu_to_le32(len << E1000_ADVTXD_PAYLEN_SHIFT); + /* 82575 requires a unique index per ring */ + if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) + olinfo_status |= tx_ring->reg_idx << 4; + + tx_desc->read.olinfo_status = olinfo_status; + + netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer->bytecount); + + /* set the timestamp */ + tx_buffer->time_stamp = jiffies; + + /* Avoid any potential race with xdp_xmit and cleanup */ + smp_wmb(); + + /* set next_to_watch value indicating a packet is present */ + i++; + if (i == tx_ring->count) + i = 0; + + tx_buffer->next_to_watch = tx_desc; + tx_ring->next_to_use = i; + + /* Make sure there is space in the ring for the next send. 
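igb_xmit_xdp_ring() above refuses to queue a frame unless igb_desc_unused() reports free slots, and, as the remainder of the hunk shows, only touches the tail register when the stack signals no more frames are coming. A reduced model of the free-slot computation, using the usual Intel-driver ring idiom; the struct is a stand-in for igb_ring, not its real layout.

#include <stdio.h>

struct ring_model {
	unsigned short count;		/* total descriptors */
	unsigned short next_to_use;	/* producer index */
	unsigned short next_to_clean;	/* consumer index */
};

/* one slot always stays empty so "full" and "empty" are distinguishable */
static unsigned short desc_unused(const struct ring_model *r)
{
	return ((r->next_to_clean > r->next_to_use) ? 0 : r->count) +
	       r->next_to_clean - r->next_to_use - 1;
}

int main(void)
{
	struct ring_model r = { .count = 256, .next_to_use = 10,
				.next_to_clean = 4 };

	/* 256 + 4 - 10 - 1 = 249 free slots */
	printf("unused = %u\n", desc_unused(&r));
	return 0;
}

In the driver the tail write is additionally skipped while netdev_xmit_more() says more frames are queued, batching doorbell writes across a burst.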
*/ + igb_maybe_stop_tx(tx_ring, DESC_NEEDED); + + if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) + writel(i, tx_ring->tail); + + return IGB_XDP_TX; +} + netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_ring *tx_ring) { @@ -6105,6 +6340,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->type = IGB_TYPE_SKB; first->skb = skb; first->bytecount = skb->len; first->gso_segs = 1; @@ -6192,8 +6428,9 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, /** * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: number of the Tx queue that hung (unused) **/ -static void igb_tx_timeout(struct net_device *netdev, unsigned int txqueue) +static void igb_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -6256,6 +6493,19 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) struct igb_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + if (adapter->xdp_prog) { + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + + if (max_frame > igb_rx_bufsz(ring)) { + netdev_warn(adapter->netdev, "Requested MTU size is not supported with XDP\n"); + return -EINVAL; + } + } + } + /* adjust max frame to be at least the size of a standard frame */ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; @@ -7809,7 +8059,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget) total_packets += tx_buffer->gso_segs; /* free the skb */ - napi_consume_skb(tx_buffer->skb, napi_budget); + if (tx_buffer->type == IGB_TYPE_SKB) + napi_consume_skb(tx_buffer->skb, napi_budget); + else + xdp_return_frame(tx_buffer->xdpf); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -7993,8 +8246,8 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer) * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds. 
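The reference-batching change around this comment re-arms when pagecnt_bias reaches 1 rather than 0, adding USHRT_MAX - 1 references at a time (and igb_alloc_mapped_page below now starts pages off the same way), so the driver never gives up its last reference while it still owns the page. A reduced model of the batching; it omits the reuse test against the live page refcount, and page_refs stands in for page_ref_count().

#include <limits.h>
#include <stdio.h>

struct buf_model {
	unsigned int page_refs;		/* stand-in for page_ref_count() */
	unsigned short pagecnt_bias;	/* references the driver still owns */
};

static void buf_init(struct buf_model *b)
{
	b->page_refs = 1;		/* the alloc_page() reference */
	b->page_refs += USHRT_MAX - 1;	/* batch, as page_ref_add() does */
	b->pagecnt_bias = USHRT_MAX;
}

/* hand one reference to the stack and re-arm before the batch drains */
static void buf_consume(struct buf_model *b)
{
	b->pagecnt_bias--;
	if (b->pagecnt_bias == 1) {
		b->page_refs += USHRT_MAX - 1;
		b->pagecnt_bias = USHRT_MAX;
	}
}

int main(void)
{
	struct buf_model b;
	int i;

	buf_init(&b);
	for (i = 0; i < 200000; i++)	/* more hand-offs than USHRT_MAX */
		buf_consume(&b);
	printf("refs=%u bias=%u\n", b.page_refs, b.pagecnt_bias);
	return 0;
}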
*/ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); + if (unlikely(pagecnt_bias == 1)) { + page_ref_add(page, USHRT_MAX - 1); rx_buffer->pagecnt_bias = USHRT_MAX; } @@ -8033,23 +8286,21 @@ static void igb_add_rx_frag(struct igb_ring *rx_ring, static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, struct igb_rx_buffer *rx_buffer, - union e1000_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + union e1000_adv_rx_desc *rx_desc) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); #endif + unsigned int size = xdp->data_end - xdp->data; unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data); /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN); @@ -8057,24 +8308,24 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, return NULL; if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); - va += IGB_TS_HDR_LEN; + igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb); + xdp->data += IGB_TS_HDR_LEN; size -= IGB_TS_HDR_LEN; } /* Determine available headroom for copy */ headlen = size; if (headlen > IGB_RX_HDR_LEN) - headlen = eth_get_headlen(skb->dev, va, IGB_RX_HDR_LEN); + headlen = eth_get_headlen(skb->dev, xdp->data, IGB_RX_HDR_LEN); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long))); /* update all of the pointers */ size -= headlen; if (size) { skb_add_rx_frag(skb, 0, rx_buffer->page, - (va + headlen) - page_address(rx_buffer->page), + (xdp->data + headlen) - page_address(rx_buffer->page), size, truesize); #if (PAGE_SIZE < 8192) rx_buffer->page_offset ^= truesize; @@ -8090,32 +8341,29 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, struct igb_rx_buffer *rx_buffer, - union e1000_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + union e1000_adv_rx_desc *rx_desc) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; #else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IGB_SKB_PAD + size); + SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); #endif struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data_meta); /* build an skb around the page buffer */ - skb = build_skb(va - IGB_SKB_PAD, truesize); + skb = build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) return NULL; /* update pointers within the skb to store the data */ - skb_reserve(skb, IGB_SKB_PAD); - __skb_put(skb, size); + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); /* pull timestamp out of packet data */ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { @@ -8133,6 +8381,79 @@ static struct sk_buff 
*igb_build_skb(struct igb_ring *rx_ring, return skb; } +static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter, + struct igb_ring *rx_ring, + struct xdp_buff *xdp) +{ + int err, result = IGB_XDP_PASS; + struct bpf_prog *xdp_prog; + u32 act; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + + if (!xdp_prog) + goto xdp_out; + + prefetchw(xdp->data_hard_start); /* xdp_frame write */ + + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + result = igb_xdp_xmit_back(adapter, xdp); + break; + case XDP_REDIRECT: + err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); + if (!err) + result = IGB_XDP_REDIR; + else + result = IGB_XDP_CONSUMED; + break; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + result = IGB_XDP_CONSUMED; + break; + } +xdp_out: + rcu_read_unlock(); + return ERR_PTR(-result); +} + +static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring, + unsigned int size) +{ + unsigned int truesize; + +#if (PAGE_SIZE < 8192) + truesize = igb_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ +#else + truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IGB_SKB_PAD + size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : + SKB_DATA_ALIGN(size); +#endif + return truesize; +} + +static void igb_rx_buffer_flip(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, + unsigned int size) +{ + unsigned int truesize = igb_rx_frame_truesize(rx_ring, size); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif +} + static inline void igb_rx_checksum(struct igb_ring *ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) @@ -8187,7 +8508,6 @@ static inline void igb_rx_hash(struct igb_ring *ring, * igb_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: current socket buffer containing buffer in progress * * This function updates next to clean. If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the @@ -8229,6 +8549,10 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) { + /* XDP packets use error pointer so abort at this point */ + if (IS_ERR(skb)) + return true; + if (unlikely((igb_test_staterr(rx_desc, E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { struct net_device *netdev = rx_ring->netdev; @@ -8287,6 +8611,11 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring, skb->protocol = eth_type_trans(skb, rx_ring->netdev); } +static unsigned int igb_rx_offset(struct igb_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? 
IGB_SKB_PAD : 0; +} + static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, const unsigned int size) { @@ -8330,10 +8659,20 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring, static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) { + struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *rx_ring = q_vector->rx.ring; struct sk_buff *skb = rx_ring->skb; unsigned int total_bytes = 0, total_packets = 0; u16 cleaned_count = igb_desc_unused(rx_ring); + unsigned int xdp_xmit = 0; + struct xdp_buff xdp; + + xdp.rxq = &rx_ring->xdp_rxq; + + /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ +#if (PAGE_SIZE < 8192) + xdp.frame_sz = igb_rx_frame_truesize(rx_ring, 0); +#endif while (likely(total_packets < budget)) { union e1000_adv_rx_desc *rx_desc; @@ -8360,13 +8699,38 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) rx_buffer = igb_get_rx_buffer(rx_ring, size); /* retrieve a buffer from the ring */ - if (skb) + if (!skb) { + xdp.data = page_address(rx_buffer->page) + + rx_buffer->page_offset; + xdp.data_meta = xdp.data; + xdp.data_hard_start = xdp.data - + igb_rx_offset(rx_ring); + xdp.data_end = xdp.data + size; +#if (PAGE_SIZE > 4096) + /* At larger PAGE_SIZE, frame_sz depend on len size */ + xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size); +#endif + skb = igb_run_xdp(adapter, rx_ring, &xdp); + } + + if (IS_ERR(skb)) { + unsigned int xdp_res = -PTR_ERR(skb); + + if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) { + xdp_xmit |= xdp_res; + igb_rx_buffer_flip(rx_ring, rx_buffer, size); + } else { + rx_buffer->pagecnt_bias++; + } + total_packets++; + total_bytes += size; + } else if (skb) igb_add_rx_frag(rx_ring, rx_buffer, skb, size); else if (ring_uses_build_skb(rx_ring)) - skb = igb_build_skb(rx_ring, rx_buffer, rx_desc, size); + skb = igb_build_skb(rx_ring, rx_buffer, &xdp, rx_desc); else skb = igb_construct_skb(rx_ring, rx_buffer, - rx_desc, size); + &xdp, rx_desc); /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -8406,6 +8770,15 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) /* place incomplete frames back on ring for completion */ rx_ring->skb = skb; + if (xdp_xmit & IGB_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_xmit & IGB_XDP_TX) { + struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter); + + igb_xdp_ring_update_tail(tx_ring); + } + u64_stats_update_begin(&rx_ring->rx_syncp); rx_ring->rx_stats.packets += total_packets; rx_ring->rx_stats.bytes += total_bytes; @@ -8419,11 +8792,6 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) return total_packets; } -static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? 
IGB_SKB_PAD : 0; -} - static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, struct igb_rx_buffer *bi) { @@ -8460,14 +8828,16 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, bi->dma = dma; bi->page = page; bi->page_offset = igb_rx_offset(rx_ring); - bi->pagecnt_bias = 1; + page_ref_add(page, USHRT_MAX - 1); + bi->pagecnt_bias = USHRT_MAX; return true; } /** - * igb_alloc_rx_buffers - Replace used receive buffers; packet split - * @adapter: address of board private structure + * igb_alloc_rx_buffers - Replace used receive buffers + * @rx_ring: rx descriptor ring to allocate new receive buffers + * @cleaned_count: count of buffers to allocate **/ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) { @@ -8536,9 +8906,9 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) /** * igb_mii_ioctl - - * @netdev: - * @ifreq: - * @cmd: + * @netdev: pointer to netdev struct + * @ifr: interface structure + * @cmd: ioctl command to execute **/ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { @@ -8566,9 +8936,9 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) /** * igb_ioctl - - * @netdev: - * @ifreq: - * @cmd: + * @netdev: pointer to netdev struct + * @ifr: interface structure + * @cmd: ioctl command to execute **/ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 490368d3d03c..7cc5428c3b3d 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -957,8 +957,8 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, /** * igb_ptp_get_ts_config - get hardware time stamping config - * @netdev: - * @ifreq: + * @netdev: netdev struct + * @ifr: interface struct * * Get the hwtstamp_config settings to return to the user. 
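
Stepping back to the igb XDP conversion earlier in these hunks: igb_run_xdp() folds the XDP verdict into the returned pointer as ERR_PTR(-result), so the caller can keep a single sk_buff variable for both the XDP and the regular receive path. A minimal sketch of how that encoding round-trips, assuming the usual IGB_XDP_* definitions from the same series (PASS is 0, so a passed frame comes back as ERR_PTR(0) == NULL and falls through to normal skb construction); the helper name is invented for illustration:

#define IGB_XDP_PASS		0	/* assumed values, per the series */
#define IGB_XDP_CONSUMED	BIT(0)
#define IGB_XDP_TX		BIT(1)
#define IGB_XDP_REDIR		BIT(2)

static bool igb_xdp_consumed_frame_sketch(struct sk_buff *skb)
{
	unsigned int res;

	if (!IS_ERR(skb))		/* NULL (XDP_PASS) or a real skb */
		return false;

	res = -PTR_ERR(skb);		/* recover the verdict bits */
	return res & (IGB_XDP_CONSUMED | IGB_XDP_TX | IGB_XDP_REDIR);
}

In igb_clean_rx_irq() above, the TX/REDIR bits additionally decide whether the half-page buffer is flipped for reuse (the frame now belongs to the XDP Tx path) or only has its pagecnt_bias bumped (the frame was dropped, so the buffer can be handed straight back to hardware).
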
Rather than attempt * to deconstruct the settings from the registers, just return a shadow copy @@ -1141,8 +1141,8 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter, /** * igb_ptp_set_ts_config - set hardware time stamping config - * @netdev: - * @ifreq: + * @netdev: netdev struct + * @ifr: interface struct * **/ int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 19269f5d52bc..ee9f8c1dca83 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -61,7 +61,7 @@ static const struct igbvf_info *igbvf_info_tbl[] = { /** * igbvf_desc_unused - calculate if we have unused descriptors - * @rx_ring: address of receive ring structure + * @ring: address of receive ring structure **/ static int igbvf_desc_unused(struct igbvf_ring *ring) { @@ -74,6 +74,8 @@ static int igbvf_desc_unused(struct igbvf_ring *ring) /** * igbvf_receive_skb - helper function to handle Rx indications * @adapter: board private structure + * @netdev: pointer to netdev struct + * @skb: skb to indicate to stack * @status: descriptor status field as written by hardware * @vlan: descriptor vlan field as written by hardware (no le/be conversion) * @skb: pointer to sk_buff to be indicated to stack @@ -233,6 +235,8 @@ no_buffers: /** * igbvf_clean_rx_irq - Send received data up the network stack; legacy * @adapter: board private structure + * @work_done: output parameter used to indicate completed work + * @work_to_do: input parameter setting limit of work * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned @@ -406,6 +410,7 @@ static void igbvf_put_txbuf(struct igbvf_adapter *adapter, /** * igbvf_setup_tx_resources - allocate Tx resources (Descriptors) * @adapter: board private structure + * @tx_ring: ring being initialized * * Return 0 on success, negative on failure **/ @@ -444,6 +449,7 @@ err: /** * igbvf_setup_rx_resources - allocate Rx resources (Descriptors) * @adapter: board private structure + * @rx_ring: ring being initialized * * Returns 0 on success, negative on failure **/ @@ -540,7 +546,7 @@ void igbvf_free_tx_resources(struct igbvf_ring *tx_ring) /** * igbvf_clean_rx_ring - Free Rx Buffers per Queue - * @adapter: board private structure + * @rx_ring: ring structure pointer to free buffers from **/ static void igbvf_clean_rx_ring(struct igbvf_ring *rx_ring) { @@ -760,7 +766,7 @@ static void igbvf_set_itr(struct igbvf_adapter *adapter) /** * igbvf_clean_tx_irq - Reclaim resources after transmit completes - * @adapter: board private structure + * @tx_ring: ring structure to clean descriptors from * * returns true if ring is completely cleaned **/ @@ -1891,7 +1897,7 @@ static bool igbvf_has_link(struct igbvf_adapter *adapter) /** * igbvf_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * @t: timer list pointer containing private struct **/ static void igbvf_watchdog(struct timer_list *t) { @@ -2372,8 +2378,9 @@ static netdev_tx_t igbvf_xmit_frame(struct sk_buff *skb, /** * igbvf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: queue timing out (unused) **/ -static void igbvf_tx_timeout(struct net_device *netdev, unsigned int txqueue) +static void igbvf_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct igbvf_adapter *adapter = netdev_priv(netdev); diff --git 
a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 2d566f3c827b..35baae900c1f 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -215,6 +215,8 @@ struct igc_adapter { spinlock_t tmreg_lock; struct cyclecounter cc; struct timecounter tc; + struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ + ktime_t ptp_reset_start; /* Reset time in clock mono */ }; void igc_up(struct igc_adapter *adapter); @@ -548,6 +550,7 @@ void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va, int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); +void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index cc5a6cf531c7..fd37d2c203af 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -215,6 +215,11 @@ static s32 igc_get_invariants_base(struct igc_hw *hw) case IGC_DEV_ID_I225_K2: case IGC_DEV_ID_I225_LMVP: case IGC_DEV_ID_I225_IT: + case IGC_DEV_ID_I226_LM: + case IGC_DEV_ID_I226_V: + case IGC_DEV_ID_I226_IT: + case IGC_DEV_ID_I221_V: + case IGC_DEV_ID_I226_BLANK_NVM: case IGC_DEV_ID_I225_BLANK_NVM: mac->type = igc_i225; break; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index f1f464967f87..32f5fd684139 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -324,22 +324,10 @@ /* Advanced Receive Descriptor bit definitions */ #define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ -#define IGC_RXDEXT_STATERR_CE 0x01000000 -#define IGC_RXDEXT_STATERR_SE 0x02000000 -#define IGC_RXDEXT_STATERR_SEQ 0x04000000 -#define IGC_RXDEXT_STATERR_CXE 0x10000000 -#define IGC_RXDEXT_STATERR_TCPE 0x20000000 +#define IGC_RXDEXT_STATERR_L4E 0x20000000 #define IGC_RXDEXT_STATERR_IPE 0x40000000 #define IGC_RXDEXT_STATERR_RXE 0x80000000 -/* Same mask, but for extended and packet split descriptors */ -#define IGC_RXDEXT_ERR_FRAME_ERR_MASK ( \ - IGC_RXDEXT_STATERR_CE | \ - IGC_RXDEXT_STATERR_SE | \ - IGC_RXDEXT_STATERR_SEQ | \ - IGC_RXDEXT_STATERR_CXE | \ - IGC_RXDEXT_STATERR_RXE) - #define IGC_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 #define IGC_MRQC_RSS_FIELD_IPV4 0x00020000 #define IGC_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 @@ -409,7 +397,7 @@ #define IGC_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ /* Time Sync Transmit Control bit definitions */ -#define IGC_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ +#define IGC_TSYNCTXCTL_TXTT_0 0x00000001 /* Tx timestamp reg 0 valid */ #define IGC_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ #define IGC_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ #define IGC_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 44410c2265d6..61d331ce38cd 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -321,6 +321,9 @@ static void igc_ethtool_get_regs(struct net_device *netdev, for (i = 0; i < 8; i++) regs_buff[205 + i] = rd32(IGC_ETQF(i)); + + regs_buff[213] = adapter->stats.tlpic; + regs_buff[214] = adapter->stats.rlpic; 
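
The new I226/I221 parts enabled around this point follow the usual three-step device-ID plumbing spread across the igc hunks: a bare PCI ID define, a probe-table entry, and a MAC-type mapping. An abridged sketch under those assumptions (the function and table names are invented, board_base comes from the surrounding hunks, and error handling is reduced to -EINVAL):

#define IGC_DEV_ID_I226_LM	0x125B			/* igc_hw.h */

static const struct pci_device_id igc_pci_tbl_sketch[] = {	/* igc_main.c */
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
	{0, }	/* required last entry */
};

static int igc_set_mac_type_sketch(struct igc_hw *hw)
{
	switch (hw->device_id) {			/* igc_base.c */
	case IGC_DEV_ID_I226_LM:
		hw->mac.type = igc_i225;	/* I226/I221 reuse i225 paths */
		return 0;
	default:
		return -EINVAL;
	}
}

No new MAC type is introduced: the driver treats I226 and I221 as i225-class hardware throughout.
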
} static void igc_ethtool_get_wol(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index b9fe51b91c47..55dae7c4703f 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -24,6 +24,11 @@ #define IGC_DEV_ID_I225_K2 0x3101 #define IGC_DEV_ID_I225_LMVP 0x5502 #define IGC_DEV_ID_I225_IT 0x0D9F +#define IGC_DEV_ID_I226_LM 0x125B +#define IGC_DEV_ID_I226_V 0x125C +#define IGC_DEV_ID_I226_IT 0x125D +#define IGC_DEV_ID_I221_V 0x125E +#define IGC_DEV_ID_I226_BLANK_NVM 0x125F #define IGC_DEV_ID_I225_BLANK_NVM 0x15FD /* Function pointers for the MAC. */ @@ -125,9 +130,6 @@ struct igc_nvm_info { struct igc_nvm_operations ops; enum igc_nvm_type type; - u32 flash_bank_size; - u32 flash_base_addr; - u16 word_size; u16 delay_usec; u16 address_bits; @@ -153,7 +155,6 @@ struct igc_phy_info { u8 mdix; bool is_mdix; - bool reset_disable; bool speed_downgraded; bool autoneg_wait_to_complete; }; @@ -239,6 +240,8 @@ struct igc_hw_stats { u64 prc511; u64 prc1023; u64 prc1522; + u64 tlpic; + u64 rlpic; u64 gprc; u64 bprc; u64 mprc; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 9593aa4eea36..9112dff075cf 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -47,6 +47,11 @@ static const struct pci_device_id igc_pci_tbl[] = { { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base }, /* required last entry */ {0, } @@ -1428,7 +1433,7 @@ static void igc_rx_checksum(struct igc_ring *ring, /* TCP/UDP checksum error bit is set */ if (igc_test_staterr(rx_desc, - IGC_RXDEXT_STATERR_TCPE | + IGC_RXDEXT_STATERR_L4E | IGC_RXDEXT_STATERR_IPE)) { /* work around errata with sctp packets where the TCPE aka * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) @@ -1550,10 +1555,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif + net_prefetch(va); /* build an skb around the page buffer */ skb = build_skb(va - IGC_SKB_PAD, truesize); @@ -1589,10 +1591,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif + net_prefetch(va); /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN); @@ -1743,8 +1742,7 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) { - if (unlikely((igc_test_staterr(rx_desc, - IGC_RXDEXT_ERR_FRAME_ERR_MASK)))) { + if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { struct net_device *netdev = rx_ring->netdev; if (!(netdev->features & NETIF_F_RXALL)) { @@ -3685,6 +3683,8 @@ void igc_update_stats(struct igc_adapter *adapter) adapter->stats.prc511 += rd32(IGC_PRC511); 
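
The prefetch conversions in igc_build_skb()/igc_construct_skb() above swap the open-coded pair for net_prefetch(). For reference, the helper (as it reads in include/linux/netdevice.h around this series) wraps exactly the pattern being deleted: one prefetch of the first cache line, plus a second one on CPUs whose L1 lines are shorter than 128 bytes, on the theory that the stack will touch more than one short cache line of headers:

static inline void net_prefetch(void *p)
{
	prefetch(p);
#if L1_CACHE_BYTES < 128
	prefetch((u8 *)p + L1_CACHE_BYTES);
#endif
}

Centralising this keeps each driver from repeating the #if block, which is why the same substitution appears again in the ixgbe and ixgbevf hunks below.
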
adapter->stats.prc1023 += rd32(IGC_PRC1023); adapter->stats.prc1522 += rd32(IGC_PRC1522); + adapter->stats.tlpic += rd32(IGC_TLPIC); + adapter->stats.rlpic += rd32(IGC_RLPIC); mpc = rd32(IGC_MPC); adapter->stats.mpc += mpc; @@ -3778,6 +3778,8 @@ void igc_down(struct igc_adapter *adapter) set_bit(__IGC_DOWN, &adapter->state); + igc_ptp_suspend(adapter); + /* disable receives in the hardware */ rctl = rd32(IGC_RCTL); wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); @@ -3831,7 +3833,6 @@ void igc_down(struct igc_adapter *adapter) void igc_reinit_locked(struct igc_adapter *adapter) { - WARN_ON(in_interrupt()); while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) usleep_range(1000, 2000); igc_down(adapter); @@ -4659,7 +4660,7 @@ int igc_close(struct net_device *netdev) /** * igc_ioctl - Access the hwtstamp interface * @netdev: network interface device structure - * @ifreq: interface request data + * @ifr: interface request data * @cmd: ioctl command **/ static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) @@ -4700,14 +4701,35 @@ static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, return 0; } -static bool validate_schedule(const struct tc_taprio_qopt_offload *qopt) +static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now) +{ + struct timespec64 b; + + b = ktime_to_timespec64(base_time); + + return timespec64_compare(now, &b) > 0; +} + +static bool validate_schedule(struct igc_adapter *adapter, + const struct tc_taprio_qopt_offload *qopt) { int queue_uses[IGC_MAX_TX_QUEUES] = { }; + struct timespec64 now; size_t n; if (qopt->cycle_time_extension) return false; + igc_ptp_read(adapter, &now); + + /* If we program the controller's BASET registers with a time + * in the future, it will hold all the packets until that + * time, causing a lot of TX Hangs, so to avoid that, we + * reject schedules that would start in the future. + */ + if (!is_base_time_past(qopt->base_time, &now)) + return false; + for (n = 0; n < qopt->num_entries; n++) { const struct tc_taprio_sched_entry *e; int i; @@ -4762,7 +4784,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (adapter->base_time) return -EALREADY; - if (!validate_schedule(qopt)) + if (!validate_schedule(adapter, qopt)) return -EINVAL; adapter->cycle_time = qopt->cycle_time; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 6a9b5102aa55..ac0b9c85da7c 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -8,6 +8,7 @@ #include <linux/pci.h> #include <linux/ptp_classify.h> #include <linux/clocksource.h> +#include <linux/ktime.h> #define INCVALUE_MASK 0x7fffffff #define ISGN 0x80000000 @@ -16,17 +17,12 @@ #define IGC_PTP_TX_TIMEOUT (HZ * 15) /* SYSTIM read access for I225 */ -static void igc_ptp_read_i225(struct igc_adapter *adapter, - struct timespec64 *ts) +void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts) { struct igc_hw *hw = &adapter->hw; u32 sec, nsec; - /* The timestamp latches on lowest register read. For I210/I211, the - * lowest register is SYSTIMR. Since we only need to provide nanosecond - * resolution, we can ignore it. - */ - rd32(IGC_SYSTIMR); + /* The timestamp is latched when SYSTIML is read. 
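
The taprio rejection rule added above is easy to see with concrete numbers. A tiny sketch (the wrapper is hypothetical; is_base_time_past() and the -EINVAL propagation are from the hunks above):

static int igc_qbv_check_sketch(void)
{
	struct timespec64 now = { .tv_sec = 1000 };	/* current PHC time */
	ktime_t base_time = ktime_set(2000, 0);		/* 1000 s ahead */

	if (!is_base_time_past(base_time, &now))
		return -EINVAL;	/* rejected: BASET would stall all TX */
	return 0;
}

Because the i225 holds every packet until BASET is reached, accepting this schedule would look like a 1000-second Tx hang, which is exactly what validate_schedule() now guards against.
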
*/ nsec = rd32(IGC_SYSTIML); sec = rd32(IGC_SYSTIMH); @@ -39,9 +35,6 @@ static void igc_ptp_write_i225(struct igc_adapter *adapter, { struct igc_hw *hw = &adapter->hw; - /* Writing the SYSTIMR register is not necessary as it only - * provides sub-nanosecond resolution. - */ wr32(IGC_SYSTIML, ts->tv_nsec); wr32(IGC_SYSTIMH, ts->tv_sec); } @@ -81,7 +74,7 @@ static int igc_ptp_adjtime_i225(struct ptp_clock_info *ptp, s64 delta) spin_lock_irqsave(&igc->tmreg_lock, flags); - igc_ptp_read_i225(igc, &now); + igc_ptp_read(igc, &now); now = timespec64_add(now, then); igc_ptp_write_i225(igc, (const struct timespec64 *)&now); @@ -102,10 +95,9 @@ static int igc_ptp_gettimex64_i225(struct ptp_clock_info *ptp, spin_lock_irqsave(&igc->tmreg_lock, flags); ptp_read_system_prets(sts); - rd32(IGC_SYSTIMR); - ptp_read_system_postts(sts); ts->tv_nsec = rd32(IGC_SYSTIML); ts->tv_sec = rd32(IGC_SYSTIMH); + ptp_read_system_postts(sts); spin_unlock_irqrestore(&igc->tmreg_lock, flags); @@ -422,24 +414,17 @@ static void igc_ptp_tx_work(struct work_struct *work) if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) return; - if (time_is_before_jiffies(adapter->ptp_tx_start + - IGC_PTP_TX_TIMEOUT)) { - igc_ptp_tx_timeout(adapter); + tsynctxctl = rd32(IGC_TSYNCTXCTL); + if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0))) return; - } - tsynctxctl = rd32(IGC_TSYNCTXCTL); - if (tsynctxctl & IGC_TSYNCTXCTL_VALID) - igc_ptp_tx_hwtstamp(adapter); - else - /* reschedule to check later */ - schedule_work(&adapter->ptp_tx_work); + igc_ptp_tx_hwtstamp(adapter); } /** * igc_ptp_set_ts_config - set hardware time stamping config * @netdev: network interface device structure - * @ifreq: interface request data + * @ifr: interface request data * **/ int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr) @@ -466,7 +451,7 @@ int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr) /** * igc_ptp_get_ts_config - get hardware time stamping config * @netdev: network interface device structure - * @ifreq: interface request data + * @ifr: interface request data * * Get the hwtstamp_config settings to return to the user. 
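
The gettimex64 reordering above matters because the (prets, postts) pair is what bounds the PHC sample for user space: PTP_SYS_OFFSET_EXTENDED reports the system time taken immediately before and after the device read. Bracketing the now-removed SYSTIMR dummy read made that window meaningless. A sketch of the corrected pattern, equivalent to the hunk above minus the tmreg_lock the real function holds (the wrapper name is invented):

static void igc_gettimex_sketch(struct igc_adapter *adapter,
				struct timespec64 *ts,
				struct ptp_system_timestamp *sts)
{
	struct igc_hw *hw = &adapter->hw;

	ptp_read_system_prets(sts);		/* system time, pre-read */
	ts->tv_nsec = rd32(IGC_SYSTIML);	/* this read latches the PHC */
	ts->tv_sec = rd32(IGC_SYSTIMH);
	ptp_read_system_postts(sts);		/* system time, post-read */
}
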
Rather than attempt * to deconstruct the settings from the registers, just return a shadow copy @@ -515,6 +500,9 @@ void igc_ptp_init(struct igc_adapter *adapter) adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; + adapter->prev_ptp_time = ktime_to_timespec64(ktime_get_real()); + adapter->ptp_reset_start = ktime_get(); + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { @@ -526,6 +514,24 @@ void igc_ptp_init(struct igc_adapter *adapter) } } +static void igc_ptp_time_save(struct igc_adapter *adapter) +{ + igc_ptp_read(adapter, &adapter->prev_ptp_time); + adapter->ptp_reset_start = ktime_get(); +} + +static void igc_ptp_time_restore(struct igc_adapter *adapter) +{ + struct timespec64 ts = adapter->prev_ptp_time; + ktime_t delta; + + delta = ktime_sub(ktime_get(), adapter->ptp_reset_start); + + timespec64_add_ns(&ts, ktime_to_ns(delta)); + + igc_ptp_write_i225(adapter, &ts); +} + /** * igc_ptp_suspend - Disable PTP work items and prepare for suspend * @adapter: Board private structure @@ -542,6 +548,8 @@ void igc_ptp_suspend(struct igc_adapter *adapter) dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + + igc_ptp_time_save(adapter); } /** @@ -591,9 +599,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) /* Re-initialize the timer. */ if (hw->mac.type == igc_i225) { - struct timespec64 ts64 = ktime_to_timespec64(ktime_get_real()); - - igc_ptp_write_i225(adapter, &ts64); + igc_ptp_time_restore(adapter); } else { timecounter_init(&adapter->tc, &adapter->cc, ktime_to_ns(ktime_get_real())); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c index cbaa933ef30d..a430871d1c27 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c @@ -98,7 +98,6 @@ bool ixgb_adapter_stop(struct ixgb_hw *hw) { u32 ctrl_reg; - u32 icr_reg; ENTER(); @@ -142,7 +141,7 @@ ixgb_adapter_stop(struct ixgb_hw *hw) IXGB_WRITE_REG(hw, IMC, 0xffffffff); /* Clear any pending interrupt events. 
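
The igc_ptp_time_save()/igc_ptp_time_restore() pair above keeps the PHC advancing across resets; the arithmetic is worth spelling out with made-up numbers:

/* At suspend (igc_ptp_time_save):
 *	prev_ptp_time   = 500.000000000 s   (PHC, via igc_ptp_read())
 *	ptp_reset_start = 100 s             (CLOCK_MONOTONIC)
 * At reset, three monotonic seconds later (igc_ptp_time_restore):
 *	delta        = ktime_get() - ptp_reset_start = 3 s
 *	restored PHC = 500 s + 3 s          = 503 s
 */

Previously the clock was reseeded from ktime_get_real(), so a step of the system clock in between (by NTP, for example) could make the PHC jump with it; restoring from a monotonic delta avoids that.
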
*/ - icr_reg = IXGB_READ_REG(hw, ICR); + IXGB_READ_REG(hw, ICR); return ctrl_reg & IXGB_CTRL0_RST; } @@ -274,7 +273,6 @@ bool ixgb_init_hw(struct ixgb_hw *hw) { u32 i; - u32 ctrl_reg; bool status; ENTER(); @@ -286,7 +284,7 @@ ixgb_init_hw(struct ixgb_hw *hw) */ pr_debug("Issuing a global reset to MAC\n"); - ctrl_reg = ixgb_mac_reset(hw); + ixgb_mac_reset(hw); pr_debug("Issuing an EE reset to MAC\n"); #ifdef HP_ZX1 @@ -949,8 +947,6 @@ bool ixgb_check_for_bad_link(struct ixgb_hw *hw) static void ixgb_clear_hw_cntrs(struct ixgb_hw *hw) { - volatile u32 temp_reg; - ENTER(); /* if we are stopped or resetting exit gracefully */ @@ -959,66 +955,66 @@ ixgb_clear_hw_cntrs(struct ixgb_hw *hw) return; } - temp_reg = IXGB_READ_REG(hw, TPRL); - temp_reg = IXGB_READ_REG(hw, TPRH); - temp_reg = IXGB_READ_REG(hw, GPRCL); - temp_reg = IXGB_READ_REG(hw, GPRCH); - temp_reg = IXGB_READ_REG(hw, BPRCL); - temp_reg = IXGB_READ_REG(hw, BPRCH); - temp_reg = IXGB_READ_REG(hw, MPRCL); - temp_reg = IXGB_READ_REG(hw, MPRCH); - temp_reg = IXGB_READ_REG(hw, UPRCL); - temp_reg = IXGB_READ_REG(hw, UPRCH); - temp_reg = IXGB_READ_REG(hw, VPRCL); - temp_reg = IXGB_READ_REG(hw, VPRCH); - temp_reg = IXGB_READ_REG(hw, JPRCL); - temp_reg = IXGB_READ_REG(hw, JPRCH); - temp_reg = IXGB_READ_REG(hw, GORCL); - temp_reg = IXGB_READ_REG(hw, GORCH); - temp_reg = IXGB_READ_REG(hw, TORL); - temp_reg = IXGB_READ_REG(hw, TORH); - temp_reg = IXGB_READ_REG(hw, RNBC); - temp_reg = IXGB_READ_REG(hw, RUC); - temp_reg = IXGB_READ_REG(hw, ROC); - temp_reg = IXGB_READ_REG(hw, RLEC); - temp_reg = IXGB_READ_REG(hw, CRCERRS); - temp_reg = IXGB_READ_REG(hw, ICBC); - temp_reg = IXGB_READ_REG(hw, ECBC); - temp_reg = IXGB_READ_REG(hw, MPC); - temp_reg = IXGB_READ_REG(hw, TPTL); - temp_reg = IXGB_READ_REG(hw, TPTH); - temp_reg = IXGB_READ_REG(hw, GPTCL); - temp_reg = IXGB_READ_REG(hw, GPTCH); - temp_reg = IXGB_READ_REG(hw, BPTCL); - temp_reg = IXGB_READ_REG(hw, BPTCH); - temp_reg = IXGB_READ_REG(hw, MPTCL); - temp_reg = IXGB_READ_REG(hw, MPTCH); - temp_reg = IXGB_READ_REG(hw, UPTCL); - temp_reg = IXGB_READ_REG(hw, UPTCH); - temp_reg = IXGB_READ_REG(hw, VPTCL); - temp_reg = IXGB_READ_REG(hw, VPTCH); - temp_reg = IXGB_READ_REG(hw, JPTCL); - temp_reg = IXGB_READ_REG(hw, JPTCH); - temp_reg = IXGB_READ_REG(hw, GOTCL); - temp_reg = IXGB_READ_REG(hw, GOTCH); - temp_reg = IXGB_READ_REG(hw, TOTL); - temp_reg = IXGB_READ_REG(hw, TOTH); - temp_reg = IXGB_READ_REG(hw, DC); - temp_reg = IXGB_READ_REG(hw, PLT64C); - temp_reg = IXGB_READ_REG(hw, TSCTC); - temp_reg = IXGB_READ_REG(hw, TSCTFC); - temp_reg = IXGB_READ_REG(hw, IBIC); - temp_reg = IXGB_READ_REG(hw, RFC); - temp_reg = IXGB_READ_REG(hw, LFC); - temp_reg = IXGB_READ_REG(hw, PFRC); - temp_reg = IXGB_READ_REG(hw, PFTC); - temp_reg = IXGB_READ_REG(hw, MCFRC); - temp_reg = IXGB_READ_REG(hw, MCFTC); - temp_reg = IXGB_READ_REG(hw, XONRXC); - temp_reg = IXGB_READ_REG(hw, XONTXC); - temp_reg = IXGB_READ_REG(hw, XOFFRXC); - temp_reg = IXGB_READ_REG(hw, XOFFTXC); - temp_reg = IXGB_READ_REG(hw, RJC); + IXGB_READ_REG(hw, TPRL); + IXGB_READ_REG(hw, TPRH); + IXGB_READ_REG(hw, GPRCL); + IXGB_READ_REG(hw, GPRCH); + IXGB_READ_REG(hw, BPRCL); + IXGB_READ_REG(hw, BPRCH); + IXGB_READ_REG(hw, MPRCL); + IXGB_READ_REG(hw, MPRCH); + IXGB_READ_REG(hw, UPRCL); + IXGB_READ_REG(hw, UPRCH); + IXGB_READ_REG(hw, VPRCL); + IXGB_READ_REG(hw, VPRCH); + IXGB_READ_REG(hw, JPRCL); + IXGB_READ_REG(hw, JPRCH); + IXGB_READ_REG(hw, GORCL); + IXGB_READ_REG(hw, GORCH); + IXGB_READ_REG(hw, TORL); + IXGB_READ_REG(hw, TORH); + IXGB_READ_REG(hw, RNBC); + 
IXGB_READ_REG(hw, RUC); + IXGB_READ_REG(hw, ROC); + IXGB_READ_REG(hw, RLEC); + IXGB_READ_REG(hw, CRCERRS); + IXGB_READ_REG(hw, ICBC); + IXGB_READ_REG(hw, ECBC); + IXGB_READ_REG(hw, MPC); + IXGB_READ_REG(hw, TPTL); + IXGB_READ_REG(hw, TPTH); + IXGB_READ_REG(hw, GPTCL); + IXGB_READ_REG(hw, GPTCH); + IXGB_READ_REG(hw, BPTCL); + IXGB_READ_REG(hw, BPTCH); + IXGB_READ_REG(hw, MPTCL); + IXGB_READ_REG(hw, MPTCH); + IXGB_READ_REG(hw, UPTCL); + IXGB_READ_REG(hw, UPTCH); + IXGB_READ_REG(hw, VPTCL); + IXGB_READ_REG(hw, VPTCH); + IXGB_READ_REG(hw, JPTCL); + IXGB_READ_REG(hw, JPTCH); + IXGB_READ_REG(hw, GOTCL); + IXGB_READ_REG(hw, GOTCH); + IXGB_READ_REG(hw, TOTL); + IXGB_READ_REG(hw, TOTH); + IXGB_READ_REG(hw, DC); + IXGB_READ_REG(hw, PLT64C); + IXGB_READ_REG(hw, TSCTC); + IXGB_READ_REG(hw, TSCTFC); + IXGB_READ_REG(hw, IBIC); + IXGB_READ_REG(hw, RFC); + IXGB_READ_REG(hw, LFC); + IXGB_READ_REG(hw, PFRC); + IXGB_READ_REG(hw, PFTC); + IXGB_READ_REG(hw, MCFRC); + IXGB_READ_REG(hw, MCFTC); + IXGB_READ_REG(hw, XONRXC); + IXGB_READ_REG(hw, XONTXC); + IXGB_READ_REG(hw, XOFFRXC); + IXGB_READ_REG(hw, XOFFTXC); + IXGB_READ_REG(hw, RJC); } /****************************************************************************** @@ -1161,18 +1157,13 @@ static void ixgb_optics_reset(struct ixgb_hw *hw) { if (hw->phy_type == ixgb_phy_type_txn17401) { - u16 mdio_reg; - ixgb_write_phy_reg(hw, MDIO_CTRL1, IXGB_PHY_ADDRESS, MDIO_MMD_PMAPMD, MDIO_CTRL1_RESET); - mdio_reg = ixgb_read_phy_reg(hw, - MDIO_CTRL1, - IXGB_PHY_ADDRESS, - MDIO_MMD_PMAPMD); + ixgb_read_phy_reg(hw, MDIO_CTRL1, IXGB_PHY_ADDRESS, MDIO_MMD_PMAPMD); } } diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 048351cf0e4a..1588376d4c67 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -1109,7 +1109,7 @@ alloc_failed: /** * ixgb_watchdog - Timer Call-back - * @data: pointer to netdev cast into an unsigned long + * @t: pointer to timer_list containing our private info pointer **/ static void @@ -1531,10 +1531,11 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /** * ixgb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: queue hanging (unused) **/ static void -ixgb_tx_timeout(struct net_device *netdev, unsigned int txqueue) +ixgb_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct ixgb_adapter *adapter = netdev_priv(netdev); @@ -1746,7 +1747,8 @@ ixgb_intr(int irq, void *data) /** * ixgb_clean - NAPI Rx polling callback - * @adapter: board private structure + * @napi: napi struct pointer + * @budget: max number of receives to clean **/ static int @@ -1865,7 +1867,7 @@ ixgb_clean_tx_irq(struct ixgb_adapter *adapter) * ixgb_rx_checksum - Receive Checksum Offload for 82597. 
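
A note on the pattern above: these ixgb statistics registers, like ICR earlier, are read-to-clear, so the MMIO read itself is the side effect and the returned value is irrelevant. Dropping the write-only temp_reg/icr_reg locals silences W=1 "set but not used" warnings without changing behaviour. A minimal sketch (helper name invented):

static void ixgb_ack_irq_sketch(struct ixgb_hw *hw)
{
	/* read-to-clear: the access clears pending interrupt causes,
	 * the value can simply be discarded
	 */
	IXGB_READ_REG(hw, ICR);
}
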
* @adapter: board private structure * @rx_desc: receive descriptor - * @sk_buff: socket buffer with received data + * @skb: socket buffer with received data **/ static void @@ -1923,6 +1925,8 @@ static void ixgb_check_copybreak(struct napi_struct *napi, /** * ixgb_clean_rx_irq - Send received data up the network stack, * @adapter: board private structure + * @work_done: output pointer to amount of packets cleaned + * @work_to_do: how much work we can complete **/ static bool @@ -2042,6 +2046,7 @@ rxdesc_done: /** * ixgb_alloc_rx_buffers - Replace used receive buffers * @adapter: address of board private structure + * @cleaned_count: how many buffers to allocate **/ static void @@ -2211,7 +2216,7 @@ static pci_ers_result_t ixgb_io_error_detected(struct pci_dev *pdev, /** * ixgb_io_slot_reset - called after the pci bus has been reset. - * @pdev pointer to pci device with error + * @pdev: pointer to pci device with error * * This callback is called after the PCI bus has been reset. * Basically, this tries to restart the card from scratch. @@ -2259,7 +2264,7 @@ static pci_ers_result_t ixgb_io_slot_reset(struct pci_dev *pdev) /** * ixgb_io_resume - called when its OK to resume normal operations - * @pdev pointer to pci device with error + * @pdev: pointer to pci device with error * * The error recovery driver tells us that its OK to resume * normal operation. Implementation resembles the second-half diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 1e8a809233a0..de0fc6ecf491 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -350,7 +350,7 @@ struct ixgbe_ring { struct ixgbe_rx_queue_stats rx_stats; }; struct xdp_rxq_info xdp_rxq; - struct xdp_umem *xsk_umem; + struct xsk_buff_pool *xsk_pool; u16 ring_idx; /* {rx,tx,xdp}_ring back reference idx */ u16 rx_buf_len; } ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 71ec908266a6..a280aa34ca1d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -531,6 +531,16 @@ static int ixgbe_set_link_ksettings(struct net_device *netdev, return err; } +static void ixgbe_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *stats) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw_stats *hwstats = &adapter->stats; + + stats->tx_pause_frames = hwstats->lxontxc + hwstats->lxofftxc; + stats->rx_pause_frames = hwstats->lxonrxc + hwstats->lxoffrxc; +} + static void ixgbe_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { @@ -3546,6 +3556,7 @@ static const struct ethtool_ops ixgbe_ethtool_ops = { .set_eeprom = ixgbe_set_eeprom, .get_ringparam = ixgbe_get_ringparam, .set_ringparam = ixgbe_set_ringparam, + .get_pause_stats = ixgbe_get_pause_stats, .get_pauseparam = ixgbe_get_pauseparam, .set_pauseparam = ixgbe_set_pauseparam, .get_msglevel = ixgbe_get_msglevel, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 2e35c5706cf1..df389a11d3af 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -1029,10 +1029,10 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx) WRITE_ONCE(adapter->rx_ring[ring->queue_index], NULL); adapter->q_vector[v_idx] = NULL; - napi_hash_del(&q_vector->napi); - 
netif_napi_del(&q_vector->napi); + __netif_napi_del(&q_vector->napi); /* + * after a call to __netif_napi_del() napi may still be used and * ixgbe_get_stats64() might access the rings on this vector, * we must wait a grace period before freeing it. */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 86ca8b9ea1b8..45ae33e15303 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2095,10 +2095,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(xdp->data); -#if L1_CACHE_BYTES < 128 - prefetch(xdp->data + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data); + /* Note, we get here by enabling legacy-rx via: * * ethtool --set-priv-flags <dev> legacy-rx on @@ -2161,10 +2159,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, * likely have a consumer accessing first few bytes of meta * data, and then actual data. */ - prefetch(xdp->data_meta); -#if L1_CACHE_BYTES < 128 - prefetch(xdp->data_meta + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data_meta); /* build an skb to around the page buffer */ skb = build_skb(xdp->data_hard_start, truesize); @@ -3156,7 +3151,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget) #endif ixgbe_for_each_ring(ring, q_vector->tx) { - bool wd = ring->xsk_umem ? + bool wd = ring->xsk_pool ? ixgbe_clean_xdp_tx_irq(q_vector, ring, budget) : ixgbe_clean_tx_irq(q_vector, ring, budget); @@ -3176,7 +3171,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget) per_ring_budget = budget; ixgbe_for_each_ring(ring, q_vector->rx) { - int cleaned = ring->xsk_umem ? + int cleaned = ring->xsk_pool ? 
ixgbe_clean_rx_irq_zc(q_vector, ring, per_ring_budget) : ixgbe_clean_rx_irq(q_vector, ring, @@ -3471,9 +3466,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, u32 txdctl = IXGBE_TXDCTL_ENABLE; u8 reg_idx = ring->reg_idx; - ring->xsk_umem = NULL; + ring->xsk_pool = NULL; if (ring_is_xdp(ring)) - ring->xsk_umem = ixgbe_xsk_umem(adapter, ring); + ring->xsk_pool = ixgbe_xsk_pool(adapter, ring); /* disable queue to avoid issues while updating state */ IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), 0); @@ -3713,8 +3708,8 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; /* configure the packet buffer length */ - if (rx_ring->xsk_umem) { - u32 xsk_buf_len = xsk_umem_get_rx_frame_size(rx_ring->xsk_umem); + if (rx_ring->xsk_pool) { + u32 xsk_buf_len = xsk_pool_get_rx_frame_size(rx_ring->xsk_pool); /* If the MAC support setting RXDCTL.RLPML, the * SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and @@ -4059,12 +4054,12 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, u8 reg_idx = ring->reg_idx; xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); - ring->xsk_umem = ixgbe_xsk_umem(adapter, ring); - if (ring->xsk_umem) { + ring->xsk_pool = ixgbe_xsk_pool(adapter, ring); + if (ring->xsk_pool) { WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL)); - xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq); + xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); } else { WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL)); @@ -4119,8 +4114,8 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, #endif } - if (ring->xsk_umem && hw->mac.type != ixgbe_mac_82599EB) { - u32 xsk_buf_len = xsk_umem_get_rx_frame_size(ring->xsk_umem); + if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) { + u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | IXGBE_RXDCTL_RLPML_EN); @@ -4142,7 +4137,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); ixgbe_rx_desc_queue_enable(adapter, ring); - if (ring->xsk_umem) + if (ring->xsk_pool) ixgbe_alloc_rx_buffers_zc(ring, ixgbe_desc_unused(ring)); else ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring)); @@ -5292,7 +5287,7 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) u16 i = rx_ring->next_to_clean; struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i]; - if (rx_ring->xsk_umem) { + if (rx_ring->xsk_pool) { ixgbe_xsk_clean_rx_ring(rx_ring); goto skip_free; } @@ -5682,7 +5677,6 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) void ixgbe_reinit_locked(struct ixgbe_adapter *adapter) { - WARN_ON(in_interrupt()); /* put off any impending NetWatchDogTimeout */ netif_trans_update(adapter->netdev); @@ -5989,7 +5983,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring) u16 i = tx_ring->next_to_clean; struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; - if (tx_ring->xsk_umem) { + if (tx_ring->xsk_pool) { ixgbe_xsk_clean_tx_ring(tx_ring); goto out; } @@ -6185,8 +6179,9 @@ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter) /** * ixgbe_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: queue number that timed out **/ -static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) +static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct 
ixgbe_adapter *adapter = netdev_priv(netdev); @@ -10161,7 +10156,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) */ if (need_reset && prog) for (i = 0; i < adapter->num_rx_queues; i++) - if (adapter->xdp_ring[i]->xsk_umem) + if (adapter->xdp_ring[i]->xsk_pool) (void)ixgbe_xsk_wakeup(adapter->netdev, i, XDP_WAKEUP_RX); @@ -10175,8 +10170,8 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return ixgbe_xdp_setup(dev, xdp->prog); - case XDP_SETUP_XSK_UMEM: - return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem, + case XDP_SETUP_XSK_POOL: + return ixgbe_xsk_pool_setup(adapter, xdp->xsk.pool, xdp->xsk.queue_id); default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 7980d7265e10..f77fa3e4fdd1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -771,7 +771,7 @@ mii_bus_write_done: /** * ixgbe_mii_bus_read - Read a clause 22/45 register - * @hw: pointer to hardware structure + * @bus: pointer to mii_bus structure which points to our driver private * @addr: address * @regnum: register number **/ @@ -786,7 +786,7 @@ static s32 ixgbe_mii_bus_read(struct mii_bus *bus, int addr, int regnum) /** * ixgbe_mii_bus_write - Write a clause 22/45 register - * @hw: pointer to hardware structure + * @bus: pointer to mii_bus structure which points to our driver private * @addr: address * @regnum: register number * @val: value to write @@ -803,7 +803,7 @@ static s32 ixgbe_mii_bus_write(struct mii_bus *bus, int addr, int regnum, /** * ixgbe_x550em_a_mii_bus_read - Read a clause 22/45 register on x550em_a - * @hw: pointer to hardware structure + * @bus: pointer to mii_bus structure which points to our driver private * @addr: address * @regnum: register number **/ @@ -820,7 +820,7 @@ static s32 ixgbe_x550em_a_mii_bus_read(struct mii_bus *bus, int addr, /** * ixgbe_x550em_a_mii_bus_write - Write a clause 22/45 register on x550em_a - * @hw: pointer to hardware structure + * @bus: pointer to mii_bus structure which points to our driver private * @addr: address * @regnum: register number * @val: value to write diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h index 7887ae4aaf4f..2aeec78029bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h @@ -28,9 +28,10 @@ void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, u64 qmask); void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring); void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring); -struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter, - struct ixgbe_ring *ring); -int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem, +struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter, + struct ixgbe_ring *ring); +int ixgbe_xsk_pool_setup(struct ixgbe_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid); void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index ec7121f352e2..3771857cf887 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -8,8 +8,8 @@ #include "ixgbe.h" #include "ixgbe_txrx_common.h" -struct xdp_umem *ixgbe_xsk_umem(struct 
ixgbe_adapter *adapter, - struct ixgbe_ring *ring) +struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter, + struct ixgbe_ring *ring) { bool xdp_on = READ_ONCE(adapter->xdp_prog); int qid = ring->ring_idx; @@ -17,11 +17,11 @@ struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter, if (!xdp_on || !test_bit(qid, adapter->af_xdp_zc_qps)) return NULL; - return xdp_get_umem_from_qid(adapter->netdev, qid); + return xsk_get_pool_from_qid(adapter->netdev, qid); } -static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, - struct xdp_umem *umem, +static int ixgbe_xsk_pool_enable(struct ixgbe_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid) { struct net_device *netdev = adapter->netdev; @@ -35,7 +35,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, qid >= netdev->real_num_tx_queues) return -EINVAL; - err = xsk_buff_dma_map(umem, &adapter->pdev->dev, IXGBE_RX_DMA_ATTR); + err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IXGBE_RX_DMA_ATTR); if (err) return err; @@ -59,13 +59,13 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, return 0; } -static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid) +static int ixgbe_xsk_pool_disable(struct ixgbe_adapter *adapter, u16 qid) { - struct xdp_umem *umem; + struct xsk_buff_pool *pool; bool if_running; - umem = xdp_get_umem_from_qid(adapter->netdev, qid); - if (!umem) + pool = xsk_get_pool_from_qid(adapter->netdev, qid); + if (!pool) return -EINVAL; if_running = netif_running(adapter->netdev) && @@ -75,7 +75,7 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid) ixgbe_txrx_ring_disable(adapter, qid); clear_bit(qid, adapter->af_xdp_zc_qps); - xsk_buff_dma_unmap(umem, IXGBE_RX_DMA_ATTR); + xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR); if (if_running) ixgbe_txrx_ring_enable(adapter, qid); @@ -83,11 +83,12 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid) return 0; } -int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem, +int ixgbe_xsk_pool_setup(struct ixgbe_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid) { - return umem ? ixgbe_xsk_umem_enable(adapter, umem, qid) : - ixgbe_xsk_umem_disable(adapter, qid); + return pool ? 
ixgbe_xsk_pool_enable(adapter, pool, qid) : + ixgbe_xsk_pool_disable(adapter, qid); } static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, @@ -149,7 +150,7 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) i -= rx_ring->count; do { - bi->xdp = xsk_buff_alloc(rx_ring->xsk_umem); + bi->xdp = xsk_buff_alloc(rx_ring->xsk_pool); if (!bi->xdp) { ok = false; break; @@ -286,7 +287,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, } bi->xdp->data_end = bi->xdp->data + size; - xsk_buff_dma_sync_for_cpu(bi->xdp); + xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool); xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp); if (xdp_res) { @@ -344,11 +345,11 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, q_vector->rx.total_packets += total_rx_packets; q_vector->rx.total_bytes += total_rx_bytes; - if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) { + if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) - xsk_set_rx_need_wakeup(rx_ring->xsk_umem); + xsk_set_rx_need_wakeup(rx_ring->xsk_pool); else - xsk_clear_rx_need_wakeup(rx_ring->xsk_umem); + xsk_clear_rx_need_wakeup(rx_ring->xsk_pool); return (int)total_rx_packets; } @@ -373,6 +374,7 @@ void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring) static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) { + struct xsk_buff_pool *pool = xdp_ring->xsk_pool; union ixgbe_adv_tx_desc *tx_desc = NULL; struct ixgbe_tx_buffer *tx_bi; bool work_done = true; @@ -387,12 +389,11 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) break; } - if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) + if (!xsk_tx_peek_desc(pool, &desc)) break; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma, - desc.len); + dma = xsk_buff_raw_get_dma(pool, desc.addr); + xsk_buff_raw_dma_sync_for_device(pool, dma, desc.len); tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use]; tx_bi->bytecount = desc.len; @@ -418,7 +419,7 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) if (tx_desc) { ixgbe_xdp_ring_update_tail(xdp_ring); - xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + xsk_tx_release(pool); } return !!budget && work_done; @@ -439,7 +440,7 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, { u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use; unsigned int total_packets = 0, total_bytes = 0; - struct xdp_umem *umem = tx_ring->xsk_umem; + struct xsk_buff_pool *pool = tx_ring->xsk_pool; union ixgbe_adv_tx_desc *tx_desc; struct ixgbe_tx_buffer *tx_bi; u32 xsk_frames = 0; @@ -484,10 +485,10 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, q_vector->tx.total_packets += total_packets; if (xsk_frames) - xsk_umem_complete_tx(umem, xsk_frames); + xsk_tx_completed(pool, xsk_frames); - if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) - xsk_set_tx_need_wakeup(tx_ring->xsk_umem); + if (xsk_uses_need_wakeup(pool)) + xsk_set_tx_need_wakeup(pool); return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); } @@ -511,7 +512,7 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) if (test_bit(__IXGBE_TX_DISABLED, &ring->state)) return -ENETDOWN; - if (!ring->xsk_umem) + if (!ring->xsk_pool) return -ENXIO; if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { @@ -526,7 +527,7 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring 
*tx_ring) { u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use; - struct xdp_umem *umem = tx_ring->xsk_umem; + struct xsk_buff_pool *pool = tx_ring->xsk_pool; struct ixgbe_tx_buffer *tx_bi; u32 xsk_frames = 0; @@ -546,5 +547,5 @@ void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring) } if (xsk_frames) - xsk_umem_complete_tx(umem, xsk_frames); + xsk_tx_completed(pool, xsk_frames); } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a428113e6d54..82fce27f682b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -246,8 +246,9 @@ static void ixgbevf_tx_timeout_reset(struct ixgbevf_adapter *adapter) /** * ixgbevf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: transmit queue hanging (unused) **/ -static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int txqueue) +static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); @@ -866,10 +867,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(xdp->data); -#if L1_CACHE_BYTES < 128 - prefetch(xdp->data + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data); + /* Note, we get here by enabling legacy-rx via: * * ethtool --set-priv-flags <dev> legacy-rx on @@ -947,10 +946,7 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, * have a consumer accessing first few bytes of meta data, * and then actual data. */ - prefetch(xdp->data_meta); -#if L1_CACHE_BYTES < 128 - prefetch(xdp->data_meta + L1_CACHE_BYTES); -#endif + net_prefetch(xdp->data_meta); /* build an skb around the page buffer */ skb = build_skb(xdp->data_hard_start, truesize); @@ -2526,8 +2522,6 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter) { - WARN_ON(in_interrupt()); - while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state)) msleep(1); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index ddc757680089..e9efe074edc1 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1187,9 +1187,9 @@ jme_shutdown_nic(struct jme_adapter *jme) } static void -jme_pcc_tasklet(unsigned long arg) +jme_pcc_tasklet(struct tasklet_struct *t) { - struct jme_adapter *jme = (struct jme_adapter *)arg; + struct jme_adapter *jme = from_tasklet(jme, t, pcc_task); struct net_device *netdev = jme->dev; if (unlikely(test_bit(JME_FLAG_SHUTDOWN, &jme->flags))) { @@ -1265,10 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapter *jme) jwrite32f(jme, JME_APMC, apmc); } -static void -jme_link_change_tasklet(unsigned long arg) +static void jme_link_change_tasklet(struct tasklet_struct *t) { - struct jme_adapter *jme = (struct jme_adapter *)arg; + struct jme_adapter *jme = from_tasklet(jme, t, linkch_task); struct net_device *netdev = jme->dev; int rc; @@ -1345,9 +1344,9 @@ out: } static void -jme_rx_clean_tasklet(unsigned long arg) +jme_rx_clean_tasklet(struct tasklet_struct *t) { - struct jme_adapter *jme = (struct jme_adapter *)arg; + struct jme_adapter *jme = from_tasklet(jme, t, rxclean_task); struct dynpcc_info *dpi = &(jme->dpi); jme_process_receive(jme, jme->rx_ring_size); @@ -1380,9 +1379,9 @@ jme_poll(JME_NAPI_HOLDER(holder), JME_NAPI_WEIGHT(budget)) } static void -jme_rx_empty_tasklet(unsigned 
long arg) +jme_rx_empty_tasklet(struct tasklet_struct *t) { - struct jme_adapter *jme = (struct jme_adapter *)arg; + struct jme_adapter *jme = from_tasklet(jme, t, rxempty_task); if (unlikely(atomic_read(&jme->link_changing) != 1)) return; @@ -1392,7 +1391,7 @@ jme_rx_empty_tasklet(unsigned long arg) netif_info(jme, rx_status, jme->dev, "RX Queue Full!\n"); - jme_rx_clean_tasklet(arg); + jme_rx_clean_tasklet(&jme->rxclean_task); while (atomic_read(&jme->rx_empty) > 0) { atomic_dec(&jme->rx_empty); @@ -1416,10 +1415,9 @@ jme_wake_queue_if_stopped(struct jme_adapter *jme) } -static void -jme_tx_clean_tasklet(unsigned long arg) +static void jme_tx_clean_tasklet(struct tasklet_struct *t) { - struct jme_adapter *jme = (struct jme_adapter *)arg; + struct jme_adapter *jme = from_tasklet(jme, t, txclean_task); struct jme_ring *txring = &(jme->txring[0]); struct txdesc *txdesc = txring->desc; struct jme_buffer_info *txbi = txring->bufinf, *ctxbi, *ttxbi; @@ -1834,14 +1832,10 @@ jme_open(struct net_device *netdev) jme_clear_pm_disable_wol(jme); JME_NAPI_ENABLE(jme); - tasklet_init(&jme->linkch_task, jme_link_change_tasklet, - (unsigned long) jme); - tasklet_init(&jme->txclean_task, jme_tx_clean_tasklet, - (unsigned long) jme); - tasklet_init(&jme->rxclean_task, jme_rx_clean_tasklet, - (unsigned long) jme); - tasklet_init(&jme->rxempty_task, jme_rx_empty_tasklet, - (unsigned long) jme); + tasklet_setup(&jme->linkch_task, jme_link_change_tasklet); + tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet); + tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet); + tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet); rc = jme_request_irq(jme); if (rc) @@ -3040,9 +3034,7 @@ jme_init_one(struct pci_dev *pdev, atomic_set(&jme->tx_cleaning, 1); atomic_set(&jme->rx_empty, 1); - tasklet_init(&jme->pcc_task, - jme_pcc_tasklet, - (unsigned long) jme); + tasklet_setup(&jme->pcc_task, jme_pcc_tasklet); jme->dpi.cur = PCC_P1; jme->reg_ghc = 0; diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 03e034918d14..af441d699a57 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1113,7 +1113,7 @@ out: return rc; probe_err_register: - kfree(lp->td_ring); + kfree(KSEG0ADDR(lp->td_ring)); probe_err_td_ring: iounmap(lp->tx_dma_regs); probe_err_dma_tx: @@ -1133,6 +1133,7 @@ static int korina_remove(struct platform_device *pdev) iounmap(lp->eth_regs); iounmap(lp->rx_dma_regs); iounmap(lp->tx_dma_regs); + kfree(KSEG0ADDR(lp->td_ring)); unregister_netdev(bif->dev); free_netdev(bif->dev); diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index ef4f35ba077d..41815b609569 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -92,6 +92,12 @@ config MVPP2 This driver supports the network interface units in the Marvell ARMADA 375, 7K and 8K SoCs. 
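
The jme conversions above are the standard tasklet API migration: the callback now receives the tasklet pointer itself and recovers its private structure with from_tasklet(), a type-safe container_of() wrapper, so the old unsigned-long cast and the explicit context argument both disappear. The shape of the pattern, using the pcc_task case from above (the registration wrapper is invented for illustration):

static void jme_pcc_tasklet(struct tasklet_struct *t)
{
	/* expands to container_of(t, struct jme_adapter, pcc_task) */
	struct jme_adapter *jme = from_tasklet(jme, t, pcc_task);

	/* ... */
}

static void jme_register_sketch(struct jme_adapter *jme)
{
	/* tasklet_setup() records &jme->pcc_task as the callback's own arg */
	tasklet_setup(&jme->pcc_task, jme_pcc_tasklet);
}

The same mechanical rewrite covers linkch_task, txclean_task, rxclean_task and rxempty_task.
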
+config MVPP2_PTP + bool "Marvell Armada 8K Enable PTP support" + depends on NETWORK_PHY_TIMESTAMPING + depends on (PTP_1588_CLOCK = y && MVPP2 = y) || \ + (PTP_1588_CLOCK && MVPP2 = m) + config PXA168_ETH tristate "Marvell pxa168 ethernet support" depends on HAS_IOMEM @@ -172,5 +178,6 @@ config SKY2_DEBUG source "drivers/net/ethernet/marvell/octeontx2/Kconfig" +source "drivers/net/ethernet/marvell/prestera/Kconfig" endif # NET_VENDOR_MARVELL diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile index 89dea7284d5b..9f88fe822555 100644 --- a/drivers/net/ethernet/marvell/Makefile +++ b/drivers/net/ethernet/marvell/Makefile @@ -12,3 +12,4 @@ obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_SKGE) += skge.o obj-$(CONFIG_SKY2) += sky2.o obj-y += octeontx2/ +obj-y += prestera/ diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5bf0409f5d42..54b0bf574c05 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -330,7 +330,6 @@ #define MVNETA_SKB_HEADROOM ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) #define MVNETA_SKB_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \ MVNETA_SKB_HEADROOM)) -#define MVNETA_SKB_SIZE(len) (SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD) #define MVNETA_MAX_RX_BUF_SIZE (PAGE_SIZE - MVNETA_SKB_PAD) #define IS_TSO_HEADER(txq, addr) \ @@ -752,13 +751,12 @@ static void mvneta_txq_inc_put(struct mvneta_tx_queue *txq) static void mvneta_mib_counters_clear(struct mvneta_port *pp) { int i; - u32 dummy; /* Perform dummy reads from MIB counters */ for (i = 0; i < MVNETA_MIB_LATE_COLLISION; i += 4) - dummy = mvreg_read(pp, (MVNETA_MIB_COUNTERS_BASE + i)); - dummy = mvreg_read(pp, MVNETA_RX_DISCARD_FRAME_COUNT); - dummy = mvreg_read(pp, MVNETA_OVERRUN_FRAME_COUNT); + mvreg_read(pp, (MVNETA_MIB_COUNTERS_BASE + i)); + mvreg_read(pp, MVNETA_RX_DISCARD_FRAME_COUNT); + mvreg_read(pp, MVNETA_OVERRUN_FRAME_COUNT); } /* Get System Network Statistics */ @@ -1833,7 +1831,7 @@ static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp, /* Free tx queue skbuffs */ static void mvneta_txq_bufs_free(struct mvneta_port *pp, struct mvneta_tx_queue *txq, int num, - struct netdev_queue *nq) + struct netdev_queue *nq, bool napi) { unsigned int bytes_compl = 0, pkts_compl = 0; int i; @@ -1856,7 +1854,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, dev_kfree_skb_any(buf->skb); } else if (buf->type == MVNETA_TYPE_XDP_TX || buf->type == MVNETA_TYPE_XDP_NDO) { - xdp_return_frame(buf->xdpf); + if (napi && buf->type == MVNETA_TYPE_XDP_TX) + xdp_return_frame_rx_napi(buf->xdpf); + else + xdp_return_frame(buf->xdpf); } } @@ -1874,7 +1875,7 @@ static void mvneta_txq_done(struct mvneta_port *pp, if (!tx_done) return; - mvneta_txq_bufs_free(pp, txq, tx_done, nq); + mvneta_txq_bufs_free(pp, txq, tx_done, nq, true); txq->count -= tx_done; @@ -2227,8 +2228,7 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, struct mvneta_rx_desc *rx_desc, struct mvneta_rx_queue *rxq, struct xdp_buff *xdp, int *size, - struct page *page, - struct mvneta_stats *stats) + struct page *page) { unsigned char *data = page_address(page); int data_len = -MVNETA_MH_SIZE, len; @@ -2236,19 +2236,22 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, enum dma_data_direction dma_dir; struct skb_shared_info *sinfo; - if (MVNETA_SKB_SIZE(rx_desc->data_size) > PAGE_SIZE) { + if (*size > MVNETA_MAX_RX_BUF_SIZE) { len = MVNETA_MAX_RX_BUF_SIZE; data_len += len; } else { - len = rx_desc->data_size; + len 
= *size; data_len += len - ETH_FCS_LEN; } + *size = *size - len; dma_dir = page_pool_get_dma_dir(rxq->page_pool); dma_sync_single_for_cpu(dev->dev.parent, rx_desc->buf_phys_addr, len, dma_dir); + rx_desc->buf_phys_addr = 0; + /* Prefetch header */ prefetch(data); @@ -2259,9 +2262,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, sinfo = xdp_get_shared_info_from_buff(xdp); sinfo->nr_frags = 0; - - *size = rx_desc->data_size - len; - rx_desc->buf_phys_addr = 0; } static void @@ -2307,11 +2307,8 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); int i, num_frags = sinfo->nr_frags; - skb_frag_t frags[MAX_SKB_FRAGS]; struct sk_buff *skb; - memcpy(frags, sinfo->frags, sizeof(skb_frag_t) * num_frags); - skb = build_skb(xdp->data_hard_start, PAGE_SIZE); if (!skb) return ERR_PTR(-ENOMEM); @@ -2323,12 +2320,12 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, mvneta_rx_csum(pp, desc_status, skb); for (i = 0; i < num_frags; i++) { - struct page *page = skb_frag_page(&frags[i]); + skb_frag_t *frag = &sinfo->frags[i]; skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - page, skb_frag_off(&frags[i]), - skb_frag_size(&frags[i]), PAGE_SIZE); - page_pool_release_page(rxq->page_pool, page); + skb_frag_page(frag), skb_frag_off(frag), + skb_frag_size(frag), PAGE_SIZE); + page_pool_release_page(rxq->page_pool, skb_frag_page(frag)); } return skb; @@ -2378,10 +2375,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi, size = rx_desc->data_size; frame_sz = size - ETH_FCS_LEN; - desc_status = rx_desc->status; + desc_status = rx_status; mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf, - &size, page, &ps); + &size, page); } else { if (unlikely(!xdp_buf.data_hard_start)) { rx_desc->buf_phys_addr = 0; @@ -2865,7 +2862,7 @@ static void mvneta_txq_done_force(struct mvneta_port *pp, struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id); int tx_done = txq->count; - mvneta_txq_bufs_free(pp, txq, tx_done, nq); + mvneta_txq_bufs_free(pp, txq, tx_done, nq, false); /* reset txq */ txq->count = 0; diff --git a/drivers/net/ethernet/marvell/mvpp2/Makefile b/drivers/net/ethernet/marvell/mvpp2/Makefile index 51f65a202c6e..9bd8e7964b40 100644 --- a/drivers/net/ethernet/marvell/mvpp2/Makefile +++ b/drivers/net/ethernet/marvell/mvpp2/Makefile @@ -4,4 +4,5 @@ # obj-$(CONFIG_MVPP2) := mvpp2.o -mvpp2-objs := mvpp2_main.o mvpp2_prs.o mvpp2_cls.o mvpp2_debugfs.o +mvpp2-y := mvpp2_main.o mvpp2_prs.o mvpp2_cls.o mvpp2_debugfs.o +mvpp2-$(CONFIG_MVPP2_PTP) += mvpp2_tai.o diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 32753cc771bf..834775843067 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -12,6 +12,7 @@ #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/phylink.h> #include <net/flow_offload.h> @@ -461,8 +462,12 @@ #define MVPP22_CTRL4_DP_CLK_SEL BIT(5) #define MVPP22_CTRL4_SYNC_BYPASS_DIS BIT(6) #define MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE BIT(7) +#define MVPP22_GMAC_INT_SUM_STAT 0xa0 +#define MVPP22_GMAC_INT_SUM_STAT_INTERNAL BIT(1) +#define MVPP22_GMAC_INT_SUM_STAT_PTP BIT(2) #define MVPP22_GMAC_INT_SUM_MASK 0xa4 #define MVPP22_GMAC_INT_SUM_MASK_LINK_STAT BIT(1) +#define MVPP22_GMAC_INT_SUM_MASK_PTP BIT(2) /* Per-port XGMAC registers. 
PPv2.2 only, only for GOP port 0, * relative to port->base. @@ -488,9 +493,13 @@ #define MVPP22_XLG_CTRL3_MACMODESELECT_MASK (7 << 13) #define MVPP22_XLG_CTRL3_MACMODESELECT_GMAC (0 << 13) #define MVPP22_XLG_CTRL3_MACMODESELECT_10G (1 << 13) +#define MVPP22_XLG_EXT_INT_STAT 0x158 +#define MVPP22_XLG_EXT_INT_STAT_XLG BIT(1) +#define MVPP22_XLG_EXT_INT_STAT_PTP BIT(7) #define MVPP22_XLG_EXT_INT_MASK 0x15c #define MVPP22_XLG_EXT_INT_MASK_XLG BIT(1) #define MVPP22_XLG_EXT_INT_MASK_GIG BIT(2) +#define MVPP22_XLG_EXT_INT_MASK_PTP BIT(7) #define MVPP22_XLG_CTRL4_REG 0x184 #define MVPP22_XLG_CTRL4_FWD_FC BIT(5) #define MVPP22_XLG_CTRL4_FWD_PFC BIT(6) @@ -501,6 +510,70 @@ #define MVPP22_SMI_MISC_CFG_REG 0x1204 #define MVPP22_SMI_POLLING_EN BIT(10) +/* TAI registers, PPv2.2 only, relative to priv->iface_base */ +#define MVPP22_TAI_INT_CAUSE 0x1400 +#define MVPP22_TAI_INT_MASK 0x1404 +#define MVPP22_TAI_CR0 0x1408 +#define MVPP22_TAI_CR1 0x140c +#define MVPP22_TAI_TCFCR0 0x1410 +#define MVPP22_TAI_TCFCR1 0x1414 +#define MVPP22_TAI_TCFCR2 0x1418 +#define MVPP22_TAI_FATWR 0x141c +#define MVPP22_TAI_TOD_STEP_NANO_CR 0x1420 +#define MVPP22_TAI_TOD_STEP_FRAC_HIGH 0x1424 +#define MVPP22_TAI_TOD_STEP_FRAC_LOW 0x1428 +#define MVPP22_TAI_TAPDC_HIGH 0x142c +#define MVPP22_TAI_TAPDC_LOW 0x1430 +#define MVPP22_TAI_TGTOD_SEC_HIGH 0x1434 +#define MVPP22_TAI_TGTOD_SEC_MED 0x1438 +#define MVPP22_TAI_TGTOD_SEC_LOW 0x143c +#define MVPP22_TAI_TGTOD_NANO_HIGH 0x1440 +#define MVPP22_TAI_TGTOD_NANO_LOW 0x1444 +#define MVPP22_TAI_TGTOD_FRAC_HIGH 0x1448 +#define MVPP22_TAI_TGTOD_FRAC_LOW 0x144c +#define MVPP22_TAI_TLV_SEC_HIGH 0x1450 +#define MVPP22_TAI_TLV_SEC_MED 0x1454 +#define MVPP22_TAI_TLV_SEC_LOW 0x1458 +#define MVPP22_TAI_TLV_NANO_HIGH 0x145c +#define MVPP22_TAI_TLV_NANO_LOW 0x1460 +#define MVPP22_TAI_TLV_FRAC_HIGH 0x1464 +#define MVPP22_TAI_TLV_FRAC_LOW 0x1468 +#define MVPP22_TAI_TCV0_SEC_HIGH 0x146c +#define MVPP22_TAI_TCV0_SEC_MED 0x1470 +#define MVPP22_TAI_TCV0_SEC_LOW 0x1474 +#define MVPP22_TAI_TCV0_NANO_HIGH 0x1478 +#define MVPP22_TAI_TCV0_NANO_LOW 0x147c +#define MVPP22_TAI_TCV0_FRAC_HIGH 0x1480 +#define MVPP22_TAI_TCV0_FRAC_LOW 0x1484 +#define MVPP22_TAI_TCV1_SEC_HIGH 0x1488 +#define MVPP22_TAI_TCV1_SEC_MED 0x148c +#define MVPP22_TAI_TCV1_SEC_LOW 0x1490 +#define MVPP22_TAI_TCV1_NANO_HIGH 0x1494 +#define MVPP22_TAI_TCV1_NANO_LOW 0x1498 +#define MVPP22_TAI_TCV1_FRAC_HIGH 0x149c +#define MVPP22_TAI_TCV1_FRAC_LOW 0x14a0 +#define MVPP22_TAI_TCSR 0x14a4 +#define MVPP22_TAI_TUC_LSB 0x14a8 +#define MVPP22_TAI_GFM_SEC_HIGH 0x14ac +#define MVPP22_TAI_GFM_SEC_MED 0x14b0 +#define MVPP22_TAI_GFM_SEC_LOW 0x14b4 +#define MVPP22_TAI_GFM_NANO_HIGH 0x14b8 +#define MVPP22_TAI_GFM_NANO_LOW 0x14bc +#define MVPP22_TAI_GFM_FRAC_HIGH 0x14c0 +#define MVPP22_TAI_GFM_FRAC_LOW 0x14c4 +#define MVPP22_TAI_PCLK_DA_HIGH 0x14c8 +#define MVPP22_TAI_PCLK_DA_LOW 0x14cc +#define MVPP22_TAI_CTCR 0x14d0 +#define MVPP22_TAI_PCLK_CCC_HIGH 0x14d4 +#define MVPP22_TAI_PCLK_CCC_LOW 0x14d8 +#define MVPP22_TAI_DTC_HIGH 0x14dc +#define MVPP22_TAI_DTC_LOW 0x14e0 +#define MVPP22_TAI_CCC_HIGH 0x14e4 +#define MVPP22_TAI_CCC_LOW 0x14e8 +#define MVPP22_TAI_ICICE 0x14f4 +#define MVPP22_TAI_ICICC_LOW 0x14f8 +#define MVPP22_TAI_TUC_MSB 0x14fc + #define MVPP22_GMAC_BASE(port) (0x7000 + (port) * 0x1000 + 0xe00) #define MVPP2_CAUSE_TXQ_SENT_DESC_ALL_MASK 0xff @@ -527,6 +600,46 @@ #define MVPP22_XPCS_CFG0_PCS_MODE(n) ((n) << 3) #define MVPP22_XPCS_CFG0_ACTIVE_LANE(n) ((n) << 5) +/* PTP registers. 
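   One register block per GOP port, at MVPP22_PTP_BASE(port); the
   offsets below are relative to that base.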
PPv2.2 only */ +#define MVPP22_PTP_BASE(port) (0x7800 + (port * 0x1000)) +#define MVPP22_PTP_INT_CAUSE 0x00 +#define MVPP22_PTP_INT_CAUSE_QUEUE1 BIT(6) +#define MVPP22_PTP_INT_CAUSE_QUEUE0 BIT(5) +#define MVPP22_PTP_INT_MASK 0x04 +#define MVPP22_PTP_INT_MASK_QUEUE1 BIT(6) +#define MVPP22_PTP_INT_MASK_QUEUE0 BIT(5) +#define MVPP22_PTP_GCR 0x08 +#define MVPP22_PTP_GCR_RX_RESET BIT(13) +#define MVPP22_PTP_GCR_TX_RESET BIT(1) +#define MVPP22_PTP_GCR_TSU_ENABLE BIT(0) +#define MVPP22_PTP_TX_Q0_R0 0x0c +#define MVPP22_PTP_TX_Q0_R1 0x10 +#define MVPP22_PTP_TX_Q0_R2 0x14 +#define MVPP22_PTP_TX_Q1_R0 0x18 +#define MVPP22_PTP_TX_Q1_R1 0x1c +#define MVPP22_PTP_TX_Q1_R2 0x20 +#define MVPP22_PTP_TPCR 0x24 +#define MVPP22_PTP_V1PCR 0x28 +#define MVPP22_PTP_V2PCR 0x2c +#define MVPP22_PTP_Y1731PCR 0x30 +#define MVPP22_PTP_NTPTSPCR 0x34 +#define MVPP22_PTP_NTPRXPCR 0x38 +#define MVPP22_PTP_NTPTXPCR 0x3c +#define MVPP22_PTP_WAMPPCR 0x40 +#define MVPP22_PTP_NAPCR 0x44 +#define MVPP22_PTP_FAPCR 0x48 +#define MVPP22_PTP_CAPCR 0x50 +#define MVPP22_PTP_ATAPCR 0x54 +#define MVPP22_PTP_ACTAPCR 0x58 +#define MVPP22_PTP_CATAPCR 0x5c +#define MVPP22_PTP_CACTAPCR 0x60 +#define MVPP22_PTP_AITAPCR 0x64 +#define MVPP22_PTP_CAITAPCR 0x68 +#define MVPP22_PTP_CITAPCR 0x6c +#define MVPP22_PTP_NTP_OFF_HIGH 0x70 +#define MVPP22_PTP_NTP_OFF_LOW 0x74 +#define MVPP22_PTP_TX_PIPE_STATUS_DELAY 0x78 + /* System controller registers. Accessed through a regmap. */ #define GENCONF_SOFT_RESET1 0x1108 #define GENCONF_SOFT_RESET1_GOP BIT(6) @@ -692,6 +805,43 @@ enum mvpp2_prs_l3_cast { MVPP2_PRS_L3_BROAD_CAST }; +/* PTP descriptor constants. The low bits of the descriptor are stored + * separately from the high bits. + */ +#define MVPP22_PTP_DESC_MASK_LOW 0xfff + +/* PTPAction */ +enum mvpp22_ptp_action { + MVPP22_PTP_ACTION_NONE = 0, + MVPP22_PTP_ACTION_FORWARD = 1, + MVPP22_PTP_ACTION_CAPTURE = 3, + /* The following have not been verified */ + MVPP22_PTP_ACTION_ADDTIME = 4, + MVPP22_PTP_ACTION_ADDCORRECTEDTIME = 5, + MVPP22_PTP_ACTION_CAPTUREADDTIME = 6, + MVPP22_PTP_ACTION_CAPTUREADDCORRECTEDTIME = 7, + MVPP22_PTP_ACTION_ADDINGRESSTIME = 8, + MVPP22_PTP_ACTION_CAPTUREADDINGRESSTIME = 9, + MVPP22_PTP_ACTION_CAPTUREINGRESSTIME = 10, +}; + +/* PTPPacketFormat */ +enum mvpp22_ptp_packet_format { + MVPP22_PTP_PKT_FMT_PTPV2 = 0, + MVPP22_PTP_PKT_FMT_PTPV1 = 1, + MVPP22_PTP_PKT_FMT_Y1731 = 2, + MVPP22_PTP_PKT_FMT_NTPTS = 3, + MVPP22_PTP_PKT_FMT_NTPRX = 4, + MVPP22_PTP_PKT_FMT_NTPTX = 5, + MVPP22_PTP_PKT_FMT_TWAMP = 6, +}; + +#define MVPP22_PTP_ACTION(x) (((x) & 15) << 0) +#define MVPP22_PTP_PACKETFORMAT(x) (((x) & 7) << 4) +#define MVPP22_PTP_MACTIMESTAMPINGEN BIT(11) +#define MVPP22_PTP_TIMESTAMPENTRYID(x) (((x) & 31) << 12) +#define MVPP22_PTP_TIMESTAMPQUEUESELECT BIT(18) + /* BM constants */ #define MVPP2_BM_JUMBO_BUF_NUM 512 #define MVPP2_BM_LONG_BUF_NUM 1024 @@ -759,6 +909,8 @@ enum mvpp2_prs_l3_cast { #define MVPP2_DESC_DMA_MASK DMA_BIT_MASK(40) +struct mvpp2_tai; + /* Definitions */ struct mvpp2_dbgfs_entries; @@ -794,6 +946,7 @@ struct mvpp2 { /* List of pointers to port structures */ int port_count; struct mvpp2_port *port_list[MVPP2_MAX_PORTS]; + struct mvpp2_tai *tai; /* Number of Tx threads used */ unsigned int nthreads; @@ -907,6 +1060,11 @@ struct mvpp2_ethtool_fs { struct ethtool_rxnfc rxnfc; }; +struct mvpp2_hwtstamp_queue { + struct sk_buff *skb[32]; + u8 next; +}; + struct mvpp2_port { u8 id; @@ -915,7 +1073,7 @@ struct mvpp2_port { */ int gop_id; - int link_irq; + int port_irq; struct mvpp2 *priv; @@ -967,6 +1125,7 @@ 
struct mvpp2_port { phy_interface_t phy_interface; struct phylink *phylink; struct phylink_config phylink_config; + struct phylink_pcs phylink_pcs; struct phy *comphy; struct mvpp2_bm_pool *pool_long; @@ -989,6 +1148,11 @@ struct mvpp2_port { * them from 0 */ int rss_ctx[MVPP22_N_RSS_TABLES]; + + bool hwtstamp; + bool rx_hwtstamp; + enum hwtstamp_tx_types tx_hwtstamp_type; + struct mvpp2_hwtstamp_queue tx_hwtstamp_queue[2]; }; /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the @@ -1057,7 +1221,8 @@ struct mvpp22_tx_desc { u8 packet_offset; u8 phys_txq; __le16 data_size; - __le64 reserved1; + __le32 ptp_descriptor; + __le32 reserved2; __le64 buf_dma_addr_ptp; __le64 buf_cookie_misc; }; @@ -1068,7 +1233,7 @@ struct mvpp22_rx_desc { __le16 reserved1; __le16 data_size; __le32 reserved2; - __le32 reserved3; + __le32 timestamp; __le64 buf_dma_addr_key_hash; __le64 buf_cookie_misc; }; @@ -1248,4 +1413,36 @@ void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name); void mvpp2_dbgfs_cleanup(struct mvpp2 *priv); +#ifdef CONFIG_MVPP2_PTP +int mvpp22_tai_probe(struct device *dev, struct mvpp2 *priv); +void mvpp22_tai_tstamp(struct mvpp2_tai *tai, u32 tstamp, + struct skb_shared_hwtstamps *hwtstamp); +void mvpp22_tai_start(struct mvpp2_tai *tai); +void mvpp22_tai_stop(struct mvpp2_tai *tai); +int mvpp22_tai_ptp_clock_index(struct mvpp2_tai *tai); +#else +static inline int mvpp22_tai_probe(struct device *dev, struct mvpp2 *priv) +{ + return 0; +} +static inline void mvpp22_tai_tstamp(struct mvpp2_tai *tai, u32 tstamp, + struct skb_shared_hwtstamps *hwtstamp) +{ +} +static inline void mvpp22_tai_start(struct mvpp2_tai *tai) +{ +} +static inline void mvpp22_tai_stop(struct mvpp2_tai *tai) +{ +} +static inline int mvpp22_tai_ptp_clock_index(struct mvpp2_tai *tai) +{ + return -1; +} +#endif + +static inline bool mvpp22_rx_hwtstamping(struct mvpp2_port *port) +{ + return IS_ENABLED(CONFIG_MVPP2_PTP) && port->rx_hwtstamp; +} #endif diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 6e140d1b8967..f6616c8933ca 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -28,6 +28,7 @@ #include <linux/phy.h> #include <linux/phylink.h> #include <linux/phy/phy.h> +#include <linux/ptp_classify.h> #include <linux/clk.h> #include <linux/hrtimer.h> #include <linux/ktime.h> @@ -57,13 +58,7 @@ static struct { /* The prototype is added here to be used in start_dev when using ACPI. This * will be removed once phylink is used for all modes (dt+ACPI). 
*/ -static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode, - const struct phylink_link_state *state); -static void mvpp2_mac_link_up(struct phylink_config *config, - struct phy_device *phy, - unsigned int mode, phy_interface_t interface, - int speed, int duplex, - bool tx_pause, bool rx_pause); +static void mvpp2_acpi_start(struct mvpp2_port *port); /* Queue modes */ #define MVPP2_QDIST_SINGLE_MODE 0 @@ -1385,6 +1380,10 @@ static void mvpp22_gop_setup_irq(struct mvpp2_port *port) { u32 val; + mvpp2_modify(port->base + MVPP22_GMAC_INT_SUM_MASK, + MVPP22_GMAC_INT_SUM_MASK_PTP, + MVPP22_GMAC_INT_SUM_MASK_PTP); + if (port->phylink || phy_interface_mode_is_rgmii(port->phy_interface) || phy_interface_mode_is_8023z(port->phy_interface) || @@ -1398,6 +1397,10 @@ static void mvpp22_gop_setup_irq(struct mvpp2_port *port) val = readl(port->base + MVPP22_XLG_INT_MASK); val |= MVPP22_XLG_INT_MASK_LINK; writel(val, port->base + MVPP22_XLG_INT_MASK); + + mvpp2_modify(port->base + MVPP22_XLG_EXT_INT_MASK, + MVPP22_XLG_EXT_INT_MASK_PTP, + MVPP22_XLG_EXT_INT_MASK_PTP); } mvpp22_gop_unmask_irq(port); @@ -1485,8 +1488,8 @@ static void mvpp2_port_loopback_set(struct mvpp2_port *port, else val &= ~MVPP2_GMAC_GMII_LB_EN_MASK; - if (phy_interface_mode_is_8023z(port->phy_interface) || - port->phy_interface == PHY_INTERFACE_MODE_SGMII) + if (phy_interface_mode_is_8023z(state->interface) || + state->interface == PHY_INTERFACE_MODE_SGMII) val |= MVPP2_GMAC_PCS_LB_EN_MASK; else val &= ~MVPP2_GMAC_PCS_LB_EN_MASK; @@ -2980,44 +2983,67 @@ static irqreturn_t mvpp2_isr(int irq, void *dev_id) return IRQ_HANDLED; } -/* Per-port interrupt for link status changes */ -static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id) +static void mvpp2_isr_handle_ptp_queue(struct mvpp2_port *port, int nq) { - struct mvpp2_port *port = (struct mvpp2_port *)dev_id; - struct net_device *dev = port->dev; - bool event = false, link = false; - u32 val; + struct skb_shared_hwtstamps shhwtstamps; + struct mvpp2_hwtstamp_queue *queue; + struct sk_buff *skb; + void __iomem *ptp_q; + unsigned int id; + u32 r0, r1, r2; - mvpp22_gop_mask_irq(port); + ptp_q = port->priv->iface_base + MVPP22_PTP_BASE(port->gop_id); + if (nq) + ptp_q += MVPP22_PTP_TX_Q1_R0 - MVPP22_PTP_TX_Q0_R0; - if (mvpp2_port_supports_xlg(port) && - mvpp2_is_xlg(port->phy_interface)) { - val = readl(port->base + MVPP22_XLG_INT_STAT); - if (val & MVPP22_XLG_INT_STAT_LINK) { - event = true; - val = readl(port->base + MVPP22_XLG_STATUS); - if (val & MVPP22_XLG_STATUS_LINK_UP) - link = true; - } - } else if (phy_interface_mode_is_rgmii(port->phy_interface) || - phy_interface_mode_is_8023z(port->phy_interface) || - port->phy_interface == PHY_INTERFACE_MODE_SGMII) { - val = readl(port->base + MVPP22_GMAC_INT_STAT); - if (val & MVPP22_GMAC_INT_STAT_LINK) { - event = true; - val = readl(port->base + MVPP2_GMAC_STATUS0); - if (val & MVPP2_GMAC_STATUS0_LINK_UP) - link = true; + queue = &port->tx_hwtstamp_queue[nq]; + + while (1) { + r0 = readl_relaxed(ptp_q + MVPP22_PTP_TX_Q0_R0) & 0xffff; + if (!r0) + break; + + r1 = readl_relaxed(ptp_q + MVPP22_PTP_TX_Q0_R1) & 0xffff; + r2 = readl_relaxed(ptp_q + MVPP22_PTP_TX_Q0_R2) & 0xffff; + + id = (r0 >> 1) & 31; + + skb = queue->skb[id]; + queue->skb[id] = NULL; + if (skb) { + u32 ts = r2 << 19 | r1 << 3 | r0 >> 13; + + mvpp22_tai_tstamp(port->priv->tai, ts, &shhwtstamps); + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); } } +} + +static void mvpp2_isr_handle_ptp(struct mvpp2_port *port) +{ + void __iomem 
*ptp; + u32 val; + + ptp = port->priv->iface_base + MVPP22_PTP_BASE(port->gop_id); + val = readl(ptp + MVPP22_PTP_INT_CAUSE); + if (val & MVPP22_PTP_INT_CAUSE_QUEUE0) + mvpp2_isr_handle_ptp_queue(port, 0); + if (val & MVPP22_PTP_INT_CAUSE_QUEUE1) + mvpp2_isr_handle_ptp_queue(port, 1); +} + +static void mvpp2_isr_handle_link(struct mvpp2_port *port, bool link) +{ + struct net_device *dev = port->dev; if (port->phylink) { phylink_mac_change(port->phylink, link); - goto handled; + return; } - if (!netif_running(dev) || !event) - goto handled; + if (!netif_running(dev)) + return; if (link) { mvpp2_interrupts_enable(port); @@ -3034,8 +3060,65 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id) mvpp2_interrupts_disable(port); } +} + +static void mvpp2_isr_handle_xlg(struct mvpp2_port *port) +{ + bool link; + u32 val; + + val = readl(port->base + MVPP22_XLG_INT_STAT); + if (val & MVPP22_XLG_INT_STAT_LINK) { + val = readl(port->base + MVPP22_XLG_STATUS); + link = (val & MVPP22_XLG_STATUS_LINK_UP); + mvpp2_isr_handle_link(port, link); + } +} + +static void mvpp2_isr_handle_gmac_internal(struct mvpp2_port *port) +{ + bool link; + u32 val; + + if (phy_interface_mode_is_rgmii(port->phy_interface) || + phy_interface_mode_is_8023z(port->phy_interface) || + port->phy_interface == PHY_INTERFACE_MODE_SGMII) { + val = readl(port->base + MVPP22_GMAC_INT_STAT); + if (val & MVPP22_GMAC_INT_STAT_LINK) { + val = readl(port->base + MVPP2_GMAC_STATUS0); + link = (val & MVPP2_GMAC_STATUS0_LINK_UP); + mvpp2_isr_handle_link(port, link); + } + } +} + +/* Per-port interrupt for link status changes */ +static irqreturn_t mvpp2_port_isr(int irq, void *dev_id) +{ + struct mvpp2_port *port = (struct mvpp2_port *)dev_id; + u32 val; + + mvpp22_gop_mask_irq(port); + + if (mvpp2_port_supports_xlg(port) && + mvpp2_is_xlg(port->phy_interface)) { + /* Check the external status register */ + val = readl(port->base + MVPP22_XLG_EXT_INT_STAT); + if (val & MVPP22_XLG_EXT_INT_STAT_XLG) + mvpp2_isr_handle_xlg(port); + if (val & MVPP22_XLG_EXT_INT_STAT_PTP) + mvpp2_isr_handle_ptp(port); + } else { + /* If it's not the XLG, we must be using the GMAC. + * Check the summary status. + */ + val = readl(port->base + MVPP22_GMAC_INT_SUM_STAT); + if (val & MVPP22_GMAC_INT_SUM_STAT_INTERNAL) + mvpp2_isr_handle_gmac_internal(port); + if (val & MVPP22_GMAC_INT_SUM_STAT_PTP) + mvpp2_isr_handle_ptp(port); + } -handled: mvpp22_gop_unmask_irq(port); return IRQ_HANDLED; } @@ -3427,7 +3510,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, unsigned int frag_size; dma_addr_t dma_addr; phys_addr_t phys_addr; - u32 rx_status; + u32 rx_status, timestamp; int pool, rx_bytes, err, ret; void *data; @@ -3505,6 +3588,15 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, goto err_drop_frame; } + /* If we have RX hardware timestamping enabled, grab the + * timestamp from the queue and convert. 
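+		 * The descriptor carries a 32-bit value: two bits of seconds
+		 * and 30 bits of nanoseconds. mvpp22_tai_tstamp() combines it
+		 * with the TAI's cached seconds count to form the full
+		 * timestamp.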
+ */ + if (mvpp22_rx_hwtstamping(port)) { + timestamp = le32_to_cpu(rx_desc->pp22.timestamp); + mvpp22_tai_tstamp(port->priv->tai, timestamp, + skb_hwtstamps(skb)); + } + err = mvpp2_rx_refill(port, bm_pool, pp, pool); if (err) { netdev_err(port->dev, "failed to refill BM pools\n"); @@ -3579,6 +3671,94 @@ tx_desc_unmap_put(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, mvpp2_txq_desc_put(txq); } +static void mvpp2_txdesc_clear_ptp(struct mvpp2_port *port, + struct mvpp2_tx_desc *desc) +{ + /* We only need to clear the low bits */ + if (port->priv->hw_version != MVPP21) + desc->pp22.ptp_descriptor &= + cpu_to_le32(~MVPP22_PTP_DESC_MASK_LOW); +} + +static bool mvpp2_tx_hw_tstamp(struct mvpp2_port *port, + struct mvpp2_tx_desc *tx_desc, + struct sk_buff *skb) +{ + struct mvpp2_hwtstamp_queue *queue; + unsigned int mtype, type, i; + struct ptp_header *hdr; + u64 ptpdesc; + + if (port->priv->hw_version == MVPP21 || + port->tx_hwtstamp_type == HWTSTAMP_TX_OFF) + return false; + + type = ptp_classify_raw(skb); + if (!type) + return false; + + hdr = ptp_parse_header(skb, type); + if (!hdr) + return false; + + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + ptpdesc = MVPP22_PTP_MACTIMESTAMPINGEN | + MVPP22_PTP_ACTION_CAPTURE; + queue = &port->tx_hwtstamp_queue[0]; + + switch (type & PTP_CLASS_VMASK) { + case PTP_CLASS_V1: + ptpdesc |= MVPP22_PTP_PACKETFORMAT(MVPP22_PTP_PKT_FMT_PTPV1); + break; + + case PTP_CLASS_V2: + ptpdesc |= MVPP22_PTP_PACKETFORMAT(MVPP22_PTP_PKT_FMT_PTPV2); + mtype = hdr->tsmt & 15; + /* Direct PTP Sync messages to queue 1 */ + if (mtype == 0) { + ptpdesc |= MVPP22_PTP_TIMESTAMPQUEUESELECT; + queue = &port->tx_hwtstamp_queue[1]; + } + break; + } + + /* Take a reference on the skb and insert into our queue */ + i = queue->next; + queue->next = (i + 1) & 31; + if (queue->skb[i]) + dev_kfree_skb_any(queue->skb[i]); + queue->skb[i] = skb_get(skb); + + ptpdesc |= MVPP22_PTP_TIMESTAMPENTRYID(i); + + /* + * 3:0 - PTPAction + * 6:4 - PTPPacketFormat + * 7 - PTP_CF_WraparoundCheckEn + * 9:8 - IngressTimestampSeconds[1:0] + * 10 - Reserved + * 11 - MACTimestampingEn + * 17:12 - PTP_TimestampQueueEntryID[5:0] + * 18 - PTPTimestampQueueSelect + * 19 - UDPChecksumUpdateEn + * 27:20 - TimestampOffset + * PTP, NTPTransmit, OWAMP/TWAMP - L3 to PTP header + * NTPTs, Y.1731 - L3 to timestamp entry + * 35:28 - UDP Checksum Offset + * + * stored in tx descriptor bits 75:64 (11:0) and 191:168 (35:12) + */ + tx_desc->pp22.ptp_descriptor &= + cpu_to_le32(~MVPP22_PTP_DESC_MASK_LOW); + tx_desc->pp22.ptp_descriptor |= + cpu_to_le32(ptpdesc & MVPP22_PTP_DESC_MASK_LOW); + tx_desc->pp22.buf_dma_addr_ptp &= cpu_to_le64(~0xffffff0000000000ULL); + tx_desc->pp22.buf_dma_addr_ptp |= cpu_to_le64((ptpdesc >> 12) << 40); + + return true; +} + /* Handle tx fragmentation processing */ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb, struct mvpp2_tx_queue *aggr_txq, @@ -3595,6 +3775,7 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb, void *addr = skb_frag_address(frag); tx_desc = mvpp2_txq_next_desc_get(aggr_txq); + mvpp2_txdesc_clear_ptp(port, tx_desc); mvpp2_txdesc_txq_set(port, tx_desc, txq->id); mvpp2_txdesc_size_set(port, tx_desc, skb_frag_size(frag)); @@ -3644,6 +3825,7 @@ static inline void mvpp2_tso_put_hdr(struct sk_buff *skb, struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq); dma_addr_t addr; + mvpp2_txdesc_clear_ptp(port, tx_desc); mvpp2_txdesc_txq_set(port, tx_desc, txq->id); mvpp2_txdesc_size_set(port, tx_desc, 
hdr_sz); @@ -3668,6 +3850,7 @@ static inline int mvpp2_tso_put_data(struct sk_buff *skb, struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq); dma_addr_t buf_dma_addr; + mvpp2_txdesc_clear_ptp(port, tx_desc); mvpp2_txdesc_txq_set(port, tx_desc, txq->id); mvpp2_txdesc_size_set(port, tx_desc, sz); @@ -3784,6 +3967,9 @@ static netdev_tx_t mvpp2_tx(struct sk_buff *skb, struct net_device *dev) /* Get a descriptor for the first part of the packet */ tx_desc = mvpp2_txq_next_desc_get(aggr_txq); + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) || + !mvpp2_tx_hw_tstamp(port, tx_desc, skb)) + mvpp2_txdesc_clear_ptp(port, tx_desc); mvpp2_txdesc_txq_set(port, tx_desc, txq->id); mvpp2_txdesc_size_set(port, tx_desc, skb_headlen(skb)); @@ -4007,17 +4193,7 @@ static void mvpp2_start_dev(struct mvpp2_port *port) if (port->phylink) { phylink_start(port->phylink); } else { - /* Phylink isn't used as of now for ACPI, so the MAC has to be - * configured manually when the interface is started. This will - * be removed as soon as the phylink ACPI support lands in. - */ - struct phylink_link_state state = { - .interface = port->phy_interface, - }; - mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state); - mvpp2_mac_link_up(&port->phylink_config, NULL, - MLO_AN_INBAND, port->phy_interface, - SPEED_UNKNOWN, DUPLEX_UNKNOWN, false, false); + mvpp2_acpi_start(port); } netif_tx_start_all_queues(port->dev); @@ -4227,12 +4403,13 @@ static int mvpp2_open(struct net_device *dev) valid = true; } - if (priv->hw_version == MVPP22 && port->link_irq) { - err = request_irq(port->link_irq, mvpp2_link_status_isr, 0, + if (priv->hw_version == MVPP22 && port->port_irq) { + err = request_irq(port->port_irq, mvpp2_port_isr, 0, dev->name, port); if (err) { - netdev_err(port->dev, "cannot request link IRQ %d\n", - port->link_irq); + netdev_err(port->dev, + "cannot request port link/ptp IRQ %d\n", + port->port_irq); goto err_free_irq; } @@ -4243,7 +4420,7 @@ static int mvpp2_open(struct net_device *dev) valid = true; } else { - port->link_irq = 0; + port->port_irq = 0; } if (!valid) { @@ -4287,8 +4464,8 @@ static int mvpp2_stop(struct net_device *dev) if (port->phylink) phylink_disconnect_phy(port->phylink); - if (port->link_irq) - free_irq(port->link_irq, port); + if (port->port_irq) + free_irq(port->port_irq, port); mvpp2_irqs_deinit(port); if (!port->has_tx_irqs) { @@ -4548,10 +4725,124 @@ mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_dropped = dev->stats.tx_dropped; } +static int mvpp2_set_ts_config(struct mvpp2_port *port, struct ifreq *ifr) +{ + struct hwtstamp_config config; + void __iomem *ptp; + u32 gcr, int_mask; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + if (config.flags) + return -EINVAL; + + if (config.tx_type != HWTSTAMP_TX_OFF && + config.tx_type != HWTSTAMP_TX_ON) + return -ERANGE; + + ptp = port->priv->iface_base + MVPP22_PTP_BASE(port->gop_id); + + int_mask = gcr = 0; + if (config.tx_type != HWTSTAMP_TX_OFF) { + gcr |= MVPP22_PTP_GCR_TSU_ENABLE | MVPP22_PTP_GCR_TX_RESET; + int_mask |= MVPP22_PTP_INT_MASK_QUEUE1 | + MVPP22_PTP_INT_MASK_QUEUE0; + } + + /* It seems we must also release the TX reset when enabling the TSU */ + if (config.rx_filter != HWTSTAMP_FILTER_NONE) + gcr |= MVPP22_PTP_GCR_TSU_ENABLE | MVPP22_PTP_GCR_RX_RESET | + MVPP22_PTP_GCR_TX_RESET; + + if (gcr & MVPP22_PTP_GCR_TSU_ENABLE) + mvpp22_tai_start(port->priv->tai); + + if (config.rx_filter != HWTSTAMP_FILTER_NONE) { + config.rx_filter = 
HWTSTAMP_FILTER_ALL; + mvpp2_modify(ptp + MVPP22_PTP_GCR, + MVPP22_PTP_GCR_RX_RESET | + MVPP22_PTP_GCR_TX_RESET | + MVPP22_PTP_GCR_TSU_ENABLE, gcr); + port->rx_hwtstamp = true; + } else { + port->rx_hwtstamp = false; + mvpp2_modify(ptp + MVPP22_PTP_GCR, + MVPP22_PTP_GCR_RX_RESET | + MVPP22_PTP_GCR_TX_RESET | + MVPP22_PTP_GCR_TSU_ENABLE, gcr); + } + + mvpp2_modify(ptp + MVPP22_PTP_INT_MASK, + MVPP22_PTP_INT_MASK_QUEUE1 | + MVPP22_PTP_INT_MASK_QUEUE0, int_mask); + + if (!(gcr & MVPP22_PTP_GCR_TSU_ENABLE)) + mvpp22_tai_stop(port->priv->tai); + + port->tx_hwtstamp_type = config.tx_type; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static int mvpp2_get_ts_config(struct mvpp2_port *port, struct ifreq *ifr) +{ + struct hwtstamp_config config; + + memset(&config, 0, sizeof(config)); + + config.tx_type = port->tx_hwtstamp_type; + config.rx_filter = port->rx_hwtstamp ? + HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static int mvpp2_ethtool_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mvpp2_port *port = netdev_priv(dev); + + if (!port->hwtstamp) + return -EOPNOTSUPP; + + info->phc_index = mvpp22_tai_ptp_clock_index(port->priv->tai); + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mvpp2_port *port = netdev_priv(dev); + switch (cmd) { + case SIOCSHWTSTAMP: + if (port->hwtstamp) + return mvpp2_set_ts_config(port, ifr); + break; + + case SIOCGHWTSTAMP: + if (port->hwtstamp) + return mvpp2_get_ts_config(port, ifr); + break; + } + if (!port->phylink) return -ENOTSUPP; @@ -5021,6 +5312,7 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = { ETHTOOL_COALESCE_MAX_FRAMES, .nway_reset = mvpp2_ethtool_nway_reset, .get_link = ethtool_op_get_link, + .get_ts_info = mvpp2_ethtool_get_ts_info, .set_coalesce = mvpp2_ethtool_set_coalesce, .get_coalesce = mvpp2_ethtool_get_coalesce, .get_drvinfo = mvpp2_ethtool_get_drvinfo, @@ -5392,6 +5684,155 @@ static struct mvpp2_port *mvpp2_phylink_to_port(struct phylink_config *config) return container_of(config, struct mvpp2_port, phylink_config); } +static struct mvpp2_port *mvpp2_pcs_to_port(struct phylink_pcs *pcs) +{ + return container_of(pcs, struct mvpp2_port, phylink_pcs); +} + +static void mvpp2_xlg_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) +{ + struct mvpp2_port *port = mvpp2_pcs_to_port(pcs); + u32 val; + + state->speed = SPEED_10000; + state->duplex = 1; + state->an_complete = 1; + + val = readl(port->base + MVPP22_XLG_STATUS); + state->link = !!(val & MVPP22_XLG_STATUS_LINK_UP); + + state->pause = 0; + val = readl(port->base + MVPP22_XLG_CTRL0_REG); + if (val & MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN) + state->pause |= MLO_PAUSE_TX; + if (val & MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN) + state->pause |= MLO_PAUSE_RX; +} + +static int mvpp2_xlg_pcs_config(struct phylink_pcs *pcs, + unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) +{ + return 0; +} + +static const struct phylink_pcs_ops 
mvpp2_phylink_xlg_pcs_ops = {
+	.pcs_get_state = mvpp2_xlg_pcs_get_state,
+	.pcs_config = mvpp2_xlg_pcs_config,
+};
+
+static void mvpp2_gmac_pcs_get_state(struct phylink_pcs *pcs,
+				     struct phylink_link_state *state)
+{
+	struct mvpp2_port *port = mvpp2_pcs_to_port(pcs);
+	u32 val;
+
+	val = readl(port->base + MVPP2_GMAC_STATUS0);
+
+	state->an_complete = !!(val & MVPP2_GMAC_STATUS0_AN_COMPLETE);
+	state->link = !!(val & MVPP2_GMAC_STATUS0_LINK_UP);
+	state->duplex = !!(val & MVPP2_GMAC_STATUS0_FULL_DUPLEX);
+
+	switch (port->phy_interface) {
+	case PHY_INTERFACE_MODE_1000BASEX:
+		state->speed = SPEED_1000;
+		break;
+	case PHY_INTERFACE_MODE_2500BASEX:
+		state->speed = SPEED_2500;
+		break;
+	default:
+		if (val & MVPP2_GMAC_STATUS0_GMII_SPEED)
+			state->speed = SPEED_1000;
+		else if (val & MVPP2_GMAC_STATUS0_MII_SPEED)
+			state->speed = SPEED_100;
+		else
+			state->speed = SPEED_10;
+	}
+
+	state->pause = 0;
+	if (val & MVPP2_GMAC_STATUS0_RX_PAUSE)
+		state->pause |= MLO_PAUSE_RX;
+	if (val & MVPP2_GMAC_STATUS0_TX_PAUSE)
+		state->pause |= MLO_PAUSE_TX;
+}
+
+static int mvpp2_gmac_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+				 phy_interface_t interface,
+				 const unsigned long *advertising,
+				 bool permit_pause_to_mac)
+{
+	struct mvpp2_port *port = mvpp2_pcs_to_port(pcs);
+	u32 mask, val, an, old_an, changed;
+
+	mask = MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS |
+	       MVPP2_GMAC_IN_BAND_AUTONEG |
+	       MVPP2_GMAC_AN_SPEED_EN |
+	       MVPP2_GMAC_FLOW_CTRL_AUTONEG |
+	       MVPP2_GMAC_AN_DUPLEX_EN;
+
+	if (phylink_autoneg_inband(mode)) {
+		mask |= MVPP2_GMAC_CONFIG_MII_SPEED |
+			MVPP2_GMAC_CONFIG_GMII_SPEED |
+			MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+		val = MVPP2_GMAC_IN_BAND_AUTONEG;
+
+		if (interface == PHY_INTERFACE_MODE_SGMII) {
+			/* SGMII mode receives the speed and duplex from PHY */
+			val |= MVPP2_GMAC_AN_SPEED_EN |
+			       MVPP2_GMAC_AN_DUPLEX_EN;
+		} else {
+			/* 802.3z mode has fixed speed and duplex */
+			val |= MVPP2_GMAC_CONFIG_GMII_SPEED |
+			       MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+
+			/* The FLOW_CTRL_AUTONEG bit selects whether the GMAC
+			 * pause modes are controlled automatically by the
+			 * hardware or manually via the bits in
+			 * MVPP22_GMAC_CTRL_4_REG.
+ */ + if (permit_pause_to_mac) + val |= MVPP2_GMAC_FLOW_CTRL_AUTONEG; + + /* Configure advertisement bits */ + mask |= MVPP2_GMAC_FC_ADV_EN | MVPP2_GMAC_FC_ADV_ASM_EN; + if (phylink_test(advertising, Pause)) + val |= MVPP2_GMAC_FC_ADV_EN; + if (phylink_test(advertising, Asym_Pause)) + val |= MVPP2_GMAC_FC_ADV_ASM_EN; + } + } else { + val = 0; + } + + old_an = an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + an = (an & ~mask) | val; + changed = an ^ old_an; + if (changed) + writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + + /* We are only interested in the advertisement bits changing */ + return changed & (MVPP2_GMAC_FC_ADV_EN | MVPP2_GMAC_FC_ADV_ASM_EN); +} + +static void mvpp2_gmac_pcs_an_restart(struct phylink_pcs *pcs) +{ + struct mvpp2_port *port = mvpp2_pcs_to_port(pcs); + u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + + writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN, + port->base + MVPP2_GMAC_AUTONEG_CONFIG); + writel(val & ~MVPP2_GMAC_IN_BAND_RESTART_AN, + port->base + MVPP2_GMAC_AUTONEG_CONFIG); +} + +static const struct phylink_pcs_ops mvpp2_phylink_gmac_pcs_ops = { + .pcs_get_state = mvpp2_gmac_pcs_get_state, + .pcs_config = mvpp2_gmac_pcs_config, + .pcs_an_restart = mvpp2_gmac_pcs_an_restart, +}; + static void mvpp2_phylink_validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state) @@ -5480,89 +5921,6 @@ empty_set: bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static void mvpp22_xlg_pcs_get_state(struct mvpp2_port *port, - struct phylink_link_state *state) -{ - u32 val; - - state->speed = SPEED_10000; - state->duplex = 1; - state->an_complete = 1; - - val = readl(port->base + MVPP22_XLG_STATUS); - state->link = !!(val & MVPP22_XLG_STATUS_LINK_UP); - - state->pause = 0; - val = readl(port->base + MVPP22_XLG_CTRL0_REG); - if (val & MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN) - state->pause |= MLO_PAUSE_TX; - if (val & MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN) - state->pause |= MLO_PAUSE_RX; -} - -static void mvpp2_gmac_pcs_get_state(struct mvpp2_port *port, - struct phylink_link_state *state) -{ - u32 val; - - val = readl(port->base + MVPP2_GMAC_STATUS0); - - state->an_complete = !!(val & MVPP2_GMAC_STATUS0_AN_COMPLETE); - state->link = !!(val & MVPP2_GMAC_STATUS0_LINK_UP); - state->duplex = !!(val & MVPP2_GMAC_STATUS0_FULL_DUPLEX); - - switch (port->phy_interface) { - case PHY_INTERFACE_MODE_1000BASEX: - state->speed = SPEED_1000; - break; - case PHY_INTERFACE_MODE_2500BASEX: - state->speed = SPEED_2500; - break; - default: - if (val & MVPP2_GMAC_STATUS0_GMII_SPEED) - state->speed = SPEED_1000; - else if (val & MVPP2_GMAC_STATUS0_MII_SPEED) - state->speed = SPEED_100; - else - state->speed = SPEED_10; - } - - state->pause = 0; - if (val & MVPP2_GMAC_STATUS0_RX_PAUSE) - state->pause |= MLO_PAUSE_RX; - if (val & MVPP2_GMAC_STATUS0_TX_PAUSE) - state->pause |= MLO_PAUSE_TX; -} - -static void mvpp2_phylink_mac_pcs_get_state(struct phylink_config *config, - struct phylink_link_state *state) -{ - struct mvpp2_port *port = mvpp2_phylink_to_port(config); - - if (port->priv->hw_version == MVPP22 && port->gop_id == 0) { - u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG); - mode &= MVPP22_XLG_CTRL3_MACMODESELECT_MASK; - - if (mode == MVPP22_XLG_CTRL3_MACMODESELECT_10G) { - mvpp22_xlg_pcs_get_state(port, state); - return; - } - } - - mvpp2_gmac_pcs_get_state(port, state); -} - -static void mvpp2_mac_an_restart(struct phylink_config *config) -{ - struct mvpp2_port *port = mvpp2_phylink_to_port(config); - u32 val = readl(port->base + 
MVPP2_GMAC_AUTONEG_CONFIG); - - writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN, - port->base + MVPP2_GMAC_AUTONEG_CONFIG); - writel(val & ~MVPP2_GMAC_IN_BAND_RESTART_AN, - port->base + MVPP2_GMAC_AUTONEG_CONFIG); -} - static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode, const struct phylink_link_state *state) { @@ -5586,23 +5944,16 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode, static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, const struct phylink_link_state *state) { - u32 old_an, an; u32 old_ctrl0, ctrl0; u32 old_ctrl2, ctrl2; u32 old_ctrl4, ctrl4; - old_an = an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); old_ctrl0 = ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG); old_ctrl2 = ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG); old_ctrl4 = ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG); - an &= ~(MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FC_ADV_EN | - MVPP2_GMAC_FC_ADV_ASM_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG | - MVPP2_GMAC_AN_DUPLEX_EN | MVPP2_GMAC_IN_BAND_AUTONEG | - MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS); ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK; - ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PORT_RESET_MASK | - MVPP2_GMAC_PCS_ENABLE_MASK); + ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK); /* Configure port type */ if (phy_interface_mode_is_8023z(state->interface)) { @@ -5624,12 +5975,6 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; } - /* Configure advertisement bits */ - if (phylink_test(state->advertising, Pause)) - an |= MVPP2_GMAC_FC_ADV_EN; - if (phylink_test(state->advertising, Asym_Pause)) - an |= MVPP2_GMAC_FC_ADV_ASM_EN; - /* Configure negotiation style */ if (!phylink_autoneg_inband(mode)) { /* Phy or fixed speed - no in-band AN, nothing to do, leave the @@ -5638,14 +5983,6 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, } else if (state->interface == PHY_INTERFACE_MODE_SGMII) { /* SGMII in-band mode receives the speed and duplex from * the PHY. Flow control information is not received. */ - an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | - MVPP2_GMAC_FORCE_LINK_PASS | - MVPP2_GMAC_CONFIG_MII_SPEED | - MVPP2_GMAC_CONFIG_GMII_SPEED | - MVPP2_GMAC_CONFIG_FULL_DUPLEX); - an |= MVPP2_GMAC_IN_BAND_AUTONEG | - MVPP2_GMAC_AN_SPEED_EN | - MVPP2_GMAC_AN_DUPLEX_EN; } else if (phy_interface_mode_is_8023z(state->interface)) { /* 1000BaseX and 2500BaseX ports cannot negotiate speed nor can * they negotiate duplex: they are always operating with a fixed @@ -5653,42 +5990,6 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, * speed and full duplex here. */ ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK; - an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | - MVPP2_GMAC_FORCE_LINK_PASS | - MVPP2_GMAC_CONFIG_MII_SPEED | - MVPP2_GMAC_CONFIG_GMII_SPEED | - MVPP2_GMAC_CONFIG_FULL_DUPLEX); - an |= MVPP2_GMAC_IN_BAND_AUTONEG | - MVPP2_GMAC_CONFIG_GMII_SPEED | - MVPP2_GMAC_CONFIG_FULL_DUPLEX; - - if (state->pause & MLO_PAUSE_AN && state->an_enabled) - an |= MVPP2_GMAC_FLOW_CTRL_AUTONEG; - } - -/* Some fields of the auto-negotiation register require the port to be down when - * their value is updated. 
- */ -#define MVPP2_GMAC_AN_PORT_DOWN_MASK \ - (MVPP2_GMAC_IN_BAND_AUTONEG | \ - MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS | \ - MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED | \ - MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_CONFIG_FULL_DUPLEX | \ - MVPP2_GMAC_AN_DUPLEX_EN) - - if ((old_ctrl0 ^ ctrl0) & MVPP2_GMAC_PORT_TYPE_MASK || - (old_ctrl2 ^ ctrl2) & MVPP2_GMAC_INBAND_AN_MASK || - (old_an ^ an) & MVPP2_GMAC_AN_PORT_DOWN_MASK) { - /* Force link down */ - old_an &= ~MVPP2_GMAC_FORCE_LINK_PASS; - old_an |= MVPP2_GMAC_FORCE_LINK_DOWN; - writel(old_an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); - - /* Set the GMAC in a reset state - do this in a way that - * ensures we clear it below. - */ - old_ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK; - writel(old_ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); } if (old_ctrl0 != ctrl0) @@ -5697,41 +5998,85 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); if (old_ctrl4 != ctrl4) writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG); - if (old_an != an) - writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); - - if (old_ctrl2 & MVPP2_GMAC_PORT_RESET_MASK) { - while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) & - MVPP2_GMAC_PORT_RESET_MASK) - continue; - } } -static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode, - const struct phylink_link_state *state) +static int mvpp2__mac_prepare(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) { struct mvpp2_port *port = mvpp2_phylink_to_port(config); - bool change_interface = port->phy_interface != state->interface; /* Check for invalid configuration */ - if (mvpp2_is_xlg(state->interface) && port->gop_id != 0) { + if (mvpp2_is_xlg(interface) && port->gop_id != 0) { netdev_err(port->dev, "Invalid mode on %s\n", port->dev->name); - return; + return -EINVAL; + } + + if (port->phy_interface != interface || + phylink_autoneg_inband(mode)) { + /* Force the link down when changing the interface or if in + * in-band mode to ensure we do not change the configuration + * while the hardware is indicating link is up. We force both + * XLG and GMAC down to ensure that they're both in a known + * state. + */ + mvpp2_modify(port->base + MVPP2_GMAC_AUTONEG_CONFIG, + MVPP2_GMAC_FORCE_LINK_PASS | + MVPP2_GMAC_FORCE_LINK_DOWN, + MVPP2_GMAC_FORCE_LINK_DOWN); + + if (mvpp2_port_supports_xlg(port)) + mvpp2_modify(port->base + MVPP22_XLG_CTRL0_REG, + MVPP22_XLG_CTRL0_FORCE_LINK_PASS | + MVPP22_XLG_CTRL0_FORCE_LINK_DOWN, + MVPP22_XLG_CTRL0_FORCE_LINK_DOWN); } /* Make sure the port is disabled when reconfiguring the mode */ mvpp2_port_disable(port); - if (port->priv->hw_version == MVPP22 && change_interface) { - mvpp22_gop_mask_irq(port); + if (port->phy_interface != interface) { + /* Place GMAC into reset */ + mvpp2_modify(port->base + MVPP2_GMAC_CTRL_2_REG, + MVPP2_GMAC_PORT_RESET_MASK, + MVPP2_GMAC_PORT_RESET_MASK); - port->phy_interface = state->interface; + if (port->priv->hw_version == MVPP22) { + mvpp22_gop_mask_irq(port); - /* Reconfigure the serdes lanes */ - phy_power_off(port->comphy); - mvpp22_mode_reconfigure(port); + phy_power_off(port->comphy); + } } + /* Select the appropriate PCS operations depending on the + * configured interface mode. We will only switch to a mode + * that the validate() checks have already passed. 
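+	 * The mvpp2_is_xlg() interface modes are handled by the XLG MAC
+	 * and hence use the XLG PCS ops; every other interface mode is
+	 * served by the GMAC PCS ops.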
+ */ + if (mvpp2_is_xlg(interface)) + port->phylink_pcs.ops = &mvpp2_phylink_xlg_pcs_ops; + else + port->phylink_pcs.ops = &mvpp2_phylink_gmac_pcs_ops; + + return 0; +} + +static int mvpp2_mac_prepare(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) +{ + struct mvpp2_port *port = mvpp2_phylink_to_port(config); + int ret; + + ret = mvpp2__mac_prepare(config, mode, interface); + if (ret == 0) + phylink_set_pcs(port->phylink, &port->phylink_pcs); + + return ret; +} + +static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ + struct mvpp2_port *port = mvpp2_phylink_to_port(config); + /* mac (re)configuration */ if (mvpp2_is_xlg(state->interface)) mvpp2_xlg_config(port, mode, state); @@ -5742,11 +6087,51 @@ static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode, if (port->priv->hw_version == MVPP21 && port->flags & MVPP2_F_LOOPBACK) mvpp2_port_loopback_set(port, state); +} + +static int mvpp2_mac_finish(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) +{ + struct mvpp2_port *port = mvpp2_phylink_to_port(config); + + if (port->priv->hw_version == MVPP22 && + port->phy_interface != interface) { + port->phy_interface = interface; - if (port->priv->hw_version == MVPP22 && change_interface) + /* Reconfigure the serdes lanes */ + mvpp22_mode_reconfigure(port); + + /* Unmask interrupts */ mvpp22_gop_unmask_irq(port); + } + + if (!mvpp2_is_xlg(interface)) { + /* Release GMAC reset and wait */ + mvpp2_modify(port->base + MVPP2_GMAC_CTRL_2_REG, + MVPP2_GMAC_PORT_RESET_MASK, 0); + + while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) & + MVPP2_GMAC_PORT_RESET_MASK) + continue; + } mvpp2_port_enable(port); + + /* Allow the link to come up if in in-band mode, otherwise the + * link is forced via mac_link_down()/mac_link_up() + */ + if (phylink_autoneg_inband(mode)) { + if (mvpp2_is_xlg(interface)) + mvpp2_modify(port->base + MVPP22_XLG_CTRL0_REG, + MVPP22_XLG_CTRL0_FORCE_LINK_PASS | + MVPP22_XLG_CTRL0_FORCE_LINK_DOWN, 0); + else + mvpp2_modify(port->base + MVPP2_GMAC_AUTONEG_CONFIG, + MVPP2_GMAC_FORCE_LINK_PASS | + MVPP2_GMAC_FORCE_LINK_DOWN, 0); + } + + return 0; } static void mvpp2_mac_link_up(struct phylink_config *config, @@ -5843,13 +6228,36 @@ static void mvpp2_mac_link_down(struct phylink_config *config, static const struct phylink_mac_ops mvpp2_phylink_ops = { .validate = mvpp2_phylink_validate, - .mac_pcs_get_state = mvpp2_phylink_mac_pcs_get_state, - .mac_an_restart = mvpp2_mac_an_restart, + .mac_prepare = mvpp2_mac_prepare, .mac_config = mvpp2_mac_config, + .mac_finish = mvpp2_mac_finish, .mac_link_up = mvpp2_mac_link_up, .mac_link_down = mvpp2_mac_link_down, }; +/* Work-around for ACPI */ +static void mvpp2_acpi_start(struct mvpp2_port *port) +{ + /* Phylink isn't used as of now for ACPI, so the MAC has to be + * configured manually when the interface is started. This will + * be removed as soon as the phylink ACPI support lands in. 
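+	 * The sequence below mirrors the order phylink itself would use:
+	 * mac_prepare(), mac_config(), pcs_config(), mac_finish() and
+	 * finally mac_link_up().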
+ */ + struct phylink_link_state state = { + .interface = port->phy_interface, + }; + mvpp2__mac_prepare(&port->phylink_config, MLO_AN_INBAND, + port->phy_interface); + mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state); + port->phylink_pcs.ops->pcs_config(&port->phylink_pcs, MLO_AN_INBAND, + port->phy_interface, + state.advertising, false); + mvpp2_mac_finish(&port->phylink_config, MLO_AN_INBAND, + port->phy_interface); + mvpp2_mac_link_up(&port->phylink_config, NULL, + MLO_AN_INBAND, port->phy_interface, + SPEED_UNKNOWN, DUPLEX_UNKNOWN, false, false); +} + /* Ports initialization */ static int mvpp2_port_probe(struct platform_device *pdev, struct fwnode_handle *port_fwnode, @@ -5937,16 +6345,16 @@ static int mvpp2_port_probe(struct platform_device *pdev, goto err_free_netdev; if (port_node) - port->link_irq = of_irq_get_byname(port_node, "link"); + port->port_irq = of_irq_get_byname(port_node, "link"); else - port->link_irq = fwnode_irq_get(port_fwnode, port->nqvecs + 1); - if (port->link_irq == -EPROBE_DEFER) { + port->port_irq = fwnode_irq_get(port_fwnode, port->nqvecs + 1); + if (port->port_irq == -EPROBE_DEFER) { err = -EPROBE_DEFER; goto err_deinit_qvecs; } - if (port->link_irq <= 0) + if (port->port_irq <= 0) /* the link irq is optional */ - port->link_irq = 0; + port->port_irq = 0; if (fwnode_property_read_bool(port_fwnode, "marvell,loopback")) port->flags |= MVPP2_F_LOOPBACK; @@ -5983,6 +6391,12 @@ static int mvpp2_port_probe(struct platform_device *pdev, port->stats_base = port->priv->iface_base + MVPP22_MIB_COUNTERS_OFFSET + port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ; + + /* We may want a property to describe whether we should use + * MAC hardware timestamping. + */ + if (priv->tai) + port->hwtstamp = true; } /* Alloc per-cpu and ethtool stats */ @@ -6110,8 +6524,8 @@ err_free_txq_pcpu: err_free_stats: free_percpu(port->stats); err_free_irq: - if (port->link_irq) - irq_dispose_mapping(port->link_irq); + if (port->port_irq) + irq_dispose_mapping(port->port_irq); err_deinit_qvecs: mvpp2_queue_vectors_deinit(port); err_free_netdev: @@ -6132,8 +6546,8 @@ static void mvpp2_port_remove(struct mvpp2_port *port) for (i = 0; i < port->ntxqs; i++) free_percpu(port->txqs[i]->pcpu); mvpp2_queue_vectors_deinit(port); - if (port->link_irq) - irq_dispose_mapping(port->link_irq); + if (port->port_irq) + irq_dispose_mapping(port->port_irq); free_netdev(port->dev); } @@ -6545,6 +6959,10 @@ static int mvpp2_probe(struct platform_device *pdev) goto err_axi_clk; } + err = mvpp22_tai_probe(&pdev->dev, priv); + if (err < 0) + goto err_axi_clk; + /* Initialize ports */ fwnode_for_each_available_child_node(fwnode, port_fwnode) { err = mvpp2_port_probe(pdev, port_fwnode, priv); @@ -6663,11 +7081,13 @@ static const struct of_device_id mvpp2_match[] = { }; MODULE_DEVICE_TABLE(of, mvpp2_match); +#ifdef CONFIG_ACPI static const struct acpi_device_id mvpp2_acpi_match[] = { { "MRVL0110", MVPP22 }, { }, }; MODULE_DEVICE_TABLE(acpi, mvpp2_acpi_match); +#endif static struct platform_driver mvpp2_driver = { .probe = mvpp2_probe, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_tai.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_tai.c new file mode 100644 index 000000000000..95862aff49f1 --- /dev/null +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_tai.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Marvell PP2.2 TAI support + * + * Note: + * Do NOT use the event capture support. + * Do Not even set the MPP muxes to allow PTP_EVENT_REQ to be used. 
+ * It will disrupt the operation of this driver, and there is nothing
+ * that this driver can do to prevent that. Even using PTP_EVENT_REQ
+ * as an output will be seen as a trigger input, which can't be masked.
+ * Whenever a trigger input is seen, the action in the TCFCR0_TCF
+ * field will be performed - whether it is a set, increment, decrement,
+ * read, or frequency update.
+ *
+ * Other notes (useful, not specified in the documentation):
+ * - PTP_PULSE_OUT (PTP_EVENT_REQ MPP)
+ *   It looks like the hardware can't generate a pulse at nsec=0. (The
+ *   output doesn't trigger if the nsec field is zero.)
+ *   Note: when configured as an output via the register at 0xfX441120,
+ *   the input is still very much alive, and will trigger the current TCF
+ *   function.
+ * - PTP_CLK_OUT (PTP_TRIG_GEN MPP)
+ *   This generates a "PPS" signal determined by the CCC registers. It
+ *   seems this is not aligned to the TOD counter in any way (it may be
+ *   initially, but if you specify a non-round second interval, it won't,
+ *   and you can't easily get it back.)
+ * - PTP_PCLK_OUT
+ *   This generates a 50% duty cycle clock based on the TOD counter, and
+ *   it seems it can be set to any period with 1ns resolution. It is
+ *   probably limited by the TOD step size. Its period is defined by the
+ *   PCLK_CCC registers. Again, its alignment to the second is
+ *   questionable.
+ *
+ * Consequently, we support none of these.
+ */
+#include <linux/io.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/slab.h>
+
+#include "mvpp2.h"
+
+#define CR0_SW_NRESET			BIT(0)
+
+#define TCFCR0_PHASE_UPDATE_ENABLE	BIT(8)
+#define TCFCR0_TCF_MASK			(7 << 2)
+#define TCFCR0_TCF_UPDATE		(0 << 2)
+#define TCFCR0_TCF_FREQUPDATE		(1 << 2)
+#define TCFCR0_TCF_INCREMENT		(2 << 2)
+#define TCFCR0_TCF_DECREMENT		(3 << 2)
+#define TCFCR0_TCF_CAPTURE		(4 << 2)
+#define TCFCR0_TCF_NOP			(7 << 2)
+#define TCFCR0_TCF_TRIGGER		BIT(0)
+
+#define TCSR_CAPTURE_1_VALID		BIT(1)
+#define TCSR_CAPTURE_0_VALID		BIT(0)
+
+struct mvpp2_tai {
+	struct ptp_clock_info caps;
+	struct ptp_clock *ptp_clock;
+	void __iomem *base;
+	spinlock_t lock;
+	u64 period;		// nanosecond period in 32.32 fixed point
+	/* This timestamp is updated every two seconds */
+	struct timespec64 stamp;
+};
+
+static void mvpp2_tai_modify(void __iomem *reg, u32 mask, u32 set)
+{
+	u32 val;
+
+	val = readl_relaxed(reg) & ~mask;
+	val |= set & mask;
+	writel(val, reg);
+}
+
+static void mvpp2_tai_write(u32 val, void __iomem *reg)
+{
+	writel_relaxed(val & 0xffff, reg);
+}
+
+static u32 mvpp2_tai_read(void __iomem *reg)
+{
+	return readl_relaxed(reg) & 0xffff;
+}
+
+static struct mvpp2_tai *ptp_to_tai(struct ptp_clock_info *ptp)
+{
+	return container_of(ptp, struct mvpp2_tai, caps);
+}
+
+static void mvpp22_tai_read_ts(struct timespec64 *ts, void __iomem *base)
+{
+	ts->tv_sec = (u64)mvpp2_tai_read(base + 0) << 32 |
+		     mvpp2_tai_read(base + 4) << 16 |
+		     mvpp2_tai_read(base + 8);
+
+	ts->tv_nsec = mvpp2_tai_read(base + 12) << 16 |
+		      mvpp2_tai_read(base + 16);
+
+	/* Read and discard fractional part */
+	readl_relaxed(base + 20);
+	readl_relaxed(base + 24);
+}
+
+static void mvpp2_tai_write_tlv(const struct timespec64 *ts, u32 frac,
+				void __iomem *base)
+{
+	mvpp2_tai_write(ts->tv_sec >> 32, base + MVPP22_TAI_TLV_SEC_HIGH);
+	mvpp2_tai_write(ts->tv_sec >> 16, base + MVPP22_TAI_TLV_SEC_MED);
+	mvpp2_tai_write(ts->tv_sec, base + MVPP22_TAI_TLV_SEC_LOW);
+	mvpp2_tai_write(ts->tv_nsec >> 16, base + MVPP22_TAI_TLV_NANO_HIGH);
+	mvpp2_tai_write(ts->tv_nsec, base + MVPP22_TAI_TLV_NANO_LOW);
+	mvpp2_tai_write(frac
>> 16, base + MVPP22_TAI_TLV_FRAC_HIGH); + mvpp2_tai_write(frac, base + MVPP22_TAI_TLV_FRAC_LOW); +} + +static void mvpp2_tai_op(u32 op, void __iomem *base) +{ + /* Trigger the operation. Note that an external unmaskable + * event on PTP_EVENT_REQ will also trigger this action. + */ + mvpp2_tai_modify(base + MVPP22_TAI_TCFCR0, + TCFCR0_TCF_MASK | TCFCR0_TCF_TRIGGER, + op | TCFCR0_TCF_TRIGGER); + mvpp2_tai_modify(base + MVPP22_TAI_TCFCR0, TCFCR0_TCF_MASK, + TCFCR0_TCF_NOP); +} + +/* The adjustment has a range of +0.5ns to -0.5ns in 2^32 steps, so has units + * of 2^-32 ns. + * + * units(s) = 1 / (2^32 * 10^9) + * fractional = abs_scaled_ppm / (2^16 * 10^6) + * + * What we want to achieve: + * freq_adjusted = freq_nominal * (1 + fractional) + * freq_delta = freq_adjusted - freq_nominal => positive = faster + * freq_delta = freq_nominal * (1 + fractional) - freq_nominal + * So: freq_delta = freq_nominal * fractional + * + * However, we are dealing with periods, so: + * period_adjusted = period_nominal / (1 + fractional) + * period_delta = period_nominal - period_adjusted => positive = faster + * period_delta = period_nominal * fractional / (1 + fractional) + * + * Hence: + * period_delta = period_nominal * abs_scaled_ppm / + * (2^16 * 10^6 + abs_scaled_ppm) + * + * To avoid overflow, we reduce both sides of the divide operation by a factor + * of 16. + */ +static u64 mvpp22_calc_frac_ppm(struct mvpp2_tai *tai, long abs_scaled_ppm) +{ + u64 val = tai->period * abs_scaled_ppm >> 4; + + return div_u64(val, (1000000 << 12) + (abs_scaled_ppm >> 4)); +} + +static s32 mvpp22_calc_max_adj(struct mvpp2_tai *tai) +{ + return 1000000; +} + +static int mvpp22_tai_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mvpp2_tai *tai = ptp_to_tai(ptp); + unsigned long flags; + void __iomem *base; + bool neg_adj; + s32 frac; + u64 val; + + neg_adj = scaled_ppm < 0; + if (neg_adj) + scaled_ppm = -scaled_ppm; + + val = mvpp22_calc_frac_ppm(tai, scaled_ppm); + + /* Convert to a signed 32-bit adjustment */ + if (neg_adj) { + /* -S32_MIN warns, -val < S32_MIN fails, so go for the easy + * solution. + */ + if (val > 0x80000000) + return -ERANGE; + + frac = -val; + } else { + if (val > S32_MAX) + return -ERANGE; + + frac = val; + } + + base = tai->base; + spin_lock_irqsave(&tai->lock, flags); + mvpp2_tai_write(frac >> 16, base + MVPP22_TAI_TLV_FRAC_HIGH); + mvpp2_tai_write(frac, base + MVPP22_TAI_TLV_FRAC_LOW); + mvpp2_tai_op(TCFCR0_TCF_FREQUPDATE, base); + spin_unlock_irqrestore(&tai->lock, flags); + + return 0; +} + +static int mvpp22_tai_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mvpp2_tai *tai = ptp_to_tai(ptp); + struct timespec64 ts; + unsigned long flags; + void __iomem *base; + u32 tcf; + + /* We can't deal with S64_MIN */ + if (delta == S64_MIN) + return -ERANGE; + + if (delta < 0) { + delta = -delta; + tcf = TCFCR0_TCF_DECREMENT; + } else { + tcf = TCFCR0_TCF_INCREMENT; + } + + ts = ns_to_timespec64(delta); + + base = tai->base; + spin_lock_irqsave(&tai->lock, flags); + mvpp2_tai_write_tlv(&ts, 0, base); + mvpp2_tai_op(tcf, base); + spin_unlock_irqrestore(&tai->lock, flags); + + return 0; +} + +static int mvpp22_tai_gettimex64(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mvpp2_tai *tai = ptp_to_tai(ptp); + unsigned long flags; + void __iomem *base; + u32 tcsr; + int ret; + + base = tai->base; + spin_lock_irqsave(&tai->lock, flags); + /* XXX: the only way to read the PTP time is for the CPU to trigger + * an event. 
However, there is no way to distinguish between the CPU + * triggered event, and an external event on PTP_EVENT_REQ. So this + * is incompatible with external use of PTP_EVENT_REQ. + */ + ptp_read_system_prets(sts); + mvpp2_tai_modify(base + MVPP22_TAI_TCFCR0, + TCFCR0_TCF_MASK | TCFCR0_TCF_TRIGGER, + TCFCR0_TCF_CAPTURE | TCFCR0_TCF_TRIGGER); + ptp_read_system_postts(sts); + mvpp2_tai_modify(base + MVPP22_TAI_TCFCR0, TCFCR0_TCF_MASK, + TCFCR0_TCF_NOP); + + tcsr = readl(base + MVPP22_TAI_TCSR); + if (tcsr & TCSR_CAPTURE_1_VALID) { + mvpp22_tai_read_ts(ts, base + MVPP22_TAI_TCV1_SEC_HIGH); + ret = 0; + } else if (tcsr & TCSR_CAPTURE_0_VALID) { + mvpp22_tai_read_ts(ts, base + MVPP22_TAI_TCV0_SEC_HIGH); + ret = 0; + } else { + /* We don't seem to have a reading... */ + ret = -EBUSY; + } + spin_unlock_irqrestore(&tai->lock, flags); + + return ret; +} + +static int mvpp22_tai_settime64(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mvpp2_tai *tai = ptp_to_tai(ptp); + unsigned long flags; + void __iomem *base; + + base = tai->base; + spin_lock_irqsave(&tai->lock, flags); + mvpp2_tai_write_tlv(ts, 0, base); + + /* Trigger an update to load the value from the TLV registers + * into the TOD counter. Note that an external unmaskable event on + * PTP_EVENT_REQ will also trigger this action. + */ + mvpp2_tai_modify(base + MVPP22_TAI_TCFCR0, + TCFCR0_PHASE_UPDATE_ENABLE | + TCFCR0_TCF_MASK | TCFCR0_TCF_TRIGGER, + TCFCR0_TCF_UPDATE | TCFCR0_TCF_TRIGGER); + mvpp2_tai_modify(base + MVPP22_TAI_TCFCR0, TCFCR0_TCF_MASK, + TCFCR0_TCF_NOP); + spin_unlock_irqrestore(&tai->lock, flags); + + return 0; +} + +static long mvpp22_tai_aux_work(struct ptp_clock_info *ptp) +{ + struct mvpp2_tai *tai = ptp_to_tai(ptp); + + mvpp22_tai_gettimex64(ptp, &tai->stamp, NULL); + + return msecs_to_jiffies(2000); +} + +static void mvpp22_tai_set_step(struct mvpp2_tai *tai) +{ + void __iomem *base = tai->base; + u32 nano, frac; + + nano = upper_32_bits(tai->period); + frac = lower_32_bits(tai->period); + + /* As the fractional nanosecond is a signed offset, if the MSB (sign) + * bit is set, we have to increment the whole nanoseconds. + */ + if (frac >= 0x80000000) + nano += 1; + + mvpp2_tai_write(nano, base + MVPP22_TAI_TOD_STEP_NANO_CR); + mvpp2_tai_write(frac >> 16, base + MVPP22_TAI_TOD_STEP_FRAC_HIGH); + mvpp2_tai_write(frac, base + MVPP22_TAI_TOD_STEP_FRAC_LOW); +} + +static void mvpp22_tai_init(struct mvpp2_tai *tai) +{ + void __iomem *base = tai->base; + + mvpp22_tai_set_step(tai); + + /* Release the TAI reset */ + mvpp2_tai_modify(base + MVPP22_TAI_CR0, CR0_SW_NRESET, CR0_SW_NRESET); +} + +int mvpp22_tai_ptp_clock_index(struct mvpp2_tai *tai) +{ + return ptp_clock_index(tai->ptp_clock); +} + +void mvpp22_tai_tstamp(struct mvpp2_tai *tai, u32 tstamp, + struct skb_shared_hwtstamps *hwtstamp) +{ + struct timespec64 ts; + int delta; + + /* The tstamp consists of 2 bits of seconds and 30 bits of nanoseconds. + * We use our stored timestamp (tai->stamp) to form a full timestamp, + * and we must read the seconds exactly once. + */ + ts.tv_sec = READ_ONCE(tai->stamp.tv_sec); + ts.tv_nsec = tstamp & 0x3fffffff; + + /* Calculate the delta in seconds between our stored timestamp and + * the value read from the queue. Allow timestamps one second in the + * past, otherwise consider them to be in the future. 
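+	 * For example, if the stored seconds end in binary 11 (3) and the
+	 * queue entry carries seconds bits 00 (0), delta is (0 - 3) & 3 = 1,
+	 * so the packet was stamped one second after the cached timestamp.
+	 * A delta of 3 is rewritten as -1, i.e. one second in the past.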
+	 */
+	delta = ((tstamp >> 30) - (ts.tv_sec & 3)) & 3;
+	if (delta == 3)
+		delta -= 4;
+	ts.tv_sec += delta;
+
+	memset(hwtstamp, 0, sizeof(*hwtstamp));
+	hwtstamp->hwtstamp = timespec64_to_ktime(ts);
+}
+
+void mvpp22_tai_start(struct mvpp2_tai *tai)
+{
+	long delay;
+
+	delay = mvpp22_tai_aux_work(&tai->caps);
+
+	ptp_schedule_worker(tai->ptp_clock, delay);
+}
+
+void mvpp22_tai_stop(struct mvpp2_tai *tai)
+{
+	ptp_cancel_worker_sync(tai->ptp_clock);
+}
+
+static void mvpp22_tai_remove(void *priv)
+{
+	struct mvpp2_tai *tai = priv;
+
+	if (!IS_ERR(tai->ptp_clock))
+		ptp_clock_unregister(tai->ptp_clock);
+}
+
+int mvpp22_tai_probe(struct device *dev, struct mvpp2 *priv)
+{
+	struct mvpp2_tai *tai;
+	int ret;
+
+	tai = devm_kzalloc(dev, sizeof(*tai), GFP_KERNEL);
+	if (!tai)
+		return -ENOMEM;
+
+	spin_lock_init(&tai->lock);
+
+	tai->base = priv->iface_base;
+
+	/* The step size consists of three registers - a 16-bit nanosecond step
+	 * size, and a 32-bit fractional nanosecond step size split over two
+	 * registers. The fractional nanosecond step size has units of 2^-32ns.
+	 *
+	 * To calculate this, we compute:
+	 *   (10^9 + freq / 2) / (freq * 2^-32)
+	 * which gives us the nanosecond step to the nearest integer in 16.32
+	 * fixed point format, and the fractional part of the step size with
+	 * the MSB inverted. With rounding of the fractional nanosecond, and
+	 * simplification, this becomes:
+	 *   (10^9 << 32 + freq << 31 + (freq + 1) >> 1) / freq
+	 *
+	 * So:
+	 *   div = (10^9 << 32 + freq << 31 + (freq + 1) >> 1) / freq
+	 *   nano = upper_32_bits(div);
+	 *   frac = lower_32_bits(div) ^ 0x80000000;
+	 * will give the values for the registers.
+	 *
+	 * This all seems perfect, but alas it is not when considering the
+	 * whole story. The system is clocked from 25MHz, which is multiplied
+	 * by a PLL to 1GHz, and then divided by three, giving 333333333Hz
+	 * (recurring). This gives exactly 3ns, but using 333333333Hz with
+	 * the above gives an error of 13*2^-32ns.
+	 *
+	 * Consequently, we use the period rather than calculating from the
+	 * frequency.
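+	 * As a check on the 13*2^-32ns figure: 10^9 / 333333333 is
+	 * 3 + 1/333333333 ns, and 1/333333333 ns is about 12.9 units of
+	 * 2^-32 ns (2^32 / 333333333), which rounds to 13. The value
+	 * 3ULL << 32 below encodes exactly 3.0 ns in the driver's 32.32
+	 * fixed point representation.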
+ */ + tai->period = 3ULL << 32; + + mvpp22_tai_init(tai); + + tai->caps.owner = THIS_MODULE; + strscpy(tai->caps.name, "Marvell PP2.2", sizeof(tai->caps.name)); + tai->caps.max_adj = mvpp22_calc_max_adj(tai); + tai->caps.adjfine = mvpp22_tai_adjfine; + tai->caps.adjtime = mvpp22_tai_adjtime; + tai->caps.gettimex64 = mvpp22_tai_gettimex64; + tai->caps.settime64 = mvpp22_tai_settime64; + tai->caps.do_aux_work = mvpp22_tai_aux_work; + + ret = devm_add_action(dev, mvpp22_tai_remove, tai); + if (ret) + return ret; + + tai->ptp_clock = ptp_clock_register(&tai->caps, dev); + if (IS_ERR(tai->ptp_clock)) + return PTR_ERR(tai->ptp_clock); + + priv->tai = tai; + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index 1b25948c662b..2f7a861d0c7b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -3,9 +3,10 @@ # Makefile for Marvell's OcteonTX2 RVU Admin Function driver # +ccflags-y += -I$(src) obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o -octeontx2_mbox-y := mbox.o +octeontx2_mbox-y := mbox.o rvu_trace.o octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ - rvu_reg.o rvu_npc.o rvu_debugfs.o + rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index a4e65da8d95b..8f17e26dca53 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -468,6 +468,35 @@ static void cgx_lmac_pause_frm_config(struct cgx *cgx, int lmac_id, bool enable) } } +void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!cgx) + return; + + if (enable) { + /* Enable inbound PTP timestamping */ + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg |= CGX_GMP_GMI_RXX_FRM_CTL_PTP_MODE; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg |= CGX_SMUX_RX_FRM_CTL_PTP_MODE; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } else { + /* Disable inbound PTP stamping */ + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_PTP_MODE; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg &= ~CGX_SMUX_RX_FRM_CTL_PTP_MODE; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } +} + /* CGX Firmware interface low level support */ static int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 394f96591feb..27ca3291682b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -58,8 +58,10 @@ #define CGXX_SMUX_RX_FRM_CTL 0x20020 #define CGX_SMUX_RX_FRM_CTL_CTL_BCK BIT_ULL(3) +#define CGX_SMUX_RX_FRM_CTL_PTP_MODE BIT_ULL(12) #define CGXX_GMP_GMI_RXX_FRM_CTL 0x38028 #define CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK BIT_ULL(3) +#define CGX_GMP_GMI_RXX_FRM_CTL_PTP_MODE BIT_ULL(12) #define CGXX_SMUX_TX_CTL 0x20178 #define CGXX_SMUX_TX_PAUSE_PKT_TIME 0x20110 #define CGXX_SMUX_TX_PAUSE_PKT_INTERVAL 0x20120 @@ -139,4 +141,6 @@ int cgx_lmac_get_pause_frm(void *cgxd, int lmac_id, u8 *tx_pause, u8 *rx_pause); int cgx_lmac_set_pause_frm(void *cgxd, int lmac_id, u8 tx_pause, u8 
rx_pause); +void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable); + #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 2718fe201c14..bbabb8e64201 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -14,6 +14,7 @@ #include "rvu_reg.h" #include "mbox.h" +#include "rvu_trace.h" static const u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); @@ -207,6 +208,9 @@ void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) */ tx_hdr->num_msgs = mdev->num_msgs; rx_hdr->num_msgs = 0; + + trace_otx2_msg_send(mbox->pdev, tx_hdr->num_msgs, tx_hdr->msg_size); + spin_unlock(&mdev->mbox_lock); /* The interrupt should be fired after num_msgs is written @@ -303,10 +307,15 @@ int otx2_mbox_check_rsp_msgs(struct otx2_mbox *mbox, int devid) struct mbox_msghdr *preq = mdev->mbase + ireq; struct mbox_msghdr *prsp = mdev->mbase + irsp; - if (preq->id != prsp->id) + if (preq->id != prsp->id) { + trace_otx2_msg_check(mbox->pdev, preq->id, + prsp->id, prsp->rc); goto exit; + } if (prsp->rc) { rc = prsp->rc; + trace_otx2_msg_check(mbox->pdev, preq->id, + prsp->id, prsp->rc); goto exit; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index ab433789d2c3..263a21129416 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -128,6 +128,7 @@ M(ATTACH_RESOURCES, 0x002, attach_resources, rsrc_attach, msg_rsp) \ M(DETACH_RESOURCES, 0x003, detach_resources, rsrc_detach, msg_rsp) \ M(MSIX_OFFSET, 0x005, msix_offset, msg_req, msix_offset_rsp) \ M(VF_FLR, 0x006, vf_flr, msg_req, msg_rsp) \ +M(PTP_OP, 0x007, ptp_op, ptp_req, ptp_rsp) \ M(GET_HW_CAP, 0x008, get_hw_cap, msg_req, get_hw_cap_rsp) \ /* CGX mbox IDs (range 0x200 - 0x3FF) */ \ M(CGX_START_RXTX, 0x200, cgx_start_rxtx, msg_req, msg_rsp) \ @@ -144,6 +145,8 @@ M(CGX_STOP_LINKEVENTS, 0x208, cgx_stop_linkevents, msg_req, msg_rsp) \ M(CGX_GET_LINKINFO, 0x209, cgx_get_linkinfo, msg_req, cgx_link_info_msg) \ M(CGX_INTLBK_ENABLE, 0x20A, cgx_intlbk_enable, msg_req, msg_rsp) \ M(CGX_INTLBK_DISABLE, 0x20B, cgx_intlbk_disable, msg_req, msg_rsp) \ +M(CGX_PTP_RX_ENABLE, 0x20C, cgx_ptp_rx_enable, msg_req, msg_rsp) \ +M(CGX_PTP_RX_DISABLE, 0x20D, cgx_ptp_rx_disable, msg_req, msg_rsp) \ M(CGX_CFG_PAUSE_FRM, 0x20E, cgx_cfg_pause_frm, cgx_pause_frm_cfg, \ cgx_pause_frm_cfg) \ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ @@ -214,6 +217,8 @@ M(NIX_LSO_FORMAT_CFG, 0x8011, nix_lso_format_cfg, \ nix_lso_format_cfg, \ nix_lso_format_cfg_rsp) \ M(NIX_RXVLAN_ALLOC, 0x8012, nix_rxvlan_alloc, msg_req, msg_rsp) \ +M(NIX_LF_PTP_TX_ENABLE, 0x8013, nix_lf_ptp_tx_enable, msg_req, msg_rsp) \ +M(NIX_LF_PTP_TX_DISABLE, 0x8014, nix_lf_ptp_tx_disable, msg_req, msg_rsp) \ M(NIX_BP_ENABLE, 0x8016, nix_bp_enable, nix_bp_cfg_req, \ nix_bp_cfg_rsp) \ M(NIX_BP_DISABLE, 0x8017, nix_bp_disable, nix_bp_cfg_req, msg_rsp) \ @@ -621,6 +626,7 @@ struct nix_rss_flowkey_cfg { #define NIX_FLOW_KEY_TYPE_INNR_UDP BIT(15) #define NIX_FLOW_KEY_TYPE_INNR_SCTP BIT(16) #define NIX_FLOW_KEY_TYPE_INNR_ETH_DMAC BIT(17) +#define NIX_FLOW_KEY_TYPE_VLAN BIT(20) u32 flowkey_cfg; /* Flowkey types selected */ u8 group; /* RSS context or group */ }; @@ -859,4 +865,20 @@ struct npc_get_kex_cfg_rsp { u8 mkex_pfl_name[MKEX_NAME_LEN]; }; +enum ptp_op { + PTP_OP_ADJFINE = 0, + PTP_OP_GET_CLOCK = 1, +}; + +struct ptp_req { + struct mbox_msghdr 
hdr; + u8 op; + s64 scaled_ppm; +}; + +struct ptp_rsp { + struct mbox_msghdr hdr; + u64 clk; +}; + #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index 3803af9231c6..91a9d00e4fb5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -49,6 +49,7 @@ enum npc_kpu_lb_ltype { NPC_LT_LB_EDSA_VLAN, NPC_LT_LB_EXDSA, NPC_LT_LB_EXDSA_VLAN, + NPC_LT_LB_FDSA, NPC_LT_LB_CUSTOM0 = 0xE, NPC_LT_LB_CUSTOM1 = 0xF, }; @@ -77,21 +78,21 @@ enum npc_kpu_ld_ltype { NPC_LT_LD_ICMP, NPC_LT_LD_SCTP, NPC_LT_LD_ICMP6, + NPC_LT_LD_CUSTOM0, + NPC_LT_LD_CUSTOM1, NPC_LT_LD_IGMP = 8, - NPC_LT_LD_ESP, NPC_LT_LD_AH, NPC_LT_LD_GRE, NPC_LT_LD_NVGRE, NPC_LT_LD_NSH, NPC_LT_LD_TU_MPLS_IN_NSH, NPC_LT_LD_TU_MPLS_IN_IP, - NPC_LT_LD_CUSTOM0 = 0xE, - NPC_LT_LD_CUSTOM1 = 0xF, }; enum npc_kpu_le_ltype { NPC_LT_LE_VXLAN = 1, NPC_LT_LE_GENEVE, + NPC_LT_LE_ESP, NPC_LT_LE_GTPU = 4, NPC_LT_LE_VXLANGPE, NPC_LT_LE_GTPC, @@ -173,8 +174,8 @@ struct npc_kpu_profile_action { struct npc_kpu_profile { int cam_entries; int action_entries; - struct npc_kpu_profile_cam *cam; - struct npc_kpu_profile_action *action; + const struct npc_kpu_profile_cam *cam; + const struct npc_kpu_profile_action *action; }; /* NPC KPU register formats */ @@ -296,6 +297,9 @@ struct nix_rx_action { #endif }; +/* NPC_AF_INTFX_KEX_CFG field masks */ +#define NPC_PARSE_NIBBLE GENMASK_ULL(30, 0) + /* NIX Receive Vtag Action Structure */ #define VTAG0_VALID_BIT BIT_ULL(15) #define VTAG0_TYPE_MASK GENMASK_ULL(14, 12) @@ -320,4 +324,37 @@ struct npc_mcam_kex { u64 intf_ld_flags[NPC_MAX_INTF][NPC_MAX_LD][NPC_MAX_LFL]; } __packed; +struct npc_lt_def { + u8 ltype_mask; + u8 ltype_match; + u8 lid; +}; + +struct npc_lt_def_ipsec { + u8 ltype_mask; + u8 ltype_match; + u8 lid; + u8 spi_offset; + u8 spi_nz; +}; + +struct npc_lt_def_cfg { + struct npc_lt_def rx_ol2; + struct npc_lt_def rx_oip4; + struct npc_lt_def rx_iip4; + struct npc_lt_def rx_oip6; + struct npc_lt_def rx_iip6; + struct npc_lt_def rx_otcp; + struct npc_lt_def rx_itcp; + struct npc_lt_def rx_oudp; + struct npc_lt_def rx_iudp; + struct npc_lt_def rx_osctp; + struct npc_lt_def rx_isctp; + struct npc_lt_def_ipsec rx_ipsec[2]; + struct npc_lt_def pck_ol2; + struct npc_lt_def pck_oip4; + struct npc_lt_def pck_oip6; + struct npc_lt_def pck_iip4; +}; + #endif /* NPC_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h index aa2727e6211a..77bb4ed32600 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h @@ -63,6 +63,7 @@ #define NPC_UDP_PORT_VXLANGPE 4790 #define NPC_UDP_PORT_GENEVE 6081 #define NPC_UDP_PORT_MPLS 6635 +#define NPC_UDP_PORT_ESP 4500 #define NPC_VXLANGPE_NP_IP 0x1 #define NPC_VXLANGPE_NP_IP6 0x2 @@ -139,6 +140,13 @@ #define NPC_DSA_EXTEND 0x1000 #define NPC_DSA_EDSA 0x8000 +#define NPC_DSA_FDSA 0xc000 + +#define NPC_KEXOF_DMAC 8 +#define MKEX_SIGN 0x19bbfdbd15f /* strtoull of "mkexprof" with base:36 */ +#define KEX_LD_CFG(bytesm1, hdr_ofs, ena, flags_ena, key_ofs) \ + (((bytesm1) << 16) | ((hdr_ofs) << 8) | ((ena) << 7) | \ + ((flags_ena) << 6) | ((key_ofs) & 0x3F)) enum npc_kpu_parser_state { NPC_S_NA = 0, @@ -166,6 +174,7 @@ enum npc_kpu_parser_state { NPC_S_KPU3_DSA, NPC_S_KPU4_MPLS, NPC_S_KPU4_NSH, + NPC_S_KPU4_FDSA, NPC_S_KPU5_IP, NPC_S_KPU5_IP6, NPC_S_KPU5_ARP, @@ -189,7 +198,6 @@ enum npc_kpu_parser_state { 
NPC_S_KPU8_IGMP, NPC_S_KPU8_ICMP6, NPC_S_KPU8_GRE, - NPC_S_KPU8_ESP, NPC_S_KPU8_AH, NPC_S_KPU9_TU_MPLS_IN_GRE, NPC_S_KPU9_TU_MPLS_IN_NSH, @@ -201,6 +209,7 @@ enum npc_kpu_parser_state { NPC_S_KPU9_GENEVE, NPC_S_KPU9_GTPC, NPC_S_KPU9_GTPU, + NPC_S_KPU9_ESP, NPC_S_KPU10_TU_MPLS_IN_VXLANGPE, NPC_S_KPU10_TU_MPLS_PL, NPC_S_KPU10_TU_MPLS, @@ -271,6 +280,7 @@ enum npc_kpu_lb_lflag { NPC_F_LB_L_EDSA_VLAN, NPC_F_LB_L_EXDSA, NPC_F_LB_L_EXDSA_VLAN, + NPC_F_LB_L_FDSA, }; enum npc_kpu_lc_uflag { @@ -418,7 +428,7 @@ enum NPC_ERRLEV_E { NPC_ERRLEV_ENUM_LAST = 16, }; -static struct npc_kpu_profile_action ikpu_action_entries[] = { +static const struct npc_kpu_profile_action ikpu_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20, 0, 0, @@ -979,7 +989,7 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 12, 16, 20, 0, 0, + 12, 14, 20, 0, 0, NPC_S_KPU1_EXDSA, 0, 0, NPC_LID_LA, NPC_LT_NA, 0, @@ -997,7 +1007,7 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu1_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu1_cam_entries[] = { { NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_IP, @@ -1351,10 +1361,19 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = { }, { NPC_S_KPU1_EXDSA, 0xff, + 0x0000, + 0x0000, NPC_DSA_EXTEND, NPC_DSA_EXTEND, 0x0000, 0x0000, + }, + { + NPC_S_KPU1_EXDSA, 0xff, + NPC_DSA_FDSA, + NPC_DSA_FDSA, + 0x0000, + 0x0000, 0x0000, 0x0000, }, @@ -1666,7 +1685,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu2_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu2_cam_entries[] = { { NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_IP, @@ -2794,7 +2813,7 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu3_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu3_cam_entries[] = { { NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_IP, @@ -3913,7 +3932,7 @@ static struct npc_kpu_profile_cam kpu3_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu4_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu4_cam_entries[] = { { NPC_S_KPU4_MPLS, 0xff, NPC_MPLS_S, @@ -3996,6 +4015,69 @@ static struct npc_kpu_profile_cam kpu4_cam_entries[] = { 0x0000, }, { + NPC_S_KPU4_FDSA, 0xff, + NPC_ETYPE_IP, + 0xffff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { + NPC_S_KPU4_FDSA, 0xff, + NPC_ETYPE_IP6, + 0xffff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { + NPC_S_KPU4_FDSA, 0xff, + NPC_ETYPE_ARP, + 0xffff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { + NPC_S_KPU4_FDSA, 0xff, + NPC_ETYPE_RARP, + 0xffff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { + NPC_S_KPU4_FDSA, 0xff, + NPC_ETYPE_PTP, + 0xffff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { + NPC_S_KPU4_FDSA, 0xff, + NPC_ETYPE_FCOE, + 0xffff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { + NPC_S_KPU4_FDSA, 0xff, + 0x0000, + NPC_DSA_FDSA, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { NPC_S_NA, 0X00, 0x0000, 0x0000, @@ -4006,7 +4088,7 @@ static struct npc_kpu_profile_cam kpu4_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu5_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu5_cam_entries[] = { { NPC_S_KPU5_IP, 0xff, 0x0000, @@ -4576,7 +4658,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu6_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu6_cam_entries[] = { { NPC_S_KPU6_IP6_EXT, 0xff, 0x0000, @@ -4921,7 +5003,7 @@ 
static struct npc_kpu_profile_cam kpu6_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu7_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu7_cam_entries[] = { { NPC_S_KPU7_IP6_EXT, 0xff, 0x0000, @@ -5140,7 +5222,7 @@ static struct npc_kpu_profile_cam kpu7_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu8_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu8_cam_entries[] = { { NPC_S_KPU8_TCP, 0xff, 0x0000, @@ -5341,15 +5423,24 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = { }, { NPC_S_KPU8_UDP, 0xff, + NPC_UDP_PORT_ESP, + 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, + }, + { + NPC_S_KPU8_UDP, 0xff, + 0x0000, + 0x0000, + NPC_UDP_PORT_ESP, + 0xffff, 0x0000, 0x0000, }, { - NPC_S_KPU8_SCTP, 0xff, + NPC_S_KPU8_UDP, 0xff, 0x0000, 0x0000, 0x0000, @@ -5358,7 +5449,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = { 0x0000, }, { - NPC_S_KPU8_ICMP, 0xff, + NPC_S_KPU8_SCTP, 0xff, 0x0000, 0x0000, 0x0000, @@ -5367,7 +5458,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = { 0x0000, }, { - NPC_S_KPU8_IGMP, 0xff, + NPC_S_KPU8_ICMP, 0xff, 0x0000, 0x0000, 0x0000, @@ -5376,7 +5467,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = { 0x0000, }, { - NPC_S_KPU8_ICMP6, 0xff, + NPC_S_KPU8_IGMP, 0xff, 0x0000, 0x0000, 0x0000, @@ -5385,7 +5476,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = { 0x0000, }, { - NPC_S_KPU8_ESP, 0xff, + NPC_S_KPU8_ICMP6, 0xff, 0x0000, 0x0000, 0x0000, @@ -5872,7 +5963,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu9_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu9_cam_entries[] = { { NPC_S_KPU9_TU_MPLS_IN_GRE, 0xff, NPC_MPLS_S, @@ -6324,6 +6415,15 @@ static struct npc_kpu_profile_cam kpu9_cam_entries[] = { NPC_MPLS_S, }, { + NPC_S_KPU9_ESP, 0xff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { NPC_S_NA, 0X00, 0x0000, 0x0000, @@ -6334,7 +6434,7 @@ static struct npc_kpu_profile_cam kpu9_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu10_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu10_cam_entries[] = { { NPC_S_KPU10_TU_MPLS, 0xff, NPC_MPLS_S, @@ -6499,7 +6599,7 @@ static struct npc_kpu_profile_cam kpu10_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu11_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu11_cam_entries[] = { { NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_IP, @@ -6808,7 +6908,7 @@ static struct npc_kpu_profile_cam kpu11_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu12_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu12_cam_entries[] = { { NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_TCP, @@ -7063,7 +7163,7 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu13_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu13_cam_entries[] = { { NPC_S_KPU13_TU_IP6_EXT, 0xff, 0x0000, @@ -7075,7 +7175,7 @@ static struct npc_kpu_profile_cam kpu13_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu14_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu14_cam_entries[] = { { NPC_S_KPU14_TU_IP6_EXT, 0xff, 0x0000, @@ -7087,7 +7187,7 @@ static struct npc_kpu_profile_cam kpu14_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu15_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu15_cam_entries[] = { { NPC_S_KPU15_TU_TCP, 0xff, 0x0000, @@ -7288,7 +7388,7 @@ static struct npc_kpu_profile_cam kpu15_cam_entries[] = { }, }; -static struct 
npc_kpu_profile_cam kpu16_cam_entries[] = { +static const struct npc_kpu_profile_cam kpu16_cam_entries[] = { { NPC_S_KPU16_TCP_DATA, 0xff, 0x0000, @@ -7345,7 +7445,7 @@ static struct npc_kpu_profile_cam kpu16_cam_entries[] = { }, }; -static struct npc_kpu_profile_action kpu1_action_entries[] = { +static const struct npc_kpu_profile_action kpu1_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 6, 3, 0, @@ -7673,6 +7773,14 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = { 0, 0, 0, 0, }, { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 4, 8, 16, 2, 0, + NPC_S_KPU4_FDSA, 12, 1, + NPC_LID_LA, NPC_LT_LA_ETHER, + 0, + 0, 0, 0, 0, + }, + { NPC_ERRLEV_LA, NPC_EC_EDSA_UNK, 0, 0, 0, 0, 1, NPC_S_NA, 0, 1, @@ -7962,7 +8070,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu2_action_entries[] = { +static const struct npc_kpu_profile_action kpu2_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 6, 2, 0, @@ -8965,7 +9073,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu3_action_entries[] = { +static const struct npc_kpu_profile_action kpu3_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 6, 1, 0, @@ -9960,7 +10068,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu4_action_entries[] = { +static const struct npc_kpu_profile_action kpu4_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0, 0, 0, @@ -10034,6 +10142,62 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = { 0, 0, 0, 0, }, { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 8, 0, 6, 0, 0, + NPC_S_KPU5_IP, 6, 1, + NPC_LID_LB, NPC_LT_LB_FDSA, + NPC_F_LB_L_FDSA, + 0, 0, 0, 0, + }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 6, 0, 0, 0, 0, + NPC_S_KPU5_IP6, 6, 1, + NPC_LID_LB, NPC_LT_LB_FDSA, + NPC_F_LB_L_FDSA, + 0, 0, 0, 0, + }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 0, 0, + NPC_S_KPU5_ARP, 6, 1, + NPC_LID_LB, NPC_LT_LB_FDSA, + NPC_F_LB_L_FDSA, + 0, 0, 0, 0, + }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 8, 0, 6, 0, 0, + NPC_S_KPU5_RARP, 6, 1, + NPC_LID_LB, NPC_LT_LB_FDSA, + NPC_F_LB_L_FDSA, + 0, 0, 0, 0, + }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 6, 0, 0, 0, 0, + NPC_S_KPU5_PTP, 6, 1, + NPC_LID_LB, NPC_LT_LB_FDSA, + NPC_F_LB_L_FDSA, + 0, 0, 0, 0, + }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 0, 0, + NPC_S_KPU5_FCOE, 6, 1, + NPC_LID_LB, NPC_LT_LB_FDSA, + NPC_F_LB_L_FDSA, + 0, 0, 0, 0, + }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 1, + NPC_LID_LB, NPC_LT_LB_FDSA, + NPC_F_LB_U_UNK_ETYPE | NPC_F_LB_L_FDSA, + 0, 0, 0, 0, + }, + { NPC_ERRLEV_LB, NPC_EC_L2_K4, 0, 0, 0, 0, 1, NPC_S_NA, 0, 0, @@ -10043,7 +10207,7 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu5_action_entries[] = { +static const struct npc_kpu_profile_action kpu5_action_entries[] = { { NPC_ERRLEV_LC, NPC_EC_IP_TTL_0, 0, 0, 0, 0, 1, @@ -10102,8 +10266,8 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 2, 0, - NPC_S_KPU8_ESP, 20, 1, + 0, 0, 0, 3, 0, + NPC_S_KPU9_ESP, 20, 1, NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0, 0, 0, @@ -10206,8 +10370,8 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 2, 0, - NPC_S_KPU8_ESP, 0, 1, + 0, 0, 0, 3, 0, + NPC_S_KPU9_ESP, 0, 1, NPC_LID_LC, NPC_LT_LC_IP_OPT, 0, 0, 0xf, 0, 2, @@ -10414,8 +10578,8 @@ static struct npc_kpu_profile_action 
kpu5_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 2, 0, - NPC_S_KPU8_ESP, 40, 1, + 0, 0, 0, 3, 0, + NPC_S_KPU9_ESP, 40, 1, NPC_LID_LC, NPC_LT_LC_IP6_EXT, 0, 0, 0, 0, 0, @@ -10550,7 +10714,7 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu6_action_entries[] = { +static const struct npc_kpu_profile_action kpu6_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0, 0, 1, @@ -10561,80 +10725,80 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 2, 12, 0, 1, 0, - NPC_S_KPU8_TCP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 2, 8, 10, 1, 0, - NPC_S_KPU8_UDP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 1, 0, - NPC_S_KPU8_SCTP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 1, 0, - NPC_S_KPU8_ICMP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 1, 0, - NPC_S_KPU8_ICMP6, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 1, 0, - NPC_S_KPU8_ESP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 1, 0, - NPC_S_KPU8_AH, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 1, 0, - NPC_S_KPU8_GRE, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 6, 0, 0, 5, 0, - NPC_S_KPU12_TU_IP6, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 2, 6, 10, 2, 0, - NPC_S_KPU9_TU_MPLS_IN_IP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, @@ -10689,8 +10853,8 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 1, 0, - NPC_S_KPU8_ESP, 8, 0, + 0, 0, 0, 2, 0, + NPC_S_KPU9_ESP, 8, 0, NPC_LID_LC, NPC_LT_NA, 0, 1, 0xff, 0, 3, @@ -10793,8 +10957,8 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 1, 0, - NPC_S_KPU8_ESP, 8, 0, + 0, 0, 0, 2, 0, + NPC_S_KPU9_ESP, 8, 0, NPC_LID_LC, NPC_LT_NA, 0, 1, 0xff, 0, 3, @@ -10857,7 +11021,7 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu7_action_entries[] = { +static const struct npc_kpu_profile_action kpu7_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0, 0, 1, @@ -10908,8 +11072,8 @@ static struct npc_kpu_profile_action kpu7_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 0, - NPC_S_KPU8_ESP, 8, 0, + 0, 0, 0, 1, 0, + NPC_S_KPU9_ESP, 8, 0, NPC_LID_LC, NPC_LT_NA, 0, 1, 0xff, 0, 3, @@ -10956,80 +11120,80 @@ static struct npc_kpu_profile_action kpu7_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 2, 12, 0, 0, 0, - NPC_S_KPU8_TCP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 2, 8, 10, 0, 0, - NPC_S_KPU8_UDP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 0, - NPC_S_KPU8_SCTP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, 
NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 0, - NPC_S_KPU8_ICMP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 0, - NPC_S_KPU8_ICMP6, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 0, - NPC_S_KPU8_ESP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 0, - NPC_S_KPU8_AH, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 0, - NPC_S_KPU8_GRE, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 6, 0, 0, 4, 0, - NPC_S_KPU12_TU_IP6, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 2, 6, 10, 1, 0, - NPC_S_KPU9_TU_MPLS_IN_IP, 8, 0, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 0, NPC_LID_LC, NPC_LT_NA, 0, 0, 0, 0, 0, @@ -11052,7 +11216,7 @@ static struct npc_kpu_profile_action kpu7_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu8_action_entries[] = { +static const struct npc_kpu_profile_action kpu8_action_entries[] = { { NPC_ERRLEV_LD, NPC_EC_TCP_FLAGS_FIN_ONLY, 0, 0, 0, 0, 1, @@ -11231,6 +11395,22 @@ static struct npc_kpu_profile_action kpu8_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 0, 0, + NPC_S_KPU9_ESP, 8, 1, + NPC_LID_LD, NPC_LT_LD_UDP, + 0, + 0, 0, 0, 0, + }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 0, 0, + NPC_S_KPU9_ESP, 8, 1, + NPC_LID_LD, NPC_LT_LD_UDP, + 0, + 0, 0, 0, 0, + }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0, 7, 0, NPC_S_KPU16_UDP_DATA, 8, 1, NPC_LID_LD, NPC_LT_LD_UDP, @@ -11273,14 +11453,6 @@ static struct npc_kpu_profile_action kpu8_action_entries[] = { NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0, 0, 1, NPC_S_NA, 0, 1, - NPC_LID_LD, NPC_LT_LD_ESP, - 0, - 0, 0, 0, 0, - }, - { - NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 1, - NPC_S_NA, 0, 1, NPC_LID_LD, NPC_LT_LD_AH, 0, 0, 0, 0, 0, @@ -11703,7 +11875,7 @@ static struct npc_kpu_profile_action kpu8_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu9_action_entries[] = { +static const struct npc_kpu_profile_action kpu9_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0, 0, 0, @@ -12105,6 +12277,14 @@ static struct npc_kpu_profile_action kpu9_action_entries[] = { 0, 0, 0, 0, }, { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 1, + NPC_LID_LE, NPC_LT_LE_ESP, + 0, + 0, 0, 0, 0, + }, + { NPC_ERRLEV_LE, NPC_EC_UNK, 0, 0, 0, 0, 1, NPC_S_NA, 0, 0, @@ -12114,7 +12294,7 @@ static struct npc_kpu_profile_action kpu9_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu10_action_entries[] = { +static const struct npc_kpu_profile_action kpu10_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 6, 1, 0, @@ -12261,7 +12441,7 @@ static struct npc_kpu_profile_action kpu10_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu11_action_entries[] = { +static const struct npc_kpu_profile_action kpu11_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 6, 0, 0, @@ -12536,7 +12716,7 @@ static struct npc_kpu_profile_action kpu11_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu12_action_entries[] = { +static const struct npc_kpu_profile_action kpu12_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 12, 0, 2, 0, @@ -12763,7 +12943,7 @@ 
static struct npc_kpu_profile_action kpu12_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu13_action_entries[] = { +static const struct npc_kpu_profile_action kpu13_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0, 0, 1, @@ -12774,7 +12954,7 @@ static struct npc_kpu_profile_action kpu13_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu14_action_entries[] = { +static const struct npc_kpu_profile_action kpu14_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0, 0, 1, @@ -12785,7 +12965,7 @@ static struct npc_kpu_profile_action kpu14_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu15_action_entries[] = { +static const struct npc_kpu_profile_action kpu15_action_entries[] = { { NPC_ERRLEV_LG, NPC_EC_TCP_FLAGS_FIN_ONLY, 0, 0, 0, 0, 1, @@ -12964,7 +13144,7 @@ static struct npc_kpu_profile_action kpu15_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu16_action_entries[] = { +static const struct npc_kpu_profile_action kpu16_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0, 0, 1, @@ -13015,7 +13195,7 @@ static struct npc_kpu_profile_action kpu16_action_entries[] = { }, }; -static struct npc_kpu_profile npc_kpu_profiles[] = { +static const struct npc_kpu_profile npc_kpu_profiles[] = { { ARRAY_SIZE(kpu1_cam_entries), ARRAY_SIZE(kpu1_action_entries), @@ -13114,4 +13294,163 @@ static struct npc_kpu_profile npc_kpu_profiles[] = { }, }; +static const struct npc_lt_def_cfg npc_lt_defaults = { + .rx_ol2 = { + .lid = NPC_LID_LA, + .ltype_match = NPC_LT_LA_ETHER, + .ltype_mask = 0x0F, + }, + .rx_oip4 = { + .lid = NPC_LID_LC, + .ltype_match = NPC_LT_LC_IP, + .ltype_mask = 0x0E, + }, + .rx_iip4 = { + .lid = NPC_LID_LG, + .ltype_match = NPC_LT_LG_TU_IP, + .ltype_mask = 0x0F, + }, + .rx_oip6 = { + .lid = NPC_LID_LC, + .ltype_match = NPC_LT_LC_IP6, + .ltype_mask = 0x0E, + }, + .rx_iip6 = { + .lid = NPC_LID_LG, + .ltype_match = NPC_LT_LG_TU_IP6, + .ltype_mask = 0x0F, + }, + .rx_otcp = { + .lid = NPC_LID_LD, + .ltype_match = NPC_LT_LD_TCP, + .ltype_mask = 0x0F, + }, + .rx_itcp = { + .lid = NPC_LID_LH, + .ltype_match = NPC_LT_LH_TU_TCP, + .ltype_mask = 0x0F, + }, + .rx_oudp = { + .lid = NPC_LID_LD, + .ltype_match = NPC_LT_LD_UDP, + .ltype_mask = 0x0F, + }, + .rx_iudp = { + .lid = NPC_LID_LH, + .ltype_match = NPC_LT_LH_TU_UDP, + .ltype_mask = 0x0F, + }, + .rx_osctp = { + .lid = NPC_LID_LD, + .ltype_match = NPC_LT_LD_SCTP, + .ltype_mask = 0x0F, + }, + .rx_isctp = { + .lid = NPC_LID_LH, + .ltype_match = NPC_LT_LH_TU_SCTP, + .ltype_mask = 0x0F, + }, + .rx_ipsec = { + { + .lid = NPC_LID_LE, + .ltype_match = NPC_LT_LE_ESP, + .ltype_mask = 0x0F, + }, + { + .spi_offset = 8, + .lid = NPC_LID_LH, + .ltype_match = NPC_LT_LH_TU_ESP, + .ltype_mask = 0x0F, + }, + }, + .pck_ol2 = { + .lid = NPC_LID_LA, + .ltype_match = NPC_LT_LA_ETHER, + .ltype_mask = 0x0F, + }, + .pck_oip4 = { + .lid = NPC_LID_LC, + .ltype_match = NPC_LT_LC_IP, + .ltype_mask = 0x0E, + }, + .pck_iip4 = { + .lid = NPC_LID_LG, + .ltype_match = NPC_LT_LG_TU_IP, + .ltype_mask = 0x0F, + }, +}; + +static const struct npc_mcam_kex npc_mkex_default = { + .mkex_sign = MKEX_SIGN, + .name = "default", + .kpu_version = NPC_KPU_PROFILE_VER, + .keyx_cfg = { + /* nibble: LA..LE (ltype only) + Channel */ + [NIX_INTF_RX] = ((u64)NPC_MCAM_KEY_X2 << 32) | 0x49247, + [NIX_INTF_TX] = ((u64)NPC_MCAM_KEY_X2 << 32) | ((1ULL << 19) - 1), + }, + .intf_lid_lt_ld = { + /* Default RX MCAM KEX profile */ + [NIX_INTF_RX] = { + [NPC_LID_LA] = { + /* Layer A: Ethernet: */ + [NPC_LT_LA_ETHER] 
= { + /* DMAC: 6 bytes, KW1[47:0] */ + KEX_LD_CFG(0x05, 0x0, 0x1, 0x0, NPC_KEXOF_DMAC), + /* Ethertype: 2 bytes, KW0[47:32] */ + KEX_LD_CFG(0x01, 0xc, 0x1, 0x0, 0x4), + }, + }, + [NPC_LID_LB] = { + /* Layer B: Single VLAN (CTAG) */ + /* CTAG VLAN[2..3] + Ethertype, 4 bytes, KW0[63:32] */ + [NPC_LT_LB_CTAG] = { + KEX_LD_CFG(0x03, 0x0, 0x1, 0x0, 0x4), + }, + /* Layer B: Stacked VLAN (STAG|QinQ) */ + [NPC_LT_LB_STAG_QINQ] = { + /* CTAG VLAN[2..3] + Ethertype, 4 bytes, KW0[63:32] */ + KEX_LD_CFG(0x03, 0x4, 0x1, 0x0, 0x4), + }, + [NPC_LT_LB_FDSA] = { + /* SWITCH PORT: 1 byte, KW0[63:48] */ + KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0x6), + /* Ethertype: 2 bytes, KW0[47:32] */ + KEX_LD_CFG(0x01, 0x4, 0x1, 0x0, 0x4), + }, + }, + [NPC_LID_LC] = { + /* Layer C: IPv4 */ + [NPC_LT_LC_IP] = { + /* SIP+DIP: 8 bytes, KW2[63:0] */ + KEX_LD_CFG(0x07, 0xc, 0x1, 0x0, 0x10), + /* TOS: 1 byte, KW1[63:56] */ + KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0xf), + }, + /* Layer C: IPv6 */ + [NPC_LT_LC_IP6] = { + /* Everything up to SADDR: 8 bytes, KW2[63:0] */ + KEX_LD_CFG(0x07, 0x0, 0x1, 0x0, 0x10), + }, + }, + [NPC_LID_LD] = { + /* Layer D:UDP */ + [NPC_LT_LD_UDP] = { + /* SPORT: 2 bytes, KW3[15:0] */ + KEX_LD_CFG(0x1, 0x0, 0x1, 0x0, 0x18), + /* DPORT: 2 bytes, KW3[31:16] */ + KEX_LD_CFG(0x1, 0x2, 0x1, 0x0, 0x1a), + }, + /* Layer D:TCP */ + [NPC_LT_LD_TCP] = { + /* SPORT: 2 bytes, KW3[15:0] */ + KEX_LD_CFG(0x1, 0x0, 0x1, 0x0, 0x18), + /* DPORT: 2 bytes, KW3[31:16] */ + KEX_LD_CFG(0x1, 0x2, 0x1, 0x0, 0x1a), + }, + }, + }, + }, +}; + #endif /* NPC_PROFILE_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c new file mode 100644 index 000000000000..f69f4f35ae48 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell PTP driver + * + * Copyright (C) 2020 Marvell International Ltd. + */ + +#include <linux/bitfield.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "ptp.h" +#include "mbox.h" +#include "rvu.h" + +#define DRV_NAME "Marvell PTP Driver" + +#define PCI_DEVID_OCTEONTX2_PTP 0xA00C +#define PCI_SUBSYS_DEVID_OCTX2_98xx_PTP 0xB100 +#define PCI_SUBSYS_DEVID_OCTX2_96XX_PTP 0xB200 +#define PCI_SUBSYS_DEVID_OCTX2_95XX_PTP 0xB300 +#define PCI_SUBSYS_DEVID_OCTX2_LOKI_PTP 0xB400 +#define PCI_SUBSYS_DEVID_OCTX2_95MM_PTP 0xB500 +#define PCI_DEVID_OCTEONTX2_RST 0xA085 + +#define PCI_PTP_BAR_NO 0 +#define PCI_RST_BAR_NO 0 + +#define PTP_CLOCK_CFG 0xF00ULL +#define PTP_CLOCK_CFG_PTP_EN BIT_ULL(0) +#define PTP_CLOCK_LO 0xF08ULL +#define PTP_CLOCK_HI 0xF10ULL +#define PTP_CLOCK_COMP 0xF18ULL + +#define RST_BOOT 0x1600ULL +#define RST_MUL_BITS GENMASK_ULL(38, 33) +#define CLOCK_BASE_RATE 50000000ULL + +static u64 get_clock_rate(void) +{ + u64 cfg, ret = CLOCK_BASE_RATE * 16; + struct pci_dev *pdev; + void __iomem *base; + + /* To get the input clock frequency with which PTP co-processor + * block is running the base frequency(50 MHz) needs to be multiplied + * with multiplier bits present in RST_BOOT register of RESET block. + * Hence below code gets the multiplier bits from the RESET PCI + * device present in the system. 
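A compilable sketch of the rate derivation this comment describes, using a hypothetical RST_BOOT value; the open-coded shift and mask stand in for FIELD_GET(RST_MUL_BITS, cfg) with RST_MUL_BITS = GENMASK_ULL(38, 33).

#include <stdint.h>
#include <stdio.h>

#define CLOCK_BASE_RATE	50000000ULL

static uint64_t rst_mul(uint64_t rst_boot)
{
	return (rst_boot >> 33) & 0x3f;	/* RST_MUL_BITS = GENMASK_ULL(38, 33) */
}

int main(void)
{
	uint64_t cfg = 20ULL << 33;	/* pretend RST_BOOT reports a x20 multiplier */

	/* 50 MHz base rate x 20 -> a 1 GHz PTP coprocessor clock */
	printf("%llu Hz\n", (unsigned long long)(CLOCK_BASE_RATE * rst_mul(cfg)));
	return 0;
}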
+ */ + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, + PCI_DEVID_OCTEONTX2_RST, NULL); + if (!pdev) + goto error; + + base = pci_ioremap_bar(pdev, PCI_RST_BAR_NO); + if (!base) + goto error_put_pdev; + + cfg = readq(base + RST_BOOT); + ret = CLOCK_BASE_RATE * FIELD_GET(RST_MUL_BITS, cfg); + + iounmap(base); + +error_put_pdev: + pci_dev_put(pdev); + +error: + return ret; +} + +struct ptp *ptp_get(void) +{ + struct pci_dev *pdev; + struct ptp *ptp; + + /* If the PTP pci device is found on the system and the ptp + * driver is bound to it then the PTP pci device is returned + * to the caller (rvu driver). + */ + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, + PCI_DEVID_OCTEONTX2_PTP, NULL); + if (!pdev) + return ERR_PTR(-ENODEV); + + ptp = pci_get_drvdata(pdev); + if (!ptp) + ptp = ERR_PTR(-EPROBE_DEFER); + if (IS_ERR(ptp)) + pci_dev_put(pdev); + + return ptp; +} + +void ptp_put(struct ptp *ptp) +{ + if (!ptp) + return; + + pci_dev_put(ptp->pdev); +} + +static int ptp_adjfine(struct ptp *ptp, long scaled_ppm) +{ + bool neg_adj = false; + u64 comp; + u64 adj; + s64 ppb; + + if (scaled_ppm < 0) { + neg_adj = true; + scaled_ppm = -scaled_ppm; + } + + /* The hardware adds the clock compensation value to the PTP clock + * on every coprocessor clock cycle. Typical convention is that it + * represents the number of nanoseconds between each cycle. In this + * convention the compensation value is a 64-bit fixed-point number, + * where the upper 32 bits are whole nanoseconds + * and the lower 32 bits are fractions of a nanosecond. + * The scaled_ppm argument represents the ratio in "parts per million" + * by which the compensation value should be corrected. + * To calculate the new compensation value we use 64-bit fixed-point + * arithmetic on the following formula + * comp = tbase + tbase * scaled_ppm / (1M * 2^16) + * where tbase is the basic compensation value calculated + * initially in the probe function. + */ + comp = ((u64)1000000000ull << 32) / ptp->clock_rate; + /* convert scaled_ppm to ppb */ + ppb = 1 + scaled_ppm; + ppb *= 125; + ppb >>= 13; + adj = comp * ppb; + adj = div_u64(adj, 1000000000ull); + comp = neg_adj ? comp - adj : comp + adj; + + writeq(comp, ptp->reg_base + PTP_CLOCK_COMP); + + return 0; +} + +static int ptp_get_clock(struct ptp *ptp, u64 *clk) +{ + /* Return the current PTP clock */ + *clk = readq(ptp->reg_base + PTP_CLOCK_HI); + + return 0; +} + +static int ptp_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct ptp *ptp; + u64 clock_comp; + u64 clock_cfg; + int err; + + ptp = devm_kzalloc(dev, sizeof(*ptp), GFP_KERNEL); + if (!ptp) { + err = -ENOMEM; + goto error; + } + + ptp->pdev = pdev; + + err = pcim_enable_device(pdev); + if (err) + goto error_free; + + err = pcim_iomap_regions(pdev, 1 << PCI_PTP_BAR_NO, pci_name(pdev)); + if (err) + goto error_free; + + ptp->reg_base = pcim_iomap_table(pdev)[PCI_PTP_BAR_NO]; + + ptp->clock_rate = get_clock_rate(); + + /* Enable PTP clock */ + clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG); + clock_cfg |= PTP_CLOCK_CFG_PTP_EN; + writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); + + clock_comp = ((u64)1000000000ull << 32) / ptp->clock_rate; + /* Initial compensation value to start the nanosecs counter */ + writeq(clock_comp, ptp->reg_base + PTP_CLOCK_COMP); + + pci_set_drvdata(pdev, ptp); + + return 0; + +error_free: + devm_kfree(dev, ptp); + +error: + /* For `ptp_get()` we need to differentiate between the case + * when the core has not tried to probe this device and the case when + * the probe failed.
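The conversion buried in ptp_adjfine() above is worth seeing with numbers: scaled_ppm is parts per million in 16.16 fixed point, and 1000/2^16 reduces to 125/2^13, hence the multiply-and-shift. A sketch for a 1 GHz clock (the driver additionally folds in a +1 rounding term and uses div_u64):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t scaled_ppm = 65536;		/* exactly +1 ppm in 16.16 fixed point */
	int64_t ppb = (scaled_ppm * 125) >> 13;	/* 1000 / 2^16 == 125 / 2^13 */
	uint64_t comp = 1ULL << 32;		/* tbase for 1 GHz: 1 ns per cycle */
	uint64_t adj = comp * (uint64_t)ppb / 1000000000ULL;

	/* Prints ppb=1000 adj=4294: one ppm shifts the compensation word by
	 * roughly 2^32 / 10^6 fractional-nanosecond units per cycle.
	 */
	printf("ppb=%lld adj=%llu new_comp=%#llx\n", (long long)ppb,
	       (unsigned long long)adj, (unsigned long long)(comp + adj));
	return 0;
}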
In the latter case we pretend that the + * initialization was successful and keep the error in + * `dev->driver_data`. + */ + pci_set_drvdata(pdev, ERR_PTR(err)); + return 0; +} + +static void ptp_remove(struct pci_dev *pdev) +{ + struct ptp *ptp = pci_get_drvdata(pdev); + u64 clock_cfg; + + if (IS_ERR_OR_NULL(ptp)) + return; + + /* Disable PTP clock */ + clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG); + clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN; + writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); +} + +static const struct pci_device_id ptp_id_table[] = { + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_OCTX2_98xx_PTP) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_OCTX2_96XX_PTP) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_OCTX2_95XX_PTP) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_OCTX2_LOKI_PTP) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_OCTX2_95MM_PTP) }, + { 0, } +}; + +struct pci_driver ptp_driver = { + .name = DRV_NAME, + .id_table = ptp_id_table, + .probe = ptp_probe, + .remove = ptp_remove, +}; + +int rvu_mbox_handler_ptp_op(struct rvu *rvu, struct ptp_req *req, + struct ptp_rsp *rsp) +{ + int err = 0; + + /* This function is the PTP mailbox handler, invoked by AF + * consumers/netdev drivers via the mailbox mechanism. + * It is used by the netdev driver to get the PTP clock and to set + * frequency adjustments. Since the mailbox can be invoked without + * knowing whether the driver is bound to the ptp device, the + * validation below is needed as a first step. + */ + if (!rvu->ptp) + return -ENODEV; + + switch (req->op) { + case PTP_OP_ADJFINE: + err = ptp_adjfine(rvu->ptp, req->scaled_ppm); + break; + case PTP_OP_GET_CLOCK: + err = ptp_get_clock(rvu->ptp, &rsp->clk); + break; + default: + err = -EINVAL; + break; + } + + return err; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.h b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h new file mode 100644 index 000000000000..878bc395d28f --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell PTP driver + * + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef PTP_H +#define PTP_H + +#include <linux/timecounter.h> +#include <linux/time64.h> +#include <linux/spinlock.h> + +struct ptp { + struct pci_dev *pdev; + void __iomem *reg_base; + u32 clock_rate; +}; + +struct ptp *ptp_get(void); +void ptp_put(struct ptp *ptp); + +extern struct pci_driver ptp_driver; + +#endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 557e4292c846..e1f918960730 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -18,6 +18,9 @@ #include "cgx.h" #include "rvu.h" #include "rvu_reg.h" +#include "ptp.h" + +#include "rvu_trace.h" #define DRV_NAME "octeontx2-af" #define DRV_STRING "Marvell OcteonTX2 RVU Admin Function Driver" @@ -1548,6 +1551,7 @@ static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid, if (rsp && err) \ rsp->hdr.rc = err; \ \ + trace_otx2_msg_process(mbox->pdev, _id, err); \ return rsp ?
err : -ENOMEM; \ } MBOX_MESSAGES @@ -1880,6 +1884,8 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT); /* Clear interrupts */ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT, intr); + if (intr) + trace_otx2_msg_interrupt(rvu->pdev, "PF(s) to AF", intr); /* Sync with mbox memory region */ rmb(); @@ -1897,6 +1903,8 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(0)); rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(0), intr); + if (intr) + trace_otx2_msg_interrupt(rvu->pdev, "VF(s) to AF", intr); rvu_queue_work(&rvu->afvf_wq_info, 0, vfs, intr); @@ -2565,13 +2573,21 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); + rvu->ptp = ptp_get(); + if (IS_ERR(rvu->ptp)) { + err = PTR_ERR(rvu->ptp); + if (err == -EPROBE_DEFER) + goto err_release_regions; + rvu->ptp = NULL; + } + /* Map Admin function CSRs */ rvu->afreg_base = pcim_iomap(pdev, PCI_AF_REG_BAR_NUM, 0); rvu->pfreg_base = pcim_iomap(pdev, PCI_PF_REG_BAR_NUM, 0); if (!rvu->afreg_base || !rvu->pfreg_base) { dev_err(dev, "Unable to map admin function CSRs, aborting\n"); err = -ENOMEM; - goto err_release_regions; + goto err_put_ptp; } /* Store module params in rvu structure */ @@ -2586,7 +2602,7 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = rvu_setup_hw_resources(rvu); if (err) - goto err_release_regions; + goto err_put_ptp; /* Init mailbox btw AF and PFs */ err = rvu_mbox_init(rvu, &rvu->afpf_wq_info, TYPE_AFPF, @@ -2626,6 +2642,8 @@ err_hwsetup: rvu_reset_all_blocks(rvu); rvu_free_hw_resources(rvu); rvu_clear_rvum_blk_revid(rvu); +err_put_ptp: + ptp_put(rvu->ptp); err_release_regions: pci_release_regions(pdev); err_disable_device: @@ -2651,6 +2669,7 @@ static void rvu_remove(struct pci_dev *pdev) rvu_reset_all_blocks(rvu); rvu_free_hw_resources(rvu); rvu_clear_rvum_blk_revid(rvu); + ptp_put(rvu->ptp); pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); @@ -2676,9 +2695,19 @@ static int __init rvu_init_module(void) if (err < 0) return err; + err = pci_register_driver(&ptp_driver); + if (err < 0) + goto ptp_err; + err = pci_register_driver(&rvu_driver); if (err < 0) - pci_unregister_driver(&cgx_driver); + goto rvu_err; + + return 0; +rvu_err: + pci_unregister_driver(&ptp_driver); +ptp_err: + pci_unregister_driver(&cgx_driver); return err; } @@ -2686,6 +2715,7 @@ static int __init rvu_init_module(void) static void __exit rvu_cleanup_module(void) { pci_unregister_driver(&rvu_driver); + pci_unregister_driver(&ptp_driver); pci_unregister_driver(&cgx_driver); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index b89dde2c8b08..90eed3160915 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -289,6 +289,22 @@ struct rvu_fwdata { u64 reserved[FWDATA_RESERVED_MEM]; }; +struct ptp; + +/* KPU profile adapter structure gathering all KPU configuration data and abstracting out the + * source where it came from. 
+ */ +struct npc_kpu_profile_adapter { + const char *name; + u64 version; + const struct npc_lt_def_cfg *lt_def; + const struct npc_kpu_profile_action *ikpu; /* array[pkinds] */ + const struct npc_kpu_profile *kpu; /* array[kpus] */ + const struct npc_mcam_kex *mkex; + size_t pkinds; + size_t kpus; +}; + struct rvu { void __iomem *afreg_base; void __iomem *pfreg_base; @@ -337,6 +353,11 @@ struct rvu { /* Firmware data */ struct rvu_fwdata *fwdata; + /* NPC KPU data */ + struct npc_kpu_profile_adapter kpu; + + struct ptp *ptp; + #ifdef CONFIG_DEBUG_FS struct rvu_debugfs rvu_dbg; #endif @@ -470,6 +491,7 @@ int rvu_npc_init(struct rvu *rvu); void rvu_npc_freemem(struct rvu *rvu); int rvu_npc_get_pkind(struct rvu *rvu, u16 pf); void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf); +int npc_config_ts_kpuaction(struct rvu *rvu, int pf, u16 pcifunc, bool en); void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan, u8 *mac_addr); void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index f3c82e489897..fa9152ff5e2a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -15,6 +15,7 @@ #include "rvu.h" #include "cgx.h" #include "rvu_reg.h" +#include "rvu_trace.h" struct cgx_evq_entry { struct list_head evq_node; @@ -34,6 +35,7 @@ static struct _req_type __maybe_unused \ return NULL; \ req->hdr.sig = OTX2_MBOX_REQ_SIG; \ req->hdr.id = _id; \ + trace_otx2_msg_alloc(rvu->pdev, _id, sizeof(*req)); \ return req; \ } @@ -509,6 +511,45 @@ int rvu_mbox_handler_cgx_promisc_disable(struct rvu *rvu, struct msg_req *req, return 0; } +static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + void *cgxd; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. 
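The RVU_PFVF_FUNC_MASK test just below tells PFs from VFs by the function bits of pcifunc. A sketch, assuming the 10-bit PF shift and 0x3FF function mask that rvu.h uses (those values are not shown in this diff):

#include <stdint.h>
#include <stdio.h>

#define RVU_PFVF_PF_SHIFT	10	/* assumed, from rvu.h */
#define RVU_PFVF_FUNC_MASK	0x3ffu	/* assumed, from rvu.h */

static int sender_is_vf(uint16_t pcifunc)
{
	/* VFs carry a non-zero function number; PFs have all-zero low bits */
	return (pcifunc & RVU_PFVF_FUNC_MASK) != 0;
}

int main(void)
{
	uint16_t pf2 = 2 << RVU_PFVF_PF_SHIFT;

	printf("PF2: %d, PF2/VF0: %d\n", sender_is_vf(pf2), sender_is_vf(pf2 | 1));
	return 0;
}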
+ */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || + !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + + cgx_lmac_ptp_config(cgxd, lmac_id, enable); + /* If PTP is enabled then inform NPC that packets to be + * parsed by this PF will have their data shifted by 8 bytes + * and if PTP is disabled then no shift is required + */ + if (npc_config_ts_kpuaction(rvu, pf, pcifunc, enable)) + return -EINVAL; + + return 0; +} + +int rvu_mbox_handler_cgx_ptp_rx_enable(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + return rvu_cgx_ptp_rx_cfg(rvu, req->hdr.pcifunc, true); +} + +int rvu_mbox_handler_cgx_ptp_rx_disable(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + return rvu_cgx_ptp_rx_cfg(rvu, req->hdr.pcifunc, false); +} + static int rvu_cgx_config_linkevents(struct rvu *rvu, u16 pcifunc, bool en) { int pf = rvu_get_pf(pcifunc); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 0fc70824fd6b..21a89dd76d3c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2508,6 +2508,14 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->ltype_match = NPC_LT_LE_GTPU; field->ltype_mask = 0xF; break; + case NIX_FLOW_KEY_TYPE_VLAN: + field->lid = NPC_LID_LB; + field->hdr_offset = 2; /* Skip TPID (2-bytes) */ + field->bytesm1 = 1; /* 2 Bytes (Actually 12 bits) */ + field->ltype_match = NPC_LT_LB_CTAG; + field->ltype_mask = 0xF; + field->fn_mask = 1; /* Mask out the first nibble */ + break; } field->ena = 1; @@ -3103,6 +3111,7 @@ static int nix_aq_init(struct rvu *rvu, struct rvu_block *block) int rvu_nix_init(struct rvu *rvu) { + const struct npc_lt_def_cfg *ltdefs; struct rvu_hwinfo *hw = rvu->hw; struct rvu_block *block; int blkaddr, err; @@ -3133,6 +3142,7 @@ int rvu_nix_init(struct rvu *rvu) rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); } + ltdefs = rvu->kpu.lt_def; /* Calibrate X2P bus to check if CGX/LBK links are fine */ err = nix_calibrate_x2p(rvu, blkaddr); if (err) @@ -3180,28 +3190,38 @@ int rvu_nix_init(struct rvu *rvu) * and validate length and checksums. 
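The rvu_write64() calls just below all pack a layer definition the same way: lid in bits [15:8], the ltype to match in [7:4], and the ltype mask in [3:0]. A sketch of the packing, with hypothetical numeric lid/ltype values (the real ones come from the npc.h enums):

#include <stdint.h>
#include <stdio.h>

struct lt_def {			/* mirrors struct npc_lt_def from npc.h */
	uint8_t ltype_mask;
	uint8_t ltype_match;
	uint8_t lid;
};

static uint64_t lt_def_reg(const struct lt_def *d)
{
	return ((uint64_t)d->lid << 8) | ((uint64_t)d->ltype_match << 4) |
	       d->ltype_mask;
}

int main(void)
{
	/* rx_oip4 uses mask 0x0E as in npc_lt_defaults; the lid/ltype values
	 * here are placeholders for NPC_LID_LC and NPC_LT_LC_IP.
	 */
	struct lt_def rx_oip4 = { .ltype_mask = 0x0E, .ltype_match = 1, .lid = 2 };

	printf("NIX_AF_RX_DEF_OIP4 <- %#llx\n",
	       (unsigned long long)lt_def_reg(&rx_oip4));
	return 0;
}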
*/ rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OL2, - (NPC_LID_LA << 8) | (NPC_LT_LA_ETHER << 4) | 0x0F); + (ltdefs->rx_ol2.lid << 8) | (ltdefs->rx_ol2.ltype_match << 4) | + ltdefs->rx_ol2.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OIP4, - (NPC_LID_LC << 8) | (NPC_LT_LC_IP << 4) | 0x0F); + (ltdefs->rx_oip4.lid << 8) | (ltdefs->rx_oip4.ltype_match << 4) | + ltdefs->rx_oip4.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IIP4, - (NPC_LID_LG << 8) | (NPC_LT_LG_TU_IP << 4) | 0x0F); + (ltdefs->rx_iip4.lid << 8) | (ltdefs->rx_iip4.ltype_match << 4) | + ltdefs->rx_iip4.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OIP6, - (NPC_LID_LC << 8) | (NPC_LT_LC_IP6 << 4) | 0x0F); + (ltdefs->rx_oip6.lid << 8) | (ltdefs->rx_oip6.ltype_match << 4) | + ltdefs->rx_oip6.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IIP6, - (NPC_LID_LG << 8) | (NPC_LT_LG_TU_IP6 << 4) | 0x0F); + (ltdefs->rx_iip6.lid << 8) | (ltdefs->rx_iip6.ltype_match << 4) | + ltdefs->rx_iip6.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OTCP, - (NPC_LID_LD << 8) | (NPC_LT_LD_TCP << 4) | 0x0F); + (ltdefs->rx_otcp.lid << 8) | (ltdefs->rx_otcp.ltype_match << 4) | + ltdefs->rx_otcp.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_ITCP, - (NPC_LID_LH << 8) | (NPC_LT_LH_TU_TCP << 4) | 0x0F); + (ltdefs->rx_itcp.lid << 8) | (ltdefs->rx_itcp.ltype_match << 4) | + ltdefs->rx_itcp.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OUDP, - (NPC_LID_LD << 8) | (NPC_LT_LD_UDP << 4) | 0x0F); + (ltdefs->rx_oudp.lid << 8) | (ltdefs->rx_oudp.ltype_match << 4) | + ltdefs->rx_oudp.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IUDP, - (NPC_LID_LH << 8) | (NPC_LT_LH_TU_UDP << 4) | 0x0F); + (ltdefs->rx_iudp.lid << 8) | (ltdefs->rx_iudp.ltype_match << 4) | + ltdefs->rx_iudp.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OSCTP, - (NPC_LID_LD << 8) | (NPC_LT_LD_SCTP << 4) | 0x0F); + (ltdefs->rx_osctp.lid << 8) | (ltdefs->rx_osctp.ltype_match << 4) | + ltdefs->rx_osctp.ltype_mask); rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_ISCTP, - (NPC_LID_LH << 8) | (NPC_LT_LH_TU_SCTP << 4) | - 0x0F); + (ltdefs->rx_isctp.lid << 8) | (ltdefs->rx_isctp.ltype_match << 4) | + ltdefs->rx_isctp.ltype_mask); err = nix_rx_flowkey_alg_cfg(rvu, blkaddr); if (err) @@ -3318,6 +3338,49 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) nix_ctx_free(rvu, pfvf); } +#define NIX_AF_LFX_TX_CFG_PTP_EN BIT_ULL(32) + +static int rvu_nix_lf_ptp_tx_cfg(struct rvu *rvu, u16 pcifunc, bool enable) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr; + int nixlf; + u64 cfg; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_TX_CFG(nixlf)); + + if (enable) + cfg |= NIX_AF_LFX_TX_CFG_PTP_EN; + else + cfg &= ~NIX_AF_LFX_TX_CFG_PTP_EN; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG(nixlf), cfg); + + return 0; +} + +int rvu_mbox_handler_nix_lf_ptp_tx_enable(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + return rvu_nix_lf_ptp_tx_cfg(rvu, req->hdr.pcifunc, true); +} + +int rvu_mbox_handler_nix_lf_ptp_tx_disable(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + return rvu_nix_lf_ptp_tx_cfg(rvu, req->hdr.pcifunc, false); +} + int rvu_mbox_handler_nix_lso_format_cfg(struct rvu *rvu, struct nix_lso_format_cfg *req, struct nix_lso_format_cfg_rsp 
*rsp) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index fbaf9bcd83f2..511b01dd03ed 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -27,6 +27,9 @@ #define NIXLF_PROMISC_ENTRY 2 #define NPC_PARSE_RESULT_DMAC_OFFSET 8 +#define NPC_HW_TSTAMP_OFFSET 8 + +static const char def_pfl_name[] = "default"; static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 pcifunc); @@ -61,6 +64,36 @@ int rvu_npc_get_pkind(struct rvu *rvu, u16 pf) return -1; } +#define NPC_AF_ACTION0_PTR_ADVANCE GENMASK_ULL(27, 20) + +int npc_config_ts_kpuaction(struct rvu *rvu, int pf, u16 pcifunc, bool enable) +{ + int pkind, blkaddr; + u64 val; + + pkind = rvu_npc_get_pkind(rvu, pf); + if (pkind < 0) { + dev_err(rvu->dev, "%s: pkind not mapped\n", __func__); + return -EINVAL; + } + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, pcifunc); + if (blkaddr < 0) { + dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__); + return -EINVAL; + } + + val = rvu_read64(rvu, blkaddr, NPC_AF_PKINDX_ACTION0(pkind)); + val &= ~NPC_AF_ACTION0_PTR_ADVANCE; + /* If timestamp is enabled then configure NPC to shift 8 bytes */ + if (enable) + val |= FIELD_PREP(NPC_AF_ACTION0_PTR_ADVANCE, + NPC_HW_TSTAMP_OFFSET); + rvu_write64(rvu, blkaddr, NPC_AF_PKINDX_ACTION0(pkind), val); + + return 0; +} + static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type) { @@ -417,7 +450,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, entry.kw_mask[0] = 0xFFFULL; if (allmulti) { - kwi = NPC_PARSE_RESULT_DMAC_OFFSET / sizeof(u64); + kwi = NPC_KEXOF_DMAC / sizeof(u64); entry.kw[kwi] = BIT_ULL(40); /* LSB bit of 1st byte in DMAC */ entry.kw_mask[kwi] = BIT_ULL(40); } @@ -699,88 +732,8 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf) rvu_write64(rvu, blkaddr, \ NPC_AF_INTFX_LDATAX_FLAGSX_CFG(intf, ld, flags), cfg) -#define KEX_LD_CFG(bytesm1, hdr_ofs, ena, flags_ena, key_ofs) \ - (((bytesm1) << 16) | ((hdr_ofs) << 8) | ((ena) << 7) | \ - ((flags_ena) << 6) | ((key_ofs) & 0x3F)) - -static void npc_config_ldata_extract(struct rvu *rvu, int blkaddr) -{ - struct npc_mcam *mcam = &rvu->hw->mcam; - int lid, ltype; - int lid_count; - u64 cfg; - - cfg = rvu_read64(rvu, blkaddr, NPC_AF_CONST); - lid_count = (cfg >> 4) & 0xF; - - /* First clear any existing config i.e - * disable LDATA and FLAGS extraction. 
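The KEX_LD_CFG() macro, relocated to npc_profile.h by this patch, packs one layer-data extractor word. Decoding the DMAC entry from the default MKEX profile shows how the fields land; this standalone sketch mirrors the macro and unpacks that value:

#include <stdint.h>
#include <stdio.h>

#define KEX_LD_CFG(bytesm1, hdr_ofs, ena, flags_ena, key_ofs) \
	(((bytesm1) << 16) | ((hdr_ofs) << 8) | ((ena) << 7) | \
	 ((flags_ena) << 6) | ((key_ofs) & 0x3F))

int main(void)
{
	/* The DMAC extractor from the default profile: 6 bytes from header
	 * offset 0 into key byte offset 8, i.e. KW1[47:0].
	 */
	uint32_t cfg = KEX_LD_CFG(0x05, 0x0, 0x1, 0x0, 0x8);

	printf("bytes=%u hdr_ofs=%u ena=%u flags_ena=%u key_ofs=%u\n",
	       ((cfg >> 16) & 0xff) + 1, (cfg >> 8) & 0xff,
	       (cfg >> 7) & 1, (cfg >> 6) & 1, cfg & 0x3f);
	return 0;
}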
- */ - for (lid = 0; lid < lid_count; lid++) { - for (ltype = 0; ltype < 16; ltype++) { - SET_KEX_LD(NIX_INTF_RX, lid, ltype, 0, 0ULL); - SET_KEX_LD(NIX_INTF_RX, lid, ltype, 1, 0ULL); - SET_KEX_LD(NIX_INTF_TX, lid, ltype, 0, 0ULL); - SET_KEX_LD(NIX_INTF_TX, lid, ltype, 1, 0ULL); - - SET_KEX_LDFLAGS(NIX_INTF_RX, 0, ltype, 0ULL); - SET_KEX_LDFLAGS(NIX_INTF_RX, 1, ltype, 0ULL); - SET_KEX_LDFLAGS(NIX_INTF_TX, 0, ltype, 0ULL); - SET_KEX_LDFLAGS(NIX_INTF_TX, 1, ltype, 0ULL); - } - } - - if (mcam->keysize != NPC_MCAM_KEY_X2) - return; - - /* Default MCAM KEX profile */ - /* Layer A: Ethernet: */ - - /* DMAC: 6 bytes, KW1[47:0] */ - cfg = KEX_LD_CFG(0x05, 0x0, 0x1, 0x0, NPC_PARSE_RESULT_DMAC_OFFSET); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LA, NPC_LT_LA_ETHER, 0, cfg); - - /* Ethertype: 2 bytes, KW0[47:32] */ - cfg = KEX_LD_CFG(0x01, 0xc, 0x1, 0x0, 0x4); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LA, NPC_LT_LA_ETHER, 1, cfg); - - /* Layer B: Single VLAN (CTAG) */ - /* CTAG VLAN[2..3] + Ethertype, 4 bytes, KW0[63:32] */ - cfg = KEX_LD_CFG(0x03, 0x0, 0x1, 0x0, 0x4); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LB, NPC_LT_LB_CTAG, 0, cfg); - - /* Layer B: Stacked VLAN (STAG|QinQ) */ - /* CTAG VLAN[2..3] + Ethertype, 4 bytes, KW0[63:32] */ - cfg = KEX_LD_CFG(0x03, 0x4, 0x1, 0x0, 0x4); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LB, NPC_LT_LB_STAG_QINQ, 0, cfg); - - /* Layer C: IPv4 */ - /* SIP+DIP: 8 bytes, KW2[63:0] */ - cfg = KEX_LD_CFG(0x07, 0xc, 0x1, 0x0, 0x10); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LC, NPC_LT_LC_IP, 0, cfg); - /* TOS: 1 byte, KW1[63:56] */ - cfg = KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0xf); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LC, NPC_LT_LC_IP, 1, cfg); - - /* Layer D:UDP */ - /* SPORT: 2 bytes, KW3[15:0] */ - cfg = KEX_LD_CFG(0x1, 0x0, 0x1, 0x0, 0x18); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LD, NPC_LT_LD_UDP, 0, cfg); - /* DPORT: 2 bytes, KW3[31:16] */ - cfg = KEX_LD_CFG(0x1, 0x2, 0x1, 0x0, 0x1a); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LD, NPC_LT_LD_UDP, 1, cfg); - - /* Layer D:TCP */ - /* SPORT: 2 bytes, KW3[15:0] */ - cfg = KEX_LD_CFG(0x1, 0x0, 0x1, 0x0, 0x18); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LD, NPC_LT_LD_TCP, 0, cfg); - /* DPORT: 2 bytes, KW3[31:16] */ - cfg = KEX_LD_CFG(0x1, 0x2, 0x1, 0x0, 0x1a); - SET_KEX_LD(NIX_INTF_RX, NPC_LID_LD, NPC_LT_LD_TCP, 1, cfg); -} - static void npc_program_mkex_profile(struct rvu *rvu, int blkaddr, - struct npc_mcam_kex *mkex) + const struct npc_mcam_kex *mkex) { int lid, lt, ld, fl; @@ -820,34 +773,31 @@ static void npc_program_mkex_profile(struct rvu *rvu, int blkaddr, } } -/* strtoull of "mkexprof" with base:36 */ -#define MKEX_SIGN 0x19bbfdbd15f #define MKEX_END_SIGN 0xdeadbeef -static void npc_load_mkex_profile(struct rvu *rvu, int blkaddr) +static void npc_load_mkex_profile(struct rvu *rvu, int blkaddr, + const char *mkex_profile) { - const char *mkex_profile = rvu->mkex_pfl_name; struct device *dev = &rvu->pdev->dev; - void __iomem *mkex_prfl_addr = NULL; struct npc_mcam_kex *mcam_kex; - u64 prfl_addr; - u64 prfl_sz; + void *mkex_prfl_addr = NULL; + u64 prfl_addr, prfl_sz; /* If user not selected mkex profile */ - if (!strncmp(mkex_profile, "default", MKEX_NAME_LEN)) - goto load_default; + if (!strncmp(mkex_profile, def_pfl_name, MKEX_NAME_LEN)) + goto program_mkex; if (!rvu->fwdata) - goto load_default; + goto program_mkex; prfl_addr = rvu->fwdata->mcam_addr; prfl_sz = rvu->fwdata->mcam_sz; if (!prfl_addr || !prfl_sz) - goto load_default; + goto program_mkex; - mkex_prfl_addr = ioremap_wc(prfl_addr, prfl_sz); + mkex_prfl_addr = memremap(prfl_addr, prfl_sz, MEMREMAP_WC); if (!mkex_prfl_addr) - 
goto load_default; + goto program_mkex; mcam_kex = (struct npc_mcam_kex *)mkex_prfl_addr; @@ -859,35 +809,27 @@ static void npc_load_mkex_profile(struct rvu *rvu, int blkaddr) * parse nibble enable configuration has to be * identical for both Rx and Tx interfaces. */ - if (is_rvu_96xx_B0(rvu) && - mcam_kex->keyx_cfg[NIX_INTF_RX] != - mcam_kex->keyx_cfg[NIX_INTF_TX]) - goto load_default; - - /* Program selected mkex profile */ - npc_program_mkex_profile(rvu, blkaddr, mcam_kex); - - goto unmap; + if (!is_rvu_96xx_B0(rvu) || + mcam_kex->keyx_cfg[NIX_INTF_RX] == mcam_kex->keyx_cfg[NIX_INTF_TX]) + rvu->kpu.mkex = mcam_kex; + goto program_mkex; } mcam_kex++; prfl_sz -= sizeof(struct npc_mcam_kex); } - dev_warn(dev, "Failed to load requested profile: %s\n", - rvu->mkex_pfl_name); + dev_warn(dev, "Failed to load requested profile: %s\n", mkex_profile); -load_default: - dev_info(rvu->dev, "Using default mkex profile\n"); - /* Config packet data and flags extraction into PARSE result */ - npc_config_ldata_extract(rvu, blkaddr); - -unmap: +program_mkex: + dev_info(rvu->dev, "Using %s mkex profile\n", rvu->kpu.mkex->name); + /* Program selected mkex profile */ + npc_program_mkex_profile(rvu, blkaddr, rvu->kpu.mkex); if (mkex_prfl_addr) - iounmap(mkex_prfl_addr); + memunmap(mkex_prfl_addr); } static void npc_config_kpuaction(struct rvu *rvu, int blkaddr, - struct npc_kpu_profile_action *kpuaction, + const struct npc_kpu_profile_action *kpuaction, int kpu, int entry, bool pkind) { struct npc_kpu_action0 action0 = {0}; @@ -929,7 +871,7 @@ static void npc_config_kpuaction(struct rvu *rvu, int blkaddr, } static void npc_config_kpucam(struct rvu *rvu, int blkaddr, - struct npc_kpu_profile_cam *kpucam, + const struct npc_kpu_profile_cam *kpucam, int kpu, int entry) { struct npc_kpu_cam cam0 = {0}; @@ -957,7 +899,7 @@ static inline u64 enable_mask(int count) } static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu, - struct npc_kpu_profile *profile) + const struct npc_kpu_profile *profile) { int entry, num_entries, max_entries; @@ -995,6 +937,27 @@ static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu, rvu_write64(rvu, blkaddr, NPC_AF_KPUX_CFG(kpu), 0x01); } +static int npc_prepare_default_kpu(struct npc_kpu_profile_adapter *profile) +{ + profile->name = def_pfl_name; + profile->version = NPC_KPU_PROFILE_VER; + profile->ikpu = ikpu_action_entries; + profile->pkinds = ARRAY_SIZE(ikpu_action_entries); + profile->kpu = npc_kpu_profiles; + profile->kpus = ARRAY_SIZE(npc_kpu_profiles); + profile->lt_def = &npc_lt_defaults; + profile->mkex = &npc_mkex_default; + + return 0; +} + +static void npc_load_kpu_profile(struct rvu *rvu) +{ + struct npc_kpu_profile_adapter *profile = &rvu->kpu; + + npc_prepare_default_kpu(profile); +} + static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) { struct rvu_hwinfo *hw = rvu->hw; @@ -1013,25 +976,26 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) rvu_write64(rvu, blkaddr, NPC_AF_KPUX_CFG(idx), 0x00); } + /* Load and customize KPU profile. */ + npc_load_kpu_profile(rvu); + /* First program IKPU profile i.e PKIND configs. * Check HW max count to avoid configuring junk or * writing to unsupported CSR addresses. 
*/ pkind = &hw->pkind; - num_pkinds = ARRAY_SIZE(ikpu_action_entries); + num_pkinds = rvu->kpu.pkinds; num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds); for (idx = 0; idx < num_pkinds; idx++) - npc_config_kpuaction(rvu, blkaddr, - &ikpu_action_entries[idx], 0, idx, true); + npc_config_kpuaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], 0, idx, true); /* Program KPU CAM and Action profiles */ - num_kpus = ARRAY_SIZE(npc_kpu_profiles); + num_kpus = rvu->kpu.kpus; num_kpus = min_t(int, hw->npc_kpus, num_kpus); for (idx = 0; idx < num_kpus; idx++) - npc_program_kpu_profile(rvu, blkaddr, - idx, &npc_kpu_profiles[idx]); + npc_program_kpu_profile(rvu, blkaddr, idx, &rvu->kpu.kpu[idx]); } static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) @@ -1156,11 +1120,11 @@ free_mem: int rvu_npc_init(struct rvu *rvu) { + struct npc_kpu_profile_adapter *kpu = &rvu->kpu; struct npc_pkind *pkind = &rvu->hw->pkind; struct npc_mcam *mcam = &rvu->hw->mcam; - u64 keyz = NPC_MCAM_KEY_X2; + u64 cfg, nibble_ena, rx_kex, tx_kex; int blkaddr, entry, bank, err; - u64 cfg, nibble_ena; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) { @@ -1194,13 +1158,16 @@ int rvu_npc_init(struct rvu *rvu) /* Config Outer L2, IPv4's NPC layer info */ rvu_write64(rvu, blkaddr, NPC_AF_PCK_DEF_OL2, - (NPC_LID_LA << 8) | (NPC_LT_LA_ETHER << 4) | 0x0F); + (kpu->lt_def->pck_ol2.lid << 8) | (kpu->lt_def->pck_ol2.ltype_match << 4) | + kpu->lt_def->pck_ol2.ltype_mask); rvu_write64(rvu, blkaddr, NPC_AF_PCK_DEF_OIP4, - (NPC_LID_LC << 8) | (NPC_LT_LC_IP << 4) | 0x0F); + (kpu->lt_def->pck_oip4.lid << 8) | (kpu->lt_def->pck_oip4.ltype_match << 4) | + kpu->lt_def->pck_oip4.ltype_mask); /* Config Inner IPV4 NPC layer info */ rvu_write64(rvu, blkaddr, NPC_AF_PCK_DEF_IIP4, - (NPC_LID_LG << 8) | (NPC_LT_LG_TU_IP << 4) | 0x0F); + (kpu->lt_def->pck_iip4.lid << 8) | (kpu->lt_def->pck_iip4.ltype_match << 4) | + kpu->lt_def->pck_iip4.ltype_mask); /* Enable below for Rx pkts. * - Outer IPv4 header checksum validation. @@ -1216,23 +1183,25 @@ int rvu_npc_init(struct rvu *rvu) /* Set RX and TX side MCAM search key size. * LA..LD (ltype only) + Channel */ - nibble_ena = 0x49247; - rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_RX), - ((keyz & 0x3) << 32) | nibble_ena); + rx_kex = npc_mkex_default.keyx_cfg[NIX_INTF_RX]; + tx_kex = npc_mkex_default.keyx_cfg[NIX_INTF_TX]; + nibble_ena = FIELD_GET(NPC_PARSE_NIBBLE, rx_kex); + rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_RX), rx_kex); /* Due to an errata (35786) in A0 pass silicon, parse nibble enable * configuration has to be identical for both Rx and Tx interfaces. */ - if (!is_rvu_96xx_B0(rvu)) - nibble_ena = (1ULL << 19) - 1; - rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_TX), - ((keyz & 0x3) << 32) | nibble_ena); + if (is_rvu_96xx_B0(rvu)) { + tx_kex &= ~NPC_PARSE_NIBBLE; + tx_kex |= FIELD_PREP(NPC_PARSE_NIBBLE, nibble_ena); + } + rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_TX), tx_kex); err = npc_mcam_rsrcs_init(rvu, blkaddr); if (err) return err; /* Configure MKEX profile */ - npc_load_mkex_profile(rvu, blkaddr); + npc_load_mkex_profile(rvu, blkaddr, rvu->mkex_pfl_name); /* Set TX miss action to UCAST_DEFAULT i.e * transmit the packet on NIX LF SQ's default channel. 
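[Editor's note] The rvu_npc.c hunks above replace the hand-coded "((keyz & 0x3) << 32) | nibble_ena" arithmetic with FIELD_GET()/FIELD_PREP() accessors over the MKEX key configuration. As a minimal standalone sketch of that idiom — assuming NPC_PARSE_NIBBLE is a GENMASK_ULL() covering the parse-nibble-enable bits of NPC_AF_INTFX_KEX_CFG (the exact bit range below is an assumption; the real mask lives in the AF driver headers), and with a hypothetical helper name — the errata handling that forces the TX nibble enables to match RX reduces to:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Assumed layout of the parse-nibble enable field in the KEX config. */
#define NPC_PARSE_NIBBLE	GENMASK_ULL(30, 0)

/* Copy the RX parse-nibble enables into the TX key config so both
 * interfaces extract identical nibbles (errata 35786 in A0 pass
 * silicon, which the patch gates on is_rvu_96xx_B0()).
 */
static u64 npc_kex_match_tx_nibbles(u64 rx_kex, u64 tx_kex)
{
	u64 nibble_ena = FIELD_GET(NPC_PARSE_NIBBLE, rx_kex);

	tx_kex &= ~NPC_PARSE_NIBBLE;
	tx_kex |= FIELD_PREP(NPC_PARSE_NIBBLE, nibble_ena);
	return tx_kex;
}

rvu_npc_init() then writes rx_kex unchanged and the adjusted tx_kex into NPC_AF_INTFX_KEX_CFG for the respective interfaces, which is what the hunk above does inline.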
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c new file mode 100644 index 000000000000..56f90cf9c4c0 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver tracepoints + * + * Copyright (C) 2020 Marvell International Ltd. + */ + +#define CREATE_TRACE_POINTS +#include "rvu_trace.h" + +EXPORT_TRACEPOINT_SYMBOL(otx2_msg_alloc); +EXPORT_TRACEPOINT_SYMBOL(otx2_msg_interrupt); +EXPORT_TRACEPOINT_SYMBOL(otx2_msg_process); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h new file mode 100644 index 000000000000..e6609068e81b --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Admin Function driver tracepoints + * + * Copyright (C) 2020 Marvell International Ltd. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvu + +#if !defined(__RVU_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVU_TRACE_H + +#include <linux/types.h> +#include <linux/tracepoint.h> +#include <linux/pci.h> + +TRACE_EVENT(otx2_msg_alloc, + TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size), + TP_ARGS(pdev, id, size), + TP_STRUCT__entry(__string(dev, pci_name(pdev)) + __field(u16, id) + __field(u64, size) + ), + TP_fast_assign(__assign_str(dev, pci_name(pdev)) + __entry->id = id; + __entry->size = size; + ), + TP_printk("[%s] msg:(0x%x) size:%lld\n", __get_str(dev), + __entry->id, __entry->size) +); + +TRACE_EVENT(otx2_msg_send, + TP_PROTO(const struct pci_dev *pdev, u16 num_msgs, u64 msg_size), + TP_ARGS(pdev, num_msgs, msg_size), + TP_STRUCT__entry(__string(dev, pci_name(pdev)) + __field(u16, num_msgs) + __field(u64, msg_size) + ), + TP_fast_assign(__assign_str(dev, pci_name(pdev)) + __entry->num_msgs = num_msgs; + __entry->msg_size = msg_size; + ), + TP_printk("[%s] sent %d msg(s) of size:%lld\n", __get_str(dev), + __entry->num_msgs, __entry->msg_size) +); + +TRACE_EVENT(otx2_msg_check, + TP_PROTO(const struct pci_dev *pdev, u16 reqid, u16 rspid, int rc), + TP_ARGS(pdev, reqid, rspid, rc), + TP_STRUCT__entry(__string(dev, pci_name(pdev)) + __field(u16, reqid) + __field(u16, rspid) + __field(int, rc) + ), + TP_fast_assign(__assign_str(dev, pci_name(pdev)) + __entry->reqid = reqid; + __entry->rspid = rspid; + __entry->rc = rc; + ), + TP_printk("[%s] req->id:0x%x rsp->id:0x%x resp_code:%d\n", + __get_str(dev), __entry->reqid, + __entry->rspid, __entry->rc) +); + +TRACE_EVENT(otx2_msg_interrupt, + TP_PROTO(const struct pci_dev *pdev, const char *msg, u64 intr), + TP_ARGS(pdev, msg, intr), + TP_STRUCT__entry(__string(dev, pci_name(pdev)) + __string(str, msg) + __field(u64, intr) + ), + TP_fast_assign(__assign_str(dev, pci_name(pdev)) + __assign_str(str, msg) + __entry->intr = intr; + ), + TP_printk("[%s] mbox interrupt %s (0x%llx)\n", __get_str(dev), + __get_str(str), __entry->intr) +); + +TRACE_EVENT(otx2_msg_process, + TP_PROTO(const struct pci_dev *pdev, u16 id, int err), + TP_ARGS(pdev, id, err), + TP_STRUCT__entry(__string(dev, pci_name(pdev)) + __field(u16, id) + __field(int, err) + ), + TP_fast_assign(__assign_str(dev, pci_name(pdev)) + __entry->id = id; + __entry->err = err; + ), + TP_printk("[%s] msg:(0x%x) error:%d\n", __get_str(dev), + __entry->id, __entry->err) +); + +#endif /* __RVU_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define 
TRACE_INCLUDE_PATH . + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE rvu_trace + +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 778df331c8ac..b2c6385707c9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_OCTEONTX2_PF) += octeontx2_nicpf.o obj-$(CONFIG_OCTEONTX2_VF) += octeontx2_nicvf.o -octeontx2_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o +octeontx2_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ + otx2_ptp.o octeontx2_nicvf-y := otx2_vf.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 93c4cf7fedbf..d2581090f9a4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -355,7 +355,7 @@ int otx2_rss_init(struct otx2_nic *pfvf) rss->flowkey_cfg = rss->enable ? rss->flowkey_cfg : NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6 | NIX_FLOW_KEY_TYPE_TCP | NIX_FLOW_KEY_TYPE_UDP | - NIX_FLOW_KEY_TYPE_SCTP; + NIX_FLOW_KEY_TYPE_SCTP | NIX_FLOW_KEY_TYPE_VLAN; ret = otx2_set_flowkey_cfg(pfvf); if (ret) @@ -365,6 +365,95 @@ int otx2_rss_init(struct otx2_nic *pfvf) return 0; } +/* Setup UDP segmentation algorithm in HW */ +static void otx2_setup_udp_segmentation(struct nix_lso_format_cfg *lso, bool v4) +{ + struct nix_lso_format *field; + + field = (struct nix_lso_format *)&lso->fields[0]; + lso->field_mask = GENMASK(18, 0); + + /* IP's Length field */ + field->layer = NIX_TXLAYER_OL3; + /* In ipv4, length field is at offset 2 bytes, for ipv6 it's 4 */ + field->offset = v4 ? 
2 : 4; + field->sizem1 = 1; /* i.e 2 bytes */ + field->alg = NIX_LSOALG_ADD_PAYLEN; + field++; + + /* No ID field in IPv6 header */ + if (v4) { + /* Increment IPID */ + field->layer = NIX_TXLAYER_OL3; + field->offset = 4; + field->sizem1 = 1; /* i.e 2 bytes */ + field->alg = NIX_LSOALG_ADD_SEGNUM; + field++; + } + + /* Update length in UDP header */ + field->layer = NIX_TXLAYER_OL4; + field->offset = 4; + field->sizem1 = 1; + field->alg = NIX_LSOALG_ADD_PAYLEN; +} + +/* Setup segmentation algorithms in HW and retrieve algorithm index */ +void otx2_setup_segmentation(struct otx2_nic *pfvf) +{ + struct nix_lso_format_cfg_rsp *rsp; + struct nix_lso_format_cfg *lso; + struct otx2_hw *hw = &pfvf->hw; + int err; + + mutex_lock(&pfvf->mbox.lock); + + /* UDPv4 segmentation */ + lso = otx2_mbox_alloc_msg_nix_lso_format_cfg(&pfvf->mbox); + if (!lso) + goto fail; + + /* Setup UDP/IP header fields that HW should update per segment */ + otx2_setup_udp_segmentation(lso, true); + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + goto fail; + + rsp = (struct nix_lso_format_cfg_rsp *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &lso->hdr); + if (IS_ERR(rsp)) + goto fail; + + hw->lso_udpv4_idx = rsp->lso_format_idx; + + /* UDPv6 segmentation */ + lso = otx2_mbox_alloc_msg_nix_lso_format_cfg(&pfvf->mbox); + if (!lso) + goto fail; + + /* Setup UDP/IP header fields that HW should update per segment */ + otx2_setup_udp_segmentation(lso, false); + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + goto fail; + + rsp = (struct nix_lso_format_cfg_rsp *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &lso->hdr); + if (IS_ERR(rsp)) + goto fail; + + hw->lso_udpv6_idx = rsp->lso_format_idx; + mutex_unlock(&pfvf->mbox.lock); + return; +fail: + mutex_unlock(&pfvf->mbox.lock); + netdev_info(pfvf->netdev, + "Failed to get LSO index for UDP GSO offload, disabling\n"); + pfvf->netdev->hw_features &= ~NETIF_F_GSO_UDP_L4; +} + void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx) { /* Configure CQE interrupt coalescing parameters @@ -671,6 +760,13 @@ static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) if (!sq->sg) return -ENOMEM; + if (pfvf->ptp) { + err = qmem_alloc(pfvf->dev, &sq->timestamps, qset->sqe_cnt, + sizeof(*sq->timestamps)); + if (err) + return err; + } + sq->head = 0; sq->sqe_per_sqb = (pfvf->hw.sqb_size / sq->sqe_size) - 1; sq->num_sqbs = (qset->sqe_cnt + sq->sqe_per_sqb) / sq->sqe_per_sqb; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 2fa29889522e..d6253f2a414d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -13,10 +13,14 @@ #include <linux/pci.h> #include <linux/iommu.h> +#include <linux/net_tstamp.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/timecounter.h> #include <mbox.h> #include "otx2_reg.h" #include "otx2_txrx.h" +#include <rvu_trace.h> /* PCI device IDs */ #define PCI_DEVID_OCTEONTX2_RVU_PF 0xA063 @@ -174,9 +178,11 @@ struct otx2_hw { u16 rq_skid; u8 cq_time_wait; - /* For TSO segmentation */ + /* Segmentation */ u8 lso_tsov4_idx; u8 lso_tsov6_idx; + u8 lso_udpv4_idx; + u8 lso_udpv6_idx; u8 hw_tso; /* MSI-X */ @@ -209,6 +215,17 @@ struct refill_work { struct otx2_nic *pf; }; +struct otx2_ptp { + struct ptp_clock_info ptp_info; + struct ptp_clock *ptp_clock; + struct otx2_nic *nic; + + struct cyclecounter cycle_counter; + struct timecounter time_counter; +}; + +#define OTX2_HW_TIMESTAMP_LEN 
8 + struct otx2_nic { void __iomem *reg_base; struct net_device *netdev; @@ -216,6 +233,8 @@ struct otx2_nic { u16 max_frs; u16 rbsize; /* Receive buffer size */ +#define OTX2_FLAG_RX_TSTAMP_ENABLED BIT_ULL(0) +#define OTX2_FLAG_TX_TSTAMP_ENABLED BIT_ULL(1) #define OTX2_FLAG_INTF_DOWN BIT_ULL(2) #define OTX2_FLAG_RX_PAUSE_ENABLED BIT_ULL(9) #define OTX2_FLAG_TX_PAUSE_ENABLED BIT_ULL(10) @@ -251,6 +270,9 @@ struct otx2_nic { /* Block address of NIX either BLKADDR_NIX0 or BLKADDR_NIX1 */ int nix_blkaddr; + + struct otx2_ptp *ptp; + struct hwtstamp_config tstamp; }; static inline bool is_otx2_lbkvf(struct pci_dev *pdev) @@ -502,6 +524,7 @@ static struct _req_type __maybe_unused \ return NULL; \ req->hdr.sig = OTX2_MBOX_REQ_SIG; \ req->hdr.id = _id; \ + trace_otx2_msg_alloc(mbox->mbox.pdev, _id, sizeof(*req)); \ return req; \ } @@ -561,6 +584,7 @@ void otx2_tx_timeout(struct net_device *netdev, unsigned int txq); void otx2_get_mac_from_af(struct net_device *netdev); void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx); int otx2_config_pause_frm(struct otx2_nic *pfvf); +void otx2_setup_segmentation(struct otx2_nic *pfvf); /* RVU block related APIs */ int otx2_attach_npa_nix(struct otx2_nic *pfvf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index d59f5a9c7273..662fb80dbb9d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -13,8 +13,10 @@ #include <linux/stddef.h> #include <linux/etherdevice.h> #include <linux/log2.h> +#include <linux/net_tstamp.h> #include "otx2_common.h" +#include "otx2_ptp.h" #define DRV_NAME "octeontx2-nicpf" #define DRV_VF_NAME "octeontx2-nicvf" @@ -426,6 +428,8 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf, /* Mimimum is IPv4 and IPv6, SIP/DIP */ nfc->data = RXH_IP_SRC | RXH_IP_DST; + if (rss->flowkey_cfg & NIX_FLOW_KEY_TYPE_VLAN) + nfc->data |= RXH_VLAN; switch (nfc->flow_type) { case TCP_V4_FLOW: @@ -475,6 +479,11 @@ static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf, if (!(nfc->data & RXH_IP_SRC) || !(nfc->data & RXH_IP_DST)) return -EINVAL; + if (nfc->data & RXH_VLAN) + rss_cfg |= NIX_FLOW_KEY_TYPE_VLAN; + else + rss_cfg &= ~NIX_FLOW_KEY_TYPE_VLAN; + switch (nfc->flow_type) { case TCP_V4_FLOW: case TCP_V6_FLOW: @@ -663,6 +672,31 @@ static u32 otx2_get_link(struct net_device *netdev) return pfvf->linfo.link_up; } +static int otx2_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + + if (!pfvf->ptp) + return ethtool_op_get_ts_info(netdev, info); + + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->phc_index = otx2_ptp_clock_index(pfvf); + + info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); + + info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_ALL); + + return 0; +} + static const struct ethtool_ops otx2_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@ -687,6 +721,7 @@ static const struct ethtool_ops otx2_ethtool_ops = { .set_msglevel = otx2_set_msglevel, .get_pauseparam = otx2_get_pauseparam, .set_pauseparam = otx2_set_pauseparam, + .get_ts_info = otx2_get_ts_info, }; void otx2_set_ethtool_ops(struct net_device *netdev) diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 2fb45670aca4..66f1a212f1f4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -21,6 +21,8 @@ #include "otx2_common.h" #include "otx2_txrx.h" #include "otx2_struct.h" +#include "otx2_ptp.h" +#include <rvu_trace.h> #define DRV_NAME "octeontx2-nicpf" #define DRV_STRING "Marvell OcteonTX2 NIC Physical Function Driver" @@ -41,6 +43,9 @@ enum { TYPE_PFVF, }; +static int otx2_config_hw_tx_tstamp(struct otx2_nic *pfvf, bool enable); +static int otx2_config_hw_rx_tstamp(struct otx2_nic *pfvf, bool enable); + static int otx2_change_mtu(struct net_device *netdev, int new_mtu) { bool if_up = netif_running(netdev); @@ -554,6 +559,8 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF); + trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); + return IRQ_HANDLED; } @@ -937,6 +944,9 @@ static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq) otx2_write64(pf, RVU_PF_INT, BIT_ULL(0)); mbox = &pf->mbox; + + trace_otx2_msg_interrupt(mbox->mbox.pdev, "AF to PF", BIT_ULL(0)); + otx2_queue_work(mbox, pf->mbox_wq, 0, 1, 1, TYPE_PFAF); return IRQ_HANDLED; @@ -1282,7 +1292,8 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt; /* Get the size of receive buffers to allocate */ - pf->rbsize = RCV_FRAG_LEN(pf->netdev->mtu + OTX2_ETH_HLEN); + pf->rbsize = RCV_FRAG_LEN(OTX2_HW_TIMESTAMP_LEN + pf->netdev->mtu + + OTX2_ETH_HLEN); mutex_lock(&mbox->lock); /* NPA init */ @@ -1497,6 +1508,9 @@ int otx2_open(struct net_device *netdev) if (err) goto err_disable_napi; + /* Setup segmentation algorithms, if failed, clear offload capability */ + otx2_setup_segmentation(pf); + /* Initialize RSS */ err = otx2_rss_init(pf); if (err) @@ -1548,6 +1562,16 @@ int otx2_open(struct net_device *netdev) otx2_set_cints_affinity(pf); + /* When reinitializing enable time stamping if it is enabled before */ + if (pf->flags & OTX2_FLAG_TX_TSTAMP_ENABLED) { + pf->flags &= ~OTX2_FLAG_TX_TSTAMP_ENABLED; + otx2_config_hw_tx_tstamp(pf, true); + } + if (pf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED) { + pf->flags &= ~OTX2_FLAG_RX_TSTAMP_ENABLED; + otx2_config_hw_rx_tstamp(pf, true); + } + pf->flags &= ~OTX2_FLAG_INTF_DOWN; /* 'intf_down' may be checked on any cpu */ smp_wmb(); @@ -1742,6 +1766,143 @@ static void otx2_reset_task(struct work_struct *work) rtnl_unlock(); } +static int otx2_config_hw_rx_tstamp(struct otx2_nic *pfvf, bool enable) +{ + struct msg_req *req; + int err; + + if (pfvf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED && enable) + return 0; + + mutex_lock(&pfvf->mbox.lock); + if (enable) + req = otx2_mbox_alloc_msg_cgx_ptp_rx_enable(&pfvf->mbox); + else + req = otx2_mbox_alloc_msg_cgx_ptp_rx_disable(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + mutex_unlock(&pfvf->mbox.lock); + return err; + } + + mutex_unlock(&pfvf->mbox.lock); + if (enable) + pfvf->flags |= OTX2_FLAG_RX_TSTAMP_ENABLED; + else + pfvf->flags &= ~OTX2_FLAG_RX_TSTAMP_ENABLED; + return 0; +} + +static int otx2_config_hw_tx_tstamp(struct otx2_nic *pfvf, bool enable) +{ + struct msg_req *req; + int err; + + if (pfvf->flags & OTX2_FLAG_TX_TSTAMP_ENABLED && enable) + return 0; + + mutex_lock(&pfvf->mbox.lock); + if (enable) + req = 
otx2_mbox_alloc_msg_nix_lf_ptp_tx_enable(&pfvf->mbox); + else + req = otx2_mbox_alloc_msg_nix_lf_ptp_tx_disable(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + mutex_unlock(&pfvf->mbox.lock); + return err; + } + + mutex_unlock(&pfvf->mbox.lock); + if (enable) + pfvf->flags |= OTX2_FLAG_TX_TSTAMP_ENABLED; + else + pfvf->flags &= ~OTX2_FLAG_TX_TSTAMP_ENABLED; + return 0; +} + +static int otx2_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct hwtstamp_config config; + + if (!pfvf->ptp) + return -ENODEV; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + otx2_config_hw_tx_tstamp(pfvf, false); + break; + case HWTSTAMP_TX_ON: + otx2_config_hw_tx_tstamp(pfvf, true); + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + otx2_config_hw_rx_tstamp(pfvf, false); + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + otx2_config_hw_rx_tstamp(pfvf, true); + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + memcpy(&pfvf->tstamp, &config, sizeof(config)); + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +} + +static int otx2_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct hwtstamp_config *cfg = &pfvf->tstamp; + + switch (cmd) { + case SIOCSHWTSTAMP: + return otx2_config_hwtstamp(netdev, req); + case SIOCGHWTSTAMP: + return copy_to_user(req->ifr_data, cfg, + sizeof(*cfg)) ? -EFAULT : 0; + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops otx2_netdev_ops = { .ndo_open = otx2_open, .ndo_stop = otx2_stop, @@ -1752,6 +1913,7 @@ static const struct net_device_ops otx2_netdev_ops = { .ndo_set_features = otx2_set_features, .ndo_tx_timeout = otx2_tx_timeout, .ndo_get_stats64 = otx2_get_stats64, + .ndo_do_ioctl = otx2_ioctl, }; static int otx2_wq_init(struct otx2_nic *pf) @@ -1924,6 +2086,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Assign default mac address */ otx2_get_mac_from_af(netdev); + /* Don't check for error. Proceed without ptp */ + otx2_ptp_init(pf); + /* NPA's pool is a stack to which SW frees buffer pointers via Aura. * HW allocates buffer pointer from stack and uses it for DMA'ing * ingress packet. 
In some scenarios HW can free back allocated buffer @@ -1939,7 +2104,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXHASH | - NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6); + NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_L4); netdev->features |= netdev->hw_features; netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL; @@ -1956,7 +2122,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_detach_rsrc; + goto err_ptp_destroy; } err = otx2_wq_init(pf); @@ -1976,6 +2142,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_unreg_netdev: unregister_netdev(netdev); +err_ptp_destroy: + otx2_ptp_destroy(pf); err_detach_rsrc: otx2_detach_resources(&pf->mbox); err_disable_mbox_intr: @@ -2117,6 +2285,11 @@ static void otx2_remove(struct pci_dev *pdev) pf = netdev_priv(netdev); + if (pf->flags & OTX2_FLAG_TX_TSTAMP_ENABLED) + otx2_config_hw_tx_tstamp(pf, false); + if (pf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED) + otx2_config_hw_rx_tstamp(pf, false); + cancel_work_sync(&pf->reset_task); /* Disable link notifications */ otx2_cgx_config_linkevents(pf, false); @@ -2126,6 +2299,7 @@ static void otx2_remove(struct pci_dev *pdev) if (pf->otx2_wq) destroy_workqueue(pf->otx2_wq); + otx2_ptp_destroy(pf); otx2_detach_resources(&pf->mbox); otx2_disable_mbox_intr(pf); otx2_pfaf_mbox_destroy(pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c new file mode 100644 index 000000000000..7bcf5246350f --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 PTP support for ethernet driver + * + * Copyright (C) 2020 Marvell International Ltd. 
+ */ + +#include "otx2_common.h" +#include "otx2_ptp.h" + +static int otx2_ptp_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm) +{ + struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp, + ptp_info); + struct ptp_req *req; + int err; + + if (!ptp->nic) + return -ENODEV; + + req = otx2_mbox_alloc_msg_ptp_op(&ptp->nic->mbox); + if (!req) + return -ENOMEM; + + req->op = PTP_OP_ADJFINE; + req->scaled_ppm = scaled_ppm; + + err = otx2_sync_mbox_msg(&ptp->nic->mbox); + if (err) + return err; + + return 0; +} + +static u64 ptp_cc_read(const struct cyclecounter *cc) +{ + struct otx2_ptp *ptp = container_of(cc, struct otx2_ptp, cycle_counter); + struct ptp_req *req; + struct ptp_rsp *rsp; + int err; + + if (!ptp->nic) + return 0; + + req = otx2_mbox_alloc_msg_ptp_op(&ptp->nic->mbox); + if (!req) + return 0; + + req->op = PTP_OP_GET_CLOCK; + + err = otx2_sync_mbox_msg(&ptp->nic->mbox); + if (err) + return 0; + + rsp = (struct ptp_rsp *)otx2_mbox_get_rsp(&ptp->nic->mbox.mbox, 0, + &req->hdr); + if (IS_ERR(rsp)) + return 0; + + return rsp->clk; +} + +static int otx2_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta) +{ + struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp, + ptp_info); + struct otx2_nic *pfvf = ptp->nic; + + mutex_lock(&pfvf->mbox.lock); + timecounter_adjtime(&ptp->time_counter, delta); + mutex_unlock(&pfvf->mbox.lock); + + return 0; +} + +static int otx2_ptp_gettime(struct ptp_clock_info *ptp_info, + struct timespec64 *ts) +{ + struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp, + ptp_info); + struct otx2_nic *pfvf = ptp->nic; + u64 nsec; + + mutex_lock(&pfvf->mbox.lock); + nsec = timecounter_read(&ptp->time_counter); + mutex_unlock(&pfvf->mbox.lock); + + *ts = ns_to_timespec64(nsec); + + return 0; +} + +static int otx2_ptp_settime(struct ptp_clock_info *ptp_info, + const struct timespec64 *ts) +{ + struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp, + ptp_info); + struct otx2_nic *pfvf = ptp->nic; + u64 nsec; + + nsec = timespec64_to_ns(ts); + + mutex_lock(&pfvf->mbox.lock); + timecounter_init(&ptp->time_counter, &ptp->cycle_counter, nsec); + mutex_unlock(&pfvf->mbox.lock); + + return 0; +} + +static int otx2_ptp_enable(struct ptp_clock_info *ptp_info, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +int otx2_ptp_init(struct otx2_nic *pfvf) +{ + struct otx2_ptp *ptp_ptr; + struct cyclecounter *cc; + struct ptp_req *req; + int err; + + mutex_lock(&pfvf->mbox.lock); + /* check if PTP block is available */ + req = otx2_mbox_alloc_msg_ptp_op(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + req->op = PTP_OP_GET_CLOCK; + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + mutex_unlock(&pfvf->mbox.lock); + return err; + } + mutex_unlock(&pfvf->mbox.lock); + + ptp_ptr = kzalloc(sizeof(*ptp_ptr), GFP_KERNEL); + if (!ptp_ptr) { + err = -ENOMEM; + goto error; + } + + ptp_ptr->nic = pfvf; + + cc = &ptp_ptr->cycle_counter; + cc->read = ptp_cc_read; + cc->mask = CYCLECOUNTER_MASK(64); + cc->mult = 1; + cc->shift = 0; + + timecounter_init(&ptp_ptr->time_counter, &ptp_ptr->cycle_counter, + ktime_to_ns(ktime_get_real())); + + ptp_ptr->ptp_info = (struct ptp_clock_info) { + .owner = THIS_MODULE, + .name = "OcteonTX2 PTP", + .max_adj = 1000000000ull, + .n_ext_ts = 0, + .n_pins = 0, + .pps = 0, + .adjfine = otx2_ptp_adjfine, + .adjtime = otx2_ptp_adjtime, + .gettime64 = otx2_ptp_gettime, + .settime64 = otx2_ptp_settime, + .enable = otx2_ptp_enable, + }; + + ptp_ptr->ptp_clock = 
ptp_clock_register(&ptp_ptr->ptp_info, pfvf->dev); + if (IS_ERR_OR_NULL(ptp_ptr->ptp_clock)) { + err = ptp_ptr->ptp_clock ? + PTR_ERR(ptp_ptr->ptp_clock) : -ENODEV; + kfree(ptp_ptr); + goto error; + } + + pfvf->ptp = ptp_ptr; + +error: + return err; +} + +void otx2_ptp_destroy(struct otx2_nic *pfvf) +{ + struct otx2_ptp *ptp = pfvf->ptp; + + if (!ptp) + return; + + ptp_clock_unregister(ptp->ptp_clock); + kfree(ptp); + pfvf->ptp = NULL; +} + +int otx2_ptp_clock_index(struct otx2_nic *pfvf) +{ + if (!pfvf->ptp) + return -ENODEV; + + return ptp_clock_index(pfvf->ptp->ptp_clock); +} + +int otx2_ptp_tstamp2time(struct otx2_nic *pfvf, u64 tstamp, u64 *tsns) +{ + if (!pfvf->ptp) + return -ENODEV; + + *tsns = timecounter_cyc2time(&pfvf->ptp->time_counter, tstamp); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h new file mode 100644 index 000000000000..706d63a43ae1 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 PTP support for ethernet driver */ + +#ifndef OTX2_PTP_H +#define OTX2_PTP_H + +int otx2_ptp_init(struct otx2_nic *pfvf); +void otx2_ptp_destroy(struct otx2_nic *pfvf); + +int otx2_ptp_clock_index(struct otx2_nic *pfvf); +int otx2_ptp_tstamp2time(struct otx2_nic *pfvf, u64 tstamp, u64 *tsns); + +#endif diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index e46834e043be..d5d7a2f37493 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -16,6 +16,7 @@ #include "otx2_common.h" #include "otx2_struct.h" #include "otx2_txrx.h" +#include "otx2_ptp.h" #define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx))) @@ -81,8 +82,11 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf, int budget, int *tx_pkts, int *tx_bytes) { struct nix_send_comp_s *snd_comp = &cqe->comp; + struct skb_shared_hwtstamps ts; struct sk_buff *skb = NULL; + u64 timestamp, tsns; struct sg_list *sg; + int err; if (unlikely(snd_comp->status) && netif_msg_tx_err(pfvf)) net_err_ratelimited("%s: TX%d: Error in send CQ status:%x\n", @@ -94,6 +98,18 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf, if (unlikely(!skb)) return; + if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) { + timestamp = ((u64 *)sq->timestamps->base)[snd_comp->sqe_id]; + if (timestamp != 1) { + err = otx2_ptp_tstamp2time(pfvf, timestamp, &tsns); + if (!err) { + memset(&ts, 0, sizeof(ts)); + ts.hwtstamp = ns_to_ktime(tsns); + skb_tstamp_tx(skb, &ts); + } + } + } + *tx_bytes += skb->len; (*tx_pkts)++; otx2_dma_unmap_skb_frags(pfvf, sg); @@ -101,16 +117,47 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf, sg->skb = (u64)NULL; } +static void otx2_set_rxtstamp(struct otx2_nic *pfvf, + struct sk_buff *skb, void *data) +{ + u64 tsns; + int err; + + if (!(pfvf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED)) + return; + + /* The first 8 bytes is the timestamp */ + err = otx2_ptp_tstamp2time(pfvf, be64_to_cpu(*(__be64 *)data), &tsns); + if (err) + return; + + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(tsns); +} + static void otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb, - u64 iova, int len) + u64 iova, int len, struct nix_rx_parse_s *parse) { struct page *page; + int off = 0; void *va; va = phys_to_virt(otx2_iova_to_phys(pfvf->iommu_domain, iova)); + + if 
(likely(!skb_shinfo(skb)->nr_frags)) { + /* Check if data starts at some nonzero offset + * from the start of the buffer. For now the + * only possible offset is 8 bytes in the case + * where packet is prepended by a timestamp. + */ + if (parse->laptr) { + otx2_set_rxtstamp(pfvf, skb, va); + off = OTX2_HW_TIMESTAMP_LEN; + } + } + page = virt_to_page(va); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - va - page_address(page), len, pfvf->rbsize); + va - page_address(page) + off, len - off, pfvf->rbsize); otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM, pfvf->rbsize, DMA_FROM_DEVICE); @@ -239,7 +286,7 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, if (unlikely(!skb)) return; - otx2_skb_add_frag(pfvf, skb, cqe->sg.seg_addr, cqe->sg.seg_size); + otx2_skb_add_frag(pfvf, skb, cqe->sg.seg_addr, cqe->sg.seg_size, parse); cq->pool_ptrs++; otx2_set_rxhash(pfvf, cqe, skb); @@ -477,15 +524,55 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, */ ip_hdr(skb)->tot_len = htons(ext->lso_sb - skb_network_offset(skb)); - } else { + } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { ext->lso_format = pfvf->hw.lso_tsov6_idx; + ipv6_hdr(skb)->payload_len = htons(ext->lso_sb - skb_network_offset(skb)); + } else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + __be16 l3_proto = vlan_get_protocol(skb); + struct udphdr *udph = udp_hdr(skb); + u16 iplen; + + ext->lso_sb = skb_transport_offset(skb) + + sizeof(struct udphdr); + + /* HW adds payload size to length fields in IP and + * UDP headers while segmentation, hence adjust the + * lengths to just header sizes. + */ + iplen = htons(ext->lso_sb - skb_network_offset(skb)); + if (l3_proto == htons(ETH_P_IP)) { + ip_hdr(skb)->tot_len = iplen; + ext->lso_format = pfvf->hw.lso_udpv4_idx; + } else { + ipv6_hdr(skb)->payload_len = iplen; + ext->lso_format = pfvf->hw.lso_udpv6_idx; + } + + udph->len = htons(sizeof(struct udphdr)); } + } else if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { + ext->tstmp = 1; } + *offset += sizeof(*ext); } +static void otx2_sqe_add_mem(struct otx2_snd_queue *sq, int *offset, + int alg, u64 iova) +{ + struct nix_sqe_mem_s *mem; + + mem = (struct nix_sqe_mem_s *)(sq->sqe_base + *offset); + mem->subdc = NIX_SUBDC_MEM; + mem->alg = alg; + mem->wmem = 1; /* wait for the memory operation */ + mem->addr = iova; + + *offset += sizeof(*mem); +} + /* Add SQE header subdescriptor structure */ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, struct nix_sqe_hdr_s *sqe_hdr, @@ -737,6 +824,21 @@ static int otx2_get_sqe_count(struct otx2_nic *pfvf, struct sk_buff *skb) return skb_shinfo(skb)->gso_segs; } +static void otx2_set_txtstamp(struct otx2_nic *pfvf, struct sk_buff *skb, + struct otx2_snd_queue *sq, int *offset) +{ + u64 iova; + + if (!skb_shinfo(skb)->gso_size && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + iova = sq->timestamps->iova + (sq->head * sizeof(u64)); + otx2_sqe_add_mem(sq, offset, NIX_SENDMEMALG_E_SETTSTMP, iova); + } else { + skb_tx_timestamp(skb); + } +} + bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, struct sk_buff *skb, u16 qidx) { @@ -790,6 +892,8 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, return false; } + otx2_set_txtstamp(pfvf, skb, sq, &offset); + sqe_hdr->sizem1 = (offset / 16) - 1; netdev_tx_sent_queue(txq, skb->len); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h 
b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index da97f2d4416f..73af15685657 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -91,6 +91,7 @@ struct otx2_snd_queue { struct qmem *sqe; struct qmem *tso_hdrs; struct sg_list *sg; + struct qmem *timestamps; struct queue_stats stats; u16 sqb_count; u64 *sqb_ptrs; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 2f90f1721441..67fabf265fe6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -187,6 +187,8 @@ static irqreturn_t otx2vf_vfaf_mbox_intr_handler(int irq, void *vf_irq) mdev = &mbox->dev[0]; otx2_sync_mbox_bbuf(mbox, 0); + trace_otx2_msg_interrupt(mbox->pdev, "PF to VF", BIT_ULL(0)); + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); if (hdr->num_msgs) { vf->mbox.num_msgs = hdr->num_msgs; @@ -553,7 +555,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXHASH | - NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6; + NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_L4; netdev->features = netdev->hw_features; netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; diff --git a/drivers/net/ethernet/marvell/prestera/Kconfig b/drivers/net/ethernet/marvell/prestera/Kconfig new file mode 100644 index 000000000000..b1fcc44f566a --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/Kconfig @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Marvell Prestera drivers configuration +# + +config PRESTERA + tristate "Marvell Prestera Switch ASICs support" + depends on NET_SWITCHDEV && VLAN_8021Q + select NET_DEVLINK + help + This driver supports Marvell Prestera Switch ASICs family. + + To compile this driver as a module, choose M here: the + module will be called prestera. + +config PRESTERA_PCI + tristate "PCI interface driver for Marvell Prestera Switch ASICs family" + depends on PCI && HAS_IOMEM && PRESTERA + default PRESTERA + help + This is implementation of PCI interface support for Marvell Prestera + Switch ASICs family. + + To compile this driver as a module, choose M here: the + module will be called prestera_pci. diff --git a/drivers/net/ethernet/marvell/prestera/Makefile b/drivers/net/ethernet/marvell/prestera/Makefile new file mode 100644 index 000000000000..93129e32ebc5 --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_PRESTERA) += prestera.o +prestera-objs := prestera_main.o prestera_hw.o prestera_dsa.o \ + prestera_rxtx.o prestera_devlink.o prestera_ethtool.o \ + prestera_switchdev.o + +obj-$(CONFIG_PRESTERA_PCI) += prestera_pci.o diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h new file mode 100644 index 000000000000..55aa4bf8a27c --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved. 
*/ + +#ifndef _PRESTERA_H_ +#define _PRESTERA_H_ + +#include <linux/notifier.h> +#include <linux/skbuff.h> +#include <linux/workqueue.h> +#include <net/devlink.h> +#include <uapi/linux/if_ether.h> + +#define PRESTERA_DRV_NAME "prestera" + +#define PRESTERA_DEFAULT_VID 1 + +struct prestera_fw_rev { + u16 maj; + u16 min; + u16 sub; +}; + +struct prestera_port_stats { + u64 good_octets_received; + u64 bad_octets_received; + u64 mac_trans_error; + u64 broadcast_frames_received; + u64 multicast_frames_received; + u64 frames_64_octets; + u64 frames_65_to_127_octets; + u64 frames_128_to_255_octets; + u64 frames_256_to_511_octets; + u64 frames_512_to_1023_octets; + u64 frames_1024_to_max_octets; + u64 excessive_collision; + u64 multicast_frames_sent; + u64 broadcast_frames_sent; + u64 fc_sent; + u64 fc_received; + u64 buffer_overrun; + u64 undersize; + u64 fragments; + u64 oversize; + u64 jabber; + u64 rx_error_frame_received; + u64 bad_crc; + u64 collisions; + u64 late_collision; + u64 unicast_frames_received; + u64 unicast_frames_sent; + u64 sent_multiple; + u64 sent_deferred; + u64 good_octets_sent; +}; + +struct prestera_port_caps { + u64 supp_link_modes; + u8 supp_fec; + u8 type; + u8 transceiver; +}; + +struct prestera_port { + struct net_device *dev; + struct prestera_switch *sw; + struct devlink_port dl_port; + u32 id; + u32 hw_id; + u32 dev_id; + u16 fp_id; + u16 pvid; + bool autoneg; + u64 adver_link_modes; + u8 adver_fec; + struct prestera_port_caps caps; + struct list_head list; + struct list_head vlans_list; + struct { + struct prestera_port_stats stats; + struct delayed_work caching_dw; + } cached_hw_stats; +}; + +struct prestera_device { + struct device *dev; + u8 __iomem *ctl_regs; + u8 __iomem *pp_regs; + struct prestera_fw_rev fw_rev; + void *priv; + + /* called by device driver to handle received packets */ + void (*recv_pkt)(struct prestera_device *dev); + + /* called by device driver to pass event up to the higher layer */ + int (*recv_msg)(struct prestera_device *dev, void *msg, size_t size); + + /* called by higher layer to send request to the firmware */ + int (*send_req)(struct prestera_device *dev, void *in_msg, + size_t in_size, void *out_msg, size_t out_size, + unsigned int wait); +}; + +enum prestera_event_type { + PRESTERA_EVENT_TYPE_UNSPEC, + + PRESTERA_EVENT_TYPE_PORT, + PRESTERA_EVENT_TYPE_FDB, + PRESTERA_EVENT_TYPE_RXTX, + + PRESTERA_EVENT_TYPE_MAX +}; + +enum prestera_rxtx_event_id { + PRESTERA_RXTX_EVENT_UNSPEC, + PRESTERA_RXTX_EVENT_RCV_PKT, +}; + +enum prestera_port_event_id { + PRESTERA_PORT_EVENT_UNSPEC, + PRESTERA_PORT_EVENT_STATE_CHANGED, +}; + +struct prestera_port_event { + u32 port_id; + union { + u32 oper_state; + } data; +}; + +enum prestera_fdb_event_id { + PRESTERA_FDB_EVENT_UNSPEC, + PRESTERA_FDB_EVENT_LEARNED, + PRESTERA_FDB_EVENT_AGED, +}; + +struct prestera_fdb_event { + u32 port_id; + u32 vid; + union { + u8 mac[ETH_ALEN]; + } data; +}; + +struct prestera_event { + u16 id; + union { + struct prestera_port_event port_evt; + struct prestera_fdb_event fdb_evt; + }; +}; + +struct prestera_switchdev; +struct prestera_rxtx; + +struct prestera_switch { + struct prestera_device *dev; + struct prestera_switchdev *swdev; + struct prestera_rxtx *rxtx; + struct list_head event_handlers; + struct notifier_block netdev_nb; + char base_mac[ETH_ALEN]; + struct list_head port_list; + rwlock_t port_list_lock; + u32 port_count; + u32 mtu_min; + u32 mtu_max; + u8 id; +}; + +struct prestera_rxtx_params { + bool use_sdma; + u32 map_addr; +}; + +#define 
prestera_dev(sw) ((sw)->dev->dev) + +static inline void prestera_write(const struct prestera_switch *sw, + unsigned int reg, u32 val) +{ + writel(val, sw->dev->pp_regs + reg); +} + +static inline u32 prestera_read(const struct prestera_switch *sw, + unsigned int reg) +{ + return readl(sw->dev->pp_regs + reg); +} + +int prestera_device_register(struct prestera_device *dev); +void prestera_device_unregister(struct prestera_device *dev); + +struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw, + u32 dev_id, u32 hw_id); + +int prestera_port_autoneg_set(struct prestera_port *port, bool enable, + u64 adver_link_modes, u8 adver_fec); + +struct prestera_port *prestera_find_port(struct prestera_switch *sw, u32 id); + +struct prestera_port *prestera_port_dev_lower_find(struct net_device *dev); + +int prestera_port_pvid_set(struct prestera_port *port, u16 vid); + +bool prestera_netdev_check(const struct net_device *dev); + +#endif /* _PRESTERA_H_ */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c new file mode 100644 index 000000000000..94c185a0e2b8 --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved */ + +#include <net/devlink.h> + +#include "prestera_devlink.h" + +static int prestera_dl_info_get(struct devlink *dl, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct prestera_switch *sw = devlink_priv(dl); + char buf[16]; + int err; + + err = devlink_info_driver_name_put(req, PRESTERA_DRV_NAME); + if (err) + return err; + + snprintf(buf, sizeof(buf), "%d.%d.%d", + sw->dev->fw_rev.maj, + sw->dev->fw_rev.min, + sw->dev->fw_rev.sub); + + return devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + buf); +} + +static const struct devlink_ops prestera_dl_ops = { + .info_get = prestera_dl_info_get, +}; + +struct prestera_switch *prestera_devlink_alloc(void) +{ + struct devlink *dl; + + dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch)); + + return devlink_priv(dl); +} + +void prestera_devlink_free(struct prestera_switch *sw) +{ + struct devlink *dl = priv_to_devlink(sw); + + devlink_free(dl); +} + +int prestera_devlink_register(struct prestera_switch *sw) +{ + struct devlink *dl = priv_to_devlink(sw); + int err; + + err = devlink_register(dl, sw->dev->dev); + if (err) + dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err); + + return err; +} + +void prestera_devlink_unregister(struct prestera_switch *sw) +{ + struct devlink *dl = priv_to_devlink(sw); + + devlink_unregister(dl); +} + +int prestera_devlink_port_register(struct prestera_port *port) +{ + struct prestera_switch *sw = port->sw; + struct devlink *dl = priv_to_devlink(sw); + struct devlink_port_attrs attrs = {}; + int err; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = port->fp_id; + attrs.switch_id.id_len = sizeof(sw->id); + memcpy(attrs.switch_id.id, &sw->id, attrs.switch_id.id_len); + + devlink_port_attrs_set(&port->dl_port, &attrs); + + err = devlink_port_register(dl, &port->dl_port, port->fp_id); + if (err) { + dev_err(prestera_dev(sw), "devlink_port_register failed: %d\n", err); + return err; + } + + return 0; +} + +void prestera_devlink_port_unregister(struct prestera_port *port) +{ + devlink_port_unregister(&port->dl_port); +} + +void 
prestera_devlink_port_set(struct prestera_port *port) +{ + devlink_port_type_eth_set(&port->dl_port, port->dev); +} + +void prestera_devlink_port_clear(struct prestera_port *port) +{ + devlink_port_type_clear(&port->dl_port); +} + +struct devlink_port *prestera_devlink_get_port(struct net_device *dev) +{ + struct prestera_port *port = netdev_priv(dev); + + return &port->dl_port; +} diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h new file mode 100644 index 000000000000..51bee9f75415 --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved. */ + +#ifndef _PRESTERA_DEVLINK_H_ +#define _PRESTERA_DEVLINK_H_ + +#include "prestera.h" + +struct prestera_switch *prestera_devlink_alloc(void); +void prestera_devlink_free(struct prestera_switch *sw); + +int prestera_devlink_register(struct prestera_switch *sw); +void prestera_devlink_unregister(struct prestera_switch *sw); + +int prestera_devlink_port_register(struct prestera_port *port); +void prestera_devlink_port_unregister(struct prestera_port *port); + +void prestera_devlink_port_set(struct prestera_port *port); +void prestera_devlink_port_clear(struct prestera_port *port); + +struct devlink_port *prestera_devlink_get_port(struct net_device *dev); + +#endif /* _PRESTERA_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_dsa.c b/drivers/net/ethernet/marvell/prestera/prestera_dsa.c new file mode 100644 index 000000000000..a5e01c7a307b --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_dsa.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2020 Marvell International Ltd. 
All rights reserved */ + +#include <linux/bitfield.h> +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/string.h> + +#include "prestera_dsa.h" + +#define PRESTERA_DSA_W0_CMD GENMASK(31, 30) +#define PRESTERA_DSA_W0_IS_TAGGED BIT(29) +#define PRESTERA_DSA_W0_DEV_NUM GENMASK(28, 24) +#define PRESTERA_DSA_W0_PORT_NUM GENMASK(23, 19) +#define PRESTERA_DSA_W0_VPT GENMASK(15, 13) +#define PRESTERA_DSA_W0_EXT_BIT BIT(12) +#define PRESTERA_DSA_W0_VID GENMASK(11, 0) + +#define PRESTERA_DSA_W1_EXT_BIT BIT(31) +#define PRESTERA_DSA_W1_CFI_BIT BIT(30) +#define PRESTERA_DSA_W1_PORT_NUM GENMASK(11, 10) + +#define PRESTERA_DSA_W2_EXT_BIT BIT(31) +#define PRESTERA_DSA_W2_PORT_NUM BIT(20) + +#define PRESTERA_DSA_W3_VID GENMASK(30, 27) +#define PRESTERA_DSA_W3_DST_EPORT GENMASK(23, 7) +#define PRESTERA_DSA_W3_DEV_NUM GENMASK(6, 0) + +#define PRESTERA_DSA_VID GENMASK(15, 12) +#define PRESTERA_DSA_DEV_NUM GENMASK(11, 5) + +int prestera_dsa_parse(struct prestera_dsa *dsa, const u8 *dsa_buf) +{ + __be32 *dsa_words = (__be32 *)dsa_buf; + enum prestera_dsa_cmd cmd; + u32 words[4]; + u32 field; + + words[0] = ntohl(dsa_words[0]); + words[1] = ntohl(dsa_words[1]); + words[2] = ntohl(dsa_words[2]); + words[3] = ntohl(dsa_words[3]); + + /* set the common parameters */ + cmd = (enum prestera_dsa_cmd)FIELD_GET(PRESTERA_DSA_W0_CMD, words[0]); + + /* only to CPU is supported */ + if (unlikely(cmd != PRESTERA_DSA_CMD_TO_CPU)) + return -EINVAL; + + if (FIELD_GET(PRESTERA_DSA_W0_EXT_BIT, words[0]) == 0) + return -EINVAL; + if (FIELD_GET(PRESTERA_DSA_W1_EXT_BIT, words[1]) == 0) + return -EINVAL; + if (FIELD_GET(PRESTERA_DSA_W2_EXT_BIT, words[2]) == 0) + return -EINVAL; + + field = FIELD_GET(PRESTERA_DSA_W3_VID, words[3]); + + dsa->vlan.is_tagged = FIELD_GET(PRESTERA_DSA_W0_IS_TAGGED, words[0]); + dsa->vlan.cfi_bit = FIELD_GET(PRESTERA_DSA_W1_CFI_BIT, words[1]); + dsa->vlan.vpt = FIELD_GET(PRESTERA_DSA_W0_VPT, words[0]); + dsa->vlan.vid = FIELD_GET(PRESTERA_DSA_W0_VID, words[0]); + dsa->vlan.vid &= ~PRESTERA_DSA_VID; + dsa->vlan.vid |= FIELD_PREP(PRESTERA_DSA_VID, field); + + field = FIELD_GET(PRESTERA_DSA_W3_DEV_NUM, words[3]); + + dsa->hw_dev_num = FIELD_GET(PRESTERA_DSA_W0_DEV_NUM, words[0]); + dsa->hw_dev_num |= FIELD_PREP(PRESTERA_DSA_DEV_NUM, field); + + dsa->port_num = (FIELD_GET(PRESTERA_DSA_W0_PORT_NUM, words[0]) << 0) | + (FIELD_GET(PRESTERA_DSA_W1_PORT_NUM, words[1]) << 5) | + (FIELD_GET(PRESTERA_DSA_W2_PORT_NUM, words[2]) << 7); + + return 0; +} + +int prestera_dsa_build(const struct prestera_dsa *dsa, u8 *dsa_buf) +{ + __be32 *dsa_words = (__be32 *)dsa_buf; + u32 dev_num = dsa->hw_dev_num; + u32 words[4] = { 0 }; + + words[0] |= FIELD_PREP(PRESTERA_DSA_W0_CMD, PRESTERA_DSA_CMD_FROM_CPU); + + words[0] |= FIELD_PREP(PRESTERA_DSA_W0_DEV_NUM, dev_num); + dev_num = FIELD_GET(PRESTERA_DSA_DEV_NUM, dev_num); + words[3] |= FIELD_PREP(PRESTERA_DSA_W3_DEV_NUM, dev_num); + + words[3] |= FIELD_PREP(PRESTERA_DSA_W3_DST_EPORT, dsa->port_num); + + words[0] |= FIELD_PREP(PRESTERA_DSA_W0_EXT_BIT, 1); + words[1] |= FIELD_PREP(PRESTERA_DSA_W1_EXT_BIT, 1); + words[2] |= FIELD_PREP(PRESTERA_DSA_W2_EXT_BIT, 1); + + dsa_words[0] = htonl(words[0]); + dsa_words[1] = htonl(words[1]); + dsa_words[2] = htonl(words[2]); + dsa_words[3] = htonl(words[3]); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/prestera/prestera_dsa.h b/drivers/net/ethernet/marvell/prestera/prestera_dsa.h new file mode 100644 index 000000000000..67018629bdd2 --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_dsa.h @@ -0,0 
+1,35 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2020 Marvell International Ltd. All rights reserved. */ + +#ifndef __PRESTERA_DSA_H_ +#define __PRESTERA_DSA_H_ + +#include <linux/types.h> + +#define PRESTERA_DSA_HLEN 16 + +enum prestera_dsa_cmd { + /* DSA command is "To CPU" */ + PRESTERA_DSA_CMD_TO_CPU = 0, + + /* DSA command is "From CPU" */ + PRESTERA_DSA_CMD_FROM_CPU, +}; + +struct prestera_dsa_vlan { + u16 vid; + u8 vpt; + u8 cfi_bit; + bool is_tagged; +}; + +struct prestera_dsa { + struct prestera_dsa_vlan vlan; + u32 hw_dev_num; + u32 port_num; +}; + +int prestera_dsa_parse(struct prestera_dsa *dsa, const u8 *dsa_buf); +int prestera_dsa_build(const struct prestera_dsa *dsa, u8 *dsa_buf); + +#endif /* _PRESTERA_DSA_H_ */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_ethtool.c b/drivers/net/ethernet/marvell/prestera/prestera_ethtool.c new file mode 100644 index 000000000000..93a5e2baf808 --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_ethtool.c @@ -0,0 +1,780 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved */ + +#include <linux/ethtool.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> + +#include "prestera_ethtool.h" +#include "prestera.h" +#include "prestera_hw.h" + +#define PRESTERA_STATS_CNT \ + (sizeof(struct prestera_port_stats) / sizeof(u64)) +#define PRESTERA_STATS_IDX(name) \ + (offsetof(struct prestera_port_stats, name) / sizeof(u64)) +#define PRESTERA_STATS_FIELD(name) \ + [PRESTERA_STATS_IDX(name)] = __stringify(name) + +static const char driver_kind[] = "prestera"; + +static const struct prestera_link_mode { + enum ethtool_link_mode_bit_indices eth_mode; + u32 speed; + u64 pr_mask; + u8 duplex; + u8 port_type; +} port_link_modes[PRESTERA_LINK_MODE_MAX] = { + [PRESTERA_LINK_MODE_10baseT_Half] = { + .eth_mode = ETHTOOL_LINK_MODE_10baseT_Half_BIT, + .speed = 10, + .pr_mask = 1 << PRESTERA_LINK_MODE_10baseT_Half, + .duplex = PRESTERA_PORT_DUPLEX_HALF, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_10baseT_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_10baseT_Full_BIT, + .speed = 10, + .pr_mask = 1 << PRESTERA_LINK_MODE_10baseT_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_100baseT_Half] = { + .eth_mode = ETHTOOL_LINK_MODE_100baseT_Half_BIT, + .speed = 100, + .pr_mask = 1 << PRESTERA_LINK_MODE_100baseT_Half, + .duplex = PRESTERA_PORT_DUPLEX_HALF, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_100baseT_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_100baseT_Full_BIT, + .speed = 100, + .pr_mask = 1 << PRESTERA_LINK_MODE_100baseT_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_1000baseT_Half] = { + .eth_mode = ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + .speed = 1000, + .pr_mask = 1 << PRESTERA_LINK_MODE_1000baseT_Half, + .duplex = PRESTERA_PORT_DUPLEX_HALF, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_1000baseT_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + .speed = 1000, + .pr_mask = 1 << PRESTERA_LINK_MODE_1000baseT_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_1000baseX_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_1000baseX_Full_BIT, + .speed = 1000, + .pr_mask = 1 << PRESTERA_LINK_MODE_1000baseX_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = 
PRESTERA_PORT_TYPE_FIBRE, + }, + [PRESTERA_LINK_MODE_1000baseKX_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + .speed = 1000, + .pr_mask = 1 << PRESTERA_LINK_MODE_1000baseKX_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_2500baseX_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_2500baseX_Full_BIT, + .speed = 2500, + .pr_mask = 1 << PRESTERA_LINK_MODE_2500baseX_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + }, + [PRESTERA_LINK_MODE_10GbaseKR_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + .speed = 10000, + .pr_mask = 1 << PRESTERA_LINK_MODE_10GbaseKR_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_10GbaseSR_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + .speed = 10000, + .pr_mask = 1 << PRESTERA_LINK_MODE_10GbaseSR_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_FIBRE, + }, + [PRESTERA_LINK_MODE_10GbaseLR_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + .speed = 10000, + .pr_mask = 1 << PRESTERA_LINK_MODE_10GbaseLR_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_FIBRE, + }, + [PRESTERA_LINK_MODE_20GbaseKR2_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT, + .speed = 20000, + .pr_mask = 1 << PRESTERA_LINK_MODE_20GbaseKR2_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_25GbaseCR_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + .speed = 25000, + .pr_mask = 1 << PRESTERA_LINK_MODE_25GbaseCR_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_DA, + }, + [PRESTERA_LINK_MODE_25GbaseKR_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + .speed = 25000, + .pr_mask = 1 << PRESTERA_LINK_MODE_25GbaseKR_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_25GbaseSR_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + .speed = 25000, + .pr_mask = 1 << PRESTERA_LINK_MODE_25GbaseSR_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_FIBRE, + }, + [PRESTERA_LINK_MODE_40GbaseKR4_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + .speed = 40000, + .pr_mask = 1 << PRESTERA_LINK_MODE_40GbaseKR4_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_40GbaseCR4_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + .speed = 40000, + .pr_mask = 1 << PRESTERA_LINK_MODE_40GbaseCR4_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_DA, + }, + [PRESTERA_LINK_MODE_40GbaseSR4_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + .speed = 40000, + .pr_mask = 1 << PRESTERA_LINK_MODE_40GbaseSR4_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_FIBRE, + }, + [PRESTERA_LINK_MODE_50GbaseCR2_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + .speed = 50000, + .pr_mask = 1 << PRESTERA_LINK_MODE_50GbaseCR2_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_DA, + }, + [PRESTERA_LINK_MODE_50GbaseKR2_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + .speed = 50000, + .pr_mask = 1 << PRESTERA_LINK_MODE_50GbaseKR2_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_50GbaseSR2_Full] = { + 
.eth_mode = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + .speed = 50000, + .pr_mask = 1 << PRESTERA_LINK_MODE_50GbaseSR2_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_FIBRE, + }, + [PRESTERA_LINK_MODE_100GbaseKR4_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + .speed = 100000, + .pr_mask = 1 << PRESTERA_LINK_MODE_100GbaseKR4_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_TP, + }, + [PRESTERA_LINK_MODE_100GbaseSR4_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + .speed = 100000, + .pr_mask = 1 << PRESTERA_LINK_MODE_100GbaseSR4_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_FIBRE, + }, + [PRESTERA_LINK_MODE_100GbaseCR4_Full] = { + .eth_mode = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + .speed = 100000, + .pr_mask = 1 << PRESTERA_LINK_MODE_100GbaseCR4_Full, + .duplex = PRESTERA_PORT_DUPLEX_FULL, + .port_type = PRESTERA_PORT_TYPE_DA, + } +}; + +static const struct prestera_fec { + u32 eth_fec; + enum ethtool_link_mode_bit_indices eth_mode; + u8 pr_fec; +} port_fec_caps[PRESTERA_PORT_FEC_MAX] = { + [PRESTERA_PORT_FEC_OFF] = { + .eth_fec = ETHTOOL_FEC_OFF, + .eth_mode = ETHTOOL_LINK_MODE_FEC_NONE_BIT, + .pr_fec = 1 << PRESTERA_PORT_FEC_OFF, + }, + [PRESTERA_PORT_FEC_BASER] = { + .eth_fec = ETHTOOL_FEC_BASER, + .eth_mode = ETHTOOL_LINK_MODE_FEC_BASER_BIT, + .pr_fec = 1 << PRESTERA_PORT_FEC_BASER, + }, + [PRESTERA_PORT_FEC_RS] = { + .eth_fec = ETHTOOL_FEC_RS, + .eth_mode = ETHTOOL_LINK_MODE_FEC_RS_BIT, + .pr_fec = 1 << PRESTERA_PORT_FEC_RS, + } +}; + +static const struct prestera_port_type { + enum ethtool_link_mode_bit_indices eth_mode; + u8 eth_type; +} port_types[PRESTERA_PORT_TYPE_MAX] = { + [PRESTERA_PORT_TYPE_NONE] = { + .eth_mode = __ETHTOOL_LINK_MODE_MASK_NBITS, + .eth_type = PORT_NONE, + }, + [PRESTERA_PORT_TYPE_TP] = { + .eth_mode = ETHTOOL_LINK_MODE_TP_BIT, + .eth_type = PORT_TP, + }, + [PRESTERA_PORT_TYPE_AUI] = { + .eth_mode = ETHTOOL_LINK_MODE_AUI_BIT, + .eth_type = PORT_AUI, + }, + [PRESTERA_PORT_TYPE_MII] = { + .eth_mode = ETHTOOL_LINK_MODE_MII_BIT, + .eth_type = PORT_MII, + }, + [PRESTERA_PORT_TYPE_FIBRE] = { + .eth_mode = ETHTOOL_LINK_MODE_FIBRE_BIT, + .eth_type = PORT_FIBRE, + }, + [PRESTERA_PORT_TYPE_BNC] = { + .eth_mode = ETHTOOL_LINK_MODE_BNC_BIT, + .eth_type = PORT_BNC, + }, + [PRESTERA_PORT_TYPE_DA] = { + .eth_mode = ETHTOOL_LINK_MODE_TP_BIT, + .eth_type = PORT_TP, + }, + [PRESTERA_PORT_TYPE_OTHER] = { + .eth_mode = __ETHTOOL_LINK_MODE_MASK_NBITS, + .eth_type = PORT_OTHER, + } +}; + +static const char prestera_cnt_name[PRESTERA_STATS_CNT][ETH_GSTRING_LEN] = { + PRESTERA_STATS_FIELD(good_octets_received), + PRESTERA_STATS_FIELD(bad_octets_received), + PRESTERA_STATS_FIELD(mac_trans_error), + PRESTERA_STATS_FIELD(broadcast_frames_received), + PRESTERA_STATS_FIELD(multicast_frames_received), + PRESTERA_STATS_FIELD(frames_64_octets), + PRESTERA_STATS_FIELD(frames_65_to_127_octets), + PRESTERA_STATS_FIELD(frames_128_to_255_octets), + PRESTERA_STATS_FIELD(frames_256_to_511_octets), + PRESTERA_STATS_FIELD(frames_512_to_1023_octets), + PRESTERA_STATS_FIELD(frames_1024_to_max_octets), + PRESTERA_STATS_FIELD(excessive_collision), + PRESTERA_STATS_FIELD(multicast_frames_sent), + PRESTERA_STATS_FIELD(broadcast_frames_sent), + PRESTERA_STATS_FIELD(fc_sent), + PRESTERA_STATS_FIELD(fc_received), + PRESTERA_STATS_FIELD(buffer_overrun), + PRESTERA_STATS_FIELD(undersize), + PRESTERA_STATS_FIELD(fragments), + PRESTERA_STATS_FIELD(oversize), + 
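Each translation table above is indexed by a firmware enum value and also stores the matching capability bit, which means pr_mask (and pr_fec) must always equal BIT(index). A hypothetical init-time self-check that would catch a table edit breaking this invariant; nothing like it exists in the patch:

static int __init prestera_link_mode_table_check(void)
{
        u32 mode;

        for (mode = 0; mode < PRESTERA_LINK_MODE_MAX; mode++)
                if (port_link_modes[mode].pr_mask != BIT(mode))
                        return -EINVAL; /* table and enum out of sync */

        return 0;
}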
PRESTERA_STATS_FIELD(jabber), + PRESTERA_STATS_FIELD(rx_error_frame_received), + PRESTERA_STATS_FIELD(bad_crc), + PRESTERA_STATS_FIELD(collisions), + PRESTERA_STATS_FIELD(late_collision), + PRESTERA_STATS_FIELD(unicast_frames_received), + PRESTERA_STATS_FIELD(unicast_frames_sent), + PRESTERA_STATS_FIELD(sent_multiple), + PRESTERA_STATS_FIELD(sent_deferred), + PRESTERA_STATS_FIELD(good_octets_sent), +}; + +static void prestera_ethtool_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct prestera_port *port = netdev_priv(dev); + struct prestera_switch *sw = port->sw; + + strlcpy(drvinfo->driver, driver_kind, sizeof(drvinfo->driver)); + strlcpy(drvinfo->bus_info, dev_name(prestera_dev(sw)), + sizeof(drvinfo->bus_info)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + sw->dev->fw_rev.maj, + sw->dev->fw_rev.min, + sw->dev->fw_rev.sub); +} + +static u8 prestera_port_type_get(struct prestera_port *port) +{ + if (port->caps.type < PRESTERA_PORT_TYPE_MAX) + return port_types[port->caps.type].eth_type; + + return PORT_OTHER; +} + +static int prestera_port_type_set(const struct ethtool_link_ksettings *ecmd, + struct prestera_port *port) +{ + u32 new_mode = PRESTERA_LINK_MODE_MAX; + u32 type, mode; + int err; + + for (type = 0; type < PRESTERA_PORT_TYPE_MAX; type++) { + if (port_types[type].eth_type == ecmd->base.port && + test_bit(port_types[type].eth_mode, + ecmd->link_modes.supported)) { + break; + } + } + + if (type == port->caps.type) + return 0; + if (type != port->caps.type && ecmd->base.autoneg == AUTONEG_ENABLE) + return -EINVAL; + if (type == PRESTERA_PORT_TYPE_MAX) + return -EOPNOTSUPP; + + for (mode = 0; mode < PRESTERA_LINK_MODE_MAX; mode++) { + if ((port_link_modes[mode].pr_mask & + port->caps.supp_link_modes) && + type == port_link_modes[mode].port_type) { + new_mode = mode; + } + } + + if (new_mode < PRESTERA_LINK_MODE_MAX) + err = prestera_hw_port_link_mode_set(port, new_mode); + else + err = -EINVAL; + + if (err) + return err; + + port->caps.type = type; + port->autoneg = false; + + return 0; +} + +static void prestera_modes_to_eth(unsigned long *eth_modes, u64 link_modes, + u8 fec, u8 type) +{ + u32 mode; + + for (mode = 0; mode < PRESTERA_LINK_MODE_MAX; mode++) { + if ((port_link_modes[mode].pr_mask & link_modes) == 0) + continue; + + if (type != PRESTERA_PORT_TYPE_NONE && + port_link_modes[mode].port_type != type) + continue; + + __set_bit(port_link_modes[mode].eth_mode, eth_modes); + } + + for (mode = 0; mode < PRESTERA_PORT_FEC_MAX; mode++) { + if ((port_fec_caps[mode].pr_fec & fec) == 0) + continue; + + __set_bit(port_fec_caps[mode].eth_mode, eth_modes); + } +} + +static void prestera_modes_from_eth(const unsigned long *eth_modes, + u64 *link_modes, u8 *fec, u8 type) +{ + u64 adver_modes = 0; + u32 fec_modes = 0; + u32 mode; + + for (mode = 0; mode < PRESTERA_LINK_MODE_MAX; mode++) { + if (!test_bit(port_link_modes[mode].eth_mode, eth_modes)) + continue; + + if (port_link_modes[mode].port_type != type) + continue; + + adver_modes |= port_link_modes[mode].pr_mask; + } + + for (mode = 0; mode < PRESTERA_PORT_FEC_MAX; mode++) { + if (!test_bit(port_fec_caps[mode].eth_mode, eth_modes)) + continue; + + fec_modes |= port_fec_caps[mode].pr_fec; + } + + *link_modes = adver_modes; + *fec = fec_modes; +} + +static void prestera_port_supp_types_get(struct ethtool_link_ksettings *ecmd, + struct prestera_port *port) +{ + u32 mode; + u8 ptype; + + for (mode = 0; mode < PRESTERA_LINK_MODE_MAX; mode++) { + if 
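prestera_modes_to_eth() and prestera_modes_from_eth() above convert between the firmware's u64 capability bitmap and the kernel's ethtool link-mode bitmaps, filtering by port type so that, say, a fibre-only mode is never advertised on a TP port. A short usage sketch of the from-eth direction (mask contents hypothetical):

__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising) = { 0 };
u64 fw_modes;
u8 fw_fec;

linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, advertising);

prestera_modes_from_eth(advertising, &fw_modes, &fw_fec,
                        PRESTERA_PORT_TYPE_TP);
/* fw_modes == 1 << PRESTERA_LINK_MODE_10GbaseKR_Full, fw_fec == 0 */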
((port_link_modes[mode].pr_mask & + port->caps.supp_link_modes) == 0) + continue; + + ptype = port_link_modes[mode].port_type; + __set_bit(port_types[ptype].eth_mode, + ecmd->link_modes.supported); + } +} + +static void prestera_port_remote_cap_get(struct ethtool_link_ksettings *ecmd, + struct prestera_port *port) +{ + bool asym_pause; + bool pause; + u64 bitmap; + int err; + + err = prestera_hw_port_remote_cap_get(port, &bitmap); + if (!err) { + prestera_modes_to_eth(ecmd->link_modes.lp_advertising, + bitmap, 0, PRESTERA_PORT_TYPE_NONE); + + if (!bitmap_empty(ecmd->link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS)) { + ethtool_link_ksettings_add_link_mode(ecmd, + lp_advertising, + Autoneg); + } + } + + err = prestera_hw_port_remote_fc_get(port, &pause, &asym_pause); + if (err) + return; + + if (pause) + ethtool_link_ksettings_add_link_mode(ecmd, + lp_advertising, + Pause); + if (asym_pause) + ethtool_link_ksettings_add_link_mode(ecmd, + lp_advertising, + Asym_Pause); +} + +static void prestera_port_speed_get(struct ethtool_link_ksettings *ecmd, + struct prestera_port *port) +{ + u32 speed; + int err; + + err = prestera_hw_port_speed_get(port, &speed); + ecmd->base.speed = err ? SPEED_UNKNOWN : speed; +} + +static void prestera_port_duplex_get(struct ethtool_link_ksettings *ecmd, + struct prestera_port *port) +{ + u8 duplex; + int err; + + err = prestera_hw_port_duplex_get(port, &duplex); + if (err) { + ecmd->base.duplex = DUPLEX_UNKNOWN; + return; + } + + ecmd->base.duplex = duplex == PRESTERA_PORT_DUPLEX_FULL ? + DUPLEX_FULL : DUPLEX_HALF; +} + +static int +prestera_ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *ecmd) +{ + struct prestera_port *port = netdev_priv(dev); + + ethtool_link_ksettings_zero_link_mode(ecmd, supported); + ethtool_link_ksettings_zero_link_mode(ecmd, advertising); + ethtool_link_ksettings_zero_link_mode(ecmd, lp_advertising); + + ecmd->base.autoneg = port->autoneg ? 
AUTONEG_ENABLE : AUTONEG_DISABLE; + + if (port->caps.type == PRESTERA_PORT_TYPE_TP) { + ethtool_link_ksettings_add_link_mode(ecmd, supported, Autoneg); + + if (netif_running(dev) && + (port->autoneg || + port->caps.transceiver == PRESTERA_PORT_TCVR_COPPER)) + ethtool_link_ksettings_add_link_mode(ecmd, advertising, + Autoneg); + } + + prestera_modes_to_eth(ecmd->link_modes.supported, + port->caps.supp_link_modes, + port->caps.supp_fec, + port->caps.type); + + prestera_port_supp_types_get(ecmd, port); + + if (netif_carrier_ok(dev)) { + prestera_port_speed_get(ecmd, port); + prestera_port_duplex_get(ecmd, port); + } else { + ecmd->base.speed = SPEED_UNKNOWN; + ecmd->base.duplex = DUPLEX_UNKNOWN; + } + + ecmd->base.port = prestera_port_type_get(port); + + if (port->autoneg) { + if (netif_running(dev)) + prestera_modes_to_eth(ecmd->link_modes.advertising, + port->adver_link_modes, + port->adver_fec, + port->caps.type); + + if (netif_carrier_ok(dev) && + port->caps.transceiver == PRESTERA_PORT_TCVR_COPPER) + prestera_port_remote_cap_get(ecmd, port); + } + + if (port->caps.type == PRESTERA_PORT_TYPE_TP && + port->caps.transceiver == PRESTERA_PORT_TCVR_COPPER) + prestera_hw_port_mdix_get(port, &ecmd->base.eth_tp_mdix, + &ecmd->base.eth_tp_mdix_ctrl); + + return 0; +} + +static int prestera_port_mdix_set(const struct ethtool_link_ksettings *ecmd, + struct prestera_port *port) +{ + if (ecmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_INVALID && + port->caps.transceiver == PRESTERA_PORT_TCVR_COPPER && + port->caps.type == PRESTERA_PORT_TYPE_TP) + return prestera_hw_port_mdix_set(port, + ecmd->base.eth_tp_mdix_ctrl); + + return 0; +} + +static int prestera_port_link_mode_set(struct prestera_port *port, + u32 speed, u8 duplex, u8 type) +{ + u32 new_mode = PRESTERA_LINK_MODE_MAX; + u32 mode; + + for (mode = 0; mode < PRESTERA_LINK_MODE_MAX; mode++) { + if (speed != port_link_modes[mode].speed) + continue; + + if (duplex != port_link_modes[mode].duplex) + continue; + + if (!(port_link_modes[mode].pr_mask & + port->caps.supp_link_modes)) + continue; + + if (type != port_link_modes[mode].port_type) + continue; + + new_mode = mode; + break; + } + + if (new_mode == PRESTERA_LINK_MODE_MAX) + return -EOPNOTSUPP; + + return prestera_hw_port_link_mode_set(port, new_mode); +} + +static int +prestera_port_speed_duplex_set(const struct ethtool_link_ksettings *ecmd, + struct prestera_port *port) +{ + u32 curr_mode; + u8 duplex; + u32 speed; + int err; + + err = prestera_hw_port_link_mode_get(port, &curr_mode); + if (err) + return err; + if (curr_mode >= PRESTERA_LINK_MODE_MAX) + return -EINVAL; + + if (ecmd->base.duplex != DUPLEX_UNKNOWN) + duplex = ecmd->base.duplex == DUPLEX_FULL ? 
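prestera_port_link_mode_set() resolves a (speed, duplex, port type) triple back to a firmware link mode by scanning the same descriptor table, and rejects combinations the port does not support. For example (sketch only, error handling abbreviated), forcing 10 Gb/s full duplex on a fibre port picks the first matching supported entry, 10GbaseSR or 10GbaseLR:

err = prestera_port_link_mode_set(port, SPEED_10000,
                                  PRESTERA_PORT_DUPLEX_FULL,
                                  PRESTERA_PORT_TYPE_FIBRE);
if (err)
        return err;     /* -EOPNOTSUPP if no table entry matched */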
+ PRESTERA_PORT_DUPLEX_FULL : PRESTERA_PORT_DUPLEX_HALF; + else + duplex = port_link_modes[curr_mode].duplex; + + if (ecmd->base.speed != SPEED_UNKNOWN) + speed = ecmd->base.speed; + else + speed = port_link_modes[curr_mode].speed; + + return prestera_port_link_mode_set(port, speed, duplex, + port->caps.type); +} + +static int +prestera_ethtool_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *ecmd) +{ + struct prestera_port *port = netdev_priv(dev); + u64 adver_modes; + u8 adver_fec; + int err; + + err = prestera_port_type_set(ecmd, port); + if (err) + return err; + + if (port->caps.transceiver == PRESTERA_PORT_TCVR_COPPER) { + err = prestera_port_mdix_set(ecmd, port); + if (err) + return err; + } + + prestera_modes_from_eth(ecmd->link_modes.advertising, &adver_modes, + &adver_fec, port->caps.type); + + err = prestera_port_autoneg_set(port, + ecmd->base.autoneg == AUTONEG_ENABLE, + adver_modes, adver_fec); + if (err) + return err; + + if (ecmd->base.autoneg == AUTONEG_DISABLE) { + err = prestera_port_speed_duplex_set(ecmd, port); + if (err) + return err; + } + + return 0; +} + +static int prestera_ethtool_get_fecparam(struct net_device *dev, + struct ethtool_fecparam *fecparam) +{ + struct prestera_port *port = netdev_priv(dev); + u8 active; + u32 mode; + int err; + + err = prestera_hw_port_fec_get(port, &active); + if (err) + return err; + + fecparam->fec = 0; + + for (mode = 0; mode < PRESTERA_PORT_FEC_MAX; mode++) { + if ((port_fec_caps[mode].pr_fec & port->caps.supp_fec) == 0) + continue; + + fecparam->fec |= port_fec_caps[mode].eth_fec; + } + + if (active < PRESTERA_PORT_FEC_MAX) + fecparam->active_fec = port_fec_caps[active].eth_fec; + else + fecparam->active_fec = ETHTOOL_FEC_AUTO; + + return 0; +} + +static int prestera_ethtool_set_fecparam(struct net_device *dev, + struct ethtool_fecparam *fecparam) +{ + struct prestera_port *port = netdev_priv(dev); + u8 fec, active; + u32 mode; + int err; + + if (port->autoneg) { + netdev_err(dev, "FEC set is not allowed while autoneg is on\n"); + return -EINVAL; + } + + err = prestera_hw_port_fec_get(port, &active); + if (err) + return err; + + fec = PRESTERA_PORT_FEC_MAX; + for (mode = 0; mode < PRESTERA_PORT_FEC_MAX; mode++) { + if ((port_fec_caps[mode].eth_fec & fecparam->fec) && + (port_fec_caps[mode].pr_fec & port->caps.supp_fec)) { + fec = mode; + break; + } + } + + if (fec == active) + return 0; + + if (fec == PRESTERA_PORT_FEC_MAX) + return -EOPNOTSUPP; + + return prestera_hw_port_fec_set(port, fec); +} + +static int prestera_ethtool_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return PRESTERA_STATS_CNT; + default: + return -EOPNOTSUPP; + } +} + +static void prestera_ethtool_get_strings(struct net_device *dev, + u32 stringset, u8 *data) +{ + if (stringset != ETH_SS_STATS) + return; + + memcpy(data, prestera_cnt_name, sizeof(prestera_cnt_name)); +} + +static void prestera_ethtool_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct prestera_port *port = netdev_priv(dev); + struct prestera_port_stats *port_stats; + + port_stats = &port->cached_hw_stats.stats; + + memcpy(data, port_stats, sizeof(*port_stats)); +} + +static int prestera_ethtool_nway_reset(struct net_device *dev) +{ + struct prestera_port *port = netdev_priv(dev); + + if (netif_running(dev) && + port->caps.transceiver == PRESTERA_PORT_TCVR_COPPER && + port->caps.type == PRESTERA_PORT_TYPE_TP) + return prestera_hw_port_autoneg_restart(port); + + return -EINVAL; 
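The ops table below is this file's only export; wiring it into the netdev happens at port creation in prestera_main.c, outside this excerpt, presumably in the conventional way (sketch):

dev->ethtool_ops = &prestera_ethtool_ops;
dev->netdev_ops = &prestera_netdev_ops;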
+} + +const struct ethtool_ops prestera_ethtool_ops = { + .get_drvinfo = prestera_ethtool_get_drvinfo, + .get_link_ksettings = prestera_ethtool_get_link_ksettings, + .set_link_ksettings = prestera_ethtool_set_link_ksettings, + .get_fecparam = prestera_ethtool_get_fecparam, + .set_fecparam = prestera_ethtool_set_fecparam, + .get_sset_count = prestera_ethtool_get_sset_count, + .get_strings = prestera_ethtool_get_strings, + .get_ethtool_stats = prestera_ethtool_get_stats, + .get_link = ethtool_op_get_link, + .nway_reset = prestera_ethtool_nway_reset +}; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_ethtool.h b/drivers/net/ethernet/marvell/prestera/prestera_ethtool.h new file mode 100644 index 000000000000..523ef1f592ce --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_ethtool.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved. */ + +#ifndef __PRESTERA_ETHTOOL_H_ +#define __PRESTERA_ETHTOOL_H_ + +#include <linux/ethtool.h> + +extern const struct ethtool_ops prestera_ethtool_ops; + +#endif /* _PRESTERA_ETHTOOL_H_ */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c new file mode 100644 index 000000000000..0424718d5998 --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c @@ -0,0 +1,1253 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved */ + +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/list.h> + +#include "prestera.h" +#include "prestera_hw.h" + +#define PRESTERA_SWITCH_INIT_TIMEOUT_MS (30 * 1000) + +#define PRESTERA_MIN_MTU 64 + +enum prestera_cmd_type_t { + PRESTERA_CMD_TYPE_SWITCH_INIT = 0x1, + PRESTERA_CMD_TYPE_SWITCH_ATTR_SET = 0x2, + + PRESTERA_CMD_TYPE_PORT_ATTR_SET = 0x100, + PRESTERA_CMD_TYPE_PORT_ATTR_GET = 0x101, + PRESTERA_CMD_TYPE_PORT_INFO_GET = 0x110, + + PRESTERA_CMD_TYPE_VLAN_CREATE = 0x200, + PRESTERA_CMD_TYPE_VLAN_DELETE = 0x201, + PRESTERA_CMD_TYPE_VLAN_PORT_SET = 0x202, + PRESTERA_CMD_TYPE_VLAN_PVID_SET = 0x203, + + PRESTERA_CMD_TYPE_FDB_ADD = 0x300, + PRESTERA_CMD_TYPE_FDB_DELETE = 0x301, + PRESTERA_CMD_TYPE_FDB_FLUSH_PORT = 0x310, + PRESTERA_CMD_TYPE_FDB_FLUSH_VLAN = 0x311, + PRESTERA_CMD_TYPE_FDB_FLUSH_PORT_VLAN = 0x312, + + PRESTERA_CMD_TYPE_BRIDGE_CREATE = 0x400, + PRESTERA_CMD_TYPE_BRIDGE_DELETE = 0x401, + PRESTERA_CMD_TYPE_BRIDGE_PORT_ADD = 0x402, + PRESTERA_CMD_TYPE_BRIDGE_PORT_DELETE = 0x403, + + PRESTERA_CMD_TYPE_RXTX_INIT = 0x800, + PRESTERA_CMD_TYPE_RXTX_PORT_INIT = 0x801, + + PRESTERA_CMD_TYPE_STP_PORT_SET = 0x1000, + + PRESTERA_CMD_TYPE_ACK = 0x10000, + PRESTERA_CMD_TYPE_MAX +}; + +enum { + PRESTERA_CMD_PORT_ATTR_ADMIN_STATE = 1, + PRESTERA_CMD_PORT_ATTR_MTU = 3, + PRESTERA_CMD_PORT_ATTR_MAC = 4, + PRESTERA_CMD_PORT_ATTR_SPEED = 5, + PRESTERA_CMD_PORT_ATTR_ACCEPT_FRAME_TYPE = 6, + PRESTERA_CMD_PORT_ATTR_LEARNING = 7, + PRESTERA_CMD_PORT_ATTR_FLOOD = 8, + PRESTERA_CMD_PORT_ATTR_CAPABILITY = 9, + PRESTERA_CMD_PORT_ATTR_REMOTE_CAPABILITY = 10, + PRESTERA_CMD_PORT_ATTR_REMOTE_FC = 11, + PRESTERA_CMD_PORT_ATTR_LINK_MODE = 12, + PRESTERA_CMD_PORT_ATTR_TYPE = 13, + PRESTERA_CMD_PORT_ATTR_FEC = 14, + PRESTERA_CMD_PORT_ATTR_AUTONEG = 15, + PRESTERA_CMD_PORT_ATTR_DUPLEX = 16, + PRESTERA_CMD_PORT_ATTR_STATS = 17, + PRESTERA_CMD_PORT_ATTR_MDIX = 18, + PRESTERA_CMD_PORT_ATTR_AUTONEG_RESTART = 19, +}; + +enum { + PRESTERA_CMD_SWITCH_ATTR_MAC = 1, + 
PRESTERA_CMD_SWITCH_ATTR_AGEING = 2, +}; + +enum { + PRESTERA_CMD_ACK_OK, + PRESTERA_CMD_ACK_FAILED, + + PRESTERA_CMD_ACK_MAX +}; + +enum { + PRESTERA_PORT_TP_NA, + PRESTERA_PORT_TP_MDI, + PRESTERA_PORT_TP_MDIX, + PRESTERA_PORT_TP_AUTO, +}; + +enum { + PRESTERA_PORT_GOOD_OCTETS_RCV_CNT, + PRESTERA_PORT_BAD_OCTETS_RCV_CNT, + PRESTERA_PORT_MAC_TRANSMIT_ERR_CNT, + PRESTERA_PORT_BRDC_PKTS_RCV_CNT, + PRESTERA_PORT_MC_PKTS_RCV_CNT, + PRESTERA_PORT_PKTS_64L_CNT, + PRESTERA_PORT_PKTS_65TO127L_CNT, + PRESTERA_PORT_PKTS_128TO255L_CNT, + PRESTERA_PORT_PKTS_256TO511L_CNT, + PRESTERA_PORT_PKTS_512TO1023L_CNT, + PRESTERA_PORT_PKTS_1024TOMAXL_CNT, + PRESTERA_PORT_EXCESSIVE_COLLISIONS_CNT, + PRESTERA_PORT_MC_PKTS_SENT_CNT, + PRESTERA_PORT_BRDC_PKTS_SENT_CNT, + PRESTERA_PORT_FC_SENT_CNT, + PRESTERA_PORT_GOOD_FC_RCV_CNT, + PRESTERA_PORT_DROP_EVENTS_CNT, + PRESTERA_PORT_UNDERSIZE_PKTS_CNT, + PRESTERA_PORT_FRAGMENTS_PKTS_CNT, + PRESTERA_PORT_OVERSIZE_PKTS_CNT, + PRESTERA_PORT_JABBER_PKTS_CNT, + PRESTERA_PORT_MAC_RCV_ERROR_CNT, + PRESTERA_PORT_BAD_CRC_CNT, + PRESTERA_PORT_COLLISIONS_CNT, + PRESTERA_PORT_LATE_COLLISIONS_CNT, + PRESTERA_PORT_GOOD_UC_PKTS_RCV_CNT, + PRESTERA_PORT_GOOD_UC_PKTS_SENT_CNT, + PRESTERA_PORT_MULTIPLE_PKTS_SENT_CNT, + PRESTERA_PORT_DEFERRED_PKTS_SENT_CNT, + PRESTERA_PORT_GOOD_OCTETS_SENT_CNT, + + PRESTERA_PORT_CNT_MAX +}; + +enum { + PRESTERA_FC_NONE, + PRESTERA_FC_SYMMETRIC, + PRESTERA_FC_ASYMMETRIC, + PRESTERA_FC_SYMM_ASYMM, +}; + +struct prestera_fw_event_handler { + struct list_head list; + struct rcu_head rcu; + enum prestera_event_type type; + prestera_event_cb_t func; + void *arg; +}; + +struct prestera_msg_cmd { + u32 type; +}; + +struct prestera_msg_ret { + struct prestera_msg_cmd cmd; + u32 status; +}; + +struct prestera_msg_common_req { + struct prestera_msg_cmd cmd; +}; + +struct prestera_msg_common_resp { + struct prestera_msg_ret ret; +}; + +union prestera_msg_switch_param { + u8 mac[ETH_ALEN]; + u32 ageing_timeout_ms; +}; + +struct prestera_msg_switch_attr_req { + struct prestera_msg_cmd cmd; + u32 attr; + union prestera_msg_switch_param param; +}; + +struct prestera_msg_switch_init_resp { + struct prestera_msg_ret ret; + u32 port_count; + u32 mtu_max; + u8 switch_id; +}; + +struct prestera_msg_port_autoneg_param { + u64 link_mode; + u8 enable; + u8 fec; +}; + +struct prestera_msg_port_cap_param { + u64 link_mode; + u8 type; + u8 fec; + u8 transceiver; +}; + +struct prestera_msg_port_mdix_param { + u8 status; + u8 admin_mode; +}; + +union prestera_msg_port_param { + u8 admin_state; + u8 oper_state; + u32 mtu; + u8 mac[ETH_ALEN]; + u8 accept_frm_type; + u32 speed; + u8 learning; + u8 flood; + u32 link_mode; + u8 type; + u8 duplex; + u8 fec; + u8 fc; + struct prestera_msg_port_mdix_param mdix; + struct prestera_msg_port_autoneg_param autoneg; + struct prestera_msg_port_cap_param cap; +}; + +struct prestera_msg_port_attr_req { + struct prestera_msg_cmd cmd; + u32 attr; + u32 port; + u32 dev; + union prestera_msg_port_param param; +}; + +struct prestera_msg_port_attr_resp { + struct prestera_msg_ret ret; + union prestera_msg_port_param param; +}; + +struct prestera_msg_port_stats_resp { + struct prestera_msg_ret ret; + u64 stats[PRESTERA_PORT_CNT_MAX]; +}; + +struct prestera_msg_port_info_req { + struct prestera_msg_cmd cmd; + u32 port; +}; + +struct prestera_msg_port_info_resp { + struct prestera_msg_ret ret; + u32 hw_id; + u32 dev_id; + u16 fp_id; +}; + +struct prestera_msg_vlan_req { + struct prestera_msg_cmd cmd; + u32 port; + u32 dev; + u16 vid; + u8 is_member; + u8 
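Every request in this file embeds struct prestera_msg_cmd as its first member and every reply begins with struct prestera_msg_ret; that shared framing lets one generic sender (__prestera_cmd_ret(), defined further down) stamp the command type, fire the request, and then insist the reply is a PRESTERA_CMD_TYPE_ACK with status PRESTERA_CMD_ACK_OK. A minimal hypothetical caller, equivalent to what the attribute helpers later in this file do:

struct prestera_msg_port_attr_req req = {
        .attr = PRESTERA_CMD_PORT_ATTR_MTU,
        .port = port->hw_id,
        .dev = port->dev_id,
        .param = { .mtu = 1500 },       /* hypothetical MTU value */
};

err = prestera_cmd(sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
                   &req.cmd, sizeof(req));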
is_tagged; +}; + +struct prestera_msg_fdb_req { + struct prestera_msg_cmd cmd; + u8 dest_type; + u32 port; + u32 dev; + u8 mac[ETH_ALEN]; + u16 vid; + u8 dynamic; + u32 flush_mode; +}; + +struct prestera_msg_bridge_req { + struct prestera_msg_cmd cmd; + u32 port; + u32 dev; + u16 bridge; +}; + +struct prestera_msg_bridge_resp { + struct prestera_msg_ret ret; + u16 bridge; +}; + +struct prestera_msg_stp_req { + struct prestera_msg_cmd cmd; + u32 port; + u32 dev; + u16 vid; + u8 state; +}; + +struct prestera_msg_rxtx_req { + struct prestera_msg_cmd cmd; + u8 use_sdma; +}; + +struct prestera_msg_rxtx_resp { + struct prestera_msg_ret ret; + u32 map_addr; +}; + +struct prestera_msg_rxtx_port_req { + struct prestera_msg_cmd cmd; + u32 port; + u32 dev; +}; + +struct prestera_msg_event { + u16 type; + u16 id; +}; + +union prestera_msg_event_port_param { + u32 oper_state; +}; + +struct prestera_msg_event_port { + struct prestera_msg_event id; + u32 port_id; + union prestera_msg_event_port_param param; +}; + +union prestera_msg_event_fdb_param { + u8 mac[ETH_ALEN]; +}; + +struct prestera_msg_event_fdb { + struct prestera_msg_event id; + u8 dest_type; + u32 port_id; + u32 vid; + union prestera_msg_event_fdb_param param; +}; + +static int __prestera_cmd_ret(struct prestera_switch *sw, + enum prestera_cmd_type_t type, + struct prestera_msg_cmd *cmd, size_t clen, + struct prestera_msg_ret *ret, size_t rlen, + int waitms) +{ + struct prestera_device *dev = sw->dev; + int err; + + cmd->type = type; + + err = dev->send_req(dev, cmd, clen, ret, rlen, waitms); + if (err) + return err; + + if (ret->cmd.type != PRESTERA_CMD_TYPE_ACK) + return -EBADE; + if (ret->status != PRESTERA_CMD_ACK_OK) + return -EINVAL; + + return 0; +} + +static int prestera_cmd_ret(struct prestera_switch *sw, + enum prestera_cmd_type_t type, + struct prestera_msg_cmd *cmd, size_t clen, + struct prestera_msg_ret *ret, size_t rlen) +{ + return __prestera_cmd_ret(sw, type, cmd, clen, ret, rlen, 0); +} + +static int prestera_cmd_ret_wait(struct prestera_switch *sw, + enum prestera_cmd_type_t type, + struct prestera_msg_cmd *cmd, size_t clen, + struct prestera_msg_ret *ret, size_t rlen, + int waitms) +{ + return __prestera_cmd_ret(sw, type, cmd, clen, ret, rlen, waitms); +} + +static int prestera_cmd(struct prestera_switch *sw, + enum prestera_cmd_type_t type, + struct prestera_msg_cmd *cmd, size_t clen) +{ + struct prestera_msg_common_resp resp; + + return prestera_cmd_ret(sw, type, cmd, clen, &resp.ret, sizeof(resp)); +} + +static int prestera_fw_parse_port_evt(void *msg, struct prestera_event *evt) +{ + struct prestera_msg_event_port *hw_evt = msg; + + if (evt->id != PRESTERA_PORT_EVENT_STATE_CHANGED) + return -EINVAL; + + evt->port_evt.data.oper_state = hw_evt->param.oper_state; + evt->port_evt.port_id = hw_evt->port_id; + + return 0; +} + +static int prestera_fw_parse_fdb_evt(void *msg, struct prestera_event *evt) +{ + struct prestera_msg_event_fdb *hw_evt = msg; + + evt->fdb_evt.port_id = hw_evt->port_id; + evt->fdb_evt.vid = hw_evt->vid; + + ether_addr_copy(evt->fdb_evt.data.mac, hw_evt->param.mac); + + return 0; +} + +static struct prestera_fw_evt_parser { + int (*func)(void *msg, struct prestera_event *evt); +} fw_event_parsers[PRESTERA_EVENT_TYPE_MAX] = { + [PRESTERA_EVENT_TYPE_PORT] = { .func = prestera_fw_parse_port_evt }, + [PRESTERA_EVENT_TYPE_FDB] = { .func = prestera_fw_parse_fdb_evt }, +}; + +static struct prestera_fw_event_handler * +__find_event_handler(const struct prestera_switch *sw, + enum prestera_event_type type) +{ 
+ struct prestera_fw_event_handler *eh; + + list_for_each_entry_rcu(eh, &sw->event_handlers, list) { + if (eh->type == type) + return eh; + } + + return NULL; +} + +static int prestera_find_event_handler(const struct prestera_switch *sw, + enum prestera_event_type type, + struct prestera_fw_event_handler *eh) +{ + struct prestera_fw_event_handler *tmp; + int err = 0; + + rcu_read_lock(); + tmp = __find_event_handler(sw, type); + if (tmp) + *eh = *tmp; + else + err = -ENOENT; + rcu_read_unlock(); + + return err; +} + +static int prestera_evt_recv(struct prestera_device *dev, void *buf, size_t size) +{ + struct prestera_switch *sw = dev->priv; + struct prestera_msg_event *msg = buf; + struct prestera_fw_event_handler eh; + struct prestera_event evt; + int err; + + if (msg->type >= PRESTERA_EVENT_TYPE_MAX) + return -EINVAL; + if (!fw_event_parsers[msg->type].func) + return -ENOENT; + + err = prestera_find_event_handler(sw, msg->type, &eh); + if (err) + return err; + + evt.id = msg->id; + + err = fw_event_parsers[msg->type].func(buf, &evt); + if (err) + return err; + + eh.func(sw, &evt, eh.arg); + + return 0; +} + +static void prestera_pkt_recv(struct prestera_device *dev) +{ + struct prestera_switch *sw = dev->priv; + struct prestera_fw_event_handler eh; + struct prestera_event ev; + int err; + + ev.id = PRESTERA_RXTX_EVENT_RCV_PKT; + + err = prestera_find_event_handler(sw, PRESTERA_EVENT_TYPE_RXTX, &eh); + if (err) + return; + + eh.func(sw, &ev, eh.arg); +} + +int prestera_hw_port_info_get(const struct prestera_port *port, + u32 *dev_id, u32 *hw_id, u16 *fp_id) +{ + struct prestera_msg_port_info_req req = { + .port = port->id, + }; + struct prestera_msg_port_info_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_INFO_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + *dev_id = resp.dev_id; + *hw_id = resp.hw_id; + *fp_id = resp.fp_id; + + return 0; +} + +int prestera_hw_switch_mac_set(struct prestera_switch *sw, const char *mac) +{ + struct prestera_msg_switch_attr_req req = { + .attr = PRESTERA_CMD_SWITCH_ATTR_MAC, + }; + + ether_addr_copy(req.param.mac, mac); + + return prestera_cmd(sw, PRESTERA_CMD_TYPE_SWITCH_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_switch_init(struct prestera_switch *sw) +{ + struct prestera_msg_switch_init_resp resp; + struct prestera_msg_common_req req; + int err; + + INIT_LIST_HEAD(&sw->event_handlers); + + err = prestera_cmd_ret_wait(sw, PRESTERA_CMD_TYPE_SWITCH_INIT, + &req.cmd, sizeof(req), + &resp.ret, sizeof(resp), + PRESTERA_SWITCH_INIT_TIMEOUT_MS); + if (err) + return err; + + sw->dev->recv_msg = prestera_evt_recv; + sw->dev->recv_pkt = prestera_pkt_recv; + sw->port_count = resp.port_count; + sw->mtu_min = PRESTERA_MIN_MTU; + sw->mtu_max = resp.mtu_max; + sw->id = resp.switch_id; + + return 0; +} + +void prestera_hw_switch_fini(struct prestera_switch *sw) +{ + WARN_ON(!list_empty(&sw->event_handlers)); +} + +int prestera_hw_switch_ageing_set(struct prestera_switch *sw, u32 ageing_ms) +{ + struct prestera_msg_switch_attr_req req = { + .attr = PRESTERA_CMD_SWITCH_ATTR_AGEING, + .param = { + .ageing_timeout_ms = ageing_ms, + }, + }; + + return prestera_cmd(sw, PRESTERA_CMD_TYPE_SWITCH_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_state_set(const struct prestera_port *port, + bool admin_state) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_ADMIN_STATE, + .port = port->hw_id, + .dev = port->dev_id, + .param = { + .admin_state = 
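Note how prestera_find_event_handler() copies the matched handler out by value inside the RCU read section: after rcu_read_unlock(), a concurrent unregister may free the node through kfree_rcu(), but the snapshot of the function pointer and argument stays valid, so the callback can run without any lock held. Consumers use the register/unregister pair (defined at the end of this file) roughly as follows; the handler name here is invented for the sketch:

static void demo_port_ev_handler(struct prestera_switch *sw,
                                 struct prestera_event *evt, void *arg)
{
        /* called from the FW event path, outside the RCU read section */
}

err = prestera_hw_event_handler_register(sw, PRESTERA_EVENT_TYPE_PORT,
                                         demo_port_ev_handler, sw);
/* ... and on teardown: */
prestera_hw_event_handler_unregister(sw, PRESTERA_EVENT_TYPE_PORT,
                                     demo_port_ev_handler);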
admin_state, + } + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_mtu_set(const struct prestera_port *port, u32 mtu) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_MTU, + .port = port->hw_id, + .dev = port->dev_id, + .param = { + .mtu = mtu, + } + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_mac_set(const struct prestera_port *port, const char *mac) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_MAC, + .port = port->hw_id, + .dev = port->dev_id, + }; + + ether_addr_copy(req.param.mac, mac); + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_accept_frm_type(struct prestera_port *port, + enum prestera_accept_frm_type type) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_ACCEPT_FRAME_TYPE, + .port = port->hw_id, + .dev = port->dev_id, + .param = { + .accept_frm_type = type, + } + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_cap_get(const struct prestera_port *port, + struct prestera_port_caps *caps) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_CAPABILITY, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_attr_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + caps->supp_link_modes = resp.param.cap.link_mode; + caps->transceiver = resp.param.cap.transceiver; + caps->supp_fec = resp.param.cap.fec; + caps->type = resp.param.cap.type; + + return err; +} + +int prestera_hw_port_remote_cap_get(const struct prestera_port *port, + u64 *link_mode_bitmap) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_REMOTE_CAPABILITY, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_attr_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + *link_mode_bitmap = resp.param.cap.link_mode; + + return 0; +} + +int prestera_hw_port_remote_fc_get(const struct prestera_port *port, + bool *pause, bool *asym_pause) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_REMOTE_FC, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_attr_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + switch (resp.param.fc) { + case PRESTERA_FC_SYMMETRIC: + *pause = true; + *asym_pause = false; + break; + case PRESTERA_FC_ASYMMETRIC: + *pause = false; + *asym_pause = true; + break; + case PRESTERA_FC_SYMM_ASYMM: + *pause = true; + *asym_pause = true; + break; + default: + *pause = false; + *asym_pause = false; + } + + return 0; +} + +int prestera_hw_port_type_get(const struct prestera_port *port, u8 *type) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_TYPE, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_attr_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + 
return err; + + *type = resp.param.type; + + return 0; +} + +int prestera_hw_port_fec_get(const struct prestera_port *port, u8 *fec) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_FEC, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_attr_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + *fec = resp.param.fec; + + return 0; +} + +int prestera_hw_port_fec_set(const struct prestera_port *port, u8 fec) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_FEC, + .port = port->hw_id, + .dev = port->dev_id, + .param = { + .fec = fec, + } + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +static u8 prestera_hw_mdix_to_eth(u8 mode) +{ + switch (mode) { + case PRESTERA_PORT_TP_MDI: + return ETH_TP_MDI; + case PRESTERA_PORT_TP_MDIX: + return ETH_TP_MDI_X; + case PRESTERA_PORT_TP_AUTO: + return ETH_TP_MDI_AUTO; + default: + return ETH_TP_MDI_INVALID; + } +} + +static u8 prestera_hw_mdix_from_eth(u8 mode) +{ + switch (mode) { + case ETH_TP_MDI: + return PRESTERA_PORT_TP_MDI; + case ETH_TP_MDI_X: + return PRESTERA_PORT_TP_MDIX; + case ETH_TP_MDI_AUTO: + return PRESTERA_PORT_TP_AUTO; + default: + return PRESTERA_PORT_TP_NA; + } +} + +int prestera_hw_port_mdix_get(const struct prestera_port *port, u8 *status, + u8 *admin_mode) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_MDIX, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_attr_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + *status = prestera_hw_mdix_to_eth(resp.param.mdix.status); + *admin_mode = prestera_hw_mdix_to_eth(resp.param.mdix.admin_mode); + + return 0; +} + +int prestera_hw_port_mdix_set(const struct prestera_port *port, u8 mode) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_MDIX, + .port = port->hw_id, + .dev = port->dev_id, + }; + + req.param.mdix.admin_mode = prestera_hw_mdix_from_eth(mode); + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_link_mode_set(const struct prestera_port *port, u32 mode) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_LINK_MODE, + .port = port->hw_id, + .dev = port->dev_id, + .param = { + .link_mode = mode, + } + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_link_mode_get(const struct prestera_port *port, u32 *mode) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_LINK_MODE, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_attr_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + *mode = resp.param.link_mode; + + return 0; +} + +int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_SPEED, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_attr_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, 
sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + *speed = resp.param.speed; + + return 0; +} + +int prestera_hw_port_autoneg_set(const struct prestera_port *port, + bool autoneg, u64 link_modes, u8 fec) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_AUTONEG, + .port = port->hw_id, + .dev = port->dev_id, + .param = { + .autoneg = { + .link_mode = link_modes, + .enable = autoneg, + .fec = fec, + } + } + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_autoneg_restart(struct prestera_port *port) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_AUTONEG_RESTART, + .port = port->hw_id, + .dev = port->dev_id, + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_duplex_get(const struct prestera_port *port, u8 *duplex) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_DUPLEX, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_attr_resp resp; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + *duplex = resp.param.duplex; + + return 0; +} + +int prestera_hw_port_stats_get(const struct prestera_port *port, + struct prestera_port_stats *st) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_STATS, + .port = port->hw_id, + .dev = port->dev_id, + }; + struct prestera_msg_port_stats_resp resp; + u64 *hw = resp.stats; + int err; + + err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + st->good_octets_received = hw[PRESTERA_PORT_GOOD_OCTETS_RCV_CNT]; + st->bad_octets_received = hw[PRESTERA_PORT_BAD_OCTETS_RCV_CNT]; + st->mac_trans_error = hw[PRESTERA_PORT_MAC_TRANSMIT_ERR_CNT]; + st->broadcast_frames_received = hw[PRESTERA_PORT_BRDC_PKTS_RCV_CNT]; + st->multicast_frames_received = hw[PRESTERA_PORT_MC_PKTS_RCV_CNT]; + st->frames_64_octets = hw[PRESTERA_PORT_PKTS_64L_CNT]; + st->frames_65_to_127_octets = hw[PRESTERA_PORT_PKTS_65TO127L_CNT]; + st->frames_128_to_255_octets = hw[PRESTERA_PORT_PKTS_128TO255L_CNT]; + st->frames_256_to_511_octets = hw[PRESTERA_PORT_PKTS_256TO511L_CNT]; + st->frames_512_to_1023_octets = hw[PRESTERA_PORT_PKTS_512TO1023L_CNT]; + st->frames_1024_to_max_octets = hw[PRESTERA_PORT_PKTS_1024TOMAXL_CNT]; + st->excessive_collision = hw[PRESTERA_PORT_EXCESSIVE_COLLISIONS_CNT]; + st->multicast_frames_sent = hw[PRESTERA_PORT_MC_PKTS_SENT_CNT]; + st->broadcast_frames_sent = hw[PRESTERA_PORT_BRDC_PKTS_SENT_CNT]; + st->fc_sent = hw[PRESTERA_PORT_FC_SENT_CNT]; + st->fc_received = hw[PRESTERA_PORT_GOOD_FC_RCV_CNT]; + st->buffer_overrun = hw[PRESTERA_PORT_DROP_EVENTS_CNT]; + st->undersize = hw[PRESTERA_PORT_UNDERSIZE_PKTS_CNT]; + st->fragments = hw[PRESTERA_PORT_FRAGMENTS_PKTS_CNT]; + st->oversize = hw[PRESTERA_PORT_OVERSIZE_PKTS_CNT]; + st->jabber = hw[PRESTERA_PORT_JABBER_PKTS_CNT]; + st->rx_error_frame_received = hw[PRESTERA_PORT_MAC_RCV_ERROR_CNT]; + st->bad_crc = hw[PRESTERA_PORT_BAD_CRC_CNT]; + st->collisions = hw[PRESTERA_PORT_COLLISIONS_CNT]; + st->late_collision = hw[PRESTERA_PORT_LATE_COLLISIONS_CNT]; + st->unicast_frames_received = hw[PRESTERA_PORT_GOOD_UC_PKTS_RCV_CNT]; + st->unicast_frames_sent = hw[PRESTERA_PORT_GOOD_UC_PKTS_SENT_CNT]; + st->sent_multiple = 
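Every accessor in this block follows one template: fill struct prestera_msg_port_attr_req with an attribute id and the port addressing, then call prestera_cmd() for a set or prestera_cmd_ret() for a get. A new attribute would slot in mechanically; a hypothetical loopback getter, for instance, could look like this (the attribute id and the union member it would read do not exist in this patch):

int prestera_hw_port_loopback_get(const struct prestera_port *port, u8 *lb)
{
        struct prestera_msg_port_attr_req req = {
                .attr = PRESTERA_CMD_PORT_ATTR_LOOPBACK, /* hypothetical id */
                .port = port->hw_id,
                .dev = port->dev_id,
        };
        struct prestera_msg_port_attr_resp resp;
        int err;

        err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
                               &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
        if (err)
                return err;

        *lb = resp.param.admin_state; /* would need its own union member */

        return 0;
}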
hw[PRESTERA_PORT_MULTIPLE_PKTS_SENT_CNT]; + st->sent_deferred = hw[PRESTERA_PORT_DEFERRED_PKTS_SENT_CNT]; + st->good_octets_sent = hw[PRESTERA_PORT_GOOD_OCTETS_SENT_CNT]; + + return 0; +} + +int prestera_hw_port_learning_set(struct prestera_port *port, bool enable) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_LEARNING, + .port = port->hw_id, + .dev = port->dev_id, + .param = { + .learning = enable, + } + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_port_flood_set(struct prestera_port *port, bool flood) +{ + struct prestera_msg_port_attr_req req = { + .attr = PRESTERA_CMD_PORT_ATTR_FLOOD, + .port = port->hw_id, + .dev = port->dev_id, + .param = { + .flood = flood, + } + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_vlan_create(struct prestera_switch *sw, u16 vid) +{ + struct prestera_msg_vlan_req req = { + .vid = vid, + }; + + return prestera_cmd(sw, PRESTERA_CMD_TYPE_VLAN_CREATE, + &req.cmd, sizeof(req)); +} + +int prestera_hw_vlan_delete(struct prestera_switch *sw, u16 vid) +{ + struct prestera_msg_vlan_req req = { + .vid = vid, + }; + + return prestera_cmd(sw, PRESTERA_CMD_TYPE_VLAN_DELETE, + &req.cmd, sizeof(req)); +} + +int prestera_hw_vlan_port_set(struct prestera_port *port, u16 vid, + bool is_member, bool untagged) +{ + struct prestera_msg_vlan_req req = { + .port = port->hw_id, + .dev = port->dev_id, + .vid = vid, + .is_member = is_member, + .is_tagged = !untagged, + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_VLAN_PORT_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_vlan_port_vid_set(struct prestera_port *port, u16 vid) +{ + struct prestera_msg_vlan_req req = { + .port = port->hw_id, + .dev = port->dev_id, + .vid = vid, + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_VLAN_PVID_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_vlan_port_stp_set(struct prestera_port *port, u16 vid, u8 state) +{ + struct prestera_msg_stp_req req = { + .port = port->hw_id, + .dev = port->dev_id, + .vid = vid, + .state = state, + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_STP_PORT_SET, + &req.cmd, sizeof(req)); +} + +int prestera_hw_fdb_add(struct prestera_port *port, const unsigned char *mac, + u16 vid, bool dynamic) +{ + struct prestera_msg_fdb_req req = { + .port = port->hw_id, + .dev = port->dev_id, + .vid = vid, + .dynamic = dynamic, + }; + + ether_addr_copy(req.mac, mac); + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_FDB_ADD, + &req.cmd, sizeof(req)); +} + +int prestera_hw_fdb_del(struct prestera_port *port, const unsigned char *mac, + u16 vid) +{ + struct prestera_msg_fdb_req req = { + .port = port->hw_id, + .dev = port->dev_id, + .vid = vid, + }; + + ether_addr_copy(req.mac, mac); + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_FDB_DELETE, + &req.cmd, sizeof(req)); +} + +int prestera_hw_fdb_flush_port(struct prestera_port *port, u32 mode) +{ + struct prestera_msg_fdb_req req = { + .port = port->hw_id, + .dev = port->dev_id, + .flush_mode = mode, + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT, + &req.cmd, sizeof(req)); +} + +int prestera_hw_fdb_flush_vlan(struct prestera_switch *sw, u16 vid, u32 mode) +{ + struct prestera_msg_fdb_req req = { + .vid = vid, + .flush_mode = mode, + }; + + return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_FLUSH_VLAN, + &req.cmd, sizeof(req)); +} + +int prestera_hw_fdb_flush_port_vlan(struct prestera_port *port, u16 vid, 
+ u32 mode) +{ + struct prestera_msg_fdb_req req = { + .port = port->hw_id, + .dev = port->dev_id, + .vid = vid, + .flush_mode = mode, + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT_VLAN, + &req.cmd, sizeof(req)); +} + +int prestera_hw_bridge_create(struct prestera_switch *sw, u16 *bridge_id) +{ + struct prestera_msg_bridge_resp resp; + struct prestera_msg_bridge_req req; + int err; + + err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_BRIDGE_CREATE, + &req.cmd, sizeof(req), + &resp.ret, sizeof(resp)); + if (err) + return err; + + *bridge_id = resp.bridge; + + return 0; +} + +int prestera_hw_bridge_delete(struct prestera_switch *sw, u16 bridge_id) +{ + struct prestera_msg_bridge_req req = { + .bridge = bridge_id, + }; + + return prestera_cmd(sw, PRESTERA_CMD_TYPE_BRIDGE_DELETE, + &req.cmd, sizeof(req)); +} + +int prestera_hw_bridge_port_add(struct prestera_port *port, u16 bridge_id) +{ + struct prestera_msg_bridge_req req = { + .bridge = bridge_id, + .port = port->hw_id, + .dev = port->dev_id, + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_BRIDGE_PORT_ADD, + &req.cmd, sizeof(req)); +} + +int prestera_hw_bridge_port_delete(struct prestera_port *port, u16 bridge_id) +{ + struct prestera_msg_bridge_req req = { + .bridge = bridge_id, + .port = port->hw_id, + .dev = port->dev_id, + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_BRIDGE_PORT_DELETE, + &req.cmd, sizeof(req)); +} + +int prestera_hw_rxtx_init(struct prestera_switch *sw, + struct prestera_rxtx_params *params) +{ + struct prestera_msg_rxtx_resp resp; + struct prestera_msg_rxtx_req req; + int err; + + req.use_sdma = params->use_sdma; + + err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_RXTX_INIT, + &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); + if (err) + return err; + + params->map_addr = resp.map_addr; + + return 0; +} + +int prestera_hw_rxtx_port_init(struct prestera_port *port) +{ + struct prestera_msg_rxtx_port_req req = { + .port = port->hw_id, + .dev = port->dev_id, + }; + + return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_RXTX_PORT_INIT, + &req.cmd, sizeof(req)); +} + +int prestera_hw_event_handler_register(struct prestera_switch *sw, + enum prestera_event_type type, + prestera_event_cb_t fn, + void *arg) +{ + struct prestera_fw_event_handler *eh; + + eh = __find_event_handler(sw, type); + if (eh) + return -EEXIST; + + eh = kmalloc(sizeof(*eh), GFP_KERNEL); + if (!eh) + return -ENOMEM; + + eh->type = type; + eh->func = fn; + eh->arg = arg; + + INIT_LIST_HEAD(&eh->list); + + list_add_rcu(&eh->list, &sw->event_handlers); + + return 0; +} + +void prestera_hw_event_handler_unregister(struct prestera_switch *sw, + enum prestera_event_type type, + prestera_event_cb_t fn) +{ + struct prestera_fw_event_handler *eh; + + eh = __find_event_handler(sw, type); + if (!eh) + return; + + list_del_rcu(&eh->list); + kfree_rcu(eh, rcu); +} diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h new file mode 100644 index 000000000000..b2b5ac95b4e3 --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved. 
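The bridge and FDB entry points above are the backend for switchdev offload: the first port joining a kernel bridge allocates a hardware bridge id, later ports reuse it, and leaving flushes learned entries before membership is dropped. A condensed sketch of the expected calling sequence (the switchdev glue lives in prestera_switchdev.c, outside this excerpt):

u16 bridge_id;

err = prestera_hw_bridge_create(sw, &bridge_id);   /* first port joins */
if (!err)
        err = prestera_hw_bridge_port_add(port, bridge_id);

/* ... on leave: */
prestera_hw_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_DYNAMIC);
prestera_hw_bridge_port_delete(port, bridge_id);
prestera_hw_bridge_delete(sw, bridge_id);          /* last port left */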
*/ + +#ifndef _PRESTERA_HW_H_ +#define _PRESTERA_HW_H_ + +#include <linux/types.h> + +enum prestera_accept_frm_type { + PRESTERA_ACCEPT_FRAME_TYPE_TAGGED, + PRESTERA_ACCEPT_FRAME_TYPE_UNTAGGED, + PRESTERA_ACCEPT_FRAME_TYPE_ALL, +}; + +enum prestera_fdb_flush_mode { + PRESTERA_FDB_FLUSH_MODE_DYNAMIC = BIT(0), + PRESTERA_FDB_FLUSH_MODE_STATIC = BIT(1), + PRESTERA_FDB_FLUSH_MODE_ALL = PRESTERA_FDB_FLUSH_MODE_DYNAMIC + | PRESTERA_FDB_FLUSH_MODE_STATIC, +}; + +enum { + PRESTERA_LINK_MODE_10baseT_Half, + PRESTERA_LINK_MODE_10baseT_Full, + PRESTERA_LINK_MODE_100baseT_Half, + PRESTERA_LINK_MODE_100baseT_Full, + PRESTERA_LINK_MODE_1000baseT_Half, + PRESTERA_LINK_MODE_1000baseT_Full, + PRESTERA_LINK_MODE_1000baseX_Full, + PRESTERA_LINK_MODE_1000baseKX_Full, + PRESTERA_LINK_MODE_2500baseX_Full, + PRESTERA_LINK_MODE_10GbaseKR_Full, + PRESTERA_LINK_MODE_10GbaseSR_Full, + PRESTERA_LINK_MODE_10GbaseLR_Full, + PRESTERA_LINK_MODE_20GbaseKR2_Full, + PRESTERA_LINK_MODE_25GbaseCR_Full, + PRESTERA_LINK_MODE_25GbaseKR_Full, + PRESTERA_LINK_MODE_25GbaseSR_Full, + PRESTERA_LINK_MODE_40GbaseKR4_Full, + PRESTERA_LINK_MODE_40GbaseCR4_Full, + PRESTERA_LINK_MODE_40GbaseSR4_Full, + PRESTERA_LINK_MODE_50GbaseCR2_Full, + PRESTERA_LINK_MODE_50GbaseKR2_Full, + PRESTERA_LINK_MODE_50GbaseSR2_Full, + PRESTERA_LINK_MODE_100GbaseKR4_Full, + PRESTERA_LINK_MODE_100GbaseSR4_Full, + PRESTERA_LINK_MODE_100GbaseCR4_Full, + + PRESTERA_LINK_MODE_MAX +}; + +enum { + PRESTERA_PORT_TYPE_NONE, + PRESTERA_PORT_TYPE_TP, + PRESTERA_PORT_TYPE_AUI, + PRESTERA_PORT_TYPE_MII, + PRESTERA_PORT_TYPE_FIBRE, + PRESTERA_PORT_TYPE_BNC, + PRESTERA_PORT_TYPE_DA, + PRESTERA_PORT_TYPE_OTHER, + + PRESTERA_PORT_TYPE_MAX +}; + +enum { + PRESTERA_PORT_TCVR_COPPER, + PRESTERA_PORT_TCVR_SFP, + + PRESTERA_PORT_TCVR_MAX +}; + +enum { + PRESTERA_PORT_FEC_OFF, + PRESTERA_PORT_FEC_BASER, + PRESTERA_PORT_FEC_RS, + + PRESTERA_PORT_FEC_MAX +}; + +enum { + PRESTERA_PORT_DUPLEX_HALF, + PRESTERA_PORT_DUPLEX_FULL, +}; + +enum { + PRESTERA_STP_DISABLED, + PRESTERA_STP_BLOCK_LISTEN, + PRESTERA_STP_LEARN, + PRESTERA_STP_FORWARD, +}; + +struct prestera_switch; +struct prestera_port; +struct prestera_port_stats; +struct prestera_port_caps; +enum prestera_event_type; +struct prestera_event; + +typedef void (*prestera_event_cb_t) + (struct prestera_switch *sw, struct prestera_event *evt, void *arg); + +struct prestera_rxtx_params; + +/* Switch API */ +int prestera_hw_switch_init(struct prestera_switch *sw); +void prestera_hw_switch_fini(struct prestera_switch *sw); +int prestera_hw_switch_ageing_set(struct prestera_switch *sw, u32 ageing_ms); +int prestera_hw_switch_mac_set(struct prestera_switch *sw, const char *mac); + +/* Port API */ +int prestera_hw_port_info_get(const struct prestera_port *port, + u32 *dev_id, u32 *hw_id, u16 *fp_id); +int prestera_hw_port_state_set(const struct prestera_port *port, + bool admin_state); +int prestera_hw_port_mtu_set(const struct prestera_port *port, u32 mtu); +int prestera_hw_port_mtu_get(const struct prestera_port *port, u32 *mtu); +int prestera_hw_port_mac_set(const struct prestera_port *port, const char *mac); +int prestera_hw_port_mac_get(const struct prestera_port *port, char *mac); +int prestera_hw_port_cap_get(const struct prestera_port *port, + struct prestera_port_caps *caps); +int prestera_hw_port_remote_cap_get(const struct prestera_port *port, + u64 *link_mode_bitmap); +int prestera_hw_port_remote_fc_get(const struct prestera_port *port, + bool *pause, bool *asym_pause); +int prestera_hw_port_type_get(const struct prestera_port 
*port, u8 *type); +int prestera_hw_port_fec_get(const struct prestera_port *port, u8 *fec); +int prestera_hw_port_fec_set(const struct prestera_port *port, u8 fec); +int prestera_hw_port_autoneg_set(const struct prestera_port *port, + bool autoneg, u64 link_modes, u8 fec); +int prestera_hw_port_autoneg_restart(struct prestera_port *port); +int prestera_hw_port_duplex_get(const struct prestera_port *port, u8 *duplex); +int prestera_hw_port_stats_get(const struct prestera_port *port, + struct prestera_port_stats *stats); +int prestera_hw_port_link_mode_set(const struct prestera_port *port, u32 mode); +int prestera_hw_port_link_mode_get(const struct prestera_port *port, u32 *mode); +int prestera_hw_port_mdix_get(const struct prestera_port *port, u8 *status, + u8 *admin_mode); +int prestera_hw_port_mdix_set(const struct prestera_port *port, u8 mode); +int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed); +int prestera_hw_port_learning_set(struct prestera_port *port, bool enable); +int prestera_hw_port_flood_set(struct prestera_port *port, bool flood); +int prestera_hw_port_accept_frm_type(struct prestera_port *port, + enum prestera_accept_frm_type type); +/* Vlan API */ +int prestera_hw_vlan_create(struct prestera_switch *sw, u16 vid); +int prestera_hw_vlan_delete(struct prestera_switch *sw, u16 vid); +int prestera_hw_vlan_port_set(struct prestera_port *port, u16 vid, + bool is_member, bool untagged); +int prestera_hw_vlan_port_vid_set(struct prestera_port *port, u16 vid); +int prestera_hw_vlan_port_stp_set(struct prestera_port *port, u16 vid, u8 state); + +/* FDB API */ +int prestera_hw_fdb_add(struct prestera_port *port, const unsigned char *mac, + u16 vid, bool dynamic); +int prestera_hw_fdb_del(struct prestera_port *port, const unsigned char *mac, + u16 vid); +int prestera_hw_fdb_flush_port(struct prestera_port *port, u32 mode); +int prestera_hw_fdb_flush_vlan(struct prestera_switch *sw, u16 vid, u32 mode); +int prestera_hw_fdb_flush_port_vlan(struct prestera_port *port, u16 vid, + u32 mode); + +/* Bridge API */ +int prestera_hw_bridge_create(struct prestera_switch *sw, u16 *bridge_id); +int prestera_hw_bridge_delete(struct prestera_switch *sw, u16 bridge_id); +int prestera_hw_bridge_port_add(struct prestera_port *port, u16 bridge_id); +int prestera_hw_bridge_port_delete(struct prestera_port *port, u16 bridge_id); + +/* Event handlers */ +int prestera_hw_event_handler_register(struct prestera_switch *sw, + enum prestera_event_type type, + prestera_event_cb_t fn, + void *arg); +void prestera_hw_event_handler_unregister(struct prestera_switch *sw, + enum prestera_event_type type, + prestera_event_cb_t fn); + +/* RX/TX */ +int prestera_hw_rxtx_init(struct prestera_switch *sw, + struct prestera_rxtx_params *params); +int prestera_hw_rxtx_port_init(struct prestera_port *port); + +#endif /* _PRESTERA_HW_H_ */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c new file mode 100644 index 000000000000..0f20e0788cce --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -0,0 +1,667 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019-2020 Marvell International Ltd. 
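The VLAN half of the API keeps switch-wide and per-port state separate: a VLAN must be created on the switch before ports can become members of it or use it as PVID. Roughly (sketch, the VID value is hypothetical):

err = prestera_hw_vlan_create(sw, 100);
if (!err)
        /* member of VLAN 100, egress tagged */
        err = prestera_hw_vlan_port_set(port, 100, true, false);
if (!err)
        err = prestera_hw_vlan_port_vid_set(port, 100); /* set as PVID */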
All rights reserved */
+
+#include <linux/etherdevice.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/netdev_features.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+
+#include "prestera.h"
+#include "prestera_hw.h"
+#include "prestera_rxtx.h"
+#include "prestera_devlink.h"
+#include "prestera_ethtool.h"
+#include "prestera_switchdev.h"
+
+#define PRESTERA_MTU_DEFAULT	1536
+
+#define PRESTERA_STATS_DELAY_MS	1000
+
+#define PRESTERA_MAC_ADDR_NUM_MAX	255
+
+static struct workqueue_struct *prestera_wq;
+
+int prestera_port_pvid_set(struct prestera_port *port, u16 vid)
+{
+	enum prestera_accept_frm_type frm_type;
+	int err;
+
+	frm_type = PRESTERA_ACCEPT_FRAME_TYPE_TAGGED;
+
+	if (vid) {
+		err = prestera_hw_vlan_port_vid_set(port, vid);
+		if (err)
+			return err;
+
+		frm_type = PRESTERA_ACCEPT_FRAME_TYPE_ALL;
+	}
+
+	err = prestera_hw_port_accept_frm_type(port, frm_type);
+	if (err) {
+		/* restore the previous PVID on failure */
+		if (frm_type == PRESTERA_ACCEPT_FRAME_TYPE_ALL)
+			prestera_hw_vlan_port_vid_set(port, port->pvid);
+		return err;
+	}
+
+	port->pvid = vid;
+	return 0;
+}
+
+struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw,
+						 u32 dev_id, u32 hw_id)
+{
+	struct prestera_port *port = NULL;
+	struct prestera_port *tmp;
+
+	read_lock(&sw->port_list_lock);
+	list_for_each_entry(tmp, &sw->port_list, list) {
+		if (tmp->dev_id == dev_id && tmp->hw_id == hw_id) {
+			port = tmp;
+			break;
+		}
+	}
+	read_unlock(&sw->port_list_lock);
+
+	return port;
+}
+
+struct prestera_port *prestera_find_port(struct prestera_switch *sw, u32 id)
+{
+	struct prestera_port *port = NULL;
+	struct prestera_port *tmp;
+
+	read_lock(&sw->port_list_lock);
+	list_for_each_entry(tmp, &sw->port_list, list) {
+		if (tmp->id == id) {
+			port = tmp;
+			break;
+		}
+	}
+	read_unlock(&sw->port_list_lock);
+
+	return port;
+}
+
+static int prestera_port_open(struct net_device *dev)
+{
+	struct prestera_port *port = netdev_priv(dev);
+	int err;
+
+	err = prestera_hw_port_state_set(port, true);
+	if (err)
+		return err;
+
+	netif_start_queue(dev);
+
+	return 0;
+}
+
+static int prestera_port_close(struct net_device *dev)
+{
+	struct prestera_port *port = netdev_priv(dev);
+	int err;
+
+	netif_stop_queue(dev);
+
+	err = prestera_hw_port_state_set(port, false);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static netdev_tx_t prestera_port_xmit(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	return prestera_rxtx_xmit(netdev_priv(dev), skb);
+}
+
+static int prestera_is_valid_mac_addr(struct prestera_port *port, u8 *addr)
+{
+	if (!is_valid_ether_addr(addr))
+		return -EADDRNOTAVAIL;
+
+	/* firmware requires that a port MAC address match the first 5 bytes
+	 * of the switch base MAC address
+	 */
+	if (memcmp(port->sw->base_mac, addr, ETH_ALEN - 1))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int prestera_port_set_mac_address(struct net_device *dev, void *p)
+{
+	struct prestera_port *port = netdev_priv(dev);
+	struct sockaddr *addr = p;
+	int err;
+
+	err = prestera_is_valid_mac_addr(port, addr->sa_data);
+	if (err)
+		return err;
+
+	err = prestera_hw_port_mac_set(port, addr->sa_data);
+	if (err)
+		return err;
+
+	ether_addr_copy(dev->dev_addr, addr->sa_data);
+
+	return 0;
+}
+
+static int prestera_port_change_mtu(struct net_device *dev, int mtu)
+{
+	struct prestera_port *port = netdev_priv(dev);
+	int err;
+
+	err = prestera_hw_port_mtu_set(port, mtu);
+	if (err)
+		return err;
+
+	dev->mtu = mtu;
+
+	return 0;
+}
+
+static void prestera_port_get_stats64(struct net_device *dev,
+				      struct rtnl_link_stats64 *stats)
+{
+	struct prestera_port *port = netdev_priv(dev);
+	struct prestera_port_stats *port_stats =
&port->cached_hw_stats.stats; + + stats->rx_packets = port_stats->broadcast_frames_received + + port_stats->multicast_frames_received + + port_stats->unicast_frames_received; + + stats->tx_packets = port_stats->broadcast_frames_sent + + port_stats->multicast_frames_sent + + port_stats->unicast_frames_sent; + + stats->rx_bytes = port_stats->good_octets_received; + + stats->tx_bytes = port_stats->good_octets_sent; + + stats->rx_errors = port_stats->rx_error_frame_received; + stats->tx_errors = port_stats->mac_trans_error; + + stats->rx_dropped = port_stats->buffer_overrun; + stats->tx_dropped = 0; + + stats->multicast = port_stats->multicast_frames_received; + stats->collisions = port_stats->excessive_collision; + + stats->rx_crc_errors = port_stats->bad_crc; +} + +static void prestera_port_get_hw_stats(struct prestera_port *port) +{ + prestera_hw_port_stats_get(port, &port->cached_hw_stats.stats); +} + +static void prestera_port_stats_update(struct work_struct *work) +{ + struct prestera_port *port = + container_of(work, struct prestera_port, + cached_hw_stats.caching_dw.work); + + prestera_port_get_hw_stats(port); + + queue_delayed_work(prestera_wq, &port->cached_hw_stats.caching_dw, + msecs_to_jiffies(PRESTERA_STATS_DELAY_MS)); +} + +static const struct net_device_ops prestera_netdev_ops = { + .ndo_open = prestera_port_open, + .ndo_stop = prestera_port_close, + .ndo_start_xmit = prestera_port_xmit, + .ndo_change_mtu = prestera_port_change_mtu, + .ndo_get_stats64 = prestera_port_get_stats64, + .ndo_set_mac_address = prestera_port_set_mac_address, + .ndo_get_devlink_port = prestera_devlink_get_port, +}; + +int prestera_port_autoneg_set(struct prestera_port *port, bool enable, + u64 adver_link_modes, u8 adver_fec) +{ + bool refresh = false; + u64 link_modes; + int err; + u8 fec; + + if (port->caps.type != PRESTERA_PORT_TYPE_TP) + return enable ? 
			-EINVAL : 0;
+
+	if (!enable)
+		goto set_autoneg;
+
+	link_modes = port->caps.supp_link_modes & adver_link_modes;
+	fec = port->caps.supp_fec & adver_fec;
+
+	if (!link_modes && !fec)
+		return -EOPNOTSUPP;
+
+	if (link_modes && port->adver_link_modes != link_modes) {
+		port->adver_link_modes = link_modes;
+		refresh = true;
+	}
+
+	if (fec && port->adver_fec != fec) {
+		port->adver_fec = fec;
+		refresh = true;
+	}
+
+set_autoneg:
+	if (port->autoneg == enable && !refresh)
+		return 0;
+
+	err = prestera_hw_port_autoneg_set(port, enable, port->adver_link_modes,
+					   port->adver_fec);
+	if (err)
+		return err;
+
+	port->autoneg = enable;
+
+	return 0;
+}
+
+static void prestera_port_list_add(struct prestera_port *port)
+{
+	write_lock(&port->sw->port_list_lock);
+	list_add(&port->list, &port->sw->port_list);
+	write_unlock(&port->sw->port_list_lock);
+}
+
+static void prestera_port_list_del(struct prestera_port *port)
+{
+	write_lock(&port->sw->port_list_lock);
+	list_del(&port->list);
+	write_unlock(&port->sw->port_list_lock);
+}
+
+static int prestera_port_create(struct prestera_switch *sw, u32 id)
+{
+	struct prestera_port *port;
+	struct net_device *dev;
+	int err;
+
+	dev = alloc_etherdev(sizeof(*port));
+	if (!dev)
+		return -ENOMEM;
+
+	port = netdev_priv(dev);
+
+	INIT_LIST_HEAD(&port->vlans_list);
+	port->pvid = PRESTERA_DEFAULT_VID;
+	port->dev = dev;
+	port->id = id;
+	port->sw = sw;
+
+	err = prestera_hw_port_info_get(port, &port->dev_id, &port->hw_id,
+					&port->fp_id);
+	if (err) {
+		dev_err(prestera_dev(sw), "Failed to get port(%u) info\n", id);
+		goto err_port_info_get;
+	}
+
+	err = prestera_devlink_port_register(port);
+	if (err)
+		goto err_dl_port_register;
+
+	dev->features |= NETIF_F_NETNS_LOCAL;
+	dev->netdev_ops = &prestera_netdev_ops;
+	dev->ethtool_ops = &prestera_ethtool_ops;
+
+	netif_carrier_off(dev);
+
+	dev->mtu = min_t(unsigned int, sw->mtu_max, PRESTERA_MTU_DEFAULT);
+	dev->min_mtu = sw->mtu_min;
+	dev->max_mtu = sw->mtu_max;
+
+	err = prestera_hw_port_mtu_set(port, dev->mtu);
+	if (err) {
+		dev_err(prestera_dev(sw), "Failed to set port(%u) mtu(%d)\n",
+			id, dev->mtu);
+		goto err_port_init;
+	}
+
+	if (port->fp_id >= PRESTERA_MAC_ADDR_NUM_MAX) {
+		err = -EINVAL;
+		goto err_port_init;
+	}
+
+	/* firmware requires that a port MAC address begin with the first
+	 * 5 bytes of the base MAC address
+	 */
+	memcpy(dev->dev_addr, sw->base_mac, dev->addr_len - 1);
+	dev->dev_addr[dev->addr_len - 1] = port->fp_id;
+
+	err = prestera_hw_port_mac_set(port, dev->dev_addr);
+	if (err) {
+		dev_err(prestera_dev(sw), "Failed to set port(%u) mac addr\n", id);
+		goto err_port_init;
+	}
+
+	err = prestera_hw_port_cap_get(port, &port->caps);
+	if (err) {
+		dev_err(prestera_dev(sw), "Failed to get port(%u) caps\n", id);
+		goto err_port_init;
+	}
+
+	port->adver_fec = BIT(PRESTERA_PORT_FEC_OFF);
+	prestera_port_autoneg_set(port, true, port->caps.supp_link_modes,
+				  port->caps.supp_fec);
+
+	err = prestera_hw_port_state_set(port, false);
+	if (err) {
+		dev_err(prestera_dev(sw), "Failed to set port(%u) down\n", id);
+		goto err_port_init;
+	}
+
+	err = prestera_rxtx_port_init(port);
+	if (err)
+		goto err_port_init;
+
+	INIT_DELAYED_WORK(&port->cached_hw_stats.caching_dw,
+			  &prestera_port_stats_update);
+
+	prestera_port_list_add(port);
+
+	err = register_netdev(dev);
+	if (err)
+		goto err_register_netdev;
+
+	prestera_devlink_port_set(port);
+
+	return 0;
+
+err_register_netdev:
+	prestera_port_list_del(port);
+err_port_init:
+	prestera_devlink_port_unregister(port);
+err_dl_port_register:
+err_port_info_get:
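+	/* at these two labels the netdev is not registered and the port is
+	 * not on any list, so freeing the netdev is the only cleanup left
+	 */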
+	free_netdev(dev);
+	return err;
+}
+
+static void prestera_port_destroy(struct prestera_port *port)
+{
+	struct net_device *dev = port->dev;
+
+	cancel_delayed_work_sync(&port->cached_hw_stats.caching_dw);
+	prestera_devlink_port_clear(port);
+	unregister_netdev(dev);
+	prestera_port_list_del(port);
+	prestera_devlink_port_unregister(port);
+	free_netdev(dev);
+}
+
+static void prestera_destroy_ports(struct prestera_switch *sw)
+{
+	struct prestera_port *port, *tmp;
+
+	list_for_each_entry_safe(port, tmp, &sw->port_list, list)
+		prestera_port_destroy(port);
+}
+
+static int prestera_create_ports(struct prestera_switch *sw)
+{
+	struct prestera_port *port, *tmp;
+	u32 port_idx;
+	int err;
+
+	for (port_idx = 0; port_idx < sw->port_count; port_idx++) {
+		err = prestera_port_create(sw, port_idx);
+		if (err)
+			goto err_port_create;
+	}
+
+	return 0;
+
+err_port_create:
+	list_for_each_entry_safe(port, tmp, &sw->port_list, list)
+		prestera_port_destroy(port);
+
+	return err;
+}
+
+static void prestera_port_handle_event(struct prestera_switch *sw,
+				       struct prestera_event *evt, void *arg)
+{
+	struct delayed_work *caching_dw;
+	struct prestera_port *port;
+
+	port = prestera_find_port(sw, evt->port_evt.port_id);
+	if (!port || !port->dev)
+		return;
+
+	caching_dw = &port->cached_hw_stats.caching_dw;
+
+	if (evt->id == PRESTERA_PORT_EVENT_STATE_CHANGED) {
+		if (evt->port_evt.data.oper_state) {
+			netif_carrier_on(port->dev);
+			if (!delayed_work_pending(caching_dw))
+				queue_delayed_work(prestera_wq, caching_dw, 0);
+		} else {
+			netif_carrier_off(port->dev);
+			if (delayed_work_pending(caching_dw))
+				cancel_delayed_work(caching_dw);
+		}
+	}
+}
+
+static int prestera_event_handlers_register(struct prestera_switch *sw)
+{
+	return prestera_hw_event_handler_register(sw, PRESTERA_EVENT_TYPE_PORT,
+						  prestera_port_handle_event,
+						  NULL);
+}
+
+static void prestera_event_handlers_unregister(struct prestera_switch *sw)
+{
+	prestera_hw_event_handler_unregister(sw, PRESTERA_EVENT_TYPE_PORT,
+					     prestera_port_handle_event);
+}
+
+static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw)
+{
+	struct device_node *base_mac_np;
+	struct device_node *np;
+	const char *base_mac;
+
+	np = of_find_compatible_node(NULL, NULL, "marvell,prestera");
+	base_mac_np = of_parse_phandle(np, "base-mac-provider", 0);
+	/* of_find_compatible_node() returns the node with an elevated
+	 * refcount; drop it once the phandle has been resolved
+	 */
+	of_node_put(np);
+
+	base_mac = of_get_mac_address(base_mac_np);
+	of_node_put(base_mac_np);
+	if (!IS_ERR(base_mac))
+		ether_addr_copy(sw->base_mac, base_mac);
+
+	if (!is_valid_ether_addr(sw->base_mac)) {
+		eth_random_addr(sw->base_mac);
+		dev_info(prestera_dev(sw), "using random base mac address\n");
+	}
+
+	return prestera_hw_switch_mac_set(sw, sw->base_mac);
+}
+
+bool prestera_netdev_check(const struct net_device *dev)
+{
+	return dev->netdev_ops == &prestera_netdev_ops;
+}
+
+static int prestera_lower_dev_walk(struct net_device *dev,
+				   struct netdev_nested_priv *priv)
+{
+	struct prestera_port **pport = (struct prestera_port **)priv->data;
+
+	if (prestera_netdev_check(dev)) {
+		*pport = netdev_priv(dev);
+		return 1;
+	}
+
+	return 0;
+}
+
+struct prestera_port *prestera_port_dev_lower_find(struct net_device *dev)
+{
+	struct prestera_port *port = NULL;
+	struct netdev_nested_priv priv = {
+		.data = (void *)&port,
+	};
+
+	if (prestera_netdev_check(dev))
+		return netdev_priv(dev);
+
+	netdev_walk_all_lower_dev(dev, prestera_lower_dev_walk, &priv);
+
+	return port;
+}
+
+static int prestera_netdev_port_event(struct net_device *dev,
+				      unsigned long event, void *ptr)
+{
+	switch (event) {
+	case NETDEV_PRECHANGEUPPER:
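+	/* both upper-device notifications go to the same bridge handler:
+	 * PRECHANGEUPPER to validate the requested upper, CHANGEUPPER to
+	 * commit the join or leave
+	 */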
+ case NETDEV_CHANGEUPPER: + return prestera_bridge_port_event(dev, event, ptr); + default: + return 0; + } +} + +static int prestera_netdev_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + int err = 0; + + if (prestera_netdev_check(dev)) + err = prestera_netdev_port_event(dev, event, ptr); + + return notifier_from_errno(err); +} + +static int prestera_netdev_event_handler_register(struct prestera_switch *sw) +{ + sw->netdev_nb.notifier_call = prestera_netdev_event_handler; + + return register_netdevice_notifier(&sw->netdev_nb); +} + +static void prestera_netdev_event_handler_unregister(struct prestera_switch *sw) +{ + unregister_netdevice_notifier(&sw->netdev_nb); +} + +static int prestera_switch_init(struct prestera_switch *sw) +{ + int err; + + err = prestera_hw_switch_init(sw); + if (err) { + dev_err(prestera_dev(sw), "Failed to init Switch device\n"); + return err; + } + + rwlock_init(&sw->port_list_lock); + INIT_LIST_HEAD(&sw->port_list); + + err = prestera_switch_set_base_mac_addr(sw); + if (err) + return err; + + err = prestera_netdev_event_handler_register(sw); + if (err) + return err; + + err = prestera_switchdev_init(sw); + if (err) + goto err_swdev_register; + + err = prestera_rxtx_switch_init(sw); + if (err) + goto err_rxtx_register; + + err = prestera_event_handlers_register(sw); + if (err) + goto err_handlers_register; + + err = prestera_devlink_register(sw); + if (err) + goto err_dl_register; + + err = prestera_create_ports(sw); + if (err) + goto err_ports_create; + + return 0; + +err_ports_create: + prestera_devlink_unregister(sw); +err_dl_register: + prestera_event_handlers_unregister(sw); +err_handlers_register: + prestera_rxtx_switch_fini(sw); +err_rxtx_register: + prestera_switchdev_fini(sw); +err_swdev_register: + prestera_netdev_event_handler_unregister(sw); + prestera_hw_switch_fini(sw); + + return err; +} + +static void prestera_switch_fini(struct prestera_switch *sw) +{ + prestera_destroy_ports(sw); + prestera_devlink_unregister(sw); + prestera_event_handlers_unregister(sw); + prestera_rxtx_switch_fini(sw); + prestera_switchdev_fini(sw); + prestera_netdev_event_handler_unregister(sw); + prestera_hw_switch_fini(sw); +} + +int prestera_device_register(struct prestera_device *dev) +{ + struct prestera_switch *sw; + int err; + + sw = prestera_devlink_alloc(); + if (!sw) + return -ENOMEM; + + dev->priv = sw; + sw->dev = dev; + + err = prestera_switch_init(sw); + if (err) { + prestera_devlink_free(sw); + return err; + } + + return 0; +} +EXPORT_SYMBOL(prestera_device_register); + +void prestera_device_unregister(struct prestera_device *dev) +{ + struct prestera_switch *sw = dev->priv; + + prestera_switch_fini(sw); + prestera_devlink_free(sw); +} +EXPORT_SYMBOL(prestera_device_unregister); + +static int __init prestera_module_init(void) +{ + prestera_wq = alloc_workqueue("prestera", 0, 0); + if (!prestera_wq) + return -ENOMEM; + + return 0; +} + +static void __exit prestera_module_exit(void) +{ + destroy_workqueue(prestera_wq); +} + +module_init(prestera_module_init); +module_exit(prestera_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Marvell Prestera switch driver"); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c new file mode 100644 index 000000000000..1b97adae542e --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c @@ -0,0 +1,769 @@ +// SPDX-License-Identifier: 
BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved */ + +#include <linux/circ_buf.h> +#include <linux/device.h> +#include <linux/firmware.h> +#include <linux/iopoll.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "prestera.h" + +#define PRESTERA_MSG_MAX_SIZE 1500 + +#define PRESTERA_SUPP_FW_MAJ_VER 2 +#define PRESTERA_SUPP_FW_MIN_VER 0 + +#define PRESTERA_FW_PATH_FMT "mrvl/prestera/mvsw_prestera_fw-v%u.%u.img" + +#define PRESTERA_FW_HDR_MAGIC 0x351D9D06 +#define PRESTERA_FW_DL_TIMEOUT_MS 50000 +#define PRESTERA_FW_BLK_SZ 1024 + +#define PRESTERA_FW_VER_MAJ_MUL 1000000 +#define PRESTERA_FW_VER_MIN_MUL 1000 + +#define PRESTERA_FW_VER_MAJ(v) ((v) / PRESTERA_FW_VER_MAJ_MUL) + +#define PRESTERA_FW_VER_MIN(v) \ + (((v) - (PRESTERA_FW_VER_MAJ(v) * PRESTERA_FW_VER_MAJ_MUL)) / \ + PRESTERA_FW_VER_MIN_MUL) + +#define PRESTERA_FW_VER_PATCH(v) \ + ((v) - (PRESTERA_FW_VER_MAJ(v) * PRESTERA_FW_VER_MAJ_MUL) - \ + (PRESTERA_FW_VER_MIN(v) * PRESTERA_FW_VER_MIN_MUL)) + +enum prestera_pci_bar_t { + PRESTERA_PCI_BAR_FW = 2, + PRESTERA_PCI_BAR_PP = 4, +}; + +struct prestera_fw_header { + __be32 magic_number; + __be32 version_value; + u8 reserved[8]; +}; + +struct prestera_ldr_regs { + u32 ldr_ready; + u32 pad1; + + u32 ldr_img_size; + u32 ldr_ctl_flags; + + u32 ldr_buf_offs; + u32 ldr_buf_size; + + u32 ldr_buf_rd; + u32 pad2; + u32 ldr_buf_wr; + + u32 ldr_status; +}; + +#define PRESTERA_LDR_REG_OFFSET(f) offsetof(struct prestera_ldr_regs, f) + +#define PRESTERA_LDR_READY_MAGIC 0xf00dfeed + +#define PRESTERA_LDR_STATUS_IMG_DL BIT(0) +#define PRESTERA_LDR_STATUS_START_FW BIT(1) +#define PRESTERA_LDR_STATUS_INVALID_IMG BIT(2) +#define PRESTERA_LDR_STATUS_NOMEM BIT(3) + +#define PRESTERA_LDR_REG_BASE(fw) ((fw)->ldr_regs) +#define PRESTERA_LDR_REG_ADDR(fw, reg) (PRESTERA_LDR_REG_BASE(fw) + (reg)) + +/* fw loader registers */ +#define PRESTERA_LDR_READY_REG PRESTERA_LDR_REG_OFFSET(ldr_ready) +#define PRESTERA_LDR_IMG_SIZE_REG PRESTERA_LDR_REG_OFFSET(ldr_img_size) +#define PRESTERA_LDR_CTL_REG PRESTERA_LDR_REG_OFFSET(ldr_ctl_flags) +#define PRESTERA_LDR_BUF_SIZE_REG PRESTERA_LDR_REG_OFFSET(ldr_buf_size) +#define PRESTERA_LDR_BUF_OFFS_REG PRESTERA_LDR_REG_OFFSET(ldr_buf_offs) +#define PRESTERA_LDR_BUF_RD_REG PRESTERA_LDR_REG_OFFSET(ldr_buf_rd) +#define PRESTERA_LDR_BUF_WR_REG PRESTERA_LDR_REG_OFFSET(ldr_buf_wr) +#define PRESTERA_LDR_STATUS_REG PRESTERA_LDR_REG_OFFSET(ldr_status) + +#define PRESTERA_LDR_CTL_DL_START BIT(0) + +#define PRESTERA_EVT_QNUM_MAX 4 + +struct prestera_fw_evtq_regs { + u32 rd_idx; + u32 pad1; + u32 wr_idx; + u32 pad2; + u32 offs; + u32 len; +}; + +struct prestera_fw_regs { + u32 fw_ready; + u32 pad; + u32 cmd_offs; + u32 cmd_len; + u32 evt_offs; + u32 evt_qnum; + + u32 cmd_req_ctl; + u32 cmd_req_len; + u32 cmd_rcv_ctl; + u32 cmd_rcv_len; + + u32 fw_status; + u32 rx_status; + + struct prestera_fw_evtq_regs evtq_list[PRESTERA_EVT_QNUM_MAX]; +}; + +#define PRESTERA_FW_REG_OFFSET(f) offsetof(struct prestera_fw_regs, f) + +#define PRESTERA_FW_READY_MAGIC 0xcafebabe + +/* fw registers */ +#define PRESTERA_FW_READY_REG PRESTERA_FW_REG_OFFSET(fw_ready) + +#define PRESTERA_CMD_BUF_OFFS_REG PRESTERA_FW_REG_OFFSET(cmd_offs) +#define PRESTERA_CMD_BUF_LEN_REG PRESTERA_FW_REG_OFFSET(cmd_len) +#define PRESTERA_EVT_BUF_OFFS_REG PRESTERA_FW_REG_OFFSET(evt_offs) +#define PRESTERA_EVT_QNUM_REG PRESTERA_FW_REG_OFFSET(evt_qnum) + +#define PRESTERA_CMD_REQ_CTL_REG PRESTERA_FW_REG_OFFSET(cmd_req_ctl) +#define 
PRESTERA_CMD_REQ_LEN_REG PRESTERA_FW_REG_OFFSET(cmd_req_len) + +#define PRESTERA_CMD_RCV_CTL_REG PRESTERA_FW_REG_OFFSET(cmd_rcv_ctl) +#define PRESTERA_CMD_RCV_LEN_REG PRESTERA_FW_REG_OFFSET(cmd_rcv_len) +#define PRESTERA_FW_STATUS_REG PRESTERA_FW_REG_OFFSET(fw_status) +#define PRESTERA_RX_STATUS_REG PRESTERA_FW_REG_OFFSET(rx_status) + +/* PRESTERA_CMD_REQ_CTL_REG flags */ +#define PRESTERA_CMD_F_REQ_SENT BIT(0) +#define PRESTERA_CMD_F_REPL_RCVD BIT(1) + +/* PRESTERA_CMD_RCV_CTL_REG flags */ +#define PRESTERA_CMD_F_REPL_SENT BIT(0) + +#define PRESTERA_EVTQ_REG_OFFSET(q, f) \ + (PRESTERA_FW_REG_OFFSET(evtq_list) + \ + (q) * sizeof(struct prestera_fw_evtq_regs) + \ + offsetof(struct prestera_fw_evtq_regs, f)) + +#define PRESTERA_EVTQ_RD_IDX_REG(q) PRESTERA_EVTQ_REG_OFFSET(q, rd_idx) +#define PRESTERA_EVTQ_WR_IDX_REG(q) PRESTERA_EVTQ_REG_OFFSET(q, wr_idx) +#define PRESTERA_EVTQ_OFFS_REG(q) PRESTERA_EVTQ_REG_OFFSET(q, offs) +#define PRESTERA_EVTQ_LEN_REG(q) PRESTERA_EVTQ_REG_OFFSET(q, len) + +#define PRESTERA_FW_REG_BASE(fw) ((fw)->dev.ctl_regs) +#define PRESTERA_FW_REG_ADDR(fw, reg) PRESTERA_FW_REG_BASE((fw)) + (reg) + +#define PRESTERA_FW_CMD_DEFAULT_WAIT_MS 30000 +#define PRESTERA_FW_READY_WAIT_MS 20000 + +struct prestera_fw_evtq { + u8 __iomem *addr; + size_t len; +}; + +struct prestera_fw { + struct workqueue_struct *wq; + struct prestera_device dev; + u8 __iomem *ldr_regs; + u8 __iomem *ldr_ring_buf; + u32 ldr_buf_len; + u32 ldr_wr_idx; + struct mutex cmd_mtx; /* serialize access to dev->send_req */ + size_t cmd_mbox_len; + u8 __iomem *cmd_mbox; + struct prestera_fw_evtq evt_queue[PRESTERA_EVT_QNUM_MAX]; + u8 evt_qnum; + struct work_struct evt_work; + u8 __iomem *evt_buf; + u8 *evt_msg; +}; + +static int prestera_fw_load(struct prestera_fw *fw); + +static void prestera_fw_write(struct prestera_fw *fw, u32 reg, u32 val) +{ + writel(val, PRESTERA_FW_REG_ADDR(fw, reg)); +} + +static u32 prestera_fw_read(struct prestera_fw *fw, u32 reg) +{ + return readl(PRESTERA_FW_REG_ADDR(fw, reg)); +} + +static u32 prestera_fw_evtq_len(struct prestera_fw *fw, u8 qid) +{ + return fw->evt_queue[qid].len; +} + +static u32 prestera_fw_evtq_avail(struct prestera_fw *fw, u8 qid) +{ + u32 wr_idx = prestera_fw_read(fw, PRESTERA_EVTQ_WR_IDX_REG(qid)); + u32 rd_idx = prestera_fw_read(fw, PRESTERA_EVTQ_RD_IDX_REG(qid)); + + return CIRC_CNT(wr_idx, rd_idx, prestera_fw_evtq_len(fw, qid)); +} + +static void prestera_fw_evtq_rd_set(struct prestera_fw *fw, + u8 qid, u32 idx) +{ + u32 rd_idx = idx & (prestera_fw_evtq_len(fw, qid) - 1); + + prestera_fw_write(fw, PRESTERA_EVTQ_RD_IDX_REG(qid), rd_idx); +} + +static u8 __iomem *prestera_fw_evtq_buf(struct prestera_fw *fw, u8 qid) +{ + return fw->evt_queue[qid].addr; +} + +static u32 prestera_fw_evtq_read32(struct prestera_fw *fw, u8 qid) +{ + u32 rd_idx = prestera_fw_read(fw, PRESTERA_EVTQ_RD_IDX_REG(qid)); + u32 val; + + val = readl(prestera_fw_evtq_buf(fw, qid) + rd_idx); + prestera_fw_evtq_rd_set(fw, qid, rd_idx + 4); + return val; +} + +static ssize_t prestera_fw_evtq_read_buf(struct prestera_fw *fw, + u8 qid, void *buf, size_t len) +{ + u32 idx = prestera_fw_read(fw, PRESTERA_EVTQ_RD_IDX_REG(qid)); + u8 __iomem *evtq_addr = prestera_fw_evtq_buf(fw, qid); + u32 *buf32 = buf; + int i; + + for (i = 0; i < len / 4; buf32++, i++) { + *buf32 = readl_relaxed(evtq_addr + idx); + idx = (idx + 4) & (prestera_fw_evtq_len(fw, qid) - 1); + } + + prestera_fw_evtq_rd_set(fw, qid, idx); + + return i; +} + +static u8 prestera_fw_evtq_pick(struct prestera_fw *fw) +{ + int qid; + + for 
(qid = 0; qid < fw->evt_qnum; qid++) { + if (prestera_fw_evtq_avail(fw, qid) >= 4) + return qid; + } + + return PRESTERA_EVT_QNUM_MAX; +} + +static void prestera_fw_evt_work_fn(struct work_struct *work) +{ + struct prestera_fw *fw; + void *msg; + u8 qid; + + fw = container_of(work, struct prestera_fw, evt_work); + msg = fw->evt_msg; + + while ((qid = prestera_fw_evtq_pick(fw)) < PRESTERA_EVT_QNUM_MAX) { + u32 idx; + u32 len; + + len = prestera_fw_evtq_read32(fw, qid); + idx = prestera_fw_read(fw, PRESTERA_EVTQ_RD_IDX_REG(qid)); + + WARN_ON(prestera_fw_evtq_avail(fw, qid) < len); + + if (WARN_ON(len > PRESTERA_MSG_MAX_SIZE)) { + prestera_fw_evtq_rd_set(fw, qid, idx + len); + continue; + } + + prestera_fw_evtq_read_buf(fw, qid, msg, len); + + if (fw->dev.recv_msg) + fw->dev.recv_msg(&fw->dev, msg, len); + } +} + +static int prestera_fw_wait_reg32(struct prestera_fw *fw, u32 reg, u32 cmp, + unsigned int waitms) +{ + u8 __iomem *addr = PRESTERA_FW_REG_ADDR(fw, reg); + u32 val; + + return readl_poll_timeout(addr, val, cmp == val, + 1 * USEC_PER_MSEC, waitms * USEC_PER_MSEC); +} + +static int prestera_fw_cmd_send(struct prestera_fw *fw, + void *in_msg, size_t in_size, + void *out_msg, size_t out_size, + unsigned int waitms) +{ + u32 ret_size; + int err; + + if (!waitms) + waitms = PRESTERA_FW_CMD_DEFAULT_WAIT_MS; + + if (ALIGN(in_size, 4) > fw->cmd_mbox_len) + return -EMSGSIZE; + + /* wait for finish previous reply from FW */ + err = prestera_fw_wait_reg32(fw, PRESTERA_CMD_RCV_CTL_REG, 0, 30); + if (err) { + dev_err(fw->dev.dev, "finish reply from FW is timed out\n"); + return err; + } + + prestera_fw_write(fw, PRESTERA_CMD_REQ_LEN_REG, in_size); + memcpy_toio(fw->cmd_mbox, in_msg, in_size); + + prestera_fw_write(fw, PRESTERA_CMD_REQ_CTL_REG, PRESTERA_CMD_F_REQ_SENT); + + /* wait for reply from FW */ + err = prestera_fw_wait_reg32(fw, PRESTERA_CMD_RCV_CTL_REG, + PRESTERA_CMD_F_REPL_SENT, waitms); + if (err) { + dev_err(fw->dev.dev, "reply from FW is timed out\n"); + goto cmd_exit; + } + + ret_size = prestera_fw_read(fw, PRESTERA_CMD_RCV_LEN_REG); + if (ret_size > out_size) { + dev_err(fw->dev.dev, "ret_size (%u) > out_len(%zu)\n", + ret_size, out_size); + err = -EMSGSIZE; + goto cmd_exit; + } + + memcpy_fromio(out_msg, fw->cmd_mbox + in_size, ret_size); + +cmd_exit: + prestera_fw_write(fw, PRESTERA_CMD_REQ_CTL_REG, PRESTERA_CMD_F_REPL_RCVD); + return err; +} + +static int prestera_fw_send_req(struct prestera_device *dev, + void *in_msg, size_t in_size, void *out_msg, + size_t out_size, unsigned int waitms) +{ + struct prestera_fw *fw; + ssize_t ret; + + fw = container_of(dev, struct prestera_fw, dev); + + mutex_lock(&fw->cmd_mtx); + ret = prestera_fw_cmd_send(fw, in_msg, in_size, out_msg, out_size, waitms); + mutex_unlock(&fw->cmd_mtx); + + return ret; +} + +static int prestera_fw_init(struct prestera_fw *fw) +{ + u8 __iomem *base; + int err; + u8 qid; + + fw->dev.send_req = prestera_fw_send_req; + fw->ldr_regs = fw->dev.ctl_regs; + + err = prestera_fw_load(fw); + if (err) + return err; + + err = prestera_fw_wait_reg32(fw, PRESTERA_FW_READY_REG, + PRESTERA_FW_READY_MAGIC, + PRESTERA_FW_READY_WAIT_MS); + if (err) { + dev_err(fw->dev.dev, "FW failed to start\n"); + return err; + } + + base = fw->dev.ctl_regs; + + fw->cmd_mbox = base + prestera_fw_read(fw, PRESTERA_CMD_BUF_OFFS_REG); + fw->cmd_mbox_len = prestera_fw_read(fw, PRESTERA_CMD_BUF_LEN_REG); + mutex_init(&fw->cmd_mtx); + + fw->evt_buf = base + prestera_fw_read(fw, PRESTERA_EVT_BUF_OFFS_REG); + fw->evt_qnum = prestera_fw_read(fw, 
PRESTERA_EVT_QNUM_REG); + fw->evt_msg = kmalloc(PRESTERA_MSG_MAX_SIZE, GFP_KERNEL); + if (!fw->evt_msg) + return -ENOMEM; + + for (qid = 0; qid < fw->evt_qnum; qid++) { + u32 offs = prestera_fw_read(fw, PRESTERA_EVTQ_OFFS_REG(qid)); + struct prestera_fw_evtq *evtq = &fw->evt_queue[qid]; + + evtq->len = prestera_fw_read(fw, PRESTERA_EVTQ_LEN_REG(qid)); + evtq->addr = fw->evt_buf + offs; + } + + return 0; +} + +static void prestera_fw_uninit(struct prestera_fw *fw) +{ + kfree(fw->evt_msg); +} + +static irqreturn_t prestera_pci_irq_handler(int irq, void *dev_id) +{ + struct prestera_fw *fw = dev_id; + + if (prestera_fw_read(fw, PRESTERA_RX_STATUS_REG)) { + prestera_fw_write(fw, PRESTERA_RX_STATUS_REG, 0); + + if (fw->dev.recv_pkt) + fw->dev.recv_pkt(&fw->dev); + } + + queue_work(fw->wq, &fw->evt_work); + + return IRQ_HANDLED; +} + +static void prestera_ldr_write(struct prestera_fw *fw, u32 reg, u32 val) +{ + writel(val, PRESTERA_LDR_REG_ADDR(fw, reg)); +} + +static u32 prestera_ldr_read(struct prestera_fw *fw, u32 reg) +{ + return readl(PRESTERA_LDR_REG_ADDR(fw, reg)); +} + +static int prestera_ldr_wait_reg32(struct prestera_fw *fw, + u32 reg, u32 cmp, unsigned int waitms) +{ + u8 __iomem *addr = PRESTERA_LDR_REG_ADDR(fw, reg); + u32 val; + + return readl_poll_timeout(addr, val, cmp == val, + 10 * USEC_PER_MSEC, waitms * USEC_PER_MSEC); +} + +static u32 prestera_ldr_wait_buf(struct prestera_fw *fw, size_t len) +{ + u8 __iomem *addr = PRESTERA_LDR_REG_ADDR(fw, PRESTERA_LDR_BUF_RD_REG); + u32 buf_len = fw->ldr_buf_len; + u32 wr_idx = fw->ldr_wr_idx; + u32 rd_idx; + + return readl_poll_timeout(addr, rd_idx, + CIRC_SPACE(wr_idx, rd_idx, buf_len) >= len, + 1 * USEC_PER_MSEC, 100 * USEC_PER_MSEC); +} + +static int prestera_ldr_wait_dl_finish(struct prestera_fw *fw) +{ + u8 __iomem *addr = PRESTERA_LDR_REG_ADDR(fw, PRESTERA_LDR_STATUS_REG); + unsigned long mask = ~(PRESTERA_LDR_STATUS_IMG_DL); + u32 val; + int err; + + err = readl_poll_timeout(addr, val, val & mask, 10 * USEC_PER_MSEC, + PRESTERA_FW_DL_TIMEOUT_MS * USEC_PER_MSEC); + if (err) { + dev_err(fw->dev.dev, "Timeout to load FW img [state=%d]", + prestera_ldr_read(fw, PRESTERA_LDR_STATUS_REG)); + return err; + } + + return 0; +} + +static void prestera_ldr_wr_idx_move(struct prestera_fw *fw, unsigned int n) +{ + fw->ldr_wr_idx = (fw->ldr_wr_idx + (n)) & (fw->ldr_buf_len - 1); +} + +static void prestera_ldr_wr_idx_commit(struct prestera_fw *fw) +{ + prestera_ldr_write(fw, PRESTERA_LDR_BUF_WR_REG, fw->ldr_wr_idx); +} + +static u8 __iomem *prestera_ldr_wr_ptr(struct prestera_fw *fw) +{ + return fw->ldr_ring_buf + fw->ldr_wr_idx; +} + +static int prestera_ldr_send(struct prestera_fw *fw, const u8 *buf, size_t len) +{ + int err; + int i; + + err = prestera_ldr_wait_buf(fw, len); + if (err) { + dev_err(fw->dev.dev, "failed wait for sending firmware\n"); + return err; + } + + for (i = 0; i < len; i += 4) { + writel_relaxed(*(u32 *)(buf + i), prestera_ldr_wr_ptr(fw)); + prestera_ldr_wr_idx_move(fw, 4); + } + + prestera_ldr_wr_idx_commit(fw); + return 0; +} + +static int prestera_ldr_fw_send(struct prestera_fw *fw, + const char *img, u32 fw_size) +{ + u32 status; + u32 pos; + int err; + + err = prestera_ldr_wait_reg32(fw, PRESTERA_LDR_STATUS_REG, + PRESTERA_LDR_STATUS_IMG_DL, + 5 * MSEC_PER_SEC); + if (err) { + dev_err(fw->dev.dev, "Loader is not ready to load image\n"); + return err; + } + + for (pos = 0; pos < fw_size; pos += PRESTERA_FW_BLK_SZ) { + if (pos + PRESTERA_FW_BLK_SZ > fw_size) + break; + + err = prestera_ldr_send(fw, img + pos, 
					PRESTERA_FW_BLK_SZ);
+		if (err)
+			return err;
+	}
+
+	if (pos < fw_size) {
+		err = prestera_ldr_send(fw, img + pos, fw_size - pos);
+		if (err)
+			return err;
+	}
+
+	err = prestera_ldr_wait_dl_finish(fw);
+	if (err)
+		return err;
+
+	status = prestera_ldr_read(fw, PRESTERA_LDR_STATUS_REG);
+
+	switch (status) {
+	case PRESTERA_LDR_STATUS_INVALID_IMG:
+		dev_err(fw->dev.dev, "FW img has bad CRC\n");
+		return -EINVAL;
+	case PRESTERA_LDR_STATUS_NOMEM:
+		dev_err(fw->dev.dev, "Loader does not have enough memory\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void prestera_fw_rev_parse(const struct prestera_fw_header *hdr,
+				  struct prestera_fw_rev *rev)
+{
+	u32 version = be32_to_cpu(hdr->version_value);
+
+	rev->maj = PRESTERA_FW_VER_MAJ(version);
+	rev->min = PRESTERA_FW_VER_MIN(version);
+	rev->sub = PRESTERA_FW_VER_PATCH(version);
+}
+
+static int prestera_fw_rev_check(struct prestera_fw *fw)
+{
+	struct prestera_fw_rev *rev = &fw->dev.fw_rev;
+	u16 maj_supp = PRESTERA_SUPP_FW_MAJ_VER;
+	u16 min_supp = PRESTERA_SUPP_FW_MIN_VER;
+
+	if (rev->maj == maj_supp && rev->min >= min_supp)
+		return 0;
+
+	dev_err(fw->dev.dev, "Driver supports only FW version '%u.%u.x'\n",
+		PRESTERA_SUPP_FW_MAJ_VER, PRESTERA_SUPP_FW_MIN_VER);
+
+	return -EINVAL;
+}
+
+static int prestera_fw_hdr_parse(struct prestera_fw *fw,
+				 const struct firmware *img)
+{
+	const struct prestera_fw_header *hdr =
+		(const struct prestera_fw_header *)img->data;
+	struct prestera_fw_rev *rev = &fw->dev.fw_rev;
+	u32 magic;
+
+	magic = be32_to_cpu(hdr->magic_number);
+	if (magic != PRESTERA_FW_HDR_MAGIC) {
+		dev_err(fw->dev.dev, "FW img hdr magic is invalid\n");
+		return -EINVAL;
+	}
+
+	prestera_fw_rev_parse(hdr, rev);
+
+	dev_info(fw->dev.dev, "FW version '%u.%u.%u'\n",
+		 rev->maj, rev->min, rev->sub);
+
+	return prestera_fw_rev_check(fw);
+}
+
+static int prestera_fw_load(struct prestera_fw *fw)
+{
+	size_t hlen = sizeof(struct prestera_fw_header);
+	const struct firmware *f;
+	char fw_path[128];
+	int err;
+
+	err = prestera_ldr_wait_reg32(fw, PRESTERA_LDR_READY_REG,
+				      PRESTERA_LDR_READY_MAGIC,
+				      5 * MSEC_PER_SEC);
+	if (err) {
+		dev_err(fw->dev.dev, "timed out waiting for FW loader\n");
+		return err;
+	}
+
+	fw->ldr_ring_buf = fw->ldr_regs +
+			   prestera_ldr_read(fw, PRESTERA_LDR_BUF_OFFS_REG);
+
+	fw->ldr_buf_len = prestera_ldr_read(fw, PRESTERA_LDR_BUF_SIZE_REG);
+
+	fw->ldr_wr_idx = 0;
+
+	snprintf(fw_path, sizeof(fw_path), PRESTERA_FW_PATH_FMT,
+		 PRESTERA_SUPP_FW_MAJ_VER, PRESTERA_SUPP_FW_MIN_VER);
+
+	err = request_firmware_direct(&f, fw_path, fw->dev.dev);
+	if (err) {
+		dev_err(fw->dev.dev, "failed to request firmware file\n");
+		return err;
+	}
+
+	err = prestera_fw_hdr_parse(fw, f);
+	if (err) {
+		dev_err(fw->dev.dev, "FW image header is invalid\n");
+		goto out_release;
+	}
+
+	prestera_ldr_write(fw, PRESTERA_LDR_IMG_SIZE_REG, f->size - hlen);
+	prestera_ldr_write(fw, PRESTERA_LDR_CTL_REG, PRESTERA_LDR_CTL_DL_START);
+
+	dev_info(fw->dev.dev, "Loading %s ...\n", fw_path);
+
+	err = prestera_ldr_fw_send(fw, f->data + hlen, f->size - hlen);
+
+out_release:
+	release_firmware(f);
+	return err;
+}
+
+static int prestera_pci_probe(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	const char *driver_name = pdev->driver->name;
+	struct prestera_fw *fw;
+	int err;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, BIT(PRESTERA_PCI_BAR_FW) |
+				 BIT(PRESTERA_PCI_BAR_PP),
+				 pci_name(pdev));
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(30));
+	if (err) {
+		dev_err(&pdev->dev, "failed to set DMA mask\n");
+		goto err_dma_mask;
+	}
+
+	pci_set_master(pdev);
+
+	fw = devm_kzalloc(&pdev->dev, sizeof(*fw), GFP_KERNEL);
+	if (!fw) {
+		err = -ENOMEM;
+		goto err_pci_dev_alloc;
+	}
+
+	fw->dev.ctl_regs = pcim_iomap_table(pdev)[PRESTERA_PCI_BAR_FW];
+	fw->dev.pp_regs = pcim_iomap_table(pdev)[PRESTERA_PCI_BAR_PP];
+	fw->dev.dev = &pdev->dev;
+
+	pci_set_drvdata(pdev, fw);
+
+	err = prestera_fw_init(fw);
+	if (err)
+		goto err_prestera_fw_init;
+
+	dev_info(fw->dev.dev, "Prestera FW is ready\n");
+
+	fw->wq = alloc_workqueue("prestera_fw_wq", WQ_HIGHPRI, 1);
+	if (!fw->wq) {
+		err = -ENOMEM;
+		goto err_wq_alloc;
+	}
+
+	INIT_WORK(&fw->evt_work, prestera_fw_evt_work_fn);
+
+	err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+	if (err < 0) {
+		dev_err(&pdev->dev, "MSI IRQ init failed\n");
+		goto err_irq_alloc;
+	}
+
+	err = request_irq(pci_irq_vector(pdev, 0), prestera_pci_irq_handler,
+			  0, driver_name, fw);
+	if (err) {
+		dev_err(&pdev->dev, "failed to request IRQ\n");
+		goto err_request_irq;
+	}
+
+	err = prestera_device_register(&fw->dev);
+	if (err)
+		goto err_prestera_dev_register;
+
+	return 0;
+
+err_prestera_dev_register:
+	free_irq(pci_irq_vector(pdev, 0), fw);
+err_request_irq:
+	pci_free_irq_vectors(pdev);
+err_irq_alloc:
+	destroy_workqueue(fw->wq);
+err_wq_alloc:
+	prestera_fw_uninit(fw);
+err_prestera_fw_init:
+err_pci_dev_alloc:
+err_dma_mask:
+	return err;
+}
+
+static void prestera_pci_remove(struct pci_dev *pdev)
+{
+	struct prestera_fw *fw = pci_get_drvdata(pdev);
+
+	prestera_device_unregister(&fw->dev);
+	free_irq(pci_irq_vector(pdev, 0), fw);
+	pci_free_irq_vectors(pdev);
+	destroy_workqueue(fw->wq);
+	prestera_fw_uninit(fw);
+}
+
+static const struct pci_device_id prestera_pci_devices[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC804) },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, prestera_pci_devices);
+
+static struct pci_driver prestera_pci_driver = {
+	.name     = "Prestera DX",
+	.id_table = prestera_pci_devices,
+	.probe    = prestera_pci_probe,
+	.remove   = prestera_pci_remove,
+};
+module_pci_driver(prestera_pci_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Marvell Prestera switch PCI interface");
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c
new file mode 100644
index 000000000000..2a13c318048c
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c
@@ -0,0 +1,820 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved */
+
+#include <linux/bitfield.h>
+#include <linux/dmapool.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include "prestera_dsa.h"
+#include "prestera.h"
+#include "prestera_hw.h"
+#include "prestera_rxtx.h"
+
+#define PRESTERA_SDMA_WAIT_MUL		10
+
+struct prestera_sdma_desc {
+	__le32 word1;
+	__le32 word2;
+	__le32 buff;
+	__le32 next;
+} __packed __aligned(16);
+
+#define PRESTERA_SDMA_BUFF_SIZE_MAX	1544
+
+#define PRESTERA_SDMA_RX_DESC_PKT_LEN(desc) \
+	((le32_to_cpu((desc)->word2) >> 16) & GENMASK(13, 0))
+
+#define PRESTERA_SDMA_RX_DESC_OWNER(desc) \
+	((le32_to_cpu((desc)->word1) & BIT(31)) >> 31)
+
+#define PRESTERA_SDMA_RX_DESC_IS_RCVD(desc) \
+	(PRESTERA_SDMA_RX_DESC_OWNER(desc) == PRESTERA_SDMA_RX_DESC_CPU_OWN)
+
+#define PRESTERA_SDMA_RX_DESC_CPU_OWN	0
+#define PRESTERA_SDMA_RX_DESC_DMA_OWN	1
+
+#define PRESTERA_SDMA_RX_QUEUE_NUM	8
+
+#define PRESTERA_SDMA_RX_DESC_PER_Q	1000
+
+#define PRESTERA_SDMA_TX_DESC_PER_Q	1000
+#define PRESTERA_SDMA_TX_MAX_BURST	64
+
+#define PRESTERA_SDMA_TX_DESC_OWNER(desc) \
+	((le32_to_cpu((desc)->word1) & BIT(31)) >> 31)
+
+#define PRESTERA_SDMA_TX_DESC_CPU_OWN	0
+#define PRESTERA_SDMA_TX_DESC_DMA_OWN	1U
+
+#define PRESTERA_SDMA_TX_DESC_IS_SENT(desc) \
+	(PRESTERA_SDMA_TX_DESC_OWNER(desc) == PRESTERA_SDMA_TX_DESC_CPU_OWN)
+
+#define PRESTERA_SDMA_TX_DESC_LAST	BIT(20)
+#define PRESTERA_SDMA_TX_DESC_FIRST	BIT(21)
+#define PRESTERA_SDMA_TX_DESC_CALC_CRC	BIT(12)
+
+#define PRESTERA_SDMA_TX_DESC_SINGLE	\
+	(PRESTERA_SDMA_TX_DESC_FIRST | PRESTERA_SDMA_TX_DESC_LAST)
+
+#define PRESTERA_SDMA_TX_DESC_INIT	\
+	(PRESTERA_SDMA_TX_DESC_SINGLE | PRESTERA_SDMA_TX_DESC_CALC_CRC)
+
+#define PRESTERA_SDMA_RX_INTR_MASK_REG		0x2814
+#define PRESTERA_SDMA_RX_QUEUE_STATUS_REG	0x2680
+#define PRESTERA_SDMA_RX_QUEUE_DESC_REG(n)	(0x260C + (n) * 16)
+
+#define PRESTERA_SDMA_TX_QUEUE_DESC_REG		0x26C0
+#define PRESTERA_SDMA_TX_QUEUE_START_REG	0x2868
+
+struct prestera_sdma_buf {
+	struct prestera_sdma_desc *desc;
+	dma_addr_t desc_dma;
+	struct sk_buff *skb;
+	dma_addr_t buf_dma;
+	bool is_used;
+};
+
+struct prestera_rx_ring {
+	struct prestera_sdma_buf *bufs;
+	int next_rx;
+};
+
+struct prestera_tx_ring {
+	struct prestera_sdma_buf *bufs;
+	int next_tx;
+	int max_burst;
+	int burst;
+};
+
+struct prestera_sdma {
+	struct prestera_rx_ring rx_ring[PRESTERA_SDMA_RX_QUEUE_NUM];
+	struct prestera_tx_ring tx_ring;
+	struct prestera_switch *sw;
+	struct dma_pool *desc_pool;
+	struct work_struct tx_work;
+	struct napi_struct rx_napi;
+	struct net_device napi_dev;
+	u32 map_addr;
+	u64 dma_mask;
+	/* protect SDMA against concurrent access from multiple CPUs */
+	spinlock_t tx_lock;
+};
+
+struct prestera_rxtx {
+	struct prestera_sdma sdma;
+};
+
+static int prestera_sdma_buf_init(struct prestera_sdma *sdma,
+				  struct prestera_sdma_buf *buf)
+{
+	struct prestera_sdma_desc *desc;
+	dma_addr_t dma;
+
+	desc = dma_pool_alloc(sdma->desc_pool, GFP_DMA | GFP_KERNEL, &dma);
+	if (!desc)
+		return -ENOMEM;
+
+	buf->buf_dma = DMA_MAPPING_ERROR;
+	buf->desc_dma = dma;
+	buf->desc = desc;
+	buf->skb = NULL;
+
+	return 0;
+}
+
+static u32 prestera_sdma_map(struct prestera_sdma *sdma, dma_addr_t pa)
+{
+	return sdma->map_addr + pa;
+}
+
+static void prestera_sdma_rx_desc_init(struct prestera_sdma *sdma,
+				       struct prestera_sdma_desc *desc,
+				       dma_addr_t buf)
+{
+	u32 word = le32_to_cpu(desc->word2);
+
+	u32p_replace_bits(&word,
PRESTERA_SDMA_BUFF_SIZE_MAX, GENMASK(15, 0)); + desc->word2 = cpu_to_le32(word); + + desc->buff = cpu_to_le32(prestera_sdma_map(sdma, buf)); + + /* make sure buffer is set before reset the descriptor */ + wmb(); + + desc->word1 = cpu_to_le32(0xA0000000); +} + +static void prestera_sdma_rx_desc_set_next(struct prestera_sdma *sdma, + struct prestera_sdma_desc *desc, + dma_addr_t next) +{ + desc->next = cpu_to_le32(prestera_sdma_map(sdma, next)); +} + +static int prestera_sdma_rx_skb_alloc(struct prestera_sdma *sdma, + struct prestera_sdma_buf *buf) +{ + struct device *dev = sdma->sw->dev->dev; + struct sk_buff *skb; + dma_addr_t dma; + + skb = alloc_skb(PRESTERA_SDMA_BUFF_SIZE_MAX, GFP_DMA | GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + dma = dma_map_single(dev, skb->data, skb->len, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma)) + goto err_dma_map; + + if (buf->skb) + dma_unmap_single(dev, buf->buf_dma, buf->skb->len, + DMA_FROM_DEVICE); + + buf->buf_dma = dma; + buf->skb = skb; + + return 0; + +err_dma_map: + kfree_skb(skb); + + return -ENOMEM; +} + +static struct sk_buff *prestera_sdma_rx_skb_get(struct prestera_sdma *sdma, + struct prestera_sdma_buf *buf) +{ + dma_addr_t buf_dma = buf->buf_dma; + struct sk_buff *skb = buf->skb; + u32 len = skb->len; + int err; + + err = prestera_sdma_rx_skb_alloc(sdma, buf); + if (err) { + buf->buf_dma = buf_dma; + buf->skb = skb; + + skb = alloc_skb(skb->len, GFP_ATOMIC); + if (skb) { + skb_put(skb, len); + skb_copy_from_linear_data(buf->skb, skb->data, len); + } + } + + prestera_sdma_rx_desc_init(sdma, buf->desc, buf->buf_dma); + + return skb; +} + +static int prestera_rxtx_process_skb(struct prestera_sdma *sdma, + struct sk_buff *skb) +{ + const struct prestera_port *port; + struct prestera_dsa dsa; + u32 hw_port, dev_id; + int err; + + skb_pull(skb, ETH_HLEN); + + /* ethertype field is part of the dsa header */ + err = prestera_dsa_parse(&dsa, skb->data - ETH_TLEN); + if (err) + return err; + + dev_id = dsa.hw_dev_num; + hw_port = dsa.port_num; + + port = prestera_port_find_by_hwid(sdma->sw, dev_id, hw_port); + if (unlikely(!port)) { + dev_warn_ratelimited(prestera_dev(sdma->sw), "received pkt for non-existent port(%u, %u)\n", + dev_id, hw_port); + return -ENOENT; + } + + if (unlikely(!pskb_may_pull(skb, PRESTERA_DSA_HLEN))) + return -EINVAL; + + /* remove DSA tag and update checksum */ + skb_pull_rcsum(skb, PRESTERA_DSA_HLEN); + + memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - PRESTERA_DSA_HLEN, + ETH_ALEN * 2); + + skb_push(skb, ETH_HLEN); + + skb->protocol = eth_type_trans(skb, port->dev); + + if (dsa.vlan.is_tagged) { + u16 tci = dsa.vlan.vid & VLAN_VID_MASK; + + tci |= dsa.vlan.vpt << VLAN_PRIO_SHIFT; + if (dsa.vlan.cfi_bit) + tci |= VLAN_CFI_MASK; + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tci); + } + + return 0; +} + +static int prestera_sdma_next_rx_buf_idx(int buf_idx) +{ + return (buf_idx + 1) % PRESTERA_SDMA_RX_DESC_PER_Q; +} + +static int prestera_sdma_rx_poll(struct napi_struct *napi, int budget) +{ + int qnum = PRESTERA_SDMA_RX_QUEUE_NUM; + unsigned int rxq_done_map = 0; + struct prestera_sdma *sdma; + struct list_head rx_list; + unsigned int qmask; + int pkts_done = 0; + int q; + + qnum = PRESTERA_SDMA_RX_QUEUE_NUM; + qmask = GENMASK(qnum - 1, 0); + + INIT_LIST_HEAD(&rx_list); + + sdma = container_of(napi, struct prestera_sdma, rx_napi); + + while (pkts_done < budget && rxq_done_map != qmask) { + for (q = 0; q < qnum && pkts_done < budget; q++) { + struct prestera_rx_ring *ring = &sdma->rx_ring[q]; + struct 
prestera_sdma_desc *desc; + struct prestera_sdma_buf *buf; + int buf_idx = ring->next_rx; + struct sk_buff *skb; + + buf = &ring->bufs[buf_idx]; + desc = buf->desc; + + if (PRESTERA_SDMA_RX_DESC_IS_RCVD(desc)) { + rxq_done_map &= ~BIT(q); + } else { + rxq_done_map |= BIT(q); + continue; + } + + pkts_done++; + + __skb_trim(buf->skb, PRESTERA_SDMA_RX_DESC_PKT_LEN(desc)); + + skb = prestera_sdma_rx_skb_get(sdma, buf); + if (!skb) + goto rx_next_buf; + + if (unlikely(prestera_rxtx_process_skb(sdma, skb))) + goto rx_next_buf; + + list_add_tail(&skb->list, &rx_list); +rx_next_buf: + ring->next_rx = prestera_sdma_next_rx_buf_idx(buf_idx); + } + } + + if (pkts_done < budget && napi_complete_done(napi, pkts_done)) + prestera_write(sdma->sw, PRESTERA_SDMA_RX_INTR_MASK_REG, + GENMASK(9, 2)); + + netif_receive_skb_list(&rx_list); + + return pkts_done; +} + +static void prestera_sdma_rx_fini(struct prestera_sdma *sdma) +{ + int qnum = PRESTERA_SDMA_RX_QUEUE_NUM; + int q, b; + + /* disable all rx queues */ + prestera_write(sdma->sw, PRESTERA_SDMA_RX_QUEUE_STATUS_REG, + GENMASK(15, 8)); + + for (q = 0; q < qnum; q++) { + struct prestera_rx_ring *ring = &sdma->rx_ring[q]; + + if (!ring->bufs) + break; + + for (b = 0; b < PRESTERA_SDMA_RX_DESC_PER_Q; b++) { + struct prestera_sdma_buf *buf = &ring->bufs[b]; + + if (buf->desc_dma) + dma_pool_free(sdma->desc_pool, buf->desc, + buf->desc_dma); + + if (!buf->skb) + continue; + + if (buf->buf_dma != DMA_MAPPING_ERROR) + dma_unmap_single(sdma->sw->dev->dev, + buf->buf_dma, buf->skb->len, + DMA_FROM_DEVICE); + kfree_skb(buf->skb); + } + } +} + +static int prestera_sdma_rx_init(struct prestera_sdma *sdma) +{ + int bnum = PRESTERA_SDMA_RX_DESC_PER_Q; + int qnum = PRESTERA_SDMA_RX_QUEUE_NUM; + int err; + int q; + + /* disable all rx queues */ + prestera_write(sdma->sw, PRESTERA_SDMA_RX_QUEUE_STATUS_REG, + GENMASK(15, 8)); + + for (q = 0; q < qnum; q++) { + struct prestera_sdma_buf *head, *tail, *next, *prev; + struct prestera_rx_ring *ring = &sdma->rx_ring[q]; + + ring->bufs = kmalloc_array(bnum, sizeof(*head), GFP_KERNEL); + if (!ring->bufs) + return -ENOMEM; + + ring->next_rx = 0; + + tail = &ring->bufs[bnum - 1]; + head = &ring->bufs[0]; + next = head; + prev = next; + + do { + err = prestera_sdma_buf_init(sdma, next); + if (err) + return err; + + err = prestera_sdma_rx_skb_alloc(sdma, next); + if (err) + return err; + + prestera_sdma_rx_desc_init(sdma, next->desc, + next->buf_dma); + + prestera_sdma_rx_desc_set_next(sdma, prev->desc, + next->desc_dma); + + prev = next; + next++; + } while (prev != tail); + + /* join tail with head to make a circular list */ + prestera_sdma_rx_desc_set_next(sdma, tail->desc, head->desc_dma); + + prestera_write(sdma->sw, PRESTERA_SDMA_RX_QUEUE_DESC_REG(q), + prestera_sdma_map(sdma, head->desc_dma)); + } + + /* make sure all rx descs are filled before enabling all rx queues */ + wmb(); + + prestera_write(sdma->sw, PRESTERA_SDMA_RX_QUEUE_STATUS_REG, + GENMASK(7, 0)); + + return 0; +} + +static void prestera_sdma_tx_desc_init(struct prestera_sdma *sdma, + struct prestera_sdma_desc *desc) +{ + desc->word1 = cpu_to_le32(PRESTERA_SDMA_TX_DESC_INIT); + desc->word2 = 0; +} + +static void prestera_sdma_tx_desc_set_next(struct prestera_sdma *sdma, + struct prestera_sdma_desc *desc, + dma_addr_t next) +{ + desc->next = cpu_to_le32(prestera_sdma_map(sdma, next)); +} + +static void prestera_sdma_tx_desc_set_buf(struct prestera_sdma *sdma, + struct prestera_sdma_desc *desc, + dma_addr_t buf, size_t len) +{ + u32 word = le32_to_cpu(desc->word2); 
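+	/* bits 30:16 of word2 carry the frame byte count; ETH_FCS_LEN is
+	 * added because the CALC_CRC flag in the descriptor init value
+	 * makes the HW append a 4-byte CRC to the frame
+	 */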
+ + u32p_replace_bits(&word, len + ETH_FCS_LEN, GENMASK(30, 16)); + + desc->buff = cpu_to_le32(prestera_sdma_map(sdma, buf)); + desc->word2 = cpu_to_le32(word); +} + +static void prestera_sdma_tx_desc_xmit(struct prestera_sdma_desc *desc) +{ + u32 word = le32_to_cpu(desc->word1); + + word |= PRESTERA_SDMA_TX_DESC_DMA_OWN << 31; + + /* make sure everything is written before enable xmit */ + wmb(); + + desc->word1 = cpu_to_le32(word); +} + +static int prestera_sdma_tx_buf_map(struct prestera_sdma *sdma, + struct prestera_sdma_buf *buf, + struct sk_buff *skb) +{ + struct device *dma_dev = sdma->sw->dev->dev; + dma_addr_t dma; + + dma = dma_map_single(dma_dev, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev, dma)) + return -ENOMEM; + + buf->buf_dma = dma; + buf->skb = skb; + + return 0; +} + +static void prestera_sdma_tx_buf_unmap(struct prestera_sdma *sdma, + struct prestera_sdma_buf *buf) +{ + struct device *dma_dev = sdma->sw->dev->dev; + + dma_unmap_single(dma_dev, buf->buf_dma, buf->skb->len, DMA_TO_DEVICE); +} + +static void prestera_sdma_tx_recycle_work_fn(struct work_struct *work) +{ + int bnum = PRESTERA_SDMA_TX_DESC_PER_Q; + struct prestera_tx_ring *tx_ring; + struct prestera_sdma *sdma; + int b; + + sdma = container_of(work, struct prestera_sdma, tx_work); + + tx_ring = &sdma->tx_ring; + + for (b = 0; b < bnum; b++) { + struct prestera_sdma_buf *buf = &tx_ring->bufs[b]; + + if (!buf->is_used) + continue; + + if (!PRESTERA_SDMA_TX_DESC_IS_SENT(buf->desc)) + continue; + + prestera_sdma_tx_buf_unmap(sdma, buf); + dev_consume_skb_any(buf->skb); + buf->skb = NULL; + + /* make sure everything is cleaned up */ + wmb(); + + buf->is_used = false; + } +} + +static int prestera_sdma_tx_init(struct prestera_sdma *sdma) +{ + struct prestera_sdma_buf *head, *tail, *next, *prev; + struct prestera_tx_ring *tx_ring = &sdma->tx_ring; + int bnum = PRESTERA_SDMA_TX_DESC_PER_Q; + int err; + + INIT_WORK(&sdma->tx_work, prestera_sdma_tx_recycle_work_fn); + spin_lock_init(&sdma->tx_lock); + + tx_ring->bufs = kmalloc_array(bnum, sizeof(*head), GFP_KERNEL); + if (!tx_ring->bufs) + return -ENOMEM; + + tail = &tx_ring->bufs[bnum - 1]; + head = &tx_ring->bufs[0]; + next = head; + prev = next; + + tx_ring->max_burst = PRESTERA_SDMA_TX_MAX_BURST; + tx_ring->burst = tx_ring->max_burst; + tx_ring->next_tx = 0; + + do { + err = prestera_sdma_buf_init(sdma, next); + if (err) + return err; + + next->is_used = false; + + prestera_sdma_tx_desc_init(sdma, next->desc); + + prestera_sdma_tx_desc_set_next(sdma, prev->desc, + next->desc_dma); + + prev = next; + next++; + } while (prev != tail); + + /* join tail with head to make a circular list */ + prestera_sdma_tx_desc_set_next(sdma, tail->desc, head->desc_dma); + + /* make sure descriptors are written */ + wmb(); + + prestera_write(sdma->sw, PRESTERA_SDMA_TX_QUEUE_DESC_REG, + prestera_sdma_map(sdma, head->desc_dma)); + + return 0; +} + +static void prestera_sdma_tx_fini(struct prestera_sdma *sdma) +{ + struct prestera_tx_ring *ring = &sdma->tx_ring; + int bnum = PRESTERA_SDMA_TX_DESC_PER_Q; + int b; + + cancel_work_sync(&sdma->tx_work); + + if (!ring->bufs) + return; + + for (b = 0; b < bnum; b++) { + struct prestera_sdma_buf *buf = &ring->bufs[b]; + + if (buf->desc) + dma_pool_free(sdma->desc_pool, buf->desc, + buf->desc_dma); + + if (!buf->skb) + continue; + + dma_unmap_single(sdma->sw->dev->dev, buf->buf_dma, + buf->skb->len, DMA_TO_DEVICE); + + dev_consume_skb_any(buf->skb); + } +} + +static void prestera_rxtx_handle_event(struct prestera_switch 
*sw, + struct prestera_event *evt, + void *arg) +{ + struct prestera_sdma *sdma = arg; + + if (evt->id != PRESTERA_RXTX_EVENT_RCV_PKT) + return; + + prestera_write(sdma->sw, PRESTERA_SDMA_RX_INTR_MASK_REG, 0); + napi_schedule(&sdma->rx_napi); +} + +static int prestera_sdma_switch_init(struct prestera_switch *sw) +{ + struct prestera_sdma *sdma = &sw->rxtx->sdma; + struct device *dev = sw->dev->dev; + struct prestera_rxtx_params p; + int err; + + p.use_sdma = true; + + err = prestera_hw_rxtx_init(sw, &p); + if (err) { + dev_err(dev, "failed to init rxtx by hw\n"); + return err; + } + + sdma->dma_mask = dma_get_mask(dev); + sdma->map_addr = p.map_addr; + sdma->sw = sw; + + sdma->desc_pool = dma_pool_create("desc_pool", dev, + sizeof(struct prestera_sdma_desc), + 16, 0); + if (!sdma->desc_pool) + return -ENOMEM; + + err = prestera_sdma_rx_init(sdma); + if (err) { + dev_err(dev, "failed to init rx ring\n"); + goto err_rx_init; + } + + err = prestera_sdma_tx_init(sdma); + if (err) { + dev_err(dev, "failed to init tx ring\n"); + goto err_tx_init; + } + + err = prestera_hw_event_handler_register(sw, PRESTERA_EVENT_TYPE_RXTX, + prestera_rxtx_handle_event, + sdma); + if (err) + goto err_evt_register; + + init_dummy_netdev(&sdma->napi_dev); + + netif_napi_add(&sdma->napi_dev, &sdma->rx_napi, prestera_sdma_rx_poll, 64); + napi_enable(&sdma->rx_napi); + + return 0; + +err_evt_register: +err_tx_init: + prestera_sdma_tx_fini(sdma); +err_rx_init: + prestera_sdma_rx_fini(sdma); + + dma_pool_destroy(sdma->desc_pool); + return err; +} + +static void prestera_sdma_switch_fini(struct prestera_switch *sw) +{ + struct prestera_sdma *sdma = &sw->rxtx->sdma; + + napi_disable(&sdma->rx_napi); + netif_napi_del(&sdma->rx_napi); + prestera_hw_event_handler_unregister(sw, PRESTERA_EVENT_TYPE_RXTX, + prestera_rxtx_handle_event); + prestera_sdma_tx_fini(sdma); + prestera_sdma_rx_fini(sdma); + dma_pool_destroy(sdma->desc_pool); +} + +static bool prestera_sdma_is_ready(struct prestera_sdma *sdma) +{ + return !(prestera_read(sdma->sw, PRESTERA_SDMA_TX_QUEUE_START_REG) & 1); +} + +static int prestera_sdma_tx_wait(struct prestera_sdma *sdma, + struct prestera_tx_ring *tx_ring) +{ + int tx_wait_num = PRESTERA_SDMA_WAIT_MUL * tx_ring->max_burst; + + do { + if (prestera_sdma_is_ready(sdma)) + return 0; + + udelay(1); + } while (--tx_wait_num); + + return -EBUSY; +} + +static void prestera_sdma_tx_start(struct prestera_sdma *sdma) +{ + prestera_write(sdma->sw, PRESTERA_SDMA_TX_QUEUE_START_REG, 1); + schedule_work(&sdma->tx_work); +} + +static netdev_tx_t prestera_sdma_xmit(struct prestera_sdma *sdma, + struct sk_buff *skb) +{ + struct device *dma_dev = sdma->sw->dev->dev; + struct net_device *dev = skb->dev; + struct prestera_tx_ring *tx_ring; + struct prestera_sdma_buf *buf; + int err; + + spin_lock(&sdma->tx_lock); + + tx_ring = &sdma->tx_ring; + + buf = &tx_ring->bufs[tx_ring->next_tx]; + if (buf->is_used) { + schedule_work(&sdma->tx_work); + goto drop_skb; + } + + if (unlikely(eth_skb_pad(skb))) + goto drop_skb_nofree; + + err = prestera_sdma_tx_buf_map(sdma, buf, skb); + if (err) + goto drop_skb; + + prestera_sdma_tx_desc_set_buf(sdma, buf->desc, buf->buf_dma, skb->len); + + dma_sync_single_for_device(dma_dev, buf->buf_dma, skb->len, + DMA_TO_DEVICE); + + if (tx_ring->burst) { + tx_ring->burst--; + } else { + tx_ring->burst = tx_ring->max_burst; + + err = prestera_sdma_tx_wait(sdma, tx_ring); + if (err) + goto drop_skb_unmap; + } + + tx_ring->next_tx = (tx_ring->next_tx + 1) % PRESTERA_SDMA_TX_DESC_PER_Q; + 
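+	/* hand the descriptor over to the HW only after the buffer has been
+	 * mapped and synced; prestera_sdma_tx_desc_xmit() issues a write
+	 * barrier before flipping the OWN bit
+	 */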
+	prestera_sdma_tx_desc_xmit(buf->desc);
+	buf->is_used = true;
+
+	prestera_sdma_tx_start(sdma);
+
+	goto tx_done;
+
+drop_skb_unmap:
+	prestera_sdma_tx_buf_unmap(sdma, buf);
+drop_skb:
+	dev_kfree_skb_any(skb);
+drop_skb_nofree:
+	dev->stats.tx_dropped++;
+tx_done:
+	spin_unlock(&sdma->tx_lock);
+	return NETDEV_TX_OK;
+}
+
+int prestera_rxtx_switch_init(struct prestera_switch *sw)
+{
+	struct prestera_rxtx *rxtx;
+	int err;
+
+	rxtx = kzalloc(sizeof(*rxtx), GFP_KERNEL);
+	if (!rxtx)
+		return -ENOMEM;
+
+	sw->rxtx = rxtx;
+
+	err = prestera_sdma_switch_init(sw);
+	if (err) {
+		kfree(rxtx);
+		sw->rxtx = NULL;
+	}
+
+	return err;
+}
+
+void prestera_rxtx_switch_fini(struct prestera_switch *sw)
+{
+	prestera_sdma_switch_fini(sw);
+	kfree(sw->rxtx);
+	sw->rxtx = NULL;
+}
+
+int prestera_rxtx_port_init(struct prestera_port *port)
+{
+	int err;
+
+	err = prestera_hw_rxtx_port_init(port);
+	if (err)
+		return err;
+
+	port->dev->needed_headroom = PRESTERA_DSA_HLEN;
+
+	return 0;
+}
+
+netdev_tx_t prestera_rxtx_xmit(struct prestera_port *port, struct sk_buff *skb)
+{
+	struct prestera_dsa dsa = {};
+
+	dsa.hw_dev_num = port->dev_id;
+	dsa.port_num = port->hw_id;
+
+	if (skb_cow_head(skb, PRESTERA_DSA_HLEN) < 0)
+		goto err_drop;
+
+	skb_push(skb, PRESTERA_DSA_HLEN);
+	memmove(skb->data, skb->data + PRESTERA_DSA_HLEN, 2 * ETH_ALEN);
+
+	if (prestera_dsa_build(&dsa, skb->data + 2 * ETH_ALEN) != 0)
+		goto err_drop;
+
+	return prestera_sdma_xmit(&port->sw->rxtx->sdma, skb);
+
+err_drop:
+	/* the skb is still owned by this driver here and was not queued
+	 * anywhere, so it must be freed before reporting the drop
+	 */
+	dev_kfree_skb_any(skb);
+	return NET_XMIT_DROP;
+}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.h b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.h
new file mode 100644
index 000000000000..882a1225c323
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved. */
+
+#ifndef _PRESTERA_RXTX_H_
+#define _PRESTERA_RXTX_H_
+
+#include <linux/netdevice.h>
+
+struct prestera_switch;
+struct prestera_port;
+
+int prestera_rxtx_switch_init(struct prestera_switch *sw);
+void prestera_rxtx_switch_fini(struct prestera_switch *sw);
+
+int prestera_rxtx_port_init(struct prestera_port *port);
+
+netdev_tx_t prestera_rxtx_xmit(struct prestera_port *port, struct sk_buff *skb);
+
+#endif /* _PRESTERA_RXTX_H_ */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
new file mode 100644
index 000000000000..7d83e1f91ef1
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -0,0 +1,1277 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2019-2020 Marvell International Ltd.
All rights reserved */ + +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <net/netevent.h> +#include <net/switchdev.h> + +#include "prestera.h" +#include "prestera_hw.h" +#include "prestera_switchdev.h" + +#define PRESTERA_VID_ALL (0xffff) + +#define PRESTERA_DEFAULT_AGEING_TIME_MS 300000 +#define PRESTERA_MAX_AGEING_TIME_MS 1000000000 +#define PRESTERA_MIN_AGEING_TIME_MS 32000 + +struct prestera_fdb_event_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct net_device *dev; + unsigned long event; +}; + +struct prestera_switchdev { + struct prestera_switch *sw; + struct list_head bridge_list; + bool bridge_8021q_exists; + struct notifier_block swdev_nb_blk; + struct notifier_block swdev_nb; +}; + +struct prestera_bridge { + struct list_head head; + struct net_device *dev; + struct prestera_switchdev *swdev; + struct list_head port_list; + bool vlan_enabled; + u16 bridge_id; +}; + +struct prestera_bridge_port { + struct list_head head; + struct net_device *dev; + struct prestera_bridge *bridge; + struct list_head vlan_list; + refcount_t ref_count; + unsigned long flags; + u8 stp_state; +}; + +struct prestera_bridge_vlan { + struct list_head head; + struct list_head port_vlan_list; + u16 vid; +}; + +struct prestera_port_vlan { + struct list_head br_vlan_head; + struct list_head port_head; + struct prestera_port *port; + struct prestera_bridge_port *br_port; + u16 vid; +}; + +static struct workqueue_struct *swdev_wq; + +static void prestera_bridge_port_put(struct prestera_bridge_port *br_port); + +static int prestera_port_vid_stp_set(struct prestera_port *port, u16 vid, + u8 state); + +static struct prestera_bridge_vlan * +prestera_bridge_vlan_create(struct prestera_bridge_port *br_port, u16 vid) +{ + struct prestera_bridge_vlan *br_vlan; + + br_vlan = kzalloc(sizeof(*br_vlan), GFP_KERNEL); + if (!br_vlan) + return NULL; + + INIT_LIST_HEAD(&br_vlan->port_vlan_list); + br_vlan->vid = vid; + list_add(&br_vlan->head, &br_port->vlan_list); + + return br_vlan; +} + +static void prestera_bridge_vlan_destroy(struct prestera_bridge_vlan *br_vlan) +{ + list_del(&br_vlan->head); + WARN_ON(!list_empty(&br_vlan->port_vlan_list)); + kfree(br_vlan); +} + +static struct prestera_bridge_vlan * +prestera_bridge_vlan_by_vid(struct prestera_bridge_port *br_port, u16 vid) +{ + struct prestera_bridge_vlan *br_vlan; + + list_for_each_entry(br_vlan, &br_port->vlan_list, head) { + if (br_vlan->vid == vid) + return br_vlan; + } + + return NULL; +} + +static int prestera_bridge_vlan_port_count(struct prestera_bridge *bridge, + u16 vid) +{ + struct prestera_bridge_port *br_port; + struct prestera_bridge_vlan *br_vlan; + int count = 0; + + list_for_each_entry(br_port, &bridge->port_list, head) { + list_for_each_entry(br_vlan, &br_port->vlan_list, head) { + if (br_vlan->vid == vid) { + count += 1; + break; + } + } + } + + return count; +} + +static void prestera_bridge_vlan_put(struct prestera_bridge_vlan *br_vlan) +{ + if (list_empty(&br_vlan->port_vlan_list)) + prestera_bridge_vlan_destroy(br_vlan); +} + +static struct prestera_port_vlan * +prestera_port_vlan_by_vid(struct prestera_port *port, u16 vid) +{ + struct prestera_port_vlan *port_vlan; + + list_for_each_entry(port_vlan, &port->vlans_list, port_head) { + if (port_vlan->vid == vid) + return port_vlan; + } + + return NULL; +} + +static struct prestera_port_vlan * +prestera_port_vlan_create(struct prestera_port *port, u16 vid, 
bool untagged) +{ + struct prestera_port_vlan *port_vlan; + int err; + + port_vlan = prestera_port_vlan_by_vid(port, vid); + if (port_vlan) + return ERR_PTR(-EEXIST); + + err = prestera_hw_vlan_port_set(port, vid, true, untagged); + if (err) + return ERR_PTR(err); + + port_vlan = kzalloc(sizeof(*port_vlan), GFP_KERNEL); + if (!port_vlan) { + err = -ENOMEM; + goto err_port_vlan_alloc; + } + + port_vlan->port = port; + port_vlan->vid = vid; + + list_add(&port_vlan->port_head, &port->vlans_list); + + return port_vlan; + +err_port_vlan_alloc: + prestera_hw_vlan_port_set(port, vid, false, false); + return ERR_PTR(err); +} + +static void +prestera_port_vlan_bridge_leave(struct prestera_port_vlan *port_vlan) +{ + u32 fdb_flush_mode = PRESTERA_FDB_FLUSH_MODE_DYNAMIC; + struct prestera_port *port = port_vlan->port; + struct prestera_bridge_vlan *br_vlan; + struct prestera_bridge_port *br_port; + bool last_port, last_vlan; + u16 vid = port_vlan->vid; + int port_count; + + br_port = port_vlan->br_port; + port_count = prestera_bridge_vlan_port_count(br_port->bridge, vid); + br_vlan = prestera_bridge_vlan_by_vid(br_port, vid); + + last_vlan = list_is_singular(&br_port->vlan_list); + last_port = port_count == 1; + + if (last_vlan) + prestera_hw_fdb_flush_port(port, fdb_flush_mode); + else if (last_port) + prestera_hw_fdb_flush_vlan(port->sw, vid, fdb_flush_mode); + else + prestera_hw_fdb_flush_port_vlan(port, vid, fdb_flush_mode); + + list_del(&port_vlan->br_vlan_head); + prestera_bridge_vlan_put(br_vlan); + prestera_bridge_port_put(br_port); + port_vlan->br_port = NULL; +} + +static void prestera_port_vlan_destroy(struct prestera_port_vlan *port_vlan) +{ + struct prestera_port *port = port_vlan->port; + u16 vid = port_vlan->vid; + + if (port_vlan->br_port) + prestera_port_vlan_bridge_leave(port_vlan); + + prestera_hw_vlan_port_set(port, vid, false, false); + list_del(&port_vlan->port_head); + kfree(port_vlan); +} + +static struct prestera_bridge * +prestera_bridge_create(struct prestera_switchdev *swdev, struct net_device *dev) +{ + bool vlan_enabled = br_vlan_enabled(dev); + struct prestera_bridge *bridge; + u16 bridge_id; + int err; + + if (vlan_enabled && swdev->bridge_8021q_exists) { + netdev_err(dev, "Only one VLAN-aware bridge is supported\n"); + return ERR_PTR(-EINVAL); + } + + bridge = kzalloc(sizeof(*bridge), GFP_KERNEL); + if (!bridge) + return ERR_PTR(-ENOMEM); + + if (vlan_enabled) { + swdev->bridge_8021q_exists = true; + } else { + err = prestera_hw_bridge_create(swdev->sw, &bridge_id); + if (err) { + kfree(bridge); + return ERR_PTR(err); + } + + bridge->bridge_id = bridge_id; + } + + bridge->vlan_enabled = vlan_enabled; + bridge->swdev = swdev; + bridge->dev = dev; + + INIT_LIST_HEAD(&bridge->port_list); + + list_add(&bridge->head, &swdev->bridge_list); + + return bridge; +} + +static void prestera_bridge_destroy(struct prestera_bridge *bridge) +{ + struct prestera_switchdev *swdev = bridge->swdev; + + list_del(&bridge->head); + + if (bridge->vlan_enabled) + swdev->bridge_8021q_exists = false; + else + prestera_hw_bridge_delete(swdev->sw, bridge->bridge_id); + + WARN_ON(!list_empty(&bridge->port_list)); + kfree(bridge); +} + +static void prestera_bridge_put(struct prestera_bridge *bridge) +{ + if (list_empty(&bridge->port_list)) + prestera_bridge_destroy(bridge); +} + +static +struct prestera_bridge *prestera_bridge_by_dev(struct prestera_switchdev *swdev, + const struct net_device *dev) +{ + struct prestera_bridge *bridge; + + list_for_each_entry(bridge, &swdev->bridge_list, head) + if 
(bridge->dev == dev) + return bridge; + + return NULL; +} + +static struct prestera_bridge_port * +__prestera_bridge_port_by_dev(struct prestera_bridge *bridge, + struct net_device *dev) +{ + struct prestera_bridge_port *br_port; + + list_for_each_entry(br_port, &bridge->port_list, head) { + if (br_port->dev == dev) + return br_port; + } + + return NULL; +} + +static struct prestera_bridge_port * +prestera_bridge_port_by_dev(struct prestera_switchdev *swdev, + struct net_device *dev) +{ + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + struct prestera_bridge *bridge; + + if (!br_dev) + return NULL; + + bridge = prestera_bridge_by_dev(swdev, br_dev); + if (!bridge) + return NULL; + + return __prestera_bridge_port_by_dev(bridge, dev); +} + +static struct prestera_bridge_port * +prestera_bridge_port_create(struct prestera_bridge *bridge, + struct net_device *dev) +{ + struct prestera_bridge_port *br_port; + + br_port = kzalloc(sizeof(*br_port), GFP_KERNEL); + if (!br_port) + return NULL; + + br_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC | + BR_MCAST_FLOOD; + br_port->stp_state = BR_STATE_DISABLED; + refcount_set(&br_port->ref_count, 1); + br_port->bridge = bridge; + br_port->dev = dev; + + INIT_LIST_HEAD(&br_port->vlan_list); + list_add(&br_port->head, &bridge->port_list); + + return br_port; +} + +static void +prestera_bridge_port_destroy(struct prestera_bridge_port *br_port) +{ + list_del(&br_port->head); + WARN_ON(!list_empty(&br_port->vlan_list)); + kfree(br_port); +} + +static void prestera_bridge_port_get(struct prestera_bridge_port *br_port) +{ + refcount_inc(&br_port->ref_count); +} + +static void prestera_bridge_port_put(struct prestera_bridge_port *br_port) +{ + struct prestera_bridge *bridge = br_port->bridge; + + if (refcount_dec_and_test(&br_port->ref_count)) { + prestera_bridge_port_destroy(br_port); + prestera_bridge_put(bridge); + } +} + +static struct prestera_bridge_port * +prestera_bridge_port_add(struct prestera_bridge *bridge, struct net_device *dev) +{ + struct prestera_bridge_port *br_port; + + br_port = __prestera_bridge_port_by_dev(bridge, dev); + if (br_port) { + prestera_bridge_port_get(br_port); + return br_port; + } + + br_port = prestera_bridge_port_create(bridge, dev); + if (!br_port) + return ERR_PTR(-ENOMEM); + + return br_port; +} + +static int +prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port) +{ + struct prestera_port *port = netdev_priv(br_port->dev); + struct prestera_bridge *bridge = br_port->bridge; + int err; + + err = prestera_hw_bridge_port_add(port, bridge->bridge_id); + if (err) + return err; + + err = prestera_hw_port_flood_set(port, br_port->flags & BR_FLOOD); + if (err) + goto err_port_flood_set; + + err = prestera_hw_port_learning_set(port, br_port->flags & BR_LEARNING); + if (err) + goto err_port_learning_set; + + return 0; + +err_port_learning_set: + prestera_hw_port_flood_set(port, false); +err_port_flood_set: + prestera_hw_bridge_port_delete(port, bridge->bridge_id); + + return err; +} + +static int prestera_port_bridge_join(struct prestera_port *port, + struct net_device *upper) +{ + struct prestera_switchdev *swdev = port->sw->swdev; + struct prestera_bridge_port *br_port; + struct prestera_bridge *bridge; + int err; + + bridge = prestera_bridge_by_dev(swdev, upper); + if (!bridge) { + bridge = prestera_bridge_create(swdev, upper); + if (IS_ERR(bridge)) + return PTR_ERR(bridge); + } + + br_port = prestera_bridge_port_add(bridge, port->dev); + if (IS_ERR(br_port)) { + err = PTR_ERR(br_port); + 
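/* attach failed: the error path puts the bridge, destroying it if no other port holds it */ +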
goto err_brport_create; + } + + if (bridge->vlan_enabled) + return 0; + + err = prestera_bridge_1d_port_join(br_port); + if (err) + goto err_port_join; + + return 0; + +err_port_join: + prestera_bridge_port_put(br_port); +err_brport_create: + prestera_bridge_put(bridge); + return err; +} + +static void prestera_bridge_1q_port_leave(struct prestera_bridge_port *br_port) +{ + struct prestera_port *port = netdev_priv(br_port->dev); + + prestera_hw_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL); + prestera_port_pvid_set(port, PRESTERA_DEFAULT_VID); +} + +static void prestera_bridge_1d_port_leave(struct prestera_bridge_port *br_port) +{ + struct prestera_port *port = netdev_priv(br_port->dev); + + prestera_hw_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL); + prestera_hw_bridge_port_delete(port, br_port->bridge->bridge_id); +} + +static int prestera_port_vid_stp_set(struct prestera_port *port, u16 vid, + u8 state) +{ + u8 hw_state = state; + + switch (state) { + case BR_STATE_DISABLED: + hw_state = PRESTERA_STP_DISABLED; + break; + + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + hw_state = PRESTERA_STP_BLOCK_LISTEN; + break; + + case BR_STATE_LEARNING: + hw_state = PRESTERA_STP_LEARN; + break; + + case BR_STATE_FORWARDING: + hw_state = PRESTERA_STP_FORWARD; + break; + + default: + return -EINVAL; + } + + return prestera_hw_vlan_port_stp_set(port, vid, hw_state); +} + +static void prestera_port_bridge_leave(struct prestera_port *port, + struct net_device *upper) +{ + struct prestera_switchdev *swdev = port->sw->swdev; + struct prestera_bridge_port *br_port; + struct prestera_bridge *bridge; + + bridge = prestera_bridge_by_dev(swdev, upper); + if (!bridge) + return; + + br_port = __prestera_bridge_port_by_dev(bridge, port->dev); + if (!br_port) + return; + + bridge = br_port->bridge; + + if (bridge->vlan_enabled) + prestera_bridge_1q_port_leave(br_port); + else + prestera_bridge_1d_port_leave(br_port); + + prestera_hw_port_learning_set(port, false); + prestera_hw_port_flood_set(port, false); + prestera_port_vid_stp_set(port, PRESTERA_VID_ALL, BR_STATE_FORWARDING); + prestera_bridge_port_put(br_port); +} + +int prestera_bridge_port_event(struct net_device *dev, unsigned long event, + void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; + struct prestera_port *port; + struct net_device *upper; + int err; + + extack = netdev_notifier_info_to_extack(&info->info); + port = netdev_priv(dev); + upper = info->upper_dev; + + switch (event) { + case NETDEV_PRECHANGEUPPER: + if (!netif_is_bridge_master(upper)) { + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); + return -EINVAL; + } + + if (!info->linking) + break; + + if (netdev_has_any_upper_dev(upper)) { + NL_SET_ERR_MSG_MOD(extack, "Upper device is already enslaved"); + return -EINVAL; + } + break; + + case NETDEV_CHANGEUPPER: + if (!netif_is_bridge_master(upper)) + break; + + if (info->linking) { + err = prestera_port_bridge_join(port, upper); + if (err) + return err; + } else { + prestera_port_bridge_leave(port, upper); + } + break; + } + + return 0; +} + +static int prestera_port_attr_br_flags_set(struct prestera_port *port, + struct switchdev_trans *trans, + struct net_device *dev, + unsigned long flags) +{ + struct prestera_bridge_port *br_port; + int err; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + br_port = prestera_bridge_port_by_dev(port->sw->swdev, dev); + if (!br_port) + return 0; + + err = prestera_hw_port_flood_set(port, flags & BR_FLOOD); + if (err) + 
return err; + + err = prestera_hw_port_learning_set(port, flags & BR_LEARNING); + if (err) + return err; + + br_port->flags = flags; + + return 0; +} + +static int prestera_port_attr_br_ageing_set(struct prestera_port *port, + struct switchdev_trans *trans, + unsigned long ageing_clock_t) +{ + unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t); + u32 ageing_time_ms = jiffies_to_msecs(ageing_jiffies); + struct prestera_switch *sw = port->sw; + + if (switchdev_trans_ph_prepare(trans)) { + if (ageing_time_ms < PRESTERA_MIN_AGEING_TIME_MS || + ageing_time_ms > PRESTERA_MAX_AGEING_TIME_MS) + return -ERANGE; + + return 0; + } + + return prestera_hw_switch_ageing_set(sw, ageing_time_ms); +} + +static int prestera_port_attr_br_vlan_set(struct prestera_port *port, + struct switchdev_trans *trans, + struct net_device *dev, + bool vlan_enabled) +{ + struct prestera_switch *sw = port->sw; + struct prestera_bridge *bridge; + + if (!switchdev_trans_ph_prepare(trans)) + return 0; + + bridge = prestera_bridge_by_dev(sw->swdev, dev); + if (WARN_ON(!bridge)) + return -EINVAL; + + if (bridge->vlan_enabled == vlan_enabled) + return 0; + + netdev_err(bridge->dev, "VLAN filtering can't be changed for existing bridge\n"); + + return -EINVAL; +} + +static int prestera_port_bridge_vlan_stp_set(struct prestera_port *port, + struct prestera_bridge_vlan *br_vlan, + u8 state) +{ + struct prestera_port_vlan *port_vlan; + + list_for_each_entry(port_vlan, &br_vlan->port_vlan_list, br_vlan_head) { + if (port_vlan->port != port) + continue; + + return prestera_port_vid_stp_set(port, br_vlan->vid, state); + } + + return 0; +} + +static int prestera_port_attr_stp_state_set(struct prestera_port *port, + struct switchdev_trans *trans, + struct net_device *dev, + u8 state) +{ + struct prestera_bridge_port *br_port; + struct prestera_bridge_vlan *br_vlan; + int err; + u16 vid; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + br_port = prestera_bridge_port_by_dev(port->sw->swdev, dev); + if (!br_port) + return 0; + + if (!br_port->bridge->vlan_enabled) { + vid = br_port->bridge->bridge_id; + err = prestera_port_vid_stp_set(port, vid, state); + if (err) + goto err_port_stp_set; + } else { + list_for_each_entry(br_vlan, &br_port->vlan_list, head) { + err = prestera_port_bridge_vlan_stp_set(port, br_vlan, + state); + if (err) + goto err_port_vlan_stp_set; + } + } + + br_port->stp_state = state; + + return 0; + +err_port_vlan_stp_set: + list_for_each_entry_continue_reverse(br_vlan, &br_port->vlan_list, head) + prestera_port_bridge_vlan_stp_set(port, br_vlan, br_port->stp_state); + return err; + +err_port_stp_set: + prestera_port_vid_stp_set(port, vid, br_port->stp_state); + + return err; +} + +static int prestera_port_obj_attr_set(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans) +{ + struct prestera_port *port = netdev_priv(dev); + int err = 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + err = prestera_port_attr_stp_state_set(port, trans, + attr->orig_dev, + attr->u.stp_state); + break; + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + if (attr->u.brport_flags & + ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) + err = -EINVAL; + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + err = prestera_port_attr_br_flags_set(port, trans, + attr->orig_dev, + attr->u.brport_flags); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + err = prestera_port_attr_br_ageing_set(port, trans, + attr->u.ageing_time); + break; + 
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + err = prestera_port_attr_br_vlan_set(port, trans, + attr->orig_dev, + attr->u.vlan_filtering); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static void +prestera_fdb_offload_notify(struct prestera_port *port, + struct switchdev_notifier_fdb_info *info) +{ + struct switchdev_notifier_fdb_info send_info; + + send_info.addr = info->addr; + send_info.vid = info->vid; + send_info.offloaded = true; + + call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, port->dev, + &send_info.info, NULL); +} + +static int prestera_port_fdb_set(struct prestera_port *port, + struct switchdev_notifier_fdb_info *fdb_info, + bool adding) +{ + struct prestera_switch *sw = port->sw; + struct prestera_bridge_port *br_port; + struct prestera_bridge *bridge; + int err; + u16 vid; + + br_port = prestera_bridge_port_by_dev(sw->swdev, port->dev); + if (!br_port) + return -EINVAL; + + bridge = br_port->bridge; + + if (bridge->vlan_enabled) + vid = fdb_info->vid; + else + vid = bridge->bridge_id; + + if (adding) + err = prestera_hw_fdb_add(port, fdb_info->addr, vid, false); + else + err = prestera_hw_fdb_del(port, fdb_info->addr, vid); + + return err; +} + +static void prestera_fdb_event_work(struct work_struct *work) +{ + struct switchdev_notifier_fdb_info *fdb_info; + struct prestera_fdb_event_work *swdev_work; + struct prestera_port *port; + struct net_device *dev; + int err; + + swdev_work = container_of(work, struct prestera_fdb_event_work, work); + dev = swdev_work->dev; + + rtnl_lock(); + + port = prestera_port_dev_lower_find(dev); + if (!port) + goto out_unlock; + + switch (swdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + fdb_info = &swdev_work->fdb_info; + if (!fdb_info->added_by_user) + break; + + err = prestera_port_fdb_set(port, fdb_info, true); + if (err) + break; + + prestera_fdb_offload_notify(port, fdb_info); + break; + + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = &swdev_work->fdb_info; + prestera_port_fdb_set(port, fdb_info, false); + break; + } + +out_unlock: + rtnl_unlock(); + + kfree(swdev_work->fdb_info.addr); + kfree(swdev_work); + dev_put(dev); +} + +static int prestera_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_fdb_info *fdb_info; + struct switchdev_notifier_info *info = ptr; + struct prestera_fdb_event_work *swdev_work; + struct net_device *upper; + int err; + + if (event == SWITCHDEV_PORT_ATTR_SET) { + err = switchdev_handle_port_attr_set(dev, ptr, + prestera_netdev_check, + prestera_port_obj_attr_set); + return notifier_from_errno(err); + } + + if (!prestera_netdev_check(dev)) + return NOTIFY_DONE; + + upper = netdev_master_upper_dev_get_rcu(dev); + if (!upper) + return NOTIFY_DONE; + + if (!netif_is_bridge_master(upper)) + return NOTIFY_DONE; + + swdev_work = kzalloc(sizeof(*swdev_work), GFP_ATOMIC); + if (!swdev_work) + return NOTIFY_BAD; + + swdev_work->event = event; + swdev_work->dev = dev; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + + INIT_WORK(&swdev_work->work, prestera_fdb_event_work); + memcpy(&swdev_work->fdb_info, ptr, + sizeof(swdev_work->fdb_info)); + + swdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!swdev_work->fdb_info.addr) + goto out_bad; + + ether_addr_copy((u8 *)swdev_work->fdb_info.addr, + fdb_info->addr); + dev_hold(dev); 
+ break; + + default: + kfree(swdev_work); + return NOTIFY_DONE; + } + + queue_work(swdev_wq, &swdev_work->work); + return NOTIFY_DONE; + +out_bad: + kfree(swdev_work); + return NOTIFY_BAD; +} + +static int +prestera_port_vlan_bridge_join(struct prestera_port_vlan *port_vlan, + struct prestera_bridge_port *br_port) +{ + struct prestera_port *port = port_vlan->port; + struct prestera_bridge_vlan *br_vlan; + u16 vid = port_vlan->vid; + int err; + + if (port_vlan->br_port) + return 0; + + err = prestera_hw_port_flood_set(port, br_port->flags & BR_FLOOD); + if (err) + return err; + + err = prestera_hw_port_learning_set(port, br_port->flags & BR_LEARNING); + if (err) + goto err_port_learning_set; + + err = prestera_port_vid_stp_set(port, vid, br_port->stp_state); + if (err) + goto err_port_vid_stp_set; + + br_vlan = prestera_bridge_vlan_by_vid(br_port, vid); + if (!br_vlan) { + br_vlan = prestera_bridge_vlan_create(br_port, vid); + if (!br_vlan) { + err = -ENOMEM; + goto err_bridge_vlan_get; + } + } + + list_add(&port_vlan->br_vlan_head, &br_vlan->port_vlan_list); + + prestera_bridge_port_get(br_port); + port_vlan->br_port = br_port; + + return 0; + +err_bridge_vlan_get: + prestera_port_vid_stp_set(port, vid, BR_STATE_FORWARDING); +err_port_vid_stp_set: + prestera_hw_port_learning_set(port, false); +err_port_learning_set: + return err; +} + +static int +prestera_bridge_port_vlan_add(struct prestera_port *port, + struct prestera_bridge_port *br_port, + u16 vid, bool is_untagged, bool is_pvid, + struct netlink_ext_ack *extack) +{ + struct prestera_port_vlan *port_vlan; + u16 old_pvid = port->pvid; + u16 pvid; + int err; + + if (is_pvid) + pvid = vid; + else + pvid = port->pvid == vid ? 0 : port->pvid; + + port_vlan = prestera_port_vlan_by_vid(port, vid); + if (port_vlan && port_vlan->br_port != br_port) + return -EEXIST; + + if (!port_vlan) { + port_vlan = prestera_port_vlan_create(port, vid, is_untagged); + if (IS_ERR(port_vlan)) + return PTR_ERR(port_vlan); + } else { + err = prestera_hw_vlan_port_set(port, vid, true, is_untagged); + if (err) + goto err_port_vlan_set; + } + + err = prestera_port_pvid_set(port, pvid); + if (err) + goto err_port_pvid_set; + + err = prestera_port_vlan_bridge_join(port_vlan, br_port); + if (err) + goto err_port_vlan_bridge_join; + + return 0; + +err_port_vlan_bridge_join: + prestera_port_pvid_set(port, old_pvid); +err_port_pvid_set: + prestera_hw_vlan_port_set(port, vid, false, false); +err_port_vlan_set: + prestera_port_vlan_destroy(port_vlan); + + return err; +} + +static void +prestera_bridge_port_vlan_del(struct prestera_port *port, + struct prestera_bridge_port *br_port, u16 vid) +{ + u16 pvid = port->pvid == vid ? 
0 : port->pvid; + struct prestera_port_vlan *port_vlan; + + port_vlan = prestera_port_vlan_by_vid(port, vid); + if (WARN_ON(!port_vlan)) + return; + + prestera_port_vlan_bridge_leave(port_vlan); + prestera_port_pvid_set(port, pvid); + prestera_port_vlan_destroy(port_vlan); +} + +static int prestera_port_vlans_add(struct prestera_port *port, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans, + struct netlink_ext_ack *extack) +{ + bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + struct net_device *dev = vlan->obj.orig_dev; + struct prestera_bridge_port *br_port; + struct prestera_switch *sw = port->sw; + struct prestera_bridge *bridge; + u16 vid; + + if (netif_is_bridge_master(dev)) + return 0; + + if (switchdev_trans_ph_commit(trans)) + return 0; + + br_port = prestera_bridge_port_by_dev(sw->swdev, dev); + if (WARN_ON(!br_port)) + return -EINVAL; + + bridge = br_port->bridge; + if (!bridge->vlan_enabled) + return 0; + + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { + int err; + + err = prestera_bridge_port_vlan_add(port, br_port, + vid, flag_untagged, + flag_pvid, extack); + if (err) + return err; + } + + return 0; +} + +static int prestera_port_obj_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct switchdev_trans *trans, + struct netlink_ext_ack *extack) +{ + struct prestera_port *port = netdev_priv(dev); + const struct switchdev_obj_port_vlan *vlan; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + return prestera_port_vlans_add(port, vlan, trans, extack); + default: + return -EOPNOTSUPP; + } +} + +static int prestera_port_vlans_del(struct prestera_port *port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct net_device *dev = vlan->obj.orig_dev; + struct prestera_bridge_port *br_port; + struct prestera_switch *sw = port->sw; + u16 vid; + + if (netif_is_bridge_master(dev)) + return -EOPNOTSUPP; + + br_port = prestera_bridge_port_by_dev(sw->swdev, dev); + if (WARN_ON(!br_port)) + return -EINVAL; + + if (!br_port->bridge->vlan_enabled) + return 0; + + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) + prestera_bridge_port_vlan_del(port, br_port, vid); + + return 0; +} + +static int prestera_port_obj_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + struct prestera_port *port = netdev_priv(dev); + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + return prestera_port_vlans_del(port, SWITCHDEV_OBJ_PORT_VLAN(obj)); + default: + return -EOPNOTSUPP; + } +} + +static int prestera_switchdev_blk_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + int err; + + switch (event) { + case SWITCHDEV_PORT_OBJ_ADD: + err = switchdev_handle_port_obj_add(dev, ptr, + prestera_netdev_check, + prestera_port_obj_add); + break; + case SWITCHDEV_PORT_OBJ_DEL: + err = switchdev_handle_port_obj_del(dev, ptr, + prestera_netdev_check, + prestera_port_obj_del); + break; + case SWITCHDEV_PORT_ATTR_SET: + err = switchdev_handle_port_attr_set(dev, ptr, + prestera_netdev_check, + prestera_port_obj_attr_set); + break; + default: + err = -EOPNOTSUPP; + } + + return notifier_from_errno(err); +} + +static void prestera_fdb_event(struct prestera_switch *sw, + struct prestera_event *evt, void *arg) +{ + struct switchdev_notifier_fdb_info info; + struct prestera_port *port; + + port = prestera_find_port(sw, 
evt->fdb_evt.port_id); + if (!port) + return; + + info.addr = evt->fdb_evt.data.mac; + info.vid = evt->fdb_evt.vid; + info.offloaded = true; + + rtnl_lock(); + + switch (evt->id) { + case PRESTERA_FDB_EVENT_LEARNED: + call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE, + port->dev, &info.info, NULL); + break; + case PRESTERA_FDB_EVENT_AGED: + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, + port->dev, &info.info, NULL); + break; + } + + rtnl_unlock(); +} + +static int prestera_fdb_init(struct prestera_switch *sw) +{ + int err; + + err = prestera_hw_event_handler_register(sw, PRESTERA_EVENT_TYPE_FDB, + prestera_fdb_event, NULL); + if (err) + return err; + + err = prestera_hw_switch_ageing_set(sw, PRESTERA_DEFAULT_AGEING_TIME_MS); + if (err) + goto err_ageing_set; + + return 0; + +err_ageing_set: + prestera_hw_event_handler_unregister(sw, PRESTERA_EVENT_TYPE_FDB, + prestera_fdb_event); + return err; +} + +static void prestera_fdb_fini(struct prestera_switch *sw) +{ + prestera_hw_event_handler_unregister(sw, PRESTERA_EVENT_TYPE_FDB, + prestera_fdb_event); +} + +static int prestera_switchdev_handler_init(struct prestera_switchdev *swdev) +{ + int err; + + swdev->swdev_nb.notifier_call = prestera_switchdev_event; + err = register_switchdev_notifier(&swdev->swdev_nb); + if (err) + return err; + + swdev->swdev_nb_blk.notifier_call = prestera_switchdev_blk_event; + err = register_switchdev_blocking_notifier(&swdev->swdev_nb_blk); + if (err) + goto err_register_blk_swdev_notifier; + + return 0; + +err_register_blk_swdev_notifier: + unregister_switchdev_notifier(&swdev->swdev_nb); + /* swdev_wq is destroyed by the caller's error path; do not free it twice */ + return err; +} + +static void prestera_switchdev_handler_fini(struct prestera_switchdev *swdev) +{ + unregister_switchdev_blocking_notifier(&swdev->swdev_nb_blk); + unregister_switchdev_notifier(&swdev->swdev_nb); +} + +int prestera_switchdev_init(struct prestera_switch *sw) +{ + struct prestera_switchdev *swdev; + int err; + + swdev = kzalloc(sizeof(*swdev), GFP_KERNEL); + if (!swdev) + return -ENOMEM; + + sw->swdev = swdev; + swdev->sw = sw; + + INIT_LIST_HEAD(&swdev->bridge_list); + + swdev_wq = alloc_ordered_workqueue("%s_ordered", 0, "prestera_br"); + if (!swdev_wq) { + err = -ENOMEM; + goto err_alloc_wq; + } + + err = prestera_switchdev_handler_init(swdev); + if (err) + goto err_swdev_init; + + err = prestera_fdb_init(sw); + if (err) + goto err_fdb_init; + + return 0; + +err_fdb_init: + prestera_switchdev_handler_fini(swdev); +err_swdev_init: + destroy_workqueue(swdev_wq); +err_alloc_wq: + kfree(swdev); + + return err; +} + +void prestera_switchdev_fini(struct prestera_switch *sw) +{ + struct prestera_switchdev *swdev = sw->swdev; + + prestera_fdb_fini(sw); + prestera_switchdev_handler_fini(swdev); + destroy_workqueue(swdev_wq); + kfree(swdev); +} diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h new file mode 100644 index 000000000000..606e21d2355b --- /dev/null +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved. 
*/ + +#ifndef _PRESTERA_SWITCHDEV_H_ +#define _PRESTERA_SWITCHDEV_H_ + +int prestera_switchdev_init(struct prestera_switch *sw); +void prestera_switchdev_fini(struct prestera_switch *sw); + +int prestera_bridge_port_event(struct net_device *dev, unsigned long event, + void *ptr); + +#endif /* _PRESTERA_SWITCHDEV_H_ */ diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index eb8cf60ecf12..d1e4d42e497d 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1187,11 +1187,10 @@ static int pxa168_eth_stop(struct net_device *dev) static int pxa168_eth_change_mtu(struct net_device *dev, int mtu) { - int retval; struct pxa168_eth_private *pep = netdev_priv(dev); dev->mtu = mtu; - retval = set_port_config_ext(pep); + set_port_config_ext(pep); if (!netif_running(dev)) return 0; @@ -1541,10 +1540,8 @@ static int pxa168_eth_remove(struct platform_device *pdev) } if (dev->phydev) phy_disconnect(dev->phydev); - if (pep->clk) { - clk_disable_unprepare(pep->clk); - } + clk_disable_unprepare(pep->clk); mdiobus_unregister(pep->smi_bus); mdiobus_free(pep->smi_bus); unregister_netdev(dev); diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 6a930351cb23..8a9c0f490bfb 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -3338,9 +3338,9 @@ static void skge_error_irq(struct skge_hw *hw) * because accessing phy registers requires spin wait which might * cause excess interrupt latency. */ -static void skge_extirq(unsigned long arg) +static void skge_extirq(struct tasklet_struct *t) { - struct skge_hw *hw = (struct skge_hw *) arg; + struct skge_hw *hw = from_tasklet(hw, t, phy_task); int port; for (port = 0; port < hw->ports; port++) { @@ -3927,7 +3927,7 @@ static int skge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->pdev = pdev; spin_lock_init(&hw->hw_lock); spin_lock_init(&hw->phy_lock); - tasklet_init(&hw->phy_task, skge_extirq, (unsigned long) hw); + tasklet_setup(&hw->phy_task, skge_extirq); hw->regs = ioremap(pci_resource_start(pdev, 0), 0x4000); if (!hw->regs) { diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 65f8a4b6ed0c..3b8576b9c2f9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -55,11 +55,11 @@ #define TASKLET_MAX_TIME 2 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) -void mlx4_cq_tasklet_cb(unsigned long data) +void mlx4_cq_tasklet_cb(struct tasklet_struct *t) { unsigned long flags; unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; - struct mlx4_eq_tasklet *ctx = (struct mlx4_eq_tasklet *)data; + struct mlx4_eq_tasklet *ctx = from_tasklet(ctx, t, task); struct mlx4_cq *mcq, *temp; spin_lock_irqsave(&ctx->lock, flags); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index b816154bc79a..23849f2b9c25 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1106,6 +1106,24 @@ static int mlx4_en_set_pauseparam(struct net_device *dev, return err; } +static void mlx4_en_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *stats) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct bitmap_iterator it; + + bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS); + + spin_lock_bh(&priv->stats_lock); + if 
(test_bit(FLOW_PRIORITY_STATS_IDX_TX_FRAMES, + priv->stats_bitmap.bitmap)) + stats->tx_pause_frames = priv->tx_flowstats.tx_pause; + if (test_bit(FLOW_PRIORITY_STATS_IDX_RX_FRAMES, + priv->stats_bitmap.bitmap)) + stats->rx_pause_frames = priv->rx_flowstats.rx_pause; + spin_unlock_bh(&priv->stats_lock); +} + static void mlx4_en_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { @@ -2138,6 +2156,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .set_msglevel = mlx4_en_set_msglevel, .get_coalesce = mlx4_en_get_coalesce, .set_coalesce = mlx4_en_set_coalesce, + .get_pause_stats = mlx4_en_get_pause_stats, .get_pauseparam = mlx4_en_get_pauseparam, .set_pauseparam = mlx4_en_set_pauseparam, .get_ringparam = mlx4_en_get_ringparam, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b50c567ef508..502d1b97855c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -705,7 +705,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud frags = ring->rx_info + (index << priv->log_rx_info); va = page_address(frags[0].page) + frags[0].page_offset; - prefetchw(va); + net_prefetchw(va); /* * make sure we read the CQE after we read the ownership bit */ @@ -943,6 +943,9 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) bool clean_complete = true; int done; + if (!budget) + return 0; + if (priv->tx_ring_num[TX_XDP]) { xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring]; if (xdp_tx_cq->xdp_busy) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 9dff7b086c9f..3ddb7268e415 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -350,7 +350,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, .dma = tx_info->map0_dma, }; - if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { + if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { dma_unmap_page(priv->ddev, tx_info->map0_dma, PAGE_SIZE, priv->dma_dir); put_page(tx_info->page); @@ -842,6 +842,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) struct mlx4_en_tx_desc *tx_desc; struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; + u32 __maybe_unused ring_cons; int tx_ind; int nr_txbb; int desc_size; @@ -855,7 +856,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bool stop_queue; bool inline_ok; u8 data_offset; - u32 ring_cons; bool bf_ok; tx_ind = skb_get_queue_mapping(skb); @@ -1075,7 +1075,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) */ smp_rmb(); - ring_cons = READ_ONCE(ring->cons); if (unlikely(!mlx4_en_is_tx_ring_full(ring))) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index ae305c2e9225..9e48509ed3b2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1057,8 +1057,7 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, INIT_LIST_HEAD(&eq->tasklet_ctx.list); INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); + tasklet_setup(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 
258c7a96f269..c326b434734e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3031,6 +3031,17 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) if (err) return err; + /* Ethernet and IB drivers will normally set the port type, + * but if they are not built set the type now to prevent + * devlink_port_type_warn() from firing. + */ + if (!IS_ENABLED(CONFIG_MLX4_EN) && + dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + devlink_port_type_eth_set(&info->devlink_port, NULL); + else if (!IS_ENABLED(CONFIG_MLX4_INFINIBAND) && + dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) + devlink_port_type_ib_set(&info->devlink_port, NULL); + info->dev = dev; info->port = port; if (!mlx4_is_slave(dev)) { @@ -3935,6 +3946,8 @@ static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload, struct devlink *devlink); static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, struct netlink_ext_ack *extack) { struct mlx4_priv *priv = devlink_priv(devlink); @@ -3951,7 +3964,8 @@ static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change, return 0; } -static int mlx4_devlink_reload_up(struct devlink *devlink, +static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack) { struct mlx4_priv *priv = devlink_priv(devlink); @@ -3959,6 +3973,7 @@ static int mlx4_devlink_reload_up(struct devlink *devlink, struct mlx4_dev_persistent *persist = dev->persist; int err; + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); err = mlx4_restart_one_up(persist->pdev, true, devlink); if (err) mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n", @@ -3969,6 +3984,7 @@ static int mlx4_devlink_reload_up(struct devlink *devlink, static const struct devlink_ops mlx4_devlink_ops = { .port_type_set = mlx4_devlink_port_type_set, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), .reload_down = mlx4_devlink_reload_down, .reload_up = mlx4_devlink_reload_up, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 527b52e48276..64bed7ac3836 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1217,7 +1217,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev); int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, u16 op, unsigned long timeout); -void mlx4_cq_tasklet_cb(unsigned long data); +void mlx4_cq_tasklet_cb(struct tasklet_struct *t); void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn); void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index 86b6051da8ec..51d4eaab6a2f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -84,6 +84,11 @@ struct mlx4_en_flow_stats_rx { MLX4_NUM_PRIORITIES) }; +#define FLOW_PRIORITY_STATS_IDX_RX_FRAMES (NUM_MAIN_STATS + \ + NUM_PORT_STATS + \ + NUM_PF_STATS + \ + NUM_FLOW_PRIORITY_STATS_RX) + struct mlx4_en_flow_stats_tx { u64 tx_pause; u64 tx_pause_duration; @@ -93,6 +98,13 @@ struct mlx4_en_flow_stats_tx { MLX4_NUM_PRIORITIES) }; +#define FLOW_PRIORITY_STATS_IDX_TX_FRAMES (NUM_MAIN_STATS + \ + NUM_PORT_STATS + \ + NUM_PF_STATS + \ + NUM_FLOW_PRIORITY_STATS_RX + \ + 
NUM_FLOW_STATS_RX + \ + NUM_FLOW_PRIORITY_STATS_TX) + #define NUM_FLOW_STATS (NUM_FLOW_STATS_RX + NUM_FLOW_STATS_TX + \ NUM_FLOW_PRIORITY_STATS_TX + \ NUM_FLOW_PRIORITY_STATS_RX) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 10e6886c96ba..2d477f9a8cb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o # # Netdev basic @@ -24,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ - en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \ + en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o # @@ -37,7 +37,7 @@ mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \ en_rep.o en/rep/bond.o en/mod_hdr.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ - en/mapping.o esw/chains.o en/tc_tun.o \ + en/mapping.o lib/fs_chains.o en/tc_tun.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o @@ -49,7 +49,8 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo ecpf.o rdma.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ - esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o + esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \ + esw/devlink_port.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 8db4b5f0f963..291e427e9e4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -56,8 +56,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, size_t size, dma_addr_t *dma_handle, int node) { + struct device *device = mlx5_core_dma_dev(dev); struct mlx5_priv *priv = &dev->priv; - struct device *device = dev->device; int original_node; void *cpu_handle; @@ -111,7 +111,7 @@ EXPORT_SYMBOL(mlx5_buf_alloc); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) { - dma_free_coherent(dev->device, buf->size, buf->frags->buf, + dma_free_coherent(mlx5_core_dma_dev(dev), buf->size, buf->frags->buf, buf->frags->map); kfree(buf->frags); @@ -140,7 +140,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, if (!frag->buf) goto err_free_buf; if (frag->map & ((1 << buf->page_shift) - 1)) { - dma_free_coherent(dev->device, frag_sz, + dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf, buf->frags[i].map); mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n", &frag->map, buf->page_shift); @@ -153,7 +153,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev 
*dev, int size, err_free_buf: while (i--) - dma_free_coherent(dev->device, PAGE_SIZE, buf->frags[i].buf, + dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, buf->frags[i].buf, buf->frags[i].map); kfree(buf->frags); err_out: @@ -169,7 +169,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) for (i = 0; i < buf->npages; i++) { int frag_sz = min_t(int, size, PAGE_SIZE); - dma_free_coherent(dev->device, frag_sz, buf->frags[i].buf, + dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf, buf->frags[i].map); size -= frag_sz; } @@ -275,7 +275,7 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) __set_bit(db->index, db->u.pgdir->bitmap); if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) { - dma_free_coherent(dev->device, PAGE_SIZE, + dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); bitmap_free(db->u.pgdir->bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2d1f4b3be9bf..e49387dbef98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1989,9 +1989,7 @@ static void create_msg_cache(struct mlx5_core_dev *dev) static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { - struct device *ddev = dev->device; - - cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, + cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, &cmd->alloc_dma, GFP_KERNEL); if (!cmd->cmd_alloc_buf) return -ENOMEM; @@ -2004,9 +2002,9 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) return 0; } - dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, + dma_free_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, cmd->alloc_dma); - cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, + cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), 2 * MLX5_ADAPTER_PAGE_SIZE - 1, &cmd->alloc_dma, GFP_KERNEL); if (!cmd->cmd_alloc_buf) @@ -2020,9 +2018,7 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { - struct device *ddev = dev->device; - - dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf, + dma_free_coherent(mlx5_core_dma_dev(dev), cmd->alloc_size, cmd->cmd_alloc_buf, cmd->alloc_dma); } @@ -2054,7 +2050,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) if (!cmd->stats) return -ENOMEM; - cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0); + cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); if (!cmd->pool) { err = -ENOMEM; goto dma_pool_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 8379b24cb838..df3e4938ecdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -42,11 +42,11 @@ #define TASKLET_MAX_TIME 2 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) -void mlx5_cq_tasklet_cb(unsigned long data) +void mlx5_cq_tasklet_cb(struct tasklet_struct *t) { unsigned long flags; unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; - struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data; + struct mlx5_eq_tasklet *ctx = from_tasklet(ctx, t, task); struct mlx5_core_cq *mcq; struct mlx5_core_cq *temp; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index c709e9a385f6..a28f95df2901 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -4,22 +4,19 @@ #include <devlink.h> #include "mlx5_core.h" +#include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" static int mlx5_devlink_flash_update(struct devlink *devlink, - const char *file_name, - const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); const struct firmware *fw; int err; - if (component) - return -EOPNOTSUPP; - - err = request_firmware_direct(&fw, file_name, &dev->pdev->dev); + err = request_firmware_direct(&fw, params->file_name, &dev->pdev->dev); if (err) return err; @@ -88,21 +85,96 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, return 0; } +static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 reset_level, reset_type, net_port_alive; + int err; + + err = mlx5_fw_reset_query(dev, &reset_level, &reset_type); + if (err) + return err; + if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL3)) { + NL_SET_ERR_MSG_MOD(extack, "FW activate requires reboot"); + return -EINVAL; + } + + net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE); + err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive); + if (err) + goto out; + + err = mlx5_fw_reset_wait_reset_done(dev); +out: + if (err) + NL_SET_ERR_MSG_MOD(extack, "FW activate command failed"); + return err; +} + +static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 reset_level; + int err; + + err = mlx5_fw_reset_query(dev, &reset_level, NULL); + if (err) + return err; + if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL0)) { + NL_SET_ERR_MSG_MOD(extack, + "FW upgrade to the stored FW can't be done by FW live patching"); + return -EINVAL; + } + + return mlx5_fw_reset_set_live_patch(dev); +} + static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); - mlx5_unload_one(dev, false); - return 0; + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + mlx5_unload_one(dev, false); + return 0; + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + return mlx5_devlink_trigger_fw_live_patch(devlink, extack); + return mlx5_devlink_reload_fw_activate(devlink, extack); + default: + /* Unsupported action should not get to this function */ + WARN_ON(1); + return -EOPNOTSUPP; + } } -static int mlx5_devlink_reload_up(struct devlink *devlink, +static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); - return mlx5_load_one(dev, false); + *actions_performed = BIT(action); + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + return mlx5_load_one(dev, false); + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + break; + /* On fw_activate action, also driver is reloaded and reinit 
performed */ + *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + return mlx5_load_one(dev, false); + default: + /* Unsupported action should not get to this function */ + WARN_ON(1); + return -EOPNOTSUPP; + } + + return 0; } static const struct devlink_ops mlx5_devlink_ops = { @@ -118,6 +190,9 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), + .reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET), .reload_down = mlx5_devlink_reload_down, .reload_up = mlx5_devlink_reload_up, }; @@ -228,6 +303,24 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id } #endif +static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + mlx5_fw_reset_enable_remote_dev_reset_set(dev, ctx->val.vbool); + return 0; +} + +static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_fw_reset_enable_remote_dev_reset_get(dev); + return 0; +} + static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, @@ -243,6 +336,9 @@ static const struct devlink_param mlx5_devlink_params[] = { NULL, NULL, mlx5_devlink_large_group_num_validate), #endif + DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_enable_remote_dev_reset_get, + mlx5_devlink_enable_remote_dev_reset_set, NULL), }; static void mlx5_devlink_set_params_init_values(struct devlink *devlink) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index ad3594c4afcb..2eb022ad7fd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -124,7 +124,7 @@ static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer) static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer) { struct mlx5_core_dev *dev = tracer->dev; - struct device *ddev = &dev->pdev->dev; + struct device *ddev; dma_addr_t dma; void *buff; gfp_t gfp; @@ -142,6 +142,7 @@ static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer) } tracer->buff.log_buf = buff; + ddev = mlx5_core_dma_dev(dev); dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE); if (dma_mapping_error(ddev, dma)) { mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n", @@ -162,11 +163,12 @@ free_pages: static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer) { struct mlx5_core_dev *dev = tracer->dev; - struct device *ddev = &dev->pdev->dev; + struct device *ddev; if (!tracer->buff.log_buf) return; + ddev = mlx5_core_dma_dev(dev); dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE); free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size)); } @@ -1064,6 +1066,58 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) kvfree(tracer); } +static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + cancel_work_sync(&tracer->read_fw_strings_work); + 
mlx5_fw_tracer_clean_ready_list(tracer); + mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); + mlx5_fw_tracer_free_strings_db(tracer); + + dev = tracer->dev; + err = mlx5_query_mtrc_caps(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err); + return err; + } + + err = mlx5_fw_tracer_allocate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err); + return err; + } + mlx5_fw_tracer_init_saved_traces_array(tracer); + + return 0; +} + +int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + if (IS_ERR_OR_NULL(tracer)) + return -EINVAL; + + dev = tracer->dev; + mlx5_fw_tracer_cleanup(tracer); + err = mlx5_fw_tracer_recreate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "Failed to recreate FW tracer strings DB\n"); + return err; + } + err = mlx5_fw_tracer_init(tracer); + if (err) { + mlx5_core_warn(dev, "Failed to re-initialize FW tracer\n"); + return err; + } + + return 0; +} + static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data) { struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 40601fba80ba..97252a85d65e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -191,5 +191,6 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev); int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, struct devlink_fmsg *fmsg); +int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c index 4924a5658853..ed4fb79b4db7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -78,7 +78,7 @@ static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump struct page *page) { struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; - struct device *ddev = &dev->pdev->dev; + struct device *ddev = mlx5_core_dma_dev(dev); u32 out_seq_num; u32 in_seq_num; dma_addr_t dma; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index a894ea98c95a..3dc9dd3f24dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -43,19 +43,13 @@ static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev) int mlx5_ec_init(struct mlx5_core_dev *dev) { - int err = 0; - if (!mlx5_core_is_ecpf(dev)) return 0; /* ECPF shall enable HCA for peer PF in the same way a PF * does this for its VFs. 
*/ - err = mlx5_peer_pf_init(dev); - if (err) - return err; - - return 0; + return mlx5_peer_pf_init(dev); } void mlx5_ec_cleanup(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 356f5852955f..2f05b0f9de01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -226,6 +226,7 @@ enum mlx5e_priv_flag { MLX5E_PFLAG_RX_STRIDING_RQ, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5E_PFLAG_SKB_TX_MPWQE, MLX5E_NUM_PFLAGS, /* Keep last */ }; @@ -270,6 +271,7 @@ enum { MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */ + MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX /* set when mini_cqe_resp_stride_index cap is used */ }; struct mlx5e_cq { @@ -309,6 +311,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_MPWQE, MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, @@ -317,26 +320,40 @@ enum { MLX5E_SQ_STATE_PENDING_XSK_TX, }; +struct mlx5e_tx_mpwqe { + /* Current MPWQE session */ + struct mlx5e_tx_wqe *wqe; + u32 bytes_count; + u8 ds_count; + u8 pkt_count; + u8 inline_on; +}; + struct mlx5e_txqsq { /* data path */ /* dirtied @completion */ u16 cc; + u16 skb_fifo_cc; u32 dma_fifo_cc; struct dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; + u16 skb_fifo_pc; u32 dma_fifo_pc; + struct mlx5e_tx_mpwqe mpwqe; struct mlx5e_cq cq; /* read only */ struct mlx5_wq_cyc wq; u32 dma_fifo_mask; + u16 skb_fifo_mask; struct mlx5e_sq_stats *stats; struct { struct mlx5e_sq_dma *dma_fifo; + struct sk_buff **skb_fifo; struct mlx5e_tx_wqe_info *wqe_info; } db; void __iomem *uar_map; @@ -403,7 +420,7 @@ struct mlx5e_xdp_info { }; }; -struct mlx5e_xdp_xmit_data { +struct mlx5e_xmit_data { dma_addr_t dma_addr; void *data; u32 len; @@ -416,18 +433,10 @@ struct mlx5e_xdp_info_fifo { u32 mask; }; -struct mlx5e_xdp_mpwqe { - /* Current MPWQE session */ - struct mlx5e_tx_wqe *wqe; - u8 ds_count; - u8 pkt_count; - u8 inline_on; -}; - struct mlx5e_xdpsq; typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, - struct mlx5e_xdp_xmit_data *, + struct mlx5e_xmit_data *, struct mlx5e_xdp_info *, int); @@ -442,12 +451,12 @@ struct mlx5e_xdpsq { u32 xdpi_fifo_pc ____cacheline_aligned_in_smp; u16 pc; struct mlx5_wqe_ctrl_seg *doorbell_cseg; - struct mlx5e_xdp_mpwqe mpwqe; + struct mlx5e_tx_mpwqe mpwqe; struct mlx5e_cq cq; /* read only */ - struct xdp_umem *umem; + struct xsk_buff_pool *xsk_pool; struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check; @@ -611,7 +620,7 @@ struct mlx5e_rq { struct page_pool *page_pool; /* AF_XDP zero-copy */ - struct xdp_umem *umem; + struct xsk_buff_pool *xsk_pool; struct work_struct recover_work; @@ -735,12 +744,13 @@ struct mlx5e_hv_vhca_stats_agent { #endif struct mlx5e_xsk { - /* UMEMs are stored separately from channels, because we don't want to - * lose them when channels are recreated. The kernel also stores UMEMs, - * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs, - * so rely on our mechanism. + /* XSK buffer pools are stored separately from channels, + * because we don't want to lose them when channels are + * recreated. 
The kernel also stores buffer pools, but it doesn't + * distinguish between zero-copy and non-zero-copy UMEMs, so + * we rely on our mechanism. */ - struct xdp_umem **umems; + struct xsk_buff_pool **pools; u16 refcnt; bool ever_used; }; @@ -899,7 +909,7 @@ struct mlx5e_xsk_param; struct mlx5e_rq_param; int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xdp_umem *umem, struct mlx5e_rq *rq); + struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq); int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_close_rq(struct mlx5e_rq *rq); @@ -909,7 +919,7 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); void mlx5e_close_icosq(struct mlx5e_icosq *sq); int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 6fdcd5e69476..dc744702aee4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -12,9 +12,12 @@ enum { }; struct mlx5e_tc_table { - /* protects flow table */ + /* Protects the dynamic assignment of the t parameter + * which is the nic tc root table. + */ struct mutex t_lock; struct mlx5_flow_table *t; + struct mlx5_fs_chains *chains; struct rhashtable ht; @@ -24,6 +27,8 @@ struct mlx5e_tc_table { struct notifier_block netdevice_nb; struct netdev_net_notifier netdevice_nn; + + struct mlx5_tc_ct_priv *ct; }; struct mlx5e_flow_table { @@ -231,6 +236,7 @@ struct mlx5e_accel_fs_tcp; struct mlx5e_flow_steering { struct mlx5_flow_namespace *ns; + struct mlx5_flow_namespace *egress_ns; #ifdef CONFIG_MLX5_EN_RXNFC struct mlx5e_ethtool_steering ethtool; #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 3dc200bcfabd..69a05da0e3e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -242,8 +242,8 @@ static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, { u32 data_size; + int err = 0; u32 offset; - int err; for (offset = 0; offset < value_len; offset += data_size) { data_size = value_len - offset; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 79cc42d88eec..e36e505d38ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -12,7 +12,7 @@ #include "neigh.h" #include "en_rep.h" #include "eswitch.h" -#include "esw/chains.h" +#include "lib/fs_chains.h" #include "en/tc_ct.h" #include "en/mapping.h" #include "en/tc_tun.h" @@ -191,7 +191,7 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, case TC_SETUP_CLSFLOWER: memcpy(&tmp, f, sizeof(*f)); - if (!mlx5_esw_chains_prios_supported(esw)) + if (!mlx5_chains_prios_supported(esw_chains(esw))) return -EOPNOTSUPP; /* Re-use tc offload path by moving the ft flow to the @@ -203,12 +203,12 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, * * We only support chain 0 of FT
offload. */ - if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw)) + if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw))) return -EOPNOTSUPP; if (tmp.common.chain_index != 0) return -EOPNOTSUPP; - tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw)); tmp.common.prio++; err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags); memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); @@ -378,12 +378,12 @@ static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type, * * We only support chain 0 of FT offload. */ - if (!mlx5_esw_chains_prios_supported(esw) || - tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) || + if (!mlx5_chains_prios_supported(esw_chains(esw)) || + tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) || tmp.common.chain_index) return -EOPNOTSUPP; - tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw)); tmp.common.prio++; err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags); memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); @@ -612,7 +612,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, struct tc_skb_ext *tc_skb_ext; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; - int tunnel_moffset; int err; reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); @@ -626,7 +625,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; - err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); + err = mlx5_get_chain_for_tag(esw_chains(esw), reg_c0, &chain); if (err) { netdev_dbg(priv->netdev, "Couldn't find chain for chain tag: %d, err: %d\n", @@ -647,13 +646,12 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &uplink_rpriv->uplink_priv; - if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, + if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb, zone_restore_id)) return false; } - tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; - tunnel_id = reg_c1 >> (8 * tunnel_moffset); + tunnel_id = reg_c1 >> REG_MAPPING_SHIFT(TUNNEL_TO_REG); return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); #endif /* CONFIG_NET_TC_SKB_EXT */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a8be40cbe325..e521254d886e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -14,7 +14,7 @@ #include <linux/workqueue.h> #include <linux/xarray.h> -#include "esw/chains.h" +#include "lib/fs_chains.h" #include "en/tc_ct.h" #include "en/mod_hdr.h" #include "en/mapping.h" @@ -39,8 +39,9 @@ netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) struct mlx5_tc_ct_priv { - struct mlx5_eswitch *esw; + struct mlx5_core_dev *dev; const struct net_device *netdev; + struct mod_hdr_tbl *mod_hdr_tbl; struct idr fte_ids; struct xarray tuple_ids; struct rhashtable zone_ht; @@ -50,13 +51,16 @@ struct mlx5_tc_ct_priv { struct mlx5_flow_table *ct_nat; struct mlx5_flow_table *post_ct; struct mutex control_lock; /* guards parallel adds/dels */ + struct mutex shared_counter_lock; struct mapping_ctx *zone_mapping; struct mapping_ctx *labels_mapping; + enum mlx5_flow_namespace_type ns_type; + struct mlx5_fs_chains *chains; }; struct mlx5_ct_flow { - struct mlx5_esw_flow_attr pre_ct_attr; - struct mlx5_esw_flow_attr 
post_ct_attr; + struct mlx5_flow_attr *pre_ct_attr; + struct mlx5_flow_attr *post_ct_attr; struct mlx5_flow_handle *pre_ct_rule; struct mlx5_flow_handle *post_ct_rule; struct mlx5_ct_ft *ft; @@ -67,12 +71,12 @@ struct mlx5_ct_flow { struct mlx5_ct_zone_rule { struct mlx5_flow_handle *rule; struct mlx5e_mod_hdr_handle *mh; - struct mlx5_esw_flow_attr attr; + struct mlx5_flow_attr *attr; bool nat; }; struct mlx5_tc_ct_pre { - struct mlx5_flow_table *fdb; + struct mlx5_flow_table *ft; struct mlx5_flow_group *flow_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *flow_rule; @@ -114,11 +118,16 @@ struct mlx5_ct_tuple { u16 zone; }; +struct mlx5_ct_shared_counter { + struct mlx5_fc *counter; + refcount_t refcount; +}; + struct mlx5_ct_entry { struct rhash_head node; struct rhash_head tuple_node; struct rhash_head tuple_nat_node; - struct mlx5_fc *counter; + struct mlx5_ct_shared_counter *shared_counter; unsigned long cookie; unsigned long restore_cookie; struct mlx5_ct_tuple tuple; @@ -157,18 +166,6 @@ static const struct rhashtable_params tuples_nat_ht_params = { .min_size = 16 * 1024, }; -static struct mlx5_tc_ct_priv * -mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - return uplink_priv->ct_priv; -} - static int mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) { @@ -397,20 +394,30 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, } static void +mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) +{ + if (!refcount_dec_and_test(&entry->shared_counter->refcount)) + return; + + mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter); + kfree(entry->shared_counter); +} + +static void mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry, bool nat) { struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; - struct mlx5_esw_flow_attr *attr = &zone_rule->attr; - struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_flow_attr *attr = zone_rule->attr; ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); - mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); - mlx5e_mod_hdr_detach(ct_priv->esw->dev, - &esw->offloads.mod_hdr, zone_rule->mh); + mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr); + mlx5e_mod_hdr_detach(ct_priv->dev, + ct_priv->mod_hdr_tbl, zone_rule->mh); mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + kfree(attr); } static void @@ -419,8 +426,6 @@ mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, { mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); - - mlx5_fc_destroy(ct_priv->esw->dev, entry->counter); } static struct flow_action_entry * @@ -446,29 +451,40 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, u32 labels_id, u8 zone_restore_id) { - struct mlx5_eswitch *esw = ct_priv->esw; + enum mlx5_flow_namespace_type ns = ct_priv->ns_type; + struct mlx5_core_dev *dev = ct_priv->dev; int err; - err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, CTSTATE_TO_REG, ct_state); if (err) return err; - err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, 
ns, MARK_TO_REG, mark); if (err) return err; - err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, LABELS_TO_REG, labels_id); if (err) return err; - err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, ZONE_RESTORE_TO_REG, zone_restore_id); if (err) return err; + /* Make another copy of zone id in reg_b for + * NIC rx flows since we don't copy reg_c1 to + * reg_b upon miss. + */ + if (ns != MLX5_FLOW_NAMESPACE_FDB) { + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + NIC_ZONE_RESTORE_TO_REG, zone_restore_id); + if (err) + return err; + } return 0; } @@ -549,7 +565,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, struct mlx5e_tc_mod_hdr_acts *mod_acts) { struct flow_action *flow_action = &flow_rule->action; - struct mlx5_core_dev *mdev = ct_priv->esw->dev; + struct mlx5_core_dev *mdev = ct_priv->dev; struct flow_action_entry *act; size_t action_size; char *modact; @@ -560,8 +576,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_MANGLE: { - err = alloc_mod_hdr_actions(mdev, - MLX5_FLOW_NAMESPACE_FDB, + err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type, mod_acts); if (err) return err; @@ -590,7 +605,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, static int mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule, struct mlx5e_mod_hdr_handle **mh, u8 zone_restore_id, bool nat) @@ -626,9 +641,9 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, if (err) goto err_mapping; - *mh = mlx5e_mod_hdr_attach(ct_priv->esw->dev, - &ct_priv->esw->offloads.mod_hdr, - MLX5_FLOW_NAMESPACE_FDB, + *mh = mlx5e_mod_hdr_attach(ct_priv->dev, + ct_priv->mod_hdr_tbl, + ct_priv->ns_type, &mod_acts); if (IS_ERR(*mh)) { err = PTR_ERR(*mh); @@ -652,9 +667,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, bool nat, u8 zone_restore_id) { struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; - struct mlx5_esw_flow_attr *attr = &zone_rule->attr; - struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); struct mlx5_flow_spec *spec = NULL; + struct mlx5_flow_attr *attr; int err; zone_rule->nat = nat; @@ -663,6 +678,12 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, if (!spec) return -ENOMEM; + attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!attr) { + err = -ENOMEM; + goto err_attr; + } + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &zone_rule->mh, zone_restore_id, nat); @@ -676,9 +697,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = 0; attr->dest_ft = ct_priv->post_ct; - attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct; + attr->ft = nat ? 
ct_priv->ct_nat : ct_priv->ct; attr->outer_match_level = MLX5_MATCH_L4; - attr->counter = entry->counter; + attr->counter = entry->shared_counter->counter; attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); @@ -686,39 +707,100 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, entry->tuple.zone & MLX5_CT_ZONE_MASK, MLX5_CT_ZONE_MASK); - zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); if (IS_ERR(zone_rule->rule)) { err = PTR_ERR(zone_rule->rule); ct_dbg("Failed to add ct entry rule, nat: %d", nat); goto err_rule; } + zone_rule->attr = attr; + kfree(spec); ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); return 0; err_rule: - mlx5e_mod_hdr_detach(ct_priv->esw->dev, - &esw->offloads.mod_hdr, zone_rule->mh); + mlx5e_mod_hdr_detach(ct_priv->dev, + ct_priv->mod_hdr_tbl, zone_rule->mh); mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); err_mod_hdr: + kfree(attr); +err_attr: kfree(spec); return err; } +static struct mlx5_ct_shared_counter * +mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + struct mlx5_ct_tuple rev_tuple = entry->tuple; + struct mlx5_ct_shared_counter *shared_counter; + struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_ct_entry *rev_entry; + __be16 tmp_port; + int ret; + + /* get the reversed tuple */ + tmp_port = rev_tuple.port.src; + rev_tuple.port.src = rev_tuple.port.dst; + rev_tuple.port.dst = tmp_port; + + if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + __be32 tmp_addr = rev_tuple.ip.src_v4; + + rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4; + rev_tuple.ip.dst_v4 = tmp_addr; + } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct in6_addr tmp_addr = rev_tuple.ip.src_v6; + + rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6; + rev_tuple.ip.dst_v6 = tmp_addr; + } else { + return ERR_PTR(-EOPNOTSUPP); + } + + /* Use the same counter as the reverse direction */ + mutex_lock(&ct_priv->shared_counter_lock); + rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple, + tuples_ht_params); + if (rev_entry) { + if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) { + mutex_unlock(&ct_priv->shared_counter_lock); + return rev_entry->shared_counter; + } + } + mutex_unlock(&ct_priv->shared_counter_lock); + + shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL); + if (!shared_counter) + return ERR_PTR(-ENOMEM); + + shared_counter->counter = mlx5_fc_create(dev, true); + if (IS_ERR(shared_counter->counter)) { + ct_dbg("Failed to create counter for ct entry"); + ret = PTR_ERR(shared_counter->counter); + kfree(shared_counter); + return ERR_PTR(ret); + } + + refcount_set(&shared_counter->refcount, 1); + return shared_counter; +} + static int mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, struct flow_rule *flow_rule, struct mlx5_ct_entry *entry, u8 zone_restore_id) { - struct mlx5_eswitch *esw = ct_priv->esw; int err; - entry->counter = mlx5_fc_create(esw->dev, true); - if (IS_ERR(entry->counter)) { - err = PTR_ERR(entry->counter); + entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); + if (IS_ERR(entry->shared_counter)) { + err = PTR_ERR(entry->shared_counter); ct_dbg("Failed to create counter for ct entry"); return err; } @@ -738,7 +820,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, err_nat: mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 
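The shared-counter change above swaps the per-entry mlx5_fc_create()/mlx5_fc_destroy() pair for a refcounted counter that both directions of a connection share. A minimal sketch of the get-or-create pattern it relies on, assuming a single mutex and a stubbed-out reverse-tuple lookup; every name except the refcount_*(), mutex and allocation primitives is illustrative, not driver API:

#include <linux/err.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct shared_counter {
	refcount_t refcount;
	/* payload: in the driver this is a struct mlx5_fc *counter */
};

static DEFINE_MUTEX(counter_lock);

static struct shared_counter *reverse_tuple_lookup(void)
{
	/* Stand-in for the rhashtable lookup keyed by the reversed
	 * 5-tuple (src/dst addresses and ports swapped), as done above;
	 * returns NULL when this is the first direction seen.
	 */
	return NULL;
}

static struct shared_counter *shared_counter_get(void)
{
	struct shared_counter *c;

	mutex_lock(&counter_lock);
	c = reverse_tuple_lookup();
	/* Share only if the reverse entry is not already being torn down. */
	if (c && refcount_inc_not_zero(&c->refcount)) {
		mutex_unlock(&counter_lock);
		return c;
	}
	mutex_unlock(&counter_lock);

	c = kzalloc(sizeof(*c), GFP_KERNEL);
	if (!c)
		return ERR_PTR(-ENOMEM);
	refcount_set(&c->refcount, 1);	/* first direction owns the counter */
	return c;
}

static void shared_counter_put(struct shared_counter *c)
{
	if (refcount_dec_and_test(&c->refcount))
		kfree(c);
}

The refcount_inc_not_zero() check is what keeps the lookup safe against a concurrent put: a counter whose refcount has already reached zero is never resurrected.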
err_orig: - mlx5_fc_destroy(esw->dev, entry->counter); + mlx5_tc_ct_shared_counter_put(ct_priv, entry); return err; } @@ -828,12 +910,16 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) { mlx5_tc_ct_entry_del_rules(ct_priv, entry); + mutex_lock(&ct_priv->shared_counter_lock); if (entry->tuple_node.next) rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, &entry->tuple_nat_node, tuples_nat_ht_params); rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, tuples_ht_params); + mutex_unlock(&ct_priv->shared_counter_lock); + mlx5_tc_ct_shared_counter_put(ct_priv, entry); + } static int @@ -870,7 +956,7 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, if (!entry) return -ENOENT; - mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse); + mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse); flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); @@ -943,9 +1029,7 @@ out: return false; } -int -mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec) +int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) { u32 ctstate = 0, ctstate_mask = 0; @@ -961,24 +1045,21 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, return 0; } -void mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr) +void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); - - if (!ct_priv || !ct_attr->ct_labels_id) + if (!priv || !ct_attr->ct_labels_id) return; - mapping_remove(ct_priv->labels_mapping, ct_attr->ct_labels_id); + mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id); } int -mlx5_tc_ct_match_add(struct mlx5e_priv *priv, +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct mlx5_ct_attr *ct_attr, struct netlink_ext_ack *extack) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector_key_ct *mask, *key; bool trk, est, untrk, unest, new; @@ -991,7 +1072,7 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv, if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) return 0; - if (!ct_priv) { + if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct matching isn't available"); return -EOPNOTSUPP; @@ -1047,7 +1128,7 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv, ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; - if (mapping_add(ct_priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) + if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) return -EOPNOTSUPP; mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, MLX5_CT_LABELS_MASK); @@ -1057,14 +1138,12 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv, } int -mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); - - if (!ct_priv) { + if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct action isn't available"); return -EOPNOTSUPP; @@ -1083,8 +1162,8 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, { struct 
mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; - struct mlx5_core_dev *dev = ct_priv->esw->dev; - struct mlx5_flow_table *fdb = pre_ct->fdb; + struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_flow_table *ft = pre_ct->ft; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {}; struct mlx5_modify_hdr *mod_hdr; @@ -1099,14 +1178,14 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, return -ENOMEM; zone = ct_ft->zone & MLX5_CT_ZONE_MASK; - err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone); + err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type, + ZONE_TO_REG, zone); if (err) { ct_dbg("Failed to set zone register mapping"); goto err_mapping; } - mod_hdr = mlx5_modify_header_alloc(dev, - MLX5_FLOW_NAMESPACE_FDB, + mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type, pre_mod_acts.num_actions, pre_mod_acts.actions); @@ -1132,7 +1211,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); dest.ft = ct_priv->post_ct; - rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); ct_dbg("Failed to add pre ct flow rule zone %d", zone); @@ -1143,7 +1222,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, /* add miss rule */ memset(spec, 0, sizeof(*spec)); dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; - rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); ct_dbg("Failed to add pre ct miss rule zone %d", zone); @@ -1170,7 +1249,7 @@ tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, struct mlx5_tc_ct_pre *pre_ct) { struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; - struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_core_dev *dev = ct_priv->dev; mlx5_del_flow_rules(pre_ct->flow_rule); mlx5_del_flow_rules(pre_ct->miss_rule); @@ -1184,7 +1263,7 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; - struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_core_dev *dev = ct_priv->dev; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *ns; struct mlx5_flow_table *ft; @@ -1194,10 +1273,10 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, void *misc; int err; - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type); if (!ns) { err = -EOPNOTSUPP; - ct_dbg("Failed to get FDB flow namespace"); + ct_dbg("Failed to get flow namespace"); return err; } @@ -1206,7 +1285,8 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, return -ENOMEM; ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; - ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ? 
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO; ft_attr.max_fte = 2; ft_attr.level = 1; ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) { err = PTR_ERR(ft); ct_dbg("Failed to create pre ct table"); goto out_free; } - pre_ct->fdb = ft; + pre_ct->ft = ft; /* create flow group */ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); @@ -1279,7 +1359,7 @@ mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, tc_ct_pre_ct_del_rules(ct_ft, pre_ct); mlx5_destroy_flow_group(pre_ct->miss_grp); mlx5_destroy_flow_group(pre_ct->flow_grp); - mlx5_destroy_flow_table(pre_ct->fdb); + mlx5_destroy_flow_table(pre_ct->ft); } static int @@ -1398,7 +1478,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) /* We translate the tc filter with CT action to the following HW model: * * +---------------------+ - * + fdb prio (tc chain) + + * + ft prio (tc chain) + * + original match + * +---------------------+ | set chain miss mapping @@ -1428,17 +1508,17 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * +--------------+ */ static struct mlx5_flow_handle * -__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, +__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *orig_spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); struct mlx5_flow_spec *post_ct_spec = NULL; - struct mlx5_eswitch *esw = ct_priv->esw; - struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; struct mlx5_flow_handle *rule; struct mlx5_ct_flow *ct_flow; @@ -1473,10 +1553,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, } ct_flow->fte_id = fte_id; - /* Base esw attributes of both rules on original rule attribute */ - pre_ct_attr = &ct_flow->pre_ct_attr; - memcpy(pre_ct_attr, attr, sizeof(*attr)); - memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr)); + /* Base flow attributes of both rules on original rule attribute */ + ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!ct_flow->pre_ct_attr) { + err = -ENOMEM; + goto err_alloc_pre; + } + + ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!ct_flow->post_ct_attr) { + err = -ENOMEM; + goto err_alloc_post; + } + + pre_ct_attr = ct_flow->pre_ct_attr; + memcpy(pre_ct_attr, attr, attr_sz); + memcpy(ct_flow->post_ct_attr, attr, attr_sz); /* Modify the original rule's action to fwd and modify, leave decap */ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; @@ -1487,22 +1579,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, * don't go through all prios of this chain as normal tc rules * miss.
*/ - err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain, - &chain_mapping); + err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain, + &chain_mapping); if (err) { ct_dbg("Failed to get chain register mapping for chain"); goto err_get_chain; } ct_flow->chain_mapping = chain_mapping; - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, CHAIN_TO_REG, chain_mapping); if (err) { ct_dbg("Failed to set chain register mapping"); goto err_mapping; } - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, FTEID_TO_REG, fte_id); if (err) { ct_dbg("Failed to set fte_id register mapping"); @@ -1516,7 +1608,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, attr->chain == 0) { u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, + ct_priv->ns_type, TUNNEL_TO_REG, tun_id); if (err) { @@ -1525,8 +1618,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, } } - mod_hdr = mlx5_modify_header_alloc(esw->dev, - MLX5_FLOW_NAMESPACE_FDB, + mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, pre_mod_acts.num_actions, pre_mod_acts.actions); if (IS_ERR(mod_hdr)) { @@ -1542,16 +1634,16 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, fte_id, MLX5_FTE_ID_MASK); - /* Put post_ct rule on post_ct fdb */ - ct_flow->post_ct_attr.chain = 0; - ct_flow->post_ct_attr.prio = 0; - ct_flow->post_ct_attr.fdb = ct_priv->post_ct; + /* Put post_ct rule on post_ct flow table */ + ct_flow->post_ct_attr->chain = 0; + ct_flow->post_ct_attr->prio = 0; + ct_flow->post_ct_attr->ft = ct_priv->post_ct; - ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE; - ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE; - ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); - rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec, - &ct_flow->post_ct_attr); + ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; + ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; + ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + rule = mlx5_tc_rule_insert(priv, post_ct_spec, + ct_flow->post_ct_attr); ct_flow->post_ct_rule = rule; if (IS_ERR(ct_flow->post_ct_rule)) { err = PTR_ERR(ct_flow->post_ct_rule); @@ -1561,10 +1653,9 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, /* Change original rule point to ct table */ pre_ct_attr->dest_chain = 0; - pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb; - ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, - orig_spec, - pre_ct_attr); + pre_ct_attr->dest_ft = nat ? 
ft->pre_ct_nat.ft : ft->pre_ct.ft; + ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec, + pre_ct_attr); if (IS_ERR(ct_flow->pre_ct_rule)) { err = PTR_ERR(ct_flow->pre_ct_rule); ct_dbg("Failed to add pre ct rule"); @@ -1578,14 +1669,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, return rule; err_insert_orig: - mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule, - &ct_flow->post_ct_attr); + mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, + ct_flow->post_ct_attr); err_insert_post_ct: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: dealloc_mod_hdr_actions(&pre_mod_acts); - mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); + mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); err_get_chain: + kfree(ct_flow->post_ct_attr); +err_alloc_post: + kfree(ct_flow->pre_ct_attr); +err_alloc_pre: idr_remove(&ct_priv->fte_ids, fte_id); err_idr: mlx5_tc_ct_del_ft_cb(ct_priv, ft); @@ -1597,14 +1692,14 @@ err_ft: } static struct mlx5_flow_handle * -__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, +__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_flow_spec *orig_spec, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_acts) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); - struct mlx5_eswitch *esw = ct_priv->esw; - struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); + u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); + struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; struct mlx5_flow_handle *rule; struct mlx5_ct_flow *ct_flow; @@ -1615,8 +1710,13 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, return ERR_PTR(-ENOMEM); /* Base esw attributes on original rule attribute */ - pre_ct_attr = &ct_flow->pre_ct_attr; - memcpy(pre_ct_attr, attr, sizeof(*attr)); + pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!pre_ct_attr) { + err = -ENOMEM; + goto err_attr; + } + + memcpy(pre_ct_attr, attr, attr_sz); err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0); if (err) { @@ -1624,8 +1724,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, goto err_set_registers; } - mod_hdr = mlx5_modify_header_alloc(esw->dev, - MLX5_FLOW_NAMESPACE_FDB, + mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, mod_acts->num_actions, mod_acts->actions); if (IS_ERR(mod_hdr)) { @@ -1638,7 +1737,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, pre_ct_attr->modify_hdr = mod_hdr; pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr); + rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr); if (IS_ERR(rule)) { err = PTR_ERR(rule); ct_dbg("Failed to add ct clear rule"); @@ -1646,6 +1745,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, } attr->ct_attr.ct_flow = ct_flow; + ct_flow->pre_ct_attr = pre_ct_attr; ct_flow->pre_ct_rule = rule; return rule; @@ -1654,61 +1754,67 @@ err_insert: err_set_registers: netdev_warn(priv->netdev, "Failed to offload ct clear flow, err %d\n", err); + kfree(pre_ct_attr); +err_attr: + kfree(ct_flow); + return ERR_PTR(err); } struct mlx5_flow_handle * -mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts 
*mod_hdr_acts) { bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); struct mlx5_flow_handle *rule; - if (!ct_priv) + if (!priv) return ERR_PTR(-EOPNOTSUPP); - mutex_lock(&ct_priv->control_lock); + mutex_lock(&priv->control_lock); if (clear_action) rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts); else rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr); - mutex_unlock(&ct_priv->control_lock); + mutex_unlock(&priv->control_lock); return rule; } static void __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5e_tc_flow *flow, struct mlx5_ct_flow *ct_flow) { - struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr; - struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule, - pre_ct_attr); - mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr); + mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, + pre_ct_attr); + mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); if (ct_flow->post_ct_rule) { - mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule, - &ct_flow->post_ct_attr); - mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); + mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, + ct_flow->post_ct_attr); + mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); } + kfree(ct_flow->pre_ct_attr); + kfree(ct_flow->post_ct_attr); kfree(ct_flow); } void -mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; /* We are called on error to clean up stuff from parsing @@ -1717,22 +1823,15 @@ mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, if (!ct_flow) return; - mutex_lock(&ct_priv->control_lock); - __mlx5_tc_ct_delete_flow(ct_priv, ct_flow); - mutex_unlock(&ct_priv->control_lock); + mutex_lock(&priv->control_lock); + __mlx5_tc_ct_delete_flow(priv, flow, ct_flow); + mutex_unlock(&priv->control_lock); } static int -mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw, - const char **err_msg) +mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, + const char **err_msg) { -#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - /* cannot restore chain ID on HW miss */ - - *err_msg = "tc skb extension missing"; - return -EOPNOTSUPP; -#endif - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { *err_msg = "firmware level support is missing"; return -EOPNOTSUPP; @@ -1766,44 +1865,61 @@ mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw, return 0; } -static void -mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err) +static int +mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv, + const char **err_msg) +{ + if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + *err_msg = "firmware level support is missing"; + return -EOPNOTSUPP; + } + + return 0; +} + +static int +mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + const char **err_msg) { - if (msg) - 
netdev_warn(rpriv->netdev, - "tc ct offload not supported, %s, err: %d\n", - msg, err); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + /* cannot restore chain ID on HW miss */ + + *err_msg = "tc skb extension missing"; + return -EOPNOTSUPP; +#endif + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + return mlx5_tc_ct_init_check_esw_support(esw, err_msg); else - netdev_warn(rpriv->netdev, - "tc ct offload not supported, err: %d\n", - err); + return mlx5_tc_ct_init_check_nic_support(priv, err_msg); } -int -mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +#define INIT_ERR_PREFIX "tc ct offload init failed" + +struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type) { struct mlx5_tc_ct_priv *ct_priv; - struct mlx5e_rep_priv *rpriv; - struct mlx5_eswitch *esw; - struct mlx5e_priv *priv; + struct mlx5_core_dev *dev; const char *msg; int err; - rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); - priv = netdev_priv(rpriv->netdev); - esw = priv->mdev->priv.eswitch; - - err = mlx5_tc_ct_init_check_support(esw, &msg); + dev = priv->mdev; + err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg); if (err) { - mlx5_tc_ct_init_err(rpriv, msg, err); + mlx5_core_warn(dev, + "tc ct offload not supported, %s\n", + msg); goto err_support; } ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); - if (!ct_priv) { - mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM); + if (!ct_priv) goto err_alloc; - } ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true); if (IS_ERR(ct_priv->zone_mapping)) { @@ -1817,46 +1933,51 @@ mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) goto err_mapping_labels; } - ct_priv->esw = esw; - ct_priv->netdev = rpriv->netdev; - ct_priv->ct = mlx5_esw_chains_create_global_table(esw); + ct_priv->ns_type = ns_type; + ct_priv->chains = chains; + ct_priv->netdev = priv->netdev; + ct_priv->dev = priv->mdev; + ct_priv->mod_hdr_tbl = mod_hdr; + ct_priv->ct = mlx5_chains_create_global_table(chains); if (IS_ERR(ct_priv->ct)) { err = PTR_ERR(ct_priv->ct); - mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err); + mlx5_core_warn(dev, + "%s, failed to create ct table err: %d\n", + INIT_ERR_PREFIX, err); goto err_ct_tbl; } - ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw); + ct_priv->ct_nat = mlx5_chains_create_global_table(chains); if (IS_ERR(ct_priv->ct_nat)) { err = PTR_ERR(ct_priv->ct_nat); - mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table", - err); + mlx5_core_warn(dev, + "%s, failed to create ct nat table err: %d\n", + INIT_ERR_PREFIX, err); goto err_ct_nat_tbl; } - ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw); + ct_priv->post_ct = mlx5_chains_create_global_table(chains); if (IS_ERR(ct_priv->post_ct)) { err = PTR_ERR(ct_priv->post_ct); - mlx5_tc_ct_init_err(rpriv, "failed to create post ct table", - err); + mlx5_core_warn(dev, + "%s, failed to create post ct table err: %d\n", + INIT_ERR_PREFIX, err); goto err_post_ct_tbl; } idr_init(&ct_priv->fte_ids); mutex_init(&ct_priv->control_lock); + mutex_init(&ct_priv->shared_counter_lock); rhashtable_init(&ct_priv->zone_ht, &zone_params); rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params); - /* Done, set ct_priv to know it initializted */ - uplink_priv->ct_priv = ct_priv; - - return 0; + return ct_priv; err_post_ct_tbl: - 
mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat); + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); err_ct_nat_tbl: - mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct); + mlx5_chains_destroy_global_table(chains, ct_priv->ct); err_ct_tbl: mapping_destroy(ct_priv->labels_mapping); err_mapping_labels: @@ -1866,20 +1987,22 @@ err_mapping_zone: err_alloc: err_support: - return 0; + return NULL; } void -mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) { - struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; + struct mlx5_fs_chains *chains; if (!ct_priv) return; - mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct); - mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat); - mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct); + chains = ct_priv->chains; + + mlx5_chains_destroy_global_table(chains, ct_priv->post_ct); + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); + mlx5_chains_destroy_global_table(chains, ct_priv->ct); mapping_destroy(ct_priv->zone_mapping); mapping_destroy(ct_priv->labels_mapping); @@ -1887,17 +2010,15 @@ mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); rhashtable_destroy(&ct_priv->zone_ht); mutex_destroy(&ct_priv->control_lock); + mutex_destroy(&ct_priv->shared_counter_lock); idr_destroy(&ct_priv->fte_ids); kfree(ct_priv); - - uplink_priv->ct_priv = NULL; } bool -mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id) { - struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; struct mlx5_ct_tuple tuple = {}; struct mlx5_ct_entry *entry; u16 zone; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 708c216325d3..6503b614337c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -10,12 +10,14 @@ #include "en.h" -struct mlx5_esw_flow_attr; +struct mlx5_flow_attr; struct mlx5e_tc_mod_hdr_acts; struct mlx5_rep_uplink_priv; struct mlx5e_tc_flow; struct mlx5e_priv; +struct mlx5_fs_chains; +struct mlx5_tc_ct_priv; struct mlx5_ct_flow; struct nf_flowtable; @@ -76,68 +78,78 @@ struct mlx5_ct_attr { misc_parameters_2.metadata_reg_c_1) + 3,\ } +#define nic_zone_restore_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\ + .moffset = 2,\ + .mlen = 1,\ +} + #define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen) +#define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset) +#define REG_MAPPING_SHIFT(reg) (REG_MAPPING_MOFFSET(reg) * 8) #define ZONE_RESTORE_BITS (REG_MAPPING_MLEN(ZONE_RESTORE_TO_REG) * 8) #define ZONE_RESTORE_MAX GENMASK(ZONE_RESTORE_BITS - 1, 0) #if IS_ENABLED(CONFIG_MLX5_TC_CT) -int -mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv); +struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type); void -mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv); +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv); void -mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr); +mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr); int -mlx5_tc_ct_match_add(struct mlx5e_priv *priv, +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv 
*priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct mlx5_ct_attr *ct_attr, struct netlink_ext_ack *extack); +int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec); int -mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec); -int -mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, const struct flow_action_entry *act, struct netlink_ext_ack *extack); struct mlx5_flow_handle * -mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); void -mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); bool -mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id); #else /* CONFIG_MLX5_TC_CT */ -static inline int -mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +static inline struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type) { - return 0; + return NULL; } static inline void -mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) { } static inline void -mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr) {} +mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) {} static inline int -mlx5_tc_ct_match_add(struct mlx5e_priv *priv, +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct mlx5_ct_attr *ct_attr, @@ -149,47 +161,44 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv, return 0; NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); - netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); return -EOPNOTSUPP; } static inline int -mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec) +mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) { return 0; } static inline int -mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); - netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); return -EOPNOTSUPP; } static inline struct mlx5_flow_handle * -mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { return ERR_PTR(-EOPNOTSUPP); } static inline void -mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { } static inline bool -mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, 
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id) { if (!zone_restore_id) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 24336c60123a..07ee1d236ab3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -7,6 +7,21 @@ #include "en.h" #include <linux/indirect_call_wrapper.h> +#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + +/* The product of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS + * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. + * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a + * full-session WQE be cache-aligned. + */ +#if L1_CACHE_BYTES < 128 +#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) +#else +#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) +#endif + +#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) + #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) enum mlx5e_icosq_wqe_type { @@ -46,8 +61,6 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); -void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); @@ -110,6 +123,7 @@ struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; u8 num_dma; + u8 num_fifo_pkts; #ifdef CONFIG_MLX5_EN_TLS struct page *resync_dump_frag_page; #endif @@ -194,23 +208,6 @@ static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size) } static inline void -mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, - u16 pi, u16 nnops) -{ - struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; - - edge_wi = wi + nnops; - - /* fill sq frag edge with nops to avoid wqe wrapping two pages */ - for (; wi < edge_wi; wi++) { - memset(wi, 0, sizeof(*wi)); - wi->num_wqebbs = 1; - mlx5e_post_nop(wq, sq->sqn, &sq->pc); - } - sq->stats->nop += nnops; -} - -static inline void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, struct mlx5_wqe_ctrl_seg *ctrl) { @@ -228,29 +225,6 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, mlx5_write64((__be32 *)ctrl, uar_map); } -static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg) -{ - return cseg && !!cseg->tis_tir_num; -} - -static inline u8 -mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg, - struct sk_buff *skb) -{ - u8 mode; - - if (mlx5e_transport_inline_tx_wqe(cseg)) - return MLX5_INLINE_MODE_TCP_UDP; - - mode = sq->min_inline_mode; - - if (skb_vlan_tag_present(skb) && - test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) - mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); - - return mode; -} - static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) { struct mlx5_core_cq *mcq; @@ -276,6 +250,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, dma->type = map_type; } +static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i) +{ + return &sq->db.skb_fifo[i & sq->skb_fifo_mask]; +} + +static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq,
struct sk_buff *skb) +{ + struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++); + + *skb_item = skb; +} + +static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq) +{ + return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++); +} + static inline void mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) { @@ -291,6 +282,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) } } +void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more); +void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); + +static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session) +{ + return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS; +} + static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index b28df21981a1..ae90d533a350 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -59,7 +59,7 @@ static inline bool mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, struct mlx5e_dma_info *di, struct xdp_buff *xdp) { - struct mlx5e_xdp_xmit_data xdptxd; + struct mlx5e_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; struct xdp_frame *xdpf; dma_addr_t dma_addr; @@ -194,18 +194,22 @@ static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size) static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) { - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; + struct mlx5e_tx_wqe *wqe; u16 pi; - pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS); - session->wqe = MLX5E_TX_FETCH_WQE(sq, pi); + pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + net_prefetchw(wqe->data); - prefetchw(session->wqe->data); - session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; - session->pkt_count = 0; - - mlx5e_xdp_update_inline_state(sq); + *session = (struct mlx5e_tx_mpwqe) { + .wqe = wqe, + .bytes_count = 0, + .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .pkt_count = 0, + .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on), + }; stats->mpwqe++; } @@ -213,7 +217,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl; u16 ds_count = session->ds_count; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -258,10 +262,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq } INDIRECT_CALLABLE_SCOPE bool -mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, +mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdp_info *xdpi, int check_result) { - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; if (unlikely(xdptxd->len > sq->hw_mtu)) { @@ -284,8 +288,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *x mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) || - session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS)) + if 
(unlikely(mlx5e_xdp_mpqwe_is_full(session))) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); @@ -306,7 +309,7 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) } INDIRECT_CALLABLE_SCOPE bool -mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, +mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdp_info *xdpi, int check_result) { struct mlx5_wq_cyc *wq = &sq->wq; @@ -322,7 +325,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, struct mlx5e_xdpsq_stats *stats = sq->stats; - prefetchw(wqe); + net_prefetchw(wqe); if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) { stats->err++; @@ -445,7 +448,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); if (xsk_frames) - xsk_umem_complete_tx(sq->umem, xsk_frames); + xsk_tx_completed(sq->xsk_pool, xsk_frames); sq->stats->cqes += i; @@ -475,7 +478,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) } if (xsk_frames) - xsk_umem_complete_tx(sq->umem, xsk_frames); + xsk_tx_completed(sq->xsk_pool, xsk_frames); } int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -503,7 +506,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; - struct mlx5e_xdp_xmit_data xdptxd; + struct mlx5e_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; bool ret; @@ -563,4 +566,3 @@ void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) sq->xmit_xdp_frame = is_mpw ? mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; } - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index e806c13d491f..d487e5e37162 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -38,27 +38,12 @@ #include "en/txrx.h" #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) -#define MLX5E_XDP_TX_EMPTY_DS_COUNT \ - (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) -#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) - -#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) -#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \ - DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS) - -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. - * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. 
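For reference, the bound this comment motivates (moved in this patch from en/xdp.h to en/txrx.h as MLX5E_TX_MPW_MAX_WQEBBS) works out as follows, taking the values from the mlx5 headers, where a WQEBB is 64 bytes and carries MLX5_SEND_WQEBB_NUM_DS == 4 data segments of 16 bytes each:

	16 WQEBBs * 4 DS = 64, which overflows the 6-bit DS field (maximum 63);
	15 WQEBBs * 4 DS = 60 DS, and 15 * 64 = 960 bytes, a multiple of 64 but not of 128;
	14 WQEBBs * 4 DS = 56 DS, and 14 * 64 = 896 bytes, a multiple of 128.

Hence MLX5_SEND_WQE_MAX_WQEBBS - 1 on 64-byte cache lines and MLX5_SEND_WQE_MAX_WQEBBS - 2 on 128-byte ones. A compile-time restatement of the hard constraint, as a sketch (static_assert is the kernel macro from <linux/build_bug.h>; the left-hand macro is the one this patch defines):

	static_assert(MLX5E_TX_MPW_MAX_NUM_DS <= 63,
		      "MPWQE DS count must fit the 6-bit ctrl-segment DS field");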
- */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif +#define MLX5E_XDP_TX_DS_COUNT (MLX5E_TX_WQE_EMPTY_DS_COUNT + 1 /* SG DS */) -#define MLX5E_XDP_MPW_MAX_NUM_DS \ - (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) +#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16 +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \ + (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \ + sizeof(struct mlx5_wqe_inline_seg)) struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); @@ -73,11 +58,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdp_info *xdpi, int check_result)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdp_info *xdpi, int check_result)); INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)); @@ -122,30 +107,28 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) /* Enable inline WQEs to shift some load from a congested HCA (HW) to * a less congested cpu (SW). */ -static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) +static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur) { u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc; - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; #define MLX5E_XDP_INLINE_WATERMARK_LOW 10 #define MLX5E_XDP_INLINE_WATERMARK_HIGH 128 - if (session->inline_on) { - if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW) - session->inline_on = 0; - return; - } + if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW) + return false; + + if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH) + return true; - /* inline is false */ - if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH) - session->inline_on = 1; + return cur; } -static inline bool -mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session) +static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session) { - return session->inline_on && - session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS; + if (session->inline_on) + return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > + MLX5E_TX_MPW_MAX_NUM_DS; + return mlx5e_tx_mpwqe_is_full(session); } struct mlx5e_xdp_wqe_info { @@ -155,15 +138,16 @@ struct mlx5e_xdp_wqe_info { static inline void mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdpsq_stats *stats) { - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5_wqe_data_seg *dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; u32 dma_len = xdptxd->len; session->pkt_count++; + session->bytes_count += dma_len; if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { struct mlx5_wqe_inline_seg *inline_dseg = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 331ca2b0f8a4..71e8d66fa150 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -1,31 +1,31 @@ // 
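
The inline-state rework in this hunk is worth a note: the old mlx5e_xdp_update_inline_state() mutated session->inline_on in place, while the new mlx5e_xdp_get_inline_state() is a pure function whose result the caller feeds back into the session reset in mlx5e_xdp_mpwqe_session_start(). The two watermarks give the decision hysteresis, so it cannot flap packet-by-packet when the count of outstanding descriptors hovers near a single threshold. Restated compactly, with the watermark values from the patch:

	/* outstanding = xdpi_fifo_pc - xdpi_fifo_cc: descriptors posted but
	 * not yet completed by the HW. Inlining trades CPU for HCA relief,
	 * so it is enabled only under sustained congestion and disabled
	 * only once the queue has largely drained.
	 */
	static bool xdp_inline_state(bool cur, u16 outstanding)
	{
		if (cur && outstanding <= 10)	/* LOW watermark: HW caught up */
			return false;
		if (!cur && outstanding >= 128)	/* HIGH watermark: HW congested */
			return true;
		return cur;			/* between watermarks: keep state */
	}
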
SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* Copyright (c) 2019 Mellanox Technologies. */ +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */ #include <net/xdp_sock_drv.h> -#include "umem.h" +#include "pool.h" #include "setup.h" #include "en/params.h" -static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv, - struct xdp_umem *umem) +static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) { - struct device *dev = priv->mdev->device; + struct device *dev = mlx5_core_dma_dev(priv->mdev); - return xsk_buff_dma_map(umem, dev, 0); + return xsk_pool_dma_map(pool, dev, 0); } -static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv, - struct xdp_umem *umem) +static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) { - return xsk_buff_dma_unmap(umem, 0); + return xsk_pool_dma_unmap(pool, 0); } -static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) +static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk) { - if (!xsk->umems) { - xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS, - sizeof(*xsk->umems), GFP_KERNEL); - if (unlikely(!xsk->umems)) + if (!xsk->pools) { + xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->pools), GFP_KERNEL); + if (unlikely(!xsk->pools)) return -ENOMEM; } @@ -35,68 +35,68 @@ static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) return 0; } -static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk) +static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk) { if (!--xsk->refcnt) { - kfree(xsk->umems); - xsk->umems = NULL; + kfree(xsk->pools); + xsk->pools = NULL; } } -static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix) +static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix) { int err; - err = mlx5e_xsk_get_umems(xsk); + err = mlx5e_xsk_get_pools(xsk); if (unlikely(err)) return err; - xsk->umems[ix] = umem; + xsk->pools[ix] = pool; return 0; } -static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix) +static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix) { - xsk->umems[ix] = NULL; + xsk->pools[ix] = NULL; - mlx5e_xsk_put_umems(xsk); + mlx5e_xsk_put_pools(xsk); } -static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem) +static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool) { - return xsk_umem_get_headroom(umem) <= 0xffff && - xsk_umem_get_chunk_size(umem) <= 0xffff; + return xsk_pool_get_headroom(pool) <= 0xffff && + xsk_pool_get_chunk_size(pool) <= 0xffff; } -void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk) +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk) { - xsk->headroom = xsk_umem_get_headroom(umem); - xsk->chunk_size = xsk_umem_get_chunk_size(umem); + xsk->headroom = xsk_pool_get_headroom(pool); + xsk->chunk_size = xsk_pool_get_chunk_size(pool); } static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, - struct xdp_umem *umem, u16 ix) + struct xsk_buff_pool *pool, u16 ix) { struct mlx5e_params *params = &priv->channels.params; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; int err; - if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix))) + if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix))) return -EBUSY; - if (unlikely(!mlx5e_xsk_is_umem_sane(umem))) + if (unlikely(!mlx5e_xsk_is_pool_sane(pool))) return -EINVAL; - err = mlx5e_xsk_map_umem(priv, umem); + err = mlx5e_xsk_map_pool(priv, pool); if (unlikely(err)) return err; - err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix); + 
err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix); if (unlikely(err)) - goto err_unmap_umem; + goto err_unmap_pool; - mlx5e_build_xsk_param(umem, &xsk); + mlx5e_build_xsk_param(pool, &xsk); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { /* XSK objects will be created on open. */ @@ -112,9 +112,9 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, c = priv->channels.c[ix]; - err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + err = mlx5e_open_xsk(priv, params, &xsk, pool, c); if (unlikely(err)) - goto err_remove_umem; + goto err_remove_pool; mlx5e_activate_xsk(c); @@ -132,11 +132,11 @@ err_deactivate: mlx5e_deactivate_xsk(c); mlx5e_close_xsk(c); -err_remove_umem: - mlx5e_xsk_remove_umem(&priv->xsk, ix); +err_remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); -err_unmap_umem: - mlx5e_xsk_unmap_umem(priv, umem); +err_unmap_pool: + mlx5e_xsk_unmap_pool(priv, pool); return err; @@ -146,7 +146,7 @@ validate_closed: */ if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { err = -EINVAL; - goto err_remove_umem; + goto err_remove_pool; } return 0; @@ -154,45 +154,45 @@ validate_closed: static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) { - struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params, + struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix); struct mlx5e_channel *c; - if (unlikely(!umem)) + if (unlikely(!pool)) return -EINVAL; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto remove_umem; + goto remove_pool; /* XSK RQ and SQ are only created if XDP program is set. */ if (!priv->channels.params.xdp_prog) - goto remove_umem; + goto remove_pool; c = priv->channels.c[ix]; mlx5e_xsk_redirect_rqt_to_drop(priv, ix); mlx5e_deactivate_xsk(c); mlx5e_close_xsk(c); -remove_umem: - mlx5e_xsk_remove_umem(&priv->xsk, ix); - mlx5e_xsk_unmap_umem(priv, umem); +remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); + mlx5e_xsk_unmap_pool(priv, pool); return 0; } -static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem, +static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool, u16 ix) { int err; mutex_lock(&priv->state_lock); - err = mlx5e_xsk_enable_locked(priv, umem, ix); + err = mlx5e_xsk_enable_locked(priv, pool, ix); mutex_unlock(&priv->state_lock); return err; } -static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) +static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix) { int err; @@ -203,7 +203,7 @@ static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) return err; } -int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_params *params = &priv->channels.params; @@ -212,6 +212,6 @@ int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) return -EINVAL; - return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) : - mlx5e_xsk_disable_umem(priv, ix); + return pool ? 
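
The pools-array management above (mlx5e_xsk_get_pools()/mlx5e_xsk_put_pools()) is a lazily allocated, refcounted registry: the per-channel array is created when the first XSK pool is attached and freed when the last one detaches, so an idle device carries no XSK state. A reduced sketch of the pattern; the names are illustrative, and the exact point where refcnt is incremented on the get side is inferred, since that context line falls outside the hunk shown.

	struct pool_registry {
		struct xsk_buff_pool **pools;	/* one slot per channel */
		u16 refcnt;			/* number of attached pools */
	};

	/* Both helpers run under the owner's lock (priv->state_lock here). */
	static int registry_get(struct pool_registry *r, u16 nch)
	{
		if (!r->pools) {
			r->pools = kcalloc(nch, sizeof(*r->pools), GFP_KERNEL);
			if (!r->pools)
				return -ENOMEM;
		}
		r->refcnt++;
		return 0;
	}

	static void registry_put(struct pool_registry *r)
	{
		if (!--r->refcnt) {
			kfree(r->pools);
			r->pools = NULL;
		}
	}
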
mlx5e_xsk_enable_pool(priv, pool, ix) : + mlx5e_xsk_disable_pool(priv, ix); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h new file mode 100644 index 000000000000..dca0010a0866 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_XSK_POOL_H__ +#define __MLX5_EN_XSK_POOL_H__ + +#include "en.h" + +static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->pools) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->pools[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. */ +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid); + +#endif /* __MLX5_EN_XSK_POOL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 40db27bf790b..8e7b877d8a12 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -47,8 +47,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, xdp->data_end = xdp->data + cqe_bcnt32; xdp_set_data_meta_invalid(xdp); - xsk_buff_dma_sync_for_cpu(xdp); - prefetch(xdp->data); + xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); + net_prefetch(xdp->data); /* Possible flows: * - XDP_REDIRECT to XSKMAP: @@ -93,8 +93,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, xdp->data_end = xdp->data + cqe_bcnt; xdp_set_data_meta_invalid(xdp); - xsk_buff_dma_sync_for_cpu(xdp); - prefetch(xdp->data); + xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); + net_prefetch(xdp->data); if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index d147b2f13b54..7f88ccf67fdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -19,10 +19,10 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); -static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, +static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - dma_info->xsk = xsk_buff_alloc(rq->umem); + dma_info->xsk = xsk_buff_alloc(rq->xsk_pool); if (!dma_info->xsk) return -ENOMEM; @@ -38,13 +38,13 @@ static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err) { - if (!xsk_umem_uses_need_wakeup(rq->umem)) + if (!xsk_uses_need_wakeup(rq->xsk_pool)) return alloc_err; if (unlikely(alloc_err)) - xsk_set_rx_need_wakeup(rq->umem); + xsk_set_rx_need_wakeup(rq->xsk_pool); else - xsk_clear_rx_need_wakeup(rq->umem); + xsk_clear_rx_need_wakeup(rq->xsk_pool); return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 55e65a438de7..4e574ac73019 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -45,7 
+45,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, } int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, struct mlx5e_channel *c) { struct mlx5e_channel_param *cparam; @@ -64,7 +64,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (unlikely(err)) goto err_free_cparam; - err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq); + err = mlx5e_open_rq(c, params, &cparam->rq, xsk, pool, &c->xskrq); if (unlikely(err)) goto err_close_rx_cq; @@ -72,13 +72,13 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (unlikely(err)) goto err_close_rq; - /* Create a separate SQ, so that when the UMEM is disabled, we could + /* Create a separate SQ, so that when the buff pool is disabled, we could * close this SQ safely and stop receiving CQEs. In other case, e.g., if - * the XDPSQ was used instead, we might run into trouble when the UMEM + * the XDPSQ was used instead, we might run into trouble when the buff pool * is disabled and then reenabled, but the SQ continues receiving CQEs - * from the old UMEM. + * from the old buff pool. */ - err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true); if (unlikely(err)) goto err_close_tx_cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h index 0dd11b81c046..ca20f1ff5e39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -12,7 +12,7 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5_core_dev *mdev); int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, struct mlx5e_channel *c); void mlx5e_close_xsk(struct mlx5e_channel *c); void mlx5e_activate_xsk(struct mlx5e_channel *c); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 4d892f6cecb3..fb671a457129 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include "tx.h" -#include "umem.h" +#include "pool.h" #include "en/xdp.h" #include "en/params.h" #include <net/xdp_sock_drv.h> @@ -66,9 +66,9 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) { - struct xdp_umem *umem = sq->umem; + struct xsk_buff_pool *pool = sq->xsk_pool; + struct mlx5e_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; - struct mlx5e_xdp_xmit_data xdptxd; bool work_done = true; bool flush = false; @@ -87,7 +87,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) break; } - if (!xsk_umem_consume_tx(umem, &desc)) { + if (!xsk_tx_peek_desc(pool, &desc)) { /* TX will get stuck until something wakes it up by * triggering NAPI. 
Currently it's expected that the * application calls sendto() if there are consumed, but @@ -96,11 +96,11 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) break; } - xdptxd.dma_addr = xsk_buff_raw_get_dma(umem, desc.addr); - xdptxd.data = xsk_buff_raw_get_data(umem, desc.addr); + xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr); + xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr); xdptxd.len = desc.len; - xsk_buff_raw_dma_sync_for_device(umem, xdptxd.dma_addr, xdptxd.len); + xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len); ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, check_result); @@ -119,7 +119,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xmit_xdp_doorbell(sq); - xsk_umem_consume_tx_done(umem); + xsk_tx_release(pool); } return !(budget && work_done); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h index 39fa0a705856..a05085035f23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -15,13 +15,13 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq) { - if (!xsk_umem_uses_need_wakeup(sq->umem)) + if (!xsk_uses_need_wakeup(sq->xsk_pool)) return; if (sq->pc != sq->cc) - xsk_clear_tx_need_wakeup(sq->umem); + xsk_clear_tx_need_wakeup(sq->xsk_pool); else - xsk_set_tx_need_wakeup(sq->umem); + xsk_set_tx_need_wakeup(sq->xsk_pool); } #endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h deleted file mode 100644 index bada94973586..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2019 Mellanox Technologies. */ - -#ifndef __MLX5_EN_XSK_UMEM_H__ -#define __MLX5_EN_XSK_UMEM_H__ - -#include "en.h" - -static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params, - struct mlx5e_xsk *xsk, u16 ix) -{ - if (!xsk || !xsk->umems) - return NULL; - - if (unlikely(ix >= params->num_channels)) - return NULL; - - return xsk->umems[ix]; -} - -struct mlx5e_xsk_param; -void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk); - -/* .ndo_bpf callback. 
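
The converted mlx5e_xsk_tx() above is the canonical AF_XDP zero-copy TX loop under the new xsk_buff_pool API: peek a descriptor, translate its address into the pre-mapped DMA address and a kernel pointer, sync the buffer for the device, transmit, and release all consumed descriptors in one batch at the end. A condensed sketch using the same net/xdp_sock_drv.h calls that appear in the hunk (the driver's indirect-call dispatch, doorbell and error paths are omitted):

	#include <net/xdp_sock_drv.h>

	static void xsk_tx_budgeted(struct xsk_buff_pool *pool, unsigned int budget)
	{
		struct xdp_desc desc;
		dma_addr_t dma;
		void *data;

		while (budget--) {
			if (!xsk_tx_peek_desc(pool, &desc))
				break;	/* ring empty; the app wakes us via sendto() */

			dma = xsk_buff_raw_get_dma(pool, desc.addr);
			data = xsk_buff_raw_get_data(pool, desc.addr);
			xsk_buff_raw_dma_sync_for_device(pool, dma, desc.len);

			/* hand (dma, data, desc.len) to the HW send queue here */
		}
		xsk_tx_release(pool);	/* return the peeked descriptors in a batch */
	}

The need_wakeup handling in tx.h above is the flow-control counterpart: the driver sets the flag while the SQ is idle (pc == cc) so the application knows a sendto() is required, and clears it while work is still in flight.
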
*/ -int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid); - -int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries); - -#endif /* __MLX5_EN_XSK_UMEM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 110476bdeffb..899b98aca0d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -107,6 +107,9 @@ struct mlx5e_accel_tx_state { #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_accel_tx_tls_state tls; #endif +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5e_accel_tx_ipsec_state ipsec; +#endif }; static inline bool mlx5e_accel_tx_begin(struct net_device *dev, @@ -125,27 +128,70 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, } #endif +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) { + if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec))) + return false; + } +#endif + return true; } -static inline bool mlx5e_accel_tx_finish(struct mlx5e_priv *priv, - struct mlx5e_txqsq *sq, - struct sk_buff *skb, +static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *state) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + return mlx5e_ipsec_is_tx_flow(&state->ipsec); +#endif + + return false; +} + +static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, + struct mlx5e_accel_tx_state *state) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) + return mlx5e_ipsec_tx_ids_len(&state->ipsec); +#endif + + return 0; +} + +/* Part of the eseg touched by TX offloads */ +#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss) + +static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + if (xfrm_offload(skb)) + mlx5e_ipsec_tx_build_eseg(priv, skb, eseg); +#endif + +#if IS_ENABLED(CONFIG_GENEVE) + if (skb->encapsulation) + mlx5e_tx_tunnel_accel(skb, eseg); +#endif + + return true; +} + +static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe *wqe, - struct mlx5e_accel_tx_state *state) + struct mlx5e_accel_tx_state *state, + struct mlx5_wqe_inline_seg *inlseg) { #ifdef CONFIG_MLX5_EN_TLS mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls); #endif #ifdef CONFIG_MLX5_EN_IPSEC - if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) { - if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, &wqe->eth, skb))) - return false; - } + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && + state->ipsec.xo && state->ipsec.tailen) + mlx5e_ipsec_handle_tx_wqe(wqe, &state->ipsec, inlseg); #endif - - return true; } static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 4cdd9eac647d..97f1594cee11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -191,7 +191,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, ft->g = kcalloc(MLX5E_ACCEL_FS_TCP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { - kvfree(ft->g); + kfree(ft->g); kvfree(in); return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 
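
The en_accel.h rework above splits TX offload work into phases tied to WQE construction instead of doing everything against a prebuilt WQE: _begin() runs before any SQ space is consumed and may veto the skb, _ids_len() reports extra WQE bytes the offload will own (the IPsec trailer here), _eseg() fills the part of the Ethernet segment offloads touch (up to MLX5E_ACCEL_ESEG_LEN), and _finish() patches the WQE once the datapath has built it. A sketch of the call order a transmit routine would follow; the surrounding xmit logic is simplified, the WQE fetch is stubbed out, and mlx5e_accel_tx_begin()'s full parameter list is taken from context rather than shown in the hunks:

	static netdev_tx_t xmit_phases_sketch(struct net_device *dev,
					      struct mlx5e_txqsq *sq,
					      struct sk_buff *skb)
	{
		struct mlx5e_accel_tx_state state = {};
		struct mlx5e_tx_wqe wqe_buf = {};
		struct mlx5e_tx_wqe *wqe = &wqe_buf;	/* stands in for MLX5E_TX_FETCH_WQE() */

		/* Phase 1: may drop the skb (e.g. bad IPsec state), before
		 * any SQ space is reserved. */
		if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &state)))
			return NETDEV_TX_OK;

		/* Phase 2: size the WQE, adding mlx5e_accel_tx_ids_len(sq, &state)
		 * bytes of offload-owned room, then build it as usual. */

		/* Phase 3: fill the offload-touched part of the eth segment. */
		mlx5e_accel_tx_eseg(netdev_priv(dev), skb, &wqe->eth);

		/* Phase 4: patch the finished WQE (TLS progress, IPsec trailer). */
		mlx5e_accel_tx_finish(sq, wqe, &state, NULL /* inline seg, unused here */);
		return NETDEV_TX_OK;
	}
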
d39989cddd90..3d45341e2216 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -560,6 +560,9 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) return; } + if (mlx5_is_ipsec_device(mdev)) + netdev->gso_partial_features |= NETIF_F_GSO_ESP; + mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); netdev->features |= NETIF_F_GSO_ESP; netdev->hw_features |= NETIF_F_GSO_ESP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 0fc8b4d4f4a3..6164c7f59efb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -76,6 +76,7 @@ struct mlx5e_ipsec_stats { }; struct mlx5e_accel_fs_esp; +struct mlx5e_ipsec_tx; struct mlx5e_ipsec { struct mlx5e_priv *en_priv; @@ -87,6 +88,7 @@ struct mlx5e_ipsec { struct mlx5e_ipsec_stats stats; struct workqueue_struct *wq; struct mlx5e_accel_fs_esp *rx_fs; + struct mlx5e_ipsec_tx *tx_fs; }; struct mlx5e_ipsec_esn_state { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 429428bbc903..0e45590662a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -34,6 +34,12 @@ struct mlx5e_accel_fs_esp { struct mlx5e_accel_fs_esp_prot fs_prot[ACCEL_FS_ESP_NUM_TYPES]; }; +struct mlx5e_ipsec_tx { + struct mlx5_flow_table *ft; + struct mutex mutex; /* Protect IPsec TX steering */ + u32 refcnt; +}; + /* IPsec RX flow steering */ static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i) { @@ -228,8 +234,8 @@ static int rx_fs_create(struct mlx5e_priv *priv, fs_prot->miss_rule = miss_rule; out: - kfree(flow_group_in); - kfree(spec); + kvfree(flow_group_in); + kvfree(spec); return err; } @@ -323,6 +329,77 @@ out: mutex_unlock(&fs_prot->prot_mutex); } +/* IPsec TX flow steering */ +static int tx_create(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_ipsec *ipsec = priv->ipsec; + struct mlx5_flow_table *ft; + int err; + + priv->fs.egress_ns = + mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_EGRESS_KERNEL); + if (!priv->fs.egress_ns) + return -EOPNOTSUPP; + + ft_attr.max_fte = NUM_IPSEC_FTE; + ft_attr.autogroup.max_num_groups = 1; + ft = mlx5_create_auto_grouped_flow_table(priv->fs.egress_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err); + return err; + } + ipsec->tx_fs->ft = ft; + return 0; +} + +static void tx_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + + if (IS_ERR_OR_NULL(ipsec->tx_fs->ft)) + return; + + mlx5_destroy_flow_table(ipsec->tx_fs->ft); + ipsec->tx_fs->ft = NULL; +} + +static int tx_ft_get(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs; + int err = 0; + + mutex_lock(&tx_fs->mutex); + if (tx_fs->refcnt++) + goto out; + + err = tx_create(priv); + if (err) { + tx_fs->refcnt--; + goto out; + } + +out: + mutex_unlock(&tx_fs->mutex); + return err; +} + +static void tx_ft_put(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs; + + mutex_lock(&tx_fs->mutex); + if (--tx_fs->refcnt) + goto out; + + tx_destroy(priv); + +out: + mutex_unlock(&tx_fs->mutex); +} + static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs 
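
tx_ft_get()/tx_ft_put() above follow the mutex-protected refcounted-singleton pattern: the IPsec TX flow table is created when the first TX rule needs it and destroyed when the last rule releases it, and a failed creation winds the count back so the next caller retries. The same shape with the mlx5 specifics stripped out; create_res()/destroy_res() are hypothetical stand-ins for tx_create()/tx_destroy():

	struct shared_res {
		struct mutex lock;	/* protects refcnt and the resource */
		u32 refcnt;
	};

	static int shared_res_get(struct shared_res *r)
	{
		int err = 0;

		mutex_lock(&r->lock);
		if (r->refcnt++)	/* already created by an earlier caller */
			goto out;

		err = create_res(r);	/* hypothetical constructor */
		if (err)
			r->refcnt--;	/* undo, so a later get retries creation */
	out:
		mutex_unlock(&r->lock);
		return err;
	}

	static void shared_res_put(struct shared_res *r)
	{
		mutex_lock(&r->lock);
		if (!--r->refcnt)
			destroy_res(r);	/* hypothetical destructor */
		mutex_unlock(&r->lock);
	}
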
*attrs, u32 ipsec_obj_id, struct mlx5_flow_spec *spec, @@ -457,6 +534,54 @@ out: return err; } +static int tx_add_rule(struct mlx5e_priv *priv, + struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 ipsec_obj_id, + struct mlx5e_ipsec_rule *ipsec_rule) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + err = tx_ft_get(priv); + if (err) + return err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act); + + /* Add IPsec indicator in metadata_reg_a */ + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_IPSEC); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_IPSEC); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | + MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT; + rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n", + attrs->action, err); + goto out; + } + + ipsec_rule->rule = rule; + +out: + kvfree(spec); + if (err) + tx_ft_put(priv); + return err; +} + static void rx_del_rule(struct mlx5e_priv *priv, struct mlx5_accel_esp_xfrm_attrs *attrs, struct mlx5e_ipsec_rule *ipsec_rule) @@ -470,15 +595,27 @@ static void rx_del_rule(struct mlx5e_priv *priv, rx_ft_put(priv, attrs->is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4); } +static void tx_del_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_rule *ipsec_rule) +{ + mlx5_del_flow_rules(ipsec_rule->rule); + ipsec_rule->rule = NULL; + + tx_ft_put(priv); +} + int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, struct mlx5_accel_esp_xfrm_attrs *attrs, u32 ipsec_obj_id, struct mlx5e_ipsec_rule *ipsec_rule) { - if (!priv->ipsec->rx_fs || attrs->action != MLX5_ACCEL_ESP_ACTION_DECRYPT) + if (!priv->ipsec->rx_fs) return -EOPNOTSUPP; - return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule); + if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT) + return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule); + else + return tx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule); } void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, @@ -488,7 +625,18 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, if (!priv->ipsec->rx_fs) return; - rx_del_rule(priv, attrs, ipsec_rule); + if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT) + rx_del_rule(priv, attrs, ipsec_rule); + else + tx_del_rule(priv, ipsec_rule); +} + +static void fs_cleanup_tx(struct mlx5e_priv *priv) +{ + mutex_destroy(&priv->ipsec->tx_fs->mutex); + WARN_ON(priv->ipsec->tx_fs->refcnt); + kfree(priv->ipsec->tx_fs); + priv->ipsec->tx_fs = NULL; } static void fs_cleanup_rx(struct mlx5e_priv *priv) @@ -507,6 +655,17 @@ static void fs_cleanup_rx(struct mlx5e_priv *priv) priv->ipsec->rx_fs = NULL; } +static int fs_init_tx(struct mlx5e_priv *priv) +{ + priv->ipsec->tx_fs = + kzalloc(sizeof(struct mlx5e_ipsec_tx), GFP_KERNEL); + if (!priv->ipsec->tx_fs) + return -ENOMEM; + + mutex_init(&priv->ipsec->tx_fs->mutex); + return 0; +} + static int fs_init_rx(struct mlx5e_priv *priv) { struct mlx5e_accel_fs_esp_prot *fs_prot; @@ -532,13 +691,24 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv) if (!priv->ipsec->rx_fs) return; + fs_cleanup_tx(priv); fs_cleanup_rx(priv); } int 
mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv) { + int err; + if (!mlx5_is_ipsec_device(priv->mdev) || !priv->ipsec) return -EOPNOTSUPP; - return fs_init_rx(priv); + err = fs_init_tx(priv); + if (err) + return err; + + err = fs_init_rx(priv); + if (err) + fs_cleanup_tx(priv); + + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 93a8d68815ad..11e31a3db2be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -34,7 +34,7 @@ #include <crypto/aead.h> #include <net/xfrm.h> #include <net/esp.h> - +#include "accel/ipsec_offload.h" #include "en_accel/ipsec_rxtx.h" #include "en_accel/ipsec.h" #include "accel/accel.h" @@ -233,18 +233,94 @@ static void mlx5e_ipsec_set_metadata(struct sk_buff *skb, ntohs(mdata->content.tx.seq)); } -bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv, - struct mlx5_wqe_eth_seg *eseg, - struct sk_buff *skb) +void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_ipsec_state *ipsec_st, + struct mlx5_wqe_inline_seg *inlseg) +{ + inlseg->byte_count = cpu_to_be32(ipsec_st->tailen | MLX5_INLINE_SEG); + esp_output_fill_trailer((u8 *)inlseg->data, 0, ipsec_st->plen, ipsec_st->xo->proto); +} + +static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv, + struct sk_buff *skb, + struct xfrm_state *x, + struct xfrm_offload *xo, + struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + unsigned int blksize, clen, alen, plen; + struct crypto_aead *aead; + unsigned int tailen; + + ipsec_st->x = x; + ipsec_st->xo = xo; + if (mlx5_is_ipsec_device(priv->mdev)) { + aead = x->data; + alen = crypto_aead_authsize(aead); + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + clen = ALIGN(skb->len + 2, blksize); + plen = max_t(u32, clen - skb->len, 4); + tailen = plen + alen; + ipsec_st->plen = plen; + ipsec_st->tailen = tailen; + } + + return 0; +} + +void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) { struct xfrm_offload *xo = xfrm_offload(skb); - struct mlx5e_ipsec_metadata *mdata; - struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_encap_tmpl *encap; struct xfrm_state *x; struct sec_path *sp; + u8 l3_proto; + + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) + return; + + x = xfrm_input_state(skb); + if (unlikely(!x)) + return; + + if (unlikely(!x->xso.offload_handle || + (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)))) + return; + + mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo); - if (!xo) - return true; + l3_proto = (x->props.family == AF_INET) ? + ((struct iphdr *)skb_network_header(skb))->protocol : + ((struct ipv6hdr *)skb_network_header(skb))->nexthdr; + + if (mlx5_is_ipsec_device(priv->mdev)) { + eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); + eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER); + encap = x->encap; + if (!encap) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC); + } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? 
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC); + } + } +} + +bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo = xfrm_offload(skb); + struct mlx5e_ipsec_sa_entry *sa_entry; + struct mlx5e_ipsec_metadata *mdata; + struct xfrm_state *x; + struct sec_path *sp; sp = skb_sec_path(skb); if (unlikely(sp->len != 1)) { @@ -270,15 +346,21 @@ bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv, atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer); goto drop; } - mdata = mlx5e_ipsec_add_metadata(skb); - if (IS_ERR(mdata)) { - atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); - goto drop; + + if (MLX5_CAP_GEN(priv->mdev, fpga)) { + mdata = mlx5e_ipsec_add_metadata(skb); + if (IS_ERR(mdata)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); + goto drop; + } } - mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo); + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; sa_entry->set_iv_op(skb, x, xo); - mlx5e_ipsec_set_metadata(skb, mdata, xo); + if (MLX5_CAP_GEN(priv->mdev, fpga)) + mlx5e_ipsec_set_metadata(skb, mdata, xo); + + mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st); return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index f96e786db158..056dacb612b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -43,6 +43,13 @@ #define MLX5_IPSEC_METADATA_SYNDROM_MASK (0x7F) #define MLX5_IPSEC_METADATA_HANDLE(metadata) (((metadata) >> 8) & 0xFF) +struct mlx5e_accel_tx_ipsec_state { + struct xfrm_offload *xo; + struct xfrm_state *x; + u32 tailen; + u32 plen; +}; + #ifdef CONFIG_MLX5_EN_IPSEC struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, @@ -55,16 +62,32 @@ void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_offload *xo); void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_offload *xo); -bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv, - struct mlx5_wqe_eth_seg *eseg, - struct sk_buff *skb); +bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_ipsec_state *ipsec_st); +void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_ipsec_state *ipsec_st, + struct mlx5_wqe_inline_seg *inlseg); void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, struct mlx5_cqe64 *cqe); +static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + return ipsec_st->tailen; +} + static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return !!(MLX5_IPSEC_METADATA_MARKER_MASK & be32_to_cpu(cqe->ft_metadata)); } + +static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + return ipsec_st->x; +} + +void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg); #else static inline void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 6bbfcf18107d..ccaccb9fc2f7 100644 --- 
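
The trailer sizing in mlx5e_ipsec_set_state() above deserves a worked instance. ESP appends a trailer of padding, a pad-length byte and a next-header byte (the + 2), rounds the ciphertext up to the cipher block size, floors plen at 4 so the two mandatory bytes plus minimal padding always fit, and adds the ICV on top; mlx5e_ipsec_handle_tx_wqe() then reserves exactly tailen bytes as an inline WQE segment for esp_output_fill_trailer() to fill. Assuming AES-GCM (crypto_aead_blocksize() == 1, rounded up to 4 by the ALIGN; 16-byte ICV) and a 1400-byte skb:

	unsigned int alen = 16;				/* crypto_aead_authsize(aead) */
	unsigned int blksize = ALIGN(1, 4);		/* = 4 */
	unsigned int clen = ALIGN(1400 + 2, blksize);	/* = 1404 */
	unsigned int plen = max_t(u32, clen - 1400, 4);	/* = 4 */
	unsigned int tailen = plen + alen;		/* = 20 trailer bytes */
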
a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -253,7 +253,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, goto err_out; } - pdev = sq->channel->priv->mdev->device; + pdev = mlx5_core_dma_dev(sq->channel->priv->mdev); buf->dma_addr = dma_map_single(pdev, &buf->progress, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) { @@ -390,7 +390,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, priv_rx = buf->priv_rx; resync = &priv_rx->resync; - dev = resync->priv->mdev->device; + dev = mlx5_core_dma_dev(resync->priv->mdev); if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index f4861545b236..b140e13fdcc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -345,9 +345,6 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_sq_stats *stats; struct mlx5e_sq_dma *dma; - if (!wi->resync_dump_frag_page) - return; - dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); stats = sq->stats; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index ff4c740af10b..7521c9be735b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -29,12 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); +static inline bool +mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + if (unlikely(wi->resync_dump_frag_page)) { + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc); + return true; + } + return false; +} #else -static inline void -mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, - struct mlx5e_tx_wqe_info *wi, - u32 *dma_fifo_cc) +static inline bool +mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) { + return false; } #endif /* CONFIG_MLX5_EN_TLS */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index b0c31d49ff8d..6982b193ee8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -189,12 +189,10 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, struct mlx5e_tls *tls) { u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); - struct mlx5e_tx_wqe *wqe; struct sync_info info; struct sk_buff *nskb; int linear_len = 0; int headln; - u16 pi; int i; sq->stats->tls_ooo++; @@ -246,9 +244,7 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, sq->stats->tls_resync_bytes += nskb->len; mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, cpu_to_be64(info.rcd_sn)); - pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); - wqe = MLX5E_TX_FETCH_WQE(sq, pi); - mlx5e_sq_xmit(sq, nskb, wqe, pi, true); + mlx5e_sq_xmit_simple(sq, nskb, true); return true; @@ -274,6 +270,8 @@ bool mlx5e_tls_handle_tx_skb(struct net_device 
*netdev, struct mlx5e_txqsq *sq, if (!datalen) return true; + mlx5e_tx_mpwqe_ensure_complete(sq); + tls_ctx = tls_get_ctx(skb->sk); if (WARN_ON_ONCE(tls_ctx->netdev != netdev)) goto err_out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 08270987c506..d25a56ec6876 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,7 +32,7 @@ #include "en.h" #include "en/port.h" -#include "en/xsk/umem.h" +#include "en/xsk/pool.h" #include "lib/clock.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -243,7 +243,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) return MLX5E_NUM_PFLAGS; case ETH_SS_TEST: return mlx5e_self_test_num(priv); - fallthrough; default: return -EOPNOTSUPP; } @@ -1341,6 +1340,14 @@ static int mlx5e_set_tunable(struct net_device *dev, return err; } +static void mlx5e_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *pause_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_pause_get(priv, pause_stats); +} + void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam) { @@ -1901,7 +1908,7 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) return 0; } -static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) +static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; @@ -1913,7 +1920,7 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) new_channels.params = priv->channels.params; - MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable); + MLX5E_SET_PFLAG(&new_channels.params, flag, enable); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -1924,6 +1931,16 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) return err; } +static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) +{ + return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_XDP_TX_MPWQE, enable); +} + +static int set_pflag_skb_tx_mpwqe(struct net_device *netdev, bool enable) +{ + return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_SKB_TX_MPWQE, enable); +} + static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { { "rx_cqe_moder", set_pflag_rx_cqe_based_moder }, { "tx_cqe_moder", set_pflag_tx_cqe_based_moder }, @@ -1931,6 +1948,7 @@ static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { { "rx_striding_rq", set_pflag_rx_striding_rq }, { "rx_no_csum_complete", set_pflag_rx_no_csum_complete }, { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe }, + { "skb_tx_mpwqe", set_pflag_skb_tx_mpwqe }, }; static int mlx5e_handle_pflag(struct net_device *netdev, @@ -2033,6 +2051,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, + .get_pause_stats = mlx5e_get_pause_stats, .get_pauseparam = mlx5e_get_pauseparam, .set_pauseparam = mlx5e_set_pauseparam, .get_ts_info = mlx5e_get_ts_info, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 83c9b2bbc4af..b416a8ee2eed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ 
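
mlx5e_tx_mpwqe_ensure_complete() is declared in the en/txrx.h hunk at the top of this section and called here because TLS resync may inject a WQE through mlx5e_sq_xmit_simple(): that must not happen in the middle of an open multi-packet WQE session on the same SQ. Its body is not part of the hunks shown, so the following is only a plausible reconstruction from the call sites; mlx5e_tx_mpwqe_session_complete() is a hypothetical name for the closer, analogous to mlx5e_xdp_mpwqe_complete() in the XDP path above.

	void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
	{
		/* sq->mpwqe.wqe is non-NULL while packets are being coalesced;
		 * close the session so the next WQE starts on a clean boundary. */
		if (unlikely(sq->mpwqe.wqe))
			mlx5e_tx_mpwqe_session_complete(sq);
	}
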
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -33,7 +33,7 @@ #include <linux/mlx5/fs.h> #include "en.h" #include "en/params.h" -#include "en/xsk/umem.h" +#include "en/xsk/pool.h" struct mlx5e_ethtool_rule { struct list_head list; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 42ec28e29834..b3f02aac7f26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -57,7 +57,7 @@ #include "en/monitor_stats.h" #include "en/health.h" #include "en/params.h" -#include "en/xsk/umem.h" +#include "en/xsk/pool.h" #include "en/xsk/setup.h" #include "en/xsk/rx.h" #include "en/xsk/tx.h" @@ -393,7 +393,7 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct xdp_umem *umem, + struct xsk_buff_pool *xsk_pool, struct mlx5e_rq_param *rqp, struct mlx5e_rq *rq) { @@ -419,9 +419,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; - rq->umem = umem; + rq->xsk_pool = xsk_pool; - if (rq->umem) + if (rq->xsk_pool) rq->stats = &c->priv->channel_stats[c->ix].xskrq; else rq->stats = &c->priv->channel_stats[c->ix].rq; @@ -511,7 +511,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); - xsk_buff_set_rxq_info(rq->umem, &rq->xdp_rxq); + xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); } else { /* Create a page_pool and register it with rxq */ pp_params.order = 0; @@ -861,11 +861,11 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xdp_umem *umem, struct mlx5e_rq *rq) + struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq) { int err; - err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq); + err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq); if (err) return err; @@ -893,6 +893,13 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + /* For CQE compression on striding RQ, use stride index provided by + * HW if capability is supported. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && + MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state); + return 0; err_destroy_rq: @@ -970,7 +977,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct xdp_umem *umem, + struct xsk_buff_pool *xsk_pool, struct mlx5e_sq_param *param, struct mlx5e_xdpsq *sq, bool is_redirect) @@ -986,9 +993,9 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->umem = umem; + sq->xsk_pool = xsk_pool; - sq->stats = sq->umem ? + sq->stats = sq->xsk_pool ? &c->priv->channel_stats[c->ix].xsksq : is_redirect ? 
&c->priv->channel_stats[c->ix].xdpsq : @@ -1085,6 +1092,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq) static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) { kvfree(sq->db.wqe_info); + kvfree(sq->db.skb_fifo); kvfree(sq->db.dma_fifo); } @@ -1096,15 +1104,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) sq->db.dma_fifo = kvzalloc_node(array_size(df_sz, sizeof(*sq->db.dma_fifo)), GFP_KERNEL, numa); + sq->db.skb_fifo = kvzalloc_node(array_size(df_sz, + sizeof(*sq->db.skb_fifo)), + GFP_KERNEL, numa); sq->db.wqe_info = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.wqe_info)), GFP_KERNEL, numa); - if (!sq->db.dma_fifo || !sq->db.wqe_info) { + if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) { mlx5e_free_txqsq_db(sq); return -ENOMEM; } sq->dma_fifo_mask = df_sz - 1; + sq->skb_fifo_mask = df_sz - 1; return 0; } @@ -1115,6 +1127,12 @@ static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size) sq->stop_room = mlx5e_tls_get_stop_room(sq); sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) + /* A MPWQE can take up to the maximum-sized WQE + all the normal + * stop room can be taken if a new packet breaks the active + * MPWQE session and allocates its WQEs right away. + */ + sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); if (WARN_ON(sq->stop_room >= sq_size)) { netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n", @@ -1156,6 +1174,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_TLS, &sq->state); + if (param->is_mpw) + set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size); if (err) return err; @@ -1449,13 +1469,13 @@ void mlx5e_close_icosq(struct mlx5e_icosq *sq) } int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect); + err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect); if (err) return err; @@ -1948,7 +1968,7 @@ static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, - struct xdp_umem *umem, + struct xsk_buff_pool *xsk_pool, struct mlx5e_channel **cp) { int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); @@ -1972,7 +1992,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->tstamp = &priv->tstamp; c->ix = ix; c->cpu = cpu; - c->pdev = priv->mdev->device; + c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = params->num_tc; @@ -1987,9 +2007,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (unlikely(err)) goto err_napi_del; - if (umem) { - mlx5e_build_xsk_param(umem, &xsk); - err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (xsk_pool) { + mlx5e_build_xsk_param(xsk_pool, &xsk); + err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c); if (unlikely(err)) goto err_close_queues; } @@ -2160,7 +2180,7 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(rqc, 
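
The extra stop room added above is easy to misread, so spelled out: when the SQ is stopped, the worst remaining burst must still fit. With SKB MPWQE enabled, one incoming packet can simultaneously force-close an open MPWQE session, which may already have grown to a maximum-size WQE, and then need a maximum-size WQE for itself, on top of the TLS resync reservation. With MLX5_SEND_WQE_MAX_WQEBBS == 16 (the 16 * 4 == 64 remark in the removed xdp.h comment earlier in this section), that is roughly:

	u16 stop_room;

	stop_room  = mlx5e_tls_get_stop_room(sq);			/* TLS resync  */
	stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);	/* new packet  */
	if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state))
		stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);	/* open session */
	/* => at least 32 WQEBBs of headroom on an MPWQE-enabled SQ */
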
rqc, vsd, params->vlan_strip_disable); MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - param->wq.buf_numa_node = dev_to_node(mdev->device); + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); mlx5e_build_rx_cq_param(priv, params, xsk, &param->cqp); } @@ -2176,7 +2196,7 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); - param->wq.buf_numa_node = dev_to_node(mdev->device); + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); } void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, @@ -2188,7 +2208,7 @@ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); - param->wq.buf_numa_node = dev_to_node(priv->mdev->device); + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); } static void mlx5e_build_sq_param(struct mlx5e_priv *priv, @@ -2204,6 +2224,7 @@ mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); mlx5e_build_tx_cq_param(priv, params, &param->cqp); } @@ -2223,6 +2244,7 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; + bool hw_stridx = false; void *cqc = param->cqc; u8 log_cq_size; @@ -2230,6 +2252,7 @@ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -2237,7 +2260,8 @@ MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { - MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
+ MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); MLX5_SET(cqc, cqc, cqe_comp_en, 1); } @@ -2350,12 +2374,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, mlx5e_build_channel_param(priv, &chs->params, cparam); for (i = 0; i < chs->num; i++) { - struct xdp_umem *umem = NULL; + struct xsk_buff_pool *xsk_pool = NULL; if (chs->params.xdp_prog) - umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i); + xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); - err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]); + err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]); if (err) goto err_close_channels; } @@ -3222,8 +3246,8 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { - param->wq.buf_numa_node = dev_to_node(mdev->device); - param->wq.db_numa_node = dev_to_node(mdev->device); + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); return mlx5e_alloc_cq_common(mdev, param, cq); } @@ -3927,13 +3951,14 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, u16 ix; for (ix = 0; ix < chs->params.num_channels; ix++) { - struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix); + struct xsk_buff_pool *xsk_pool = + mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); struct mlx5e_xsk_param xsk; - if (!umem) + if (!xsk_pool) continue; - mlx5e_build_xsk_param(umem, &xsk); + mlx5e_build_xsk_param(xsk_pool, &xsk); if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); @@ -4466,8 +4491,8 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return mlx5e_xdp_set(dev, xdp->prog); - case XDP_SETUP_XSK_UMEM: - return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem, + case XDP_SETUP_XSK_POOL: + return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool, xdp->xsk.queue_id); default: return -EINVAL; @@ -4758,6 +4783,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, params->log_sq_size = is_kdump_kernel() ? 
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, + MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)); /* XDP SQ */ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e979bff64c49..67247c33b9fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -39,7 +39,6 @@ #include <net/ipv6_stubs.h> #include "eswitch.h" -#include "esw/chains.h" #include "en.h" #include "en_rep.h" #include "en/txrx.h" @@ -288,6 +287,14 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } +static void mlx5e_uplink_rep_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_pause_get(priv, stats); +} + static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pauseparam) { @@ -362,23 +369,11 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, .set_rxnfc = mlx5e_set_rxnfc, + .get_pause_stats = mlx5e_uplink_rep_get_pause_stats, .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, }; -static void mlx5e_rep_get_port_parent_id(struct net_device *dev, - struct netdev_phys_item_id *ppid) -{ - struct mlx5e_priv *priv; - u64 parent_id; - - priv = netdev_priv(dev); - - parent_id = mlx5_query_nic_system_image_guid(priv->mdev); - ppid->id_len = sizeof(parent_id); - memcpy(ppid->id, &parent_id, sizeof(parent_id)); -} - static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { @@ -603,12 +598,13 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *dev) +static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev) { - struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_core_dev *dev = priv->mdev; - return &rpriv->dl_port; + return mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); } static int mlx5e_rep_change_carrier(struct net_device *dev, bool new_carrier) @@ -1198,63 +1194,13 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .stats_grps_num = mlx5e_ul_rep_stats_grps_num, }; -static bool -is_devlink_port_supported(const struct mlx5_core_dev *dev, - const struct mlx5e_rep_priv *rpriv) -{ - return rpriv->rep->vport == MLX5_VPORT_UPLINK || - rpriv->rep->vport == MLX5_VPORT_PF || - mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport); -} - -static int register_devlink_port(struct mlx5_core_dev *dev, - struct mlx5e_rep_priv *rpriv) -{ - struct devlink *devlink = priv_to_devlink(dev); - struct mlx5_eswitch_rep *rep = rpriv->rep; - struct devlink_port_attrs attrs = {}; - struct netdev_phys_item_id ppid = {}; - unsigned int dl_port_index = 0; - u16 pfnum; - - if (!is_devlink_port_supported(dev, rpriv)) - return 0; - - mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); - dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, rep->vport); - pfnum = PCI_FUNC(dev->pdev->devfn); - if (rep->vport == MLX5_VPORT_UPLINK) { - attrs.flavour = 
DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pfnum; - memcpy(attrs.switch_id.id, &ppid.id[0], ppid.id_len); - attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_set(&rpriv->dl_port, &attrs); - } else if (rep->vport == MLX5_VPORT_PF) { - memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len); - rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_pci_pf_set(&rpriv->dl_port, pfnum); - } else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) { - memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len); - rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_pci_vf_set(&rpriv->dl_port, - pfnum, rep->vport - 1); - } - return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index); -} - -static void unregister_devlink_port(struct mlx5_core_dev *dev, - struct mlx5e_rep_priv *rpriv) -{ - if (is_devlink_port_supported(dev, rpriv)) - devlink_port_unregister(&rpriv->dl_port); -} - /* e-Switch vport representors */ static int mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { const struct mlx5e_profile *profile; struct mlx5e_rep_priv *rpriv; + struct devlink_port *dl_port; struct net_device *netdev; int nch, err; @@ -1304,28 +1250,19 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } - err = register_devlink_port(dev, rpriv); - if (err) { - netdev_warn(netdev, "Failed to register devlink port %d\n", - rep->vport); - goto err_neigh_cleanup; - } - err = register_netdev(netdev); if (err) { netdev_warn(netdev, "Failed to register representor netdev for vport %d\n", rep->vport); - goto err_devlink_cleanup; + goto err_neigh_cleanup; } - if (is_devlink_port_supported(dev, rpriv)) - devlink_port_type_eth_set(&rpriv->dl_port, netdev); + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_eth_set(dl_port, netdev); return 0; -err_devlink_cleanup: - unregister_devlink_port(dev, rpriv); - err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1349,12 +1286,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *dev = priv->mdev; + struct devlink_port *dl_port; void *ppriv = priv->ppriv; - if (is_devlink_port_supported(dev, rpriv)) - devlink_port_type_clear(&rpriv->dl_port); + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_clear(dl_port); unregister_netdev(netdev); - unregister_devlink_port(dev, rpriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); if (rep->vport == MLX5_VPORT_UPLINK) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 0d1562e20118..9020d1419bcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -101,7 +101,6 @@ struct mlx5e_rep_priv { struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ struct rtnl_link_stats64 prev_vf_vport_stats; - struct devlink_port dl_port; }; static inline diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 64c8ac5eabf6..599f5b5ebc97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -30,7 +30,6 @@ * SOFTWARE. 
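
The en_rep.c and en_rep.h hunks above drop the representor's private devlink_port together with its register/unregister helpers; the ports are now owned by the eswitch core, and the rep only resolves and types them. The resulting pattern, condensed from the hunks above with error handling trimmed:

	dl_port = mlx5_esw_offloads_devlink_port(esw, rpriv->rep->vport);
	if (dl_port)
		devlink_port_type_eth_set(dl_port, netdev);
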
*/ -#include <linux/prefetch.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> @@ -139,8 +138,17 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, title->check_sum = mini_cqe->checksum; title->op_own &= 0xf0; title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz); - title->wqe_counter = cpu_to_be16(cqd->wqe_counter); + /* state bit set implies linked-list striding RQ wq type and + * HW stride index capability supported + */ + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) { + title->wqe_counter = mini_cqe->stridx; + return; + } + + /* HW stride index capability not supported */ + title->wqe_counter = cpu_to_be16(cqd->wqe_counter); if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title); else @@ -282,8 +290,8 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - if (rq->umem) - return mlx5e_xsk_page_alloc_umem(rq, dma_info); + if (rq->xsk_pool) + return mlx5e_xsk_page_alloc_pool(rq, dma_info); else return mlx5e_page_alloc_pool(rq, dma_info); } @@ -314,7 +322,7 @@ static inline void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle) { - if (rq->umem) + if (rq->xsk_pool) /* The `recycle` parameter is ignored, and the page is always * put into the Reuse Ring, because there is no way to return * the page to the userspace when the interface goes down. @@ -401,14 +409,14 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk) int err; int i; - if (rq->umem) { + if (rq->xsk_pool) { int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags; /* Check in advance that we have enough frames, instead of * allocating one-by-one, failing and moving frames to the * Reuse Ring. */ - if (unlikely(!xsk_buff_can_alloc(rq->umem, pages_desired))) + if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, pages_desired))) return -ENOMEM; } @@ -506,8 +514,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) /* Check in advance that we have enough frames, instead of allocating * one-by-one, failing and moving frames to the Reuse Ring. */ - if (rq->umem && - unlikely(!xsk_buff_can_alloc(rq->umem, MLX5_MPWRQ_PAGES_PER_WQE))) { + if (rq->xsk_pool && + unlikely(!xsk_buff_can_alloc(rq->xsk_pool, MLX5_MPWRQ_PAGES_PER_WQE))) { err = -ENOMEM; goto err; } @@ -755,7 +763,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) * the driver when it refills the Fill Ring. * 2. Otherwise, busy poll by rescheduling the NAPI poll. 
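
Both RX refill paths above (mlx5e_alloc_rx_wqes() and mlx5e_alloc_rx_mpwqe()) pre-check the XSK pool with xsk_buff_can_alloc() before committing to a bulk allocation, so a short fill ring fails the whole batch up front instead of allocating frame by frame and unwinding into the Reuse Ring. A sketch of the guard, assuming a pool-backed RQ and a hypothetical refill helper:

static int my_rx_refill_bulk(struct mlx5e_rq *rq, int nframes)
{
	/* fail fast; the wakeup/busy-poll handling above will retry */
	if (rq->xsk_pool && !xsk_buff_can_alloc(rq->xsk_pool, nframes))
		return -ENOMEM;

	/* ... allocate nframes buffers, e.g. via xsk_buff_alloc() ... */
	return 0;
}
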
*/ - if (unlikely(alloc_err == -ENOMEM && rq->umem)) + if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool)) return true; return false; @@ -1144,8 +1152,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, frag_size, DMA_FROM_DEVICE); - prefetchw(va); /* xdp_frame data area */ - prefetch(data); + net_prefetchw(va); /* xdp_frame data area */ + net_prefetch(data); mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp)) @@ -1184,7 +1192,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, return NULL; } - prefetchw(skb->data); + net_prefetchw(skb->data); while (byte_cnt) { u16 frag_consumed_bytes = @@ -1252,6 +1260,11 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (mlx5e_cqe_regb_chain(cqe)) + if (!mlx5e_tc_update_skb(cqe, skb)) + goto free_wqe; + napi_gro_receive(rq->cq.napi, skb); free_wqe: @@ -1399,7 +1412,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w return NULL; } - prefetchw(skb->data); + net_prefetchw(skb->data); if (unlikely(frag_offset >= PAGE_SIZE)) { di++; @@ -1451,8 +1464,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset, frag_size, DMA_FROM_DEVICE); - prefetchw(va); /* xdp_frame data area */ - prefetch(data); + net_prefetchw(va); /* xdp_frame data area */ + net_prefetch(data); mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp); if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) { @@ -1513,6 +1526,11 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq goto mpwrq_cqe_out; mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (mlx5e_cqe_regb_chain(cqe)) + if (!mlx5e_tc_update_skb(cqe, skb)) + goto mpwrq_cqe_out; + napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 46790216ce86..ce8ab1f01876 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -30,7 +30,6 @@ * SOFTWARE. 
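
The two RX completion handlers above now call mlx5e_cqe_regb_chain() and mlx5e_tc_update_skb() before napi_gro_receive(): NIC tc rules write their restore metadata into reg_b, which reaches software in cqe->ft_metadata. A sketch of the unpacking done by mlx5e_tc_update_skb() (added in en_tc.c further down), using the masks and shifts this series introduces:

	u32 reg_b = be32_to_cpu(cqe->ft_metadata);
	u32 chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
	u32 zone_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
		      ZONE_RESTORE_MAX;
	/* chain_tag maps back to a tc chain via mlx5_get_chain_for_tag();
	 * zone_id feeds mlx5e_tc_ct_restore_flow() to restore ct state.
	 */
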
*/ -#include <linux/prefetch.h> #include <linux/ip.h> #include <linux/udp.h> #include <net/udp.h> @@ -115,7 +114,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) return NULL; } - prefetchw(skb->data); + net_prefetchw(skb->data); skb_reserve(skb, NET_IP_ALIGN); /* Reserve for ethernet and IP header */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index f6383bc2bc3f..78f6a6f0a7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -110,6 +110,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) }, #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, @@ -365,6 +367,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; s->tx_added_vlan_packets += sq_stats->added_vlan_packets; s->tx_nop += sq_stats->nop; + s->tx_mpwqe_blks += sq_stats->mpwqe_blks; + s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; @@ -689,6 +693,35 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } +#define MLX5E_READ_CTR64_BE_F(ptr, c) \ + be64_to_cpu(*(__be64 *)((char *)ptr + \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high))) + +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3, + sz, MLX5_REG_PPCNT, 0, 0); + + pause_stats->tx_pause_frames = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + a_pause_mac_ctrl_frames_transmitted); + pause_stats->rx_pause_frames = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + a_pause_mac_ctrl_frames_received); +} + #define PPORT_2863_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_2863_cntrs_grp_data_layout.c##_high) @@ -1539,6 +1572,8 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 562263d62141..162daaadb0d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -105,6 +105,9 @@ void 
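
mlx5e_stats_pause_get() above backs the new ethtool get_pause_stats hook: it queries the IEEE 802.3 PPCNT group and extracts each 64-bit big-endian counter via the offset of its _high half (the MLX5E_READ_CTR64_BE_F macro). A sketch of the generic ethtool wiring, with my_read_tx_pause()/my_read_rx_pause() as hypothetical stand-ins for the register read:

static void my_get_pause_stats(struct net_device *dev,
			       struct ethtool_pause_stats *stats)
{
	/* fields left untouched are reported as ETHTOOL_STAT_NOT_SET */
	stats->tx_pause_frames = my_read_tx_pause(dev);
	stats->rx_pause_frames = my_read_rx_pause(dev);
}

static const struct ethtool_ops my_ethtool_ops = {
	.get_pause_stats	= my_get_pause_stats,
	/* ... remaining ops ... */
};
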
mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx); void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data); void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv); +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats); + /* Concrete NIC Stats */ struct mlx5e_sw_stats { @@ -118,6 +121,8 @@ struct mlx5e_sw_stats { u64 tx_tso_inner_bytes; u64 tx_added_vlan_packets; u64 tx_nop; + u64 tx_mpwqe_blks; + u64 tx_mpwqe_pkts; u64 rx_lro_packets; u64 rx_lro_bytes; u64 rx_mcast_packets; @@ -348,6 +353,8 @@ struct mlx5e_sq_stats { u64 csum_partial_inner; u64 added_vlan_packets; u64 nop; + u64 mpwqe_blks; + u64 mpwqe_pkts; #ifdef CONFIG_MLX5_EN_TLS u64 tls_encrypted_packets; u64 tls_encrypted_bytes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1c93f92d9210..e3a968e9e2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -57,7 +57,6 @@ #include "en/rep/neigh.h" #include "en_tc.h" #include "eswitch.h" -#include "esw/chains.h" #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" @@ -66,20 +65,11 @@ #include "en/mod_hdr.h" #include "lib/devcom.h" #include "lib/geneve.h" +#include "lib/fs_chains.h" #include "diag/en_tc_tracepoint.h" +#define nic_chains(priv) ((priv)->fs.tc.chains) #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) - -struct mlx5_nic_flow_attr { - u32 action; - u32 flow_tag; - struct mlx5_modify_hdr *modify_hdr; - u32 hairpin_tirn; - u8 match_level; - struct mlx5_flow_table *hairpin_ft; - struct mlx5_fc *counter; -}; - #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) enum { @@ -153,11 +143,7 @@ struct mlx5e_tc_flow { struct rcu_head rcu_head; struct completion init_done; int tunnel_id; /* the mapped tunnel id of this flow */ - - union { - struct mlx5_esw_flow_attr esw_attr[0]; - struct mlx5_nic_flow_attr nic_attr[0]; - }; + struct mlx5_flow_attr *attr; }; struct mlx5e_tc_flow_parse_attr { @@ -170,7 +156,7 @@ struct mlx5e_tc_flow_parse_attr { }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 -#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) +#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [CHAIN_TO_REG] = { @@ -191,6 +177,16 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [MARK_TO_REG] = mark_to_reg_ct, [LABELS_TO_REG] = labels_to_reg_ct, [FTEID_TO_REG] = fteid_to_reg_ct, + /* For NIC rules we store the restore metadata directly + * into reg_b that is passed to SW since we don't + * jump between steering domains.
+ */ + [NIC_CHAIN_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, + .moffset = 0, + .mlen = 2, + }, + [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, }; static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); @@ -244,6 +240,7 @@ mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, enum mlx5e_tc_attr_to_reg type, u32 data) { @@ -253,8 +250,7 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, char *modact; int err; - err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB, - mod_hdr_acts); + err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts); if (err) return err; @@ -275,6 +271,54 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, return 0; } +#define esw_offloads_mode(esw) (mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS) + +static struct mlx5_tc_ct_priv * +get_ct_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (esw_offloads_mode(esw)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->ct_priv; + } + + return priv->fs.tc.ct; +} + +struct mlx5_flow_handle * +mlx5_tc_rule_insert(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (esw_offloads_mode(esw)) + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + + return mlx5e_add_offloaded_nic_rule(priv, spec, attr); +} + +void +mlx5_tc_rule_delete(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (esw_offloads_mode(esw)) { + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + + return; + } + + mlx5e_del_offloaded_nic_rule(priv, rule, attr); +} + struct mlx5e_hairpin { struct mlx5_hairpin *pair; @@ -370,7 +414,7 @@ static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) #define flow_flag_test(flow, flag) __flow_flag_test(flow, \ MLX5E_TC_FLOW_FLAG_##flag) -static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) { return flow_flag_test(flow, ESWITCH); } @@ -415,10 +459,7 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, return PTR_ERR(mh); modify_hdr = mlx5e_mod_hdr_get(mh); - if (mlx5e_is_eswitch_flow(flow)) - flow->esw_attr->modify_hdr = modify_hdr; - else - flow->nic_attr->modify_hdr = modify_hdr; + flow->attr->modify_hdr = modify_hdr; flow->mh = mh; return 0; @@ -858,9 +899,9 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, attach_flow: if (hpe->hp->num_channels > 1) { flow_flag_set(flow, HAIRPIN_RSS); - flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; + flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; } else { - flow->nic_attr->hairpin_tirn = hpe->hp->tirn; + flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn; } flow->hpe = hpe; @@ -890,129 +931,212 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, flow->hpe = NULL; } -static int -mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr 
*attr) { - struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context; - struct mlx5_nic_flow_attr *attr = flow->nic_attr; - struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_flow_context *flow_context = &spec->flow_context; + struct mlx5_fs_chains *nic_chains = nic_chains(priv); + struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr; + struct mlx5e_tc_table *tc = &priv->fs.tc; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, .flags = FLOW_ACT_NO_APPEND, }; - struct mlx5_fc *counter = NULL; - int err, dest_ix = 0; + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *ft; + int dest_ix = 0; flow_context->flags |= FLOW_CONTEXT_HAS_TAG; - flow_context->flow_tag = attr->flow_tag; - - if (flow_flag_test(flow, HAIRPIN)) { - err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); - if (err) - return err; + flow_context->flow_tag = nic_attr->flow_tag; - if (flow_flag_test(flow, HAIRPIN_RSS)) { - dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[dest_ix].ft = attr->hairpin_ft; - } else { - dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest[dest_ix].tir_num = attr->hairpin_tirn; - } + if (attr->dest_ft) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = attr->dest_ft; + dest_ix++; + } else if (nic_attr->hairpin_ft) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = nic_attr->hairpin_ft; + dest_ix++; + } else if (nic_attr->hairpin_tirn) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest[dest_ix].tir_num = nic_attr->hairpin_tirn; dest_ix++; } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[dest_ix].ft = priv->fs.vlan.ft.t; + if (attr->dest_chain) { + dest[dest_ix].ft = mlx5_chains_get_table(nic_chains, + attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); + if (IS_ERR(dest[dest_ix].ft)) + return ERR_CAST(dest[dest_ix].ft); + } else { + dest[dest_ix].ft = priv->fs.vlan.ft.t; + } + dest_ix++; + } + + if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[dest_ix].counter_id = mlx5_fc_id(attr->counter); dest_ix++; } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_hdr = attr->modify_hdr; + + mutex_lock(&tc->t_lock); + if (IS_ERR_OR_NULL(tc->t)) { + /* Create the root table here if it doesn't exist yet */ + tc->t = + mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL); + + if (IS_ERR(tc->t)) { + mutex_unlock(&tc->t_lock); + netdev_err(priv->netdev, + "Failed to create tc offload table\n"); + rule = ERR_CAST(priv->fs.tc.t); + goto err_ft_get; + } + } + mutex_unlock(&tc->t_lock); + + if (attr->chain || attr->prio) + ft = mlx5_chains_get_table(nic_chains, + attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); + else + ft = attr->ft; + + if (IS_ERR(ft)) { + rule = ERR_CAST(ft); + goto err_ft_get; + } + + if (attr->outer_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + rule = mlx5_add_flow_rules(ft, spec, + &flow_act, dest, dest_ix); + if (IS_ERR(rule)) + goto err_rule; + + return rule; + +err_rule: + if (attr->chain || attr->prio) + mlx5_chains_put_table(nic_chains, + attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); +err_ft_get: + if (attr->dest_chain) + 
mlx5_chains_put_table(nic_chains, + attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); + + return ERR_CAST(rule); +} + +static int +mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_fc *counter = NULL; + int err; + + if (flow_flag_test(flow, HAIRPIN)) { + err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); + if (err) + return err; + } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) return PTR_ERR(counter); - dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dest_ix].counter_id = mlx5_fc_id(counter); - dest_ix++; attr->counter = counter; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - flow_act.modify_hdr = attr->modify_hdr; dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } - mutex_lock(&priv->fs.tc.t_lock); - if (IS_ERR_OR_NULL(priv->fs.tc.t)) { - struct mlx5_flow_table_attr ft_attr = {}; - int tc_grp_size, tc_tbl_size, tc_num_grps; - u32 max_flow_counter; - - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - - tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); - - tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, - BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); - tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS; - - ft_attr.prio = MLX5E_TC_PRIO; - ft_attr.max_fte = tc_tbl_size; - ft_attr.level = MLX5E_TC_FT_LEVEL; - ft_attr.autogroup.max_num_groups = tc_num_grps; - priv->fs.tc.t = - mlx5_create_auto_grouped_flow_table(priv->fs.ns, - &ft_attr); - if (IS_ERR(priv->fs.tc.t)) { - mutex_unlock(&priv->fs.tc.t_lock); - NL_SET_ERR_MSG_MOD(extack, - "Failed to create tc offload table"); - netdev_err(priv->netdev, - "Failed to create tc offload table\n"); - return PTR_ERR(priv->fs.tc.t); - } - } + if (flow_flag_test(flow, CT)) + flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec, + attr, &parse_attr->mod_hdr_acts); + else + flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, + attr); - if (attr->match_level != MLX5_MATCH_NONE) - parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + return PTR_ERR_OR_ZERO(flow->rule[0]); +} - flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, - &flow_act, dest, dest_ix); - mutex_unlock(&priv->fs.tc.t_lock); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_fs_chains *nic_chains = nic_chains(priv); - return PTR_ERR_OR_ZERO(flow->rule[0]); + mlx5_del_flow_rules(rule); + + if (attr->chain || attr->prio) + mlx5_chains_put_table(nic_chains, attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); + + if (attr->dest_chain) + mlx5_chains_put_table(nic_chains, attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); } static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct mlx5_nic_flow_attr *attr = flow->nic_attr; - struct mlx5_fc *counter = NULL; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5e_tc_table *tc = &priv->fs.tc; + + flow_flag_clear(flow, OFFLOADED); - counter = attr->counter; - if (!IS_ERR_OR_NULL(flow->rule[0])) - mlx5_del_flow_rules(flow->rule[0]); - mlx5_fc_destroy(priv->mdev, 
counter); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else if (!IS_ERR_OR_NULL(flow->rule[0])) + mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr); + /* Remove root table if no rules are left to avoid + * extra steering hops. + */ mutex_lock(&priv->fs.tc.t_lock); - if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) { - mlx5_destroy_flow_table(priv->fs.tc.t); + if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && + !IS_ERR_OR_NULL(tc->t)) { + mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL); priv->fs.tc.t = NULL; } mutex_unlock(&priv->fs.tc.t_lock); + kvfree(attr->parse_attr); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); + mlx5_fc_destroy(priv->mdev, attr->counter); + if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); + + kfree(flow->attr); } static void mlx5e_detach_encap(struct mlx5e_priv *priv, @@ -1035,7 +1159,7 @@ static struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; @@ -1043,7 +1167,8 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, if (flow_flag_test(flow, CT)) { mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; - return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, attr, + return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), + flow, spec, attr, mod_hdr_acts); } @@ -1051,7 +1176,7 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, if (IS_ERR(rule)) return rule; - if (attr->split_count) { + if (attr->esw_attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); if (IS_ERR(flow->rule[1])) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); @@ -1065,16 +1190,16 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, static void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { flow_flag_clear(flow, OFFLOADED); if (flow_flag_test(flow, CT)) { - mlx5_tc_ct_delete_flow(flow->priv, flow, attr); + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); return; } - if (attr->split_count) + if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); @@ -1085,18 +1210,24 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec) { - struct mlx5_esw_flow_attr slow_attr; + struct mlx5_flow_attr *slow_attr; struct mlx5_flow_handle *rule; - memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr)); - slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr.split_count = 0; - slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!slow_attr) + return ERR_PTR(-ENOMEM); + + memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->esw_attr->split_count = 0; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, &slow_attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) flow_flag_set(flow, SLOW); + kfree(slow_attr); + return rule; } @@ -1104,14 +1235,21 @@ static void mlx5e_tc_unoffload_from_slow_path(struct 
mlx5_eswitch *esw, struct mlx5e_tc_flow *flow) { - struct mlx5_esw_flow_attr slow_attr; + struct mlx5_flow_attr *slow_attr; - memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr)); - slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr.split_count = 0; - slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - mlx5e_tc_unoffload_fdb_rules(esw, flow, &slow_attr); + slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!slow_attr) { + mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n"); + return; + } + + memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->esw_attr->split_count = 0; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow_flag_clear(flow, SLOW); + kfree(slow_attr); } /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this @@ -1169,9 +1307,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; struct net_device *out_dev, *encap_dev = NULL; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; @@ -1180,7 +1319,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, int err = 0; int out_index; - if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) { + if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) { NL_SET_ERR_MSG_MOD(extack, "E-switch priorities unsupported, upgrade FW"); return -EOPNOTSUPP; @@ -1191,14 +1330,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * FDB_FT_CHAIN which is outside tc range. * See mlx5e_rep_setup_ft_cb(). 
*/ - max_chain = mlx5_esw_chains_get_chain_range(esw); + max_chain = mlx5_chains_get_chain_range(esw_chains(esw)); if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { NL_SET_ERR_MSG_MOD(extack, "Requested chain is out of supported range"); return -EOPNOTSUPP; } - max_prio = mlx5_esw_chains_get_prio_range(esw); + max_prio = mlx5_chains_get_prio_range(esw_chains(esw)); if (attr->prio > max_prio) { NL_SET_ERR_MSG_MOD(extack, "Requested priority is out of supported range"); @@ -1211,10 +1350,13 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return err; } + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { int mirred_ifindex; - if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) continue; mirred_ifindex = parse_attr->mirred_ifindex[out_index]; @@ -1227,8 +1369,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; - attr->dests[out_index].rep = rpriv->rep; - attr->dests[out_index].mdev = out_priv->mdev; + esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].mdev = out_priv->mdev; } err = mlx5_eswitch_add_vlan_action(esw, attr); @@ -1244,7 +1386,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(attr->counter_dev, true); + counter = mlx5_fc_create(esw_attr->counter_dev, true); if (IS_ERR(counter)) return PTR_ERR(counter); @@ -1270,7 +1412,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) { - struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec; + struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); @@ -1285,7 +1427,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_flow_attr *attr = flow->attr; int out_index; mlx5e_put_flow_tunnel_id(flow); @@ -1306,22 +1448,24 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_eswitch_del_vlan_action(esw, attr); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { + if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { mlx5e_detach_encap(priv, flow, out_index); kfree(attr->parse_attr->tun_info[out_index]); } kvfree(attr->parse_attr); - mlx5_tc_ct_match_del(priv, &flow->esw_attr->ct_attr); + mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - mlx5_fc_destroy(attr->counter_dev, attr->counter); + mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter); if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); + + kfree(flow->attr); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -1331,6 +1475,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; int err; @@ -1353,8 +1498,9 @@ void mlx5e_tc_encap_flows_add(struct 
mlx5e_priv *priv, if (!mlx5e_is_offloaded_flow(flow)) continue; - esw_attr = flow->esw_attr; - spec = &esw_attr->parse_attr->spec; + attr = flow->attr; + esw_attr = attr->esw_attr; + spec = &attr->parse_attr->spec; esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat; esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; @@ -1374,7 +1520,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, if (!all_flow_encaps_valid) continue; /* update from slow path rule to encap rule */ - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", @@ -1394,7 +1540,9 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; int err; @@ -1402,12 +1550,14 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow)) continue; - spec = &flow->esw_attr->parse_attr->spec; + attr = flow->attr; + esw_attr = attr->esw_attr; + spec = &attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); /* mark the flow's encap dest as non-valid */ - flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1416,7 +1566,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, continue; } - mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); flow->rule[0] = rule; /* was unset when fast path rule removed */ flow_flag_set(flow, OFFLOADED); @@ -1429,10 +1579,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { - if (mlx5e_is_eswitch_flow(flow)) - return flow->esw_attr->counter; - else - return flow->nic_attr->counter; + return flow->attr->counter; } /* Takes reference to all flows attached to encap and adds the flows to @@ -1798,11 +1945,11 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct flow_match_enc_opts enc_opts_match; struct tunnel_match_enc_opts tun_enc_opts; struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5_flow_attr *attr = flow->attr; struct mlx5e_rep_priv *uplink_rpriv; struct tunnel_match_key tunnel_key; bool enc_opts_is_dont_care = true; @@ -1866,7 +2013,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, } else { mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; err = mlx5e_tc_match_to_reg_set(priv->mdev, - mod_hdr_acts, + mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB, TUNNEL_TO_REG, value); if (err) goto err_set; @@ -1952,8 +2099,8 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, if (!mlx5e_is_eswitch_flow(flow)) return -EOPNOTSUPP; - needs_mapping = !!flow->esw_attr->chain; - sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f); + needs_mapping = !!flow->attr->chain; + sets_mapping 
= !flow->attr->chain && flow_has_tc_fwd_action(f); *match_inner = !needs_mapping; if ((needs_mapping || sets_mapping) && @@ -1965,7 +2112,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (!flow->esw_attr->chain) { + if (!flow->attr->chain) { err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, match_level); if (err) { @@ -1980,7 +2127,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, * object */ if (!netif_is_bareudp(filter_dev)) - flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } if (!needs_mapping && !sets_mapping) @@ -2483,12 +2630,9 @@ static int parse_cls_flower(struct mlx5e_priv *priv, } } - if (is_eswitch_flow) { - flow->esw_attr->inner_match_level = inner_match_level; - flow->esw_attr->outer_match_level = outer_match_level; - } else { - flow->nic_attr->match_level = non_tunnel_match_level; - } + flow->attr->inner_match_level = inner_match_level; + flow->attr->outer_match_level = outer_match_level; + return err; } @@ -2614,6 +2758,7 @@ static struct mlx5_fields fields[] = { OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), + OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp), OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), @@ -3090,7 +3235,7 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv, * we can't restore ct state */ if (!ct_clear && modify_tuple && - mlx5_tc_ct_add_no_trk_match(priv, spec)) { + mlx5_tc_ct_add_no_trk_match(spec)) { NL_SET_ERR_MSG_MOD(extack, "can't offload tuple modify header with ct matches"); netdev_info(priv->netdev, @@ -3121,12 +3266,13 @@ static bool actions_match_supported(struct mlx5e_priv *priv, bool ct_flow = false, ct_clear = false; u32 actions; + ct_clear = flow->attr->ct_attr.ct_action & + TCA_CT_ACT_CLEAR; + ct_flow = flow_flag_test(flow, CT) && !ct_clear; + actions = flow->attr->action; + if (mlx5e_is_eswitch_flow(flow)) { - actions = flow->esw_attr->action; - ct_clear = flow->esw_attr->ct_attr.ct_action & - TCA_CT_ACT_CLEAR; - ct_flow = flow_flag_test(flow, CT) && !ct_clear; - if (flow->esw_attr->split_count && ct_flow) { + if (flow->attr->esw_attr->split_count && ct_flow) { /* All registers used by ct are cleared when using * split rules. */ @@ -3134,8 +3280,6 @@ static bool actions_match_supported(struct mlx5e_priv *priv, "Can't offload mirroring with action ct"); return false; } - } else { - actions = flow->nic_attr->action; } if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@ -3233,15 +3377,67 @@ add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, extack); } +static int validate_goto_chain(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + const struct flow_action_entry *act, + u32 actions, + struct netlink_ext_ack *extack) +{ + bool is_esw = mlx5e_is_eswitch_flow(flow); + struct mlx5_flow_attr *attr = flow->attr; + bool ft_flow = mlx5e_is_ft_flow(flow); + u32 dest_chain = act->chain_index; + struct mlx5_fs_chains *chains; + struct mlx5_eswitch *esw; + u32 reformat_and_fwd; + u32 max_chain; + + esw = priv->mdev->priv.eswitch; + chains = is_esw ? esw_chains(esw) : nic_chains(priv); + max_chain = mlx5_chains_get_chain_range(chains); + reformat_and_fwd = is_esw ? 
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) : + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table); + + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } + + if (!mlx5_chains_backwards_supported(chains) && + dest_chain <= attr->chain) { + NL_SET_ERR_MSG_MOD(extack, + "Goto lower numbered chain isn't supported"); + return -EOPNOTSUPP; + } + + if (dest_chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + + if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && + !reformat_and_fwd) { + NL_SET_ERR_MSG_MOD(extack, + "Goto chain is not allowed if action has reformat or decap"); + return -EOPNOTSUPP; + } + + return 0; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { - struct mlx5_nic_flow_attr *attr = flow->nic_attr; + struct mlx5_flow_attr *attr = flow->attr; struct pedit_headers_action hdrs[2] = {}; const struct flow_action_entry *act; + struct mlx5_nic_flow_attr *nic_attr; u32 action = 0; int err, i; @@ -3252,7 +3448,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; - attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + nic_attr = attr->nic_attr; + + nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; flow_action_for_each(i, act, flow_action) { switch (act->id) { @@ -3273,8 +3471,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (err) return err; - action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; break; case FLOW_ACTION_VLAN_MANGLE: err = add_vlan_rewrite_action(priv, @@ -3319,10 +3516,26 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return -EINVAL; } - attr->flow_tag = mark; + nic_attr->flow_tag = mark; action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } break; + case FLOW_ACTION_GOTO: + err = validate_goto_chain(priv, flow, act, action, + extack); + if (err) + return err; + + action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = act->chain_index; + break; + case FLOW_ACTION_CT: + err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); + if (err) + return err; + + flow_flag_set(flow, CT); + break; default: NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); return -EOPNOTSUPP; @@ -3345,6 +3558,18 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, } attr->action = action; + + if (attr->dest_chain) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); + return -EOPNOTSUPP; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; @@ -3476,8 +3701,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, bool *encap_valid) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; struct encap_key key; struct mlx5e_encap_entry *e; @@ -3563,8 +3788,8 @@ attach_flow: 
flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; if (e->flags & MLX5_ENCAP_ENTRY_VALID) { - attr->dests[out_index].pkt_reformat = e->pkt_reformat; - attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; + attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; *encap_valid = true; } else { *encap_valid = false; @@ -3591,14 +3816,14 @@ static int mlx5e_attach_decap(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_decap_entry *d; struct mlx5e_decap_key key; uintptr_t hash_key; int err = 0; - parse_attr = attr->parse_attr; + parse_attr = flow->attr->parse_attr; if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { NL_SET_ERR_MSG_MOD(extack, "encap header larger than max supported"); @@ -3740,7 +3965,7 @@ static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev, } static int add_vlan_push_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct net_device **out_dev, u32 *action) { @@ -3753,7 +3978,7 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, }; int err; - err = parse_tc_vlan_action(priv, &vlan_act, attr, action); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); if (err) return err; @@ -3766,7 +3991,7 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, } static int add_vlan_pop_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, u32 *action) { struct flow_action_entry vlan_act = { @@ -3777,7 +4002,7 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, nest_level = attr->parse_attr->filter_dev->lower_level - priv->netdev->lower_level; while (nest_level--) { - err = parse_tc_vlan_action(priv, &vlan_act, attr, action); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); if (err) return err; } @@ -3838,59 +4063,20 @@ static bool is_duplicated_output_device(struct net_device *dev, return false; } -static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw, - struct mlx5e_tc_flow *flow, - const struct flow_action_entry *act, - u32 actions, - struct netlink_ext_ack *extack) -{ - u32 max_chain = mlx5_esw_chains_get_chain_range(esw); - struct mlx5_esw_flow_attr *attr = flow->esw_attr; - bool ft_flow = mlx5e_is_ft_flow(flow); - u32 dest_chain = act->chain_index; - - if (ft_flow) { - NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); - return -EOPNOTSUPP; - } - - if (!mlx5_esw_chains_backwards_supported(esw) && - dest_chain <= attr->chain) { - NL_SET_ERR_MSG_MOD(extack, - "Goto lower numbered chain isn't supported"); - return -EOPNOTSUPP; - } - if (dest_chain > max_chain) { - NL_SET_ERR_MSG_MOD(extack, - "Requested destination chain is out of supported range"); - return -EOPNOTSUPP; - } - - if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_DECAP) && - !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_and_fwd_to_table)) { - NL_SET_ERR_MSG_MOD(extack, - "Goto chain is not allowed if action has reformat or decap"); - return -EOPNOTSUPP; - } - - return 0; -} - static int verify_uplink_forwarding(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct net_device *out_dev, struct netlink_ext_ack *extack) { + struct 
mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rep_priv; /* Forwarding non encapsulated traffic between * uplink ports is allowed only if * termination_table_raw_traffic cap is set. * - * Input vport was stored esw_attr->in_rep. + * Input vport was stored in attr->in_rep. * In LAG case, *priv* is the private data of * uplink which may be not the input vport. */ @@ -3925,13 +4111,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; const struct ip_tunnel_info *info = NULL; + struct mlx5_flow_attr *attr = flow->attr; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; + struct mlx5_esw_flow_attr *esw_attr; bool encap = false, decap = false; u32 action = attr->action; int err, i, if_count = 0; @@ -3944,12 +4131,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; break; + case FLOW_ACTION_TRAP: + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, + "action trap is supported as a sole action only"); + return -EOPNOTSUPP; + } + action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT); + attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + break; case FLOW_ACTION_MPLS_PUSH: if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) || @@ -3990,7 +4190,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (!flow_flag_test(flow, L3_TO_L2_DECAP)) { action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - attr->split_count = attr->out_count; + esw_attr->split_count = esw_attr->out_count; } break; case FLOW_ACTION_CSUM: @@ -4027,27 +4227,27 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { + if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, "can't support more output ports, can't offload forwarding"); netdev_warn(priv->netdev, "can't support more than %d output ports, can't offload forwarding\n", - attr->out_count); + esw_attr->out_count); return -EOPNOTSUPP; } action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; if (encap) { - parse_attr->mirred_ifindex[attr->out_count] = + parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[attr->out_count] = dup_tun_info(info); - if (!parse_attr->tun_info[attr->out_count]) + parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info); + if (!parse_attr->tun_info[esw_attr->out_count]) return -ENOMEM; encap = false; - attr->dests[attr->out_count].flags |= + esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; - attr->out_count++; + esw_attr->out_count++; /* attr->dests[].rep is resolved when we * handle encap */ @@ -4096,9 +4296,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, out_priv = netdev_priv(out_dev); rpriv =
out_priv->ppriv; - attr->dests[attr->out_count].rep = rpriv->rep; - attr->dests[attr->out_count].mdev = out_priv->mdev; - attr->out_count++; + esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; + esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; + esw_attr->out_count++; } else if (parse_attr->filter_dev != priv->netdev) { /* All mlx5 devices are called to configure * high level device filters. Therefore, the @@ -4136,12 +4336,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, act, parse_attr, hdrs, &action, extack); } else { - err = parse_tc_vlan_action(priv, act, attr, &action); + err = parse_tc_vlan_action(priv, act, esw_attr, &action); } if (err) return err; - attr->split_count = attr->out_count; + esw_attr->split_count = esw_attr->out_count; break; case FLOW_ACTION_VLAN_MANGLE: err = add_vlan_rewrite_action(priv, @@ -4151,14 +4351,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (err) return err; - attr->split_count = attr->out_count; + esw_attr->split_count = esw_attr->out_count; break; case FLOW_ACTION_TUNNEL_DECAP: decap = true; break; case FLOW_ACTION_GOTO: - err = mlx5_validate_goto_chain(esw, flow, act, action, - extack); + err = validate_goto_chain(priv, flow, act, action, + extack); if (err) return err; @@ -4166,7 +4366,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->dest_chain = act->chain_index; break; case FLOW_ACTION_CT: - err = mlx5_tc_ct_parse_action(priv, attr, act, extack); + err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); if (err) return err; @@ -4205,7 +4405,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) - attr->split_count = 0; + esw_attr->split_count = 0; } } @@ -4245,7 +4445,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { NL_SET_ERR_MSG_MOD(extack, "current firmware doesn't support split rule for port mirroring"); netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); @@ -4296,25 +4496,37 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) { - struct mlx5_esw_flow_attr *attr = flow->esw_attr; - bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + struct mlx5_flow_attr *attr = flow->attr; + bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK && flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); - bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, + bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); if (!esw_paired) return false; - if ((mlx5_lag_is_sriov(attr->in_mdev) || - mlx5_lag_is_multipath(attr->in_mdev)) && + if ((mlx5_lag_is_sriov(esw_attr->in_mdev) || + mlx5_lag_is_multipath(esw_attr->in_mdev)) && (is_rep_ingress || act_is_encap)) return true; return false; } +struct mlx5_flow_attr * +mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) +{ + u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ? 
+ sizeof(struct mlx5_esw_flow_attr) : + sizeof(struct mlx5_nic_flow_attr); + struct mlx5_flow_attr *attr; + + return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); +} + static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct flow_cls_offload *f, unsigned long flow_flags, @@ -4322,19 +4534,26 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct mlx5e_tc_flow **__flow) { struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr; struct mlx5e_tc_flow *flow; - int out_index, err; + int err = -ENOMEM; + int out_index; - flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); + flow = kzalloc(sizeof(*flow), GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); - if (!parse_attr || !flow) { - err = -ENOMEM; + if (!parse_attr || !flow) goto err_free; - } - flow->cookie = f->cookie; flow->flags = flow_flags; + flow->cookie = f->cookie; flow->priv = priv; + + attr = mlx5_alloc_flow_attr(get_flow_name_space(flow)); + if (!attr) + goto err_free; + + flow->attr = attr; + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) INIT_LIST_HEAD(&flow->encaps[out_index].list); INIT_LIST_HEAD(&flow->hairpin); @@ -4354,7 +4573,17 @@ err_free: } static void -mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, +mlx5e_flow_attr_init(struct mlx5_flow_attr *attr, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct flow_cls_offload *f) +{ + attr->parse_attr = parse_attr; + attr->chain = f->common.chain_index; + attr->prio = f->common.prio; +} + +static void +mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr, struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct flow_cls_offload *f, @@ -4362,10 +4591,9 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, struct mlx5_core_dev *in_mdev) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - esw_attr->parse_attr = parse_attr; - esw_attr->chain = f->common.chain_index; - esw_attr->prio = f->common.prio; + mlx5e_flow_attr_init(attr, parse_attr, f); esw_attr->in_rep = in_rep; esw_attr->in_mdev = in_mdev; @@ -4399,7 +4627,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, goto out; parse_attr->filter_dev = filter_dev; - mlx5e_flow_esw_attr_init(flow->esw_attr, + mlx5e_flow_esw_attr_init(flow->attr, priv, parse_attr, f, in_rep, in_mdev); @@ -4409,8 +4637,8 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, goto err_free; /* actions validation depends on parsing the ct matches first */ - err = mlx5_tc_ct_match_add(priv, &parse_attr->spec, f, - &flow->esw_attr->ct_attr, extack); + err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, + &flow->attr->ct_attr, extack); if (err) goto err_free; @@ -4441,6 +4669,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, { struct mlx5e_priv *priv = flow->priv, *peer_priv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_rep_priv *peer_urpriv; @@ -4460,15 +4689,15 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, * original flow and packets redirected from uplink use the * peer mdev. 
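
mlx5_alloc_flow_attr() above replaces the zero-length union that used to sit at the tail of struct mlx5e_tc_flow: one allocation now carries the common mlx5_flow_attr plus whichever namespace-specific extension applies. A sketch of the layout this implies (the trailing flexible member is an assumption, based on the esw_attr[0]/nic_attr[0] idiom removed earlier in this patch):

	/* [ struct mlx5_flow_attr | esw or nic tail ]  <- one kzalloc */
	size_t ext = (type == MLX5_FLOW_NAMESPACE_FDB) ?
		     sizeof(struct mlx5_esw_flow_attr) :	/* FDB rules */
		     sizeof(struct mlx5_nic_flow_attr);		/* NIC rules */
	struct mlx5_flow_attr *attr = kzalloc(sizeof(*attr) + ext, GFP_KERNEL);

The fixed-size ESW_FLOW_ATTR_SZ copies in the slow-path helpers above rely on exactly this layout.
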
*/ - if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK) + if (attr->in_rep->vport == MLX5_VPORT_UPLINK) in_mdev = peer_priv->mdev; else in_mdev = priv->mdev; - parse_attr = flow->esw_attr->parse_attr; + parse_attr = flow->attr->parse_attr; peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags, parse_attr->filter_dev, - flow->esw_attr->in_rep, in_mdev); + attr->in_rep, in_mdev); if (IS_ERR(peer_flow)) { err = PTR_ERR(peer_flow); goto out; @@ -4532,9 +4761,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; int attr_size, err; - /* multi-chain not supported for NIC rules */ - if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + return -EOPNOTSUPP; + } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) { return -EOPNOTSUPP; + } flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); attr_size = sizeof(struct mlx5_nic_flow_attr); @@ -4544,11 +4776,18 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, goto out; parse_attr->filter_dev = filter_dev; + mlx5e_flow_attr_init(flow->attr, parse_attr, f); + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, f, filter_dev); if (err) goto err_free; + err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, + &flow->attr->ct_attr, extack); + if (err) + goto err_free; + err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free; @@ -4558,14 +4797,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, goto err_free; flow_flag_set(flow, OFFLOADED); - kvfree(parse_attr); *__flow = flow; return 0; err_free: mlx5e_flow_put(priv, flow); - kvfree(parse_attr); out: return err; } @@ -4940,9 +5177,27 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } +static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev) +{ + int tc_grp_size, tc_tbl_size; + u32 max_flow_counter; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); + + tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, + BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + + return tc_tbl_size; +} + int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_chains_attr attr = {}; int err; mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); @@ -4954,6 +5209,27 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) if (err) return err; + if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK; + } + attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; + attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev); + attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; + attr.default_ft = priv->fs.vlan.ft.t; + + tc->chains = mlx5_chains_create(dev, &attr); + if (IS_ERR(tc->chains)) { + err = PTR_ERR(tc->chains); + goto err_chains; + } + + tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, + MLX5_FLOW_NAMESPACE_KERNEL); + if (IS_ERR(tc->ct)) + goto err_ct; + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, &tc->netdevice_nb, @@ -4961,8 +5237,17 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) if (err) { 
tc->netdevice_nb.notifier_call = NULL; mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); + goto err_reg; } + return 0; + +err_reg: + mlx5_tc_ct_clean(tc->ct); +err_ct: + mlx5_chains_destroy(tc->chains); +err_chains: + rhashtable_destroy(&tc->ht); return err; } @@ -4987,28 +5272,38 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr); mutex_destroy(&tc->hairpin_tbl_lock); - rhashtable_destroy(&tc->ht); + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); if (!IS_ERR_OR_NULL(tc->t)) { - mlx5_destroy_flow_table(tc->t); + mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL); tc->t = NULL; } mutex_destroy(&tc->t_lock); + + mlx5_tc_ct_clean(tc->ct); + mlx5_chains_destroy(tc->chains); } int mlx5e_tc_esw_init(struct rhashtable *tc_ht) { const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *priv; + struct mlx5e_rep_priv *rpriv; struct mapping_ctx *mapping; - int err; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + int err = 0; uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); - priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; - err = mlx5_tc_ct_init(uplink_priv); - if (err) + uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), + esw_chains(esw), + &esw->offloads.mod_hdr, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(uplink_priv->ct_priv)) goto err_ct; mapping = mapping_create(sizeof(struct tunnel_match_key), @@ -5037,7 +5332,7 @@ err_ht_init: err_enc_opts_mapping: mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: - mlx5_tc_ct_clean(uplink_priv); + mlx5_tc_ct_clean(uplink_priv->ct_priv); err_ct: netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); @@ -5051,10 +5346,11 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); mapping_destroy(uplink_priv->tunnel_mapping); - mlx5_tc_ct_clean(uplink_priv); + mlx5_tc_ct_clean(uplink_priv->ct_priv); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) @@ -5119,3 +5415,44 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, return -EOPNOTSUPP; } } + +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain = 0, chain_tag, reg_b, zone_restore_id; + struct mlx5e_priv *priv = netdev_priv(skb->dev); + struct mlx5e_tc_table *tc = &priv->fs.tc; + struct tc_skb_ext *tc_skb_ext; + int err; + + reg_b = be32_to_cpu(cqe->ft_metadata); + + chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + + err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find chain for chain tag: %d, err: %d\n", + chain_tag, err); + return false; + } + + if (chain) { + tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON(!tc_skb_ext)) + return false; + + tc_skb_ext->chain = chain; + + zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) & + ZONE_RESTORE_MAX; + + if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, + zone_restore_id)) + return false; + } +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return true; +} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 437f680728fd..3b979008143d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -35,17 +35,57 @@ #include <net/pkt_cls.h> #include "en.h" +#include "eswitch.h" +#include "en/tc_ct.h" #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff #ifdef CONFIG_MLX5_ESWITCH +#define NIC_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\ + sizeof(struct mlx5_nic_flow_attr)) +#define ESW_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\ + sizeof(struct mlx5_esw_flow_attr)) +#define ns_to_attr_sz(ns) (((ns) == MLX5_FLOW_NAMESPACE_FDB) ?\ + ESW_FLOW_ATTR_SZ :\ + NIC_FLOW_ATTR_SZ) + + int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); struct mlx5e_tc_update_priv { struct net_device *tun_dev; }; +struct mlx5_nic_flow_attr { + u32 flow_tag; + u32 hairpin_tirn; + struct mlx5_flow_table *hairpin_ft; +}; + +struct mlx5_flow_attr { + u32 action; + struct mlx5_fc *counter; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_ct_attr ct_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + u32 chain; + u16 prio; + u32 dest_chain; + struct mlx5_flow_table *ft; + struct mlx5_flow_table *dest_ft; + u8 inner_match_level; + u8 outer_match_level; + u32 flags; + union { + struct mlx5_esw_flow_attr esw_attr[0]; + struct mlx5_nic_flow_attr nic_attr[0]; + }; +}; + +#define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16 +#define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0) + #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) struct tunnel_match_key { @@ -90,6 +130,7 @@ enum { int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags); @@ -133,6 +174,8 @@ enum mlx5e_tc_attr_to_reg { MARK_TO_REG, LABELS_TO_REG, FTEID_TO_REG, + NIC_CHAIN_TO_REG, + NIC_ZONE_RESTORE_TO_REG, }; struct mlx5e_tc_attr_to_reg_mapping { @@ -150,6 +193,7 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, enum mlx5e_tc_attr_to_reg type, u32 data); @@ -181,14 +225,42 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv); +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5_flow_handle * +mlx5_tc_rule_insert(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void +mlx5_tc_rule_delete(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + #else /* CONFIG_MLX5_CLS_ACT */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} static inline int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { return -EOPNOTSUPP; } + #endif /* CONFIG_MLX5_CLS_ACT */ +struct mlx5_flow_attr *mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type); + +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + 
struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} @@ -203,4 +275,29 @@ mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { return -EOPNOTSUPP; } #endif +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain, reg_b; + + reg_b = be32_to_cpu(cqe->ft_metadata); + + chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + if (chain) + return true; +#endif + + return false; +} + +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +#else /* CONFIG_MLX5_CLS_ACT */ +static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) +{ return false; } +static inline bool +mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb) +{ return true; } +#endif + #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index da596de3abba..82b4419af9d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -144,9 +144,29 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); } +/* RM 2311217: no L4 inner checksum for IPsec tunnel type packet */ +static void +ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + if (skb->encapsulation) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + sq->stats->csum_partial_inner++; + } else { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial++; + } +} + static inline void mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { + if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) { + ipsec_txwqe_build_eseg_csum(sq, skb, eseg); + return; + } + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; if (skb->encapsulation) { @@ -232,131 +252,188 @@ dma_unmap_wqe_err: return -ENOMEM; } +struct mlx5e_tx_attr { + u32 num_bytes; + u16 headlen; + u16 ihs; + __be16 mss; + u16 insz; + u8 opcode; +}; + +struct mlx5e_tx_wqe_attr { + u16 ds_cnt; + u16 ds_cnt_inl; + u16 ds_cnt_ids; + u8 num_wqebbs; +}; + +static u8 +mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel) +{ + u8 mode; + +#ifdef CONFIG_MLX5_EN_TLS + if (accel && accel->tls.tls_tisn) + return MLX5_INLINE_MODE_TCP_UDP; +#endif + + mode = sq->min_inline_mode; + + if (skb_vlan_tag_present(skb) && + test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); + + return mode; +} + +static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel, + struct mlx5e_tx_attr *attr) +{ + struct mlx5e_sq_stats *stats = sq->stats; + + if (skb_is_gso(skb)) { + u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb); + + *attr = (struct mlx5e_tx_attr) { + .opcode = MLX5_OPCODE_LSO, + .mss = cpu_to_be16(skb_shinfo(skb)->gso_size), + .ihs = ihs, + .num_bytes = skb->len + 
(skb_shinfo(skb)->gso_segs - 1) * ihs, + .headlen = skb_headlen(skb) - ihs, + }; + + stats->packets += skb_shinfo(skb)->gso_segs; + } else { + u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel); + u16 ihs = mlx5e_calc_min_inline(mode, skb); + + *attr = (struct mlx5e_tx_attr) { + .opcode = MLX5_OPCODE_SEND, + .mss = cpu_to_be16(0), + .ihs = ihs, + .num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN), + .headlen = skb_headlen(skb) - ihs, + }; + + stats->packets++; + } + + attr->insz = mlx5e_accel_tx_ids_len(sq, accel); + stats->bytes += attr->num_bytes; +} + +static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr, + struct mlx5e_tx_wqe_attr *wqe_attr) +{ + u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT; + u16 ds_cnt_inl = 0; + u16 ds_cnt_ids = 0; + + if (attr->insz) + ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz, + MLX5_SEND_WQE_DS); + + ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids; + if (attr->ihs) { + u16 inl = attr->ihs - INL_HDR_START_SZ; + + if (skb_vlan_tag_present(skb)) + inl += VLAN_HLEN; + + ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + *wqe_attr = (struct mlx5e_tx_wqe_attr) { + .ds_cnt = ds_cnt, + .ds_cnt_inl = ds_cnt_inl, + .ds_cnt_ids = ds_cnt_ids, + .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), + }; +} + +static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb) +{ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; +} + +static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) { + netif_tx_stop_queue(sq->txq); + sq->stats->stopped++; + } +} + static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, - u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma, + const struct mlx5e_tx_attr *attr, + const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma, struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg, bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; bool send_doorbell; - wi->num_bytes = num_bytes; - wi->num_dma = num_dma; - wi->num_wqebbs = num_wqebbs; - wi->skb = skb; + *wi = (struct mlx5e_tx_wqe_info) { + .skb = skb, + .num_bytes = attr->num_bytes, + .num_dma = num_dma, + .num_wqebbs = wqe_attr->num_wqebbs, + .num_fifo_pkts = 0, + }; - cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + mlx5e_tx_skb_update_hwts_flags(skb); sq->pc += wi->num_wqebbs; - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) { - netif_tx_stop_queue(sq->txq); - sq->stats->stopped++; - } - send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes, - xmit_more); + mlx5e_tx_check_stop(sq); + + send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more); if (send_doorbell) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); } -void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) +static void +mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr, + struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) { - 
struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_eth_seg *eseg; struct mlx5_wqe_data_seg *dseg; struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; - u16 headlen, ihs, contig_wqebbs_room; - u16 ds_cnt, ds_cnt_inl = 0; - u8 num_wqebbs, opcode; - u32 num_bytes; int num_dma; - __be16 mss; - - /* Calc ihs and ds cnt, no writes to wqe yet */ - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - if (skb_is_gso(skb)) { - opcode = MLX5_OPCODE_LSO; - mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - ihs = mlx5e_tx_get_gso_ihs(sq, skb); - num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; - stats->packets += skb_shinfo(skb)->gso_segs; - } else { - u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb); - opcode = MLX5_OPCODE_SEND; - mss = 0; - ihs = mlx5e_calc_min_inline(mode, skb); - num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - stats->packets++; - } - - stats->bytes += num_bytes; stats->xmit_more += xmit_more; - headlen = skb->len - ihs - skb->data_len; - ds_cnt += !!headlen; - ds_cnt += skb_shinfo(skb)->nr_frags; - - if (ihs) { - ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN; - - ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); - ds_cnt += ds_cnt_inl; - } - - num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); - if (unlikely(contig_wqebbs_room < num_wqebbs)) { -#ifdef CONFIG_MLX5_EN_IPSEC - struct mlx5_wqe_eth_seg cur_eth = wqe->eth; -#endif -#ifdef CONFIG_MLX5_EN_TLS - struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl; -#endif - mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); - pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - wqe = MLX5E_TX_FETCH_WQE(sq, pi); -#ifdef CONFIG_MLX5_EN_IPSEC - wqe->eth = cur_eth; -#endif -#ifdef CONFIG_MLX5_EN_TLS - wqe->ctrl = cur_ctrl; -#endif - } - /* fill wqe */ wi = &sq->db.wqe_info[pi]; cseg = &wqe->ctrl; eseg = &wqe->eth; dseg = wqe->data; -#if IS_ENABLED(CONFIG_GENEVE) - if (skb->encapsulation) - mlx5e_tx_tunnel_accel(skb, eseg); -#endif - mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + eseg->mss = attr->mss; - eseg->mss = mss; - - if (ihs) { - eseg->inline_hdr.sz = cpu_to_be16(ihs); + if (attr->ihs) { if (skb_vlan_tag_present(skb)) { - ihs -= VLAN_HLEN; - mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs); + eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN); + mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs); stats->added_vlan_packets++; } else { - memcpy(eseg->inline_hdr.start, skb->data, ihs); + eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs); + memcpy(eseg->inline_hdr.start, skb->data, attr->ihs); } - dseg += ds_cnt_inl; + dseg += wqe_attr->ds_cnt_inl; } else if (skb_vlan_tag_present(skb)) { eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) @@ -365,12 +442,13 @@ void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, stats->added_vlan_packets++; } - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg); + dseg += wqe_attr->ds_cnt_ids; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs, + attr->headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop; - mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, - num_dma, wi, cseg, xmit_more); + mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more); return; @@ -379,10 +457,173 @@ err_drop: dev_kfree_skb_any(skb); } +static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) +{ + 
return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs && + !attr->insz; +} + +static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + + /* Assumes the session is already running and has at least one packet. */ + return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); +} + +static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, + struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + prefetchw(wqe->data); + + *session = (struct mlx5e_tx_mpwqe) { + .wqe = wqe, + .bytes_count = 0, + .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .pkt_count = 0, + .inline_on = 0, + }; + + memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); + + sq->stats->mpwqe_blks++; +} + +static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq) +{ + return sq->mpwqe.wqe; +} + +static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_data_seg *dseg; + + dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; + + session->pkt_count++; + session->bytes_count += txd->len; + + dseg->addr = cpu_to_be64(txd->dma_addr); + dseg->byte_count = cpu_to_be32(txd->len); + dseg->lkey = sq->mkey_be; + session->ds_count++; + + sq->stats->mpwqe_pkts++; +} + +static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + u8 ds_count = session->ds_count; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_tx_wqe_info *wi; + u16 pi; + + cseg = &session->wqe->ctrl; + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + *wi = (struct mlx5e_tx_wqe_info) { + .skb = NULL, + .num_bytes = session->bytes_count, + .num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS), + .num_dma = session->pkt_count, + .num_fifo_pkts = session->pkt_count, + }; + + sq->pc += wi->num_wqebbs; + + session->wqe = NULL; + + mlx5e_tx_check_stop(sq); + + return cseg; +} + +static void +mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, bool xmit_more) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_xmit_data txd; + + if (!mlx5e_tx_mpwqe_session_is_active(sq)) { + mlx5e_tx_mpwqe_session_start(sq, eseg); + } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { + mlx5e_tx_mpwqe_session_complete(sq); + mlx5e_tx_mpwqe_session_start(sq, eseg); + } + + sq->stats->xmit_more += xmit_more; + + txd.data = skb->data; + txd.len = skb->len; + + txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) + goto err_unmap; + mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); + + mlx5e_skb_fifo_push(sq, skb); + + mlx5e_tx_mpwqe_add_dseg(sq, &txd); + + mlx5e_tx_skb_update_hwts_flags(skb); + + if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { + /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. 
*/ + cseg = mlx5e_tx_mpwqe_session_complete(sq); + + if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + } else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) { + /* Might stop the queue, but we were asked to ring the doorbell anyway. */ + cseg = mlx5e_tx_mpwqe_session_complete(sq); + + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + } + + return; + +err_unmap: + mlx5e_dma_unmap_wqe_err(sq, 1); + sq->stats->dropped++; + dev_kfree_skb_any(skb); +} + +void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) +{ + /* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */ + if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq))) + mlx5e_tx_mpwqe_session_complete(sq); +} + +static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, + struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) +{ + if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg))) + return false; + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + return true; +} + netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_accel_tx_state accel = {}; + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; struct mlx5e_tx_wqe *wqe; struct mlx5e_txqsq *sq; u16 pi; @@ -391,21 +632,92 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) /* May send SKBs and WQEs. */ if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) - goto out; + return NETDEV_TX_OK; - pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr); + + if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) { + if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { + struct mlx5_wqe_eth_seg eseg = {}; + + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &eseg))) + return NETDEV_TX_OK; + + mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); + return NETDEV_TX_OK; + } + + mlx5e_tx_mpwqe_ensure_complete(sq); + } + + mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); /* May update the WQE, but may not post other WQEs. 
*/ - if (unlikely(!mlx5e_accel_tx_finish(priv, sq, skb, wqe, &accel))) - goto out; + mlx5e_accel_tx_finish(sq, wqe, &accel, + (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth))) + return NETDEV_TX_OK; - mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more()); + mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more()); -out: return NETDEV_TX_OK; } +void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more) +{ + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); + mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + mlx5e_txwqe_build_eseg_csum(sq, skb, &wqe->eth); + mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more); +} + +static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + int i; + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } +} + +static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_cqe64 *cqe, int napi_budget) +{ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct skb_shared_hwtstamps hwts = {}; + u64 ts = get_cqe_ts(cqe); + + hwts.hwtstamp = mlx5_timecounter_cyc2time(sq->clock, ts); + skb_tstamp_tx(skb, &hwts); + } + + napi_consume_skb(skb, napi_budget); +} + +static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, + struct mlx5_cqe64 *cqe, int napi_budget) +{ + int i; + + for (i = 0; i < wi->num_fifo_pkts; i++) { + struct sk_buff *skb = mlx5e_skb_fifo_pop(sq); + + mlx5e_consume_skb(sq, skb, cqe, napi_budget); + } +} + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_sq_stats *stats; @@ -451,42 +763,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wqe_counter = be16_to_cpu(cqe->wqe_counter); do { - struct sk_buff *skb; - int j; - last_wqe = (sqcc == wqe_counter); ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.wqe_info[ci]; - skb = wi->skb; - if (unlikely(!skb)) { - mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); - sqcc += wi->num_wqebbs; - continue; - } + sqcc += wi->num_wqebbs; - if (unlikely(skb_shinfo(skb)->tx_flags & - SKBTX_HW_TSTAMP)) { - struct skb_shared_hwtstamps hwts = {}; + if (likely(wi->skb)) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget); - hwts.hwtstamp = - mlx5_timecounter_cyc2time(sq->clock, - get_cqe_ts(cqe)); - skb_tstamp_tx(skb, &hwts); + npkts++; + nbytes += wi->num_bytes; + continue; } - for (j = 0; j < wi->num_dma; j++) { - struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, dma_fifo_cc++); + if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, + &dma_fifo_cc))) + continue; - mlx5e_tx_dma_unmap(sq->pdev, dma); - } + if (wi->num_fifo_pkts) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget); - npkts++; - nbytes += wi->num_bytes; - sqcc += wi->num_wqebbs; - napi_consume_skb(skb, napi_budget); + npkts += wi->num_fifo_pkts; + nbytes += wi->num_bytes; + } } while (!last_wqe); if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { @@ -525,13 +828,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == 
MLX5E_TX_CQ_POLL_BUDGET); } +static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi) +{ + int i; + + for (i = 0; i < wi->num_fifo_pkts; i++) + dev_kfree_skb_any(mlx5e_skb_fifo_pop(sq)); +} + void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) { struct mlx5e_tx_wqe_info *wi; u32 dma_fifo_cc, nbytes = 0; u16 ci, sqcc, npkts = 0; - struct sk_buff *skb; - int i; sqcc = sq->cc; dma_fifo_cc = sq->dma_fifo_cc; @@ -539,25 +848,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) while (sqcc != sq->pc) { ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.wqe_info[ci]; - skb = wi->skb; - if (!skb) { - mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); - sqcc += wi->num_wqebbs; + sqcc += wi->num_wqebbs; + + if (likely(wi->skb)) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + dev_kfree_skb_any(wi->skb); + + npkts++; + nbytes += wi->num_bytes; continue; } - for (i = 0; i < wi->num_dma; i++) { - struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, dma_fifo_cc++); + if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc))) + continue; - mlx5e_tx_dma_unmap(sq->pdev, dma); - } + if (wi->num_fifo_pkts) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_tx_wi_kfree_fifo_skbs(sq, wi); - dev_kfree_skb_any(skb); - npkts++; - nbytes += wi->num_bytes; - sqcc += wi->num_wqebbs; + npkts += wi->num_fifo_pkts; + nbytes += wi->num_bytes; + } } sq->dma_fifo_cc = dma_fifo_cc; @@ -576,9 +888,34 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, dseg->av.key.qkey.qkey = cpu_to_be32(dqkey); } +static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, + struct mlx5e_tx_wqe_attr *wqe_attr) +{ + u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS; + u16 ds_cnt_inl = 0; + + ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags; + + if (attr->ihs) { + u16 inl = attr->ihs - INL_HDR_START_SZ; + + ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + *wqe_attr = (struct mlx5e_tx_wqe_attr) { + .ds_cnt = ds_cnt, + .ds_cnt_inl = ds_cnt_inl, + .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), + }; +} + void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more) { + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; struct mlx5i_tx_wqe *wqe; struct mlx5_wqe_datagram_seg *datagram; @@ -588,47 +925,17 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; - u16 ds_cnt, ds_cnt_inl = 0; - u8 num_wqebbs, opcode; - u16 headlen, ihs, pi; - u32 num_bytes; int num_dma; - __be16 mss; + u16 pi; - /* Calc ihs and ds cnt, no writes to wqe yet */ - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - if (skb_is_gso(skb)) { - opcode = MLX5_OPCODE_LSO; - mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - ihs = mlx5e_tx_get_gso_ihs(sq, skb); - num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; - stats->packets += skb_shinfo(skb)->gso_segs; - } else { - u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb); + mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); + mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr); - opcode = MLX5_OPCODE_SEND; - mss = 0; - ihs = mlx5e_calc_min_inline(mode, skb); - num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - stats->packets++; - } + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); + wqe = MLX5I_SQ_FETCH_WQE(sq, pi); - stats->bytes += num_bytes; stats->xmit_more += xmit_more; - headlen = 
skb->len - ihs - skb->data_len; - ds_cnt += !!headlen; - ds_cnt += skb_shinfo(skb)->nr_frags; - - if (ihs) { - ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); - ds_cnt += ds_cnt_inl; - } - - num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs); - wqe = MLX5I_SQ_FETCH_WQE(sq, pi); - /* fill wqe */ wi = &sq->db.wqe_info[pi]; cseg = &wqe->ctrl; @@ -640,20 +947,20 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); - eseg->mss = mss; + eseg->mss = attr.mss; - if (ihs) { - memcpy(eseg->inline_hdr.start, skb->data, ihs); - eseg->inline_hdr.sz = cpu_to_be16(ihs); - dseg += ds_cnt_inl; + if (attr.ihs) { + memcpy(eseg->inline_hdr.start, skb->data, attr.ihs); + eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); + dseg += wqe_attr.ds_cnt_inl; } - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg); + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs, + attr.headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop; - mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, - num_dma, wi, cseg, xmit_more); + mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 22a19d391e17..8ebfe782f95e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -828,8 +828,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&eq->tasklet_ctx.list); INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); + tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb); eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index 07b2acd7e6b3..c3faae67e4d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -148,6 +148,11 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) esw_acl_egress_vlan_grp_destroy(vport); } +static bool esw_acl_egress_needed(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_eswitch_is_vf_vport(esw, vport_num); +} + int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int table_size = 0; @@ -157,6 +162,9 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport !MLX5_CAP_GEN(esw->dev, prio_tag_required)) return 0; + if (!esw_acl_egress_needed(esw, vport->vport)) + return 0; + esw_acl_egress_ofld_rules_destroy(vport); if (mlx5_esw_acl_egress_fwd2vport_supported(esw)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c deleted file mode 100644 index d5bf908dfecd..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c +++ /dev/null @@ -1,944 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -// Copyright (c) 2020 Mellanox Technologies. 
- -#include <linux/mlx5/driver.h> -#include <linux/mlx5/mlx5_ifc.h> -#include <linux/mlx5/fs.h> - -#include "esw/chains.h" -#include "en/mapping.h" -#include "mlx5_core.h" -#include "fs_core.h" -#include "eswitch.h" -#include "en.h" -#include "en_tc.h" - -#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) -#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) -#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) -#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping) -#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) -#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) -#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) -#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) -#define fdb_ignore_flow_level_supported(esw) \ - (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) -#define fdb_modify_header_fwd_to_table_supported(esw) \ - (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) - -/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), - * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated - * for each flow table pool. We can allocate up to 16M of each pool, - * and we keep track of how much we used via get_next_avail_sz_from_pool. - * Firmware doesn't report any of this for now. - * ESW_POOL is expected to be sorted from large to small and match firmware - * pools. - */ -#define ESW_SIZE (16 * 1024 * 1024) -static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, - 1 * 1024 * 1024, - 64 * 1024, - 128 }; -#define ESW_FT_TBL_SZ (64 * 1024) - -struct mlx5_esw_chains_priv { - struct rhashtable chains_ht; - struct rhashtable prios_ht; - /* Protects above chains_ht and prios_ht */ - struct mutex lock; - - struct mlx5_flow_table *tc_end_fdb; - struct mapping_ctx *chains_mapping; - - int fdb_left[ARRAY_SIZE(ESW_POOLS)]; -}; - -struct fdb_chain { - struct rhash_head node; - - u32 chain; - - int ref; - int id; - - struct mlx5_eswitch *esw; - struct list_head prios_list; - struct mlx5_flow_handle *restore_rule; - struct mlx5_modify_hdr *miss_modify_hdr; -}; - -struct fdb_prio_key { - u32 chain; - u32 prio; - u32 level; -}; - -struct fdb_prio { - struct rhash_head node; - struct list_head list; - - struct fdb_prio_key key; - - int ref; - - struct fdb_chain *fdb_chain; - struct mlx5_flow_table *fdb; - struct mlx5_flow_table *next_fdb; - struct mlx5_flow_group *miss_group; - struct mlx5_flow_handle *miss_rule; -}; - -static const struct rhashtable_params chain_params = { - .head_offset = offsetof(struct fdb_chain, node), - .key_offset = offsetof(struct fdb_chain, chain), - .key_len = sizeof_field(struct fdb_chain, chain), - .automatic_shrinking = true, -}; - -static const struct rhashtable_params prio_params = { - .head_offset = offsetof(struct fdb_prio, node), - .key_offset = offsetof(struct fdb_prio, key), - .key_len = sizeof_field(struct fdb_prio, key), - .automatic_shrinking = true, -}; - -bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) -{ - return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; -} - -bool mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw) -{ - return mlx5_esw_chains_prios_supported(esw) && - fdb_ignore_flow_level_supported(esw); -} - -u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_chains_prios_supported(esw)) - return 1; - - if (fdb_ignore_flow_level_supported(esw)) - return UINT_MAX - 1; - - return FDB_TC_MAX_CHAIN; -} - -u32 
mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw) -{ - return mlx5_esw_chains_get_chain_range(esw) + 1; -} - -u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_chains_prios_supported(esw)) - return 1; - - if (fdb_ignore_flow_level_supported(esw)) - return UINT_MAX; - - return FDB_TC_MAX_PRIO; -} - -static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) -{ - if (fdb_ignore_flow_level_supported(esw)) - return UINT_MAX; - - return FDB_TC_LEVELS_PER_PRIO; -} - -#define POOL_NEXT_SIZE 0 -static int -mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw, - int desired_size) -{ - int i, found_i = -1; - - for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { - if (fdb_pool_left(esw)[i] && ESW_POOLS[i] > desired_size) { - found_i = i; - if (desired_size != POOL_NEXT_SIZE) - break; - } - } - - if (found_i != -1) { - --fdb_pool_left(esw)[found_i]; - return ESW_POOLS[found_i]; - } - - return 0; -} - -static void -mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz) -{ - int i; - - for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { - if (sz == ESW_POOLS[i]) { - ++fdb_pool_left(esw)[i]; - return; - } - } - - WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz); -} - -static void -mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw) -{ - u32 fdb_max; - int i; - - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size); - - for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) - fdb_pool_left(esw)[i] = - ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; -} - -static struct mlx5_flow_table * -mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, - u32 chain, u32 prio, u32 level) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_namespace *ns; - struct mlx5_flow_table *fdb; - int sz; - - if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | - MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - - sz = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? - mlx5_esw_chains_get_avail_sz_from_pool(esw, ESW_FT_TBL_SZ) : - mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); - if (!sz) - return ERR_PTR(-ENOSPC); - ft_attr.max_fte = sz; - - /* We use tc_slow_fdb(esw) as the table's next_ft till - * ignore_flow_level is allowed on FT creation and not just for FTEs. - * Instead caller should add an explicit miss rule if needed. - */ - ft_attr.next_ft = tc_slow_fdb(esw); - - /* The root table(chain 0, prio 1, level 0) is required to be - * connected to the previous prio (FDB_BYPASS_PATH if exists). - * We always create it, as a managed table, in order to align with - * fs_core logic. - */ - if (!fdb_ignore_flow_level_supported(esw) || - (chain == 0 && prio == 1 && level == 0)) { - ft_attr.level = level; - ft_attr.prio = prio - 1; - ns = mlx5_get_fdb_sub_ns(esw->dev, chain); - } else { - ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; - ft_attr.prio = FDB_TC_OFFLOAD; - /* Firmware doesn't allow us to create another level 0 table, - * so we create all unmanaged tables as level 1. - * - * To connect them, we use explicit miss rules with - * ignore_flow_level. Caller is responsible to create - * these rules (if needed). 
- */ - ft_attr.level = 1; - ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); - } - - ft_attr.autogroup.num_reserved_entries = 2; - ft_attr.autogroup.max_num_groups = esw->params.large_group_num; - fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(fdb)) { - esw_warn(esw->dev, - "Failed to create FDB table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", - (int)PTR_ERR(fdb), chain, prio, level, sz); - mlx5_esw_chains_put_sz_to_pool(esw, sz); - return fdb; - } - - return fdb; -} - -static void -mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, - struct mlx5_flow_table *fdb) -{ - mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte); - mlx5_destroy_flow_table(fdb); -} - -static int -create_fdb_chain_restore(struct fdb_chain *fdb_chain) -{ - char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)]; - struct mlx5_eswitch *esw = fdb_chain->esw; - struct mlx5_modify_hdr *mod_hdr; - u32 index; - int err; - - if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw) || - !mlx5_esw_chains_prios_supported(esw)) - return 0; - - err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index); - if (err) - return err; - if (index == MLX5_FS_DEFAULT_FLOW_TAG) { - /* we got the special default flow tag id, so we won't know - * if we actually marked the packet with the restore rule - * we create. - * - * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. - */ - err = mapping_add(esw_chains_mapping(esw), - &fdb_chain->chain, &index); - mapping_remove(esw_chains_mapping(esw), - MLX5_FS_DEFAULT_FLOW_TAG); - if (err) - return err; - } - - fdb_chain->id = index; - - MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); - MLX5_SET(set_action_in, modact, field, - mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield); - MLX5_SET(set_action_in, modact, offset, - mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8); - MLX5_SET(set_action_in, modact, length, - mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8); - MLX5_SET(set_action_in, modact, data, fdb_chain->id); - mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, - 1, modact); - if (IS_ERR(mod_hdr)) { - err = PTR_ERR(mod_hdr); - goto err_mod_hdr; - } - fdb_chain->miss_modify_hdr = mod_hdr; - - fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id); - if (IS_ERR(fdb_chain->restore_rule)) { - err = PTR_ERR(fdb_chain->restore_rule); - goto err_rule; - } - - return 0; - -err_rule: - mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); -err_mod_hdr: - /* Datapath can't find this mapping, so we can safely remove it */ - mapping_remove(esw_chains_mapping(esw), fdb_chain->id); - return err; -} - -static void destroy_fdb_chain_restore(struct fdb_chain *fdb_chain) -{ - struct mlx5_eswitch *esw = fdb_chain->esw; - - if (!fdb_chain->miss_modify_hdr) - return; - - mlx5_del_flow_rules(fdb_chain->restore_rule); - mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); - mapping_remove(esw_chains_mapping(esw), fdb_chain->id); -} - -static struct fdb_chain * -mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) -{ - struct fdb_chain *fdb_chain = NULL; - int err; - - fdb_chain = kvzalloc(sizeof(*fdb_chain), GFP_KERNEL); - if (!fdb_chain) - return ERR_PTR(-ENOMEM); - - fdb_chain->esw = esw; - fdb_chain->chain = chain; - INIT_LIST_HEAD(&fdb_chain->prios_list); - - err = create_fdb_chain_restore(fdb_chain); - if (err) - goto err_restore; - - err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, - chain_params); - if 
(err) - goto err_insert; - - return fdb_chain; - -err_insert: - destroy_fdb_chain_restore(fdb_chain); -err_restore: - kvfree(fdb_chain); - return ERR_PTR(err); -} - -static void -mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) -{ - struct mlx5_eswitch *esw = fdb_chain->esw; - - rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, - chain_params); - - destroy_fdb_chain_restore(fdb_chain); - kvfree(fdb_chain); -} - -static struct fdb_chain * -mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) -{ - struct fdb_chain *fdb_chain; - - fdb_chain = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain, - chain_params); - if (!fdb_chain) { - fdb_chain = mlx5_esw_chains_create_fdb_chain(esw, chain); - if (IS_ERR(fdb_chain)) - return fdb_chain; - } - - fdb_chain->ref++; - - return fdb_chain; -} - -static struct mlx5_flow_handle * -mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, - struct mlx5_flow_table *fdb, - struct mlx5_flow_table *next_fdb) -{ - struct mlx5_eswitch *esw = fdb_chain->esw; - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_act act = {}; - - act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; - act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = next_fdb; - - if (next_fdb == tc_end_fdb(esw) && - mlx5_esw_chains_prios_supported(esw)) { - act.modify_hdr = fdb_chain->miss_modify_hdr; - act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - } - - return mlx5_add_flow_rules(fdb, NULL, &act, &dest, 1); -} - -static int -mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, - struct mlx5_flow_table *next_fdb) -{ - struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; - struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; - struct fdb_prio *pos; - int n = 0, err; - - if (fdb_prio->key.level) - return 0; - - /* Iterate in reverse order until reaching the level 0 rule of - * the previous priority, adding all the miss rules first, so we can - * revert them if any of them fails. - */ - pos = fdb_prio; - list_for_each_entry_continue_reverse(pos, - &fdb_chain->prios_list, - list) { - miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain, - pos->fdb, - next_fdb); - if (IS_ERR(miss_rules[n])) { - err = PTR_ERR(miss_rules[n]); - goto err_prev_rule; - } - - n++; - if (!pos->key.level) - break; - } - - /* Success, delete old miss rules, and update the pointers. 
*/ - n = 0; - pos = fdb_prio; - list_for_each_entry_continue_reverse(pos, - &fdb_chain->prios_list, - list) { - mlx5_del_flow_rules(pos->miss_rule); - - pos->miss_rule = miss_rules[n]; - pos->next_fdb = next_fdb; - - n++; - if (!pos->key.level) - break; - } - - return 0; - -err_prev_rule: - while (--n >= 0) - mlx5_del_flow_rules(miss_rules[n]); - - return err; -} - -static void -mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain) -{ - if (--fdb_chain->ref == 0) - mlx5_esw_chains_destroy_fdb_chain(fdb_chain); -} - -static struct fdb_prio * -mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, - u32 chain, u32 prio, u32 level) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_handle *miss_rule = NULL; - struct mlx5_flow_group *miss_group; - struct fdb_prio *fdb_prio = NULL; - struct mlx5_flow_table *next_fdb; - struct fdb_chain *fdb_chain; - struct mlx5_flow_table *fdb; - struct list_head *pos; - u32 *flow_group_in; - int err; - - fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain); - if (IS_ERR(fdb_chain)) - return ERR_CAST(fdb_chain); - - fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL); - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!fdb_prio || !flow_group_in) { - err = -ENOMEM; - goto err_alloc; - } - - /* Chain's prio list is sorted by prio and level. - * And all levels of some prio point to the next prio's level 0. - * Example list (prio, level): - * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) - * In hardware, we will we have the following pointers: - * (3,0) -> (5,0) -> (7,0) -> Slow path - * (3,1) -> (5,0) - * (5,1) -> (7,0) - * (6,1) -> (7,0) - */ - - /* Default miss for each chain: */ - next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? - tc_slow_fdb(esw) : - tc_end_fdb(esw); - list_for_each(pos, &fdb_chain->prios_list) { - struct fdb_prio *p = list_entry(pos, struct fdb_prio, list); - - /* exit on first pos that is larger */ - if (prio < p->key.prio || (prio == p->key.prio && - level < p->key.level)) { - /* Get next level 0 table */ - next_fdb = p->key.level == 0 ? 
p->fdb : p->next_fdb; - break; - } - } - - fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - goto err_create; - } - - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, - fdb->max_fte - 2); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - fdb->max_fte - 1); - miss_group = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(miss_group)) { - err = PTR_ERR(miss_group); - goto err_group; - } - - /* Add miss rule to next_fdb */ - miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb); - if (IS_ERR(miss_rule)) { - err = PTR_ERR(miss_rule); - goto err_miss_rule; - } - - fdb_prio->miss_group = miss_group; - fdb_prio->miss_rule = miss_rule; - fdb_prio->next_fdb = next_fdb; - fdb_prio->fdb_chain = fdb_chain; - fdb_prio->key.chain = chain; - fdb_prio->key.prio = prio; - fdb_prio->key.level = level; - fdb_prio->fdb = fdb; - - err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node, - prio_params); - if (err) - goto err_insert; - - list_add(&fdb_prio->list, pos->prev); - - /* Table is ready, connect it */ - err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb); - if (err) - goto err_update; - - kvfree(flow_group_in); - return fdb_prio; - -err_update: - list_del(&fdb_prio->list); - rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, - prio_params); -err_insert: - mlx5_del_flow_rules(miss_rule); -err_miss_rule: - mlx5_destroy_flow_group(miss_group); -err_group: - mlx5_esw_chains_destroy_fdb_table(esw, fdb); -err_create: -err_alloc: - kvfree(fdb_prio); - kvfree(flow_group_in); - mlx5_esw_chains_put_fdb_chain(fdb_chain); - return ERR_PTR(err); -} - -static void -mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw, - struct fdb_prio *fdb_prio) -{ - struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; - - WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio, - fdb_prio->next_fdb)); - - list_del(&fdb_prio->list); - rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, - prio_params); - mlx5_del_flow_rules(fdb_prio->miss_rule); - mlx5_destroy_flow_group(fdb_prio->miss_group); - mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb); - mlx5_esw_chains_put_fdb_chain(fdb_chain); - kvfree(fdb_prio); -} - -struct mlx5_flow_table * -mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) -{ - struct mlx5_flow_table *prev_fts; - struct fdb_prio *fdb_prio; - struct fdb_prio_key key; - int l = 0; - - if ((chain > mlx5_esw_chains_get_chain_range(esw) && - chain != mlx5_esw_chains_get_ft_chain(esw)) || - prio > mlx5_esw_chains_get_prio_range(esw) || - level > mlx5_esw_chains_get_level_range(esw)) - return ERR_PTR(-EOPNOTSUPP); - - /* create earlier levels for correct fs_core lookup when - * connecting tables. 
- */ - for (l = 0; l < level; l++) { - prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l); - if (IS_ERR(prev_fts)) { - fdb_prio = ERR_CAST(prev_fts); - goto err_get_prevs; - } - } - - key.chain = chain; - key.prio = prio; - key.level = level; - - mutex_lock(&esw_chains_lock(esw)); - fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, - prio_params); - if (!fdb_prio) { - fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain, - prio, level); - if (IS_ERR(fdb_prio)) - goto err_create_prio; - } - - ++fdb_prio->ref; - mutex_unlock(&esw_chains_lock(esw)); - - return fdb_prio->fdb; - -err_create_prio: - mutex_unlock(&esw_chains_lock(esw)); -err_get_prevs: - while (--l >= 0) - mlx5_esw_chains_put_table(esw, chain, prio, l); - return ERR_CAST(fdb_prio); -} - -void -mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) -{ - struct fdb_prio *fdb_prio; - struct fdb_prio_key key; - - key.chain = chain; - key.prio = prio; - key.level = level; - - mutex_lock(&esw_chains_lock(esw)); - fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, - prio_params); - if (!fdb_prio) - goto err_get_prio; - - if (--fdb_prio->ref == 0) - mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio); - mutex_unlock(&esw_chains_lock(esw)); - - while (level-- > 0) - mlx5_esw_chains_put_table(esw, chain, prio, level); - - return; - -err_get_prio: - mutex_unlock(&esw_chains_lock(esw)); - WARN_ONCE(1, - "Couldn't find table: (chain: %d prio: %d level: %d)", - chain, prio, level); -} - -struct mlx5_flow_table * -mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) -{ - return tc_end_fdb(esw); -} - -struct mlx5_flow_table * -mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw) -{ - u32 chain, prio, level; - int err; - - if (!fdb_ignore_flow_level_supported(esw)) { - err = -EOPNOTSUPP; - - esw_warn(esw->dev, - "Couldn't create global flow table, ignore_flow_level not supported."); - goto err_ignore; - } - - chain = mlx5_esw_chains_get_chain_range(esw), - prio = mlx5_esw_chains_get_prio_range(esw); - level = mlx5_esw_chains_get_level_range(esw); - - return mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); - -err_ignore: - return ERR_PTR(err); -} - -void -mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, - struct mlx5_flow_table *ft) -{ - mlx5_esw_chains_destroy_fdb_table(esw, ft); -} - -static int -mlx5_esw_chains_init(struct mlx5_eswitch *esw) -{ - struct mlx5_esw_chains_priv *chains_priv; - struct mlx5_core_dev *dev = esw->dev; - u32 max_flow_counter, fdb_max; - struct mapping_ctx *mapping; - int err; - - chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); - if (!chains_priv) - return -ENOMEM; - esw_chains_priv(esw) = chains_priv; - - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - - esw_debug(dev, - "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n", - max_flow_counter, esw->params.large_group_num, fdb_max); - - mlx5_esw_chains_init_sz_pool(esw); - - if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && - esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); - } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_warn(dev, "Tc chains and priorities 
offload aren't supported\n"); - } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { - /* Disabled when ttl workaround is needed, e.g - * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig - */ - esw_warn(dev, - "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - } else { - esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", - mlx5_esw_chains_get_chain_range(esw), - mlx5_esw_chains_get_prio_range(esw)); - } - - err = rhashtable_init(&esw_chains_ht(esw), &chain_params); - if (err) - goto init_chains_ht_err; - - err = rhashtable_init(&esw_prios_ht(esw), &prio_params); - if (err) - goto init_prios_ht_err; - - mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw), - true); - if (IS_ERR(mapping)) { - err = PTR_ERR(mapping); - goto mapping_err; - } - esw_chains_mapping(esw) = mapping; - - mutex_init(&esw_chains_lock(esw)); - - return 0; - -mapping_err: - rhashtable_destroy(&esw_prios_ht(esw)); -init_prios_ht_err: - rhashtable_destroy(&esw_chains_ht(esw)); -init_chains_ht_err: - kfree(chains_priv); - return err; -} - -static void -mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) -{ - mutex_destroy(&esw_chains_lock(esw)); - mapping_destroy(esw_chains_mapping(esw)); - rhashtable_destroy(&esw_prios_ht(esw)); - rhashtable_destroy(&esw_chains_ht(esw)); - - kfree(esw_chains_priv(esw)); -} - -static int -mlx5_esw_chains_open(struct mlx5_eswitch *esw) -{ - struct mlx5_flow_table *ft; - int err; - - /* Create tc_end_fdb(esw) which is the always created ft chain */ - ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw), - 1, 0); - if (IS_ERR(ft)) - return PTR_ERR(ft); - - tc_end_fdb(esw) = ft; - - /* Always open the root for fast path */ - ft = mlx5_esw_chains_get_table(esw, 0, 1, 0); - if (IS_ERR(ft)) { - err = PTR_ERR(ft); - goto level_0_err; - } - - /* Open level 1 for split rules now if prios isn't supported */ - if (!mlx5_esw_chains_prios_supported(esw)) { - err = mlx5_esw_vport_tbl_get(esw); - if (err) - goto level_1_err; - } - - return 0; - -level_1_err: - mlx5_esw_chains_put_table(esw, 0, 1, 0); -level_0_err: - mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); - return err; -} - -static void -mlx5_esw_chains_close(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_chains_prios_supported(esw)) - mlx5_esw_vport_tbl_put(esw); - mlx5_esw_chains_put_table(esw, 0, 1, 0); - mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); -} - -int -mlx5_esw_chains_create(struct mlx5_eswitch *esw) -{ - int err; - - err = mlx5_esw_chains_init(esw); - if (err) - return err; - - err = mlx5_esw_chains_open(esw); - if (err) - goto err_open; - - return 0; - -err_open: - mlx5_esw_chains_cleanup(esw); - return err; -} - -void -mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) -{ - mlx5_esw_chains_close(esw); - mlx5_esw_chains_cleanup(esw); -} - -int -mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, - u32 *chain_mapping) -{ - return mapping_add(esw_chains_mapping(esw), &chain, chain_mapping); -} - -int -mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, u32 chain_mapping) -{ - return mapping_remove(esw_chains_mapping(esw), chain_mapping); -} - -int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, - u32 *chain) -{ - int err; - - err = mapping_find(esw_chains_mapping(esw), tag, chain); - if (err) { - 
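The chains code removed above (and re-homed under lib/fs_chains by this patch) is built around one idiom: a refcounted object cache keyed through an rhashtable, with every lookup and refcount change serialized by a single mutex and errors unwound in LIFO order. A minimal sketch of the pattern follows; the obj/cache names, the obj_params layout, and the obj_create()/obj_destroy() helpers are illustrative stand-ins, not the mlx5 API:

#include <linux/err.h>
#include <linux/mutex.h>
#include <linux/rhashtable.h>

struct obj_key {	/* mirrors the chain/prio/level key above */
	u32 chain;
	u32 prio;
	u32 level;
};

struct obj {
	struct rhash_head node;
	struct obj_key key;
	int ref;		/* protected by cache->lock */
};

struct cache {
	struct rhashtable ht;
	struct mutex lock;	/* serializes lookup + ref updates */
};

static const struct rhashtable_params obj_params = {
	.key_len     = sizeof(struct obj_key),
	.key_offset  = offsetof(struct obj, key),
	.head_offset = offsetof(struct obj, node),
};

/* Assumed helpers: create inserts into ht, destroy removes and frees. */
static struct obj *obj_create(struct cache *c, struct obj_key *key);
static void obj_destroy(struct cache *c, struct obj *o);

static struct obj *obj_get(struct cache *c, struct obj_key *key)
{
	struct obj *o;

	mutex_lock(&c->lock);
	o = rhashtable_lookup_fast(&c->ht, key, obj_params);
	if (!o) {
		o = obj_create(c, key);
		if (IS_ERR(o))
			goto unlock;	/* caller sees the ERR_PTR */
	}
	o->ref++;
unlock:
	mutex_unlock(&c->lock);
	return o;
}

static void obj_put(struct cache *c, struct obj *o)
{
	mutex_lock(&c->lock);
	if (--o->ref == 0)
		obj_destroy(c, o);
	mutex_unlock(&c->lock);
}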
esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag); - return -ENOENT; - } - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h deleted file mode 100644 index 7679ac359e31..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2020 Mellanox Technologies. */ - -#ifndef __ML5_ESW_CHAINS_H__ -#define __ML5_ESW_CHAINS_H__ - -#include "eswitch.h" - -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) - -bool -mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); -bool -mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw); -u32 -mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw); -u32 -mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw); -u32 -mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw); - -struct mlx5_flow_table * -mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level); -void -mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level); - -struct mlx5_flow_table * -mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); - -struct mlx5_flow_table * -mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw); -void -mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, - struct mlx5_flow_table *ft); - -int -mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, - u32 *chain_mapping); -int -mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, - u32 chain_mapping); - -int mlx5_esw_chains_create(struct mlx5_eswitch *esw); -void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); - -int -mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain); - -#else /* CONFIG_MLX5_CLS_ACT */ - -static inline struct mlx5_flow_table * -mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) { return ERR_PTR(-EOPNOTSUPP); } -static inline void -mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) {} - -static inline struct mlx5_flow_table * -mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) { return ERR_PTR(-EOPNOTSUPP); } - -static inline int mlx5_esw_chains_create(struct mlx5_eswitch *esw) { return 0; } -static inline void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) {} - -#endif /* CONFIG_MLX5_CLS_ACT */ - -#endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c new file mode 100644 index 000000000000..ffff11baa3d0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. 
*/ + +#include <linux/mlx5/driver.h> +#include "eswitch.h" + +static void +mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) +{ + u64 parent_id; + + parent_id = mlx5_query_nic_system_image_guid(dev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); +} + +static bool +mlx5_esw_devlink_port_supported(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num == MLX5_VPORT_UPLINK || + (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || + mlx5_eswitch_is_vf_vport(esw, vport_num); +} + +static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_core_dev *dev = esw->dev; + struct devlink_port_attrs attrs = {}; + struct netdev_phys_item_id ppid = {}; + struct devlink_port *dl_port; + u32 controller_num = 0; + bool external; + u16 pfnum; + + dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL); + if (!dl_port) + return NULL; + + mlx5_esw_get_port_parent_id(dev, &ppid); + pfnum = PCI_FUNC(dev->pdev->devfn); + external = mlx5_core_is_ecpf_esw_manager(dev); + if (external) + controller_num = dev->priv.eswitch->offloads.host_number + 1; + + if (vport_num == MLX5_VPORT_UPLINK) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = pfnum; + memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); + attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_set(dl_port, &attrs); + } else if (vport_num == MLX5_VPORT_PF) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external); + } else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, + vport_num - 1, external); + } + return dl_port; +} + +static void mlx5_esw_dl_port_free(struct devlink_port *dl_port) +{ + kfree(dl_port); +} + +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_core_dev *dev = esw->dev; + struct devlink_port *dl_port; + unsigned int dl_port_index; + struct mlx5_vport *vport; + struct devlink *devlink; + int err; + + if (!mlx5_esw_devlink_port_supported(esw, vport_num)) + return 0; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + dl_port = mlx5_esw_dl_port_alloc(esw, vport_num); + if (!dl_port) + return -ENOMEM; + + devlink = priv_to_devlink(dev); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); + err = devlink_port_register(devlink, dl_port, dl_port_index); + if (err) + goto reg_err; + + vport->dl_port = dl_port; + return 0; + +reg_err: + mlx5_esw_dl_port_free(dl_port); + return err; +} + +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + if (!mlx5_esw_devlink_port_supported(esw, vport_num)) + return; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + devlink_port_unregister(vport->dl_port); + mlx5_esw_dl_port_free(vport->dl_port); + vport->dl_port = NULL; +} + +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + return vport->dl_port; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 867d8120b8a5..cf87de94418f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -42,6 +42,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "lib/mpfs.h" +#include "lib/fs_chains.h" #include "en/tc_ct.h" #ifdef CONFIG_MLX5_ESWITCH @@ -62,6 +63,9 @@ #define mlx5_esw_has_fwd_fdb(dev) \ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) +#define esw_chains(esw) \ + ((esw)->fdb_table.offloads.esw_chains_priv) + struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_handle *allow_rule; @@ -152,14 +156,9 @@ struct mlx5_vport { bool enabled; enum mlx5_eswitch_vport_event enabled_events; + struct devlink_port *dl_port; }; -enum offloads_fdb_flags { - ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), -}; - -struct mlx5_esw_chains_priv; - struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -183,7 +182,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; - struct mlx5_esw_chains_priv *esw_chains_priv; + struct mlx5_fs_chains *esw_chains_priv; struct { DECLARE_HASHTABLE(table, 8); /* Protects vports.table */ @@ -217,6 +216,7 @@ struct mlx5_esw_offload { atomic64_t num_flows; enum devlink_eswitch_encap_mode encap; struct ida vport_metadata_ida; + unsigned int host_number; /* ECPF supports one external host */ }; /* E-Switch MC FDB table hash node */ @@ -329,7 +329,7 @@ struct mlx5_termtbl_handle; bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec); @@ -349,19 +349,19 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); struct mlx5_flow_handle * mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); void mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); void mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, @@ -401,7 +401,6 @@ struct mlx5_esw_flow_attr { int split_count; int out_count; - int action; __be16 vlan_proto[MLX5_FS_VLAN_DEPTH]; u16 vlan_vid[MLX5_FS_VLAN_DEPTH]; u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; @@ -413,19 +412,7 @@ struct mlx5_esw_flow_attr { struct mlx5_core_dev *mdev; struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; - struct mlx5_modify_hdr *modify_hdr; - u8 inner_match_level; - u8 outer_match_level; - struct mlx5_fc *counter; - u32 chain; - u16 prio; - u32 dest_chain; - u32 flags; - struct mlx5_flow_table *fdb; - struct mlx5_flow_table *dest_ft; - struct mlx5_ct_attr ct_attr; struct mlx5_pkt_reformat *decap_pkt_reformat; - struct mlx5e_tc_flow_parse_attr *parse_attr; }; int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, @@ -451,9 +438,9 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, - 
struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); @@ -677,6 +664,9 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1bcf2609dca8..c9c2962ad49f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -39,12 +39,13 @@ #include "mlx5_core.h" #include "eswitch.h" #include "esw/acl/ofld.h" -#include "esw/chains.h" #include "rdma.h" #include "en.h" #include "fs_core.h" #include "lib/devcom.h" #include "lib/eq.h" +#include "lib/fs_chains.h" +#include "en_tc.h" /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. @@ -66,6 +67,12 @@ struct mlx5_vport_key { u16 vhca_id; } __packed; +struct mlx5_vport_tbl_attr { + u16 chain; + u16 prio; + u16 vport; +}; + struct mlx5_vport_table { struct hlist_node hlist; struct mlx5_flow_table *fdb; @@ -94,10 +101,10 @@ esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns) } static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr, + struct mlx5_vport_tbl_attr *attr, struct mlx5_vport_key *key) { - key->vport = attr->in_rep->vport; + key->vport = attr->vport; key->chain = attr->chain; key->prio = attr->prio; key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); @@ -118,7 +125,7 @@ esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 } static void -esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) +esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) { struct mlx5_vport_table *e; struct mlx5_vport_key key; @@ -138,7 +145,7 @@ out: } static struct mlx5_flow_table * -esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) +esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) { struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *ns; @@ -189,16 +196,15 @@ err_alloc: int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw) { - struct mlx5_esw_flow_attr attr = {}; - struct mlx5_eswitch_rep rep = {}; + struct mlx5_vport_tbl_attr attr; struct mlx5_flow_table *fdb; struct mlx5_vport *vport; int i; + attr.chain = 0; attr.prio = 1; - attr.in_rep = &rep; mlx5_esw_for_all_vports(esw, i, vport) { - attr.in_rep->vport = vport->vport; + attr.vport = vport->vport; fdb = esw_vport_tbl_get(esw, &attr); if (IS_ERR(fdb)) goto out; @@ -212,15 +218,14 @@ out: void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw) { - struct mlx5_esw_flow_attr attr = {}; - struct mlx5_eswitch_rep rep = {}; + struct mlx5_vport_tbl_attr attr; struct mlx5_vport *vport; int i; + 
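With the two hunks above, callers no longer fabricate a throwaway mlx5_eswitch_rep just to smuggle a vport number through mlx5_esw_flow_attr: the per-vport table cache is now keyed by the three fields of mlx5_vport_tbl_attr, which flow_attr_to_vport_key() completes with the device's vhca_id before hashing. Get and put must therefore pass identical chain/prio/vport triples for the refcounting to pair up. A sketch of a balanced caller (the function name is hypothetical):

static int example_use_vport_tbl(struct mlx5_eswitch *esw,
				 struct mlx5_vport *vport)
{
	struct mlx5_vport_tbl_attr attr = {
		.chain = 0,
		.prio  = 1,
		.vport = vport->vport,
	};
	struct mlx5_flow_table *fdb;

	fdb = esw_vport_tbl_get(esw, &attr);	/* create-or-ref */
	if (IS_ERR(fdb))
		return PTR_ERR(fdb);

	/* ... install per-vport forwarding rules into fdb ... */

	esw_vport_tbl_put(esw, &attr);		/* unref; freed at zero */
	return 0;
}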
attr.chain = 0; attr.prio = 1; - attr.in_rep = &rep; mlx5_esw_for_all_vports(esw, i, vport) { - attr.in_rep->vport = vport->vport; + attr.vport = vport->vport; esw_vport_tbl_put(esw, &attr); } } @@ -242,8 +247,11 @@ mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) { if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && - attr && attr->in_rep && attr->in_rep->vport == MLX5_VPORT_UPLINK) - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + attr && attr->in_rep) + spec->flow_context.flow_source = + attr->in_rep->vport == MLX5_VPORT_UPLINK ? + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK : + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; } static void @@ -290,11 +298,14 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; - bool split = !!(attr->split_count); + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + bool split = !!(esw_attr->split_count); + struct mlx5_vport_tbl_attr fwd_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; @@ -308,13 +319,13 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { - flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]); - flow_act.vlan[0].vid = attr->vlan_vid[0]; - flow_act.vlan[0].prio = attr->vlan_prio[0]; + flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]); + flow_act.vlan[0].vid = esw_attr->vlan_vid[0]; + flow_act.vlan[0].prio = esw_attr->vlan_prio[0]; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { - flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]); - flow_act.vlan[1].vid = attr->vlan_vid[1]; - flow_act.vlan[1].prio = attr->vlan_prio[1]; + flow_act.vlan[1].ethtype = ntohs(esw_attr->vlan_proto[1]); + flow_act.vlan[1].vid = esw_attr->vlan_vid[1]; + flow_act.vlan[1].prio = esw_attr->vlan_prio[1]; } } @@ -329,12 +340,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw); + dest[i].ft = mlx5_chains_get_tc_end_ft(chains); i++; } else if (attr->dest_chain) { flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - ft = mlx5_esw_chains_get_table(esw, attr->dest_chain, - 1, 0); + ft = mlx5_chains_get_table(chains, attr->dest_chain, + 1, 0); if (IS_ERR(ft)) { rule = ERR_CAST(ft); goto err_create_goto_table; @@ -344,28 +355,29 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, dest[i].ft = ft; i++; } else { - for (j = attr->split_count; j < attr->out_count; j++) { + for (j = esw_attr->split_count; j < esw_attr->out_count; j++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->dests[j].rep->vport; + dest[i].vport.num = esw_attr->dests[j].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id); + MLX5_CAP_GEN(esw_attr->dests[j].mdev, vhca_id); if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - if 
(attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { + if (esw_attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act.pkt_reformat = attr->dests[j].pkt_reformat; + flow_act.pkt_reformat = + esw_attr->dests[j].pkt_reformat; dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; dest[i].vport.pkt_reformat = - attr->dests[j].pkt_reformat; + esw_attr->dests[j].pkt_reformat; } i++; } } } - if (attr->decap_pkt_reformat) - flow_act.pkt_reformat = attr->decap_pkt_reformat; + if (esw_attr->decap_pkt_reformat) + flow_act.pkt_reformat = esw_attr->decap_pkt_reformat; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; @@ -382,26 +394,30 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, flow_act.modify_hdr = attr->modify_hdr; if (split) { - fdb = esw_vport_tbl_get(esw, attr); + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + + fdb = esw_vport_tbl_get(esw, &fwd_attr); } else { if (attr->chain || attr->prio) - fdb = mlx5_esw_chains_get_table(esw, attr->chain, - attr->prio, 0); + fdb = mlx5_chains_get_table(chains, attr->chain, + attr->prio, 0); else - fdb = attr->fdb; + fdb = attr->ft; if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) - mlx5_eswitch_set_rule_source_port(esw, spec, attr); + mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr); } if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; } - mlx5_eswitch_set_rule_flow_source(esw, spec, attr); + mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr); if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) - rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, + rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr, &flow_act, dest, i); else rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); @@ -414,12 +430,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, err_add_rule: if (split) - esw_vport_tbl_put(esw, attr); + esw_vport_tbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_esw_get: if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) - mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); + mlx5_chains_put_table(chains, attr->dest_chain, 1, 0); err_create_goto_table: return rule; } @@ -427,46 +443,51 @@ err_create_goto_table: struct mlx5_flow_handle * mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5_vport_tbl_attr fwd_attr; struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; int i; - fast_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 0); + fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0); if (IS_ERR(fast_fdb)) { rule = ERR_CAST(fast_fdb); goto err_get_fast; } - fwd_fdb = esw_vport_tbl_get(esw, attr); + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + fwd_fdb = esw_vport_tbl_get(esw, &fwd_attr); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; } 
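The attr to esw_attr renames running through these two functions all follow from one structural change visible in the eswitch.h hunk earlier: fields the NIC TC path also needs (action, chain, prio, dest_chain, flags, counter, modify_hdr, the match levels, and the generic ft pointer that replaces the old fdb member) moved out of mlx5_esw_flow_attr onto a shared mlx5_flow_attr, which is presumably why these files now include en_tc.h. The eswitch-only remainder is reached through attr->esw_attr. An illustrative view of the layering — a subset only, with esw_attr drawn as a plain pointer; the real header defines the exact storage:

struct mlx5_flow_attr {			/* illustrative subset */
	u32 action;
	u32 chain;
	u16 prio;
	u32 dest_chain;
	u32 flags;
	struct mlx5_flow_table *ft;	/* replaces the old attr->fdb */
	struct mlx5_fc *counter;
	struct mlx5_modify_hdr *modify_hdr;
	u8 inner_match_level;
	u8 outer_match_level;
	struct mlx5_esw_flow_attr *esw_attr;	/* FDB-only tail */
};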
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - for (i = 0; i < attr->split_count; i++) { + for (i = 0; i < esw_attr->split_count; i++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->dests[i].rep->vport; + dest[i].vport.num = esw_attr->dests[i].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id); + MLX5_CAP_GEN(esw_attr->dests[i].mdev, vhca_id); if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat; + dest[i].vport.pkt_reformat = esw_attr->dests[i].pkt_reformat; } } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = fwd_fdb, i++; - mlx5_eswitch_set_rule_source_port(esw, spec, attr); - mlx5_eswitch_set_rule_flow_source(esw, spec, attr); + mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr); if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -481,9 +502,9 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, return rule; add_err: - esw_vport_tbl_put(esw, attr); + esw_vport_tbl_put(esw, &fwd_attr); err_get_fwd: - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_get_fast: return rule; } @@ -491,10 +512,13 @@ err_get_fast: static void __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, bool fwd_rule) { - bool split = (attr->split_count > 0); + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + bool split = (esw_attr->split_count > 0); + struct mlx5_vport_tbl_attr fwd_attr; int i; mlx5_del_flow_rules(rule); @@ -502,31 +526,36 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) { /* unref the term table */ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { - if (attr->dests[i].termtbl) - mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + if (esw_attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, esw_attr->dests[i].termtbl); } } atomic64_dec(&esw->offloads.num_flows); + if (fwd_rule || split) { + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + } + if (fwd_rule) { - esw_vport_tbl_put(esw, attr); - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); + esw_vport_tbl_put(esw, &fwd_attr); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); } else { if (split) - esw_vport_tbl_put(esw, attr); + esw_vport_tbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, - 0); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); if (attr->dest_chain) - mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); + mlx5_chains_put_table(chains, attr->dest_chain, 1, 0); } } void mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { __mlx5_eswitch_del_rule(esw, rule, attr, false); } @@ -534,7 +563,7 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, void mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr) + struct 
mlx5_flow_attr *attr) { __mlx5_eswitch_del_rule(esw, rule, attr, true); } @@ -611,9 +640,10 @@ out_notsupp: } int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5_eswitch_rep *vport = NULL; bool push, pop, fwd; int err = 0; @@ -629,17 +659,17 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); - err = esw_add_vlan_action_check(attr, push, pop, fwd); + err = esw_add_vlan_action_check(esw_attr, push, pop, fwd); if (err) goto unlock; attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; - vport = esw_vlan_action_get_vport(attr, push, pop); + vport = esw_vlan_action_get_vport(esw_attr, push, pop); if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { + if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; } @@ -662,11 +692,11 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (vport->vlan_refcount) goto skip_set_push; - err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0, - SET_VLAN_INSERT | SET_VLAN_STRIP); + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, esw_attr->vlan_vid[0], + 0, SET_VLAN_INSERT | SET_VLAN_STRIP); if (err) goto out; - vport->vlan = attr->vlan_vid[0]; + vport->vlan = esw_attr->vlan_vid[0]; skip_set_push: vport->vlan_refcount++; } @@ -679,9 +709,10 @@ unlock: } int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5_eswitch_rep *vport = NULL; bool push, pop, fwd; int err = 0; @@ -699,11 +730,11 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); - vport = esw_vlan_action_get_vport(attr, push, pop); + vport = esw_vlan_action_get_vport(esw_attr, push, pop); if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) + if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; goto out; @@ -1137,6 +1168,126 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, } } +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +#define fdb_modify_header_fwd_to_table_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) +static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags) +{ + struct mlx5_core_dev *dev = esw->dev; + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ignore_flow_level)) + *flags |= MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + + if (!MLX5_CAP_ESW_FLOWTABLE(dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported\n"); + } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { + /* Disabled when ttl workaround is needed, e.g + * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig + */ + esw_warn(dev, + "Tc chains and 
priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + } else { + *flags |= MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_info(dev, "Supported tc chains and prios offload\n"); + } + + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + *flags |= MLX5_CHAINS_FT_TUNNEL_SUPPORTED; +} + +static int +esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *nf_ft, *ft; + struct mlx5_chains_attr attr = {}; + struct mlx5_fs_chains *chains; + u32 fdb_max; + int err; + + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_init_chains_offload_flags(esw, &attr.flags); + attr.ns = MLX5_FLOW_NAMESPACE_FDB; + attr.max_ft_sz = fdb_max; + attr.max_grp_num = esw->params.large_group_num; + attr.default_ft = miss_fdb; + attr.max_restore_tag = esw_get_max_restore_tag(esw); + + chains = mlx5_chains_create(dev, &attr); + if (IS_ERR(chains)) { + err = PTR_ERR(chains); + esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); + return err; + } + + esw->fdb_table.offloads.esw_chains_priv = chains; + + /* Create tc_end_ft which is the always created ft chain */ + nf_ft = mlx5_chains_get_table(chains, mlx5_chains_get_nf_ft_chain(chains), + 1, 0); + if (IS_ERR(nf_ft)) { + err = PTR_ERR(nf_ft); + goto nf_ft_err; + } + + /* Always open the root for fast path */ + ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_0_err; + } + + /* Open level 1 for split fdb rules now if prios isn't supported */ + if (!mlx5_chains_prios_supported(chains)) { + err = mlx5_esw_vport_tbl_get(esw); + if (err) + goto level_1_err; + } + + mlx5_chains_set_end_ft(chains, nf_ft); + + return 0; + +level_1_err: + mlx5_chains_put_table(chains, 0, 1, 0); +level_0_err: + mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); +nf_ft_err: + mlx5_chains_destroy(chains); + esw->fdb_table.offloads.esw_chains_priv = NULL; + + return err; +} + +static void +esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + mlx5_esw_vport_tbl_put(esw); + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); + mlx5_chains_destroy(chains); +} + +#else /* CONFIG_MLX5_CLS_ACT */ + +static int +esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) +{ return 0; } + +static void +esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) +{} + +#endif + static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1192,9 +1343,9 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.slow_fdb = fdb; - err = mlx5_esw_chains_create(esw); + err = esw_chains_create(esw, fdb); if (err) { - esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); + esw_warn(dev, "Failed to open fdb chains err(%d)\n", err); goto fdb_chains_err; } @@ -1288,7 +1439,7 @@ miss_err: peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: - mlx5_esw_chains_destroy(esw); + esw_chains_destroy(esw, esw_chains(esw)); fdb_chains_err: mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: @@ -1312,7 +1463,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) 
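esw_chains_create() above follows the kernel's LIFO unwind convention: the nf_ft chain, the level-0 root table, and (when prios are unsupported) the per-vport split table are acquired in order, and the level_1_err/level_0_err/nf_ft_err labels release them strictly in reverse — the same order esw_chains_destroy() uses on the teardown path. Reduced to its skeleton, with hypothetical get_/put_ helpers:

static int create_in_order(void)
{
	int err;

	err = get_a();
	if (err)
		return err;

	err = get_b();
	if (err)
		goto err_b;

	err = get_c();
	if (err)
		goto err_c;

	return 0;

err_c:
	put_b();
err_b:
	put_a();
	return err;
}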
mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); - mlx5_esw_chains_destroy(esw); + esw_chains_destroy(esw, esw_chains(esw)); + mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, @@ -1671,15 +1823,12 @@ static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) __esw_offloads_unload_rep(esw, rep, rep_type); } -int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) +static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; int err; - if (esw->mode != MLX5_ESWITCH_OFFLOADS) - return 0; - rep = mlx5_eswitch_get_rep(esw, vport_num); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) if (atomic_cmpxchg(&rep->rep_data[rep_type].state, @@ -1698,19 +1847,46 @@ err_reps: return err; } -void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) +static void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; - if (esw->mode != MLX5_ESWITCH_OFFLOADS) - return; - rep = mlx5_eswitch_get_rep(esw, vport_num); for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--) __esw_offloads_unload_rep(esw, rep, rep_type); } +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) +{ + int err; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); + if (err) + return err; + + err = mlx5_esw_offloads_rep_load(esw, vport_num); + if (err) + goto load_err; + return err; + +load_err: + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + return err; +} + +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; + + mlx5_esw_offloads_rep_unload(esw, vport_num); + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); +} + #define ESW_OFFLOADS_DEVCOM_PAIR (0) #define ESW_OFFLOADS_DEVCOM_UNPAIR (1) @@ -1868,53 +2044,38 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) return true; } -static bool -esw_check_vport_match_metadata_mandatory(const struct mlx5_eswitch *esw) -{ - return mlx5_core_mp_enabled(esw->dev); -} - -static bool esw_use_vport_metadata(const struct mlx5_eswitch *esw) -{ - return esw_check_vport_match_metadata_mandatory(esw) && - esw_check_vport_match_metadata_supported(esw); -} - u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) { - u32 num_vports = GENMASK(ESW_VPORT_BITS - 1, 0) - 1; - u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0); - u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); - u32 start; - u32 end; + u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1; + u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 1; + u32 pf_num; int id; - /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */ - WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS)); - - /* Trim vhca_id to ESW_VHCA_ID_BITS */ - vhca_id &= vhca_id_mask; - - start = (vhca_id << ESW_VPORT_BITS); - end = start + num_vports; - if (!vhca_id) - start += 1; /* zero is reserved/invalid metadata */ - id = ida_alloc_range(&esw->offloads.vport_metadata_ida, start, end, GFP_KERNEL); + /* Only 4 bits of pf_num */ + pf_num = PCI_FUNC(esw->dev->pdev->devfn); + if (pf_num > max_pf_num) + return 0; - return (id < 0) ? 
0 : id; + /* Metadata is 4 bits of PFNUM and 12 bits of unique id */ + /* Use only non-zero vport_id (1-4095) for all PF's */ + id = ida_alloc_range(&esw->offloads.vport_metadata_ida, 1, vport_end_ida, GFP_KERNEL); + if (id < 0) + return 0; + id = (pf_num << ESW_VPORT_BITS) | id; + return id; } void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata) { - ida_free(&esw->offloads.vport_metadata_ida, metadata); + u32 vport_bit_mask = (1 << ESW_VPORT_BITS) - 1; + + /* Metadata contains only 12 bits of actual ida id */ + ida_free(&esw->offloads.vport_metadata_ida, metadata & vport_bit_mask); } static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (vport->vport == MLX5_VPORT_UPLINK) - return 0; - vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); vport->metadata = vport->default_metadata; return vport->metadata ? 0 : -ENOSPC; @@ -1923,40 +2084,65 @@ static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (vport->vport == MLX5_VPORT_UPLINK || !vport->default_metadata) + if (!vport->default_metadata) return; WARN_ON(vport->metadata != vport->default_metadata); mlx5_esw_match_metadata_free(esw, vport->default_metadata); } +static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return; + + mlx5_esw_for_all_vports_reverse(esw, i, vport) + esw_offloads_vport_metadata_cleanup(esw, vport); +} + +static int esw_offloads_metadata_init(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int err; + int i; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; + + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_offloads_vport_metadata_setup(esw, vport); + if (err) + goto metadata_err; + } + + return 0; + +metadata_err: + esw_offloads_metadata_uninit(esw); + return err; +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int err; - err = esw_offloads_vport_metadata_setup(esw, vport); - if (err) - goto metadata_err; - err = esw_acl_ingress_ofld_setup(esw, vport); if (err) - goto ingress_err; + return err; - if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { - err = esw_acl_egress_ofld_setup(esw, vport); - if (err) - goto egress_err; - } + err = esw_acl_egress_ofld_setup(esw, vport); + if (err) + goto egress_err; return 0; egress_err: esw_acl_ingress_ofld_cleanup(esw, vport); -ingress_err: - esw_offloads_vport_metadata_cleanup(esw, vport); -metadata_err: return err; } @@ -1966,22 +2152,14 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, { esw_acl_egress_ofld_cleanup(vport); esw_acl_ingress_ofld_cleanup(esw, vport); - esw_offloads_vport_metadata_cleanup(esw, vport); } static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - int err; - - if (esw_use_vport_metadata(esw)) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - err = esw_vport_create_offloads_acl_tables(esw, vport); - if (err) - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; - return err; + return esw_vport_create_offloads_acl_tables(esw, vport); } static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) @@ -1990,7 +2168,6 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) vport = 
mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); esw_vport_destroy_offloads_acl_tables(esw, vport); - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } static int esw_offloads_steering_init(struct mlx5_eswitch *esw) @@ -2114,6 +2291,24 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type return NOTIFY_OK; } +static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw) +{ + const u32 *query_host_out; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return 0; + + query_host_out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(query_host_out)) + return PTR_ERR(query_host_out); + + /* Mark non local controller with non zero controller number. */ + esw->offloads.host_number = MLX5_GET(query_esw_functions_out, query_host_out, + host_params_context.host_number); + kvfree(query_host_out); + return 0; +} + int esw_offloads_enable(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; @@ -2128,6 +2323,17 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); + err = mlx5_esw_host_number_init(esw); + if (err) + goto err_metadata; + + if (esw_check_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + + err = esw_offloads_metadata_init(esw); + if (err) + goto err_metadata; + err = esw_set_passing_vport_metadata(esw, true); if (err) goto err_vport_metadata; @@ -2160,6 +2366,9 @@ err_uplink: err_steering_init: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: + esw_offloads_metadata_uninit(esw); +err_metadata: + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); return err; @@ -2193,6 +2402,8 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + esw_offloads_metadata_uninit(esw); + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 17a0d2bc102b..ec679560a95d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -3,6 +3,7 @@ #include <linux/mlx5/fs.h> #include "eswitch.h" +#include "en_tc.h" #include "fs_core.h" struct mlx5_termtbl_handle { @@ -228,10 +229,11 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec) { + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; int i; if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || @@ -244,8 +246,8 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, return true; /* hairpin */ - for (i = attr->split_count; i < attr->out_count; i++) - if (attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) return true; return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 831d2c39e153..80da50e12915 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -54,7 +54,7 @@ static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn, if (unlikely(!buf->sg[0].data)) goto out; - dma_device = &conn->fdev->mdev->pdev->dev; + dma_device = mlx5_core_dma_dev(conn->fdev->mdev); buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data, buf->sg[0].size, buf->dma_dir); err = dma_mapping_error(dma_device, buf->sg[0].dma_addr); @@ -86,7 +86,7 @@ static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn, { struct device *dma_device; - dma_device = &conn->fdev->mdev->pdev->dev; + dma_device = mlx5_core_dma_dev(conn->fdev->mdev); if (buf->sg[1].data) dma_unmap_single(dma_device, buf->sg[1].dma_addr, buf->sg[1].size, buf->dma_dir); @@ -388,9 +388,9 @@ static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn, mlx5_fpga_conn_arm_cq(conn); } -static void mlx5_fpga_conn_cq_tasklet(unsigned long data) +static void mlx5_fpga_conn_cq_tasklet(struct tasklet_struct *t) { - struct mlx5_fpga_conn *conn = (void *)data; + struct mlx5_fpga_conn *conn = from_tasklet(conn, t, cq.tasklet); if (unlikely(!conn->qp.active)) return; @@ -478,8 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; conn->cq.mcq.irqn = irqn; conn->cq.mcq.uar = fdev->conn_res.uar; - tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet, - (unsigned long)conn); + tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index fee169732de7..babe3405132a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -776,6 +776,9 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, table_type = FS_FT_NIC_RX; break; case MLX5_FLOW_NAMESPACE_EGRESS: +#ifdef CONFIG_MLX5_IPSEC + case MLX5_FLOW_NAMESPACE_EGRESS_KERNEL: +#endif max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 75fa44eee434..16091838bfcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -126,6 +126,10 @@ #define LAG_NUM_PRIOS 1 #define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1) +#define KERNEL_TX_IPSEC_NUM_PRIOS 1 +#define KERNEL_TX_IPSEC_NUM_LEVELS 1 +#define KERNEL_TX_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS) + struct node_caps { size_t arr_sz; long *caps; @@ -180,13 +184,24 @@ static struct init_tree_node { static struct init_tree_node egress_root_fs = { .type = FS_TYPE_NAMESPACE, +#ifdef CONFIG_MLX5_IPSEC + .ar_size = 2, +#else .ar_size = 1, +#endif .children = (struct init_tree_node[]) { ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, FS_CHAINING_CAPS_EGRESS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), +#ifdef CONFIG_MLX5_IPSEC + ADD_PRIO(0, KERNEL_TX_MIN_LEVEL, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS, + KERNEL_TX_IPSEC_NUM_LEVELS))), +#endif } }; @@ -1595,11 +1610,12 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, return true; if (ignore_level) { - if (ft->type 
!= FS_FT_FDB) + if (ft->type != FS_FT_FDB && + ft->type != FS_FT_NIC_RX) return false; if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && - dest->ft->type != FS_FT_FDB) + ft->type != dest->ft->type) return false; } @@ -2164,8 +2180,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, break; } - if (type == MLX5_FLOW_NAMESPACE_EGRESS) { + if (type == MLX5_FLOW_NAMESPACE_EGRESS || + type == MLX5_FLOW_NAMESPACE_EGRESS_KERNEL) { root_ns = steering->egress_root_ns; + prio = type - MLX5_FLOW_NAMESPACE_EGRESS; } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) { root_ns = steering->rdma_rx_root_ns; prio = RDMA_RX_BYPASS_PRIO; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c new file mode 100644 index 000000000000..f9042e147c7f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "fw_reset.h" +#include "diag/fw_tracer.h" + +enum { + MLX5_FW_RESET_FLAGS_RESET_REQUESTED, + MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, + MLX5_FW_RESET_FLAGS_PENDING_COMP +}; + +struct mlx5_fw_reset { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + struct workqueue_struct *wq; + struct work_struct fw_live_patch_work; + struct work_struct reset_request_work; + struct work_struct reset_reload_work; + struct work_struct reset_now_work; + struct work_struct reset_abort_work; + unsigned long reset_flags; + struct timer_list timer; + struct completion done; + int ret; +}; + +void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (enable) + clear_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); + else + set_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); +} + +bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + return !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); +} + +static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, + u8 reset_type_sel, u8 sync_resp, bool sync_start) +{ + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + + MLX5_SET(mfrl_reg, in, reset_level, reset_level); + MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_resp, sync_resp); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, sync_start); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1); +} + +static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) +{ + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 0); + if (err) + return err; + + if (reset_level) + *reset_level = MLX5_GET(mfrl_reg, out, reset_level); + if (reset_type) + *reset_type = MLX5_GET(mfrl_reg, out, reset_type); + + return 0; +} + +int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) +{ + return mlx5_reg_mfrl_query(dev, reset_level, reset_type); +} + +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + int err; + + 
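The function being opened here is one half of a synchronous contract: it sets the PENDING_COMP flag and fires the MFRL level-3 write, while a later mlx5_fw_reset_wait_reset_done() blocks on fw_reset->done, which mlx5_fw_reset_complete_reload() completes once the reset/reload work has run. A hedged sketch of a caller — say, a devlink fw_activate handler; the mask test assumes the MLX5_MFRL_REG_RESET_LEVEL* constants are single bits in the queried level bitmap, and reset_type_sel 0 is only a placeholder selector:

static int example_fw_activate(struct mlx5_core_dev *dev)
{
	u8 level, type;
	int err;

	err = mlx5_fw_reset_query(dev, &level, &type);
	if (err)
		return err;
	if (!(level & MLX5_MFRL_REG_RESET_LEVEL3))
		return -EOPNOTSUPP;	/* PCI-synced reset not advertised */

	err = mlx5_fw_reset_set_reset_sync(dev, 0);
	if (err)
		return err;

	/* Sleeps until the reload work signals fw_reset->done. */
	return mlx5_fw_reset_wait_reset_done(dev);
}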
set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true); + if (err) + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + return err; +} + +int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); +} + +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + /* if this is the driver that initiated the fw reset, devlink completed the reload */ + if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { + complete(&fw_reset->done); + } else { + mlx5_load_one(dev, false); + devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, + BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); + } +} + +static void mlx5_sync_reset_reload_work(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_reload_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + mlx5_enter_error_state(dev, true); + mlx5_unload_one(dev, false); + err = mlx5_health_wait_pci_up(dev); + if (err) + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + +static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + del_timer(&fw_reset->timer); +} + +static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + mlx5_stop_sync_reset_poll(dev); + clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); + if (poll_health) + mlx5_start_health_poll(dev); +} + +#define MLX5_RESET_POLL_INTERVAL (HZ / 10) +static void poll_sync_reset(struct timer_list *t) +{ + struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer); + struct mlx5_core_dev *dev = fw_reset->dev; + u32 fatal_error; + + if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + return; + + fatal_error = mlx5_health_check_fatal_sensors(dev); + + if (fatal_error) { + mlx5_core_warn(dev, "Got Device Reset\n"); + mlx5_sync_reset_clear_reset_requested(dev, false); + queue_work(fw_reset->wq, &fw_reset->reset_reload_work); + return; + } + + mod_timer(&fw_reset->timer, round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL)); +} + +static void mlx5_start_sync_reset_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + timer_setup(&fw_reset->timer, poll_sync_reset, 0); + fw_reset->timer.expires = round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL); + add_timer(&fw_reset->timer); +} + +static int mlx5_fw_reset_set_reset_sync_ack(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 1, false); +} + +static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false); +} + +static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + mlx5_stop_health_poll(dev, true); + set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); + mlx5_start_sync_reset_poll(dev); +} + +static void mlx5_fw_live_patch_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = 
container_of(work, struct mlx5_fw_reset, + fw_live_patch_work); + struct mlx5_core_dev *dev = fw_reset->dev; + struct mlx5_fw_tracer *tracer; + + mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); + + tracer = dev->tracer; + if (IS_ERR_OR_NULL(tracer)) + return; + + if (mlx5_fw_tracer_reload(tracer)) + mlx5_core_err(dev, "Failed to reload FW tracer\n"); +} + +static void mlx5_sync_reset_request_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_request_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags)) { + err = mlx5_fw_reset_set_reset_sync_nack(dev); + mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", + err ? "Failed" : "Sent"); + return; + } + mlx5_sync_reset_set_reset_requested(dev); + err = mlx5_fw_reset_set_reset_sync_ack(dev); + if (err) + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); + else + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); +} + +#define MLX5_PCI_LINK_UP_TIMEOUT 2000 + +static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) +{ + struct pci_bus *bridge_bus = dev->pdev->bus; + struct pci_dev *bridge = bridge_bus->self; + u16 reg16, dev_id, sdev_id; + unsigned long timeout; + struct pci_dev *sdev; + int cap, err; + u32 reg32; + + /* Check that all functions under the pci bridge are PFs of + * this device otherwise fail this function. + */ + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return err; + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); + if (err) + return err; + if (sdev_id != dev_id) + return -EPERM; + } + + cap = pci_find_capability(bridge, PCI_CAP_ID_EXP); + if (!cap) + return -EOPNOTSUPP; + + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + pci_save_state(sdev); + pci_cfg_access_lock(sdev); + } + /* PCI link toggle */ + err = pci_read_config_word(bridge, cap + PCI_EXP_LNKCTL, &reg16); + if (err) + return err; + reg16 |= PCI_EXP_LNKCTL_LD; + err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16); + if (err) + return err; + msleep(500); + reg16 &= ~PCI_EXP_LNKCTL_LD; + err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16); + if (err) + return err; + + /* Check link */ + err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32); + if (err) + return err; + if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) { + mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32); + msleep(1000); + goto restore; + } + + timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT); + do { + err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16); + if (err) + return err; + if (reg16 & PCI_EXP_LNKSTA_DLLLA) + break; + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (reg16 & PCI_EXP_LNKSTA_DLLLA) { + mlx5_core_info(dev, "PCI Link up\n"); + } else { + mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n", + reg16, MLX5_PCI_LINK_UP_TIMEOUT); + err = -ETIMEDOUT; + } + +restore: + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + pci_cfg_access_unlock(sdev); + pci_restore_state(sdev); + } + + return err; +} + +static void mlx5_sync_reset_now_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_now_work); + struct 
mlx5_core_dev *dev = fw_reset->dev; + int err; + + mlx5_sync_reset_clear_reset_requested(dev, false); + + mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) { + mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); + goto done; + } + + err = mlx5_pci_link_toggle(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); + goto done; + } + + mlx5_enter_error_state(dev, true); + mlx5_unload_one(dev, false); +done: + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + +static void mlx5_sync_reset_abort_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_abort_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + mlx5_sync_reset_clear_reset_requested(dev, true); + mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); +} + +static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe; + u8 sync_event_rst_type; + + sync_fw_update_eqe = &eqe->data.sync_fw_update; + sync_event_rst_type = sync_fw_update_eqe->sync_rst_state & SYNC_RST_STATE_MASK; + switch (sync_event_rst_type) { + case MLX5_SYNC_RST_STATE_RESET_REQUEST: + queue_work(fw_reset->wq, &fw_reset->reset_request_work); + break; + case MLX5_SYNC_RST_STATE_RESET_NOW: + queue_work(fw_reset->wq, &fw_reset->reset_now_work); + break; + case MLX5_SYNC_RST_STATE_RESET_ABORT: + queue_work(fw_reset->wq, &fw_reset->reset_abort_work); + break; + } +} + +static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb); + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT: + queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work); + break; + case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT: + mlx5_sync_reset_events_handle(fw_reset, eqe); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +#define MLX5_FW_RESET_TIMEOUT_MSEC 5000 +int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC); + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + int err; + + if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { + mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n", + MLX5_FW_RESET_TIMEOUT_MSEC / 1000); + err = -ETIMEDOUT; + goto out; + } + err = fw_reset->ret; +out: + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + return err; +} + +void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT); + mlx5_eq_notifier_register(dev, &fw_reset->nb); +} + +void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) +{ + mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); +} + +int mlx5_fw_reset_init(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); + + if (!fw_reset) + return -ENOMEM; + fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events"); + if (!fw_reset->wq) { + kfree(fw_reset); + return -ENOMEM; + } + + fw_reset->dev = dev; + dev->priv.fw_reset = fw_reset; + + INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event); + 
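/* Note: fw_reset->wq is a single-threaded (ordered) workqueue, so the + * handlers registered below are serialized against each other; e.g. a + * reset_now event cannot race an in-flight reset_request handler. + */ +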
INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event); + INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); + INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event); + INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event); + + init_completion(&fw_reset->done); + return 0; +} + +void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + destroy_workqueue(fw_reset->wq); + kfree(dev->priv.fw_reset); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h new file mode 100644 index 000000000000..7761ee5fc7d0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_FW_RESET_H +#define __MLX5_FW_RESET_H + +#include "mlx5_core.h" + +void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable); +bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev); +int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type); +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel); +int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); + +int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); +void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); +void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev); +int mlx5_fw_reset_init(struct mlx5_core_dev *dev); +void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index b31f769d2df9..54523bed16cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -110,7 +110,7 @@ static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) return rfr && synd; } -static u32 check_fatal_sensors(struct mlx5_core_dev *dev) +u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev) { if (sensor_pci_not_working(dev)) return MLX5_SENSOR_PCI_COMM_ERR; @@ -173,7 +173,7 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev) * Check again to avoid a redundant 2nd reset. If the fatal error was * PCI related, a reset won't help.
*/ - fatal_error = check_fatal_sensors(dev); + fatal_error = mlx5_health_check_fatal_sensors(dev); if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || fatal_error == MLX5_SENSOR_NIC_DISABLED || fatal_error == MLX5_SENSOR_NIC_SW_RESET) { @@ -195,7 +195,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) bool err_detected = false; /* Mark the device as fatal in order to abort FW commands */ - if ((check_fatal_sensors(dev) || force) && + if ((mlx5_health_check_fatal_sensors(dev) || force) && dev->state == MLX5_DEVICE_STATE_UP) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; err_detected = true; @@ -208,7 +208,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) goto unlock; } - if (check_fatal_sensors(dev) || force) { /* protected state setting */ + if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } @@ -231,7 +231,7 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev) mlx5_core_err(dev, "start\n"); - if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { + if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { /* Get cr-dump and reset FW semaphore */ lock = lock_sem_sw_reset(dev, true); @@ -308,26 +308,31 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) /* How much time to wait until health resetting the driver (in msecs) */ #define MLX5_RECOVERY_WAIT_MSECS 60000 -static int mlx5_health_try_recover(struct mlx5_core_dev *dev) +int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) { unsigned long end; - mlx5_core_warn(dev, "handling bad device here\n"); - mlx5_handle_bad_state(dev); end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); while (sensor_pci_not_working(dev)) { - if (time_after(jiffies, end)) { - mlx5_core_err(dev, - "health recovery flow aborted, PCI reads still not working\n"); - return -EIO; - } + if (time_after(jiffies, end)) + return -ETIMEDOUT; msleep(100); } + return 0; +} +static int mlx5_health_try_recover(struct mlx5_core_dev *dev) +{ + mlx5_core_warn(dev, "handling bad device here\n"); + mlx5_handle_bad_state(dev); + if (mlx5_health_wait_pci_up(dev)) { + mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n"); + return -EIO; + } mlx5_core_err(dev, "starting health recovery flow\n"); mlx5_recover_device(dev); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) || - check_fatal_sensors(dev)) { + mlx5_health_check_fatal_sensors(dev)) { mlx5_core_err(dev, "health recovery failed\n"); return -EIO; } @@ -696,7 +701,7 @@ static void poll_health(struct timer_list *t) if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto out; - fatal_error = check_fatal_sensors(dev); + fatal_error = mlx5_health_check_fatal_sensors(dev); if (fatal_error && !health->fatal_error) { mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 874c70e8cc54..33081b24f10a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -102,7 +102,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, if (ldev->pf[i].netdev == ndev) return i; - return -1; + return -ENOENT; } static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) @@ -271,7 +271,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) bool do_bond, roce_lag; int err; - if (!dev0 || !dev1) + if (!mlx5_lag_is_ready(ldev)) return; spin_lock(&lag_lock); @@ -355,7 +355,7 @@ 
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, { struct net_device *upper = info->upper_dev, *ndev_tmp; struct netdev_lag_upper_info *lag_upper_info = NULL; - bool is_bonded; + bool is_bonded, is_in_lag, mode_supported; int bond_status = 0; int num_slaves = 0; int idx; @@ -374,7 +374,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); - if (idx > -1) + if (idx >= 0) bond_status |= (1 << idx); num_slaves++; @@ -391,13 +391,24 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, /* Determine bonding status: * A device is considered bonded if both its physical ports are slaves * of the same lag master, and only them. - * Lag mode must be activebackup or hash. */ - is_bonded = (num_slaves == MLX5_MAX_PORTS) && - (bond_status == 0x3) && - ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) || - (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)); + is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; + if (!mlx5_lag_is_ready(ldev) && is_in_lag) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, PF is configured with more than 64 VFs"); + return 0; + } + + /* Lag mode must be activebackup or hash. */ + mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; + + if (is_in_lag && !mode_supported) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, TX type isn't supported"); + + is_bonded = is_in_lag && mode_supported; if (tracker->is_bonded != is_bonded) { tracker->is_bonded = is_bonded; return 1; @@ -418,7 +429,7 @@ static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, return 0; idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); - if (idx == -1) + if (idx < 0) return 0; /* This information is used to determine virtual to physical @@ -445,6 +456,10 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, return NOTIFY_DONE; ldev = container_of(this, struct mlx5_lag, nb); + + if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE) + return NOTIFY_DONE; + tracker = ldev->tracker; switch (event) { @@ -493,14 +508,14 @@ static void mlx5_lag_dev_free(struct mlx5_lag *ldev) kfree(ldev); } -static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev, - struct net_device *netdev) +static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev, + struct net_device *netdev) { unsigned int fn = PCI_FUNC(dev->pdev->devfn); if (fn >= MLX5_MAX_PORTS) - return; + return -EPERM; spin_lock(&lag_lock); ldev->pf[fn].dev = dev; @@ -511,6 +526,8 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, dev->priv.lag = ldev; spin_unlock(&lag_lock); + + return fn; } static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, @@ -537,11 +554,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; - int err; + int i, err; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)) + if (!MLX5_CAP_GEN(dev, vport_group_manager)) return; tmp_dev = mlx5_get_next_phys_dev(dev); @@ -556,7 +571,18 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) } } - mlx5_lag_dev_add_pf(ldev, dev, netdev); + if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0) + return; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + tmp_dev = 
ldev->pf[i].dev; + if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) || + MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS) + break; + } + + if (i >= MLX5_MAX_PORTS) + ldev->flags |= MLX5_LAG_FLAG_READY; if (!ldev->nb.notifier_call) { ldev->nb.notifier_call = mlx5_lag_netdev_event; @@ -587,6 +613,8 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) mlx5_lag_dev_remove_pf(ldev, dev); + ldev->flags &= ~MLX5_LAG_FLAG_READY; + for (i = 0; i < MLX5_MAX_PORTS; i++) if (ldev->pf[i].dev) break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h index f1068aac6406..8d8cf2d0bc6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -16,6 +16,7 @@ enum { MLX5_LAG_FLAG_ROCE = 1 << 0, MLX5_LAG_FLAG_SRIOV = 1 << 1, MLX5_LAG_FLAG_MULTIPATH = 1 << 2, + MLX5_LAG_FLAG_READY = 1 << 3, }; #define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\ @@ -59,6 +60,12 @@ __mlx5_lag_is_active(struct mlx5_lag *ldev) return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); } +static inline bool +mlx5_lag_is_ready(struct mlx5_lag *ldev) +{ + return ldev->flags & MLX5_LAG_FLAG_READY; +} + void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 9e68f5926ab6..88e58ac902de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -11,7 +11,7 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) { - if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev) + if (!mlx5_lag_is_ready(ldev)) return false; return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, @@ -131,7 +131,12 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct net_device *nh_dev = nh->fib_nh_dev; int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); - mlx5_lag_set_port_affinity(ldev, ++i); + if (i < 0) + i = MLX5_LAG_NORMAL_AFFINITY; + else + ++i; + + mlx5_lag_set_port_affinity(ldev, i); } return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 2d55b7c22c03..c70c1f0ca0c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -150,28 +150,30 @@ static void mlx5_pps_out(struct work_struct *work) static void mlx5_timestamp_overflow(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); - struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock, - overflow_work); + struct mlx5_core_dev *mdev; + struct mlx5_clock *clock; unsigned long flags; + clock = container_of(dwork, struct mlx5_clock, overflow_work); + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&clock->tc); - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); schedule_delayed_work(&clock->overflow_work, clock->overflow_period); } -static int mlx5_ptp_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); u64 ns = 
timespec64_to_ns(ts); + struct mlx5_core_dev *mdev; unsigned long flags; + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_init(&clock->tc, &clock->cycles, ns); - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); return 0; @@ -180,13 +182,12 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, struct ptp_system_timestamp *sts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; unsigned long flags; u64 cycles, ns; + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); cycles = mlx5_read_internal_timer(mdev, sts); ns = timecounter_cyc2time(&clock->tc, cycles); @@ -199,13 +200,14 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; unsigned long flags; + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_adjtime(&clock->tc, delta); - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); return 0; @@ -213,12 +215,13 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) { - u64 adj; - u32 diff; + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; unsigned long flags; int neg_adj = 0; - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); + u32 diff; + u64 adj; + if (delta < 0) { neg_adj = 1; @@ -229,11 +232,12 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) adj *= delta; diff = div_u64(adj, 1000000000ULL); + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&clock->tc); clock->cycles.mult = neg_adj ? 
clock->nominal_c_mult - diff : clock->nominal_c_mult + diff; - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); return 0; @@ -431,13 +435,11 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, default: return -EOPNOTSUPP; } - - return -EOPNOTSUPP; } static const struct ptp_clock_info mlx5_ptp_clock_info = { .owner = THIS_MODULE, - .name = "mlx5_p2p", + .name = "mlx5_ptp", .max_adj = 100000000, .n_alarm = 0, .n_ext_ts = 0, @@ -465,7 +467,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) { - struct mlx5_core_dev *mdev = clock->mdev; + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; u8 mode; int err; @@ -538,20 +541,23 @@ static int mlx5_pps_event(struct notifier_block *nb, unsigned long type, void *data) { struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); - struct mlx5_core_dev *mdev = clock->mdev; struct ptp_clock_event ptp_event; u64 cycles_now, cycles_delta; u64 nsec_now, nsec_delta, ns; struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; + struct mlx5_core_dev *mdev; struct timespec64 ts; unsigned long flags; + mdev = container_of(clock, struct mlx5_core_dev, clock); + switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: ptp_event.index = pin; - ptp_event.timestamp = timecounter_cyc2time(&clock->tc, - be64_to_cpu(eqe->data.pps.time_stamp)); + ptp_event.timestamp = + mlx5_timecounter_cyc2time(clock, + be64_to_cpu(eqe->data.pps.time_stamp)); if (clock->pps_info.enabled) { ptp_event.type = PTP_CLOCK_PPSUSR; ptp_event.pps_times.ts_real = @@ -574,8 +580,8 @@ static int mlx5_pps_event(struct notifier_block *nb, cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, clock->cycles.mult); clock->pps_info.start[pin] = cycles_now + cycles_delta; - schedule_work(&clock->pps_info.out_work); write_sequnlock_irqrestore(&clock->lock, flags); + schedule_work(&clock->pps_info.out_work); break; default: mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", @@ -605,7 +611,6 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) clock->cycles.shift); clock->nominal_c_mult = clock->cycles.mult; clock->cycles.mask = CLOCKSOURCE_MASK(41); - clock->mdev = mdev; timecounter_init(&clock->tc, &clock->cycles, ktime_to_ns(ktime_get_real())); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 5c681e31983b..81f2cc4ca1da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -78,7 +78,7 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); -void mlx5_cq_tasklet_cb(unsigned long data); +void mlx5_cq_tasklet_cb(struct tasklet_struct *t); struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c new file mode 100644 index 000000000000..947f346bdc2d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -0,0 +1,911 @@ +// SPDX-License-Identifier: 
GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies. + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/fs.h> + +#include "lib/fs_chains.h" +#include "en/mapping.h" +#include "mlx5_core.h" +#include "fs_core.h" +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" + +#define chains_lock(chains) ((chains)->lock) +#define chains_ht(chains) ((chains)->chains_ht) +#define chains_mapping(chains) ((chains)->chains_mapping) +#define prios_ht(chains) ((chains)->prios_ht) +#define ft_pool_left(chains) ((chains)->ft_left) +#define tc_default_ft(chains) ((chains)->tc_default_ft) +#define tc_end_ft(chains) ((chains)->tc_end_ft) +#define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \ + FDB_TC_OFFLOAD : MLX5E_TC_PRIO) + +/* Firmware currently has 4 pools of 4 sizes that it supports (FT_POOLS), + * and a virtual memory region of 16M (FT_SIZE) that is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via mlx5_chains_get_avail_sz_from_pool. + * Firmware doesn't report any of this for now. + * FT_POOLS is expected to be sorted from large to small and match firmware + * pools. + */ +#define FT_SIZE (16 * 1024 * 1024) +static const unsigned int FT_POOLS[] = { 4 * 1024 * 1024, + 1 * 1024 * 1024, + 64 * 1024, + 128 }; +#define FT_TBL_SZ (64 * 1024) + +struct mlx5_fs_chains { + struct mlx5_core_dev *dev; + + struct rhashtable chains_ht; + struct rhashtable prios_ht; + /* Protects above chains_ht and prios_ht */ + struct mutex lock; + + struct mlx5_flow_table *tc_default_ft; + struct mlx5_flow_table *tc_end_ft; + struct mapping_ctx *chains_mapping; + + enum mlx5_flow_namespace_type ns; + u32 group_num; + u32 flags; + + int ft_left[ARRAY_SIZE(FT_POOLS)]; +}; + +struct fs_chain { + struct rhash_head node; + + u32 chain; + + int ref; + int id; + + struct mlx5_fs_chains *chains; + struct list_head prios_list; + struct mlx5_flow_handle *restore_rule; + struct mlx5_modify_hdr *miss_modify_hdr; +}; + +struct prio_key { + u32 chain; + u32 prio; + u32 level; +}; + +struct prio { + struct rhash_head node; + struct list_head list; + + struct prio_key key; + + int ref; + + struct fs_chain *chain; + struct mlx5_flow_table *ft; + struct mlx5_flow_table *next_ft; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; +}; + +static const struct rhashtable_params chain_params = { + .head_offset = offsetof(struct fs_chain, node), + .key_offset = offsetof(struct fs_chain, chain), + .key_len = sizeof_field(struct fs_chain, chain), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params prio_params = { + .head_offset = offsetof(struct prio, node), + .key_offset = offsetof(struct prio, key), + .key_len = sizeof_field(struct prio, key), + .automatic_shrinking = true, +}; + +bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains) +{ + return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED; +} + +static bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains) +{ + return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; +} + +bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains) +{ + return mlx5_chains_prios_supported(chains) && + mlx5_chains_ignore_flow_level_supported(chains); +} + +u32 mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + return 1; + + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX - 1; + + /* We should
get here only for eswitch case */ + return FDB_TC_MAX_CHAIN; +} + +u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) +{ + return mlx5_chains_get_chain_range(chains) + 1; +} + +u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + return 1; + + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX; + + /* We should get here only for eswitch case */ + return FDB_TC_MAX_PRIO; +} + +static unsigned int mlx5_chains_get_level_range(struct mlx5_fs_chains *chains) +{ + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX; + + /* Same value for FDB and NIC RX tables */ + return FDB_TC_LEVELS_PER_PRIO; +} + +void +mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + tc_end_ft(chains) = ft; +} + +#define POOL_NEXT_SIZE 0 +static int +mlx5_chains_get_avail_sz_from_pool(struct mlx5_fs_chains *chains, + int desired_size) +{ + int i, found_i = -1; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { + if (ft_pool_left(chains)[i] && FT_POOLS[i] > desired_size) { + found_i = i; + if (desired_size != POOL_NEXT_SIZE) + break; + } + } + + if (found_i != -1) { + --ft_pool_left(chains)[found_i]; + return FT_POOLS[found_i]; + } + + return 0; +} + +static void +mlx5_chains_put_sz_to_pool(struct mlx5_fs_chains *chains, int sz) +{ + int i; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { + if (sz == FT_POOLS[i]) { + ++ft_pool_left(chains)[i]; + return; + } + } + + WARN_ONCE(1, "Couldn't find size %d in flow table size pool", sz); +} + +static void +mlx5_chains_init_sz_pool(struct mlx5_fs_chains *chains, u32 ft_max) +{ + int i; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) + ft_pool_left(chains)[i] = + FT_POOLS[i] <= ft_max ? FT_SIZE / FT_POOLS[i] : 0; +} + +static struct mlx5_flow_table * +mlx5_chains_create_table(struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int sz; + + if (chains->flags & MLX5_CHAINS_FT_TUNNEL_SUPPORTED) + ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? + mlx5_chains_get_avail_sz_from_pool(chains, FT_TBL_SZ) : + mlx5_chains_get_avail_sz_from_pool(chains, POOL_NEXT_SIZE); + if (!sz) + return ERR_PTR(-ENOSPC); + ft_attr.max_fte = sz; + + /* We use tc_default_ft(chains) as the table's next_ft till + * ignore_flow_level is allowed on FT creation and not just for FTEs. + * Instead caller should add an explicit miss rule if needed. + */ + ft_attr.next_ft = tc_default_ft(chains); + + /* The root table(chain 0, prio 1, level 0) is required to be + * connected to the previous fs_core managed prio. + * We always create it, as a managed table, in order to align with + * fs_core logic. + */ + if (!mlx5_chains_ignore_flow_level_supported(chains) || + (chain == 0 && prio == 1 && level == 0)) { + ft_attr.level = level; + ft_attr.prio = prio - 1; + ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ? + mlx5_get_fdb_sub_ns(chains->dev, chain) : + mlx5_get_flow_namespace(chains->dev, chains->ns); + } else { + ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = ns_to_chains_fs_prio(chains->ns); + /* Firmware doesn't allow us to create another level 0 table, + * so we create all unmanaged tables as level 1. + * + * To connect them, we use explicit miss rules with + * ignore_flow_level. 
Caller is responsible to create + * these rules (if needed). + */ + ft_attr.level = 1; + ns = mlx5_get_flow_namespace(chains->dev, chains->ns); + } + + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = chains->group_num; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(ft), chain, prio, level, sz); + mlx5_chains_put_sz_to_pool(chains, sz); + return ft; + } + + return ft; +} + +static void +mlx5_chains_destroy_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + mlx5_chains_put_sz_to_pool(chains, ft->max_fte); + mlx5_destroy_flow_table(ft); +} + +static int +create_chain_restore(struct fs_chain *chain) +{ + struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; + char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)]; + struct mlx5_fs_chains *chains = chain->chains; + enum mlx5e_tc_attr_to_reg chain_to_reg; + struct mlx5_modify_hdr *mod_hdr; + u32 index; + int err; + + if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) || + !mlx5_chains_prios_supported(chains)) + return 0; + + err = mapping_add(chains_mapping(chains), &chain->chain, &index); + if (err) + return err; + if (index == MLX5_FS_DEFAULT_FLOW_TAG) { + /* we got the special default flow tag id, so we won't know + * if we actually marked the packet with the restore rule + * we create. + * + * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. + */ + err = mapping_add(chains_mapping(chains), + &chain->chain, &index); + mapping_remove(chains_mapping(chains), + MLX5_FS_DEFAULT_FLOW_TAG); + if (err) + return err; + } + + chain->id = index; + + if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) { + chain_to_reg = CHAIN_TO_REG; + chain->restore_rule = esw_add_restore_rule(esw, chain->id); + if (IS_ERR(chain->restore_rule)) { + err = PTR_ERR(chain->restore_rule); + goto err_rule; + } + } else if (chains->ns == MLX5_FLOW_NAMESPACE_KERNEL) { + /* For NIC RX we don't need a restore rule + * since we write the metadata to reg_b + * that is passed to SW directly. 
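+ * (The reg_b value is delivered to the driver with each completion, + * so the chain id can be recovered per packet without a restore table.)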
+ */ + chain_to_reg = NIC_CHAIN_TO_REG; + } else { + err = -EINVAL; + goto err_rule; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, + mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield); + MLX5_SET(set_action_in, modact, offset, + mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset * 8); + MLX5_SET(set_action_in, modact, length, + mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen * 8); + MLX5_SET(set_action_in, modact, data, chain->id); + mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns, + 1, modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + chain->miss_modify_hdr = mod_hdr; + + return 0; + +err_mod_hdr: + if (!IS_ERR_OR_NULL(chain->restore_rule)) + mlx5_del_flow_rules(chain->restore_rule); +err_rule: + /* Datapath can't find this mapping, so we can safely remove it */ + mapping_remove(chains_mapping(chains), chain->id); + return err; +} + +static void destroy_chain_restore(struct fs_chain *chain) +{ + struct mlx5_fs_chains *chains = chain->chains; + + if (!chain->miss_modify_hdr) + return; + + if (chain->restore_rule) + mlx5_del_flow_rules(chain->restore_rule); + + mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr); + mapping_remove(chains_mapping(chains), chain->id); +} + +static struct fs_chain * +mlx5_chains_create_chain(struct mlx5_fs_chains *chains, u32 chain) +{ + struct fs_chain *chain_s = NULL; + int err; + + chain_s = kvzalloc(sizeof(*chain_s), GFP_KERNEL); + if (!chain_s) + return ERR_PTR(-ENOMEM); + + chain_s->chains = chains; + chain_s->chain = chain; + INIT_LIST_HEAD(&chain_s->prios_list); + + err = create_chain_restore(chain_s); + if (err) + goto err_restore; + + err = rhashtable_insert_fast(&chains_ht(chains), &chain_s->node, + chain_params); + if (err) + goto err_insert; + + return chain_s; + +err_insert: + destroy_chain_restore(chain_s); +err_restore: + kvfree(chain_s); + return ERR_PTR(err); +} + +static void +mlx5_chains_destroy_chain(struct fs_chain *chain) +{ + struct mlx5_fs_chains *chains = chain->chains; + + rhashtable_remove_fast(&chains_ht(chains), &chain->node, + chain_params); + + destroy_chain_restore(chain); + kvfree(chain); +} + +static struct fs_chain * +mlx5_chains_get_chain(struct mlx5_fs_chains *chains, u32 chain) +{ + struct fs_chain *chain_s; + + chain_s = rhashtable_lookup_fast(&chains_ht(chains), &chain, + chain_params); + if (!chain_s) { + chain_s = mlx5_chains_create_chain(chains, chain); + if (IS_ERR(chain_s)) + return chain_s; + } + + chain_s->ref++; + + return chain_s; +} + +static struct mlx5_flow_handle * +mlx5_chains_add_miss_rule(struct fs_chain *chain, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + struct mlx5_fs_chains *chains = chain->chains; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act act = {}; + + act.flags = FLOW_ACT_NO_APPEND; + if (mlx5_chains_ignore_flow_level_supported(chain->chains)) + act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = next_ft; + + if (next_ft == tc_end_ft(chains) && + chain->chain != mlx5_chains_get_nf_ft_chain(chains) && + mlx5_chains_prios_supported(chains)) { + act.modify_hdr = chain->miss_modify_hdr; + act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1); +} + +static int +mlx5_chains_update_prio_prevs(struct prio *prio, + struct mlx5_flow_table *next_ft) +{ + struct 
mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; + struct fs_chain *chain = prio->chain; + struct prio *pos; + int n = 0, err; + + if (prio->key.level) + return 0; + + /* Iterate in reverse order until reaching the level 0 rule of + * the previous priority, adding all the miss rules first, so we can + * revert them if any of them fails. + */ + pos = prio; + list_for_each_entry_continue_reverse(pos, + &chain->prios_list, + list) { + miss_rules[n] = mlx5_chains_add_miss_rule(chain, + pos->ft, + next_ft); + if (IS_ERR(miss_rules[n])) { + err = PTR_ERR(miss_rules[n]); + goto err_prev_rule; + } + + n++; + if (!pos->key.level) + break; + } + + /* Success, delete old miss rules, and update the pointers. */ + n = 0; + pos = prio; + list_for_each_entry_continue_reverse(pos, + &chain->prios_list, + list) { + mlx5_del_flow_rules(pos->miss_rule); + + pos->miss_rule = miss_rules[n]; + pos->next_ft = next_ft; + + n++; + if (!pos->key.level) + break; + } + + return 0; + +err_prev_rule: + while (--n >= 0) + mlx5_del_flow_rules(miss_rules[n]); + + return err; +} + +static void +mlx5_chains_put_chain(struct fs_chain *chain) +{ + if (--chain->ref == 0) + mlx5_chains_destroy_chain(chain); +} + +static struct prio * +mlx5_chains_create_prio(struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_handle *miss_rule = NULL; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_table *next_ft; + struct mlx5_flow_table *ft; + struct prio *prio_s = NULL; + struct fs_chain *chain_s; + struct list_head *pos; + u32 *flow_group_in; + int err; + + chain_s = mlx5_chains_get_chain(chains, chain); + if (IS_ERR(chain_s)) + return ERR_CAST(chain_s); + + prio_s = kvzalloc(sizeof(*prio_s), GFP_KERNEL); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!prio_s || !flow_group_in) { + err = -ENOMEM; + goto err_alloc; + } + + /* Chain's prio list is sorted by prio and level. + * All levels of a given prio point to the next prio's level 0. + * Example list (prio, level): + * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) + * In hardware, we will have the following pointers: + * (3,0) -> (5,0) -> (7,0) -> Slow path + * (3,1) -> (5,0) + * (5,1) -> (7,0) + * (6,1) -> (7,0) + */ + + /* Default miss for each chain: */ + next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? + tc_default_ft(chains) : + tc_end_ft(chains); + list_for_each(pos, &chain_s->prios_list) { + struct prio *p = list_entry(pos, struct prio, list); + + /* exit on first pos that is larger */ + if (prio < p->key.prio || (prio == p->key.prio && + level < p->key.level)) { + /* Get next level 0 table */ + next_ft = p->key.level == 0 ?
p->ft : p->next_ft; + break; + } + } + + ft = mlx5_chains_create_table(chains, chain, prio, level); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_create; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + ft->max_fte - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft->max_fte - 1); + miss_group = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + goto err_group; + } + + /* Add miss rule to next_ft */ + miss_rule = mlx5_chains_add_miss_rule(chain_s, ft, next_ft); + if (IS_ERR(miss_rule)) { + err = PTR_ERR(miss_rule); + goto err_miss_rule; + } + + prio_s->miss_group = miss_group; + prio_s->miss_rule = miss_rule; + prio_s->next_ft = next_ft; + prio_s->chain = chain_s; + prio_s->key.chain = chain; + prio_s->key.prio = prio; + prio_s->key.level = level; + prio_s->ft = ft; + + err = rhashtable_insert_fast(&prios_ht(chains), &prio_s->node, + prio_params); + if (err) + goto err_insert; + + list_add(&prio_s->list, pos->prev); + + /* Table is ready, connect it */ + err = mlx5_chains_update_prio_prevs(prio_s, ft); + if (err) + goto err_update; + + kvfree(flow_group_in); + return prio_s; + +err_update: + list_del(&prio_s->list); + rhashtable_remove_fast(&prios_ht(chains), &prio_s->node, + prio_params); +err_insert: + mlx5_del_flow_rules(miss_rule); +err_miss_rule: + mlx5_destroy_flow_group(miss_group); +err_group: + mlx5_chains_destroy_table(chains, ft); +err_create: +err_alloc: + kvfree(prio_s); + kvfree(flow_group_in); + mlx5_chains_put_chain(chain_s); + return ERR_PTR(err); +} + +static void +mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains, + struct prio *prio) +{ + struct fs_chain *chain = prio->chain; + + WARN_ON(mlx5_chains_update_prio_prevs(prio, + prio->next_ft)); + + list_del(&prio->list); + rhashtable_remove_fast(&prios_ht(chains), &prio->node, + prio_params); + mlx5_del_flow_rules(prio->miss_rule); + mlx5_destroy_flow_group(prio->miss_group); + mlx5_chains_destroy_table(chains, prio->ft); + mlx5_chains_put_chain(chain); + kvfree(prio); +} + +struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) +{ + struct mlx5_flow_table *prev_fts; + struct prio *prio_s; + struct prio_key key; + int l = 0; + + if ((chain > mlx5_chains_get_chain_range(chains) && + chain != mlx5_chains_get_nf_ft_chain(chains)) || + prio > mlx5_chains_get_prio_range(chains) || + level > mlx5_chains_get_level_range(chains)) + return ERR_PTR(-EOPNOTSUPP); + + /* create earlier levels for correct fs_core lookup when + * connecting tables. 
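+ * The references taken here are balanced by mlx5_chains_put_table(), + * whose tail loop releases the lower levels of the prio the same way.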
+ */ + for (l = 0; l < level; l++) { + prev_fts = mlx5_chains_get_table(chains, chain, prio, l); + if (IS_ERR(prev_fts)) { + prio_s = ERR_CAST(prev_fts); + goto err_get_prevs; + } + } + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&chains_lock(chains)); + prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key, + prio_params); + if (!prio_s) { + prio_s = mlx5_chains_create_prio(chains, chain, + prio, level); + if (IS_ERR(prio_s)) + goto err_create_prio; + } + + ++prio_s->ref; + mutex_unlock(&chains_lock(chains)); + + return prio_s->ft; + +err_create_prio: + mutex_unlock(&chains_lock(chains)); +err_get_prevs: + while (--l >= 0) + mlx5_chains_put_table(chains, chain, prio, l); + return ERR_CAST(prio_s); +} + +void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) +{ + struct prio *prio_s; + struct prio_key key; + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&chains_lock(chains)); + prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key, + prio_params); + if (!prio_s) + goto err_get_prio; + + if (--prio_s->ref == 0) + mlx5_chains_destroy_prio(chains, prio_s); + mutex_unlock(&chains_lock(chains)); + + while (level-- > 0) + mlx5_chains_put_table(chains, chain, prio, level); + + return; + +err_get_prio: + mutex_unlock(&chains_lock(chains)); + WARN_ONCE(1, + "Couldn't find table: (chain: %d prio: %d level: %d)", + chain, prio, level); +} + +struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) +{ + return tc_end_ft(chains); +} + +struct mlx5_flow_table * +mlx5_chains_create_global_table(struct mlx5_fs_chains *chains) +{ + u32 chain, prio, level; + int err; + + if (!mlx5_chains_ignore_flow_level_supported(chains)) { + err = -EOPNOTSUPP; + + mlx5_core_warn(chains->dev, + "Couldn't create global flow table, ignore_flow_level not supported.\n"); + goto err_ignore; + } + + chain = mlx5_chains_get_chain_range(chains); + prio = mlx5_chains_get_prio_range(chains); + level = mlx5_chains_get_level_range(chains); + + return mlx5_chains_create_table(chains, chain, prio, level); + +err_ignore: + return ERR_PTR(err); +} + +void +mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + mlx5_chains_destroy_table(chains, ft); +} + +static struct mlx5_fs_chains * +mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ + struct mlx5_fs_chains *chains_priv; + struct mapping_ctx *mapping; + u32 max_flow_counter; + int err; + + chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); + if (!chains_priv) + return ERR_PTR(-ENOMEM); + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + mlx5_core_dbg(dev, + "Init flow table chains, max counters(%d), groups(%d), max flow table size(%d)\n", + max_flow_counter, attr->max_grp_num, attr->max_ft_sz); + + chains_priv->dev = dev; + chains_priv->flags = attr->flags; + chains_priv->ns = attr->ns; + chains_priv->group_num = attr->max_grp_num; + tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft; + + mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", + mlx5_chains_get_chain_range(chains_priv), + mlx5_chains_get_prio_range(chains_priv)); + + mlx5_chains_init_sz_pool(chains_priv, attr->max_ft_sz); + + err = rhashtable_init(&chains_ht(chains_priv), &chain_params); + if (err) + goto init_chains_ht_err; + + err = rhashtable_init(&prios_ht(chains_priv), &prio_params); + if (err) + goto
init_prios_ht_err; + + mapping = mapping_create(sizeof(u32), attr->max_restore_tag, + true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto mapping_err; + } + chains_mapping(chains_priv) = mapping; + + mutex_init(&chains_lock(chains_priv)); + + return chains_priv; + +mapping_err: + rhashtable_destroy(&prios_ht(chains_priv)); +init_prios_ht_err: + rhashtable_destroy(&chains_ht(chains_priv)); +init_chains_ht_err: + kfree(chains_priv); + return ERR_PTR(err); +} + +static void +mlx5_chains_cleanup(struct mlx5_fs_chains *chains) +{ + mutex_destroy(&chains_lock(chains)); + mapping_destroy(chains_mapping(chains)); + rhashtable_destroy(&prios_ht(chains)); + rhashtable_destroy(&chains_ht(chains)); + + kfree(chains); +} + +struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ + struct mlx5_fs_chains *chains; + + chains = mlx5_chains_init(dev, attr); + + return chains; +} + +void +mlx5_chains_destroy(struct mlx5_fs_chains *chains) +{ + mlx5_chains_cleanup(chains); +} + +int +mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain, + u32 *chain_mapping) +{ + return mapping_add(chains_mapping(chains), &chain, chain_mapping); +} + +int +mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping) +{ + return mapping_remove(chains_mapping(chains), chain_mapping); +} + +int mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag, + u32 *chain) +{ + int err; + + err = mapping_find(chains_mapping(chains), tag, chain); + if (err) { + mlx5_core_warn(chains->dev, "Can't find chain for tag: %d\n", tag); + return -ENOENT; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h new file mode 100644 index 000000000000..6d5be31b05dd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. 
*/ + +#ifndef __MLX5_FS_CHAINS_H__ +#define __MLX5_FS_CHAINS_H__ + +#include <linux/mlx5/fs.h> + +struct mlx5_fs_chains; + +enum mlx5_chains_flags { + MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED = BIT(1), + MLX5_CHAINS_FT_TUNNEL_SUPPORTED = BIT(2), +}; + +struct mlx5_chains_attr { + enum mlx5_flow_namespace_type ns; + u32 flags; + u32 max_ft_sz; + u32 max_grp_num; + struct mlx5_flow_table *default_ft; + u32 max_restore_tag; +}; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +bool +mlx5_chains_prios_supported(struct mlx5_fs_chains *chains); +bool +mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains); + +struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level); +void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level); + +struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains); + +struct mlx5_flow_table * +mlx5_chains_create_global_table(struct mlx5_fs_chains *chains); +void +mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft); + +int +mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain, + u32 *chain_mapping); +int +mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, + u32 chain_mapping); + +struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr); +void mlx5_chains_destroy(struct mlx5_fs_chains *chains); + +int +mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag, u32 *chain); + +void +mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) { return ERR_PTR(-EOPNOTSUPP); } +static inline void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) {} + +static inline struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ return NULL; } +static inline void +mlx5_chains_destroy(struct mlx5_fs_chains *chains) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __MLX5_FS_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ce43e3feccd9..8ff207aa1479 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -57,6 +57,7 @@ #include "lib/mpfs.h" #include "eswitch.h" #include "devlink.h" +#include "fw_reset.h" #include "lib/mlx5.h" #include "fpga/core.h" #include "fpga/ipsec.h" @@ -548,6 +549,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, dct)) MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); + if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event)) + MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1); + if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) MLX5_SET(cmd_hca_cap, set_hca_cap, @@ -739,7 +743,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, pci_set_drvdata(dev->pdev, dev); dev->bar_addr =
pci_resource_start(pdev, 0); - priv->numa_node = dev_to_node(&dev->pdev->dev); + priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); err = mlx5_pci_enable_device(dev); if (err) { @@ -832,6 +836,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eq_cleanup; } + err = mlx5_fw_reset_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize fw reset events\n"); + goto err_events_cleanup; + } + mlx5_cq_debugfs_init(dev); mlx5_init_reserved_gids(dev); @@ -893,6 +903,8 @@ err_tables_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); +err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); @@ -920,6 +932,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); mlx5_irq_table_cleanup(dev); @@ -1078,6 +1091,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_fw_tracer; } + mlx5_fw_reset_events_start(dev); mlx5_hv_vhca_init(dev->hv_vhca); err = mlx5_rsc_dump_init(dev); @@ -1139,6 +1153,7 @@ err_fpga_start: mlx5_rsc_dump_cleanup(dev); err_rsc_dump: mlx5_hv_vhca_cleanup(dev->hv_vhca); + mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: mlx5_eq_table_destroy(dev); @@ -1161,6 +1176,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_fpga_device_stop(dev); mlx5_rsc_dump_cleanup(dev); mlx5_hv_vhca_cleanup(dev->hv_vhca); + mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); mlx5_irq_table_destroy(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index fc1649dac11b..8cec85ab419d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -100,6 +100,11 @@ do { \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +static inline struct device *mlx5_core_dma_dev(struct mlx5_core_dev *dev) +{ + return &dev->pdev->dev; +} + enum { MLX5_CMD_DATA, /* print command payload only */ MLX5_CMD_TIME, /* print command execution time */ @@ -123,6 +128,8 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_error_sw_reset(struct mlx5_core_dev *dev); +u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev); +int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index c0e18f2ade99..150638814517 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -238,7 +238,7 @@ static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp, rb_erase(&fwp->rb_node, root); if (in_free_list) list_del(&fwp->list); - dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK, + dma_unmap_page(mlx5_core_dma_dev(dev), fwp->addr & MLX5_U64_4K_PAGE_MASK, PAGE_SIZE, DMA_BIDIRECTIONAL); __free_page(fwp->page); kfree(fwp); @@ -265,7 +265,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, 
u32 func_id) static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) { - struct device *device = dev->device; + struct device *device = mlx5_core_dma_dev(dev); int nid = dev_to_node(device); struct page *page; u64 zero_addr = 1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index c63f727273d8..7df883686d46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -203,7 +203,6 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; struct mlx5dr_domain *dmn = matcher->tbl->dmn; struct mlx5dr_match_param mask = {}; - struct mlx5dr_match_misc3 *misc3; struct mlx5dr_ste_build *sb; bool inner, rx; int idx = 0; @@ -252,18 +251,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) && (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) { - ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask, - dmn, inner, rx); - if (ret) - return ret; + mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask, + dmn, inner, rx); } if (dr_mask_is_smac_set(&mask.outer) && dr_mask_is_dmac_set(&mask.outer)) { - ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask, - inner, rx); - if (ret) - return ret; + mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask, + inner, rx); } if (dr_mask_is_smac_set(&mask.outer)) @@ -313,8 +308,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx); - misc3 = &mask.misc3; - if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc3) && + if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(&mask.misc3) && mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) || (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) && mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) { @@ -340,10 +334,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_smac_set(&mask.inner) && dr_mask_is_dmac_set(&mask.inner)) { - ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], - &mask, inner, rx); - if (ret) - return ret; + mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], + &mask, inner, rx); } if (dr_mask_is_smac_set(&mask.inner)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 6ec5106bc472..b3c9dc032026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -242,7 +242,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); new_ste = &new_htbl->ste_arr[new_idx]; - if (mlx5dr_ste_not_used_ste(new_ste)) { + if (mlx5dr_ste_is_not_used(new_ste)) { mlx5dr_htbl_get(new_htbl); list_add_tail(&new_ste->miss_list_node, mlx5dr_ste_get_miss_list(new_ste)); @@ -335,7 +335,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, for (i = 0; i < cur_entries; i++) { cur_ste = &cur_htbl->ste_arr[i]; - if (mlx5dr_ste_not_used_ste(cur_ste)) /* Empty, nothing to copy */ + if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */ continue; err = dr_rule_rehash_copy_miss_list(matcher, @@ -791,7 +791,7 @@ again: miss_list = &cur_htbl->chunk->miss_list[index]; ste = &cur_htbl->ste_arr[index]; - if (mlx5dr_ste_not_used_ste(ste)) 
{ + if (mlx5dr_ste_is_not_used(ste)) { if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl, ste, ste_location, hw_ste, miss_list, @@ -985,31 +985,28 @@ static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec) static bool dr_rule_skip(enum mlx5dr_domain_type domain, enum mlx5dr_ste_entry_type ste_type, struct mlx5dr_match_param *mask, - struct mlx5dr_match_param *value) + struct mlx5dr_match_param *value, + u32 flow_source) { + bool rx = ste_type == MLX5DR_STE_TYPE_RX; + if (domain != MLX5DR_DOMAIN_TYPE_FDB) return false; if (mask->misc.source_port) { - if (ste_type == MLX5DR_STE_TYPE_RX) - if (value->misc.source_port != WIRE_PORT) - return true; + if (rx && value->misc.source_port != WIRE_PORT) + return true; - if (ste_type == MLX5DR_STE_TYPE_TX) - if (value->misc.source_port == WIRE_PORT) - return true; + if (!rx && value->misc.source_port == WIRE_PORT) + return true; } - /* Metadata C can be used to describe the source vport */ - if (mask->misc2.metadata_reg_c_0) { - if (ste_type == MLX5DR_STE_TYPE_RX) - if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) != WIRE_PORT) - return true; + if (rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT) + return true; + + if (!rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) + return true; - if (ste_type == MLX5DR_STE_TYPE_TX) - if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) == WIRE_PORT) - return true; - } return false; } @@ -1038,7 +1035,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, INIT_LIST_HEAD(&nic_rule->rule_members_list); - if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param)) + if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param, + rule->flow_source)) return 0; hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL); @@ -1173,7 +1171,8 @@ static struct mlx5dr_rule * dr_rule_create_rule(struct mlx5dr_matcher *matcher, struct mlx5dr_match_parameters *value, size_t num_actions, - struct mlx5dr_action *actions[]) + struct mlx5dr_action *actions[], + u32 flow_source) { struct mlx5dr_domain *dmn = matcher->tbl->dmn; struct mlx5dr_match_param param = {}; @@ -1188,6 +1187,7 @@ dr_rule_create_rule(struct mlx5dr_matcher *matcher, return NULL; rule->matcher = matcher; + rule->flow_source = flow_source; INIT_LIST_HEAD(&rule->rule_actions_list); ret = dr_rule_add_action_members(rule, num_actions, actions); @@ -1232,13 +1232,14 @@ free_rule: struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher, struct mlx5dr_match_parameters *value, size_t num_actions, - struct mlx5dr_action *actions[]) + struct mlx5dr_action *actions[], + u32 flow_source) { struct mlx5dr_rule *rule; refcount_inc(&matcher->refcount); - rule = dr_rule_create_rule(matcher, value, num_actions, actions); + rule = dr_rule_create_rule(matcher, value, num_actions, actions, flow_source); if (!rule) refcount_dec(&matcher->refcount); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 2ca79b9bde1f..24dede1b0a20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -466,10 +466,10 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, * need to add the bit_mask */ for (j = 0; j < num_stes_per_iter; j++) { - u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste; + struct mlx5dr_ste *ste = &htbl->ste_arr[ste_index + j]; u32 ste_off = j * DR_STE_SIZE; - if (mlx5dr_ste_is_not_valid_entry(hw_ste)) { + if 
(mlx5dr_ste_is_not_used(ste)) { memcpy(data + ste_off, formatted_ste, DR_STE_SIZE); } else { @@ -831,7 +831,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, if (!mr) return NULL; - dma_device = &mdev->pdev->dev; + dma_device = mlx5_core_dma_dev(mdev); dma_addr = dma_map_single(dma_device, buf, size, DMA_BIDIRECTIONAL); err = dma_mapping_error(dma_device, dma_addr); @@ -860,7 +860,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr) { mlx5_core_destroy_mkey(mdev, &mr->mkey); - dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size, + dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size, DMA_BIDIRECTIONAL); kfree(mr); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 00c2f598f034..b01aaec75622 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -155,6 +155,13 @@ static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask) return byte_mask; } +static u8 *mlx5dr_ste_get_tag(u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + + return hw_ste->tag; +} + void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask) { struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; @@ -549,25 +556,6 @@ void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr) dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste); } -/* The assumption here is that we don't update the ste->hw_ste if it is not - * used ste, so it will be all zero, checking the next_lu_type. - */ -bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste) -{ - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)p_hw_ste; - - if (MLX5_GET(ste_general, hw_ste, next_lu_type) == - MLX5DR_STE_LU_TYPE_NOP) - return true; - - return false; -} - -bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste) -{ - return !ste->refcount; -} - /* Init one ste as a pattern for ste data array */ void mlx5dr_ste_set_formatted_ste(u16 gvmi, struct mlx5dr_domain_rx_tx *nic_dmn, @@ -728,7 +716,14 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, { if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { if (mask->misc.source_port && mask->misc.source_port != 0xffff) { - mlx5dr_err(dmn, "Partial mask source_port is not supported\n"); + mlx5dr_err(dmn, + "Partial mask source_port is not supported\n"); + return -EINVAL; + } + if (mask->misc.source_eswitch_owner_vhca_id && + mask->misc.source_eswitch_owner_vhca_id != 0xffff) { + mlx5dr_err(dmn, + "Partial mask source_eswitch_owner_vhca_id is not supported\n"); return -EINVAL; } } @@ -760,7 +755,7 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask); - ret = sb->ste_build_tag_func(value, sb, ste_arr); + ret = sb->ste_build_tag_func(value, sb, mlx5dr_ste_get_tag(ste_arr)); if (ret) return ret; @@ -778,8 +773,8 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, return 0; } -static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value, - bool inner, u8 *bit_mask) +static void dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) { struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; @@ -807,13 +802,6 @@ static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); mask->svlan_tag = 0; } - - if (mask->cvlan_tag || mask->svlan_tag) { - pr_info("Invalid c/svlan mask configuration\n"); - return -EINVAL; - } - - return 0; } static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec) @@ -1059,11 +1047,9 @@ void mlx5dr_ste_copy_param(u8 match_criteria, static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16); DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0); @@ -1104,23 +1090,17 @@ static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value, return 0; } -int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { - int ret; - - ret = dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask); - if (ret) - return ret; + dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask); sb->rx = rx; sb->inner = inner; sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, rx, inner); sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_des_tag; - - return 0; } static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *value, @@ -1136,11 +1116,9 @@ static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *val static int dr_ste_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); @@ -1176,11 +1154,9 @@ static void dr_ste_build_eth_l3_ipv6_src_bit_mask(struct mlx5dr_match_param *val static int dr_ste_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); @@ -1238,11 +1214,9 @@ static void dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(struct mlx5dr_match_param static int dr_ste_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0); DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0); @@ -1328,12 +1302,10 @@ dr_ste_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, } static int dr_ste_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, - bool inner, u8 *hw_ste_p) + bool inner, u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer; struct mlx5dr_match_misc *misc_spec = &value->misc; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid); DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi); @@ -1403,16 +1375,14 @@ static void dr_ste_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_eth_l2_src_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16); DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0); - return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); } void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb, @@ -1440,16 +1410,14 @@ static void dr_ste_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16); DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0); - return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); } void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, @@ -1495,12 +1463,10 @@ static void dr_ste_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc *misc = &value->misc; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16); DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0); @@ -1561,11 +1527,9 @@ static void dr_ste_build_eth_l3_ipv4_misc_bit_mask(struct mlx5dr_match_param *va static int dr_ste_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit); @@ -1608,11 +1572,9 @@ static void dr_ste_build_ipv6_l3_l4_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport); DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport); @@ -1647,7 +1609,7 @@ void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { return 0; } @@ -1673,11 +1635,9 @@ static void dr_ste_build_mpls_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_mpls_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc2_mask = &value->misc2; - u8 *tag = hw_ste->tag; if (sb->inner) DR_STE_SET_MPLS_TAG(mpls, misc2_mask, inner, tag); @@ -1716,11 +1676,9 @@ static void dr_ste_build_gre_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc *misc = &value->misc; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol); @@ -1781,11 +1739,9 @@ static void dr_ste_build_flex_parser_0_bit_mask(struct mlx5dr_match_param *value static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; - u8 *tag = hw_ste->tag; if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) { DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, @@ -1903,11 +1859,9 @@ static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask, static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc3 *misc_3 = &value->misc3; - u8 *tag = hw_ste->tag; u32 icmp_header_data; int dw0_location; int dw1_location; @@ -2007,11 +1961,9 @@ static void dr_ste_build_general_purpose_bit_mask(struct mlx5dr_match_param *val static int dr_ste_build_general_purpose_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, misc_2_mask, metadata_reg_a); @@ -2052,11 +2004,9 @@ static void dr_ste_build_eth_l4_misc_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc3 *misc3 = &value->misc3; - u8 *tag = 
hw_ste->tag; if (sb->inner) { DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num); @@ -2102,11 +2052,9 @@ dr_ste_build_flex_parser_tnl_vxlan_gpe_bit_mask(struct mlx5dr_match_param *value static int dr_ste_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc3 *misc3 = &value->misc3; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, outer_vxlan_gpe_flags, misc3, @@ -2158,11 +2106,9 @@ dr_ste_build_flex_parser_tnl_geneve_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc *misc = &value->misc; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, geneve_protocol_type, misc, geneve_protocol_type); @@ -2205,11 +2151,9 @@ static void dr_ste_build_register_0_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_register_0_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc2 = &value->misc2; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); @@ -2249,11 +2193,9 @@ static void dr_ste_build_register_1_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_register_1_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc2 = &value->misc2; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); @@ -2276,38 +2218,25 @@ void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_build_register_1_tag; } -static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, - u8 *bit_mask) +static void dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) { struct mlx5dr_match_misc *misc_mask = &value->misc; - /* Partial misc source_port is not supported */ - if (misc_mask->source_port && misc_mask->source_port != 0xffff) - return -EINVAL; - - /* Partial misc source_eswitch_owner_vhca_id is not supported */ - if (misc_mask->source_eswitch_owner_vhca_id && - misc_mask->source_eswitch_owner_vhca_id != 0xffff) - return -EINVAL; - DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port); DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn); misc_mask->source_eswitch_owner_vhca_id = 0; - - return 0; } static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc *misc = &value->misc; struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_cmd_caps *caps; u8 *bit_mask = sb->bit_mask; - u8 *tag = hw_ste->tag; bool source_gvmi_set; DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); @@ -2339,19 
+2268,15 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, return 0; } -int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_domain *dmn, - bool inner, bool rx) +void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool rx) { - int ret; - /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */ sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id; - ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); - if (ret) - return ret; + dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); sb->rx = rx; sb->dmn = dmn; @@ -2359,6 +2284,4 @@ int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP; sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); sb->ste_build_tag_func = &dr_ste_build_src_gvmi_qpn_tag; - - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 0883956c58c0..f50f3b107aa3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -194,7 +194,7 @@ struct mlx5dr_ste_build { u8 bit_mask[DR_STE_SIZE_MASK]; int (*ste_build_tag_func)(struct mlx5dr_match_param *spec, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p); + u8 *tag); }; struct mlx5dr_ste_htbl * @@ -227,7 +227,6 @@ void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi); void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size); void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr); void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask); -bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste); bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, u8 ste_location); void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag); @@ -266,6 +265,11 @@ static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste) ste->refcount++; } +static inline bool mlx5dr_ste_is_not_used(struct mlx5dr_ste *ste) +{ + return !ste->refcount; +} + void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste, struct mlx5dr_ste_htbl *next_htbl); bool mlx5dr_ste_equal_tag(void *src, void *dst); @@ -284,9 +288,9 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, struct mlx5dr_match_param *value, u8 *ste_arr); -int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder, - struct mlx5dr_match_param *mask, - bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); @@ -342,10 +346,10 @@ void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb, void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_domain *dmn, - bool inner, bool rx); +void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool rx); void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx); /* Actions utils */ @@ -793,6 +797,7 @@ struct mlx5dr_rule { struct 
mlx5dr_rule_rx_tx rx; struct mlx5dr_rule_rx_tx tx; struct list_head rule_actions_list; + u32 flow_source; }; void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste, @@ -991,7 +996,6 @@ struct mlx5dr_icm_chunk * mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, enum mlx5dr_icm_chunk_size chunk_size); void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk); -bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste); int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, struct mlx5dr_domain_rx_tx *nic_dmn, struct mlx5dr_ste_htbl *htbl, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 9b08eb557a31..96c39a17d026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -487,7 +487,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher, &params, num_actions, - actions); + actions, + fte->flow_context.flow_source); if (!rule) { err = -EINVAL; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 7deaca9ade3b..7914fe3fc68d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -67,7 +67,8 @@ struct mlx5dr_rule * mlx5dr_rule_create(struct mlx5dr_matcher *matcher, struct mlx5dr_match_parameters *value, size_t num_actions, - struct mlx5dr_action *actions[]); + struct mlx5dr_action *actions[], + u32 flow_source); int mlx5dr_rule_destroy(struct mlx5dr_rule *rule); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index ec45a03140d7..7f77c2a71d1c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -20,11 +20,13 @@ #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/workqueue.h> +#include <linux/firmware.h> #include <asm/byteorder.h> #include <net/devlink.h> #include <trace/events/devlink.h> #include "core.h" +#include "core_env.h" #include "item.h" #include "cmd.h" #include "port.h" @@ -32,6 +34,7 @@ #include "emad.h" #include "reg.h" #include "resources.h" +#include "../mlxfw/mlxfw.h" static LIST_HEAD(mlxsw_core_driver_list); static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock); @@ -82,6 +85,11 @@ struct mlxsw_core { struct mlxsw_core_port *ports; unsigned int max_ports; bool fw_flash_in_progress; + struct { + struct devlink_health_reporter *fw_fatal; + } health; + struct mlxsw_env *env; + bool is_initialized; /* Denotes if core was already initialized.
*/ unsigned long driver_priv[]; /* driver_priv has to be always the last item */ }; @@ -128,6 +136,11 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_res_query_enabled); +bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->driver->temp_warn_enabled; +} + bool mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, const struct mlxsw_fw_rev *req_rev) @@ -864,6 +877,294 @@ static struct mlxsw_driver *mlxsw_core_driver_get(const char *kind) return mlxsw_driver; } +struct mlxsw_core_fw_info { + struct mlxfw_dev mlxfw_dev; + struct mlxsw_core *mlxsw_core; +}; + +static int mlxsw_core_fw_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, u32 *p_max_size, + u8 *p_align_bits, u16 *p_max_write_size) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcqi_pl[MLXSW_REG_MCQI_LEN]; + int err; + + mlxsw_reg_mcqi_pack(mcqi_pl, component_index); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcqi), mcqi_pl); + if (err) + return err; + mlxsw_reg_mcqi_unpack(mcqi_pl, p_max_size, p_align_bits, p_max_write_size); + + *p_align_bits = max_t(u8, *p_align_bits, 2); + *p_max_write_size = min_t(u16, *p_max_write_size, MLXSW_REG_MCDA_MAX_DATA_LEN); + return 0; +} + +static int mlxsw_core_fw_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + u8 control_state; + int err; + + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, 0, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcc), mcc_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, fwhandle, NULL, &control_state); + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, 0, *fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, u32 offset) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcda_pl[MLXSW_REG_MCDA_LEN]; + + mlxsw_reg_mcda_pack(mcda_pl, fwhandle, offset, size, data); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcda), mcda_pl); +} + +static int mlxsw_core_fw_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, 
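/* The callbacks in this file drive the mlxfw download FSM; judging from
 * the calls here, the MCC sequence is LOCK_UPDATE_HANDLE, then
 * UPDATE_COMPONENT, MCDA block downloads, VERIFY_COMPONENT (packed just
 * below), ACTIVATE, and RELEASE_UPDATE_HANDLE (or CANCEL on error).
 * mlxsw_reg_mcc_pack() takes (payload, instruction, component_index,
 * update_handle, component_size), with unused fields passed as zero.
 */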
MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_ACTIVATE, 0, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + u8 control_state; + u8 error_code; + int err; + + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, fwhandle, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcc), mcc_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, NULL, &error_code, &control_state); + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlxsw_core_fw_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static void mlxsw_core_fw_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static const struct mlxfw_dev_ops mlxsw_core_fw_mlxsw_dev_ops = { + .component_query = mlxsw_core_fw_component_query, + .fsm_lock = mlxsw_core_fw_fsm_lock, + .fsm_component_update = mlxsw_core_fw_fsm_component_update, + .fsm_block_download = mlxsw_core_fw_fsm_block_download, + .fsm_component_verify = mlxsw_core_fw_fsm_component_verify, + .fsm_activate = mlxsw_core_fw_fsm_activate, + .fsm_query_state = mlxsw_core_fw_fsm_query_state, + .fsm_cancel = mlxsw_core_fw_fsm_cancel, + .fsm_release = mlxsw_core_fw_fsm_release, +}; + +static int mlxsw_core_fw_flash(struct mlxsw_core *mlxsw_core, const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core_fw_info mlxsw_core_fw_info = { + .mlxfw_dev = { + .ops = &mlxsw_core_fw_mlxsw_dev_ops, + .psid = mlxsw_core->bus_info->psid, + .psid_size = strlen(mlxsw_core->bus_info->psid), + .devlink = priv_to_devlink(mlxsw_core), + }, + .mlxsw_core = mlxsw_core + }; + int err; + + mlxsw_core->fw_flash_in_progress = true; + err = mlxfw_firmware_flash(&mlxsw_core_fw_info.mlxfw_dev, firmware, extack); + mlxsw_core->fw_flash_in_progress = false; + + return err; +} + +static int mlxsw_core_fw_rev_validate(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_fw_rev 
*req_rev, + const char *filename) +{ + const struct mlxsw_fw_rev *rev = &mlxsw_bus_info->fw_rev; + union devlink_param_value value; + const struct firmware *firmware; + int err; + + /* Don't check if driver does not require it */ + if (!req_rev || !filename) + return 0; + + /* Don't check if devlink 'fw_load_policy' param is 'flash' */ + err = devlink_param_driverinit_value_get(priv_to_devlink(mlxsw_core), + DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, + &value); + if (err) + return err; + if (value.vu8 == DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH) + return 0; + + /* Validate driver & FW are compatible */ + if (rev->major != req_rev->major) { + WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n", + rev->major, req_rev->major); + return -EINVAL; + } + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) + return 0; + + dev_err(mlxsw_bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, req_rev->major, + req_rev->minor, req_rev->subminor); + dev_info(mlxsw_bus_info->dev, "Flashing firmware using file %s\n", filename); + + err = request_firmware_direct(&firmware, filename, mlxsw_bus_info->dev); + if (err) { + dev_err(mlxsw_bus_info->dev, "Could not request firmware file %s\n", filename); + return err; + } + + err = mlxsw_core_fw_flash(mlxsw_core, firmware, NULL); + release_firmware(firmware); + if (err) + dev_err(mlxsw_bus_info->dev, "Could not upgrade firmware\n"); + + /* On FW flash success, tell the caller FW reset is needed + * if current FW supports it. + */ + if (rev->minor >= req_rev->can_reset_minor) + return err ? err : -EAGAIN; + else + return 0; +} + +static int mlxsw_core_fw_flash_update(struct mlxsw_core *mlxsw_core, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + const struct firmware *firmware; + int err; + + err = request_firmware_direct(&firmware, params->file_name, mlxsw_core->bus_info->dev); + if (err) + return err; + err = mlxsw_core_fw_flash(mlxsw_core, firmware, extack); + release_firmware(firmware); + + return err; +} + +static int mlxsw_core_devlink_param_fw_load_policy_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER && + val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH) { + NL_SET_ERR_MSG_MOD(extack, "'fw_load_policy' must be 'driver' or 'flash'"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param mlxsw_core_fw_devlink_params[] = { + DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlxsw_core_devlink_param_fw_load_policy_validate), +}; + +static int mlxsw_core_fw_params_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + union devlink_param_value value; + int err; + + err = devlink_params_register(devlink, mlxsw_core_fw_devlink_params, + ARRAY_SIZE(mlxsw_core_fw_devlink_params)); + if (err) + return err; + + value.vu8 = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER; + devlink_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, value); + return 0; +} + +static void mlxsw_core_fw_params_unregister(struct mlxsw_core *mlxsw_core) +{ + devlink_params_unregister(priv_to_devlink(mlxsw_core), mlxsw_core_fw_devlink_params, + ARRAY_SIZE(mlxsw_core_fw_devlink_params)); +} + static int mlxsw_devlink_port_split(struct devlink *devlink, unsigned int 
port_index, unsigned int count, @@ -1113,7 +1414,8 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, static int mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, - bool netns_change, + bool netns_change, enum devlink_reload_action action, + enum devlink_reload_limit limit, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); @@ -1126,11 +1428,14 @@ mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, } static int -mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, +mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE); return mlxsw_core_bus_device_register(mlxsw_core->bus_info, mlxsw_core->bus, mlxsw_core->bus_priv, true, @@ -1138,17 +1443,12 @@ mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, } static int mlxsw_devlink_flash_update(struct devlink *devlink, - const char *file_name, - const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; - if (!mlxsw_driver->flash_update) - return -EOPNOTSUPP; - return mlxsw_driver->flash_update(mlxsw_core, file_name, - component, extack); + return mlxsw_core_fw_flash_update(mlxsw_core, params, extack); } static int mlxsw_devlink_trap_init(struct devlink *devlink, @@ -1268,6 +1568,8 @@ mlxsw_devlink_trap_policer_counter_get(struct devlink *devlink, } static const struct devlink_ops mlxsw_devlink_ops = { + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), .reload_down = mlxsw_devlink_core_bus_device_reload_down, .reload_up = mlxsw_devlink_core_bus_device_reload_up, .port_type_set = mlxsw_devlink_port_type_set, @@ -1296,6 +1598,263 @@ static const struct devlink_ops mlxsw_devlink_ops = { .trap_policer_counter_get = mlxsw_devlink_trap_policer_counter_get, }; +static int mlxsw_core_params_register(struct mlxsw_core *mlxsw_core) +{ + int err; + + err = mlxsw_core_fw_params_register(mlxsw_core); + if (err) + return err; + + if (mlxsw_core->driver->params_register) { + err = mlxsw_core->driver->params_register(mlxsw_core); + if (err) + goto err_params_register; + } + return 0; + +err_params_register: + mlxsw_core_fw_params_unregister(mlxsw_core); + return err; +} + +static void mlxsw_core_params_unregister(struct mlxsw_core *mlxsw_core) +{ + mlxsw_core_fw_params_unregister(mlxsw_core); + if (mlxsw_core->driver->params_register) + mlxsw_core->driver->params_unregister(mlxsw_core); +} + +struct mlxsw_core_health_event { + struct mlxsw_core *mlxsw_core; + char mfde_pl[MLXSW_REG_MFDE_LEN]; + struct work_struct work; +}; + +static void mlxsw_core_health_event_work(struct work_struct *work) +{ + struct mlxsw_core_health_event *event; + struct mlxsw_core *mlxsw_core; + + event = container_of(work, struct mlxsw_core_health_event, work); + mlxsw_core = event->mlxsw_core; + devlink_health_report(mlxsw_core->health.fw_fatal, "FW fatal event occurred", + event->mfde_pl); + kfree(event); +} + +static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg, + char *mfde_pl, void *priv) +{ + struct mlxsw_core_health_event *event; + 
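/* Note on context: this listener may run in atomic context on the trap
 * receive path, hence the GFP_ATOMIC allocation below; the MFDE payload
 * is copied into a one-shot work item so that devlink_health_report(),
 * which may sleep, runs from the ordered workqueue instead.
 */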
struct mlxsw_core *mlxsw_core = priv; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + event->mlxsw_core = mlxsw_core; + memcpy(event->mfde_pl, mfde_pl, sizeof(event->mfde_pl)); + INIT_WORK(&event->work, mlxsw_core_health_event_work); + mlxsw_core_schedule_work(&event->work); +} + +static const struct mlxsw_listener mlxsw_core_health_listener = + MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE); + +static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + char *mfde_pl = priv_ctx; + char *val_str; + u8 event_id; + u32 val; + int err; + + if (!priv_ctx) + /* User-triggered dumps are not possible */ + return -EOPNOTSUPP; + + val = mlxsw_reg_mfde_irisc_id_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "irisc_id", val); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "event"); + if (err) + return err; + + event_id = mlxsw_reg_mfde_event_id_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "id", event_id); + if (err) + return err; + switch (event_id) { + case MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO: + val_str = "CR space timeout"; + break; + case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP: + val_str = "KVD insertion machine stopped"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + val = mlxsw_reg_mfde_method_get(mfde_pl); + switch (val) { + case MLXSW_REG_MFDE_METHOD_QUERY: + val_str = "query"; + break; + case MLXSW_REG_MFDE_METHOD_WRITE: + val_str = "write"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "method", val_str); + if (err) + return err; + } + + val = mlxsw_reg_mfde_long_process_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "long_process", val); + if (err) + return err; + + val = mlxsw_reg_mfde_command_type_get(mfde_pl); + switch (val) { + case MLXSW_REG_MFDE_COMMAND_TYPE_MAD: + val_str = "mad"; + break; + case MLXSW_REG_MFDE_COMMAND_TYPE_EMAD: + val_str = "emad"; + break; + case MLXSW_REG_MFDE_COMMAND_TYPE_CMDIF: + val_str = "cmdif"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "command_type", val_str); + if (err) + return err; + } + + val = mlxsw_reg_mfde_reg_attr_id_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "reg_attr_id", val); + if (err) + return err; + + if (event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO) { + val = mlxsw_reg_mfde_log_address_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val); + if (err) + return err; + val = mlxsw_reg_mfde_log_id_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val); + if (err) + return err; + } else if (event_id == MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP) { + val = mlxsw_reg_mfde_pipes_mask_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val); + if (err) + return err; + } + + return 0; +} + +static int +mlxsw_core_health_fw_fatal_test(struct devlink_health_reporter *reporter, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_health_reporter_priv(reporter); + char mfgd_pl[MLXSW_REG_MFGD_LEN]; + int err; + + /* Read the register first to make sure no other bits are changed. 
*/ + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); + if (err) + return err; + mlxsw_reg_mfgd_trigger_test_set(mfgd_pl, true); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); +} + +static const struct devlink_health_reporter_ops +mlxsw_core_health_fw_fatal_ops = { + .name = "fw_fatal", + .dump = mlxsw_core_health_fw_fatal_dump, + .test = mlxsw_core_health_fw_fatal_test, +}; + +static int mlxsw_core_health_fw_fatal_config(struct mlxsw_core *mlxsw_core, + bool enable) +{ + char mfgd_pl[MLXSW_REG_MFGD_LEN]; + int err; + + /* Read the register first to make sure no other bits are changed. */ + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); + if (err) + return err; + mlxsw_reg_mfgd_fatal_event_mode_set(mfgd_pl, enable); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); +} + +static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_health_reporter *fw_fatal; + int err; + + if (!mlxsw_core->driver->fw_fatal_enabled) + return 0; + + fw_fatal = devlink_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops, + 0, mlxsw_core); + if (IS_ERR(fw_fatal)) { + dev_err(mlxsw_core->bus_info->dev, "Failed to create fw fatal reporter"); + return PTR_ERR(fw_fatal); + } + mlxsw_core->health.fw_fatal = fw_fatal; + + err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core); + if (err) + goto err_trap_register; + + err = mlxsw_core_health_fw_fatal_config(mlxsw_core, true); + if (err) + goto err_fw_fatal_config; + + return 0; + +err_fw_fatal_config: + mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core); +err_trap_register: + devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal); + return err; +} + +static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core) +{ + if (!mlxsw_core->driver->fw_fatal_enabled) + return; + + mlxsw_core_health_fw_fatal_config(mlxsw_core, false); + mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core); + /* Make sure there is no more event work scheduled */ + mlxsw_core_flush_owq(); + devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal); +} + static int __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, @@ -1368,12 +1927,21 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_devlink_register; } - if (mlxsw_driver->params_register && !reload) { - err = mlxsw_driver->params_register(mlxsw_core); + if (!reload) { + err = mlxsw_core_params_register(mlxsw_core); if (err) goto err_register_params; } + err = mlxsw_core_fw_rev_validate(mlxsw_core, mlxsw_bus_info, mlxsw_driver->fw_req_rev, + mlxsw_driver->fw_filename); + if (err) + goto err_fw_rev_validate; + + err = mlxsw_core_health_init(mlxsw_core); + if (err) + goto err_health_init; + if (mlxsw_driver->init) { err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack); if (err) @@ -1389,22 +1957,31 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->params_register) - devlink_params_publish(devlink); + err = mlxsw_env_init(mlxsw_core, &mlxsw_core->env); + if (err) + goto err_env_init; + + mlxsw_core->is_initialized = true; + devlink_params_publish(devlink); if (!reload) devlink_reload_enable(devlink); return 0; +err_env_init: + mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: 
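/* The error labels below unwind in reverse order of initialization:
 * env, then thermal, then hwmon, then the driver's own fini, then
 * health, params, and devlink. A failure at any init step therefore
 * releases exactly the resources acquired so far and nothing more.
 */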
mlxsw_hwmon_fini(mlxsw_core->hwmon); err_hwmon_init: if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); err_driver_init: - if (mlxsw_driver->params_unregister && !reload) - mlxsw_driver->params_unregister(mlxsw_core); + mlxsw_core_health_fini(mlxsw_core); +err_health_init: +err_fw_rev_validate: + if (!reload) + mlxsw_core_params_unregister(mlxsw_core); err_register_params: if (!reload) devlink_unregister(devlink); @@ -1469,14 +2046,16 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; } - if (mlxsw_core->driver->params_unregister) - devlink_params_unpublish(devlink); + devlink_params_unpublish(devlink); + mlxsw_core->is_initialized = false; + mlxsw_env_fini(mlxsw_core->env); mlxsw_thermal_fini(mlxsw_core->thermal); mlxsw_hwmon_fini(mlxsw_core->hwmon); if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); - if (mlxsw_core->driver->params_unregister && !reload) - mlxsw_core->driver->params_unregister(mlxsw_core); + mlxsw_core_health_fini(mlxsw_core); + if (!reload) + mlxsw_core_params_unregister(mlxsw_core); if (!reload) devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); @@ -1489,8 +2068,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; reload_fail_deinit: - if (mlxsw_core->driver->params_unregister) - mlxsw_core->driver->params_unregister(mlxsw_core); + mlxsw_core_params_unregister(mlxsw_core); devlink_unregister(devlink); devlink_resources_unregister(devlink, NULL); devlink_free(devlink); @@ -2274,6 +2852,16 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get); +struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->env; +} + +bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->is_initialized; +} + int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module) { enum mlxsw_reg_pmtm_module_type module_type; @@ -2410,18 +2998,6 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_kvd_sizes_get); -void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core) -{ - mlxsw_core->fw_flash_in_progress = true; -} -EXPORT_SYMBOL(mlxsw_core_fw_flash_start); - -void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core) -{ - mlxsw_core->fw_flash_in_progress = false; -} -EXPORT_SYMBOL(mlxsw_core_fw_flash_end); - int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, struct mlxsw_res *res) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 11af3308f8cc..92f7398287be 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -32,6 +32,8 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core); +bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core); + bool mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, const struct mlxsw_fw_rev *req_rev); @@ -221,6 +223,8 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core, struct devlink_port * mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core, u8 local_port); +struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core); +bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core); int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module); int 
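/* mlxsw_core_is_initialized() appears to exist so that paths which can
 * run before mlxsw_env is set up (see
 * mlxsw_env_module_overheat_counter_get() further down, which returns a
 * zero counter in that window) can avoid touching uninitialized state.
 */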
mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); @@ -280,6 +284,8 @@ struct mlxsw_driver { struct list_head list; const char *kind; size_t priv_size; + const struct mlxsw_fw_rev *fw_req_rev; + const char *fw_filename; int (*init)(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info, struct netlink_ext_ack *extack); @@ -324,9 +330,6 @@ struct mlxsw_driver { unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); - int (*flash_update)(struct mlxsw_core *mlxsw_core, - const char *file_name, const char *component, - struct netlink_ext_ack *extack); int (*trap_init)(struct mlxsw_core *mlxsw_core, const struct devlink_trap *trap, void *trap_ctx); void (*trap_fini)(struct mlxsw_core *mlxsw_core, @@ -371,6 +374,8 @@ struct mlxsw_driver { u8 txhdr_len; const struct mlxsw_config_profile *profile; bool res_query_enabled; + bool fw_fatal_enabled; + bool temp_warn_enabled; }; int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, @@ -378,9 +383,6 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, u64 *p_single_size, u64 *p_double_size, u64 *p_linear_size); -void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core); -void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core); - u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core); u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 056eeb85be60..dd26865bd587 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -10,6 +10,18 @@ #include "item.h" #include "reg.h" +struct mlxsw_env_module_info { + u64 module_overheat_counter; + bool is_overheat; +}; + +struct mlxsw_env { + struct mlxsw_core *core; + u8 module_count; + spinlock_t module_info_lock; /* Protects 'module_info'. */ + struct mlxsw_env_module_info module_info[]; +}; + static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp, bool *cmis) { @@ -293,3 +305,359 @@ int mlxsw_env_get_module_eeprom(struct net_device *netdev, return 0; } EXPORT_SYMBOL(mlxsw_env_get_module_eeprom); + +static int mlxsw_env_module_has_temp_sensor(struct mlxsw_core *mlxsw_core, + u8 module, + bool *p_has_temp_sensor) +{ + char mtbr_pl[MLXSW_REG_MTBR_LEN]; + u16 temp; + int err; + + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtbr), mtbr_pl); + if (err) + return err; + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + + switch (temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: + case MLXSW_REG_MTBR_NO_CONN: + case MLXSW_REG_MTBR_NO_TEMP_SENS: + case MLXSW_REG_MTBR_INDEX_NA: + *p_has_temp_sensor = false; + break; + default: + *p_has_temp_sensor = temp ? true : false; + } + return 0; +} + +static int mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core, + u16 sensor_index, bool enable) +{ + char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; + enum mlxsw_reg_mtmp_tee tee; + int err, threshold_hi; + + mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, sensor_index); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + + if (enable) { + err = mlxsw_env_module_temp_thresholds_get(mlxsw_core, + sensor_index - + MLXSW_REG_MTMP_MODULE_INDEX_MIN, + SFP_TEMP_HIGH_WARN, + &threshold_hi); + /* In case it is not possible to query the module's threshold, + * use the default value. 
+ */ + if (err) + threshold_hi = MLXSW_REG_MTMP_THRESH_HI; + else + /* mlxsw_env_module_temp_thresholds_get() multiplies + * Celsius degrees by 1000 whereas MTMP expects + * temperature in 0.125 Celsius degrees units. + * Convert threshold_hi to correct units. + */ + threshold_hi = threshold_hi / 1000 * 8; + + mlxsw_reg_mtmp_temperature_threshold_hi_set(mtmp_pl, threshold_hi); + mlxsw_reg_mtmp_temperature_threshold_lo_set(mtmp_pl, threshold_hi - + MLXSW_REG_MTMP_HYSTERESIS_TEMP); + } + tee = enable ? MLXSW_REG_MTMP_TEE_GENERATE_EVENT : MLXSW_REG_MTMP_TEE_NO_EVENT; + mlxsw_reg_mtmp_tee_set(mtmp_pl, tee); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl); +} + +static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core, + u8 module_count) +{ + int i, err, sensor_index; + bool has_temp_sensor; + + for (i = 0; i < module_count; i++) { + err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i, + &has_temp_sensor); + if (err) + return err; + + if (!has_temp_sensor) + continue; + + sensor_index = i + MLXSW_REG_MTMP_MODULE_INDEX_MIN; + err = mlxsw_env_temp_event_set(mlxsw_core, sensor_index, true); + if (err) + return err; + } + + return 0; +} + +static void mlxsw_env_mtwe_event_func(const struct mlxsw_reg_info *reg, + char *mtwe_pl, void *priv) +{ + struct mlxsw_env *mlxsw_env = priv; + int i, sensor_warning; + bool is_overheat; + + for (i = 0; i < mlxsw_env->module_count; i++) { + /* 64-127 of sensor_index are mapped to the port modules + * sequentially (module 0 is mapped to sensor_index 64, + * module 1 to sensor_index 65 and so on) + */ + sensor_warning = + mlxsw_reg_mtwe_sensor_warning_get(mtwe_pl, + i + MLXSW_REG_MTMP_MODULE_INDEX_MIN); + spin_lock(&mlxsw_env->module_info_lock); + is_overheat = + mlxsw_env->module_info[i].is_overheat; + + if ((is_overheat && sensor_warning) || + (!is_overheat && !sensor_warning)) { + /* Current state is "warning" and MTWE still reports + * warning OR current state is "no warning" and MTWE + * does not report warning. + */ + spin_unlock(&mlxsw_env->module_info_lock); + continue; + } else if (is_overheat && !sensor_warning) { + /* MTWE reports "no warning", turn is_overheat off. + */ + mlxsw_env->module_info[i].is_overheat = false; + spin_unlock(&mlxsw_env->module_info_lock); + } else { + /* Current state is "no warning" and MTWE reports + * "warning", increase the counter and turn is_overheat + * on.
+ */ + mlxsw_env->module_info[i].is_overheat = true; + mlxsw_env->module_info[i].module_overheat_counter++; + spin_unlock(&mlxsw_env->module_info_lock); + } + } +} + +static const struct mlxsw_listener mlxsw_env_temp_warn_listener = + MLXSW_EVENTL(mlxsw_env_mtwe_event_func, MTWE, MTWE); + +static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + + if (!mlxsw_core_temp_warn_enabled(mlxsw_core)) + return 0; + + return mlxsw_core_trap_register(mlxsw_core, + &mlxsw_env_temp_warn_listener, + mlxsw_env); +} + +static void mlxsw_env_temp_warn_event_unregister(struct mlxsw_env *mlxsw_env) +{ + if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core)) + return; + + mlxsw_core_trap_unregister(mlxsw_env->core, + &mlxsw_env_temp_warn_listener, mlxsw_env); +} + +struct mlxsw_env_module_plug_unplug_event { + struct mlxsw_env *mlxsw_env; + u8 module; + struct work_struct work; +}; + +static void mlxsw_env_pmpe_event_work(struct work_struct *work) +{ + struct mlxsw_env_module_plug_unplug_event *event; + struct mlxsw_env *mlxsw_env; + bool has_temp_sensor; + u16 sensor_index; + int err; + + event = container_of(work, struct mlxsw_env_module_plug_unplug_event, + work); + mlxsw_env = event->mlxsw_env; + + spin_lock_bh(&mlxsw_env->module_info_lock); + mlxsw_env->module_info[event->module].is_overheat = false; + spin_unlock_bh(&mlxsw_env->module_info_lock); + + err = mlxsw_env_module_has_temp_sensor(mlxsw_env->core, event->module, + &has_temp_sensor); + /* Do not disable events on modules without sensors or faulty sensors + * because FW returns errors. + */ + if (err) + goto out; + + if (!has_temp_sensor) + goto out; + + sensor_index = event->module + MLXSW_REG_MTMP_MODULE_INDEX_MIN; + mlxsw_env_temp_event_set(mlxsw_env->core, sensor_index, true); + +out: + kfree(event); +} + +static void +mlxsw_env_pmpe_listener_func(const struct mlxsw_reg_info *reg, char *pmpe_pl, + void *priv) +{ + struct mlxsw_env_module_plug_unplug_event *event; + enum mlxsw_reg_pmpe_module_status module_status; + u8 module = mlxsw_reg_pmpe_module_get(pmpe_pl); + struct mlxsw_env *mlxsw_env = priv; + + if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) + return; + + module_status = mlxsw_reg_pmpe_module_status_get(pmpe_pl); + if (module_status != MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ENABLED) + return; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + + event->mlxsw_env = mlxsw_env; + event->module = module; + INIT_WORK(&event->work, mlxsw_env_pmpe_event_work); + mlxsw_core_schedule_work(&event->work); +} + +static const struct mlxsw_listener mlxsw_env_module_plug_listener = + MLXSW_EVENTL(mlxsw_env_pmpe_listener_func, PMPE, PMPE); + +static int +mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + + if (!mlxsw_core_temp_warn_enabled(mlxsw_core)) + return 0; + + return mlxsw_core_trap_register(mlxsw_core, + &mlxsw_env_module_plug_listener, + mlxsw_env); +} + +static void +mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env) +{ + if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core)) + return; + + mlxsw_core_trap_unregister(mlxsw_env->core, + &mlxsw_env_module_plug_listener, + mlxsw_env); +} + +static int +mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core, + u8 module_count) +{ + int i, err; + + for (i = 0; i < module_count; i++) { + char pmaos_pl[MLXSW_REG_PMAOS_LEN]; + + mlxsw_reg_pmaos_pack(pmaos_pl, i, + 
MLXSW_REG_PMAOS_E_GENERATE_EVENT); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(pmaos), pmaos_pl); + if (err) + return err; + } + return 0; +} + +int +mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 module, + u64 *p_counter) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + + /* Prevent switch driver from accessing uninitialized data. */ + if (!mlxsw_core_is_initialized(mlxsw_core)) { + *p_counter = 0; + return 0; + } + + if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) + return -EINVAL; + + spin_lock_bh(&mlxsw_env->module_info_lock); + *p_counter = mlxsw_env->module_info[module].module_overheat_counter; + spin_unlock_bh(&mlxsw_env->module_info_lock); + + return 0; +} +EXPORT_SYMBOL(mlxsw_env_module_overheat_counter_get); + +int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env) +{ + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + struct mlxsw_env *env; + u8 module_count; + int err; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &module_count); + + env = kzalloc(struct_size(env, module_info, module_count), GFP_KERNEL); + if (!env) + return -ENOMEM; + + spin_lock_init(&env->module_info_lock); + env->core = mlxsw_core; + env->module_count = module_count; + *p_env = env; + + err = mlxsw_env_temp_warn_event_register(mlxsw_core); + if (err) + goto err_temp_warn_event_register; + + err = mlxsw_env_module_plug_event_register(mlxsw_core); + if (err) + goto err_module_plug_event_register; + + err = mlxsw_env_module_oper_state_event_enable(mlxsw_core, + env->module_count); + if (err) + goto err_oper_state_event_enable; + + err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count); + if (err) + goto err_temp_event_enable; + + return 0; + +err_temp_event_enable: +err_oper_state_event_enable: + mlxsw_env_module_plug_event_unregister(env); +err_module_plug_event_register: + mlxsw_env_temp_warn_event_unregister(env); +err_temp_warn_event_register: + kfree(env); + return err; +} + +void mlxsw_env_fini(struct mlxsw_env *env) +{ + mlxsw_env_module_plug_event_unregister(env); + /* Make sure there is no more event work scheduled. 
*/ + mlxsw_core_flush_owq(); + mlxsw_env_temp_warn_event_unregister(env); + kfree(env); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h index 064d0e770c01..8e36a2634ef5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -14,4 +14,10 @@ int mlxsw_env_get_module_eeprom(struct net_device *netdev, struct mlxsw_core *mlxsw_core, int module, struct ethtool_eeprom *ee, u8 *data); +int +mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 module, + u64 *p_counter); +int mlxsw_env_init(struct mlxsw_core *core, struct mlxsw_env **p_env); +void mlxsw_env_fini(struct mlxsw_env *env); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 61719ec89808..2196c946698a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -12,8 +12,17 @@ #include "core.h" #include "core_env.h" -#define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127 -#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \ +#define MLXSW_HWMON_SENSORS_MAX_COUNT 64 +#define MLXSW_HWMON_MODULES_MAX_COUNT 64 +#define MLXSW_HWMON_GEARBOXES_MAX_COUNT 32 + +#define MLXSW_HWMON_ATTR_PER_SENSOR 3 +#define MLXSW_HWMON_ATTR_PER_MODULE 7 +#define MLXSW_HWMON_ATTR_PER_GEARBOX 4 + +#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_SENSORS_MAX_COUNT * MLXSW_HWMON_ATTR_PER_SENSOR + \ + MLXSW_HWMON_MODULES_MAX_COUNT * MLXSW_HWMON_ATTR_PER_MODULE + \ + MLXSW_HWMON_GEARBOXES_MAX_COUNT * MLXSW_HWMON_ATTR_PER_GEARBOX + \ MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX) struct mlxsw_hwmon_attr { @@ -97,7 +106,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; - char mtmp_pl[MLXSW_REG_MTMP_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; unsigned long val; int index; int err; @@ -110,7 +119,13 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); - mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true); + + mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + mlxsw_reg_mtmp_mte_set(mtmp_pl, true); + mlxsw_reg_mtmp_mtr_set(mtmp_pl, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n"); @@ -205,25 +220,39 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, return len; } -static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static int mlxsw_hwmon_module_temp_get(struct device *dev, + struct device_attribute *attr, + int *p_temp) { struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; u8 module; - int temp; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(dev, "Failed to query module temperature\n"); + return err; + } 
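The mlxsw_hwmon_temp_rst_store() hunk above replaces mlxsw_reg_mtmp_pack(), which zero-initializes the whole payload, with a query-modify-write sequence, so that fields already programmed into MTMP (notably the tee event-enable and the thresholds set up by core_env.c) survive a history reset. The same sequence recurs later in mlxsw_hwmon_temp_init(); a minimal sketch of the pattern, factored into a helper whose name is illustrative and not part of the patch, looks like this:

/* Illustrative helper; the name is not part of the patch. */
static int mlxsw_hwmon_mtmp_mte_mtr_set(struct mlxsw_core *core,
					u16 sensor_index)
{
	char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0};
	int err;

	/* Read back the current MTMP contents for this sensor... */
	mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, sensor_index);
	err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
	if (err)
		return err;

	/* ...then set only the max-temperature enable/reset bits and
	 * write the payload back, leaving tee and the thresholds as
	 * they were.
	 */
	mlxsw_reg_mtmp_mte_set(mtmp_pl, true);
	mlxsw_reg_mtmp_mtr_set(mtmp_pl, true);
	return mlxsw_reg_write(core, MLXSW_REG(mtmp), mtmp_pl);
}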
+ mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL); + + return 0; +} + +static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp; + + err = mlxsw_hwmon_module_temp_get(dev, attr, &temp); if (err) return err; - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); return sprintf(buf, "%d\n", temp); } @@ -270,48 +299,72 @@ static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, return sprintf(buf, "%u\n", fault); } -static ssize_t -mlxsw_hwmon_module_temp_critical_show(struct device *dev, - struct device_attribute *attr, char *buf) +static int mlxsw_hwmon_module_temp_critical_get(struct device *dev, + struct device_attribute *attr, + int *p_temp) { struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; - int temp; u8 module; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, - SFP_TEMP_HIGH_WARN, &temp); + SFP_TEMP_HIGH_WARN, p_temp); if (err) { dev_err(dev, "Failed to query module temperature thresholds\n"); return err; } - return sprintf(buf, "%u\n", temp); + return 0; } static ssize_t -mlxsw_hwmon_module_temp_emergency_show(struct device *dev, - struct device_attribute *attr, - char *buf) +mlxsw_hwmon_module_temp_critical_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int err, temp; + + err = mlxsw_hwmon_module_temp_critical_get(dev, attr, &temp); + if (err) + return err; + + return sprintf(buf, "%u\n", temp); +} + +static int mlxsw_hwmon_module_temp_emergency_get(struct device *dev, + struct device_attribute *attr, + int *p_temp) { struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; u8 module; - int temp; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, - SFP_TEMP_HIGH_ALARM, &temp); + SFP_TEMP_HIGH_ALARM, p_temp); if (err) { dev_err(dev, "Failed to query module temperature thresholds\n"); return err; } + return 0; +} + +static ssize_t +mlxsw_hwmon_module_temp_emergency_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp; + + err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &temp); + if (err) + return err; + return sprintf(buf, "%u\n", temp); } @@ -341,6 +394,53 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev, return sprintf(buf, "gearbox %03u\n", index); } +static ssize_t mlxsw_hwmon_temp_critical_alarm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp, emergency_temp, critic_temp; + + err = mlxsw_hwmon_module_temp_get(dev, attr, &temp); + if (err) + return err; + + if (temp <= 0) + return sprintf(buf, "%d\n", false); + + err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &emergency_temp); + if (err) + return err; + + if (temp >= emergency_temp) + return sprintf(buf, "%d\n", false); + + err = mlxsw_hwmon_module_temp_critical_get(dev, attr, &critic_temp); + if (err) + return err; + + return sprintf(buf, "%d\n", temp >= critic_temp); +} + +static ssize_t mlxsw_hwmon_temp_emergency_alarm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp, emergency_temp; + + err = mlxsw_hwmon_module_temp_get(dev, attr, &temp); + if (err) + return err; + + if (temp <= 0) + 
return sprintf(buf, "%d\n", false); + + err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &emergency_temp); + if (err) + return err; + + return sprintf(buf, "%d\n", temp >= emergency_temp); +} + enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP, MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, @@ -354,6 +454,8 @@ enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, + MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM, + MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM, }; static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, @@ -444,6 +546,20 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "temp%u_label", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_temp_critical_alarm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_crit_alarm", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_temp_emergency_alarm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_emergency_alarm", num + 1); + break; default: WARN_ON(1); } @@ -460,7 +576,6 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) { char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0}; - char mtmp_pl[MLXSW_REG_MTMP_LEN]; int i; int err; @@ -471,7 +586,15 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) } mlxsw_hwmon->sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); for (i = 0; i < mlxsw_hwmon->sensor_count; i++) { - mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true); + char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; + + mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, i); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), + mtmp_pl); + if (err) + return err; + mlxsw_reg_mtmp_mte_set(mtmp_pl, true); + mlxsw_reg_mtmp_mtr_set(mtmp_pl, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -566,6 +689,12 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM, + i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM, + i, i); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 1c64b03ff48e..641cdd81882b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -620,9 +620,9 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) return elem; } -static void mlxsw_pci_cq_tasklet(unsigned long data) +static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) { - struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data; + struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); struct mlxsw_pci *mlxsw_pci = q->pci; char *cqe; int items = 0; @@ -733,9 +733,9 @@ static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q) return elem; } -static void mlxsw_pci_eq_tasklet(unsigned long data) +static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) { - struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data; + struct mlxsw_pci_queue *q = 
from_tasklet(q, t, tasklet); struct mlxsw_pci *mlxsw_pci = q->pci; u8 cq_count = mlxsw_pci_cq_count(mlxsw_pci); unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)]; @@ -792,7 +792,7 @@ struct mlxsw_pci_queue_ops { struct mlxsw_pci_queue *q); void (*fini)(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q); - void (*tasklet)(unsigned long data); + void (*tasklet)(struct tasklet_struct *t); u16 (*elem_count_f)(const struct mlxsw_pci_queue *q); u8 (*elem_size_f)(const struct mlxsw_pci_queue *q); u16 elem_count; @@ -855,7 +855,7 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, q->pci = mlxsw_pci; if (q_ops->tasklet) - tasklet_init(&q->tasklet, q_ops->tasklet, (unsigned long) q); + tasklet_setup(&q->tasklet, q_ops->tasklet); mem_item->size = MLXSW_PCI_AQ_SIZE; mem_item->buf = pci_alloc_consistent(mlxsw_pci->pdev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 079b080de7f7..39eff6a57ba2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4174,7 +4174,6 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); #define MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M BIT(0) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII BIT(1) -#define MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII BIT(2) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R BIT(3) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G BIT(4) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G BIT(5) @@ -4197,7 +4196,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 BIT(3) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR BIT(4) -#define MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2 BIT(5) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 BIT(6) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 BIT(7) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR BIT(12) @@ -4210,10 +4208,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22) -#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23) -#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX BIT(24) -#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(25) -#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T BIT(26) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29) @@ -5411,6 +5405,64 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port) mlxsw_reg_pspa_sub_port_set(payload, 0); } +/* PMAOS - Ports Module Administrative and Operational Status + * ---------------------------------------------------------- + * This register configures and retrieves the per-module status. + */ +#define MLXSW_REG_PMAOS_ID 0x5012 +#define MLXSW_REG_PMAOS_LEN 0x10 + +MLXSW_REG_DEFINE(pmaos, MLXSW_REG_PMAOS_ID, MLXSW_REG_PMAOS_LEN); + +/* reg_pmaos_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmaos, slot_index, 0x00, 24, 4); + +/* reg_pmaos_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmaos, module, 0x00, 16, 8); + +/* reg_pmaos_ase + * Admin state update enable. + * If this bit is set, admin state will be updated based on admin_state field. + * Only relevant on Set() operations.
+ * Access: WO + */ +MLXSW_ITEM32(reg, pmaos, ase, 0x04, 31, 1); + +/* reg_pmaos_ee + * Event update enable. + * If this bit is set, event generation will be updated based on the e field. + * Only relevant on Set operations. + * Access: WO + */ +MLXSW_ITEM32(reg, pmaos, ee, 0x04, 30, 1); + +enum mlxsw_reg_pmaos_e { + MLXSW_REG_PMAOS_E_DO_NOT_GENERATE_EVENT, + MLXSW_REG_PMAOS_E_GENERATE_EVENT, + MLXSW_REG_PMAOS_E_GENERATE_SINGLE_EVENT, +}; + +/* reg_pmaos_e + * Event Generation on operational state change. + * Access: RW + */ +MLXSW_ITEM32(reg, pmaos, e, 0x04, 0, 2); + +static inline void mlxsw_reg_pmaos_pack(char *payload, u8 module, + enum mlxsw_reg_pmaos_e e) +{ + MLXSW_REG_ZERO(pmaos, payload); + mlxsw_reg_pmaos_module_set(payload, module); + mlxsw_reg_pmaos_e_set(payload, e); + mlxsw_reg_pmaos_ee_set(payload, true); +} + /* PPLR - Port Physical Loopback Register * -------------------------------------- * This register allows configuration of the port's loopback mode. @@ -5447,6 +5499,50 @@ static inline void mlxsw_reg_pplr_pack(char *payload, u8 local_port, MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL : 0); } +/* PMPE - Port Module Plug/Unplug Event Register + * --------------------------------------------- + * This register reports any operational status change of a module. + * A change in the module’s state will generate an event only if the change + * happens after arming the event mechanism. Any changes to the module state + * while the event mechanism is not armed will not be reported. Software can + * query the PMPE register for module status. + */ +#define MLXSW_REG_PMPE_ID 0x5024 +#define MLXSW_REG_PMPE_LEN 0x10 + +MLXSW_REG_DEFINE(pmpe, MLXSW_REG_PMPE_ID, MLXSW_REG_PMPE_LEN); + +/* reg_pmpe_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmpe, slot_index, 0x00, 24, 4); + +/* reg_pmpe_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmpe, module, 0x00, 16, 8); + +enum mlxsw_reg_pmpe_module_status { + MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ENABLED = 1, + MLXSW_REG_PMPE_MODULE_STATUS_UNPLUGGED, + MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ERROR, + MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_DISABLED, +}; + +/* reg_pmpe_module_status + * Module status. + * Access: RO + */ +MLXSW_ITEM32(reg, pmpe, module_status, 0x00, 0, 4); + +/* reg_pmpe_error_type + * Module error details. + * Access: RO + */ +MLXSW_ITEM32(reg, pmpe, error_type, 0x04, 8, 4); + /* PDDR - Port Diagnostics Database Register * ----------------------------------------- * The PDDR enables to read the Phy debug database @@ -5585,6 +5681,9 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4); enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_EMAD, + MLXSW_REG_HTGT_TRAP_GROUP_MFDE, + MLXSW_REG_HTGT_TRAP_GROUP_MTWE, + MLXSW_REG_HTGT_TRAP_GROUP_PMPE, MLXSW_REG_HTGT_TRAP_GROUP_SP_STP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, @@ -8418,6 +8517,13 @@ MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16); * 2 - Generate single event * Access: RW */ + +enum mlxsw_reg_mtmp_tee { + MLXSW_REG_MTMP_TEE_NO_EVENT, + MLXSW_REG_MTMP_TEE_GENERATE_EVENT, + MLXSW_REG_MTMP_TEE_GENERATE_SINGLE_EVENT, +}; + MLXSW_ITEM32(reg, mtmp, tee, 0x0C, 30, 2); #define MLXSW_REG_MTMP_THRESH_HI 0x348 /* 105 Celsius */ @@ -8428,6 +8534,7 @@ MLXSW_ITEM32(reg, mtmp, tee, 0x0C, 30, 2); */ MLXSW_ITEM32(reg, mtmp, temperature_threshold_hi, 0x0C, 0, 16); +#define MLXSW_REG_MTMP_HYSTERESIS_TEMP 0x28 /* 5 Celsius */ /* reg_mtmp_temperature_threshold_lo * Low threshold for Temperature Warning Event. In 0.125 Celsius. 
* Access: RW @@ -8471,6 +8578,23 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } +/* MTWE - Management Temperature Warning Event + * ------------------------------------------- + * This register is used for over temperature warning. + */ +#define MLXSW_REG_MTWE_ID 0x900B +#define MLXSW_REG_MTWE_LEN 0x10 + +MLXSW_REG_DEFINE(mtwe, MLXSW_REG_MTWE_ID, MLXSW_REG_MTWE_LEN); + +/* reg_mtwe_sensor_warning + * Bit vector indicating which of the sensor readings is above threshold. + * Address 00h bit31 is sensor_warning[127]. + * Address 0Ch bit0 is sensor_warning[0]. + * Access: RO + */ +MLXSW_ITEM_BIT_ARRAY(reg, mtwe, sensor_warning, 0x0, 0x10, 1); + /* MTBR - Management Temperature Bulk Register * ------------------------------------------- * This register is used for bulk temperature reading. @@ -9827,6 +9951,26 @@ static inline void mlxsw_reg_mtptpt_pack(char *payload, mlxsw_reg_mtptpt_message_type_set(payload, message_type); } +/* MFGD - Monitoring FW General Debug Register + * ------------------------------------------- + */ +#define MLXSW_REG_MFGD_ID 0x90F0 +#define MLXSW_REG_MFGD_LEN 0x0C + +MLXSW_REG_DEFINE(mfgd, MLXSW_REG_MFGD_ID, MLXSW_REG_MFGD_LEN); + +/* reg_mfgd_fw_fatal_event_mode + * 0 - don't check FW fatal (default) + * 1 - check FW fatal - enable MFDE trap + * Access: RW + */ +MLXSW_ITEM32(reg, mfgd, fatal_event_mode, 0x00, 9, 2); + +/* reg_mfgd_trigger_test + * Access: WO + */ +MLXSW_ITEM32(reg, mfgd, trigger_test, 0x00, 11, 1); + /* MGPIR - Management General Peripheral Information Register * ---------------------------------------------------------- * MGPIR register allows software to query the hardware and @@ -9886,6 +10030,84 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, *num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload); } +/* MFDE - Monitoring FW Debug Register + * ----------------------------------- + */ +#define MLXSW_REG_MFDE_ID 0x9200 +#define MLXSW_REG_MFDE_LEN 0x18 + +MLXSW_REG_DEFINE(mfde, MLXSW_REG_MFDE_ID, MLXSW_REG_MFDE_LEN); + +/* reg_mfde_irisc_id + * Which irisc triggered the event + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 8, 4); + +enum mlxsw_reg_mfde_event_id { + MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO = 1, + /* KVD insertion machine stopped */ + MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP, +}; + +/* reg_mfde_event_id + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, event_id, 0x00, 0, 8); + +enum mlxsw_reg_mfde_method { + MLXSW_REG_MFDE_METHOD_QUERY, + MLXSW_REG_MFDE_METHOD_WRITE, +}; + +/* reg_mfde_method + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, method, 0x04, 29, 1); + +/* reg_mfde_long_process + * Indicates if the command is in long_process mode. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, long_process, 0x04, 28, 1); + +enum mlxsw_reg_mfde_command_type { + MLXSW_REG_MFDE_COMMAND_TYPE_MAD, + MLXSW_REG_MFDE_COMMAND_TYPE_EMAD, + MLXSW_REG_MFDE_COMMAND_TYPE_CMDIF, +}; + +/* reg_mfde_command_type + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, command_type, 0x04, 24, 2); + +/* reg_mfde_reg_attr_id + * EMAD - register id, MAD - attribute id + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, reg_attr_id, 0x04, 0, 16); + +/* reg_mfde_log_address + * crspace address accessed, which resulted in timeout. + * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, log_address, 0x10, 0, 32); + +/* reg_mfde_log_id + * Which irisc triggered the timeout.
+ * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, log_id, 0x14, 0, 4); + +/* reg_mfde_pipes_mask + * Bit per kvh pipe. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, pipes_mask, 0x10, 0, 16); + /* TNGCR - Tunneling NVE General Configuration Register * ---------------------------------------------------- * The TNGCR register is used for setting up the NVE Tunneling configuration. @@ -10948,7 +11170,9 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(pptb), MLXSW_REG(pbmc), MLXSW_REG(pspa), + MLXSW_REG(pmaos), MLXSW_REG(pplr), + MLXSW_REG(pmpe), MLXSW_REG(pddr), MLXSW_REG(pmtm), MLXSW_REG(htgt), @@ -10978,6 +11202,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(fore), MLXSW_REG(mtcap), MLXSW_REG(mtmp), + MLXSW_REG(mtwe), MLXSW_REG(mtbr), MLXSW_REG(mcia), MLXSW_REG(mpat), @@ -10999,7 +11224,9 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mtpppc), MLXSW_REG(mtpptr), MLXSW_REG(mtptpt), + MLXSW_REG(mfgd), MLXSW_REG(mgpir), + MLXSW_REG(mfde), MLXSW_REG(tngcr), MLXSW_REG(tnumt), MLXSW_REG(tnqcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index f3c0e241e1b4..16b47fce540b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -42,11 +42,10 @@ #include "spectrum_span.h" #include "spectrum_ptp.h" #include "spectrum_trap.h" -#include "../mlxfw/mlxfw.h" #define MLXSW_SP1_FWREV_MAJOR 13 -#define MLXSW_SP1_FWREV_MINOR 2007 -#define MLXSW_SP1_FWREV_SUBMINOR 1168 +#define MLXSW_SP1_FWREV_MINOR 2008 +#define MLXSW_SP1_FWREV_SUBMINOR 1310 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { @@ -62,8 +61,8 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { "." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2" #define MLXSW_SP2_FWREV_MAJOR 29 -#define MLXSW_SP2_FWREV_MINOR 2007 -#define MLXSW_SP2_FWREV_SUBMINOR 1168 +#define MLXSW_SP2_FWREV_MINOR 2008 +#define MLXSW_SP2_FWREV_SUBMINOR 1310 static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { .major = MLXSW_SP2_FWREV_MAJOR, @@ -77,8 +76,8 @@ static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { "." 
__stringify(MLXSW_SP2_FWREV_SUBMINOR) ".mfa2" #define MLXSW_SP3_FWREV_MAJOR 30 -#define MLXSW_SP3_FWREV_MINOR 2007 -#define MLXSW_SP3_FWREV_SUBMINOR 1168 +#define MLXSW_SP3_FWREV_MINOR 2008 +#define MLXSW_SP3_FWREV_SUBMINOR 1310 static const struct mlxsw_fw_rev mlxsw_sp3_fw_rev = { .major = MLXSW_SP3_FWREV_MAJOR, @@ -170,274 +169,6 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16); */ MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); -struct mlxsw_sp_mlxfw_dev { - struct mlxfw_dev mlxfw_dev; - struct mlxsw_sp *mlxsw_sp; -}; - -static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, - u16 component_index, u32 *p_max_size, - u8 *p_align_bits, u16 *p_max_write_size) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcqi_pl[MLXSW_REG_MCQI_LEN]; - int err; - - mlxsw_reg_mcqi_pack(mcqi_pl, component_index); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcqi), mcqi_pl); - if (err) - return err; - mlxsw_reg_mcqi_unpack(mcqi_pl, p_max_size, p_align_bits, - p_max_write_size); - - *p_align_bits = max_t(u8, *p_align_bits, 2); - *p_max_write_size = min_t(u16, *p_max_write_size, - MLXSW_REG_MCDA_MAX_DATA_LEN); - return 0; -} - -static int mlxsw_sp_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - u8 control_state; - int err; - - mlxsw_reg_mcc_pack(mcc_pl, 0, 0, 0, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); - if (err) - return err; - - mlxsw_reg_mcc_unpack(mcc_pl, fwhandle, NULL, &control_state); - if (control_state != MLXFW_FSM_STATE_IDLE) - return -EBUSY; - - mlxsw_reg_mcc_pack(mcc_pl, - MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, - 0, *fwhandle, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static int mlxsw_sp_fsm_component_update(struct mlxfw_dev *mlxfw_dev, - u32 fwhandle, u16 component_index, - u32 component_size) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, - component_index, fwhandle, component_size); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static int mlxsw_sp_fsm_block_download(struct mlxfw_dev *mlxfw_dev, - u32 fwhandle, u8 *data, u16 size, - u32 offset) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcda_pl[MLXSW_REG_MCDA_LEN]; - - mlxsw_reg_mcda_pack(mcda_pl, fwhandle, offset, size, data); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcda), mcda_pl); -} - -static int mlxsw_sp_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, - u32 fwhandle, u16 component_index) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, - component_index, fwhandle, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static int 
mlxsw_sp_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_ACTIVATE, 0, - fwhandle, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static int mlxsw_sp_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - enum mlxfw_fsm_state *fsm_state, - enum mlxfw_fsm_state_err *fsm_state_err) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - u8 control_state; - u8 error_code; - int err; - - mlxsw_reg_mcc_pack(mcc_pl, 0, 0, fwhandle, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); - if (err) - return err; - - mlxsw_reg_mcc_unpack(mcc_pl, NULL, &error_code, &control_state); - *fsm_state = control_state; - *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, - MLXFW_FSM_STATE_ERR_MAX); - return 0; -} - -static void mlxsw_sp_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_CANCEL, 0, - fwhandle, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, - MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, - fwhandle, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { - .component_query = mlxsw_sp_component_query, - .fsm_lock = mlxsw_sp_fsm_lock, - .fsm_component_update = mlxsw_sp_fsm_component_update, - .fsm_block_download = mlxsw_sp_fsm_block_download, - .fsm_component_verify = mlxsw_sp_fsm_component_verify, - .fsm_activate = mlxsw_sp_fsm_activate, - .fsm_query_state = mlxsw_sp_fsm_query_state, - .fsm_cancel = mlxsw_sp_fsm_cancel, - .fsm_release = mlxsw_sp_fsm_release, -}; - -static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, - const struct firmware *firmware, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp_mlxfw_dev mlxsw_sp_mlxfw_dev = { - .mlxfw_dev = { - .ops = &mlxsw_sp_mlxfw_dev_ops, - .psid = mlxsw_sp->bus_info->psid, - .psid_size = strlen(mlxsw_sp->bus_info->psid), - .devlink = priv_to_devlink(mlxsw_sp->core), - }, - .mlxsw_sp = mlxsw_sp - }; - int err; - - mlxsw_core_fw_flash_start(mlxsw_sp->core); - err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, - firmware, extack); - mlxsw_core_fw_flash_end(mlxsw_sp->core); - - return err; -} - -static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) -{ - const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev; - const struct mlxsw_fw_rev *req_rev = mlxsw_sp->req_rev; - const char *fw_filename = mlxsw_sp->fw_filename; - union devlink_param_value value; - const struct firmware *firmware; - int err; - - /* Don't check if driver does not require it */ - if 
(!req_rev || !fw_filename) - return 0; - - /* Don't check if devlink 'fw_load_policy' param is 'flash' */ - err = devlink_param_driverinit_value_get(priv_to_devlink(mlxsw_sp->core), - DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, - &value); - if (err) - return err; - if (value.vu8 == DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH) - return 0; - - /* Validate driver & FW are compatible */ - if (rev->major != req_rev->major) { - WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n", - rev->major, req_rev->major); - return -EINVAL; - } - if (mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) - return 0; - - dev_err(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", - rev->major, rev->minor, rev->subminor, req_rev->major, - req_rev->minor, req_rev->subminor); - dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n", - fw_filename); - - err = request_firmware_direct(&firmware, fw_filename, - mlxsw_sp->bus_info->dev); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Could not request firmware file %s\n", - fw_filename); - return err; - } - - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, NULL); - release_firmware(firmware); - if (err) - dev_err(mlxsw_sp->bus_info->dev, "Could not upgrade firmware\n"); - - /* On FW flash success, tell the caller FW reset is needed - * if current FW supports it. - */ - if (rev->minor >= req_rev->can_reset_minor) - return err ? err : -EAGAIN; - else - return 0; -} - -static int mlxsw_sp_flash_update(struct mlxsw_core *mlxsw_core, - const char *file_name, const char *component, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - const struct firmware *firmware; - int err; - - if (component) - return -EOPNOTSUPP; - - err = request_firmware_direct(&firmware, file_name, - mlxsw_sp->bus_info->dev); - if (err) - return err; - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, extack); - release_firmware(firmware); - - return err; -} - int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index, u64 *packets, u64 *bytes) @@ -590,21 +321,28 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr); } -static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) +static int mlxsw_sp_port_max_mtu_get(struct mlxsw_sp_port *mlxsw_sp_port, int *p_max_mtu) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pmtu_pl[MLXSW_REG_PMTU_LEN]; - int max_mtu; int err; - mtu += MLXSW_TXHDR_LEN + ETH_HLEN; mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); if (err) return err; - max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl); - if (mtu > max_mtu) + *p_max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl); + return 0; +} + +static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pmtu_pl[MLXSW_REG_PMTU_LEN]; + + mtu += MLXSW_TXHDR_LEN + ETH_HLEN; + if (mtu > mlxsw_sp_port->max_mtu) return -EINVAL; mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, mtu); @@ -872,133 +610,25 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) return 0; } -static u16 mlxsw_sp_pg_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp, - int mtu) -{ - return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu); -} - -#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size 
/ (IPG + cell_size + 1) */ - -static u16 mlxsw_sp_pfc_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu, - u16 delay) -{ - delay = mlxsw_sp_bytes_cells(mlxsw_sp, DIV_ROUND_UP(delay, - BITS_PER_BYTE)); - return MLXSW_SP_CELL_FACTOR * delay + mlxsw_sp_bytes_cells(mlxsw_sp, - mtu); -} - -/* Maximum delay buffer needed in case of PAUSE frames, in bytes. - * Assumes 100m cable and maximum MTU. - */ -#define MLXSW_SP_PAUSE_DELAY 58752 - -static u16 mlxsw_sp_pg_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu, - u16 delay, bool pfc, bool pause) -{ - if (pfc) - return mlxsw_sp_pfc_delay_get(mlxsw_sp, mtu, delay); - else if (pause) - return mlxsw_sp_bytes_cells(mlxsw_sp, MLXSW_SP_PAUSE_DELAY); - else - return 0; -} - -static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres, - bool lossy) +static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) { - if (lossy) - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size); - else - mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size, - thres); -} + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom hdroom; + int err; -int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, - u8 *prio_tc, bool pause_en, - struct ieee_pfc *my_pfc) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0; - u16 delay = !!my_pfc ? my_pfc->delay : 0; - char pbmc_pl[MLXSW_REG_PBMC_LEN]; - u32 taken_headroom_cells = 0; - u32 max_headroom_cells; - int i, j, err; + orig_hdroom = *mlxsw_sp_port->hdroom; - max_headroom_cells = mlxsw_sp_sb_max_headroom_cells(mlxsw_sp); + hdroom = orig_hdroom; + hdroom.mtu = mtu; + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); - mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); - if (err) + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); return err; - - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - bool configure = false; - bool pfc = false; - u16 thres_cells; - u16 delay_cells; - u16 total_cells; - bool lossy; - - for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { - if (prio_tc[j] == i) { - pfc = pfc_en & BIT(j); - configure = true; - break; - } - } - - if (!configure) - continue; - - lossy = !(pfc || pause_en); - thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); - thres_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, thres_cells); - delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, - pfc, pause_en); - delay_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, delay_cells); - total_cells = thres_cells + delay_cells; - - taken_headroom_cells += total_cells; - if (taken_headroom_cells > max_headroom_cells) - return -ENOBUFS; - - mlxsw_sp_pg_buf_pack(pbmc_pl, i, total_cells, - thres_cells, lossy); } - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); -} - -int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, - int mtu, bool pause_en) -{ - u8 def_prio_tc[IEEE_8021QAZ_MAX_TCS] = {0}; - bool dcb_en = !!mlxsw_sp_port->dcb.ets; - struct ieee_pfc *my_pfc; - u8 *prio_tc; - - prio_tc = dcb_en ? mlxsw_sp_port->dcb.ets->prio_tc : def_prio_tc; - my_pfc = dcb_en ? 
mlxsw_sp_port->dcb.pfc : NULL; - - return __mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, prio_tc, - pause_en, my_pfc); -} - -static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); - int err; - - err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, pause_en); - if (err) - return err; - err = mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, mtu); - if (err) - goto err_span_port_mtu_update; err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); if (err) goto err_port_mtu_set; @@ -1006,9 +636,7 @@ static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) return 0; err_port_mtu_set: - mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, dev->mtu); -err_span_port_mtu_update: - mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); return err; } @@ -1737,6 +1365,22 @@ static int mlxsw_sp_port_tc_mc_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtctm), qtctm_pl); } +static int mlxsw_sp_port_overheat_init_val_set(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 module = mlxsw_sp_port->mapping.module; + u64 overheat_counter; + int err; + + err = mlxsw_env_module_overheat_counter_get(mlxsw_sp->core, module, + &overheat_counter); + if (err) + return err; + + mlxsw_sp_port->module_overheat_initial_val = overheat_counter; + return 0; +} + static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 split_base_local_port, struct mlxsw_sp_port_mapping *port_mapping) @@ -1842,6 +1486,21 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_speed_by_width_set; } + err = mlxsw_sp->port_type_speed_ops->ptys_max_speed(mlxsw_sp_port, + &mlxsw_sp_port->max_speed); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get maximum speed\n", + mlxsw_sp_port->local_port); + goto err_max_speed_get; + } + + err = mlxsw_sp_port_max_mtu_get(mlxsw_sp_port, &mlxsw_sp_port->max_mtu); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get maximum MTU\n", + mlxsw_sp_port->local_port); + goto err_port_max_mtu_get; + } + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n", @@ -1930,10 +1589,16 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw, mlxsw_sp->ptp_ops->shaper_work); - INIT_DELAYED_WORK(&mlxsw_sp_port->span.speed_update_dw, - mlxsw_sp_span_speed_update_work); mlxsw_sp->ports[local_port] = mlxsw_sp_port; + + err = mlxsw_sp_port_overheat_init_val_set(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set overheat initial value\n", + mlxsw_sp_port->local_port); + goto err_port_overheat_init_val_set; + } + err = register_netdev(dev); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n", @@ -1947,6 +1612,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, return 0; err_register_netdev: +err_port_overheat_init_val_set: mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); err_port_vlan_create: @@ -1963,9 +1629,12 @@ err_port_dcb_init: mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false); err_port_tc_mc_mode: err_port_ets_init: + mlxsw_sp_port_buffers_fini(mlxsw_sp_port); 
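The rewritten mlxsw_sp_port_change_mtu() above treats the headroom as a single configuration object: snapshot the current struct mlxsw_sp_hdroom, recompute the buffer sizes for the candidate MTU, apply the new configuration, and restore the snapshot if the subsequent port MTU write fails. A condensed sketch of that transaction, assuming the illustrative name below (the dev->mtu update and netdev error reporting of the real function are elided):

/* Illustrative sketch of the hdroom-based MTU change; not part of the patch. */
static int mlxsw_sp_port_change_mtu_sketch(struct net_device *dev, int mtu)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp_hdroom orig_hdroom = *mlxsw_sp_port->hdroom;
	struct mlxsw_sp_hdroom hdroom = orig_hdroom;
	int err;

	/* Recompute the per-buffer sizes for the candidate MTU. */
	hdroom.mtu = mtu;
	mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom);

	err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom);
	if (err)
		return err;

	/* Roll back to the snapshot if the MTU itself cannot be set. */
	err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu);
	if (err)
		mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom);
	return err;
}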
err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: +err_port_max_mtu_get: +err_max_speed_get: err_port_speed_by_width_set: err_port_system_port_mapping_set: err_dev_addr_init: @@ -1986,7 +1655,6 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); - cancel_delayed_work_sync(&mlxsw_sp_port->span.speed_update_dw); cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw); mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); @@ -1998,6 +1666,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port_fids_fini(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false); + mlxsw_sp_port_buffers_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp_port); free_percpu(mlxsw_sp_port->pcpu_stats); @@ -2390,7 +2059,6 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, netdev_info(mlxsw_sp_port->dev, "link up\n"); netif_carrier_on(mlxsw_sp_port->dev); mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0); - mlxsw_core_schedule_dw(&mlxsw_sp_port->span.speed_update_dw, 0); } else { netdev_info(mlxsw_sp_port->dev, "link down\n"); netif_carrier_off(mlxsw_sp_port->dev); @@ -2783,11 +2451,36 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) { char htgt_pl[MLXSW_REG_HTGT_LEN]; + int err; mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD, MLXSW_REG_HTGT_INVALID_POLICER, MLXSW_REG_HTGT_DEFAULT_PRIORITY, MLXSW_REG_HTGT_DEFAULT_TC); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MFDE, + MLXSW_REG_HTGT_INVALID_POLICER, + MLXSW_REG_HTGT_DEFAULT_PRIORITY, + MLXSW_REG_HTGT_DEFAULT_TC); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MTWE, + MLXSW_REG_HTGT_INVALID_POLICER, + MLXSW_REG_HTGT_DEFAULT_PRIORITY, + MLXSW_REG_HTGT_DEFAULT_TC); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_PMPE, + MLXSW_REG_HTGT_INVALID_POLICER, + MLXSW_REG_HTGT_DEFAULT_PRIORITY, + MLXSW_REG_HTGT_DEFAULT_TC); return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } @@ -2836,10 +2529,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->core = mlxsw_core; mlxsw_sp->bus_info = mlxsw_bus_info; - err = mlxsw_sp_fw_rev_validate(mlxsw_sp); - if (err) - return err; - mlxsw_core_emad_string_tlv_enable(mlxsw_core); err = mlxsw_sp_base_mac_get(mlxsw_sp); @@ -3039,8 +2728,6 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - mlxsw_sp->req_rev = &mlxsw_sp1_fw_rev; - mlxsw_sp->fw_filename = MLXSW_SP1_FW_FILENAME; mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops; @@ -3051,6 +2738,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask; mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; + mlxsw_sp->sb_ops = 
&mlxsw_sp1_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops; mlxsw_sp->span_ops = &mlxsw_sp1_span_ops; @@ -3069,8 +2757,6 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - mlxsw_sp->req_rev = &mlxsw_sp2_fw_rev; - mlxsw_sp->fw_filename = MLXSW_SP2_FW_FILENAME; mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; @@ -3081,6 +2767,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->sb_ops = &mlxsw_sp2_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; @@ -3097,8 +2784,6 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - mlxsw_sp->req_rev = &mlxsw_sp3_fw_rev; - mlxsw_sp->fw_filename = MLXSW_SP3_FW_FILENAME; mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; @@ -3109,6 +2794,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; mlxsw_sp->span_ops = &mlxsw_sp3_span_ops; @@ -3451,52 +3137,6 @@ static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core, } static int -mlxsw_sp_devlink_param_fw_load_policy_validate(struct devlink *devlink, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) -{ - if ((val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER) && - (val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH)) { - NL_SET_ERR_MSG_MOD(extack, "'fw_load_policy' must be 'driver' or 'flash'"); - return -EINVAL; - } - - return 0; -} - -static const struct devlink_param mlxsw_sp_devlink_params[] = { - DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY, - BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), - NULL, NULL, - mlxsw_sp_devlink_param_fw_load_policy_validate), -}; - -static int mlxsw_sp_params_register(struct mlxsw_core *mlxsw_core) -{ - struct devlink *devlink = priv_to_devlink(mlxsw_core); - union devlink_param_value value; - int err; - - err = devlink_params_register(devlink, mlxsw_sp_devlink_params, - ARRAY_SIZE(mlxsw_sp_devlink_params)); - if (err) - return err; - - value.vu8 = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER; - devlink_param_driverinit_value_set(devlink, - DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, - value); - return 0; -} - -static void mlxsw_sp_params_unregister(struct mlxsw_core *mlxsw_core) -{ - devlink_params_unregister(priv_to_devlink(mlxsw_core), - mlxsw_sp_devlink_params, - ARRAY_SIZE(mlxsw_sp_devlink_params)); -} - -static int mlxsw_sp_params_acl_region_rehash_intrvl_get(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx) { @@ -3533,24 +3173,16 @@ static int mlxsw_sp2_params_register(struct mlxsw_core *mlxsw_core) union devlink_param_value value; int err; - err = mlxsw_sp_params_register(mlxsw_core); - if (err) - return err; - err = devlink_params_register(devlink, mlxsw_sp2_devlink_params, ARRAY_SIZE(mlxsw_sp2_devlink_params)); if (err) - goto err_devlink_params_register; + 
return err; value.vu32 = 0; devlink_param_driverinit_value_set(devlink, MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, value); return 0; - -err_devlink_params_register: - mlxsw_sp_params_unregister(mlxsw_core); - return err; } static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) @@ -3558,7 +3190,6 @@ static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) devlink_params_unregister(priv_to_devlink(mlxsw_core), mlxsw_sp2_devlink_params, ARRAY_SIZE(mlxsw_sp2_devlink_params)); - mlxsw_sp_params_unregister(mlxsw_core); } static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, @@ -3573,6 +3204,8 @@ static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, static struct mlxsw_driver mlxsw_sp1_driver = { .kind = mlxsw_sp1_driver_name, .priv_size = sizeof(struct mlxsw_sp), + .fw_req_rev = &mlxsw_sp1_fw_rev, + .fw_filename = MLXSW_SP1_FW_FILENAME, .init = mlxsw_sp1_init, .fini = mlxsw_sp_fini, .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, @@ -3588,7 +3221,6 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, - .flash_update = mlxsw_sp_flash_update, .trap_init = mlxsw_sp_trap_init, .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, @@ -3601,17 +3233,19 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, - .params_register = mlxsw_sp_params_register, - .params_unregister = mlxsw_sp_params_unregister, .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, .res_query_enabled = true, + .fw_fatal_enabled = true, + .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp2_driver = { .kind = mlxsw_sp2_driver_name, .priv_size = sizeof(struct mlxsw_sp), + .fw_req_rev = &mlxsw_sp2_fw_rev, + .fw_filename = MLXSW_SP2_FW_FILENAME, .init = mlxsw_sp2_init, .fini = mlxsw_sp_fini, .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, @@ -3627,7 +3261,6 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, - .flash_update = mlxsw_sp_flash_update, .trap_init = mlxsw_sp_trap_init, .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, @@ -3645,11 +3278,15 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, + .fw_fatal_enabled = true, + .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp3_driver = { .kind = mlxsw_sp3_driver_name, .priv_size = sizeof(struct mlxsw_sp), + .fw_req_rev = &mlxsw_sp3_fw_rev, + .fw_filename = MLXSW_SP3_FW_FILENAME, .init = mlxsw_sp3_init, .fini = mlxsw_sp_fini, .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, @@ -3665,7 +3302,6 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, - .flash_update = mlxsw_sp_flash_update, .trap_init = mlxsw_sp_trap_init, .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, @@ -3683,6 +3319,8 @@ static struct mlxsw_driver 
mlxsw_sp3_driver = { .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, + .fw_fatal_enabled = true, + .temp_warn_enabled = true, }; bool mlxsw_sp_port_dev_check(const struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 5240bf11b6c4..3e26eb6cb140 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -125,6 +125,7 @@ struct mlxsw_sp_mr_tcam_ops; struct mlxsw_sp_acl_rulei_ops; struct mlxsw_sp_acl_tcam_ops; struct mlxsw_sp_nve_ops; +struct mlxsw_sp_sb_ops; struct mlxsw_sp_sb_vals; struct mlxsw_sp_port_type_speed_ops; struct mlxsw_sp_ptp_state; @@ -162,8 +163,6 @@ struct mlxsw_sp { struct mlxsw_sp_counter_pool *counter_pool; struct mlxsw_sp_span *span; struct mlxsw_sp_trap *trap; - const struct mlxsw_fw_rev *req_rev; - const char *fw_filename; const struct mlxsw_sp_kvdl_ops *kvdl_ops; const struct mlxsw_afa_ops *afa_ops; const struct mlxsw_afk_ops *afk_ops; @@ -173,6 +172,7 @@ struct mlxsw_sp { const struct mlxsw_sp_nve_ops **nve_ops_arr; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_sb_vals *sb_vals; + const struct mlxsw_sp_sb_ops *sb_ops; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; const struct mlxsw_sp_ptp_ops *ptp_ops; const struct mlxsw_sp_span_ops *span_ops; @@ -316,9 +316,10 @@ struct mlxsw_sp_port { struct mlxsw_sp_ptp_port_stats stats; } ptp; u8 split_base_local_port; - struct { - struct delayed_work speed_update_dw; - } span; + int max_mtu; + u32 max_speed; + struct mlxsw_sp_hdroom *hdroom; + u64 module_overheat_initial_val; }; struct mlxsw_sp_port_type_speed_ops { @@ -331,6 +332,7 @@ struct mlxsw_sp_port_type_speed_ops { void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); + int (*ptys_max_speed)(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed); u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd); u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u8 width, u32 speed); @@ -414,34 +416,73 @@ mlxsw_sp_port_vlan_find_by_vid(const struct mlxsw_sp_port *mlxsw_sp_port, return NULL; } -static inline u32 -mlxsw_sp_port_headroom_8x_adjust(const struct mlxsw_sp_port *mlxsw_sp_port, - u32 size_cells) -{ - /* Ports with eight lanes use two headroom buffers between which the - * configured headroom size is split. Therefore, multiply the calculated - * headroom size by two. - */ - return mlxsw_sp_port->mapping.width == 8 ? 2 * size_cells : size_cells; -} - enum mlxsw_sp_flood_type { MLXSW_SP_FLOOD_TYPE_UC, MLXSW_SP_FLOOD_TYPE_BC, MLXSW_SP_FLOOD_TYPE_MC, }; -int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, - int mtu, bool pause_en); int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp, int prio, char *ppcnt_pl); int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, bool is_up); /* spectrum_buffers.c */ +struct mlxsw_sp_hdroom_prio { + /* Number of port buffer associated with this priority. This is the + * actually configured value. + */ + u8 buf_idx; + /* Value of buf_idx deduced from the DCB ETS configuration. */ + u8 ets_buf_idx; + /* Value of buf_idx taken from the dcbnl_setbuffer configuration. */ + u8 set_buf_idx; + bool lossy; +}; + +struct mlxsw_sp_hdroom_buf { + u32 thres_cells; + u32 size_cells; + /* Size requirement form dcbnl_setbuffer. 
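
Taken together, the two structures above carry the per-port headroom model: each priority records two candidate buffer bindings (one derived from the DCB ETS configuration, one from dcbnl_setbuffer) plus a lossiness flag, and each port buffer later derives its own lossiness from the priorities bound to it. A minimal sketch of how the resolution helpers interact, using hypothetical values:

    /* Hypothetical setup: PFC protects priority 3, which ETS maps to
     * buffer 1; all other priorities are lossy and map to buffer 0.
     */
    struct mlxsw_sp_hdroom hdroom = {};
    int prio;

    hdroom.mode = MLXSW_SP_HDROOM_MODE_DCB;
    for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++)
            hdroom.prios.prio[prio].lossy = true;
    hdroom.prios.prio[3].ets_buf_idx = 1;
    hdroom.prios.prio[3].lossy = false;

    mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom);  /* buf_idx <- ets_buf_idx */
    mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); /* buffer 1 becomes lossless */
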
*/ + u32 set_size_cells; + bool lossy; +}; + +enum mlxsw_sp_hdroom_mode { + MLXSW_SP_HDROOM_MODE_DCB, + MLXSW_SP_HDROOM_MODE_TC, +}; + +#define MLXSW_SP_PB_COUNT 10 + +struct mlxsw_sp_hdroom { + enum mlxsw_sp_hdroom_mode mode; + + struct { + struct mlxsw_sp_hdroom_prio prio[IEEE_8021Q_MAX_PRIORITIES]; + } prios; + struct { + struct mlxsw_sp_hdroom_buf buf[MLXSW_SP_PB_COUNT]; + } bufs; + struct { + /* Size actually configured for the internal buffer. Equal to + * reserve when internal buffer is enabled. + */ + u32 size_cells; + /* Space reserved in the headroom for the internal buffer. Port + * buffers are not allowed to grow into this space. + */ + u32 reserve_cells; + bool enable; + } int_buf; + int delay_bytes; + int mtu; +}; + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_buffers_fini(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info); @@ -477,11 +518,20 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, u32 *p_cur, u32 *p_max); u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells); u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes); -u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_hdroom_prios_reset_buf_idx(struct mlxsw_sp_hdroom *hdroom); +void mlxsw_sp_hdroom_bufs_reset_lossiness(struct mlxsw_sp_hdroom *hdroom); +void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_hdroom *hdroom); +int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom); extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals; extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals; +extern const struct mlxsw_sp_sb_ops mlxsw_sp1_sb_ops; +extern const struct mlxsw_sp_sb_ops mlxsw_sp2_sb_ops; +extern const struct mlxsw_sp_sb_ops mlxsw_sp3_sb_ops; + /* spectrum_switchdev.c */ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); @@ -519,9 +569,6 @@ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, bool dwrr, u8 dwrr_weight); int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 switch_prio, u8 tclass); -int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, - u8 *prio_tc, bool pause_en, - struct ieee_pfc *my_pfc); int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, u32 maxrate, u8 burst_size); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 6f84557a5a6f..37ff29a1686e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -121,6 +121,10 @@ struct mlxsw_sp_sb_vals { unsigned int cms_cpu_count; }; +struct mlxsw_sp_sb_ops { + u32 (*int_buf_size_get)(int mtu, u32 speed); +}; + u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) { return mlxsw_sp->sb->cell_size * cells; @@ -131,9 +135,14 @@ u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes) return DIV_ROUND_UP(bytes, mlxsw_sp->sb->cell_size); } -u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp) +static u32 
mlxsw_sp_port_headroom_8x_adjust(const struct mlxsw_sp_port *mlxsw_sp_port, + u32 size_cells) { - return mlxsw_sp->sb->max_headroom_cells; + /* Ports with eight lanes use two headroom buffers between which the + * configured headroom size is split. Therefore, multiply the calculated + * headroom size by two. + */ + return mlxsw_sp_port->mapping.width == 8 ? 2 * size_cells : size_cells; } static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, @@ -291,55 +300,308 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, (unsigned long) pm); } -/* 1/4 of a headroom necessary for 100Gbps port and 100m cable. */ -#define MLXSW_SP_PB_HEADROOM 25632 +void mlxsw_sp_hdroom_prios_reset_buf_idx(struct mlxsw_sp_hdroom *hdroom) +{ + int prio; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + switch (hdroom->mode) { + case MLXSW_SP_HDROOM_MODE_DCB: + hdroom->prios.prio[prio].buf_idx = hdroom->prios.prio[prio].ets_buf_idx; + break; + case MLXSW_SP_HDROOM_MODE_TC: + hdroom->prios.prio[prio].buf_idx = hdroom->prios.prio[prio].set_buf_idx; + break; + } + } +} + +void mlxsw_sp_hdroom_bufs_reset_lossiness(struct mlxsw_sp_hdroom *hdroom) +{ + int prio; + int i; + + for (i = 0; i < DCBX_MAX_BUFFERS; i++) + hdroom->bufs.buf[i].lossy = true; + + for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++) { + if (!hdroom->prios.prio[prio].lossy) + hdroom->bufs.buf[hdroom->prios.prio[prio].buf_idx].lossy = false; + } +} + +static u16 mlxsw_sp_hdroom_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp, int mtu) +{ + return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu); +} + +static void mlxsw_sp_hdroom_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres, bool lossy) +{ + if (lossy) + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size); + else + mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size, + thres); +} + +static u16 mlxsw_sp_hdroom_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_hdroom *hdroom) +{ + u16 delay_cells; + + delay_cells = mlxsw_sp_bytes_cells(mlxsw_sp, hdroom->delay_bytes); + + /* In the worst case scenario the delay will be made up of packets that + * are all of size CELL_SIZE + 1, which means each packet will require + * almost twice its true size when buffered in the switch. We therefore + * multiply this value by the "cell factor", which is close to 2. + * + * Another MTU is added in case the transmitting host already started + * transmitting a maximum length frame when the PFC packet was received. + */ + return 2 * delay_cells + mlxsw_sp_bytes_cells(mlxsw_sp, hdroom->mtu); +} + +static u32 mlxsw_sp_hdroom_int_buf_size_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed) +{ + u32 buffsize = mlxsw_sp->sb_ops->int_buf_size_get(speed, mtu); + + return mlxsw_sp_bytes_cells(mlxsw_sp, buffsize) + 1; +} + +static bool mlxsw_sp_hdroom_buf_is_used(const struct mlxsw_sp_hdroom *hdroom, int buf) +{ + int prio; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + if (hdroom->prios.prio[prio].buf_idx == buf) + return true; + } + return false; +} + +void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_hdroom *hdroom) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 reserve_cells; + int i; + + /* Internal buffer. 
*/ + reserve_cells = mlxsw_sp_hdroom_int_buf_size_get(mlxsw_sp, mlxsw_sp_port->max_speed, + mlxsw_sp_port->max_mtu); + reserve_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, reserve_cells); + hdroom->int_buf.reserve_cells = reserve_cells; + + if (hdroom->int_buf.enable) + hdroom->int_buf.size_cells = reserve_cells; + else + hdroom->int_buf.size_cells = 0; + + /* PG buffers. */ + for (i = 0; i < DCBX_MAX_BUFFERS; i++) { + struct mlxsw_sp_hdroom_buf *buf = &hdroom->bufs.buf[i]; + u16 thres_cells; + u16 delay_cells; + + if (!mlxsw_sp_hdroom_buf_is_used(hdroom, i)) { + thres_cells = 0; + delay_cells = 0; + } else if (buf->lossy) { + thres_cells = mlxsw_sp_hdroom_buf_threshold_get(mlxsw_sp, hdroom->mtu); + delay_cells = 0; + } else { + thres_cells = mlxsw_sp_hdroom_buf_threshold_get(mlxsw_sp, hdroom->mtu); + delay_cells = mlxsw_sp_hdroom_buf_delay_get(mlxsw_sp, hdroom); + } + + thres_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, thres_cells); + delay_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, delay_cells); + + buf->thres_cells = thres_cells; + if (hdroom->mode == MLXSW_SP_HDROOM_MODE_DCB) { + buf->size_cells = thres_cells + delay_cells; + } else { + /* Do not allow going below the minimum size, even if + * the user requested it. + */ + buf->size_cells = max(buf->set_size_cells, buf->thres_cells); + } + } +} + #define MLXSW_SP_PB_UNUSED 8 -static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) +static int mlxsw_sp_hdroom_configure_buffers(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) { - const u32 pbs[] = { - [0] = MLXSW_SP_PB_HEADROOM * mlxsw_sp_port->mapping.width, - [9] = MLXSW_PORT_MAX_MTU, - }; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pbmc_pl[MLXSW_REG_PBMC_LEN]; + bool dirty; + int err; int i; - mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, - 0xffff, 0xffff / 2); - for (i = 0; i < ARRAY_SIZE(pbs); i++) { - u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, pbs[i]); + dirty = memcmp(&mlxsw_sp_port->hdroom->bufs, &hdroom->bufs, sizeof(hdroom->bufs)); + if (!dirty && !force) + return 0; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) { + const struct mlxsw_sp_hdroom_buf *buf = &hdroom->bufs.buf[i]; if (i == MLXSW_SP_PB_UNUSED) continue; - size = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, size); - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, size); + + mlxsw_sp_hdroom_buf_pack(pbmc_pl, i, buf->size_cells, buf->thres_cells, buf->lossy); } - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, - MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + if (err) + return err; + + mlxsw_sp_port->hdroom->bufs = hdroom->bufs; + return 0; } -static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port) +static int mlxsw_sp_hdroom_configure_priomap(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) { char pptb_pl[MLXSW_REG_PPTB_LEN]; - int i; + bool dirty; + int prio; + int err; + + dirty = memcmp(&mlxsw_sp_port->hdroom->prios, &hdroom->prios, sizeof(hdroom->prios)); + if (!dirty && !force) + return 0; mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, 0); - return 
mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), - pptb_pl); + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, prio, hdroom->prios.prio[prio].buf_idx); + + err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), pptb_pl); + if (err) + return err; + + mlxsw_sp_port->hdroom->prios = hdroom->prios; + return 0; } -static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) +static int mlxsw_sp_hdroom_configure_int_buf(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) +{ + char sbib_pl[MLXSW_REG_SBIB_LEN]; + bool dirty; + int err; + + dirty = memcmp(&mlxsw_sp_port->hdroom->int_buf, &hdroom->int_buf, sizeof(hdroom->int_buf)); + if (!dirty && !force) + return 0; + + mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, hdroom->int_buf.size_cells); + err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) + return err; + + mlxsw_sp_port->hdroom->int_buf = hdroom->int_buf; + return 0; +} + +static bool mlxsw_sp_hdroom_bufs_fit(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_hdroom *hdroom) { + u32 taken_headroom_cells = 0; + int i; + + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) + taken_headroom_cells += hdroom->bufs.buf[i].size_cells; + + taken_headroom_cells += hdroom->int_buf.reserve_cells; + return taken_headroom_cells <= mlxsw_sp->sb->max_headroom_cells; +} + +static int __mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) +{ + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom tmp_hdroom; int err; + int i; + + /* Port buffers need to be configured in three steps. First, all buffers + * with non-zero size are configured. Then, prio-to-buffer map is + * updated, allowing traffic to flow to the now non-zero buffers. + * Finally, zero-sized buffers are configured, because now no traffic + * should be directed to them anymore. This way, in a non-congested + * system, no packet drops are introduced by the reconfiguration. 
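
Every call site converted later in this diff (dcbnl ETS/PFC, ethtool pauseparam, dcbnl_setbuffer, qdisc replace/destroy) follows the same copy-adjust-recompute-commit pattern around this function. A condensed sketch, with a hypothetical helper name:

    static int example_set_delay(struct mlxsw_sp_port *mlxsw_sp_port,
                                 u32 delay_bytes)
    {
            struct mlxsw_sp_hdroom hdroom = *mlxsw_sp_port->hdroom;

            hdroom.delay_bytes = delay_bytes;
            mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom);
            mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom);

            /* Applies only the parts that actually changed, in the safe
             * order described in the comment above.
             */
            return mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom);
    }
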
+ */ - err = mlxsw_sp_port_pb_init(mlxsw_sp_port); + orig_hdroom = *mlxsw_sp_port->hdroom; + tmp_hdroom = orig_hdroom; + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) { + if (hdroom->bufs.buf[i].size_cells) + tmp_hdroom.bufs.buf[i] = hdroom->bufs.buf[i]; + } + + if (!mlxsw_sp_hdroom_bufs_fit(mlxsw_sp_port->mlxsw_sp, &tmp_hdroom) || + !mlxsw_sp_hdroom_bufs_fit(mlxsw_sp_port->mlxsw_sp, hdroom)) + return -ENOBUFS; + + err = mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, &tmp_hdroom, force); if (err) return err; - return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); + + err = mlxsw_sp_hdroom_configure_priomap(mlxsw_sp_port, hdroom, force); + if (err) + goto err_configure_priomap; + + err = mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, hdroom, false); + if (err) + goto err_configure_buffers; + + err = mlxsw_sp_hdroom_configure_int_buf(mlxsw_sp_port, hdroom, false); + if (err) + goto err_configure_int_buf; + + *mlxsw_sp_port->hdroom = *hdroom; + return 0; + +err_configure_int_buf: + mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, &tmp_hdroom, false); +err_configure_buffers: + mlxsw_sp_hdroom_configure_priomap(mlxsw_sp_port, &tmp_hdroom, false); +err_configure_priomap: + mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, &orig_hdroom, false); + return err; +} + +int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom) +{ + return __mlxsw_sp_hdroom_configure(mlxsw_sp_port, hdroom, false); +} + +static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_hdroom hdroom = {}; + u32 size9; + int prio; + + hdroom.mtu = mlxsw_sp_port->dev->mtu; + hdroom.mode = MLXSW_SP_HDROOM_MODE_DCB; + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].lossy = true; + + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + /* Buffer 9 is used for control traffic. 
*/ + size9 = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, mlxsw_sp_port->max_mtu); + hdroom.bufs.buf[9].size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size9); + + return __mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom, true); } static int mlxsw_sp_sb_port_init(struct mlxsw_sp *mlxsw_sp, @@ -916,6 +1178,46 @@ const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), }; +static u32 mlxsw_sp1_pb_int_buf_size_get(int mtu, u32 speed) +{ + return mtu * 5 / 2; +} + +static u32 __mlxsw_sp_pb_int_buf_size_get(int mtu, u32 speed, u32 buffer_factor) +{ + return 3 * mtu + buffer_factor * speed / 1000; +} + +#define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 + +static u32 mlxsw_sp2_pb_int_buf_size_get(int mtu, u32 speed) +{ + int factor = MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_pb_int_buf_size_get(mtu, speed, factor); +} + +#define MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR 50 + +static u32 mlxsw_sp3_pb_int_buf_size_get(int mtu, u32 speed) +{ + int factor = MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_pb_int_buf_size_get(mtu, speed, factor); +} + +const struct mlxsw_sp_sb_ops mlxsw_sp1_sb_ops = { + .int_buf_size_get = mlxsw_sp1_pb_int_buf_size_get, +}; + +const struct mlxsw_sp_sb_ops mlxsw_sp2_sb_ops = { + .int_buf_size_get = mlxsw_sp2_pb_int_buf_size_get, +}; + +const struct mlxsw_sp_sb_ops mlxsw_sp3_sb_ops = { + .int_buf_size_get = mlxsw_sp3_pb_int_buf_size_get, +}; + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { u32 max_headroom_size; @@ -995,17 +1297,34 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) { int err; + mlxsw_sp_port->hdroom = kzalloc(sizeof(*mlxsw_sp_port->hdroom), GFP_KERNEL); + if (!mlxsw_sp_port->hdroom) + return -ENOMEM; + mlxsw_sp_port->hdroom->mtu = mlxsw_sp_port->dev->mtu; + err = mlxsw_sp_port_headroom_init(mlxsw_sp_port); if (err) - return err; + goto err_headroom_init; err = mlxsw_sp_port_sb_cms_init(mlxsw_sp_port); if (err) - return err; + goto err_port_sb_cms_init; err = mlxsw_sp_port_sb_pms_init(mlxsw_sp_port); + if (err) + goto err_port_sb_pms_init; + return 0; +err_port_sb_pms_init: +err_port_sb_cms_init: +err_headroom_init: + kfree(mlxsw_sp_port->hdroom); return err; } +void mlxsw_sp_port_buffers_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->hdroom); +} + int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 0d3fb2e51ea5..5f92b1691360 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -64,87 +64,28 @@ static int mlxsw_sp_port_ets_validate(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } -static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port, - u8 *prio_tc) -{ - char pptb_pl[MLXSW_REG_PPTB_LEN]; - int i; - - mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, prio_tc[i]); - - return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), - pptb_pl); -} - -static bool mlxsw_sp_ets_has_pg(u8 *prio_tc, u8 pg) -{ - int i; - - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - if (prio_tc[i] == pg) - return true; - return false; -} - -static int mlxsw_sp_port_pg_destroy(struct mlxsw_sp_port *mlxsw_sp_port, - u8 *old_prio_tc, u8 *new_prio_tc) -{ - struct 
mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char pbmc_pl[MLXSW_REG_PBMC_LEN]; - int err, i; - - mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); - if (err) - return err; - - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - u8 pg = old_prio_tc[i]; - - if (!mlxsw_sp_ets_has_pg(new_prio_tc, pg)) - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg, 0); - } - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); -} - static int mlxsw_sp_port_headroom_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, struct ieee_ets *ets) { - bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); - struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets; struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_hdroom hdroom; + int prio; int err; - /* Create the required PGs, but don't destroy existing ones, as - * traffic is still directed to them. - */ - err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, - ets->prio_tc, pause_en, - mlxsw_sp_port->dcb.pfc); + hdroom = *mlxsw_sp_port->hdroom; + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].ets_buf_idx = ets->prio_tc[prio]; + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); if (err) { netdev_err(dev, "Failed to configure port's headroom\n"); return err; } - err = mlxsw_sp_port_pg_prio_map(mlxsw_sp_port, ets->prio_tc); - if (err) { - netdev_err(dev, "Failed to set PG-priority mapping\n"); - goto err_port_prio_pg_map; - } - - err = mlxsw_sp_port_pg_destroy(mlxsw_sp_port, my_ets->prio_tc, - ets->prio_tc); - if (err) - netdev_warn(dev, "Failed to remove unused PGs\n"); - return 0; - -err_port_prio_pg_map: - mlxsw_sp_port_pg_destroy(mlxsw_sp_port, ets->prio_tc, my_ets->prio_tc); - return err; } static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, @@ -605,6 +546,9 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom hdroom; + int prio; int err; if (pause_en && pfc->pfc_en) { @@ -612,9 +556,21 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, return -EINVAL; } - err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, - mlxsw_sp_port->dcb.ets->prio_tc, - pause_en, pfc); + orig_hdroom = *mlxsw_sp_port->hdroom; + + hdroom = orig_hdroom; + if (pfc->pfc_en) + hdroom.delay_bytes = DIV_ROUND_UP(pfc->delay, BITS_PER_BYTE); + else + hdroom.delay_bytes = 0; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].lossy = !(pfc->pfc_en & BIT(prio)); + + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); if (err) { netdev_err(dev, "Failed to configure port's headroom for PFC\n"); return err; @@ -632,12 +588,66 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, return 0; err_port_pfc_set: - __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, - mlxsw_sp_port->dcb.ets->prio_tc, pause_en, - mlxsw_sp_port->dcb.pfc); + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); return err; } +static int mlxsw_sp_dcbnl_getbuffer(struct net_device *dev, struct dcbnl_buffer *buf) +{ + struct mlxsw_sp_port *mlxsw_sp_port 
= netdev_priv(dev); + struct mlxsw_sp_hdroom *hdroom = mlxsw_sp_port->hdroom; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int prio; + int i; + + buf->total_size = 0; + + BUILD_BUG_ON(DCBX_MAX_BUFFERS > MLXSW_SP_PB_COUNT); + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) { + u32 bytes = mlxsw_sp_cells_bytes(mlxsw_sp, hdroom->bufs.buf[i].size_cells); + + if (i < DCBX_MAX_BUFFERS) + buf->buffer_size[i] = bytes; + buf->total_size += bytes; + } + + buf->total_size += mlxsw_sp_cells_bytes(mlxsw_sp, hdroom->int_buf.size_cells); + + for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++) + buf->prio2buffer[prio] = hdroom->prios.prio[prio].buf_idx; + + return 0; +} + +static int mlxsw_sp_dcbnl_setbuffer(struct net_device *dev, struct dcbnl_buffer *buf) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_hdroom hdroom; + int prio; + int i; + + hdroom = *mlxsw_sp_port->hdroom; + + if (hdroom.mode != MLXSW_SP_HDROOM_MODE_TC) { + netdev_err(dev, "The use of dcbnl_setbuffer is only allowed if egress is configured using TC\n"); + return -EINVAL; + } + + for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++) + hdroom.prios.prio[prio].set_buf_idx = buf->prio2buffer[prio]; + + BUILD_BUG_ON(DCBX_MAX_BUFFERS > MLXSW_SP_PB_COUNT); + for (i = 0; i < DCBX_MAX_BUFFERS; i++) + hdroom.bufs.buf[i].set_size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, + buf->buffer_size[i]); + + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + return mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); +} + static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = { .ieee_getets = mlxsw_sp_dcbnl_ieee_getets, .ieee_setets = mlxsw_sp_dcbnl_ieee_setets, @@ -650,6 +660,9 @@ static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = { .getdcbx = mlxsw_sp_dcbnl_getdcbx, .setdcbx = mlxsw_sp_dcbnl_setdcbx, + + .dcbnl_getbuffer = mlxsw_sp_dcbnl_getbuffer, + .dcbnl_setbuffer = mlxsw_sp_dcbnl_setbuffer, }; static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 14c78f73bb65..2096b6478958 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -2,6 +2,7 @@ /* Copyright (c) 2020 Mellanox Technologies. All rights reserved */ #include "reg.h" +#include "core.h" #include "spectrum.h" #include "core_env.h" @@ -192,11 +193,19 @@ static int mlxsw_sp_port_pause_set(struct mlxsw_sp_port *mlxsw_sp_port, pfcc_pl); } +/* Maximum delay buffer needed in case of PAUSE frames. Similar to PFC delay, but is + * measured in bytes. Assumes 100m cable and does not take into account MTU. 
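
For scale, this constant is consumed by mlxsw_sp_hdroom_buf_delay_get() exactly like the PFC delay. Assuming, purely for illustration, a 96-byte cell and a 1500-byte MTU:

    /* delay_cells = DIV_ROUND_UP(19476, 96)          = 203 cells
     * headroom    = 2 * 203 + DIV_ROUND_UP(1500, 96) = 406 + 16 = 422 cells
     */
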
+ */ +#define MLXSW_SP_PAUSE_DELAY_BYTES 19476 + static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); bool pause_en = pause->tx_pause || pause->rx_pause; + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom hdroom; + int prio; int err; if (mlxsw_sp_port->dcb.pfc && mlxsw_sp_port->dcb.pfc->pfc_en) { @@ -209,7 +218,21 @@ static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, return -EINVAL; } - err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + orig_hdroom = *mlxsw_sp_port->hdroom; + + hdroom = orig_hdroom; + if (pause_en) + hdroom.delay_bytes = MLXSW_SP_PAUSE_DELAY_BYTES; + else + hdroom.delay_bytes = 0; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].lossy = !pause_en; + + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); if (err) { netdev_err(dev, "Failed to configure port's headroom\n"); return err; @@ -227,8 +250,7 @@ static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, return 0; err_port_pause_configure: - pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); - mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); return err; } @@ -531,6 +553,37 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { #define MLXSW_SP_PORT_HW_TC_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_tc_stats) +struct mlxsw_sp_port_stats { + char str[ETH_GSTRING_LEN]; + u64 (*getter)(struct mlxsw_sp_port *mlxsw_sp_port); +}; + +static u64 +mlxsw_sp_port_get_transceiver_overheat_stats(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_mapping port_mapping = mlxsw_sp_port->mapping; + struct mlxsw_core *mlxsw_core = mlxsw_sp_port->mlxsw_sp->core; + u64 stats; + int err; + + err = mlxsw_env_module_overheat_counter_get(mlxsw_core, + port_mapping.module, + &stats); + if (err) + return mlxsw_sp_port->module_overheat_initial_val; + + return stats - mlxsw_sp_port->module_overheat_initial_val; +} + +static struct mlxsw_sp_port_stats mlxsw_sp_port_transceiver_stats[] = { + { + .str = "transceiver_overheat", + .getter = mlxsw_sp_port_get_transceiver_overheat_stats, + }, +}; + +#define MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_transceiver_stats) + #define MLXSW_SP_PORT_ETHTOOL_STATS_LEN (MLXSW_SP_PORT_HW_STATS_LEN + \ MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN + \ MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN + \ @@ -540,7 +593,8 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { (MLXSW_SP_PORT_HW_PRIO_STATS_LEN * \ IEEE_8021QAZ_MAX_TCS) + \ (MLXSW_SP_PORT_HW_TC_STATS_LEN * \ - TC_MAX_QUEUE)) + TC_MAX_QUEUE) + \ + MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN) static void mlxsw_sp_port_get_prio_strings(u8 **p, int prio) { @@ -616,6 +670,12 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, mlxsw_sp_port_get_tc_strings(&p, i); mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_strings(&p); + + for (i = 0; i < MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_transceiver_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } break; } } @@ -711,6 +771,17 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, } } +static void __mlxsw_sp_port_get_env_stats(struct net_device *dev, u64 *data, int data_index, + struct mlxsw_sp_port_stats *port_stats, + int len) +{ + struct 
mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int i; + + for (i = 0; i < len; i++) + data[data_index + i] = port_stats[i].getter(mlxsw_sp_port); +} + static void mlxsw_sp_port_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -765,6 +836,11 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev, mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats(mlxsw_sp_port, data, data_index); data_index += mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count(); + + /* Transceiver counters */ + __mlxsw_sp_port_get_env_stats(dev, data, data_index, mlxsw_sp_port_transceiver_stats, + MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN); + data_index += MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN; } static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) @@ -842,6 +918,29 @@ mlxsw_sp_port_connector_port(enum mlxsw_reg_ptys_connector_type connector_type) } } +static int mlxsw_sp_port_ptys_query(struct mlxsw_sp_port *mlxsw_sp_port, + u32 *p_eth_proto_cap, u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper, u8 *p_connector_type) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + const struct mlxsw_sp_port_type_speed_ops *ops; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + int err; + + ops = mlxsw_sp->port_type_speed_ops; + + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, 0, false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + + ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, p_eth_proto_cap, p_eth_proto_admin, + p_eth_proto_oper); + if (p_connector_type) + *p_connector_type = mlxsw_reg_ptys_connector_type_get(ptys_pl); + return 0; +} + static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { @@ -849,21 +948,17 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; const struct mlxsw_sp_port_type_speed_ops *ops; - char ptys_pl[MLXSW_REG_PTYS_LEN]; u8 connector_type; bool autoneg; int err; - ops = mlxsw_sp->port_type_speed_ops; - - autoneg = mlxsw_sp_port->link.autoneg; - ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, - 0, false); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + err = mlxsw_sp_port_ptys_query(mlxsw_sp_port, ð_proto_cap, ð_proto_admin, + ð_proto_oper, &connector_type); if (err) return err; - ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, ð_proto_cap, - ð_proto_admin, ð_proto_oper); + + ops = mlxsw_sp->port_type_speed_ops; + autoneg = mlxsw_sp_port->link.autoneg; mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, mlxsw_sp_port->mapping.width, cmd); @@ -872,7 +967,6 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, mlxsw_sp_port->mapping.width, cmd); cmd->base.autoneg = autoneg ? 
AUTONEG_ENABLE : AUTONEG_DISABLE; - connector_type = mlxsw_reg_ptys_connector_type_get(ptys_pl); cmd->base.port = mlxsw_sp_port_connector_port(connector_type); ops->from_ptys_speed_duplex(mlxsw_sp, netif_carrier_ok(dev), eth_proto_oper, cmd); @@ -993,22 +1087,12 @@ struct mlxsw_sp1_port_link_mode { static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, - .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT, - .speed = SPEED_100, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, .mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, .speed = SPEED_1000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T, - .mask_ethtool = ETHTOOL_LINK_MODE_10000baseT_Full_BIT, - .speed = SPEED_10000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, @@ -1023,11 +1107,6 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { .speed = SPEED_10000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2, - .mask_ethtool = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT, - .speed = SPEED_20000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, .mask_ethtool = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, .speed = SPEED_40000, @@ -1092,11 +1171,6 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { .mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, .speed = SPEED_100000, }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, - .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, - .speed = SPEED_100000, - }, }; #define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode) @@ -1164,6 +1238,27 @@ mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, cmd->base.duplex = DUPLEX_FULL; } +static int mlxsw_sp1_ptys_max_speed(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed) +{ + u32 eth_proto_cap; + u32 max_speed = 0; + int err; + int i; + + err = mlxsw_sp_port_ptys_query(mlxsw_sp_port, ð_proto_cap, NULL, NULL, NULL); + if (err) + return err; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if ((eth_proto_cap & mlxsw_sp1_port_link_mode[i].mask) && + mlxsw_sp1_port_link_mode[i].speed > max_speed) + max_speed = mlxsw_sp1_port_link_mode[i].speed; + } + + *p_max_speed = max_speed; + return 0; +} + static u32 mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd) @@ -1213,6 +1308,7 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = { .from_ptys_link = mlxsw_sp1_from_ptys_link, .from_ptys_speed = mlxsw_sp1_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, + .ptys_max_speed = mlxsw_sp1_ptys_max_speed, .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp1_to_ptys_speed, .reg_ptys_eth_pack = mlxsw_sp1_reg_ptys_eth_pack, @@ -1237,14 +1333,6 @@ mlxsw_sp2_mask_ethtool_1000base_x_sgmii[] = { ARRAY_SIZE(mlxsw_sp2_mask_ethtool_1000base_x_sgmii) static const enum ethtool_link_mode_bit_indices -mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii[] = { - ETHTOOL_LINK_MODE_2500baseX_Full_BIT, -}; - -#define MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN \ - ARRAY_SIZE(mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii) - -static const enum ethtool_link_mode_bit_indices mlxsw_sp2_mask_ethtool_5gbase_r[] = { ETHTOOL_LINK_MODE_5000baseT_Full_BIT, }; @@ -1408,16 +1496,6 @@ static const struct mlxsw_sp2_port_link_mode 
mlxsw_sp2_port_link_mode[] = { .speed = SPEED_1000, }, { - .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII, - .mask_ethtool = mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii, - .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | - MLXSW_SP_PORT_MASK_WIDTH_2X | - MLXSW_SP_PORT_MASK_WIDTH_4X | - MLXSW_SP_PORT_MASK_WIDTH_8X, - .speed = SPEED_2500, - }, - { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R, .mask_ethtool = mlxsw_sp2_mask_ethtool_5gbase_r, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN, @@ -1568,6 +1646,27 @@ mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, cmd->base.duplex = DUPLEX_FULL; } +static int mlxsw_sp2_ptys_max_speed(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed) +{ + u32 eth_proto_cap; + u32 max_speed = 0; + int err; + int i; + + err = mlxsw_sp_port_ptys_query(mlxsw_sp_port, ð_proto_cap, NULL, NULL, NULL); + if (err) + return err; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if ((eth_proto_cap & mlxsw_sp2_port_link_mode[i].mask) && + mlxsw_sp2_port_link_mode[i].speed > max_speed) + max_speed = mlxsw_sp2_port_link_mode[i].speed; + } + + *p_max_speed = max_speed; + return 0; +} + static bool mlxsw_sp2_test_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, const unsigned long *mode) @@ -1637,6 +1736,7 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = { .from_ptys_link = mlxsw_sp2_from_ptys_link, .from_ptys_speed = mlxsw_sp2_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, + .ptys_max_speed = mlxsw_sp2_ptys_max_speed, .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp2_to_ptys_speed, .reg_ptys_eth_pack = mlxsw_sp2_reg_ptys_eth_pack, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 9650562fc0ef..ca8090a28dec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -314,11 +314,9 @@ static int mlxsw_sp_ptp_parse(struct sk_buff *skb, u8 *p_message_type, u16 *p_sequence_id) { - unsigned int offset = 0; unsigned int ptp_class; - u8 *data; + struct ptp_header *hdr; - data = skb_mac_header(skb); ptp_class = ptp_classify_raw(skb); switch (ptp_class & PTP_CLASS_VMASK) { @@ -329,30 +327,14 @@ static int mlxsw_sp_ptp_parse(struct sk_buff *skb, return -ERANGE; } - if (ptp_class & PTP_CLASS_VLAN) - offset += VLAN_HLEN; - - switch (ptp_class & PTP_CLASS_PMASK) { - case PTP_CLASS_IPV4: - offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; - break; - case PTP_CLASS_IPV6: - offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; - break; - case PTP_CLASS_L2: - offset += ETH_HLEN; - break; - default: - return -ERANGE; - } - - /* PTP header is 34 bytes. 
*/ - if (skb->len < offset + 34) + hdr = ptp_parse_header(skb, ptp_class); + if (!hdr) return -EINVAL; - *p_message_type = data[offset] & 0x0f; - *p_domain_number = data[offset + 4]; - *p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31]; + *p_message_type = ptp_get_msgtype(hdr, ptp_class); + *p_domain_number = hdr->domain_number; + *p_sequence_id = be16_to_cpu(hdr->sequence_id); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 964fd444bb10..fd672c6c9133 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -140,18 +140,31 @@ static int mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { + struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; + int err_hdroom = 0; int err = 0; if (!mlxsw_sp_qdisc) return 0; + if (root_qdisc == mlxsw_sp_qdisc) { + struct mlxsw_sp_hdroom hdroom = *mlxsw_sp_port->hdroom; + + hdroom.mode = MLXSW_SP_HDROOM_MODE_DCB; + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + err_hdroom = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + } + if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->destroy) err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port, mlxsw_sp_qdisc); mlxsw_sp_qdisc->handle = TC_H_UNSPEC; mlxsw_sp_qdisc->ops = NULL; - return err; + + return err_hdroom ?: err; } static int @@ -159,6 +172,8 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct mlxsw_sp_qdisc_ops *ops, void *params) { + struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; + struct mlxsw_sp_hdroom orig_hdroom; int err; if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type) @@ -168,6 +183,21 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, * new one. 
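
The PTP rewrite above replaces the driver's open-coded offset arithmetic with the shared helpers from <linux/ptp_classify.h>, which handle the VLAN/IPv4/IPv6/L2 encapsulations and the length check internally. A self-contained sketch of the same flow (the function name is hypothetical, and the version check on PTP_CLASS_VMASK is omitted for brevity):

    #include <linux/ptp_classify.h>

    static int example_ptp_parse(struct sk_buff *skb, u8 *p_msgtype, u16 *p_seq)
    {
            unsigned int ptp_class = ptp_classify_raw(skb);
            struct ptp_header *hdr;

            if (ptp_class == PTP_CLASS_NONE)
                    return -ERANGE;

            hdr = ptp_parse_header(skb, ptp_class);
            if (!hdr)
                    return -EINVAL;

            *p_msgtype = ptp_get_msgtype(hdr, ptp_class);
            *p_seq = be16_to_cpu(hdr->sequence_id);
            return 0;
    }
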
*/ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + + orig_hdroom = *mlxsw_sp_port->hdroom; + if (root_qdisc == mlxsw_sp_qdisc) { + struct mlxsw_sp_hdroom hdroom = orig_hdroom; + + hdroom.mode = MLXSW_SP_HDROOM_MODE_TC; + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + if (err) + goto err_hdroom_configure; + } + err = ops->check_params(mlxsw_sp_port, mlxsw_sp_qdisc, params); if (err) goto err_bad_param; @@ -191,6 +221,8 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, err_bad_param: err_config: + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); +err_hdroom_configure: if (mlxsw_sp_qdisc->handle == handle && ops->unoffload) ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 460cb523312f..4381f8c6c3fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8038,7 +8038,6 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) bool usp = net->ipv4.sysctl_ip_fwd_update_priority; char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; - int err; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) return -EIO; @@ -8047,10 +8046,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); mlxsw_reg_rgcr_usp_set(rgcr_pl, usp); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - if (err) - return err; - return 0; + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 1d18e41ab255..c6c5826aba41 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -968,42 +968,26 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, return 0; } -static u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, - u32 speed) +static int mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) { - u32 buffsize = mlxsw_sp->span_ops->buffsize_get(speed, mtu); + struct mlxsw_sp_hdroom hdroom; - return mlxsw_sp_bytes_cells(mlxsw_sp, buffsize) + 1; + hdroom = *mlxsw_sp_port->hdroom; + hdroom.int_buf.enable = enable; + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + return mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); } static int -mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) +mlxsw_sp_span_port_buffer_enable(struct mlxsw_sp_port *mlxsw_sp_port) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - u32 buffsize; - u32 speed; - int err; - - err = mlxsw_sp_port_speed_get(mlxsw_sp_port, &speed); - if (err) - return err; - if (speed == SPEED_UNKNOWN) - speed = 0; - - buffsize = mlxsw_sp_span_buffsize_get(mlxsw_sp, speed, mtu); - buffsize = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, buffsize); - mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, buffsize); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + return mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, 
true); } -static void mlxsw_sp_span_port_buffer_disable(struct mlxsw_sp *mlxsw_sp, - u8 local_port) +static void mlxsw_sp_span_port_buffer_disable(struct mlxsw_sp_port *mlxsw_sp_port) { - char sbib_pl[MLXSW_REG_SBIB_LEN]; - - mlxsw_reg_sbib_pack(sbib_pl, local_port, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, false); } static struct mlxsw_sp_span_analyzed_port * @@ -1021,48 +1005,6 @@ mlxsw_sp_span_analyzed_port_find(struct mlxsw_sp_span *span, u8 local_port, return NULL; } -int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - int err = 0; - - /* If port is egress mirrored, the shared buffer size should be - * updated according to the mtu value - */ - mutex_lock(&mlxsw_sp->span->analyzed_ports_lock); - - if (mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span, port->local_port, - false)) - err = mlxsw_sp_span_port_buffer_update(port, mtu); - - mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock); - - return err; -} - -void mlxsw_sp_span_speed_update_work(struct work_struct *work) -{ - struct delayed_work *dwork = to_delayed_work(work); - struct mlxsw_sp_port *mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp; - - mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port, - span.speed_update_dw); - - /* If port is egress mirrored, the shared buffer size should be - * updated according to the speed value. - */ - mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - mutex_lock(&mlxsw_sp->span->analyzed_ports_lock); - - if (mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span, - mlxsw_sp_port->local_port, false)) - mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, - mlxsw_sp_port->dev->mtu); - - mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock); -} - static const struct mlxsw_sp_span_entry_ops * mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp, const struct net_device *to_dev) @@ -1180,9 +1122,7 @@ mlxsw_sp_span_analyzed_port_create(struct mlxsw_sp_span *span, * does the mirroring. */ if (!ingress) { - u16 mtu = mlxsw_sp_port->dev->mtu; - - err = mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, mtu); + err = mlxsw_sp_span_port_buffer_enable(mlxsw_sp_port); if (err) goto err_buffer_update; } @@ -1196,18 +1136,15 @@ err_buffer_update: } static void -mlxsw_sp_span_analyzed_port_destroy(struct mlxsw_sp_span *span, +mlxsw_sp_span_analyzed_port_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_span_analyzed_port * analyzed_port) { - struct mlxsw_sp *mlxsw_sp = span->mlxsw_sp; - /* Remove egress mirror buffer now that port is no longer analyzed * at egress. 
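
With the internal mirroring buffer folded into the headroom model, its size is computed from the port's maximum speed and MTU via the int_buf_size_get() callbacks added to spectrum_buffers.c above, which is why the per-event MTU and speed update hooks are being removed from spectrum_span.c. As a worked example with hypothetical numbers, a Spectrum-2 port (buffer factor 38) with a 3000-byte maximum MTU mirroring at 100 Gb/s (speeds are in Mb/s):

    /* 3 * 3000 + 38 * 100000 / 1000 = 9000 + 3800 = 12800 bytes;
     * mlxsw_sp_hdroom_int_buf_size_get() then converts this to cells
     * and adds one cell of slack before reserving it in the headroom.
     */
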
*/ if (!analyzed_port->ingress) - mlxsw_sp_span_port_buffer_disable(mlxsw_sp, - analyzed_port->local_port); + mlxsw_sp_span_port_buffer_disable(mlxsw_sp_port); list_del(&analyzed_port->list); kfree(analyzed_port); @@ -1258,7 +1195,7 @@ void mlxsw_sp_span_analyzed_port_put(struct mlxsw_sp_port *mlxsw_sp_port, if (!refcount_dec_and_test(&analyzed_port->ref_count)) goto out_unlock; - mlxsw_sp_span_analyzed_port_destroy(mlxsw_sp->span, analyzed_port); + mlxsw_sp_span_analyzed_port_destroy(mlxsw_sp_port, analyzed_port); out_unlock: mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock); @@ -1712,11 +1649,6 @@ static int mlxsw_sp1_span_init(struct mlxsw_sp *mlxsw_sp) return 0; } -static u32 mlxsw_sp1_span_buffsize_get(int mtu, u32 speed) -{ - return mtu * 5 / 2; -} - static int mlxsw_sp1_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, u16 policer_id_base) { @@ -1725,7 +1657,6 @@ static int mlxsw_sp1_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops = { .init = mlxsw_sp1_span_init, - .buffsize_get = mlxsw_sp1_span_buffsize_get, .policer_id_base_set = mlxsw_sp1_span_policer_id_base_set, }; @@ -1750,18 +1681,6 @@ static int mlxsw_sp2_span_init(struct mlxsw_sp *mlxsw_sp) #define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 #define MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR 50 -static u32 __mlxsw_sp_span_buffsize_get(int mtu, u32 speed, u32 buffer_factor) -{ - return 3 * mtu + buffer_factor * speed / 1000; -} - -static u32 mlxsw_sp2_span_buffsize_get(int mtu, u32 speed) -{ - int factor = MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR; - - return __mlxsw_sp_span_buffsize_get(mtu, speed, factor); -} - static int mlxsw_sp2_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, u16 policer_id_base) { @@ -1778,19 +1697,10 @@ static int mlxsw_sp2_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_span_ops mlxsw_sp2_span_ops = { .init = mlxsw_sp2_span_init, - .buffsize_get = mlxsw_sp2_span_buffsize_get, .policer_id_base_set = mlxsw_sp2_span_policer_id_base_set, }; -static u32 mlxsw_sp3_span_buffsize_get(int mtu, u32 speed) -{ - int factor = MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR; - - return __mlxsw_sp_span_buffsize_get(mtu, speed, factor); -} - const struct mlxsw_sp_span_ops mlxsw_sp3_span_ops = { .init = mlxsw_sp2_span_init, - .buffsize_get = mlxsw_sp3_span_buffsize_get, .policer_id_base_set = mlxsw_sp2_span_policer_id_base_set, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index 1c746dd3b1bd..d907718bc8c5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -47,7 +47,6 @@ struct mlxsw_sp_span_entry_ops; struct mlxsw_sp_span_ops { int (*init)(struct mlxsw_sp *mlxsw_sp); - u32 (*buffsize_get)(int mtu, u32 speed); int (*policer_id_base_set)(struct mlxsw_sp *mlxsw_sp, u16 policer_id_base); }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 2e41c5519c1b..433f14ade464 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -291,7 +291,7 @@ static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port, static const struct mlxsw_sp_trap_policer_item mlxsw_sp_trap_policer_items_arr[] = { { - .policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 4096), }, { .policer = MLXSW_SP_TRAP_POLICER(2, 128, 128), @@ 
-303,25 +303,25 @@ mlxsw_sp_trap_policer_items_arr[] = { .policer = MLXSW_SP_TRAP_POLICER(4, 128, 128), }, { - .policer = MLXSW_SP_TRAP_POLICER(5, 16 * 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(5, 16 * 1024, 8192), }, { .policer = MLXSW_SP_TRAP_POLICER(6, 128, 128), }, { - .policer = MLXSW_SP_TRAP_POLICER(7, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(7, 1024, 512), }, { - .policer = MLXSW_SP_TRAP_POLICER(8, 20 * 1024, 1024), + .policer = MLXSW_SP_TRAP_POLICER(8, 20 * 1024, 8192), }, { .policer = MLXSW_SP_TRAP_POLICER(9, 128, 128), }, { - .policer = MLXSW_SP_TRAP_POLICER(10, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(10, 1024, 512), }, { - .policer = MLXSW_SP_TRAP_POLICER(11, 360, 128), + .policer = MLXSW_SP_TRAP_POLICER(11, 256, 128), }, { .policer = MLXSW_SP_TRAP_POLICER(12, 128, 128), @@ -330,19 +330,19 @@ mlxsw_sp_trap_policer_items_arr[] = { .policer = MLXSW_SP_TRAP_POLICER(13, 128, 128), }, { - .policer = MLXSW_SP_TRAP_POLICER(14, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(14, 1024, 512), }, { - .policer = MLXSW_SP_TRAP_POLICER(15, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(15, 1024, 512), }, { - .policer = MLXSW_SP_TRAP_POLICER(16, 24 * 1024, 4096), + .policer = MLXSW_SP_TRAP_POLICER(16, 24 * 1024, 16384), }, { - .policer = MLXSW_SP_TRAP_POLICER(17, 19 * 1024, 4096), + .policer = MLXSW_SP_TRAP_POLICER(17, 19 * 1024, 8192), }, { - .policer = MLXSW_SP_TRAP_POLICER(18, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(18, 1024, 512), }, { .policer = MLXSW_SP_TRAP_POLICER(19, 1024, 512), diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 6f9a725662fb..5023d91269f4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -551,16 +551,6 @@ struct mlxsw_sx_port_link_mode { static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, - .supported = SUPPORTED_100baseT_Full, - .advertised = ADVERTISED_100baseT_Full, - .speed = 100, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX, - .speed = 100, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, .supported = SUPPORTED_1000baseKX_Full, @@ -568,12 +558,6 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { .speed = 1000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T, - .supported = SUPPORTED_10000baseT_Full, - .advertised = ADVERTISED_10000baseT_Full, - .speed = 10000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, .supported = SUPPORTED_10000baseKX4_Full, @@ -590,12 +574,6 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { .speed = 10000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2, - .supported = SUPPORTED_20000baseKR2_Full, - .advertised = ADVERTISED_20000baseKR2_Full, - .speed = 20000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, .supported = SUPPORTED_40000baseCR4_Full, .advertised = ADVERTISED_40000baseCR4_Full, @@ -634,8 +612,7 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { { .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4, .speed = 100000, }, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 33909887d0ac..57f9e24602d0 100644 
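
The policer retuning above keeps most packet-per-second rates but substantially enlarges the bursts of the high-rate policers. All the new burst values are exact powers of two, consistent with the power-of-two burst sizes these policers are configured with:

    /* 128 = 2^7, 512 = 2^9, 4096 = 2^12, 8192 = 2^13, 16384 = 2^14.
     * Policer 11 additionally has its rate lowered from 360 pps to 256 pps.
     */
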
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -120,8 +120,14 @@ enum { }; enum mlxsw_event_trap_id { + /* Fatal Event generated by FW */ + MLXSW_TRAP_ID_MFDE = 0x3, /* Port Up/Down event generated by hardware */ MLXSW_TRAP_ID_PUDE = 0x8, + /* Port Module Plug/Unplug Event generated by hardware */ + MLXSW_TRAP_ID_PMPE = 0x9, + /* Temperature Warning event generated by hardware */ + MLXSW_TRAP_ID_MTWE = 0xC, /* PTP Ingress FIFO has a new entry */ MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D, /* PTP Egress FIFO has a new entry */ diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index f3f6dfe3eddc..caa251d0e381 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -587,10 +587,10 @@ out: return err; } -static void ks8842_rx_frame_dma_tasklet(unsigned long arg) +static void ks8842_rx_frame_dma_tasklet(struct tasklet_struct *t) { - struct net_device *netdev = (struct net_device *)arg; - struct ks8842_adapter *adapter = netdev_priv(netdev); + struct ks8842_adapter *adapter = from_tasklet(adapter, t, dma_rx.tasklet); + struct net_device *netdev = adapter->netdev; struct ks8842_rx_dma_ctl *ctl = &adapter->dma_rx; struct sk_buff *skb = ctl->skb; dma_addr_t addr = sg_dma_address(&ctl->sg); @@ -720,10 +720,10 @@ static void ks8842_handle_rx_overrun(struct net_device *netdev, netdev->stats.rx_fifo_errors++; } -static void ks8842_tasklet(unsigned long arg) +static void ks8842_tasklet(struct tasklet_struct *t) { - struct net_device *netdev = (struct net_device *)arg; - struct ks8842_adapter *adapter = netdev_priv(netdev); + struct ks8842_adapter *adapter = from_tasklet(adapter, t, tasklet); + struct net_device *netdev = adapter->netdev; u16 isr; unsigned long flags; u16 entry_bank; @@ -953,8 +953,7 @@ static int ks8842_alloc_dma_bufs(struct net_device *netdev) goto err; } - tasklet_init(&rx_ctl->tasklet, ks8842_rx_frame_dma_tasklet, - (unsigned long)netdev); + tasklet_setup(&rx_ctl->tasklet, ks8842_rx_frame_dma_tasklet); return 0; err: @@ -1173,7 +1172,7 @@ static int ks8842_probe(struct platform_device *pdev) adapter->dma_tx.channel = -1; } - tasklet_init(&adapter->tasklet, ks8842_tasklet, (unsigned long)netdev); + tasklet_setup(&adapter->tasklet, ks8842_tasklet); spin_lock_init(&adapter->lock); netdev->netdev_ops = &ks8842_netdev_ops; diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index bb646b65cc95..9ed264ed7070 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * drivers/net/ethernet/micrel/ksx884x.c - Micrel KSZ8841/2 PCI Ethernet driver * * Copyright (c) 2009-2010 Micrel, Inc. @@ -959,7 +959,7 @@ struct ksz_sw_desc { * struct ksz_dma_buf - OS dependent DMA buffer data structure * @skb: Associated socket buffer. * @dma: Associated physical DMA address. - * len: Actual len used. + * @len: Actual len used. */ struct ksz_dma_buf { struct sk_buff *skb; @@ -1254,6 +1254,7 @@ struct ksz_port_info { * @multi_list_size: Multicast address list size. * @enabled: Indication of hardware enabled. * @rx_stop: Indication of receive process stop. + * @reserved2: none * @features: Hardware features to enable. * @overrides: Hardware features to override. * @parent: Pointer to parent, network device private structure. 
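
The ks8842 changes above are a mechanical conversion to the new tasklet API: the callback now receives the tasklet itself and recovers its containing structure with from_tasklet(), a container_of() wrapper, instead of casting an unsigned long argument. A minimal sketch with hypothetical names:

    #include <linux/interrupt.h>

    struct example_adapter {
            struct tasklet_struct tasklet;
            /* ... driver state ... */
    };

    static void example_tasklet_fn(struct tasklet_struct *t)
    {
            struct example_adapter *adapter = from_tasklet(adapter, t, tasklet);

            /* deferred work on 'adapter' goes here */
    }

    static void example_init(struct example_adapter *adapter)
    {
            tasklet_setup(&adapter->tasklet, example_tasklet_fn);
    }
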
@@ -1447,7 +1448,7 @@ struct dev_info { * struct dev_priv - Network device private data structure * @adapter: Adapter device information. * @port: Port information. - * @monitor_time_info: Timer to monitor ports. + * @monitor_timer_info: Timer to monitor ports. * @proc_sem: Semaphore for proc accessing. * @id: Device ID. * @mii_if: MII interface information. @@ -1566,6 +1567,7 @@ static inline void hw_restore_intr(struct ksz_hw *hw, uint interrupt) /** * hw_block_intr - block hardware interrupts + * @hw: The hardware instance. * * This function blocks all interrupts of the hardware and returns the current * interrupt enable mask so that interrupts can be restored later. @@ -1649,8 +1651,7 @@ static inline void set_tx_len(struct ksz_desc *desc, u32 len) #define HW_DELAY(hw, reg) \ do { \ - u16 dummy; \ - dummy = readw(hw->io + reg); \ + readw(hw->io + reg); \ } while (0) /** @@ -1819,6 +1820,7 @@ static void port_r_mib_cnt(struct ksz_hw *hw, int port, u16 addr, u64 *cnt) * port_r_mib_pkt - read dropped packet counts * @hw: The hardware instance. * @port: The port index. + * @last: last one * @cnt: Buffer to store the receive and transmit dropped packet counts. * * This routine reads the dropped packet counts of the port. @@ -1972,7 +1974,7 @@ static void port_cfg(struct ksz_hw *hw, int port, int offset, u16 bits, * port_chk_shift - check port bit * @hw: The hardware instance. * @port: The port index. - * @offset: The offset of the register. + * @addr: The offset of the register. * @shift: Number of bits to shift. * * This function checks whether the specified port is set in the register or @@ -1994,7 +1996,7 @@ static int port_chk_shift(struct ksz_hw *hw, int port, u32 addr, int shift) * port_cfg_shift - set port bit * @hw: The hardware instance. * @port: The port index. - * @offset: The offset of the register. + * @addr: The offset of the register. * @shift: Number of bits to shift. * @set: The flag indicating whether the port is to be set or not. * @@ -4425,6 +4427,8 @@ static int ksz_alloc_desc(struct dev_info *adapter) /** * free_dma_buf - release DMA buffer resources * @adapter: Adapter information structure. + * @dma_buf: pointer to buf + * @direction: to or from device * * This routine is just a helper function to release the DMA buffer resources. */ @@ -4562,6 +4566,7 @@ static void ksz_free_desc(struct dev_info *adapter) * ksz_free_buffers - free buffers used in the descriptors * @adapter: Adapter information structure. * @desc_info: Descriptor information structure. + * @direction: to or from device * * This local routine frees buffers used in the DMA buffers. */ @@ -4721,7 +4726,8 @@ static void send_packet(struct sk_buff *skb, struct net_device *dev) /** * transmit_cleanup - clean up transmit descriptors - * @dev: Network device. + * @hw_priv: Network device. + * @normal: break if owned * * This routine is called to clean up the transmitted buffers. */ @@ -4777,7 +4783,7 @@ static void transmit_cleanup(struct dev_info *hw_priv, int normal) /** * transmit_done - transmit done processing - * @dev: Network device. + * @hw_priv: Network device. * * This routine is called when the transmit interrupt is triggered, indicating * either a packet is sent successfully or there are transmit errors. @@ -4883,6 +4889,7 @@ unlock: /** * netdev_tx_timeout - transmit timeout processing * @dev: Network device. + * @txqueue: index of hanging queue * * This routine is called when the transmit timer expires. 
That indicates the * hardware is not running correctly because transmit interrupts are not @@ -4978,7 +4985,6 @@ static inline int rx_proc(struct net_device *dev, struct ksz_hw* hw, struct dev_info *hw_priv = priv->adapter; struct ksz_dma_buf *dma_buf; struct sk_buff *skb; - int rx_status; /* Received length includes 4-byte CRC. */ packet_len = status.rx.frame_len - 4; @@ -5014,7 +5020,7 @@ static inline int rx_proc(struct net_device *dev, struct ksz_hw* hw, dev->stats.rx_bytes += packet_len; /* Notify upper layer for received packet. */ - rx_status = netif_rx(skb); + netif_rx(skb); return 0; } @@ -5159,9 +5165,9 @@ release_packet: return received; } -static void rx_proc_task(unsigned long data) +static void rx_proc_task(struct tasklet_struct *t) { - struct dev_info *hw_priv = (struct dev_info *) data; + struct dev_info *hw_priv = from_tasklet(hw_priv, t, rx_tasklet); struct ksz_hw *hw = &hw_priv->hw; if (!hw->enabled) @@ -5181,9 +5187,9 @@ static void rx_proc_task(unsigned long data) } } -static void tx_proc_task(unsigned long data) +static void tx_proc_task(struct tasklet_struct *t) { - struct dev_info *hw_priv = (struct dev_info *) data; + struct dev_info *hw_priv = from_tasklet(hw_priv, t, tx_tasklet); struct ksz_hw *hw = &hw_priv->hw; hw_ack_intr(hw, KS884X_INT_TX_MASK); @@ -5436,10 +5442,8 @@ static int prepare_hardware(struct net_device *dev) rc = request_irq(dev->irq, netdev_intr, IRQF_SHARED, dev->name, dev); if (rc) return rc; - tasklet_init(&hw_priv->rx_tasklet, rx_proc_task, - (unsigned long) hw_priv); - tasklet_init(&hw_priv->tx_tasklet, tx_proc_task, - (unsigned long) hw_priv); + tasklet_setup(&hw_priv->rx_tasklet, rx_proc_task); + tasklet_setup(&hw_priv->tx_tasklet, tx_proc_task); hw->promiscuous = 0; hw->all_multi = 0; @@ -5829,8 +5833,7 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /* Get address of MII PHY in use. */ case SIOCGMIIPHY: data->phy_id = priv->id; - - /* Fallthrough... */ + fallthrough; /* Read MII PHY register. */ case SIOCGMIIREG: @@ -6078,14 +6081,6 @@ static void netdev_get_drvinfo(struct net_device *dev, sizeof(info->bus_info)); } -/** - * netdev_get_regs_len - get length of register dump - * @dev: Network device. - * - * This function returns the length of the register dump. - * - * Return length of the register dump. - */ static struct hw_regs { int start; int end; @@ -6099,6 +6094,14 @@ static struct hw_regs { { 0, 0 } }; +/** + * netdev_get_regs_len - get length of register dump + * @dev: Network device. + * + * This function returns the length of the register dump. + * + * Return length of the register dump. + */ static int netdev_get_regs_len(struct net_device *dev) { struct hw_regs *range = hw_regs_range; @@ -6240,6 +6243,8 @@ static int netdev_get_eeprom_len(struct net_device *dev) return EEPROM_SIZE * 2; } +#define EEPROM_MAGIC 0x10A18842 + /** * netdev_get_eeprom - get EEPROM data * @dev: Network device. @@ -6250,8 +6255,6 @@ static int netdev_get_eeprom_len(struct net_device *dev) * * Return 0 if successful; otherwise an error code. */ -#define EEPROM_MAGIC 0x10A18842 - static int netdev_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data) { @@ -6388,7 +6391,7 @@ static int netdev_set_pauseparam(struct net_device *dev, /** * netdev_get_ringparam - get tx/rx ring parameters * @dev: Network device. - * @pause: Ethtool RING settings data structure. + * @ring: Ethtool RING settings data structure. * * This procedure returns the TX/RX ring settings. 
*/ @@ -6509,7 +6512,6 @@ static void netdev_get_ethtool_stats(struct net_device *dev, int i; int n; int p; - int rc; u64 counter[TOTAL_PORT_COUNTER_NUM]; mutex_lock(&hw_priv->lock); @@ -6530,19 +6532,19 @@ static void netdev_get_ethtool_stats(struct net_device *dev, if (1 == port->mib_port_cnt && n < SWITCH_PORT_NUM) { p = n; - rc = wait_event_interruptible_timeout( + wait_event_interruptible_timeout( hw_priv->counter[p].counter, 2 == hw_priv->counter[p].read, HZ * 1); } else for (i = 0, p = n; i < port->mib_port_cnt - n; i++, p++) { if (0 == i) { - rc = wait_event_interruptible_timeout( + wait_event_interruptible_timeout( hw_priv->counter[p].counter, 2 == hw_priv->counter[p].read, HZ * 2); } else if (hw->port_mib[p].cnt_ptr) { - rc = wait_event_interruptible_timeout( + wait_event_interruptible_timeout( hw_priv->counter[p].counter, 2 == hw_priv->counter[p].read, HZ * 1); @@ -6693,7 +6695,7 @@ static void mib_monitor(struct timer_list *t) /** * dev_monitor - periodic monitoring - * @ptr: Network device pointer. + * @t: timer list containing a network device pointer. * * This routine is run in a kernel timer to monitor the network device. */ diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 5bd7fb917b7a..796e46a53926 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * Register map access API - ENCX24J600 support * * Copyright 2015 Gridpoint diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index de93cc6ebc1a..a1938842f828 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -158,9 +158,8 @@ static void lan743x_tx_isr(void *context, u32 int_sts, u32 flags) struct lan743x_tx *tx = context; struct lan743x_adapter *adapter = tx->adapter; bool enable_flag = true; - u32 int_en = 0; - int_en = lan743x_csr_read(adapter, INT_EN_SET); + lan743x_csr_read(adapter, INT_EN_SET); if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) { lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_DMA_TX_(tx->channel_number)); @@ -1699,10 +1698,9 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight) bool start_transmitter = false; unsigned long irq_flags = 0; u32 ioc_bit = 0; - u32 int_sts = 0; ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number); - int_sts = lan743x_csr_read(adapter, DMAC_INT_STS); + lan743x_csr_read(adapter, DMAC_INT_STS); if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C) lan743x_csr_write(adapter, DMAC_INT_STS, ioc_bit); spin_lock_irqsave(&tx->ring_lock, irq_flags); @@ -3038,7 +3036,6 @@ static int lan743x_pm_suspend(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); struct lan743x_adapter *adapter = netdev_priv(netdev); - int ret; lan743x_pcidev_shutdown(pdev); @@ -3051,9 +3048,7 @@ static int lan743x_pm_suspend(struct device *dev) lan743x_pm_set_wol(adapter); /* Host sets PME_En, put D3hot */ - ret = pci_prepare_to_sleep(pdev); - - return 0; + return pci_prepare_to_sleep(pdev); } static int lan743x_pm_resume(struct device *dev) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index aa002db04250..70bf8c67d7ef 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -5,6 +5,7 @@ * Copyright (c) 2017 Microsemi Corporation */
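The dev_monitor() kernel-doc fix above (@ptr becomes @t) matches the modern timer callback convention used by mib_monitor(struct timer_list *t): the timer core hands back the struct timer_list pointer and the callback recovers its private data with from_timer(), another container_of() wrapper, instead of an unsigned long argument. A short sketch with hypothetical names:

#include <linux/timer.h>
#include <linux/netdevice.h>

struct bar_priv {
	struct timer_list monitor_timer;
	struct net_device *netdev;
};

static void bar_monitor(struct timer_list *t)
{
	/* expands to container_of(t, struct bar_priv, monitor_timer) */
	struct bar_priv *priv = from_timer(priv, t, monitor_timer);

	/* ... check link state of priv->netdev here ... */

	mod_timer(&priv->monitor_timer, jiffies + HZ);
}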
#include <linux/if_bridge.h> +#include <soc/mscc/ocelot_vcap.h> #include "ocelot.h" #include "ocelot_vcap.h" @@ -107,6 +108,13 @@ static void ocelot_vcap_enable(struct ocelot *ocelot, int port) ocelot_write_gix(ocelot, ANA_PORT_VCAP_S2_CFG_S2_ENA | ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG(0xa), ANA_PORT_VCAP_S2_CFG, port); + + ocelot_write_gix(ocelot, ANA_PORT_VCAP_CFG_S1_ENA, + ANA_PORT_VCAP_CFG, port); + + ocelot_rmw_gix(ocelot, REW_PORT_CFG_ES0_EN, + REW_PORT_CFG_ES0_EN, + REW_PORT_CFG, port); } static inline u32 ocelot_vlant_read_vlanaccess(struct ocelot *ocelot) @@ -191,12 +199,28 @@ static int ocelot_port_set_native_vlan(struct ocelot *ocelot, int port, return 0; } -void ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, - bool vlan_aware) +int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, + bool vlan_aware, struct switchdev_trans *trans) { struct ocelot_port *ocelot_port = ocelot->ports[port]; u32 val; + if (switchdev_trans_ph_prepare(trans)) { + struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1]; + struct ocelot_vcap_filter *filter; + + list_for_each_entry(filter, &block->rules, list) { + if (filter->ingress_port_mask & BIT(port) && + filter->action.vid_replace_ena) { + dev_err(ocelot->dev, + "Cannot change VLAN state with vlan modify rules active\n"); + return -EBUSY; + } + } + + return 0; + } + ocelot_port->vlan_aware = vlan_aware; if (vlan_aware) @@ -210,6 +234,8 @@ void ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG, port); ocelot_port_set_native_vlan(ocelot, port, ocelot_port->vid); + + return 0; } EXPORT_SYMBOL(ocelot_port_vlan_filtering); @@ -413,26 +439,20 @@ void ocelot_port_disable(struct ocelot *ocelot, int port) } EXPORT_SYMBOL(ocelot_port_disable); -int ocelot_port_add_txtstamp_skb(struct ocelot_port *ocelot_port, - struct sk_buff *skb) +void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, + struct sk_buff *clone) { - struct skb_shared_info *shinfo = skb_shinfo(skb); - struct ocelot *ocelot = ocelot_port->ocelot; + struct ocelot_port *ocelot_port = ocelot->ports[port]; - if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP && - ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - spin_lock(&ocelot_port->ts_id_lock); + spin_lock(&ocelot_port->ts_id_lock); - shinfo->tx_flags |= SKBTX_IN_PROGRESS; - /* Store timestamp ID in cb[0] of sk_buff */ - skb->cb[0] = ocelot_port->ts_id; - ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4; - skb_queue_tail(&ocelot_port->tx_skbs, skb); + skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; + /* Store timestamp ID in cb[0] of sk_buff */ + clone->cb[0] = ocelot_port->ts_id; + ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4; + skb_queue_tail(&ocelot_port->tx_skbs, clone); - spin_unlock(&ocelot_port->ts_id_lock); - return 0; - } - return -ENODATA; + spin_unlock(&ocelot_port->ts_id_lock); } EXPORT_SYMBOL(ocelot_port_add_txtstamp_skb); @@ -511,9 +531,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) /* Set the timestamp into the skb */ memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); - skb_tstamp_tx(skb_match, &shhwtstamps); - - dev_kfree_skb_any(skb_match); + skb_complete_tx_timestamp(skb_match, &shhwtstamps); /* Next ts */ ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT); @@ -1102,12 +1120,24 @@ EXPORT_SYMBOL(ocelot_port_bridge_join); int ocelot_port_bridge_leave(struct ocelot *ocelot, int port, struct net_device *bridge) { + struct switchdev_trans trans; + int ret; + ocelot->bridge_mask &= ~BIT(port); if 
(!ocelot->bridge_mask) ocelot->hw_bridge_dev = NULL; - ocelot_port_vlan_filtering(ocelot, port, 0); + trans.ph_prepare = true; + ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans); + if (ret) + return ret; + + trans.ph_prepare = false; + ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans); + if (ret) + return ret; + ocelot_port_set_pvid(ocelot, port, 0); return ocelot_port_set_native_vlan(ocelot, port, 0); } @@ -1354,22 +1384,14 @@ void ocelot_init_port(struct ocelot *ocelot, int port) } EXPORT_SYMBOL(ocelot_init_port); -/* Configure and enable the CPU port module, which is a set of queues. - * If @npi contains a valid port index, the CPU port module is connected - * to the Node Processor Interface (NPI). This is the mode through which - * frames can be injected from and extracted to an external CPU, - * over Ethernet. +/* Configure and enable the CPU port module, which is a set of queues + * accessible through register MMIO, frame DMA or Ethernet (in case + * NPI mode is used). */ -void ocelot_configure_cpu(struct ocelot *ocelot, int npi, - enum ocelot_tag_prefix injection, - enum ocelot_tag_prefix extraction) +static void ocelot_cpu_port_init(struct ocelot *ocelot) { int cpu = ocelot->num_phys_ports; - ocelot->npi = npi; - ocelot->inj_prefix = injection; - ocelot->xtr_prefix = extraction; - /* The unicast destination PGID for the CPU port module is unused */ ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, cpu); /* Instead set up a multicast destination PGID for traffic copied to @@ -1381,31 +1403,13 @@ void ocelot_configure_cpu(struct ocelot *ocelot, int npi, ANA_PORT_PORT_CFG_PORTID_VAL(cpu), ANA_PORT_PORT_CFG, cpu); - if (npi >= 0 && npi < ocelot->num_phys_ports) { - ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M | - QSYS_EXT_CPU_CFG_EXT_CPU_PORT(npi), - QSYS_EXT_CPU_CFG); - - /* Enable NPI port */ - ocelot_fields_write(ocelot, npi, - QSYS_SWITCH_PORT_MODE_PORT_ENA, 1); - /* NPI port Injection/Extraction configuration */ - ocelot_fields_write(ocelot, npi, SYS_PORT_MODE_INCL_XTR_HDR, - extraction); - ocelot_fields_write(ocelot, npi, SYS_PORT_MODE_INCL_INJ_HDR, - injection); - - /* Disable transmission of pause frames */ - ocelot_fields_write(ocelot, npi, SYS_PAUSE_CFG_PAUSE_ENA, 0); - } - /* Enable CPU port module */ ocelot_fields_write(ocelot, cpu, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1); /* CPU port Injection/Extraction configuration */ ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_XTR_HDR, - extraction); + ocelot->xtr_prefix); ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_INJ_HDR, - injection); + ocelot->inj_prefix); /* Configure the CPU port to be VLAN aware */ ocelot_write_gix(ocelot, ANA_PORT_VLAN_CFG_VLAN_VID(0) | @@ -1413,7 +1417,6 @@ void ocelot_configure_cpu(struct ocelot *ocelot, int npi, ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1), ANA_PORT_VLAN_CFG, cpu); } -EXPORT_SYMBOL(ocelot_configure_cpu); int ocelot_init(struct ocelot *ocelot) { @@ -1453,6 +1456,7 @@ int ocelot_init(struct ocelot *ocelot) ocelot_mact_init(ocelot); ocelot_vlan_init(ocelot); ocelot_vcap_init(ocelot); + ocelot_cpu_port_init(ocelot); for (port = 0; port < ocelot->num_phys_ports; port++) { /* Clear all counters (5 groups) */ diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index dc29e05103a1..abb407dff93c 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -98,6 +98,8 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct net_device *bond); void ocelot_port_lag_leave(struct ocelot *ocelot, int port, 
struct net_device *bond); +struct net_device *ocelot_port_to_netdev(struct ocelot *ocelot, int port); +int ocelot_netdev_to_port(struct net_device *dev); u32 ocelot_port_readl(struct ocelot_port *port, u32 reg); void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg); diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index ec1b6e2572ba..729495a1a77e 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -5,56 +5,433 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> - +#include <soc/mscc/ocelot_vcap.h> #include "ocelot_vcap.h" -static int ocelot_flower_parse_action(struct flow_cls_offload *f, +/* Arbitrarily chosen constants for encoding the VCAP block and lookup number + * into the chain number. This is UAPI. + */ +#define VCAP_BLOCK 10000 +#define VCAP_LOOKUP 1000 +#define VCAP_IS1_NUM_LOOKUPS 3 +#define VCAP_IS2_NUM_LOOKUPS 2 +#define VCAP_IS2_NUM_PAG 256 +#define VCAP_IS1_CHAIN(lookup) \ + (1 * VCAP_BLOCK + (lookup) * VCAP_LOOKUP) +#define VCAP_IS2_CHAIN(lookup, pag) \ + (2 * VCAP_BLOCK + (lookup) * VCAP_LOOKUP + (pag)) + +static int ocelot_chain_to_block(int chain, bool ingress) +{ + int lookup, pag; + + if (!ingress) { + if (chain == 0) + return VCAP_ES0; + return -EOPNOTSUPP; + } + + /* Backwards compatibility with older, single-chain tc-flower + * offload support in Ocelot + */ + if (chain == 0) + return VCAP_IS2; + + for (lookup = 0; lookup < VCAP_IS1_NUM_LOOKUPS; lookup++) + if (chain == VCAP_IS1_CHAIN(lookup)) + return VCAP_IS1; + + for (lookup = 0; lookup < VCAP_IS2_NUM_LOOKUPS; lookup++) + for (pag = 0; pag < VCAP_IS2_NUM_PAG; pag++) + if (chain == VCAP_IS2_CHAIN(lookup, pag)) + return VCAP_IS2; + + return -EOPNOTSUPP; +} + +/* Caller must ensure this is a valid IS1 or IS2 chain first, + * by calling ocelot_chain_to_block. + */ +static int ocelot_chain_to_lookup(int chain) +{ + return (chain / VCAP_LOOKUP) % 10; +} + +/* Caller must ensure this is a valid IS2 chain first, + * by calling ocelot_chain_to_block. + */ +static int ocelot_chain_to_pag(int chain) +{ + int lookup = ocelot_chain_to_lookup(chain); + + /* calculate PAG value as chain index relative to the first PAG */ + return chain - VCAP_IS2_CHAIN(lookup, 0); +} + +static bool ocelot_is_goto_target_valid(int goto_target, int chain, + bool ingress) +{ + int pag; + + /* Can't offload GOTO in VCAP ES0 */ + if (!ingress) + return (goto_target < 0); + + /* Non-optional GOTOs */ + if (chain == 0) + /* VCAP IS1 can be skipped, either partially or completely */ + return (goto_target == VCAP_IS1_CHAIN(0) || + goto_target == VCAP_IS1_CHAIN(1) || + goto_target == VCAP_IS1_CHAIN(2) || + goto_target == VCAP_IS2_CHAIN(0, 0) || + goto_target == VCAP_IS2_CHAIN(1, 0)); + + if (chain == VCAP_IS1_CHAIN(0)) + return (goto_target == VCAP_IS1_CHAIN(1)); + + if (chain == VCAP_IS1_CHAIN(1)) + return (goto_target == VCAP_IS1_CHAIN(2)); + + /* Lookup 2 of VCAP IS1 can really support non-optional GOTOs, + * using a Policy Association Group (PAG) value, which is an 8-bit + * value encoding a VCAP IS2 target chain. + */ + if (chain == VCAP_IS1_CHAIN(2)) { + for (pag = 0; pag < VCAP_IS2_NUM_PAG; pag++) + if (goto_target == VCAP_IS2_CHAIN(0, pag)) + return true; + + return false; + } + + /* Non-optional GOTO from VCAP IS2 lookup 0 to lookup 1. + * We cannot change the PAG at this point. 
+ */ + for (pag = 0; pag < VCAP_IS2_NUM_PAG; pag++) + if (chain == VCAP_IS2_CHAIN(0, pag)) + return (goto_target == VCAP_IS2_CHAIN(1, pag)); + + /* VCAP IS2 lookup 1 cannot jump anywhere */ + return false; +} + +static struct ocelot_vcap_filter * +ocelot_find_vcap_filter_that_points_at(struct ocelot *ocelot, int chain) +{ + struct ocelot_vcap_filter *filter; + struct ocelot_vcap_block *block; + int block_id; + + block_id = ocelot_chain_to_block(chain, true); + if (block_id < 0) + return NULL; + + if (block_id == VCAP_IS2) { + block = &ocelot->block[VCAP_IS1]; + + list_for_each_entry(filter, &block->rules, list) + if (filter->type == OCELOT_VCAP_FILTER_PAG && + filter->goto_target == chain) + return filter; + } + + list_for_each_entry(filter, &ocelot->dummy_rules, list) + if (filter->goto_target == chain) + return filter; + + return NULL; +} + +static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, + bool ingress, struct flow_cls_offload *f, struct ocelot_vcap_filter *filter) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct netlink_ext_ack *extack = f->common.extack; + bool allow_missing_goto_target = false; const struct flow_action_entry *a; + enum ocelot_tag_tpid_sel tpid; + int i, chain, egress_port; u64 rate; - int i; - - if (!flow_offload_has_one_action(&f->rule->action)) - return -EOPNOTSUPP; if (!flow_action_basic_hw_stats_check(&f->rule->action, f->common.extack)) return -EOPNOTSUPP; + chain = f->common.chain_index; + filter->block_id = ocelot_chain_to_block(chain, ingress); + if (filter->block_id < 0) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload to this chain"); + return -EOPNOTSUPP; + } + if (filter->block_id == VCAP_IS1 || filter->block_id == VCAP_IS2) + filter->lookup = ocelot_chain_to_lookup(chain); + if (filter->block_id == VCAP_IS2) + filter->pag = ocelot_chain_to_pag(chain); + + filter->goto_target = -1; + filter->type = OCELOT_VCAP_FILTER_DUMMY; + flow_action_for_each(i, a, &f->rule->action) { switch (a->id) { case FLOW_ACTION_DROP: - filter->action = OCELOT_VCAP_ACTION_DROP; + if (filter->block_id != VCAP_IS2) { + NL_SET_ERR_MSG_MOD(extack, + "Drop action can only be offloaded to VCAP IS2"); + return -EOPNOTSUPP; + } + if (filter->goto_target != -1) { + NL_SET_ERR_MSG_MOD(extack, + "Last action must be GOTO"); + return -EOPNOTSUPP; + } + filter->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY; + filter->action.port_mask = 0; + filter->action.police_ena = true; + filter->action.pol_ix = OCELOT_POLICER_DISCARD; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; break; case FLOW_ACTION_TRAP: - filter->action = OCELOT_VCAP_ACTION_TRAP; + if (filter->block_id != VCAP_IS2) { + NL_SET_ERR_MSG_MOD(extack, + "Trap action can only be offloaded to VCAP IS2"); + return -EOPNOTSUPP; + } + if (filter->goto_target != -1) { + NL_SET_ERR_MSG_MOD(extack, + "Last action must be GOTO"); + return -EOPNOTSUPP; + } + filter->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY; + filter->action.port_mask = 0; + filter->action.cpu_copy_ena = true; + filter->action.cpu_qu_num = 0; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; break; case FLOW_ACTION_POLICE: - filter->action = OCELOT_VCAP_ACTION_POLICE; + if (filter->block_id != VCAP_IS2 || + filter->lookup != 0) { + NL_SET_ERR_MSG_MOD(extack, + "Police action can only be offloaded to VCAP IS2 lookup 0"); + return -EOPNOTSUPP; + } + if (filter->goto_target != -1) { + NL_SET_ERR_MSG_MOD(extack, + "Last action must be GOTO"); + return -EOPNOTSUPP; + } + filter->action.police_ena = true; rate = a->police.rate_bytes_ps; - 
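/* Aside, not part of the patch: tc delivers the police rate in bytes per
 * second, while the ocelot policer is programmed in kbit/s, which is what
 * the div_u64(rate, 1000) * 8 conversion just below computes; for example
 * a tc rate of 125000 bytes/s is programmed as 1000 kbit/s.
 */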
filter->pol.rate = div_u64(rate, 1000) * 8; - filter->pol.burst = a->police.burst; + filter->action.pol.rate = div_u64(rate, 1000) * 8; + filter->action.pol.burst = a->police.burst; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + break; + case FLOW_ACTION_REDIRECT: + if (filter->block_id != VCAP_IS2) { + NL_SET_ERR_MSG_MOD(extack, + "Redirect action can only be offloaded to VCAP IS2"); + return -EOPNOTSUPP; + } + if (filter->goto_target != -1) { + NL_SET_ERR_MSG_MOD(extack, + "Last action must be GOTO"); + return -EOPNOTSUPP; + } + egress_port = ocelot->ops->netdev_to_port(a->dev); + if (egress_port < 0) { + NL_SET_ERR_MSG_MOD(extack, + "Destination not an ocelot port"); + return -EOPNOTSUPP; + } + filter->action.mask_mode = OCELOT_MASK_MODE_REDIRECT; + filter->action.port_mask = BIT(egress_port); + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + break; + case FLOW_ACTION_VLAN_POP: + if (filter->block_id != VCAP_IS1) { + NL_SET_ERR_MSG_MOD(extack, + "VLAN pop action can only be offloaded to VCAP IS1"); + return -EOPNOTSUPP; + } + if (filter->goto_target != -1) { + NL_SET_ERR_MSG_MOD(extack, + "Last action must be GOTO"); + return -EOPNOTSUPP; + } + filter->action.vlan_pop_cnt_ena = true; + filter->action.vlan_pop_cnt++; + if (filter->action.vlan_pop_cnt > 2) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot pop more than 2 VLAN headers"); + return -EOPNOTSUPP; + } + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + break; + case FLOW_ACTION_VLAN_MANGLE: + if (filter->block_id != VCAP_IS1) { + NL_SET_ERR_MSG_MOD(extack, + "VLAN modify action can only be offloaded to VCAP IS1"); + return -EOPNOTSUPP; + } + if (filter->goto_target != -1) { + NL_SET_ERR_MSG_MOD(extack, + "Last action must be GOTO"); + return -EOPNOTSUPP; + } + if (!ocelot_port->vlan_aware) { + NL_SET_ERR_MSG_MOD(extack, + "Can only modify VLAN under VLAN aware bridge"); + return -EOPNOTSUPP; + } + filter->action.vid_replace_ena = true; + filter->action.pcp_dei_ena = true; + filter->action.vid = a->vlan.vid; + filter->action.pcp = a->vlan.prio; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + break; + case FLOW_ACTION_PRIORITY: + if (filter->block_id != VCAP_IS1) { + NL_SET_ERR_MSG_MOD(extack, + "Priority action can only be offloaded to VCAP IS1"); + return -EOPNOTSUPP; + } + if (filter->goto_target != -1) { + NL_SET_ERR_MSG_MOD(extack, + "Last action must be GOTO"); + return -EOPNOTSUPP; + } + filter->action.qos_ena = true; + filter->action.qos_val = a->priority; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + break; + case FLOW_ACTION_GOTO: + filter->goto_target = a->chain_index; + + if (filter->block_id == VCAP_IS1 && filter->lookup == 2) { + int pag = ocelot_chain_to_pag(filter->goto_target); + + filter->action.pag_override_mask = 0xff; + filter->action.pag_val = pag; + filter->type = OCELOT_VCAP_FILTER_PAG; + } + break; + case FLOW_ACTION_VLAN_PUSH: + if (filter->block_id != VCAP_ES0) { + NL_SET_ERR_MSG_MOD(extack, + "VLAN push action can only be offloaded to VCAP ES0"); + return -EOPNOTSUPP; + } + switch (ntohs(a->vlan.proto)) { + case ETH_P_8021Q: + tpid = OCELOT_TAG_TPID_SEL_8021Q; + break; + case ETH_P_8021AD: + tpid = OCELOT_TAG_TPID_SEL_8021AD; + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Cannot push custom TPID"); + return -EOPNOTSUPP; + } + filter->action.tag_a_tpid_sel = tpid; + filter->action.push_outer_tag = OCELOT_ES0_TAG; + filter->action.tag_a_vid_sel = 1; + filter->action.vid_a_val = a->vlan.vid; + filter->action.pcp_a_val = a->vlan.prio; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; break; default: + 
NL_SET_ERR_MSG_MOD(extack, "Cannot offload action"); return -EOPNOTSUPP; } } + if (filter->goto_target == -1) { + if ((filter->block_id == VCAP_IS2 && filter->lookup == 1) || + chain == 0) { + allow_missing_goto_target = true; + } else { + NL_SET_ERR_MSG_MOD(extack, "Missing GOTO action"); + return -EOPNOTSUPP; + } + } + + if (!ocelot_is_goto_target_valid(filter->goto_target, chain, ingress) && + !allow_missing_goto_target) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload this GOTO target"); + return -EOPNOTSUPP; + } + return 0; } -static int ocelot_flower_parse(struct flow_cls_offload *f, - struct ocelot_vcap_filter *filter) +static int ocelot_flower_parse_indev(struct ocelot *ocelot, int port, + struct flow_cls_offload *f, + struct ocelot_vcap_filter *filter) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + const struct vcap_props *vcap = &ocelot->vcap[VCAP_ES0]; + int key_length = vcap->keys[VCAP_ES0_IGR_PORT].length; + struct netlink_ext_ack *extack = f->common.extack; + struct net_device *dev, *indev; + struct flow_match_meta match; + int ingress_port; + + flow_rule_match_meta(rule, &match); + + if (!match.mask->ingress_ifindex) + return 0; + + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); + return -EOPNOTSUPP; + } + + dev = ocelot->ops->port_to_netdev(ocelot, port); + if (!dev) + return -EINVAL; + + indev = __dev_get_by_index(dev_net(dev), match.key->ingress_ifindex); + if (!indev) { + NL_SET_ERR_MSG_MOD(extack, + "Can't find the ingress port to match on"); + return -ENOENT; + } + + ingress_port = ocelot->ops->netdev_to_port(indev); + if (ingress_port < 0) { + NL_SET_ERR_MSG_MOD(extack, + "Can only offload an ocelot ingress port"); + return -EOPNOTSUPP; + } + if (ingress_port == port) { + NL_SET_ERR_MSG_MOD(extack, + "Ingress port is equal to the egress port"); + return -EINVAL; + } + + filter->ingress_port.value = ingress_port; + filter->ingress_port.mask = GENMASK(key_length - 1, 0); + + return 0; +} + +static int +ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, + struct flow_cls_offload *f, + struct ocelot_vcap_filter *filter) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; + struct netlink_ext_ack *extack = f->common.extack; u16 proto = ntohs(f->common.protocol); bool match_protocol = true; + int ret; if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_META) | BIT(FLOW_DISSECTOR_KEY_PORTS) | BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | @@ -63,6 +440,13 @@ static int ocelot_flower_parse(struct flow_cls_offload *f, return -EOPNOTSUPP; } + /* For VCAP ES0 (egress rewriter) we can match on the ingress port */ + if (!ingress) { + ret = ocelot_flower_parse_indev(ocelot, port, f, filter); + if (ret) + return ret; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_match_control match; @@ -72,6 +456,19 @@ static int ocelot_flower_parse(struct flow_cls_offload *f, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct flow_match_eth_addrs match; + if (filter->block_id == VCAP_ES0) { + NL_SET_ERR_MSG_MOD(extack, + "VCAP ES0 cannot match on MAC address"); + return -EOPNOTSUPP; + } + + flow_rule_match_eth_addrs(rule, &match); + + if (filter->block_id == VCAP_IS1 && + !is_zero_ether_addr(match.mask->dst)) { + NL_SET_ERR_MSG_MOD(extack, + "Key type S1_NORMAL cannot match on destination MAC"); + return -EOPNOTSUPP; + } + + /* The hw support
mac matches only for MAC_ETYPE key, * therefore if other matches(port, tcp flags, etc) are added * then just bail out @@ -103,6 +500,12 @@ static int ocelot_flower_parse(struct flow_cls_offload *f, flow_rule_match_basic(rule, &match); if (ntohs(match.key->n_proto) == ETH_P_IP) { + if (filter->block_id == VCAP_ES0) { + NL_SET_ERR_MSG_MOD(extack, + "VCAP ES0 cannot match on IP protocol"); + return -EOPNOTSUPP; + } + filter->key_type = OCELOT_VCAP_KEY_IPV4; filter->key.ipv4.proto.value[0] = match.key->ip_proto; @@ -111,6 +514,12 @@ static int ocelot_flower_parse(struct flow_cls_offload *f, match_protocol = false; } if (ntohs(match.key->n_proto) == ETH_P_IPV6) { + if (filter->block_id == VCAP_ES0) { + NL_SET_ERR_MSG_MOD(extack, + "VCAP ES0 cannot match on IP protocol"); + return -EOPNOTSUPP; + } + filter->key_type = OCELOT_VCAP_KEY_IPV6; filter->key.ipv6.proto.value[0] = match.key->ip_proto; @@ -125,6 +534,18 @@ static int ocelot_flower_parse(struct flow_cls_offload *f, struct flow_match_ipv4_addrs match; u8 *tmp; + if (filter->block_id == VCAP_ES0) { + NL_SET_ERR_MSG_MOD(extack, + "VCAP ES0 cannot match on IP address"); + return -EOPNOTSUPP; + } + + if (filter->block_id == VCAP_IS1 && *(u32 *)&match.mask->dst) { + NL_SET_ERR_MSG_MOD(extack, + "Key type S1_NORMAL cannot match on destination IP"); + return -EOPNOTSUPP; + } + flow_rule_match_ipv4_addrs(rule, &match); tmp = &filter->key.ipv4.sip.value.addr[0]; memcpy(tmp, &match.key->src, 4); @@ -148,6 +569,12 @@ static int ocelot_flower_parse(struct flow_cls_offload *f, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; + if (filter->block_id == VCAP_ES0) { + NL_SET_ERR_MSG_MOD(extack, + "VCAP ES0 cannot match on L4 ports"); + return -EOPNOTSUPP; + } + flow_rule_match_ports(rule, &match); filter->key.ipv4.sport.value = ntohs(match.key->src); filter->key.ipv4.sport.mask = ntohs(match.mask->src); @@ -170,6 +597,12 @@ static int ocelot_flower_parse(struct flow_cls_offload *f, finished_key_parsing: if (match_protocol && proto != ETH_P_ALL) { + if (filter->block_id == VCAP_ES0) { + NL_SET_ERR_MSG_MOD(extack, + "VCAP ES0 cannot match on L2 proto"); + return -EOPNOTSUPP; + } + /* TODO: support SNAP, LLC etc */ if (proto < ETH_P_802_3_MIN) return -EOPNOTSUPP; @@ -179,14 +612,28 @@ finished_key_parsing: } /* else, a filter of type OCELOT_VCAP_KEY_ANY is implicitly added */ + return 0; +} + +static int ocelot_flower_parse(struct ocelot *ocelot, int port, bool ingress, + struct flow_cls_offload *f, + struct ocelot_vcap_filter *filter) +{ + int ret; + filter->prio = f->common.prio; filter->id = f->cookie; - return ocelot_flower_parse_action(f, filter); + + ret = ocelot_flower_parse_action(ocelot, port, ingress, f, filter); + if (ret) + return ret; + + return ocelot_flower_parse_key(ocelot, port, ingress, f, filter); } static struct ocelot_vcap_filter -*ocelot_vcap_filter_create(struct ocelot *ocelot, int port, - struct flow_cls_offload *f) +*ocelot_vcap_filter_create(struct ocelot *ocelot, int port, bool ingress, + struct flow_cls_offload *f) { struct ocelot_vcap_filter *filter; @@ -194,26 +641,65 @@ static struct ocelot_vcap_filter if (!filter) return NULL; - filter->ingress_port_mask = BIT(port); + if (ingress) { + filter->ingress_port_mask = BIT(port); + } else { + const struct vcap_props *vcap = &ocelot->vcap[VCAP_ES0]; + int key_length = vcap->keys[VCAP_ES0_EGR_PORT].length; + + filter->egress_port.value = port; + filter->egress_port.mask = GENMASK(key_length - 1, 0); + } + return filter; } +static int 
ocelot_vcap_dummy_filter_add(struct ocelot *ocelot, + struct ocelot_vcap_filter *filter) +{ + list_add(&filter->list, &ocelot->dummy_rules); + + return 0; +} + +static int ocelot_vcap_dummy_filter_del(struct ocelot *ocelot, + struct ocelot_vcap_filter *filter) +{ + list_del(&filter->list); + kfree(filter); + + return 0; +} + int ocelot_cls_flower_replace(struct ocelot *ocelot, int port, struct flow_cls_offload *f, bool ingress) { + struct netlink_ext_ack *extack = f->common.extack; struct ocelot_vcap_filter *filter; + int chain = f->common.chain_index; int ret; - filter = ocelot_vcap_filter_create(ocelot, port, f); + if (chain && !ocelot_find_vcap_filter_that_points_at(ocelot, chain)) { + NL_SET_ERR_MSG_MOD(extack, "No default GOTO action points to this chain"); + return -EOPNOTSUPP; + } + + filter = ocelot_vcap_filter_create(ocelot, port, ingress, f); if (!filter) return -ENOMEM; - ret = ocelot_flower_parse(f, filter); + ret = ocelot_flower_parse(ocelot, port, ingress, f, filter); if (ret) { kfree(filter); return ret; } + /* The non-optional GOTOs for the TCAM skeleton don't need + * to be actually offloaded. + */ + if (filter->type == OCELOT_VCAP_FILTER_DUMMY) + return ocelot_vcap_dummy_filter_add(ocelot, filter); + return ocelot_vcap_filter_add(ocelot, filter, f->common.extack); } EXPORT_SYMBOL_GPL(ocelot_cls_flower_replace); @@ -221,28 +707,49 @@ EXPORT_SYMBOL_GPL(ocelot_cls_flower_replace); int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port, struct flow_cls_offload *f, bool ingress) { - struct ocelot_vcap_filter filter; + struct ocelot_vcap_filter *filter; + struct ocelot_vcap_block *block; + int block_id; + + block_id = ocelot_chain_to_block(f->common.chain_index, ingress); + if (block_id < 0) + return 0; - filter.prio = f->common.prio; - filter.id = f->cookie; + block = &ocelot->block[block_id]; - return ocelot_vcap_filter_del(ocelot, &filter); + filter = ocelot_vcap_block_find_filter_by_id(block, f->cookie); + if (!filter) + return 0; + + if (filter->type == OCELOT_VCAP_FILTER_DUMMY) + return ocelot_vcap_dummy_filter_del(ocelot, filter); + + return ocelot_vcap_filter_del(ocelot, filter); } EXPORT_SYMBOL_GPL(ocelot_cls_flower_destroy); int ocelot_cls_flower_stats(struct ocelot *ocelot, int port, struct flow_cls_offload *f, bool ingress) { - struct ocelot_vcap_filter filter; - int ret; + struct ocelot_vcap_filter *filter; + struct ocelot_vcap_block *block; + int block_id, ret; + + block_id = ocelot_chain_to_block(f->common.chain_index, ingress); + if (block_id < 0) + return 0; + + block = &ocelot->block[block_id]; + + filter = ocelot_vcap_block_find_filter_by_id(block, f->cookie); + if (!filter || filter->type == OCELOT_VCAP_FILTER_DUMMY) + return 0; - filter.prio = f->common.prio; - filter.id = f->cookie; - ret = ocelot_vcap_filter_stats_update(ocelot, &filter); + ret = ocelot_vcap_filter_stats_update(ocelot, filter); if (ret) return ret; - flow_stats_update(&f->stats, 0x0, filter.stats.pkts, 0, 0x0, + flow_stats_update(&f->stats, 0x0, filter->stats.pkts, 0, 0x0, FLOW_ACTION_HW_STATS_IMMEDIATE); return 0; } diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c index d22711282183..0acb45948418 100644 --- a/drivers/net/ethernet/mscc/ocelot_io.c +++ b/drivers/net/ethernet/mscc/ocelot_io.c @@ -71,6 +71,23 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg) } EXPORT_SYMBOL(ocelot_port_writel); +u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target, + u32 reg, u32 offset) +{ + u32 val; + + 
regmap_read(ocelot->targets[target], + ocelot->map[target][reg] + offset, &val); + return val; +} + +void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, + u32 val, u32 reg, u32 offset) +{ + regmap_write(ocelot->targets[target], + ocelot->map[target][reg] + offset, val); +} + int ocelot_regfields_init(struct ocelot *ocelot, const struct reg_field *const regfields) { diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 8490e42e9e2d..b34da11acf65 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -330,7 +330,6 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) u8 grp = 0; /* Send everything on CPU group 0 */ unsigned int i, count, last; int port = priv->chip_port; - bool do_tstamp; val = ocelot_read(ocelot, QS_INJ_STATUS); if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp))) || @@ -345,7 +344,23 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) info.vid = skb_vlan_tag_get(skb); /* Check if timestamping is needed */ - do_tstamp = (ocelot_port_add_txtstamp_skb(ocelot_port, skb) == 0); + if (ocelot->ptp && (shinfo->tx_flags & SKBTX_HW_TSTAMP)) { + info.rew_op = ocelot_port->ptp_cmd; + + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { + struct sk_buff *clone; + + clone = skb_clone_sk(skb); + if (!clone) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + + ocelot_port_add_txtstamp_skb(ocelot, port, clone); + + info.rew_op |= clone->cb[0] << 3; + } + } - - if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) { - info.rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) - info.rew_op |= skb->cb[0] << 3; - } @@ -383,8 +398,7 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - if (!do_tstamp) - dev_kfree_skb_any(skb); + kfree_skb(skb); return NETDEV_TX_OK; } @@ -642,6 +656,37 @@ static const struct net_device_ops ocelot_port_netdev_ops = { .ndo_do_ioctl = ocelot_ioctl, }; +struct net_device *ocelot_port_to_netdev(struct ocelot *ocelot, int port) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct ocelot_port_private *priv; + + if (!ocelot_port) + return NULL; + + priv = container_of(ocelot_port, struct ocelot_port_private, port); + + return priv->dev; +} + +/* Checks if the net_device instance given to us originates from our driver */ +static bool ocelot_netdevice_dev_check(const struct net_device *dev) +{ + return dev->netdev_ops == &ocelot_port_netdev_ops; +} + +int ocelot_netdev_to_port(struct net_device *dev) +{ + struct ocelot_port_private *priv; + + if (!dev || !ocelot_netdevice_dev_check(dev)) + return -EINVAL; + + priv = netdev_priv(dev); + + return priv->chip_port; +} + static void ocelot_port_get_strings(struct net_device *netdev, u32 sset, u8 *data) { @@ -746,7 +791,7 @@ static int ocelot_port_attr_set(struct net_device *dev, break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: ocelot_port_vlan_filtering(ocelot, port, - attr->u.vlan_filtering); + attr->u.vlan_filtering, trans); break; case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: ocelot_port_attr_mc_set(ocelot, port, !attr->u.mc_disabled); @@ -863,12 +908,6 @@ static int ocelot_port_obj_del(struct net_device *dev, return ret; } -/* Checks if the net_device instance given to us originate from our driver.
*/ -static bool ocelot_netdevice_dev_check(const struct net_device *dev) -{ - return dev->netdev_ops == &ocelot_port_netdev_ops; -} - static int ocelot_netdevice_port_event(struct net_device *dev, unsigned long event, struct netdev_notifier_changeupper_info *info) diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index 1e08fe4daaef..a33ab315cc6b 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -300,7 +300,8 @@ int ocelot_ptp_enable(struct ptp_clock_info *ptp, } EXPORT_SYMBOL(ocelot_ptp_enable); -int ocelot_init_timestamp(struct ocelot *ocelot, struct ptp_clock_info *info) +int ocelot_init_timestamp(struct ocelot *ocelot, + const struct ptp_clock_info *info) { struct ptp_clock *ptp_clock; int i; diff --git a/drivers/net/ethernet/mscc/ocelot_s2.h b/drivers/net/ethernet/mscc/ocelot_s2.h deleted file mode 100644 index 80107bec2e45..000000000000 --- a/drivers/net/ethernet/mscc/ocelot_s2.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ -/* Microsemi Ocelot Switch driver - * Copyright (c) 2018 Microsemi Corporation - */ - -#ifndef _OCELOT_S2_CORE_H_ -#define _OCELOT_S2_CORE_H_ - -#define S2_CORE_UPDATE_CTRL_UPDATE_CMD(x) (((x) << 22) & GENMASK(24, 22)) -#define S2_CORE_UPDATE_CTRL_UPDATE_CMD_M GENMASK(24, 22) -#define S2_CORE_UPDATE_CTRL_UPDATE_CMD_X(x) (((x) & GENMASK(24, 22)) >> 22) -#define S2_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS BIT(21) -#define S2_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS BIT(20) -#define S2_CORE_UPDATE_CTRL_UPDATE_CNT_DIS BIT(19) -#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR(x) (((x) << 3) & GENMASK(18, 3)) -#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR_M GENMASK(18, 3) -#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR_X(x) (((x) & GENMASK(18, 3)) >> 3) -#define S2_CORE_UPDATE_CTRL_UPDATE_SHOT BIT(2) -#define S2_CORE_UPDATE_CTRL_CLEAR_CACHE BIT(1) -#define S2_CORE_UPDATE_CTRL_MV_TRAFFIC_IGN BIT(0) - -#define S2_CORE_MV_CFG_MV_NUM_POS(x) (((x) << 16) & GENMASK(31, 16)) -#define S2_CORE_MV_CFG_MV_NUM_POS_M GENMASK(31, 16) -#define S2_CORE_MV_CFG_MV_NUM_POS_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define S2_CORE_MV_CFG_MV_SIZE(x) ((x) & GENMASK(15, 0)) -#define S2_CORE_MV_CFG_MV_SIZE_M GENMASK(15, 0) - -#define S2_CACHE_ENTRY_DAT_RSZ 0x4 - -#define S2_CACHE_MASK_DAT_RSZ 0x4 - -#define S2_CACHE_ACTION_DAT_RSZ 0x4 - -#define S2_CACHE_CNT_DAT_RSZ 0x4 - -#define S2_STICKY_VCAP_ROW_DELETED_STICKY BIT(0) - -#define S2_BIST_CTRL_TCAM_BIST BIT(1) -#define S2_BIST_CTRL_TCAM_INIT BIT(0) - -#define S2_BIST_CFG_TCAM_BIST_SOE_ENA BIT(8) -#define S2_BIST_CFG_TCAM_HCG_DIS BIT(7) -#define S2_BIST_CFG_TCAM_CG_DIS BIT(6) -#define S2_BIST_CFG_TCAM_BIAS(x) ((x) & GENMASK(5, 0)) -#define S2_BIST_CFG_TCAM_BIAS_M GENMASK(5, 0) - -#define S2_BIST_STAT_BIST_RT_ERR BIT(15) -#define S2_BIST_STAT_BIST_PENC_ERR BIT(14) -#define S2_BIST_STAT_BIST_COMP_ERR BIT(13) -#define S2_BIST_STAT_BIST_ADDR_ERR BIT(12) -#define S2_BIST_STAT_BIST_BL1E_ERR BIT(11) -#define S2_BIST_STAT_BIST_BL1_ERR BIT(10) -#define S2_BIST_STAT_BIST_BL0E_ERR BIT(9) -#define S2_BIST_STAT_BIST_BL0_ERR BIT(8) -#define S2_BIST_STAT_BIST_PH1_ERR BIT(7) -#define S2_BIST_STAT_BIST_PH0_ERR BIT(6) -#define S2_BIST_STAT_BIST_PV1_ERR BIT(5) -#define S2_BIST_STAT_BIST_PV0_ERR BIT(4) -#define S2_BIST_STAT_BIST_RUN BIT(3) -#define S2_BIST_STAT_BIST_ERR BIT(2) -#define S2_BIST_STAT_BIST_BUSY BIT(1) -#define S2_BIST_STAT_TCAM_RDY BIT(0) - -#endif /* _OCELOT_S2_CORE_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c 
b/drivers/net/ethernet/mscc/ocelot_vcap.c index 3ef620faf995..d8c778ee6f1b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -9,9 +9,7 @@ #include <soc/mscc/ocelot_vcap.h> #include "ocelot_police.h" #include "ocelot_vcap.h" -#include "ocelot_s2.h" -#define OCELOT_POLICER_DISCARD 0x17f #define ENTRY_WIDTH 32 enum vcap_sel { @@ -48,145 +46,174 @@ struct vcap_data { u32 tg_mask; /* Current type-group mask */ }; -static u32 vcap_s2_read_update_ctrl(struct ocelot *ocelot) +static u32 vcap_read_update_ctrl(struct ocelot *ocelot, + const struct vcap_props *vcap) { - return ocelot_read(ocelot, S2_CORE_UPDATE_CTRL); + return ocelot_target_read(ocelot, vcap->target, VCAP_CORE_UPDATE_CTRL); } -static void vcap_cmd(struct ocelot *ocelot, u16 ix, int cmd, int sel) +static void vcap_cmd(struct ocelot *ocelot, const struct vcap_props *vcap, + u16 ix, int cmd, int sel) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; + u32 value = (VCAP_CORE_UPDATE_CTRL_UPDATE_CMD(cmd) | + VCAP_CORE_UPDATE_CTRL_UPDATE_ADDR(ix) | + VCAP_CORE_UPDATE_CTRL_UPDATE_SHOT); - u32 value = (S2_CORE_UPDATE_CTRL_UPDATE_CMD(cmd) | - S2_CORE_UPDATE_CTRL_UPDATE_ADDR(ix) | - S2_CORE_UPDATE_CTRL_UPDATE_SHOT); - - if ((sel & VCAP_SEL_ENTRY) && ix >= vcap_is2->entry_count) + if ((sel & VCAP_SEL_ENTRY) && ix >= vcap->entry_count) return; if (!(sel & VCAP_SEL_ENTRY)) - value |= S2_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS; + value |= VCAP_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS; if (!(sel & VCAP_SEL_ACTION)) - value |= S2_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS; + value |= VCAP_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS; if (!(sel & VCAP_SEL_COUNTER)) - value |= S2_CORE_UPDATE_CTRL_UPDATE_CNT_DIS; + value |= VCAP_CORE_UPDATE_CTRL_UPDATE_CNT_DIS; + + ocelot_target_write(ocelot, vcap->target, value, VCAP_CORE_UPDATE_CTRL); - ocelot_write(ocelot, value, S2_CORE_UPDATE_CTRL); - readx_poll_timeout(vcap_s2_read_update_ctrl, ocelot, value, - (value & S2_CORE_UPDATE_CTRL_UPDATE_SHOT) == 0, - 10, 100000); + read_poll_timeout(vcap_read_update_ctrl, value, + (value & VCAP_CORE_UPDATE_CTRL_UPDATE_SHOT) == 0, + 10, 100000, false, ocelot, vcap); } /* Convert from 0-based row to VCAP entry row and run command */ -static void vcap_row_cmd(struct ocelot *ocelot, u32 row, int cmd, int sel) +static void vcap_row_cmd(struct ocelot *ocelot, const struct vcap_props *vcap, + u32 row, int cmd, int sel) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; - - vcap_cmd(ocelot, vcap_is2->entry_count - row - 1, cmd, sel); + vcap_cmd(ocelot, vcap, vcap->entry_count - row - 1, cmd, sel); } -static void vcap_entry2cache(struct ocelot *ocelot, struct vcap_data *data) +static void vcap_entry2cache(struct ocelot *ocelot, + const struct vcap_props *vcap, + struct vcap_data *data) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; u32 entry_words, i; - entry_words = DIV_ROUND_UP(vcap_is2->entry_width, ENTRY_WIDTH); + entry_words = DIV_ROUND_UP(vcap->entry_width, ENTRY_WIDTH); for (i = 0; i < entry_words; i++) { - ocelot_write_rix(ocelot, data->entry[i], S2_CACHE_ENTRY_DAT, i); - ocelot_write_rix(ocelot, ~data->mask[i], S2_CACHE_MASK_DAT, i); + ocelot_target_write_rix(ocelot, vcap->target, data->entry[i], + VCAP_CACHE_ENTRY_DAT, i); + ocelot_target_write_rix(ocelot, vcap->target, ~data->mask[i], + VCAP_CACHE_MASK_DAT, i); } - ocelot_write(ocelot, data->tg, S2_CACHE_TG_DAT); + ocelot_target_write(ocelot, vcap->target, data->tg, VCAP_CACHE_TG_DAT); } -static void vcap_cache2entry(struct ocelot *ocelot, struct 
vcap_data *data) +static void vcap_cache2entry(struct ocelot *ocelot, + const struct vcap_props *vcap, + struct vcap_data *data) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; u32 entry_words, i; - entry_words = DIV_ROUND_UP(vcap_is2->entry_width, ENTRY_WIDTH); + entry_words = DIV_ROUND_UP(vcap->entry_width, ENTRY_WIDTH); for (i = 0; i < entry_words; i++) { - data->entry[i] = ocelot_read_rix(ocelot, S2_CACHE_ENTRY_DAT, i); + data->entry[i] = ocelot_target_read_rix(ocelot, vcap->target, + VCAP_CACHE_ENTRY_DAT, i); // Invert mask - data->mask[i] = ~ocelot_read_rix(ocelot, S2_CACHE_MASK_DAT, i); + data->mask[i] = ~ocelot_target_read_rix(ocelot, vcap->target, + VCAP_CACHE_MASK_DAT, i); } - data->tg = ocelot_read(ocelot, S2_CACHE_TG_DAT); + data->tg = ocelot_target_read(ocelot, vcap->target, VCAP_CACHE_TG_DAT); } -static void vcap_action2cache(struct ocelot *ocelot, struct vcap_data *data) +static void vcap_action2cache(struct ocelot *ocelot, + const struct vcap_props *vcap, + struct vcap_data *data) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; u32 action_words, mask; int i, width; /* Encode action type */ - width = vcap_is2->action_type_width; + width = vcap->action_type_width; if (width) { mask = GENMASK(width, 0); data->action[0] = ((data->action[0] & ~mask) | data->type); } - action_words = DIV_ROUND_UP(vcap_is2->action_width, ENTRY_WIDTH); + action_words = DIV_ROUND_UP(vcap->action_width, ENTRY_WIDTH); for (i = 0; i < action_words; i++) - ocelot_write_rix(ocelot, data->action[i], S2_CACHE_ACTION_DAT, - i); + ocelot_target_write_rix(ocelot, vcap->target, data->action[i], + VCAP_CACHE_ACTION_DAT, i); - for (i = 0; i < vcap_is2->counter_words; i++) - ocelot_write_rix(ocelot, data->counter[i], S2_CACHE_CNT_DAT, i); + for (i = 0; i < vcap->counter_words; i++) + ocelot_target_write_rix(ocelot, vcap->target, data->counter[i], + VCAP_CACHE_CNT_DAT, i); } -static void vcap_cache2action(struct ocelot *ocelot, struct vcap_data *data) +static void vcap_cache2action(struct ocelot *ocelot, + const struct vcap_props *vcap, + struct vcap_data *data) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; u32 action_words; int i, width; - action_words = DIV_ROUND_UP(vcap_is2->action_width, ENTRY_WIDTH); + action_words = DIV_ROUND_UP(vcap->action_width, ENTRY_WIDTH); for (i = 0; i < action_words; i++) - data->action[i] = ocelot_read_rix(ocelot, S2_CACHE_ACTION_DAT, - i); + data->action[i] = ocelot_target_read_rix(ocelot, vcap->target, + VCAP_CACHE_ACTION_DAT, + i); - for (i = 0; i < vcap_is2->counter_words; i++) - data->counter[i] = ocelot_read_rix(ocelot, S2_CACHE_CNT_DAT, i); + for (i = 0; i < vcap->counter_words; i++) + data->counter[i] = ocelot_target_read_rix(ocelot, vcap->target, + VCAP_CACHE_CNT_DAT, + i); /* Extract action type */ - width = vcap_is2->action_type_width; + width = vcap->action_type_width; data->type = (width ? (data->action[0] & GENMASK(width, 0)) : 0); } /* Calculate offsets for entry */ -static void is2_data_get(struct ocelot *ocelot, struct vcap_data *data, int ix) +static void vcap_data_offset_get(const struct vcap_props *vcap, + struct vcap_data *data, int ix) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; - int i, col, offset, count, cnt, base; - int width = vcap_is2->tg_width; + int num_subwords_per_entry, num_subwords_per_action; + int i, col, offset, num_entries_per_row, base; + u32 width = vcap->tg_width; - count = (data->tg_sw == VCAP_TG_HALF ? 
2 : 4); - col = (ix % 2); - cnt = (vcap_is2->sw_count / count); - base = (vcap_is2->sw_count - col * cnt - cnt); + switch (data->tg_sw) { + case VCAP_TG_FULL: + num_entries_per_row = 1; + break; + case VCAP_TG_HALF: + num_entries_per_row = 2; + break; + case VCAP_TG_QUARTER: + num_entries_per_row = 4; + break; + default: + return; + } + + col = (ix % num_entries_per_row); + num_subwords_per_entry = (vcap->sw_count / num_entries_per_row); + base = (vcap->sw_count - col * num_subwords_per_entry - + num_subwords_per_entry); data->tg_value = 0; data->tg_mask = 0; - for (i = 0; i < cnt; i++) { + for (i = 0; i < num_subwords_per_entry; i++) { offset = ((base + i) * width); data->tg_value |= (data->tg_sw << offset); data->tg_mask |= GENMASK(offset + width - 1, offset); } /* Calculate key/action/counter offsets */ - col = (count - col - 1); - data->key_offset = (base * vcap_is2->entry_width) / vcap_is2->sw_count; - data->counter_offset = (cnt * col * vcap_is2->counter_width); + col = (num_entries_per_row - col - 1); + data->key_offset = (base * vcap->entry_width) / vcap->sw_count; + data->counter_offset = (num_subwords_per_entry * col * + vcap->counter_width); i = data->type; - width = vcap_is2->action_table[i].width; - cnt = vcap_is2->action_table[i].count; - data->action_offset = - (((cnt * col * width) / count) + vcap_is2->action_type_width); + width = vcap->action_table[i].width; + num_subwords_per_action = vcap->action_table[i].count; + data->action_offset = ((num_subwords_per_action * col * width) / + num_entries_per_row); + data->action_offset += vcap->action_type_width; } static void vcap_data_set(u32 *data, u32 offset, u32 len, u32 value) @@ -224,22 +251,21 @@ static void vcap_key_field_set(struct vcap_data *data, u32 offset, u32 width, vcap_data_set(data->mask, offset + data->key_offset, width, mask); } -static void vcap_key_set(struct ocelot *ocelot, struct vcap_data *data, - enum vcap_is2_half_key_field field, - u32 value, u32 mask) +static void vcap_key_set(const struct vcap_props *vcap, struct vcap_data *data, + int field, u32 value, u32 mask) { - u32 offset = ocelot->vcap_is2_keys[field].offset; - u32 length = ocelot->vcap_is2_keys[field].length; + u32 offset = vcap->keys[field].offset; + u32 length = vcap->keys[field].length; vcap_key_field_set(data, offset, length, value, mask); } -static void vcap_key_bytes_set(struct ocelot *ocelot, struct vcap_data *data, - enum vcap_is2_half_key_field field, +static void vcap_key_bytes_set(const struct vcap_props *vcap, + struct vcap_data *data, int field, u8 *val, u8 *msk) { - u32 offset = ocelot->vcap_is2_keys[field].offset; - u32 count = ocelot->vcap_is2_keys[field].length; + u32 offset = vcap->keys[field].offset; + u32 count = vcap->keys[field].length; u32 i, j, n = 0, value = 0, mask = 0; WARN_ON(count % 8); @@ -265,37 +291,37 @@ static void vcap_key_bytes_set(struct ocelot *ocelot, struct vcap_data *data, } } -static void vcap_key_l4_port_set(struct ocelot *ocelot, struct vcap_data *data, - enum vcap_is2_half_key_field field, +static void vcap_key_l4_port_set(const struct vcap_props *vcap, + struct vcap_data *data, int field, struct ocelot_vcap_udp_tcp *port) { - u32 offset = ocelot->vcap_is2_keys[field].offset; - u32 length = ocelot->vcap_is2_keys[field].length; + u32 offset = vcap->keys[field].offset; + u32 length = vcap->keys[field].length; WARN_ON(length != 16); vcap_key_field_set(data, offset, length, port->value, port->mask); } -static void vcap_key_bit_set(struct ocelot *ocelot, struct vcap_data *data, - enum 
vcap_is2_half_key_field field, +static void vcap_key_bit_set(const struct vcap_props *vcap, + struct vcap_data *data, int field, enum ocelot_vcap_bit val) { - u32 offset = ocelot->vcap_is2_keys[field].offset; - u32 length = ocelot->vcap_is2_keys[field].length; u32 value = (val == OCELOT_VCAP_BIT_1 ? 1 : 0); u32 msk = (val == OCELOT_VCAP_BIT_ANY ? 0 : 1); + u32 offset = vcap->keys[field].offset; + u32 length = vcap->keys[field].length; WARN_ON(length != 1); vcap_key_field_set(data, offset, length, value, msk); } -static void vcap_action_set(struct ocelot *ocelot, struct vcap_data *data, - enum vcap_is2_action_field field, u32 value) +static void vcap_action_set(const struct vcap_props *vcap, + struct vcap_data *data, int field, u32 value) { - int offset = ocelot->vcap_is2_actions[field].offset; - int length = ocelot->vcap_is2_actions[field].length; + int offset = vcap->actions[field].offset; + int length = vcap->actions[field].length; vcap_data_set(data->action, offset + data->action_offset, length, value); @@ -304,40 +330,21 @@ static void vcap_action_set(struct ocelot *ocelot, struct vcap_data *data, static void is2_action_set(struct ocelot *ocelot, struct vcap_data *data, struct ocelot_vcap_filter *filter) { - switch (filter->action) { - case OCELOT_VCAP_ACTION_DROP: - vcap_action_set(ocelot, data, VCAP_IS2_ACT_PORT_MASK, 0); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_MASK_MODE, 1); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_POLICE_ENA, 1); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_POLICE_IDX, - OCELOT_POLICER_DISCARD); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_CPU_QU_NUM, 0); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_CPU_COPY_ENA, 0); - break; - case OCELOT_VCAP_ACTION_TRAP: - vcap_action_set(ocelot, data, VCAP_IS2_ACT_PORT_MASK, 0); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_MASK_MODE, 1); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_POLICE_ENA, 0); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_POLICE_IDX, 0); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_CPU_QU_NUM, 0); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_CPU_COPY_ENA, 1); - break; - case OCELOT_VCAP_ACTION_POLICE: - vcap_action_set(ocelot, data, VCAP_IS2_ACT_PORT_MASK, 0); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_MASK_MODE, 0); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_POLICE_ENA, 1); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_POLICE_IDX, - filter->pol_ix); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_CPU_QU_NUM, 0); - vcap_action_set(ocelot, data, VCAP_IS2_ACT_CPU_COPY_ENA, 0); - break; - } + const struct vcap_props *vcap = &ocelot->vcap[VCAP_IS2]; + struct ocelot_vcap_action *a = &filter->action; + + vcap_action_set(vcap, data, VCAP_IS2_ACT_MASK_MODE, a->mask_mode); + vcap_action_set(vcap, data, VCAP_IS2_ACT_PORT_MASK, a->port_mask); + vcap_action_set(vcap, data, VCAP_IS2_ACT_POLICE_ENA, a->police_ena); + vcap_action_set(vcap, data, VCAP_IS2_ACT_POLICE_IDX, a->pol_ix); + vcap_action_set(vcap, data, VCAP_IS2_ACT_CPU_QU_NUM, a->cpu_qu_num); + vcap_action_set(vcap, data, VCAP_IS2_ACT_CPU_COPY_ENA, a->cpu_copy_ena); } static void is2_entry_set(struct ocelot *ocelot, int ix, struct ocelot_vcap_filter *filter) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; + const struct vcap_props *vcap = &ocelot->vcap[VCAP_IS2]; struct ocelot_vcap_key_vlan *tag = &filter->vlan; u32 val, msk, type, type_mask = 0xf, i, count; struct ocelot_vcap_u64 payload; @@ -348,52 +355,55 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, memset(&data, 0, sizeof(data)); /* Read row */ - vcap_row_cmd(ocelot, 
row, VCAP_CMD_READ, VCAP_SEL_ALL); - vcap_cache2entry(ocelot, &data); - vcap_cache2action(ocelot, &data); + vcap_row_cmd(ocelot, vcap, row, VCAP_CMD_READ, VCAP_SEL_ALL); + vcap_cache2entry(ocelot, vcap, &data); + vcap_cache2action(ocelot, vcap, &data); data.tg_sw = VCAP_TG_HALF; - is2_data_get(ocelot, &data, ix); + vcap_data_offset_get(vcap, &data, ix); data.tg = (data.tg & ~data.tg_mask); if (filter->prio != 0) data.tg |= data.tg_value; data.type = IS2_ACTION_TYPE_NORMAL; - vcap_key_set(ocelot, &data, VCAP_IS2_HK_PAG, 0, 0); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_IGR_PORT_MASK, 0, + vcap_key_set(vcap, &data, VCAP_IS2_HK_PAG, filter->pag, 0xff); + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_FIRST, + (filter->lookup == 0) ? OCELOT_VCAP_BIT_1 : + OCELOT_VCAP_BIT_0); + vcap_key_set(vcap, &data, VCAP_IS2_HK_IGR_PORT_MASK, 0, ~filter->ingress_port_mask); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_FIRST, OCELOT_VCAP_BIT_1); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_HOST_MATCH, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_FIRST, OCELOT_VCAP_BIT_ANY); + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_HOST_MATCH, OCELOT_VCAP_BIT_ANY); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L2_MC, filter->dmac_mc); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L2_BC, filter->dmac_bc); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_VLAN_TAGGED, tag->tagged); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_VID, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L2_MC, filter->dmac_mc); + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L2_BC, filter->dmac_bc); + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_VLAN_TAGGED, tag->tagged); + vcap_key_set(vcap, &data, VCAP_IS2_HK_VID, tag->vid.value, tag->vid.mask); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_PCP, + vcap_key_set(vcap, &data, VCAP_IS2_HK_PCP, tag->pcp.value[0], tag->pcp.mask[0]); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_DEI, tag->dei); + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_DEI, tag->dei); switch (filter->key_type) { case OCELOT_VCAP_KEY_ETYPE: { struct ocelot_vcap_key_etype *etype = &filter->key.etype; type = IS2_TYPE_ETYPE; - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_L2_DMAC, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L2_DMAC, etype->dmac.value, etype->dmac.mask); - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_L2_SMAC, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L2_SMAC, etype->smac.value, etype->smac.mask); - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_MAC_ETYPE_ETYPE, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_MAC_ETYPE_ETYPE, etype->etype.value, etype->etype.mask); /* Clear unused bits */ - vcap_key_set(ocelot, &data, VCAP_IS2_HK_MAC_ETYPE_L2_PAYLOAD0, + vcap_key_set(vcap, &data, VCAP_IS2_HK_MAC_ETYPE_L2_PAYLOAD0, 0, 0); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_MAC_ETYPE_L2_PAYLOAD1, + vcap_key_set(vcap, &data, VCAP_IS2_HK_MAC_ETYPE_L2_PAYLOAD1, 0, 0); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_MAC_ETYPE_L2_PAYLOAD2, + vcap_key_set(vcap, &data, VCAP_IS2_HK_MAC_ETYPE_L2_PAYLOAD2, 0, 0); - vcap_key_bytes_set(ocelot, &data, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_MAC_ETYPE_L2_PAYLOAD0, etype->data.value, etype->data.mask); break; @@ -402,15 +412,15 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, struct ocelot_vcap_key_llc *llc = &filter->key.llc; type = IS2_TYPE_LLC; - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_L2_DMAC, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L2_DMAC, llc->dmac.value, llc->dmac.mask); - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_L2_SMAC, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L2_SMAC, 
llc->smac.value, llc->smac.mask); for (i = 0; i < 4; i++) { payload.value[i] = llc->llc.value[i]; payload.mask[i] = llc->llc.mask[i]; } - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_MAC_LLC_L2_LLC, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_MAC_LLC_L2_LLC, payload.value, payload.mask); break; } @@ -418,11 +428,11 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, struct ocelot_vcap_key_snap *snap = &filter->key.snap; type = IS2_TYPE_SNAP; - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_L2_DMAC, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L2_DMAC, snap->dmac.value, snap->dmac.mask); - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_L2_SMAC, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L2_SMAC, snap->smac.value, snap->smac.mask); - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_MAC_SNAP_L2_SNAP, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_MAC_SNAP_L2_SNAP, filter->key.snap.snap.value, filter->key.snap.snap.mask); break; @@ -431,24 +441,24 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, struct ocelot_vcap_key_arp *arp = &filter->key.arp; type = IS2_TYPE_ARP; - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_MAC_ARP_SMAC, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_SMAC, arp->smac.value, arp->smac.mask); - vcap_key_bit_set(ocelot, &data, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_ADDR_SPACE_OK, arp->ethernet); - vcap_key_bit_set(ocelot, &data, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_PROTO_SPACE_OK, arp->ip); - vcap_key_bit_set(ocelot, &data, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_LEN_OK, arp->length); - vcap_key_bit_set(ocelot, &data, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_TARGET_MATCH, arp->dmac_match); - vcap_key_bit_set(ocelot, &data, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_SENDER_MATCH, arp->smac_match); - vcap_key_bit_set(ocelot, &data, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_OPCODE_UNKNOWN, arp->unknown); @@ -457,15 +467,15 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, (arp->arp == OCELOT_VCAP_BIT_0 ? 2 : 0)); msk = ((arp->req == OCELOT_VCAP_BIT_ANY ? 0 : 1) | (arp->arp == OCELOT_VCAP_BIT_ANY ? 0 : 2)); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_MAC_ARP_OPCODE, + vcap_key_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_OPCODE, val, msk); - vcap_key_bytes_set(ocelot, &data, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_L3_IP4_DIP, arp->dip.value.addr, arp->dip.mask.addr); - vcap_key_bytes_set(ocelot, &data, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_L3_IP4_SIP, arp->sip.value.addr, arp->sip.mask.addr); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_MAC_ARP_DIP_EQ_SIP, + vcap_key_set(vcap, &data, VCAP_IS2_HK_MAC_ARP_DIP_EQ_SIP, 0, 0); break; } @@ -534,22 +544,22 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, seq_zero = ipv6->seq_zero; } - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_IP4, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_IP4, ipv4 ? 
OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L3_FRAGMENT, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L3_FRAGMENT, fragment); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_L3_FRAG_OFS_GT0, 0, 0); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L3_OPTIONS, + vcap_key_set(vcap, &data, VCAP_IS2_HK_L3_FRAG_OFS_GT0, 0, 0); + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L3_OPTIONS, options); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_IP4_L3_TTL_GT0, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_IP4_L3_TTL_GT0, ttl); - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_L3_TOS, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L3_TOS, ds.value, ds.mask); - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_L3_IP4_DIP, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L3_IP4_DIP, dip.value.addr, dip.mask.addr); - vcap_key_bytes_set(ocelot, &data, VCAP_IS2_HK_L3_IP4_SIP, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L3_IP4_SIP, sip.value.addr, sip.mask.addr); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_DIP_EQ_SIP, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_DIP_EQ_SIP, sip_eq_dip); val = proto.value[0]; msk = proto.mask[0]; @@ -558,33 +568,33 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, /* UDP/TCP protocol match */ tcp = (val == 6 ? OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_TCP, tcp); - vcap_key_l4_port_set(ocelot, &data, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_TCP, tcp); + vcap_key_l4_port_set(vcap, &data, VCAP_IS2_HK_L4_DPORT, dport); - vcap_key_l4_port_set(ocelot, &data, + vcap_key_l4_port_set(vcap, &data, VCAP_IS2_HK_L4_SPORT, sport); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_L4_RNG, 0, 0); - vcap_key_bit_set(ocelot, &data, + vcap_key_set(vcap, &data, VCAP_IS2_HK_L4_RNG, 0, 0); + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L4_SPORT_EQ_DPORT, sport_eq_dport); - vcap_key_bit_set(ocelot, &data, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L4_SEQUENCE_EQ0, seq_zero); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L4_FIN, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L4_FIN, tcp_fin); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L4_SYN, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L4_SYN, tcp_syn); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L4_RST, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L4_RST, tcp_rst); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L4_PSH, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L4_PSH, tcp_psh); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L4_ACK, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L4_ACK, tcp_ack); - vcap_key_bit_set(ocelot, &data, VCAP_IS2_HK_L4_URG, + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L4_URG, tcp_urg); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_L4_1588_DOM, + vcap_key_set(vcap, &data, VCAP_IS2_HK_L4_1588_DOM, 0, 0); - vcap_key_set(ocelot, &data, VCAP_IS2_HK_L4_1588_VER, + vcap_key_set(vcap, &data, VCAP_IS2_HK_L4_1588_VER, 0, 0); } else { if (msk == 0) { @@ -598,10 +608,10 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, payload.mask[i] = ip_data->mask[i]; } } - vcap_key_bytes_set(ocelot, &data, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_IP4_L3_PROTO, proto.value, proto.mask); - vcap_key_bytes_set(ocelot, &data, + vcap_key_bytes_set(vcap, &data, VCAP_IS2_HK_L3_PAYLOAD, payload.value, payload.mask); } @@ -611,46 +621,271 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, default: type = 0; type_mask = 0; - count = vcap_is2->entry_width / 2; + count = vcap->entry_width / 2; /* Iterate over the non-common part of the key and * clear entry data */ - for (i = 
ocelot->vcap_is2_keys[VCAP_IS2_HK_L2_DMAC].offset; + for (i = vcap->keys[VCAP_IS2_HK_L2_DMAC].offset; i < count; i += ENTRY_WIDTH) { vcap_key_field_set(&data, i, min(32u, count - i), 0, 0); } break; } - vcap_key_set(ocelot, &data, VCAP_IS2_TYPE, type, type_mask); + vcap_key_set(vcap, &data, VCAP_IS2_TYPE, type, type_mask); is2_action_set(ocelot, &data, filter); vcap_data_set(data.counter, data.counter_offset, - vcap_is2->counter_width, filter->stats.pkts); + vcap->counter_width, filter->stats.pkts); /* Write row */ - vcap_entry2cache(ocelot, &data); - vcap_action2cache(ocelot, &data); - vcap_row_cmd(ocelot, row, VCAP_CMD_WRITE, VCAP_SEL_ALL); + vcap_entry2cache(ocelot, vcap, &data); + vcap_action2cache(ocelot, vcap, &data); + vcap_row_cmd(ocelot, vcap, row, VCAP_CMD_WRITE, VCAP_SEL_ALL); +} + +static void is1_action_set(struct ocelot *ocelot, struct vcap_data *data, + const struct ocelot_vcap_filter *filter) +{ + const struct vcap_props *vcap = &ocelot->vcap[VCAP_IS1]; + const struct ocelot_vcap_action *a = &filter->action; + + vcap_action_set(vcap, data, VCAP_IS1_ACT_VID_REPLACE_ENA, + a->vid_replace_ena); + vcap_action_set(vcap, data, VCAP_IS1_ACT_VID_ADD_VAL, a->vid); + vcap_action_set(vcap, data, VCAP_IS1_ACT_VLAN_POP_CNT_ENA, + a->vlan_pop_cnt_ena); + vcap_action_set(vcap, data, VCAP_IS1_ACT_VLAN_POP_CNT, + a->vlan_pop_cnt); + vcap_action_set(vcap, data, VCAP_IS1_ACT_PCP_DEI_ENA, a->pcp_dei_ena); + vcap_action_set(vcap, data, VCAP_IS1_ACT_PCP_VAL, a->pcp); + vcap_action_set(vcap, data, VCAP_IS1_ACT_DEI_VAL, a->dei); + vcap_action_set(vcap, data, VCAP_IS1_ACT_QOS_ENA, a->qos_ena); + vcap_action_set(vcap, data, VCAP_IS1_ACT_QOS_VAL, a->qos_val); + vcap_action_set(vcap, data, VCAP_IS1_ACT_PAG_OVERRIDE_MASK, + a->pag_override_mask); + vcap_action_set(vcap, data, VCAP_IS1_ACT_PAG_VAL, a->pag_val); } -static void is2_entry_get(struct ocelot *ocelot, struct ocelot_vcap_filter *filter, - int ix) +static void is1_entry_set(struct ocelot *ocelot, int ix, + struct ocelot_vcap_filter *filter) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; + const struct vcap_props *vcap = &ocelot->vcap[VCAP_IS1]; + struct ocelot_vcap_key_vlan *tag = &filter->vlan; + struct ocelot_vcap_u64 payload; struct vcap_data data; - int row = (ix / 2); - u32 cnt; + int row = ix / 2; + u32 type; + + memset(&payload, 0, sizeof(payload)); + memset(&data, 0, sizeof(data)); + + /* Read row */ + vcap_row_cmd(ocelot, vcap, row, VCAP_CMD_READ, VCAP_SEL_ALL); + vcap_cache2entry(ocelot, vcap, &data); + vcap_cache2action(ocelot, vcap, &data); - vcap_row_cmd(ocelot, row, VCAP_CMD_READ, VCAP_SEL_COUNTER); - vcap_cache2action(ocelot, &data); data.tg_sw = VCAP_TG_HALF; - is2_data_get(ocelot, &data, ix); + data.type = IS1_ACTION_TYPE_NORMAL; + vcap_data_offset_get(vcap, &data, ix); + data.tg = (data.tg & ~data.tg_mask); + if (filter->prio != 0) + data.tg |= data.tg_value; + + vcap_key_set(vcap, &data, VCAP_IS1_HK_LOOKUP, filter->lookup, 0x3); + vcap_key_set(vcap, &data, VCAP_IS1_HK_IGR_PORT_MASK, 0, + ~filter->ingress_port_mask); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_MC, filter->dmac_mc); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_BC, filter->dmac_bc); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_VLAN_TAGGED, tag->tagged); + vcap_key_set(vcap, &data, VCAP_IS1_HK_VID, + tag->vid.value, tag->vid.mask); + vcap_key_set(vcap, &data, VCAP_IS1_HK_PCP, + tag->pcp.value[0], tag->pcp.mask[0]); + type = IS1_TYPE_S1_NORMAL; + + switch (filter->key_type) { + case OCELOT_VCAP_KEY_ETYPE: { + struct ocelot_vcap_key_etype 
*etype = &filter->key.etype; + + vcap_key_bytes_set(vcap, &data, VCAP_IS1_HK_L2_SMAC, + etype->smac.value, etype->smac.mask); + vcap_key_bytes_set(vcap, &data, VCAP_IS1_HK_ETYPE, + etype->etype.value, etype->etype.mask); + break; + } + case OCELOT_VCAP_KEY_IPV4: { + struct ocelot_vcap_key_ipv4 *ipv4 = &filter->key.ipv4; + struct ocelot_vcap_udp_tcp *sport = &ipv4->sport; + struct ocelot_vcap_udp_tcp *dport = &ipv4->dport; + enum ocelot_vcap_bit tcp_udp = OCELOT_VCAP_BIT_0; + struct ocelot_vcap_u8 proto = ipv4->proto; + struct ocelot_vcap_ipv4 sip = ipv4->sip; + u32 val, msk; + + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_IP_SNAP, + OCELOT_VCAP_BIT_1); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_IP4, + OCELOT_VCAP_BIT_1); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_ETYPE_LEN, + OCELOT_VCAP_BIT_1); + vcap_key_bytes_set(vcap, &data, VCAP_IS1_HK_L3_IP4_SIP, + sip.value.addr, sip.mask.addr); + + val = proto.value[0]; + msk = proto.mask[0]; + + if ((val == NEXTHDR_TCP || val == NEXTHDR_UDP) && msk == 0xff) + tcp_udp = OCELOT_VCAP_BIT_1; + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TCP_UDP, tcp_udp); + + if (tcp_udp) { + enum ocelot_vcap_bit tcp = OCELOT_VCAP_BIT_0; + + if (val == NEXTHDR_TCP) + tcp = OCELOT_VCAP_BIT_1; + + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TCP, tcp); + vcap_key_l4_port_set(vcap, &data, VCAP_IS1_HK_L4_SPORT, + sport); + /* Overloaded field */ + vcap_key_l4_port_set(vcap, &data, VCAP_IS1_HK_ETYPE, + dport); + } else { + /* IPv4 "other" frame */ + struct ocelot_vcap_u16 etype = {0}; + + /* Overloaded field */ + etype.value[0] = proto.value[0]; + etype.mask[0] = proto.mask[0]; + + vcap_key_bytes_set(vcap, &data, VCAP_IS1_HK_ETYPE, + etype.value, etype.mask); + } + } + default: + break; + } + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TYPE, + type ? 
OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0); + + is1_action_set(ocelot, &data, filter); + vcap_data_set(data.counter, data.counter_offset, + vcap->counter_width, filter->stats.pkts); + + /* Write row */ + vcap_entry2cache(ocelot, vcap, &data); + vcap_action2cache(ocelot, vcap, &data); + vcap_row_cmd(ocelot, vcap, row, VCAP_CMD_WRITE, VCAP_SEL_ALL); +} + +static void es0_action_set(struct ocelot *ocelot, struct vcap_data *data, + const struct ocelot_vcap_filter *filter) +{ + const struct vcap_props *vcap = &ocelot->vcap[VCAP_ES0]; + const struct ocelot_vcap_action *a = &filter->action; + + vcap_action_set(vcap, data, VCAP_ES0_ACT_PUSH_OUTER_TAG, + a->push_outer_tag); + vcap_action_set(vcap, data, VCAP_ES0_ACT_PUSH_INNER_TAG, + a->push_inner_tag); + vcap_action_set(vcap, data, VCAP_ES0_ACT_TAG_A_TPID_SEL, + a->tag_a_tpid_sel); + vcap_action_set(vcap, data, VCAP_ES0_ACT_TAG_A_VID_SEL, + a->tag_a_vid_sel); + vcap_action_set(vcap, data, VCAP_ES0_ACT_TAG_A_PCP_SEL, + a->tag_a_pcp_sel); + vcap_action_set(vcap, data, VCAP_ES0_ACT_VID_A_VAL, a->vid_a_val); + vcap_action_set(vcap, data, VCAP_ES0_ACT_PCP_A_VAL, a->pcp_a_val); + vcap_action_set(vcap, data, VCAP_ES0_ACT_TAG_B_TPID_SEL, + a->tag_b_tpid_sel); + vcap_action_set(vcap, data, VCAP_ES0_ACT_TAG_B_VID_SEL, + a->tag_b_vid_sel); + vcap_action_set(vcap, data, VCAP_ES0_ACT_TAG_B_PCP_SEL, + a->tag_b_pcp_sel); + vcap_action_set(vcap, data, VCAP_ES0_ACT_VID_B_VAL, a->vid_b_val); + vcap_action_set(vcap, data, VCAP_ES0_ACT_PCP_B_VAL, a->pcp_b_val); +} + +static void es0_entry_set(struct ocelot *ocelot, int ix, + struct ocelot_vcap_filter *filter) +{ + const struct vcap_props *vcap = &ocelot->vcap[VCAP_ES0]; + struct ocelot_vcap_key_vlan *tag = &filter->vlan; + struct ocelot_vcap_u64 payload; + struct vcap_data data; + int row = ix; + + memset(&payload, 0, sizeof(payload)); + memset(&data, 0, sizeof(data)); + + /* Read row */ + vcap_row_cmd(ocelot, vcap, row, VCAP_CMD_READ, VCAP_SEL_ALL); + vcap_cache2entry(ocelot, vcap, &data); + vcap_cache2action(ocelot, vcap, &data); + + data.tg_sw = VCAP_TG_FULL; + data.type = ES0_ACTION_TYPE_NORMAL; + vcap_data_offset_get(vcap, &data, ix); + data.tg = (data.tg & ~data.tg_mask); + if (filter->prio != 0) + data.tg |= data.tg_value; + + vcap_key_set(vcap, &data, VCAP_ES0_IGR_PORT, filter->ingress_port.value, + filter->ingress_port.mask); + vcap_key_set(vcap, &data, VCAP_ES0_EGR_PORT, filter->egress_port.value, + filter->egress_port.mask); + vcap_key_bit_set(vcap, &data, VCAP_ES0_L2_MC, filter->dmac_mc); + vcap_key_bit_set(vcap, &data, VCAP_ES0_L2_BC, filter->dmac_bc); + vcap_key_set(vcap, &data, VCAP_ES0_VID, + tag->vid.value, tag->vid.mask); + vcap_key_set(vcap, &data, VCAP_ES0_PCP, + tag->pcp.value[0], tag->pcp.mask[0]); + + es0_action_set(ocelot, &data, filter); + vcap_data_set(data.counter, data.counter_offset, + vcap->counter_width, filter->stats.pkts); + + /* Write row */ + vcap_entry2cache(ocelot, vcap, &data); + vcap_action2cache(ocelot, vcap, &data); + vcap_row_cmd(ocelot, vcap, row, VCAP_CMD_WRITE, VCAP_SEL_ALL); +} + +static void vcap_entry_get(struct ocelot *ocelot, int ix, + struct ocelot_vcap_filter *filter) +{ + const struct vcap_props *vcap = &ocelot->vcap[filter->block_id]; + struct vcap_data data; + int row, count; + u32 cnt; + + if (filter->block_id == VCAP_ES0) + data.tg_sw = VCAP_TG_FULL; + else + data.tg_sw = VCAP_TG_HALF; + + count = (1 << (data.tg_sw - 1)); + row = (ix / count); + vcap_row_cmd(ocelot, vcap, row, VCAP_CMD_READ, VCAP_SEL_COUNTER); + vcap_cache2action(ocelot, vcap, &data); + 
vcap_data_offset_get(vcap, &data, ix); cnt = vcap_data_get(data.counter, data.counter_offset, - vcap_is2->counter_width); + vcap->counter_width); filter->stats.pkts = cnt; } +static void vcap_entry_set(struct ocelot *ocelot, int ix, + struct ocelot_vcap_filter *filter) +{ + if (filter->block_id == VCAP_IS1) + return is1_entry_set(ocelot, ix, filter); + if (filter->block_id == VCAP_IS2) + return is2_entry_set(ocelot, ix, filter); + if (filter->block_id == VCAP_ES0) + return es0_entry_set(ocelot, ix, filter); +} + static int ocelot_vcap_policer_add(struct ocelot *ocelot, u32 pol_ix, struct ocelot_policer *pol) { @@ -679,11 +914,12 @@ static void ocelot_vcap_policer_del(struct ocelot *ocelot, list_for_each_entry(filter, &block->rules, list) { index++; - if (filter->action == OCELOT_VCAP_ACTION_POLICE && - filter->pol_ix < pol_ix) { - filter->pol_ix += 1; - ocelot_vcap_policer_add(ocelot, filter->pol_ix, - &filter->pol); + if (filter->block_id == VCAP_IS2 && + filter->action.police_ena && + filter->action.pol_ix < pol_ix) { + filter->action.pol_ix += 1; + ocelot_vcap_policer_add(ocelot, filter->action.pol_ix, + &filter->action.pol); is2_entry_set(ocelot, index, filter); } } @@ -701,10 +937,11 @@ static void ocelot_vcap_filter_add_to_block(struct ocelot *ocelot, struct ocelot_vcap_filter *tmp; struct list_head *pos, *n; - if (filter->action == OCELOT_VCAP_ACTION_POLICE) { + if (filter->block_id == VCAP_IS2 && filter->action.police_ena) { block->pol_lpr--; - filter->pol_ix = block->pol_lpr; - ocelot_vcap_policer_add(ocelot, filter->pol_ix, &filter->pol); + filter->action.pol_ix = block->pol_lpr; + ocelot_vcap_policer_add(ocelot, filter->action.pol_ix, + &filter->action.pol); } block->count++; @@ -726,19 +963,20 @@ static int ocelot_vcap_block_get_filter_index(struct ocelot_vcap_block *block, struct ocelot_vcap_filter *filter) { struct ocelot_vcap_filter *tmp; - int index = -1; + int index = 0; list_for_each_entry(tmp, &block->rules, list) { - ++index; if (filter->id == tmp->id) - break; + return index; + index++; } - return index; + + return -ENOENT; } static struct ocelot_vcap_filter* -ocelot_vcap_block_find_filter(struct ocelot_vcap_block *block, - int index) +ocelot_vcap_block_find_filter_by_index(struct ocelot_vcap_block *block, + int index) { struct ocelot_vcap_filter *tmp; int i = 0; @@ -752,6 +990,18 @@ ocelot_vcap_block_find_filter(struct ocelot_vcap_block *block, return NULL; } +struct ocelot_vcap_filter * +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id) +{ + struct ocelot_vcap_filter *filter; + + list_for_each_entry(filter, &block->rules, list) + if (filter->id == id) + return filter; + + return NULL; +} + /* If @on=false, then SNAP, ARP, IP and OAM frames will not match on keys based * on destination and source MAC addresses, but only on higher-level protocol * information. The only frame types to match on keys containing MAC addresses @@ -763,23 +1013,23 @@ ocelot_vcap_block_find_filter(struct ocelot_vcap_block *block, * on any _other_ keys than MAC_ETYPE ones. 
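 *
 * Concretely: with @on=true, an ARP frame seen by this port in the given
 * IS2 lookup is keyed as MAC_ETYPE, so only MAC_ETYPE entries can match
 * it; with @on=false the frame is keyed as ARP again and MAC_ETYPE
 * entries no longer see it.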
*/ static void ocelot_match_all_as_mac_etype(struct ocelot *ocelot, int port, - bool on) + int lookup, bool on) { u32 val = 0; if (on) - val = ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS(3) | - ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS(3) | - ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS(3) | - ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS(3) | - ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS(3); + val = ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS(BIT(lookup)) | + ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS(BIT(lookup)) | + ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS(BIT(lookup)) | + ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS(BIT(lookup)) | + ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS(BIT(lookup)); ocelot_rmw_gix(ocelot, val, - ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS_M | - ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS_M | - ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS_M | - ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS_M | - ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS_M, + ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS(BIT(lookup)) | + ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS(BIT(lookup)) | + ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS(BIT(lookup)) | + ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS(BIT(lookup)) | + ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS(BIT(lookup)), ANA_PORT_VCAP_S2_CFG, port); } @@ -825,35 +1075,43 @@ static bool ocelot_exclusive_mac_etype_filter_rules(struct ocelot *ocelot, struct ocelot_vcap_filter *filter) { - struct ocelot_vcap_block *block = &ocelot->block; + struct ocelot_vcap_block *block = &ocelot->block[filter->block_id]; struct ocelot_vcap_filter *tmp; unsigned long port; int i; + /* We only have the S2_IP_TCPUDP_DIS set of knobs for VCAP IS2 */ + if (filter->block_id != VCAP_IS2) + return true; + if (ocelot_vcap_is_problematic_mac_etype(filter)) { /* Search for any non-MAC_ETYPE rules on the port */ for (i = 0; i < block->count; i++) { - tmp = ocelot_vcap_block_find_filter(block, i); + tmp = ocelot_vcap_block_find_filter_by_index(block, i); if (tmp->ingress_port_mask & filter->ingress_port_mask && + tmp->lookup == filter->lookup && ocelot_vcap_is_problematic_non_mac_etype(tmp)) return false; } for_each_set_bit(port, &filter->ingress_port_mask, ocelot->num_phys_ports) - ocelot_match_all_as_mac_etype(ocelot, port, true); + ocelot_match_all_as_mac_etype(ocelot, port, + filter->lookup, true); } else if (ocelot_vcap_is_problematic_non_mac_etype(filter)) { /* Search for any MAC_ETYPE rules on the port */ for (i = 0; i < block->count; i++) { - tmp = ocelot_vcap_block_find_filter(block, i); + tmp = ocelot_vcap_block_find_filter_by_index(block, i); if (tmp->ingress_port_mask & filter->ingress_port_mask && + tmp->lookup == filter->lookup && ocelot_vcap_is_problematic_mac_etype(tmp)) return false; } for_each_set_bit(port, &filter->ingress_port_mask, ocelot->num_phys_ports) - ocelot_match_all_as_mac_etype(ocelot, port, false); + ocelot_match_all_as_mac_etype(ocelot, port, + filter->lookup, false); } return true; @@ -863,12 +1121,12 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, struct ocelot_vcap_filter *filter, struct netlink_ext_ack *extack) { - struct ocelot_vcap_block *block = &ocelot->block; + struct ocelot_vcap_block *block = &ocelot->block[filter->block_id]; int i, index; if (!ocelot_exclusive_mac_etype_filter_rules(ocelot, filter)) { NL_SET_ERR_MSG_MOD(extack, - "Cannot mix MAC_ETYPE with non-MAC_ETYPE rules"); + "Cannot mix MAC_ETYPE with non-MAC_ETYPE rules, use the other IS2 lookup"); return -EBUSY; } @@ -877,17 +1135,19 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, /* Get the index of the inserted filter */ index = ocelot_vcap_block_get_filter_index(block, filter); + if (index < 0) + return index; /* Move down the rules to make place for 
the new filter */ for (i = block->count - 1; i > index; i--) { struct ocelot_vcap_filter *tmp; - tmp = ocelot_vcap_block_find_filter(block, i); - is2_entry_set(ocelot, i, tmp); + tmp = ocelot_vcap_block_find_filter_by_index(block, i); + vcap_entry_set(ocelot, i, tmp); } /* Now insert the new filter */ - is2_entry_set(ocelot, index, filter); + vcap_entry_set(ocelot, index, filter); return 0; } @@ -901,9 +1161,10 @@ static void ocelot_vcap_block_remove_filter(struct ocelot *ocelot, list_for_each_safe(pos, q, &block->rules) { tmp = list_entry(pos, struct ocelot_vcap_filter, list); if (tmp->id == filter->id) { - if (tmp->action == OCELOT_VCAP_ACTION_POLICE) + if (tmp->block_id == VCAP_IS2 && + tmp->action.police_ena) ocelot_vcap_policer_del(ocelot, block, - tmp->pol_ix); + tmp->action.pol_ix); list_del(pos); kfree(tmp); @@ -916,7 +1177,7 @@ static void ocelot_vcap_block_remove_filter(struct ocelot *ocelot, int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *filter) { - struct ocelot_vcap_block *block = &ocelot->block; + struct ocelot_vcap_block *block = &ocelot->block[filter->block_id]; struct ocelot_vcap_filter del_filter; int i, index; @@ -924,6 +1185,8 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, /* Gets index of the filter */ index = ocelot_vcap_block_get_filter_index(block, filter); + if (index < 0) + return index; /* Delete filter */ ocelot_vcap_block_remove_filter(ocelot, block, filter); @@ -932,12 +1195,12 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, for (i = index; i < block->count; i++) { struct ocelot_vcap_filter *tmp; - tmp = ocelot_vcap_block_find_filter(block, i); - is2_entry_set(ocelot, i, tmp); + tmp = ocelot_vcap_block_find_filter_by_index(block, i); + vcap_entry_set(ocelot, i, tmp); } /* Now delete the last filter, because it is duplicated */ - is2_entry_set(ocelot, block->count, &del_filter); + vcap_entry_set(ocelot, block->count, &del_filter); return 0; } @@ -945,37 +1208,115 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, int ocelot_vcap_filter_stats_update(struct ocelot *ocelot, struct ocelot_vcap_filter *filter) { - struct ocelot_vcap_block *block = &ocelot->block; - struct ocelot_vcap_filter *tmp; + struct ocelot_vcap_block *block = &ocelot->block[filter->block_id]; + struct ocelot_vcap_filter tmp; int index; index = ocelot_vcap_block_get_filter_index(block, filter); - is2_entry_get(ocelot, filter, index); + if (index < 0) + return index; + + vcap_entry_get(ocelot, index, filter); /* After we get the result we need to clear the counters */ - tmp = ocelot_vcap_block_find_filter(block, index); - tmp->stats.pkts = 0; - is2_entry_set(ocelot, index, tmp); + tmp = *filter; + tmp.stats.pkts = 0; + vcap_entry_set(ocelot, index, &tmp); return 0; } -int ocelot_vcap_init(struct ocelot *ocelot) +static void ocelot_vcap_init_one(struct ocelot *ocelot, + const struct vcap_props *vcap) { - const struct vcap_props *vcap_is2 = &ocelot->vcap[VCAP_IS2]; - struct ocelot_vcap_block *block = &ocelot->block; struct vcap_data data; memset(&data, 0, sizeof(data)); - vcap_entry2cache(ocelot, &data); - ocelot_write(ocelot, vcap_is2->entry_count, S2_CORE_MV_CFG); - vcap_cmd(ocelot, 0, VCAP_CMD_INITIALIZE, VCAP_SEL_ENTRY); + vcap_entry2cache(ocelot, vcap, &data); + ocelot_target_write(ocelot, vcap->target, vcap->entry_count, + VCAP_CORE_MV_CFG); + vcap_cmd(ocelot, vcap, 0, VCAP_CMD_INITIALIZE, VCAP_SEL_ENTRY); - vcap_action2cache(ocelot, &data); - ocelot_write(ocelot, vcap_is2->action_count, S2_CORE_MV_CFG); - vcap_cmd(ocelot, 0, VCAP_CMD_INITIALIZE, + 
vcap_action2cache(ocelot, vcap, &data); + ocelot_target_write(ocelot, vcap->target, vcap->action_count, + VCAP_CORE_MV_CFG); + vcap_cmd(ocelot, vcap, 0, VCAP_CMD_INITIALIZE, VCAP_SEL_ACTION | VCAP_SEL_COUNTER); +} + +static void ocelot_vcap_detect_constants(struct ocelot *ocelot, + struct vcap_props *vcap) +{ + int counter_memory_width; + int num_default_actions; + int version; + + version = ocelot_target_read(ocelot, vcap->target, + VCAP_CONST_VCAP_VER); + /* Only version 0 VCAP supported for now */ + if (WARN_ON(version != 0)) + return; + + /* Width in bits of type-group field */ + vcap->tg_width = ocelot_target_read(ocelot, vcap->target, + VCAP_CONST_ENTRY_TG_WIDTH); + /* Number of subwords per TCAM row */ + vcap->sw_count = ocelot_target_read(ocelot, vcap->target, + VCAP_CONST_ENTRY_SWCNT); + /* Number of rows in TCAM. There can be this many full keys, or double + * this number half keys, or 4 times this number quarter keys. + */ + vcap->entry_count = ocelot_target_read(ocelot, vcap->target, + VCAP_CONST_ENTRY_CNT); + /* Assuming there are 4 subwords per TCAM row, their layout in the + * actual TCAM (not in the cache) would be: + * + * | SW 3 | TG 3 | SW 2 | TG 2 | SW 1 | TG 1 | SW 0 | TG 0 | + * + * (where SW=subword and TG=Type-Group). + * + * What VCAP_CONST_ENTRY_CNT is giving us is the width of one full TCAM + * row. But when software accesses the TCAM through the cache + * registers, the Type-Group values are written through another set of + * registers VCAP_TG_DAT, and therefore, it appears as though the 4 + * subwords are contiguous in the cache memory. + * Important mention: regardless of the number of key entries per row + * (and therefore of key size: 1 full key or 2 half keys or 4 quarter + * keys), software always has to configure 4 Type-Group values. For + * example, in the case of 1 full key, the driver needs to set all 4 + * Type-Group to be full key. + * + * For this reason, we need to fix up the value that the hardware is + * giving us. We don't actually care about the width of the entry in + * the TCAM. What we care about is the width of the entry in the cache + * registers, which is how we get to interact with it. And since the + * VCAP_ENTRY_DAT cache registers access only the subwords and not the + * Type-Groups, this means we need to subtract the width of the + * Type-Groups when packing and unpacking key entry data in a TCAM row. + */ + vcap->entry_width = ocelot_target_read(ocelot, vcap->target, + VCAP_CONST_ENTRY_WIDTH); + vcap->entry_width -= vcap->tg_width * vcap->sw_count; + num_default_actions = ocelot_target_read(ocelot, vcap->target, + VCAP_CONST_ACTION_DEF_CNT); + vcap->action_count = vcap->entry_count + num_default_actions; + vcap->action_width = ocelot_target_read(ocelot, vcap->target, + VCAP_CONST_ACTION_WIDTH); + /* The width of the counter memory, this is the complete width of all + * counter-fields associated with one full-word entry. There is one + * counter per entry sub-word (see CAP_CORE::ENTRY_SWCNT for number of + * subwords.) + */ + vcap->counter_words = vcap->sw_count; + counter_memory_width = ocelot_target_read(ocelot, vcap->target, + VCAP_CONST_CNT_WIDTH); + vcap->counter_width = counter_memory_width / vcap->counter_words; +} + +int ocelot_vcap_init(struct ocelot *ocelot) +{ + int i; /* Create a policer that will drop the frames for the cpu. 
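 * (That policer lives at the fixed index OCELOT_POLICER_DISCARD; the
 * per-filter policers allocated via pol_lpr grow downwards from the slot
 * just below it.)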
* This policer will be used as action in the acl rules to drop @@ -992,9 +1333,18 @@ int ocelot_vcap_init(struct ocelot *ocelot) ocelot_write_gix(ocelot, 0x3fffff, ANA_POL_CIR_STATE, OCELOT_POLICER_DISCARD); - block->pol_lpr = OCELOT_POLICER_DISCARD - 1; + for (i = 0; i < OCELOT_NUM_VCAP_BLOCKS; i++) { + struct ocelot_vcap_block *block = &ocelot->block[i]; + struct vcap_props *vcap = &ocelot->vcap[i]; + + INIT_LIST_HEAD(&block->rules); + block->pol_lpr = OCELOT_POLICER_DISCARD - 1; + + ocelot_vcap_detect_constants(ocelot, vcap); + ocelot_vcap_init_one(ocelot, vcap); + } - INIT_LIST_HEAD(&ocelot->block.rules); + INIT_LIST_HEAD(&ocelot->dummy_rules); return 0; } diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.h b/drivers/net/ethernet/mscc/ocelot_vcap.h index 0dfbfc011b2e..82fd10581a14 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.h +++ b/drivers/net/ethernet/mscc/ocelot_vcap.h @@ -11,6 +11,8 @@ #include <net/sch_generic.h> #include <net/pkt_cls.h> +#define OCELOT_POLICER_DISCARD 0x17f + struct ocelot_ipv4 { u8 addr[4]; }; @@ -76,6 +78,11 @@ struct ocelot_vcap_udp_tcp { u16 mask; }; +struct ocelot_vcap_port { + u8 value; + u8 mask; +}; + enum ocelot_vcap_key_type { OCELOT_VCAP_KEY_ANY, OCELOT_VCAP_KEY_ETYPE, @@ -158,6 +165,7 @@ struct ocelot_vcap_key_ipv4 { struct ocelot_vcap_key_ipv6 { struct ocelot_vcap_u8 proto; /* IPv6 protocol */ struct ocelot_vcap_u128 sip; /* IPv6 source (byte 0-7 ignored) */ + struct ocelot_vcap_u128 dip; /* IPv6 destination (byte 0-7 ignored) */ enum ocelot_vcap_bit ttl; /* TTL zero */ struct ocelot_vcap_u8 ds; struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */ @@ -174,10 +182,71 @@ struct ocelot_vcap_key_ipv6 { enum ocelot_vcap_bit seq_zero; /* TCP sequence number is zero */ }; -enum ocelot_vcap_action { - OCELOT_VCAP_ACTION_DROP, - OCELOT_VCAP_ACTION_TRAP, - OCELOT_VCAP_ACTION_POLICE, +enum ocelot_mask_mode { + OCELOT_MASK_MODE_NONE, + OCELOT_MASK_MODE_PERMIT_DENY, + OCELOT_MASK_MODE_POLICY, + OCELOT_MASK_MODE_REDIRECT, +}; + +enum ocelot_es0_tag { + OCELOT_NO_ES0_TAG, + OCELOT_ES0_TAG, + OCELOT_FORCE_PORT_TAG, + OCELOT_FORCE_UNTAG, +}; + +enum ocelot_tag_tpid_sel { + OCELOT_TAG_TPID_SEL_8021Q, + OCELOT_TAG_TPID_SEL_8021AD, +}; + +struct ocelot_vcap_action { + union { + /* VCAP ES0 */ + struct { + enum ocelot_es0_tag push_outer_tag; + enum ocelot_es0_tag push_inner_tag; + enum ocelot_tag_tpid_sel tag_a_tpid_sel; + int tag_a_vid_sel; + int tag_a_pcp_sel; + u16 vid_a_val; + u8 pcp_a_val; + u8 dei_a_val; + enum ocelot_tag_tpid_sel tag_b_tpid_sel; + int tag_b_vid_sel; + int tag_b_pcp_sel; + u16 vid_b_val; + u8 pcp_b_val; + u8 dei_b_val; + }; + + /* VCAP IS1 */ + struct { + bool vid_replace_ena; + u16 vid; + bool vlan_pop_cnt_ena; + int vlan_pop_cnt; + bool pcp_dei_ena; + u8 pcp; + u8 dei; + bool qos_ena; + u8 qos_val; + u8 pag_override_mask; + u8 pag_val; + }; + + /* VCAP IS2 */ + struct { + bool cpu_copy_ena; + u8 cpu_qu_num; + enum ocelot_mask_mode mask_mode; + unsigned long port_mask; + bool police_ena; + struct ocelot_policer pol; + u32 pol_ix; + }; + }; }; struct ocelot_vcap_stats { @@ -186,15 +255,30 @@ struct ocelot_vcap_stats { u64 used; }; +enum ocelot_vcap_filter_type { + OCELOT_VCAP_FILTER_DUMMY, + OCELOT_VCAP_FILTER_PAG, + OCELOT_VCAP_FILTER_OFFLOAD, +}; + struct ocelot_vcap_filter { struct list_head list; + enum ocelot_vcap_filter_type type; + int block_id; + int goto_target; + int lookup; + u8 pag; u16 prio; u32 id; - enum ocelot_vcap_action action; + struct ocelot_vcap_action action; struct ocelot_vcap_stats stats; + /* For VCAP IS1 and IS2 
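(the ingress stages, which classify by source port, hence a mask)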
*/ unsigned long ingress_port_mask; + /* For VCAP ES0 */ + struct ocelot_vcap_port ingress_port; + struct ocelot_vcap_port egress_port; enum ocelot_vcap_bit dmac_mc; enum ocelot_vcap_bit dmac_bc; @@ -210,8 +294,6 @@ struct ocelot_vcap_filter { struct ocelot_vcap_key_ipv4 ipv4; struct ocelot_vcap_key_ipv6 ipv6; } key; - struct ocelot_policer pol; - u32 pol_ix; }; int ocelot_vcap_filter_add(struct ocelot *ocelot, @@ -221,7 +303,10 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); int ocelot_vcap_filter_stats_update(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); +struct ocelot_vcap_filter * +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id); +void ocelot_detect_vcap_constants(struct ocelot *ocelot); int ocelot_vcap_init(struct ocelot *ocelot); int ocelot_setup_tc_cls_flower(struct ocelot_port_private *priv, diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 8a6917691ba6..dc00772950e5 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -19,10 +19,6 @@ #include "ocelot.h" #define IFH_EXTRACT_BITFIELD64(x, o, w) (((x) >> (o)) & GENMASK_ULL((w) - 1, 0)) -#define VSC7514_VCAP_IS2_CNT 64 -#define VSC7514_VCAP_IS2_ENTRY_WIDTH 376 -#define VSC7514_VCAP_IS2_ACTION_WIDTH 99 -#define VSC7514_VCAP_PORT_CNT 11 static const u32 ocelot_ana_regmap[] = { REG(ANA_ADVLEARN, 0x009000), @@ -241,14 +237,27 @@ static const u32 ocelot_sys_regmap[] = { REG(SYS_PTP_CFG, 0x0006c4), }; -static const u32 ocelot_s2_regmap[] = { - REG(S2_CORE_UPDATE_CTRL, 0x000000), - REG(S2_CORE_MV_CFG, 0x000004), - REG(S2_CACHE_ENTRY_DAT, 0x000008), - REG(S2_CACHE_MASK_DAT, 0x000108), - REG(S2_CACHE_ACTION_DAT, 0x000208), - REG(S2_CACHE_CNT_DAT, 0x000308), - REG(S2_CACHE_TG_DAT, 0x000388), +static const u32 ocelot_vcap_regmap[] = { + /* VCAP_CORE_CFG */ + REG(VCAP_CORE_UPDATE_CTRL, 0x000000), + REG(VCAP_CORE_MV_CFG, 0x000004), + /* VCAP_CORE_CACHE */ + REG(VCAP_CACHE_ENTRY_DAT, 0x000008), + REG(VCAP_CACHE_MASK_DAT, 0x000108), + REG(VCAP_CACHE_ACTION_DAT, 0x000208), + REG(VCAP_CACHE_CNT_DAT, 0x000308), + REG(VCAP_CACHE_TG_DAT, 0x000388), + /* VCAP_CONST */ + REG(VCAP_CONST_VCAP_VER, 0x000398), + REG(VCAP_CONST_ENTRY_WIDTH, 0x00039c), + REG(VCAP_CONST_ENTRY_CNT, 0x0003a0), + REG(VCAP_CONST_ENTRY_SWCNT, 0x0003a4), + REG(VCAP_CONST_ENTRY_TG_WIDTH, 0x0003a8), + REG(VCAP_CONST_ACTION_DEF_CNT, 0x0003ac), + REG(VCAP_CONST_ACTION_WIDTH, 0x0003b0), + REG(VCAP_CONST_CNT_WIDTH, 0x0003b4), + REG(VCAP_CONST_CORE_CNT, 0x0003b8), + REG(VCAP_CONST_IF_CNT, 0x0003bc), }; static const u32 ocelot_ptp_regmap[] = { @@ -311,7 +320,9 @@ static const u32 *ocelot_regmap[TARGET_MAX] = { [QSYS] = ocelot_qsys_regmap, [REW] = ocelot_rew_regmap, [SYS] = ocelot_sys_regmap, - [S2] = ocelot_s2_regmap, + [S0] = ocelot_vcap_regmap, + [S1] = ocelot_vcap_regmap, + [S2] = ocelot_vcap_regmap, [PTP] = ocelot_ptp_regmap, [DEV_GMII] = ocelot_dev_gmii_regmap, }; @@ -756,6 +767,115 @@ static u16 ocelot_wm_enc(u16 value) static const struct ocelot_ops ocelot_ops = { .reset = ocelot_reset, .wm_enc = ocelot_wm_enc, + .port_to_netdev = ocelot_port_to_netdev, + .netdev_to_port = ocelot_netdev_to_port, +}; + +static const struct vcap_field vsc7514_vcap_es0_keys[] = { + [VCAP_ES0_EGR_PORT] = { 0, 4}, + [VCAP_ES0_IGR_PORT] = { 4, 4}, + [VCAP_ES0_RSV] = { 8, 2}, + [VCAP_ES0_L2_MC] = { 10, 1}, + [VCAP_ES0_L2_BC] = { 11, 1}, + [VCAP_ES0_VID] = { 12, 12}, + [VCAP_ES0_DP] = { 24, 1}, + [VCAP_ES0_PCP] = { 25, 3}, 
+}; + +static const struct vcap_field vsc7514_vcap_es0_actions[] = { + [VCAP_ES0_ACT_PUSH_OUTER_TAG] = { 0, 2}, + [VCAP_ES0_ACT_PUSH_INNER_TAG] = { 2, 1}, + [VCAP_ES0_ACT_TAG_A_TPID_SEL] = { 3, 2}, + [VCAP_ES0_ACT_TAG_A_VID_SEL] = { 5, 1}, + [VCAP_ES0_ACT_TAG_A_PCP_SEL] = { 6, 2}, + [VCAP_ES0_ACT_TAG_A_DEI_SEL] = { 8, 2}, + [VCAP_ES0_ACT_TAG_B_TPID_SEL] = { 10, 2}, + [VCAP_ES0_ACT_TAG_B_VID_SEL] = { 12, 1}, + [VCAP_ES0_ACT_TAG_B_PCP_SEL] = { 13, 2}, + [VCAP_ES0_ACT_TAG_B_DEI_SEL] = { 15, 2}, + [VCAP_ES0_ACT_VID_A_VAL] = { 17, 12}, + [VCAP_ES0_ACT_PCP_A_VAL] = { 29, 3}, + [VCAP_ES0_ACT_DEI_A_VAL] = { 32, 1}, + [VCAP_ES0_ACT_VID_B_VAL] = { 33, 12}, + [VCAP_ES0_ACT_PCP_B_VAL] = { 45, 3}, + [VCAP_ES0_ACT_DEI_B_VAL] = { 48, 1}, + [VCAP_ES0_ACT_RSV] = { 49, 24}, + [VCAP_ES0_ACT_HIT_STICKY] = { 73, 1}, +}; + +static const struct vcap_field vsc7514_vcap_is1_keys[] = { + [VCAP_IS1_HK_TYPE] = { 0, 1}, + [VCAP_IS1_HK_LOOKUP] = { 1, 2}, + [VCAP_IS1_HK_IGR_PORT_MASK] = { 3, 12}, + [VCAP_IS1_HK_RSV] = { 15, 9}, + [VCAP_IS1_HK_OAM_Y1731] = { 24, 1}, + [VCAP_IS1_HK_L2_MC] = { 25, 1}, + [VCAP_IS1_HK_L2_BC] = { 26, 1}, + [VCAP_IS1_HK_IP_MC] = { 27, 1}, + [VCAP_IS1_HK_VLAN_TAGGED] = { 28, 1}, + [VCAP_IS1_HK_VLAN_DBL_TAGGED] = { 29, 1}, + [VCAP_IS1_HK_TPID] = { 30, 1}, + [VCAP_IS1_HK_VID] = { 31, 12}, + [VCAP_IS1_HK_DEI] = { 43, 1}, + [VCAP_IS1_HK_PCP] = { 44, 3}, + /* Specific Fields for IS1 Half Key S1_NORMAL */ + [VCAP_IS1_HK_L2_SMAC] = { 47, 48}, + [VCAP_IS1_HK_ETYPE_LEN] = { 95, 1}, + [VCAP_IS1_HK_ETYPE] = { 96, 16}, + [VCAP_IS1_HK_IP_SNAP] = {112, 1}, + [VCAP_IS1_HK_IP4] = {113, 1}, + /* Layer-3 Information */ + [VCAP_IS1_HK_L3_FRAGMENT] = {114, 1}, + [VCAP_IS1_HK_L3_FRAG_OFS_GT0] = {115, 1}, + [VCAP_IS1_HK_L3_OPTIONS] = {116, 1}, + [VCAP_IS1_HK_L3_DSCP] = {117, 6}, + [VCAP_IS1_HK_L3_IP4_SIP] = {123, 32}, + /* Layer-4 Information */ + [VCAP_IS1_HK_TCP_UDP] = {155, 1}, + [VCAP_IS1_HK_TCP] = {156, 1}, + [VCAP_IS1_HK_L4_SPORT] = {157, 16}, + [VCAP_IS1_HK_L4_RNG] = {173, 8}, + /* Specific Fields for IS1 Half Key S1_5TUPLE_IP4 */ + [VCAP_IS1_HK_IP4_INNER_TPID] = { 47, 1}, + [VCAP_IS1_HK_IP4_INNER_VID] = { 48, 12}, + [VCAP_IS1_HK_IP4_INNER_DEI] = { 60, 1}, + [VCAP_IS1_HK_IP4_INNER_PCP] = { 61, 3}, + [VCAP_IS1_HK_IP4_IP4] = { 64, 1}, + [VCAP_IS1_HK_IP4_L3_FRAGMENT] = { 65, 1}, + [VCAP_IS1_HK_IP4_L3_FRAG_OFS_GT0] = { 66, 1}, + [VCAP_IS1_HK_IP4_L3_OPTIONS] = { 67, 1}, + [VCAP_IS1_HK_IP4_L3_DSCP] = { 68, 6}, + [VCAP_IS1_HK_IP4_L3_IP4_DIP] = { 74, 32}, + [VCAP_IS1_HK_IP4_L3_IP4_SIP] = {106, 32}, + [VCAP_IS1_HK_IP4_L3_PROTO] = {138, 8}, + [VCAP_IS1_HK_IP4_TCP_UDP] = {146, 1}, + [VCAP_IS1_HK_IP4_TCP] = {147, 1}, + [VCAP_IS1_HK_IP4_L4_RNG] = {148, 8}, + [VCAP_IS1_HK_IP4_IP_PAYLOAD_S1_5TUPLE] = {156, 32}, +}; + +static const struct vcap_field vsc7514_vcap_is1_actions[] = { + [VCAP_IS1_ACT_DSCP_ENA] = { 0, 1}, + [VCAP_IS1_ACT_DSCP_VAL] = { 1, 6}, + [VCAP_IS1_ACT_QOS_ENA] = { 7, 1}, + [VCAP_IS1_ACT_QOS_VAL] = { 8, 3}, + [VCAP_IS1_ACT_DP_ENA] = { 11, 1}, + [VCAP_IS1_ACT_DP_VAL] = { 12, 1}, + [VCAP_IS1_ACT_PAG_OVERRIDE_MASK] = { 13, 8}, + [VCAP_IS1_ACT_PAG_VAL] = { 21, 8}, + [VCAP_IS1_ACT_RSV] = { 29, 9}, + /* The fields below are incorrectly shifted by 2 in the manual */ + [VCAP_IS1_ACT_VID_REPLACE_ENA] = { 38, 1}, + [VCAP_IS1_ACT_VID_ADD_VAL] = { 39, 12}, + [VCAP_IS1_ACT_FID_SEL] = { 51, 2}, + [VCAP_IS1_ACT_FID_VAL] = { 53, 13}, + [VCAP_IS1_ACT_PCP_DEI_ENA] = { 66, 1}, + [VCAP_IS1_ACT_PCP_VAL] = { 67, 3}, + [VCAP_IS1_ACT_DEI_VAL] = { 70, 1}, + [VCAP_IS1_ACT_VLAN_POP_CNT_ENA] = { 71, 1}, + [VCAP_IS1_ACT_VLAN_POP_CNT] = { 
72, 2}, + [VCAP_IS1_ACT_CUSTOM_ACE_TYPE_ENA] = { 74, 4}, + [VCAP_IS1_ACT_HIT_STICKY] = { 78, 1}, }; static const struct vcap_field vsc7514_vcap_is2_keys[] = { @@ -856,15 +976,32 @@ static const struct vcap_field vsc7514_vcap_is2_actions[] = { [VCAP_IS2_ACT_HIT_CNT] = { 49, 32}, }; -static const struct vcap_props vsc7514_vcap_props[] = { +static struct vcap_props vsc7514_vcap_props[] = { + [VCAP_ES0] = { + .action_type_width = 0, + .action_table = { + [ES0_ACTION_TYPE_NORMAL] = { + .width = 73, /* HIT_STICKY not included */ + .count = 1, + }, + }, + .target = S0, + .keys = vsc7514_vcap_es0_keys, + .actions = vsc7514_vcap_es0_actions, + }, + [VCAP_IS1] = { + .action_type_width = 0, + .action_table = { + [IS1_ACTION_TYPE_NORMAL] = { + .width = 78, /* HIT_STICKY not included */ + .count = 4, + }, + }, + .target = S1, + .keys = vsc7514_vcap_is1_keys, + .actions = vsc7514_vcap_is1_actions, + }, [VCAP_IS2] = { - .tg_width = 2, - .sw_count = 4, - .entry_count = VSC7514_VCAP_IS2_CNT, - .entry_width = VSC7514_VCAP_IS2_ENTRY_WIDTH, - .action_count = VSC7514_VCAP_IS2_CNT + - VSC7514_VCAP_PORT_CNT + 2, - .action_width = 99, .action_type_width = 1, .action_table = { [IS2_ACTION_TYPE_NORMAL] = { @@ -876,8 +1013,9 @@ static const struct vcap_props vsc7514_vcap_props[] = { .count = 4 }, }, - .counter_words = 4, - .counter_width = 32, + .target = S2, + .keys = vsc7514_vcap_is2_keys, + .actions = vsc7514_vcap_is2_actions, }, }; @@ -932,10 +1070,6 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, if (!ocelot->ports) return -ENOMEM; - /* No NPI port */ - ocelot_configure_cpu(ocelot, -1, OCELOT_TAG_PREFIX_NONE, - OCELOT_TAG_PREFIX_NONE); - for_each_available_child_of_node(ports, portnp) { struct ocelot_port_private *priv; struct ocelot_port *ocelot_port; @@ -1043,6 +1177,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) { QSYS, "qsys" }, { ANA, "ana" }, { QS, "qs" }, + { S0, "s0" }, + { S1, "s1" }, { S2, "s2" }, { PTP, "ptp", 1 }, }; @@ -1119,9 +1255,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ocelot->num_phys_ports = of_get_child_count(ports); - ocelot->vcap_is2_keys = vsc7514_vcap_is2_keys; - ocelot->vcap_is2_actions = vsc7514_vcap_is2_actions; ocelot->vcap = vsc7514_vcap_props; + ocelot->inj_prefix = OCELOT_TAG_PREFIX_NONE; + ocelot->xtr_prefix = OCELOT_TAG_PREFIX_NONE; + ocelot->npi = -1; err = ocelot_init(ocelot); if (err) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 4a5beafa0493..1634ca6d4a8f 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3543,11 +3543,10 @@ static void myri10ge_free_slices(struct myri10ge_priv *mgp) ss->fw_stats, ss->fw_stats_bus); ss->fw_stats = NULL; } - napi_hash_del(&ss->napi); - netif_napi_del(&ss->napi); + __netif_napi_del(&ss->napi); } /* Wait till napi structs are no longer used, and then free ss. 
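__netif_napi_del() only unlinks the napi instances; the memory behind
them may be reused only after an RCU grace period, which the
synchronize_net() below waits out.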
*/ - synchronize_rcu(); + synchronize_net(); kfree(mgp->ss); mgp->ss = NULL; } diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 3de8430ee8c5..b81e1487945c 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -1916,9 +1916,9 @@ static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue) static int alloc_ring(struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); - np->rx_ring = pci_alloc_consistent(np->pci_dev, - sizeof(struct netdev_desc) * (RX_RING_SIZE+TX_RING_SIZE), - &np->ring_dma); + np->rx_ring = dma_alloc_coherent(&np->pci_dev->dev, + sizeof(struct netdev_desc) * (RX_RING_SIZE + TX_RING_SIZE), + &np->ring_dma, GFP_KERNEL); if (!np->rx_ring) return -ENOMEM; np->tx_ring = &np->rx_ring[RX_RING_SIZE]; @@ -1939,10 +1939,10 @@ static void refill_rx(struct net_device *dev) np->rx_skbuff[entry] = skb; if (skb == NULL) break; /* Better luck next round. */ - np->rx_dma[entry] = pci_map_single(np->pci_dev, - skb->data, buflen, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->rx_dma[entry])) { + np->rx_dma[entry] = dma_map_single(&np->pci_dev->dev, + skb->data, buflen, + DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, np->rx_dma[entry])) { dev_kfree_skb_any(skb); np->rx_skbuff[entry] = NULL; break; /* Better luck next round. */ @@ -2013,9 +2013,8 @@ static void drain_tx(struct net_device *dev) for (i = 0; i < TX_RING_SIZE; i++) { if (np->tx_skbuff[i]) { - pci_unmap_single(np->pci_dev, - np->tx_dma[i], np->tx_skbuff[i]->len, - PCI_DMA_TODEVICE); + dma_unmap_single(&np->pci_dev->dev, np->tx_dma[i], + np->tx_skbuff[i]->len, DMA_TO_DEVICE); dev_kfree_skb(np->tx_skbuff[i]); dev->stats.tx_dropped++; } @@ -2034,9 +2033,9 @@ static void drain_rx(struct net_device *dev) np->rx_ring[i].cmd_status = 0; np->rx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. 
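(0xBADF00D0 is an easily recognized poison value, not a real buffer.)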
*/ if (np->rx_skbuff[i]) { - pci_unmap_single(np->pci_dev, np->rx_dma[i], - buflen + NATSEMI_PADDING, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, np->rx_dma[i], + buflen + NATSEMI_PADDING, + DMA_FROM_DEVICE); dev_kfree_skb(np->rx_skbuff[i]); } np->rx_skbuff[i] = NULL; @@ -2052,9 +2051,9 @@ static void drain_ring(struct net_device *dev) static void free_ring(struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); - pci_free_consistent(np->pci_dev, - sizeof(struct netdev_desc) * (RX_RING_SIZE+TX_RING_SIZE), - np->rx_ring, np->ring_dma); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct netdev_desc) * (RX_RING_SIZE + TX_RING_SIZE), + np->rx_ring, np->ring_dma); } static void reinit_rx(struct net_device *dev) @@ -2101,9 +2100,9 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) entry = np->cur_tx % TX_RING_SIZE; np->tx_skbuff[entry] = skb; - np->tx_dma[entry] = pci_map_single(np->pci_dev, - skb->data,skb->len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(np->pci_dev, np->tx_dma[entry])) { + np->tx_dma[entry] = dma_map_single(&np->pci_dev->dev, skb->data, + skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, np->tx_dma[entry])) { np->tx_skbuff[entry] = NULL; dev_kfree_skb_irq(skb); dev->stats.tx_dropped++; @@ -2169,9 +2168,8 @@ static void netdev_tx_done(struct net_device *dev) dev->stats.tx_window_errors++; dev->stats.tx_errors++; } - pci_unmap_single(np->pci_dev,np->tx_dma[entry], - np->tx_skbuff[entry]->len, - PCI_DMA_TODEVICE); + dma_unmap_single(&np->pci_dev->dev, np->tx_dma[entry], + np->tx_skbuff[entry]->len, DMA_TO_DEVICE); /* Free the original skb. */ dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; @@ -2359,21 +2357,22 @@ static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do) (skb = netdev_alloc_skb(dev, pkt_len + RX_OFFSET)) != NULL) { /* 16 byte align the IP header */ skb_reserve(skb, RX_OFFSET); - pci_dma_sync_single_for_cpu(np->pci_dev, - np->rx_dma[entry], - buflen, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&np->pci_dev->dev, + np->rx_dma[entry], + buflen, + DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); skb_put(skb, pkt_len); - pci_dma_sync_single_for_device(np->pci_dev, - np->rx_dma[entry], - buflen, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&np->pci_dev->dev, + np->rx_dma[entry], + buflen, + DMA_FROM_DEVICE); } else { - pci_unmap_single(np->pci_dev, np->rx_dma[entry], + dma_unmap_single(&np->pci_dev->dev, + np->rx_dma[entry], buflen + NATSEMI_PADDING, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); skb_put(skb = np->rx_skbuff[entry], pkt_len); np->rx_skbuff[entry] = NULL; } diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 8e24c7acf79b..72794d158871 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -526,8 +526,8 @@ static inline int ns83820_add_rx_skb(struct ns83820 *dev, struct sk_buff *skb) dev->rx_info.next_empty = (next_empty + 1) % NR_RX_DESC; cmdsts = REAL_RX_BUF_SIZE | CMDSTS_INTR; - buf = pci_map_single(dev->pci_dev, skb->data, - REAL_RX_BUF_SIZE, PCI_DMA_FROMDEVICE); + buf = dma_map_single(&dev->pci_dev->dev, skb->data, REAL_RX_BUF_SIZE, + DMA_FROM_DEVICE); build_rx_desc(dev, sg, 0, buf, cmdsts, 0); /* update link of previous rx */ if (likely(next_empty != dev->rx_info.next_rx)) @@ -600,12 +600,14 @@ static void phy_intr(struct net_device *ndev) struct ns83820 *dev = PRIV(ndev); static const char 
*speeds[] = { "10", "100", "1000", "1000(?)", "1000F" }; u32 cfg, new_cfg; - u32 tbisr, tanar, tanlpar; + u32 tanar, tanlpar; int speed, fullduplex, newlinkstate; cfg = readl(dev->base + CFG) ^ SPDSTS_POLARITY; if (dev->CFG_cache & CFG_TBI_EN) { + u32 __maybe_unused tbisr; + /* we have an optical transceiver */ tbisr = readl(dev->base + TBISR); tanar = readl(dev->base + TANAR); @@ -858,8 +860,8 @@ static void rx_irq(struct net_device *ndev) mb(); clear_rx_desc(dev, next_rx); - pci_unmap_single(dev->pci_dev, bufptr, - RX_BUF_SIZE, PCI_DMA_FROMDEVICE); + dma_unmap_single(&dev->pci_dev->dev, bufptr, RX_BUF_SIZE, + DMA_FROM_DEVICE); len = cmdsts & CMDSTS_LEN_MASK; #ifdef NS83820_VLAN_ACCEL_SUPPORT /* NH: As was mentioned below, this chip is kinda @@ -923,10 +925,10 @@ out: spin_unlock_irqrestore(&info->lock, flags); } -static void rx_action(unsigned long _dev) +static void rx_action(struct tasklet_struct *t) { - struct net_device *ndev = (void *)_dev; - struct ns83820 *dev = PRIV(ndev); + struct ns83820 *dev = from_tasklet(dev, t, rx_tasklet); + struct net_device *ndev = dev->ndev; rx_irq(ndev); writel(ihr, dev->base + IHR); @@ -985,17 +987,13 @@ static void do_tx_done(struct net_device *ndev) len = cmdsts & CMDSTS_LEN_MASK; addr = desc_addr_get(desc + DESC_BUFPTR); if (skb) { - pci_unmap_single(dev->pci_dev, - addr, - len, - PCI_DMA_TODEVICE); + dma_unmap_single(&dev->pci_dev->dev, addr, len, + DMA_TO_DEVICE); dev_consume_skb_irq(skb); atomic_dec(&dev->nr_tx_skbs); } else - pci_unmap_page(dev->pci_dev, - addr, - len, - PCI_DMA_TODEVICE); + dma_unmap_page(&dev->pci_dev->dev, addr, len, + DMA_TO_DEVICE); tx_done_idx = (tx_done_idx + 1) % NR_TX_DESC; dev->tx_done_idx = tx_done_idx; @@ -1023,10 +1021,10 @@ static void ns83820_cleanup_tx(struct ns83820 *dev) dev->tx_skbs[i] = NULL; if (skb) { __le32 *desc = dev->tx_descs + (i * DESC_SIZE); - pci_unmap_single(dev->pci_dev, - desc_addr_get(desc + DESC_BUFPTR), - le32_to_cpu(desc[DESC_CMDSTS]) & CMDSTS_LEN_MASK, - PCI_DMA_TODEVICE); + dma_unmap_single(&dev->pci_dev->dev, + desc_addr_get(desc + DESC_BUFPTR), + le32_to_cpu(desc[DESC_CMDSTS]) & CMDSTS_LEN_MASK, + DMA_TO_DEVICE); dev_kfree_skb_irq(skb); atomic_dec(&dev->nr_tx_skbs); } @@ -1121,7 +1119,8 @@ again: len = skb->len; if (nr_frags) len -= skb->data_len; - buf = pci_map_single(dev->pci_dev, skb->data, len, PCI_DMA_TODEVICE); + buf = dma_map_single(&dev->pci_dev->dev, skb->data, len, + DMA_TO_DEVICE); first_desc = dev->tx_descs + (free_idx * DESC_SIZE); @@ -1207,7 +1206,7 @@ static int ns83820_get_link_ksettings(struct net_device *ndev, struct ethtool_link_ksettings *cmd) { struct ns83820 *dev = PRIV(ndev); - u32 cfg, tanar, tbicr; + u32 cfg, tbicr; int fullduplex = 0; u32 supported; @@ -1226,7 +1225,7 @@ static int ns83820_get_link_ksettings(struct net_device *ndev, /* read current configuration */ cfg = readl(dev->base + CFG) ^ SPDSTS_POLARITY; - tanar = readl(dev->base + TANAR); + readl(dev->base + TANAR); tbicr = readl(dev->base + TBICR); fullduplex = (cfg & CFG_DUPSTS) ? 1 : 0; @@ -1902,12 +1901,12 @@ static int ns83820_init_one(struct pci_dev *pci_dev, /* See if we can set the dma mask early on; failure is fatal. 
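Try a 64-bit mask first and remember the result in using_dac, else fall
back to 32-bit before declaring the device unusable.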
*/ if (sizeof(dma_addr_t) == 8 && - !pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) { + !dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64))) { using_dac = 1; - } else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) { + } else if (!dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32))) { using_dac = 0; } else { - dev_warn(&pci_dev->dev, "pci_set_dma_mask failed!\n"); + dev_warn(&pci_dev->dev, "dma_set_mask failed!\n"); return -ENODEV; } @@ -1927,7 +1926,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev, SET_NETDEV_DEV(ndev, &pci_dev->dev); INIT_WORK(&dev->tq_refill, queue_refill); - tasklet_init(&dev->rx_tasklet, rx_action, (unsigned long)ndev); + tasklet_setup(&dev->rx_tasklet, rx_action); err = pci_enable_device(pci_dev); if (err) { @@ -1938,10 +1937,12 @@ static int ns83820_init_one(struct pci_dev *pci_dev, pci_set_master(pci_dev); addr = pci_resource_start(pci_dev, 1); dev->base = ioremap(addr, PAGE_SIZE); - dev->tx_descs = pci_alloc_consistent(pci_dev, - 4 * DESC_SIZE * NR_TX_DESC, &dev->tx_phy_descs); - dev->rx_info.descs = pci_alloc_consistent(pci_dev, - 4 * DESC_SIZE * NR_RX_DESC, &dev->rx_info.phy_descs); + dev->tx_descs = dma_alloc_coherent(&pci_dev->dev, + 4 * DESC_SIZE * NR_TX_DESC, + &dev->tx_phy_descs, GFP_KERNEL); + dev->rx_info.descs = dma_alloc_coherent(&pci_dev->dev, + 4 * DESC_SIZE * NR_RX_DESC, + &dev->rx_info.phy_descs, GFP_KERNEL); err = -ENOMEM; if (!dev->base || !dev->tx_descs || !dev->rx_info.descs) goto out_disable; @@ -2183,8 +2184,10 @@ out_free_irq: out_disable: if (dev->base) iounmap(dev->base); - pci_free_consistent(pci_dev, 4 * DESC_SIZE * NR_TX_DESC, dev->tx_descs, dev->tx_phy_descs); - pci_free_consistent(pci_dev, 4 * DESC_SIZE * NR_RX_DESC, dev->rx_info.descs, dev->rx_info.phy_descs); + dma_free_coherent(&pci_dev->dev, 4 * DESC_SIZE * NR_TX_DESC, + dev->tx_descs, dev->tx_phy_descs); + dma_free_coherent(&pci_dev->dev, 4 * DESC_SIZE * NR_RX_DESC, + dev->rx_info.descs, dev->rx_info.phy_descs); pci_disable_device(pci_dev); out_free: free_netdev(ndev); @@ -2205,10 +2208,10 @@ static void ns83820_remove_one(struct pci_dev *pci_dev) unregister_netdev(ndev); free_irq(dev->pci_dev->irq, ndev); iounmap(dev->base); - pci_free_consistent(dev->pci_dev, 4 * DESC_SIZE * NR_TX_DESC, - dev->tx_descs, dev->tx_phy_descs); - pci_free_consistent(dev->pci_dev, 4 * DESC_SIZE * NR_RX_DESC, - dev->rx_info.descs, dev->rx_info.phy_descs); + dma_free_coherent(&dev->pci_dev->dev, 4 * DESC_SIZE * NR_TX_DESC, + dev->tx_descs, dev->tx_phy_descs); + dma_free_coherent(&dev->pci_dev->dev, 4 * DESC_SIZE * NR_RX_DESC, + dev->rx_info.descs, dev->rx_info.phy_descs); pci_disable_device(dev->pci_dev); free_netdev(ndev); } diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index dd3605aa5f23..d17d1b4f2585 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -143,7 +143,7 @@ static int sonic_open(struct net_device *dev) /* * Initialize the SONIC */ - sonic_init(dev); + sonic_init(dev, true); netif_start_queue(dev); @@ -153,7 +153,7 @@ static int sonic_open(struct net_device *dev) } /* Wait for the SONIC to become idle. 
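Atomic callers (the tx timeout and the multicast update done under the
lock) pass may_sleep=false and busy-poll with udelay(); process-context
callers sleep in usleep_range() between polls.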
*/ -static void sonic_quiesce(struct net_device *dev, u16 mask) +static void sonic_quiesce(struct net_device *dev, u16 mask, bool may_sleep) { struct sonic_local * __maybe_unused lp = netdev_priv(dev); int i; @@ -163,7 +163,7 @@ static void sonic_quiesce(struct net_device *dev, u16 mask) bits = SONIC_READ(SONIC_CMD) & mask; if (!bits) return; - if (irqs_disabled() || in_interrupt()) + if (!may_sleep) udelay(20); else usleep_range(100, 200); @@ -187,7 +187,7 @@ static int sonic_close(struct net_device *dev) * stop the SONIC, disable interrupts */ SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); - sonic_quiesce(dev, SONIC_CR_ALL); + sonic_quiesce(dev, SONIC_CR_ALL, true); SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); @@ -229,7 +229,7 @@ static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue) * disable all interrupts before releasing DMA buffers */ SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); - sonic_quiesce(dev, SONIC_CR_ALL); + sonic_quiesce(dev, SONIC_CR_ALL, false); SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); @@ -246,7 +246,7 @@ static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue) } } /* Try to restart the adaptor. */ - sonic_init(dev); + sonic_init(dev, false); lp->stats.tx_errors++; netif_trans_update(dev); /* prevent tx timeout */ netif_wake_queue(dev); @@ -692,9 +692,9 @@ static void sonic_multicast_list(struct net_device *dev) /* LCAM and TXP commands can't be used simultaneously */ spin_lock_irqsave(&lp->lock, flags); - sonic_quiesce(dev, SONIC_CR_TXP); + sonic_quiesce(dev, SONIC_CR_TXP, false); SONIC_WRITE(SONIC_CMD, SONIC_CR_LCAM); - sonic_quiesce(dev, SONIC_CR_LCAM); + sonic_quiesce(dev, SONIC_CR_LCAM, false); spin_unlock_irqrestore(&lp->lock, flags); } } @@ -708,7 +708,7 @@ static void sonic_multicast_list(struct net_device *dev) /* * Initialize the SONIC ethernet controller. 
*/ -static int sonic_init(struct net_device *dev) +static int sonic_init(struct net_device *dev, bool may_sleep) { struct sonic_local *lp = netdev_priv(dev); int i; @@ -730,7 +730,7 @@ static int sonic_init(struct net_device *dev) */ SONIC_WRITE(SONIC_CMD, 0); SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS | SONIC_CR_STP); - sonic_quiesce(dev, SONIC_CR_ALL); + sonic_quiesce(dev, SONIC_CR_ALL, may_sleep); /* * initialize the receive resource area @@ -759,7 +759,7 @@ static int sonic_init(struct net_device *dev) netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__); SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA); - sonic_quiesce(dev, SONIC_CR_RRRA); + sonic_quiesce(dev, SONIC_CR_RRRA, may_sleep); /* * Initialize the receive descriptors so that they @@ -834,7 +834,7 @@ static int sonic_init(struct net_device *dev) * load the CAM */ SONIC_WRITE(SONIC_CMD, SONIC_CR_LCAM); - sonic_quiesce(dev, SONIC_CR_LCAM); + sonic_quiesce(dev, SONIC_CR_LCAM, may_sleep); /* * enable receiver, disable loopback diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 3cbb62c860c8..a5b803eb8c8a 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -338,7 +338,7 @@ static void sonic_rx(struct net_device *dev); static int sonic_close(struct net_device *dev); static struct net_device_stats *sonic_get_stats(struct net_device *dev); static void sonic_multicast_list(struct net_device *dev); -static int sonic_init(struct net_device *dev); +static int sonic_init(struct net_device *dev, bool may_sleep); static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue); static void sonic_msg_init(struct net_device *dev); static int sonic_alloc_descriptors(struct net_device *dev); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index bc94970bea45..d13d92bf7447 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -1000,7 +1000,7 @@ static void free_shared_mem(struct s2io_nic *nic) } } -/** +/* * s2io_verify_pci_mode - */ @@ -1035,7 +1035,7 @@ static int s2io_on_nec_bridge(struct pci_dev *s2io_pdev) } static int bus_speed[8] = {33, 133, 133, 200, 266, 133, 200, 266}; -/** +/* * s2io_print_pci_mode - */ static int s2io_print_pci_mode(struct s2io_nic *nic) @@ -2064,6 +2064,9 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag) /** * verify_pcc_quiescent- Checks for PCC quiescent state + * @sp : private member of the device structure, which is a pointer to the + * s2io_nic structure. + * @flag: boolean controlling function path * Return: 1 If PCC is quiescence * 0 If PCC is not quiescence */ @@ -2099,6 +2102,8 @@ static int verify_pcc_quiescent(struct s2io_nic *sp, int flag) } /** * verify_xena_quiescence - Checks whether the H/W is ready + * @sp : private member of the device structure, which is a pointer to the + * s2io_nic structure. * Description: Returns whether the H/W is ready to go or not. 
Depending * on whether adapter enable bit was written or not the comparison * differs and the calling function passes the input argument flag to @@ -2305,6 +2310,9 @@ static int start_nic(struct s2io_nic *nic) } /** * s2io_txdl_getskb - Get the skb from txdl, unmap and return skb + * @fifo_data: fifo data pointer + * @txdlp: descriptor + * @get_off: unused */ static struct sk_buff *s2io_txdl_getskb(struct fifo_info *fifo_data, struct TxD *txdlp, int get_off) @@ -2391,7 +2399,7 @@ static void free_tx_buffers(struct s2io_nic *nic) /** * stop_nic - To stop the nic - * @nic ; device private variable. + * @nic : device private variable. * Description: * This function does exactly the opposite of what the start_nic() * function does. This function is called to stop the device. @@ -2419,7 +2427,8 @@ static void stop_nic(struct s2io_nic *nic) /** * fill_rx_buffers - Allocates the Rx side skbs - * @ring_info: per ring structure + * @nic : device private variable. + * @ring: per ring structure * @from_card_up: If this is true, we will map the buffer to get * the dma address for buf0 and buf1 to give it to the card. * Else we will sync the already mapped buffer to give it to the card. @@ -2864,7 +2873,7 @@ static void s2io_netpoll(struct net_device *dev) /** * rx_intr_handler - Rx interrupt handler - * @ring_info: per ring structure. + * @ring_data: per ring structure. * @budget: budget for napi processing. * Description: * If the interrupt is because of a received frame or if the @@ -2972,7 +2981,7 @@ static int rx_intr_handler(struct ring_info *ring_data, int budget) /** * tx_intr_handler - Transmit interrupt handler - * @nic : device private variable + * @fifo_data : fifo data pointer * Description: * If an interrupt was raised to indicate DMA complete of the * Tx packet, this function is called. It identifies the last TxD @@ -3153,6 +3162,8 @@ static u64 s2io_mdio_read(u32 mmd_type, u64 addr, struct net_device *dev) /** * s2io_chk_xpak_counter - Function to check the status of the xpak counters * @counter : counter value to be updated + * @regs_stat : registers status + * @index : index * @flag : flag to indicate the status * @type : counter type * Description: @@ -3309,8 +3320,9 @@ static void s2io_updt_xpak_counter(struct net_device *dev) /** * wait_for_cmd_complete - waits for a command to complete. - * @sp : private member of the device structure, which is a pointer to the - * s2io_nic structure. + * @addr: address + * @busy_bit: bit to check for busy + * @bit_state: state to check * Description: Function that waits for a command to Write into RMAC * ADDR DATA registers to be completed and returns either success or * error depending on whether the command was complete or not. @@ -4335,7 +4347,7 @@ static int do_s2io_chk_alarm_bit(u64 value, void __iomem *addr, /** * s2io_handle_errors - Xframe error indication handler - * @nic: device private variable + * @dev_id: opaque handle to dev * Description: Handle alarms such as loss of link, single or * double ECC errors, critical and serious errors. 
* Return Value: @@ -4739,7 +4751,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id) return IRQ_HANDLED; } -/** +/* * s2io_updt_stats - */ static void s2io_updt_stats(struct s2io_nic *sp) @@ -5168,7 +5180,7 @@ static u64 do_s2io_read_unicast_mc(struct s2io_nic *sp, int offset) return tmp64 >> 16; } -/** +/* * s2io_set_mac_addr - driver entry point */ @@ -5243,8 +5255,7 @@ static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr) /** * s2io_ethtool_set_link_ksettings - Sets different link parameters. - * @sp : private member of the device structure, which is a pointer to the - * s2io_nic structure. + * @dev : pointer to netdev * @cmd: pointer to the structure with parameters given by ethtool to set * link information. * Description: @@ -5273,8 +5284,7 @@ s2io_ethtool_set_link_ksettings(struct net_device *dev, /** * s2io_ethtol_get_link_ksettings - Return link specific information. - * @sp : private member of the device structure, pointer to the - * s2io_nic structure. + * @dev: pointer to netdev * @cmd : pointer to the structure with parameters given by ethtool * to return link information. * Description: @@ -5313,8 +5323,7 @@ s2io_ethtool_get_link_ksettings(struct net_device *dev, /** * s2io_ethtool_gdrvinfo - Returns driver specific information. - * @sp : private member of the device structure, which is a pointer to the - * s2io_nic structure. + * @dev: pointer to netdev * @info : pointer to the structure with parameters given by ethtool to * return driver information. * Description: @@ -5335,11 +5344,10 @@ static void s2io_ethtool_gdrvinfo(struct net_device *dev, /** * s2io_ethtool_gregs - dumps the entire space of Xfame into the buffer. - * @sp: private member of the device structure, which is a pointer to the - * s2io_nic structure. + * @dev: pointer to netdev * @regs : pointer to the structure with parameters given by ethtool for - * dumping the registers. - * @reg_space: The input argument into which all the registers are dumped. + * dumping the registers. + * @space: The input argument into which all the registers are dumped. * Description: * Dumps the entire register space of xFrame NIC into the user given * buffer area. @@ -5471,8 +5479,7 @@ static void s2io_ethtool_gringparam(struct net_device *dev, /** * s2io_ethtool_getpause_data -Pause frame frame generation and reception. - * @sp : private member of the device structure, which is a pointer to the - * s2io_nic structure. + * @dev: pointer to netdev * @ep : pointer to the structure with pause parameters given by ethtool. * Description: * Returns the Pause frame generation and reception capability of the NIC. @@ -5496,8 +5503,7 @@ static void s2io_ethtool_getpause_data(struct net_device *dev, /** * s2io_ethtool_setpause_data - set/reset pause frame generation. - * @sp : private member of the device structure, which is a pointer to the - * s2io_nic structure. + * @dev: pointer to netdev * @ep : pointer to the structure with pause parameters given by ethtool. * Description: * It can be used to set or reset Pause frame generation or reception @@ -5526,6 +5532,7 @@ static int s2io_ethtool_setpause_data(struct net_device *dev, return 0; } +#define S2IO_DEV_ID 5 /** * read_eeprom - reads 4 bytes of data from user given offset. * @sp : private member of the device structure, which is a pointer to the @@ -5541,8 +5548,6 @@ static int s2io_ethtool_setpause_data(struct net_device *dev, * Return value: * -1 on failure and 0 on success. 
*/ - -#define S2IO_DEV_ID 5 static int read_eeprom(struct s2io_nic *sp, int off, u64 *data) { int ret = -1; @@ -5734,8 +5739,7 @@ static void s2io_vpd_read(struct s2io_nic *nic) /** * s2io_ethtool_geeprom - reads the value stored in the Eeprom. - * @sp : private member of the device structure, which is a pointer to the - * s2io_nic structure. + * @dev: pointer to netdev * @eeprom : pointer to the user level structure provided by ethtool, * containing all relevant information. * @data_buf : user defined value to be written into Eeprom. @@ -5771,11 +5775,10 @@ static int s2io_ethtool_geeprom(struct net_device *dev, /** * s2io_ethtool_seeprom - tries to write the user provided value in Eeprom - * @sp : private member of the device structure, which is a pointer to the - * s2io_nic structure. + * @dev: pointer to netdev * @eeprom : pointer to the user level structure provided by ethtool, * containing all relevant information. - * @data_buf ; user defined value to be written into Eeprom. + * @data_buf : user defined value to be written into Eeprom. * Description: * Tries to write the user provided value in the Eeprom, at the offset * given by the user. @@ -6027,7 +6030,7 @@ static int s2io_bist_test(struct s2io_nic *sp, uint64_t *data) /** * s2io_link_test - verifies the link state of the nic - * @sp ; private member of the device structure, which is a pointer to the + * @sp: private member of the device structure, which is a pointer to the * s2io_nic structure. * @data: variable that returns the result of each of the test conducted by * the driver. @@ -6150,8 +6153,7 @@ static int s2io_rldram_test(struct s2io_nic *sp, uint64_t *data) /** * s2io_ethtool_test - conducts 6 tsets to determine the health of card. - * @sp : private member of the device structure, which is a pointer to the - * s2io_nic structure. + * @dev: pointer to netdev * @ethtest : pointer to a ethtool command specific structure that will be * returned to the user. * @data : variable that returns the result of each of the test @@ -6597,7 +6599,7 @@ static const struct ethtool_ops netdev_ethtool_ops = { /** * s2io_ioctl - Entry point for the Ioctl * @dev : Device pointer. - * @ifr : An IOCTL specefic structure, that can contain a pointer to + * @rq : An IOCTL specific structure, that can contain a pointer to * a proprietary structure used to pass information to the driver. * @cmd : This is used to distinguish between the different commands that * can be passed to the IOCTL functions. @@ -6650,7 +6652,7 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu) /** * s2io_set_link - Set the LInk status - * @data: long pointer to device private structue + * @work: work struct containing a pointer to device private structure * Description: Sets the link status for the adapter @@ -7187,7 +7189,7 @@ static int s2io_card_up(struct s2io_nic *sp) /** * s2io_restart_nic - Resets the NIC. - * @data : long pointer to the device private structure + * @work : work struct containing a pointer to the device private structure * Description: * This function is scheduled to be run by the s2io_tx_watchdog * function after 0.5 secs to reset the NIC. The idea is to reduce * @@ -7218,6 +7220,7 @@ out_unlock: /** * s2io_tx_watchdog - Watchdog for transmit side. * @dev : Pointer to net device structure + * @txqueue: index of the hanging queue * Description: * This function is triggered if the Tx Queue is stopped * for a pre-defined amount of time when the Interface is still up.
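The @work annotations above for s2io_restart_nic() and s2io_set_link() describe handlers that now receive the work item itself and recover the driver's private state with container_of(). A minimal, self-contained sketch of that pattern follows; the struct and field names are invented for illustration and are not s2io's actual layout:

#include <linux/kernel.h>
#include <linux/workqueue.h>

/* Invented stand-in for a driver's private struct; only the embedded
 * work item matters for the pattern.
 */
struct example_nic {
	struct work_struct rst_task;
	int link_state;
};

static void example_restart_nic(struct work_struct *work)
{
	/* Walk back from the member to the enclosing structure. */
	struct example_nic *nic =
		container_of(work, struct example_nic, rst_task);

	nic->link_state = 0;	/* reset/recovery logic would go here */
}

/* Wired up once at init time:
 *	INIT_WORK(&nic->rst_task, example_restart_nic);
 */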
@@ -7242,11 +7245,8 @@ static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue) /** * rx_osm_handler - To perform some OS related operations on SKB. - * @sp: private member of the device structure,pointer to s2io_nic structure. - * @skb : the socket buffer pointer. - * @len : length of the packet - * @cksum : FCS checksum of the frame. - * @ring_no : the ring from which this RxD was extracted. + * @ring_data : the ring from which this RxD was extracted. + * @rxdp: descriptor * Description: * This function is called by the Rx interrupt serivce routine to perform * some OS related operations on the SKB before passing it to the upper @@ -7576,9 +7576,10 @@ static int s2io_verify_parm(struct pci_dev *pdev, u8 *dev_intr_type, } /** - * rts_ds_steer - Receive traffic steering based on IPv4 or IPv6 TOS - * or Traffic class respectively. + * rts_ds_steer - Receive traffic steering based on IPv4 or IPv6 TOS or Traffic class respectively. * @nic: device private variable + * @ds_codepoint: data + * @ring: ring index * Description: The function configures the receive steering to * desired receive ring. * Return Value: SUCCESS on success and diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 78eba10300ae..f5d48d7c4ce2 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -988,6 +988,9 @@ exit: /** * vxge_hw_device_hw_info_get - Get the hw information + * @bar0: the bar + * @hw_info: the hw_info struct + * * Returns the vpath mask that has the bits set for each vpath allocated * for the driver, FW version information, and the first mac address for * each vpath @@ -2303,16 +2306,9 @@ exit: static inline void vxge_os_dma_malloc_async(struct pci_dev *pdev, void *devh, unsigned long size) { - gfp_t flags; void *vaddr; - if (in_interrupt()) - flags = GFP_ATOMIC | GFP_DMA; - else - flags = GFP_KERNEL | GFP_DMA; - - vaddr = kmalloc((size), flags); - + vaddr = kmalloc(size, GFP_KERNEL | GFP_DMA); vxge_hw_blockpool_block_add(devh, vaddr, size, pdev, pdev); } @@ -3926,7 +3922,7 @@ exit: /** * vxge_hw_vpath_check_leak - Check for memory leak - * @ringh: Handle to the ring object used for receive + * @ring: Handle to the ring object used for receive * * If PRC_RXD_DOORBELL_VPn.NEW_QW_CNT is larger or equal to * PRC_CFG6_VPn.RXD_SPAT then a leak has occurred. 
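The vxge_os_dma_malloc_async() hunk above, and the matching vxge_os_dma_malloc() hunk in vxge-config.h below, drop the in_interrupt() test that used to pick between GFP_KERNEL and GFP_ATOMIC. This follows the tree-wide in_interrupt() cleanup: a helper cannot reliably infer its caller's context, so either the flags are fixed (as here, where the remaining callers all run in process context) or they are passed in by the caller. A hedged sketch of the pass-it-in shape, with invented names:

#include <linux/slab.h>

/* Illustrative only: the caller declares its own context via @gfp
 * instead of the helper probing in_interrupt().
 */
static void *example_dma_buf_alloc(size_t size, gfp_t gfp)
{
	return kmalloc(size, gfp | GFP_DMA);
}

/* Process context (probe/open paths):	example_dma_buf_alloc(sz, GFP_KERNEL);
 * Atomic context (IRQ/timer paths):	example_dma_buf_alloc(sz, GFP_ATOMIC);
 */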
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h index 373165119850..0cd0750484ae 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h @@ -1899,18 +1899,13 @@ static inline void *vxge_os_dma_malloc(struct pci_dev *pdev, struct pci_dev **p_dmah, struct pci_dev **p_dma_acch) { - gfp_t flags; void *vaddr; unsigned long misaligned = 0; int realloc_flag = 0; *p_dma_acch = *p_dmah = NULL; - if (in_interrupt()) - flags = GFP_ATOMIC | GFP_DMA; - else - flags = GFP_KERNEL | GFP_DMA; realloc: - vaddr = kmalloc((size), flags); + vaddr = kmalloc(size, GFP_KERNEL | GFP_DMA); if (vaddr == NULL) return vaddr; misaligned = (unsigned long)VXGE_ALIGN((unsigned long)vaddr, diff --git a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c index 03c3d1230c17..4d91026485ae 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c @@ -119,7 +119,7 @@ static void vxge_ethtool_gdrvinfo(struct net_device *dev, * @dev: device pointer. * @regs: pointer to the structure with parameters given by ethtool for * dumping the registers. - * @reg_space: The input argument into which all the registers are dumped. + * @space: The input argument into which all the registers are dumped. * * Dumps the vpath register space of Titan NIC into the user given * buffer area. diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 1ded4e275086..87892bd992b1 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -1275,6 +1275,7 @@ _set_all_mcast: /** * vxge_set_mac_addr * @dev: pointer to the device structure + * @p: socket info * * Update entry "0" (default MAC addr) */ @@ -1799,7 +1800,7 @@ static void vxge_reset(struct work_struct *work) /** * vxge_poll - Receive handler when Receive Polling is used. - * @dev: pointer to the device structure. + * @napi: pointer to the napi structure. * @budget: Number of packets budgeted to be processed in this iteration. * * This function comes into picture only if Receive side is being handled @@ -3096,7 +3097,7 @@ static int vxge_change_mtu(struct net_device *dev, int new_mtu) /** * vxge_get_stats64 * @dev: pointer to the device structure - * @stats: pointer to struct rtnl_link_stats64 + * @net_stats: pointer to struct rtnl_link_stats64 * */ static void @@ -3245,7 +3246,7 @@ static int vxge_hwtstamp_get(struct vxgedev *vdev, void __user *data) /** * vxge_ioctl * @dev: Device pointer. - * @ifr: An IOCTL specific structure, that can contain a pointer to + * @rq: An IOCTL specific structure, that can contain a pointer to * a proprietary structure used to pass information to the driver. * @cmd: This is used to distinguish between the different commands that * can be passed to the IOCTL functions. @@ -3269,6 +3270,7 @@ static int vxge_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) /** * vxge_tx_watchdog * @dev: pointer to net device structure + * @txqueue: index of the hanging queue * * Watchdog for transmit side. 
* This function is triggered if the Tx Queue is stopped @@ -4002,6 +4004,7 @@ static void vxge_print_parm(struct vxgedev *vdev, u64 vpath_mask) /** * vxge_pm_suspend - vxge power management suspend entry point + * @dev_d: device pointer * */ static int __maybe_unused vxge_pm_suspend(struct device *dev_d) @@ -4010,6 +4013,7 @@ static int __maybe_unused vxge_pm_suspend(struct device *dev_d) } /** * vxge_pm_resume - vxge power management resume entry point + * @dev_d: device pointer * */ static int __maybe_unused vxge_pm_resume(struct device *dev_d) @@ -4539,7 +4543,7 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre) * due to the fact that HWTS is using the FCS as the location of the * timestamp. The HW FCS checking will still correctly determine if * there is a valid checksum, and the FCS is being removed by the driver - * anyway. So no fucntionality is being lost. Since it is always + * anyway. So no functionality is being lost. Since it is always * enabled, we now simply use the ioctl call to set whether or not the * driver should be paying attention to the HWTS. */ diff --git a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c index 709d20d9938f..ee164970b267 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c @@ -30,8 +30,6 @@ */ enum vxge_hw_status vxge_hw_vpath_intr_enable(struct __vxge_hw_vpath_handle *vp) { - u64 val64; - struct __vxge_hw_virtualpath *vpath; struct vxge_hw_vpath_reg __iomem *vp_reg; enum vxge_hw_status status = VXGE_HW_OK; @@ -84,7 +82,7 @@ enum vxge_hw_status vxge_hw_vpath_intr_enable(struct __vxge_hw_vpath_handle *vp) __vxge_hw_pio_mem_write32_upper((u32)VXGE_HW_INTR_MASK_ALL, &vp_reg->xgmac_vp_int_status); - val64 = readq(&vp_reg->vpath_general_int_status); + readq(&vp_reg->vpath_general_int_status); /* Mask unwanted interrupts */ @@ -157,8 +155,6 @@ exit: enum vxge_hw_status vxge_hw_vpath_intr_disable( struct __vxge_hw_vpath_handle *vp) { - u64 val64; - struct __vxge_hw_virtualpath *vpath; enum vxge_hw_status status = VXGE_HW_OK; struct vxge_hw_vpath_reg __iomem *vp_reg; @@ -179,8 +175,6 @@ enum vxge_hw_status vxge_hw_vpath_intr_disable( (u32)VXGE_HW_INTR_MASK_ALL, &vp_reg->vpath_general_int_mask); - val64 = VXGE_HW_TIM_CLR_INT_EN_VP(1 << (16 - vpath->vp_id)); - writeq(VXGE_HW_INTR_MASK_ALL, &vp_reg->kdfcctl_errors_mask); __vxge_hw_pio_mem_write32_upper((u32)VXGE_HW_INTR_MASK_ALL, @@ -284,7 +278,7 @@ void vxge_hw_vpath_dynamic_rti_rtimer_set(struct __vxge_hw_ring *ring) /** * vxge_hw_channel_msix_mask - Mask MSIX Vector. - * @channeh: Channel for rx or tx handle + * @channel: Channel for rx or tx handle * @msix_id: MSIX ID * * The function masks the msix interrupt for the given msix_id @@ -301,7 +295,7 @@ void vxge_hw_channel_msix_mask(struct __vxge_hw_channel *channel, int msix_id) /** * vxge_hw_channel_msix_unmask - Unmask the MSIX Vector. - * @channeh: Channel for rx or tx handle + * @channel: Channel for rx or tx handle * @msix_id: MSI ID * * The function unmasks the msix interrupt for the given msix_id @@ -356,8 +350,6 @@ u32 vxge_hw_device_set_intr_type(struct __vxge_hw_device *hldev, u32 intr_mode) /** * vxge_hw_device_intr_enable - Enable interrupts. * @hldev: HW device handle. - * @op: One of the enum vxge_hw_device_intr enumerated values specifying - * the type(s) of interrupts to enable. * * Enable Titan interrupts. The function is to be executed the last in * Titan initialization sequence. 
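Several hunks in this file drop only the `val64 =` assignment while keeping the readq()/readl() call itself: those reads are posted-write flushes, not data fetches, so the stored value was dead. The idiom, as a hedged sketch with an invented helper name:

#include <linux/io.h>

/* Reading a device register back forces earlier posted MMIO writes out
 * to the hardware; the returned value is deliberately discarded.
 */
static inline void example_mmio_flush(void __iomem *status_reg)
{
	(void)readl(status_reg);
}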
@@ -411,8 +403,6 @@ void vxge_hw_device_intr_enable(struct __vxge_hw_device *hldev) /** * vxge_hw_device_intr_disable - Disable Titan interrupts. * @hldev: HW device handle. - * @op: One of the enum vxge_hw_device_intr enumerated values specifying - * the type(s) of interrupts to disable. * * Disable Titan interrupts. * @@ -487,9 +477,7 @@ void vxge_hw_device_unmask_all(struct __vxge_hw_device *hldev) */ void vxge_hw_device_flush_io(struct __vxge_hw_device *hldev) { - u32 val32; - - val32 = readl(&hldev->common_reg->titan_general_int_status); + readl(&hldev->common_reg->titan_general_int_status); } /** @@ -1414,7 +1402,7 @@ u32 vxge_hw_fifo_free_txdl_count_get(struct __vxge_hw_fifo *fifoh) /** * vxge_hw_fifo_txdl_reserve - Reserve fifo descriptor. - * @fifoh: Handle to the fifo object used for non offload send + * @fifo: Handle to the fifo object used for non offload send * @txdlh: Reserved descriptor. On success HW fills this "out" parameter * with a valid handle. * @txdl_priv: Buffer to return the pointer to per txdl space @@ -1525,8 +1513,6 @@ void vxge_hw_fifo_txdl_buffer_set(struct __vxge_hw_fifo *fifo, * vxge_hw_fifo_txdl_post - Post descriptor on the fifo channel. * @fifo: Handle to the fifo object used for non offload send * @txdlh: Descriptor obtained via vxge_hw_fifo_txdl_reserve() - * @frags: Number of contiguous buffers that are part of a single - * transmit operation. * * Post descriptor on the 'fifo' type channel for transmission. * Prior to posting the descriptor should be filled in accordance with @@ -1699,8 +1685,7 @@ void vxge_hw_fifo_txdl_free(struct __vxge_hw_fifo *fifo, void *txdlh) } /** - * vxge_hw_vpath_mac_addr_add - Add the mac address entry for this vpath - * to MAC address table. + * vxge_hw_vpath_mac_addr_add - Add the mac address entry for this vpath to MAC address table. * @vp: Vpath handle. * @macaddr: MAC address to be added for this vpath into the list * @macaddr_mask: MAC address mask for macaddr @@ -1716,8 +1701,8 @@ void vxge_hw_fifo_txdl_free(struct __vxge_hw_fifo *fifo, void *txdlh) enum vxge_hw_status vxge_hw_vpath_mac_addr_add( struct __vxge_hw_vpath_handle *vp, - u8 (macaddr)[ETH_ALEN], - u8 (macaddr_mask)[ETH_ALEN], + u8 *macaddr, + u8 *macaddr_mask, enum vxge_hw_vpath_mac_addr_add_mode duplicate_mode) { u32 i; @@ -1765,13 +1750,13 @@ exit: } /** - * vxge_hw_vpath_mac_addr_get - Get the first mac address entry for this vpath - * from MAC address table. + * vxge_hw_vpath_mac_addr_get - Get the first mac address entry * @vp: Vpath handle. * @macaddr: First MAC address entry for this vpath in the list * @macaddr_mask: MAC address mask for macaddr * - * Returns the first mac address and mac address mask in the list for this + * Get the first mac address entry for this vpath from MAC address table. + * Return: the first mac address and mac address mask in the list for this * vpath. * see also: vxge_hw_vpath_mac_addr_get_next * @@ -1779,8 +1764,8 @@ exit: enum vxge_hw_status vxge_hw_vpath_mac_addr_get( struct __vxge_hw_vpath_handle *vp, - u8 (macaddr)[ETH_ALEN], - u8 (macaddr_mask)[ETH_ALEN]) + u8 *macaddr, + u8 *macaddr_mask) { u32 i; u64 data1 = 0ULL; @@ -1816,14 +1801,13 @@ exit: } /** - * vxge_hw_vpath_mac_addr_get_next - Get the next mac address entry for this - * vpath - * from MAC address table. + * vxge_hw_vpath_mac_addr_get_next - Get the next mac address entry * @vp: Vpath handle. 
* @macaddr: Next MAC address entry for this vpath in the list * @macaddr_mask: MAC address mask for macaddr * - * Returns the next mac address and mac address mask in the list for this + * Get the next mac address entry for this vpath from MAC address table. + * Return: the next mac address and mac address mask in the list for this * vpath. * see also: vxge_hw_vpath_mac_addr_get * @@ -1831,8 +1815,8 @@ exit: enum vxge_hw_status vxge_hw_vpath_mac_addr_get_next( struct __vxge_hw_vpath_handle *vp, - u8 (macaddr)[ETH_ALEN], - u8 (macaddr_mask)[ETH_ALEN]) + u8 *macaddr, + u8 *macaddr_mask) { u32 i; u64 data1 = 0ULL; @@ -1869,8 +1853,7 @@ exit: } /** - * vxge_hw_vpath_mac_addr_delete - Delete the mac address entry for this vpath - * to MAC address table. + * vxge_hw_vpath_mac_addr_delete - Delete the mac address entry for this vpath to MAC address table. * @vp: Vpath handle. * @macaddr: MAC address to be added for this vpath into the list * @macaddr_mask: MAC address mask for macaddr @@ -1884,8 +1867,8 @@ exit: enum vxge_hw_status vxge_hw_vpath_mac_addr_delete( struct __vxge_hw_vpath_handle *vp, - u8 (macaddr)[ETH_ALEN], - u8 (macaddr_mask)[ETH_ALEN]) + u8 *macaddr, + u8 *macaddr_mask) { u32 i; u64 data1 = 0ULL; @@ -1916,8 +1899,7 @@ exit: } /** - * vxge_hw_vpath_vid_add - Add the vlan id entry for this vpath - * to vlan id table. + * vxge_hw_vpath_vid_add - Add the vlan id entry for this vpath to vlan id table. * @vp: Vpath handle. * @vid: vlan id to be added for this vpath into the list * @@ -2375,7 +2357,6 @@ enum vxge_hw_status vxge_hw_vpath_poll_rx(struct __vxge_hw_ring *ring) u8 t_code; enum vxge_hw_status status = VXGE_HW_OK; void *first_rxdh; - u64 val64 = 0; int new_count = 0; ring->cmpl_cnt = 0; @@ -2403,8 +2384,7 @@ enum vxge_hw_status vxge_hw_vpath_poll_rx(struct __vxge_hw_ring *ring) } writeq(VXGE_HW_PRC_RXD_DOORBELL_NEW_QW_CNT(new_count), &ring->vp_reg->prc_rxd_doorbell); - val64 = - readl(&ring->common_reg->titan_general_int_status); + readl(&ring->common_reg->titan_general_int_status); ring->doorbell_cnt = 0; } } @@ -2413,9 +2393,11 @@ enum vxge_hw_status vxge_hw_vpath_poll_rx(struct __vxge_hw_ring *ring) } /** - * vxge_hw_vpath_poll_tx - Poll Tx for completed descriptors and process - * the same. + * vxge_hw_vpath_poll_tx - Poll Tx for completed descriptors and process the same. * @fifo: Handle to the fifo object used for non offload send + * @skb_ptr: pointer to skb + * @nr_skb: number of skbs + * @more: more is coming * * The function polls the Tx for the completed descriptors and calls * the driver via supplied completion callback. 
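The prototype changes above, from `u8 (macaddr)[ETH_ALEN]` to `u8 *macaddr`, do not change the ABI: a C array parameter adjusts ("decays") to a pointer, so both spellings declare the same function type. A short illustrative sketch (declarations only, names invented):

#include <linux/if_ether.h>	/* ETH_ALEN */
#include <linux/types.h>

/* Identical types to the compiler: inside either body, sizeof(macaddr)
 * would be sizeof(u8 *), never ETH_ALEN, which is why the array
 * spelling is misleading and is dropped above.
 */
void example_take_array(u8 macaddr[ETH_ALEN]);
void example_take_pointer(u8 *macaddr);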
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index ac02369174a9..53851853562c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -111,7 +111,9 @@ static int nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, struct bpf_prog *prog) { - int i, cnt, err; + int i, cnt, err = 0; + + mutex_lock(&prog->aux->used_maps_mutex); /* Quickly count the maps we will have to remember */ cnt = 0; @@ -119,13 +121,15 @@ nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, if (bpf_map_offload_neutral(prog->aux->used_maps[i])) cnt++; if (!cnt) - return 0; + goto out; nfp_prog->map_records = kmalloc_array(cnt, sizeof(nfp_prog->map_records[0]), GFP_KERNEL); - if (!nfp_prog->map_records) - return -ENOMEM; + if (!nfp_prog->map_records) { + err = -ENOMEM; + goto out; + } for (i = 0; i < prog->aux->used_map_cnt; i++) if (bpf_map_offload_neutral(prog->aux->used_maps[i])) { @@ -133,12 +137,14 @@ nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, prog->aux->used_maps[i]); if (err) { nfp_map_ptrs_forget(bpf, nfp_prog); - return err; + goto out; } } WARN_ON(cnt != nfp_prog->map_records_cnt); - return 0; +out: + mutex_unlock(&prog->aux->used_maps_mutex); + return err; } static int diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index bf516285510f..a2926b1b3cff 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -24,6 +24,7 @@ #define NFP_FLOWER_LAYER_VXLAN BIT(7) #define NFP_FLOWER_LAYER2_GRE BIT(0) +#define NFP_FLOWER_LAYER2_QINQ BIT(4) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) #define NFP_FLOWER_LAYER2_TUN_IPV6 BIT(7) @@ -319,6 +320,22 @@ struct nfp_flower_mac_mpls { __be32 mpls_lse; }; +/* VLAN details (2W/8B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | outer_tpid | outer_tci | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | inner_tpid | inner_tci | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_vlan { + __be16 outer_tpid; + __be16 outer_tci; + __be16 inner_tpid; + __be16 inner_tci; +}; + /* L4 ports (for UDP, TCP, SCTP) (1W/4B) * 3 2 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 3bf9c1afa45e..caf12eec9945 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -30,6 +30,8 @@ struct nfp_app; #define NFP_FLOWER_MASK_ELEMENT_RS 1 #define NFP_FLOWER_MASK_HASH_BITS 10 +#define NFP_FLOWER_KEY_MAX_LW 32 + #define NFP_FL_META_FLAG_MANAGE_MASK BIT(7) #define NFP_FL_MASK_REUSE_TIME_NS 40000 @@ -44,6 +46,7 @@ struct nfp_app; #define NFP_FL_FEATS_FLOW_MOD BIT(5) #define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) #define NFP_FL_FEATS_IPV6_TUN BIT(7) +#define NFP_FL_FEATS_VLAN_QINQ BIT(8) #define NFP_FL_FEATS_HOST_ACK BIT(31) #define NFP_FL_ENABLE_FLOW_MERGE BIT(0) @@ -57,7 +60,8 @@ struct nfp_app; NFP_FL_FEATS_VF_RLIM | \ NFP_FL_FEATS_FLOW_MOD | \ NFP_FL_FEATS_PRE_TUN_RULES | \ - NFP_FL_FEATS_IPV6_TUN) + NFP_FL_FEATS_IPV6_TUN | \ + NFP_FL_FEATS_VLAN_QINQ) struct nfp_fl_mask_id { struct 
circ_buf mask_id_free_list; diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index f7f01e2e3dce..255a4dff6288 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -10,7 +10,7 @@ static void nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, struct nfp_flower_meta_tci *msk, - struct flow_rule *rule, u8 key_type) + struct flow_rule *rule, u8 key_type, bool qinq_sup) { u16 tmp_tci; @@ -24,7 +24,7 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, msk->nfp_flow_key_layer = key_type; msk->mask_id = ~0; - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + if (!qinq_sup && flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { struct flow_match_vlan match; flow_rule_match_vlan(rule, &match); @@ -231,6 +231,50 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, } static void +nfp_flower_fill_vlan(struct flow_dissector_key_vlan *key, + struct nfp_flower_vlan *frame, + bool outer_vlan) +{ + u16 tci; + + tci = NFP_FLOWER_MASK_VLAN_PRESENT; + tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + key->vlan_id); + + if (outer_vlan) { + frame->outer_tci = cpu_to_be16(tci); + frame->outer_tpid = key->vlan_tpid; + } else { + frame->inner_tci = cpu_to_be16(tci); + frame->inner_tpid = key->vlan_tpid; + } +} + +static void +nfp_flower_compile_vlan(struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, + struct flow_rule *rule) +{ + struct flow_match_vlan match; + + memset(ext, 0, sizeof(struct nfp_flower_vlan)); + memset(msk, 0, sizeof(struct nfp_flower_vlan)); + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + flow_rule_match_vlan(rule, &match); + nfp_flower_fill_vlan(match.key, ext, true); + nfp_flower_fill_vlan(match.mask, msk, true); + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + flow_rule_match_cvlan(rule, &match); + nfp_flower_fill_vlan(match.key, ext, false); + nfp_flower_fill_vlan(match.mask, msk, false); + } +} + +static void nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, struct nfp_flower_ipv4 *msk, struct flow_rule *rule) { @@ -433,7 +477,10 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, struct netlink_ext_ack *extack) { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct nfp_flower_priv *priv = app->priv; + bool qinq_sup; u32 port_id; + int ext_len; int err; u8 *ext; u8 *msk; @@ -446,9 +493,11 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, ext = nfp_flow->unmasked_data; msk = nfp_flow->mask_data; + qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ); + nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext, (struct nfp_flower_meta_tci *)msk, - rule, key_ls->key_layer); + rule, key_ls->key_layer, qinq_sup); ext += sizeof(struct nfp_flower_meta_tci); msk += sizeof(struct nfp_flower_meta_tci); @@ -547,6 +596,14 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, } } + if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) { + nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext, + (struct nfp_flower_vlan *)msk, + rule); + ext += sizeof(struct nfp_flower_vlan); + msk += sizeof(struct nfp_flower_vlan); + } + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { @@ -589,5 +646,15 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, } } + /* Check that the flow key 
does not exceed the maximum limit. + * All structures in the key are multiples of 4 bytes, so use u32. + */ + ext_len = (u32 *)ext - (u32 *)nfp_flow->unmasked_data; + if (ext_len > NFP_FLOWER_KEY_MAX_LW) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: flow key too long"); + return -EOPNOTSUPP; + } + return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 36356f96661d..1c59aff2163c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -31,6 +31,7 @@ BIT(FLOW_DISSECTOR_KEY_PORTS) | \ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_VLAN) | \ + BIT(FLOW_DISSECTOR_KEY_CVLAN) | \ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ @@ -66,7 +67,8 @@ NFP_FLOWER_LAYER_IPV6) #define NFP_FLOWER_PRE_TUN_RULE_FIELDS \ - (NFP_FLOWER_LAYER_PORT | \ + (NFP_FLOWER_LAYER_EXT_META | \ + NFP_FLOWER_LAYER_PORT | \ NFP_FLOWER_LAYER_MAC | \ NFP_FLOWER_LAYER_IPV4 | \ NFP_FLOWER_LAYER_IPV6) @@ -285,6 +287,30 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN PCP offload"); return -EOPNOTSUPP; } + if (priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ && + !(key_layer_two & NFP_FLOWER_LAYER2_QINQ)) { + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_size += sizeof(struct nfp_flower_vlan); + key_layer_two |= NFP_FLOWER_LAYER2_QINQ; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_match_vlan cvlan; + + if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN QinQ offload"); + return -EOPNOTSUPP; + } + + flow_rule_match_vlan(rule, &cvlan); + if (!(key_layer_two & NFP_FLOWER_LAYER2_QINQ)) { + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_size += sizeof(struct nfp_flower_vlan); + key_layer_two |= NFP_FLOWER_LAYER2_QINQ; + } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { @@ -1066,6 +1092,7 @@ err_destroy_merge_flow: * nfp_flower_validate_pre_tun_rule() * @app: Pointer to the APP handle * @flow: Pointer to NFP flow representation of rule + * @key_ls: Pointer to NFP key layers structure * @extack: Netlink extended ACK report * * Verifies the flow as a pre-tunnel rule.
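For scale on the ext_len check added above: NFP_FLOWER_KEY_MAX_LW is 32 long-words, i.e. a 128-byte match key, measured in the same u32 units as ext_len. Going by the struct layouts shown in cmsg.h earlier in this patch, first-time QinQ matching grows the key by sizeof(struct nfp_flower_ext_meta) + sizeof(struct nfp_flower_vlan). A worked sketch of that accounting; the 4- and 8-byte sizes are read off the diagrams, not asserted by the driver:

#include <linux/types.h>

/* Hedged sketch: key-size cost of enabling QinQ in the flower key.
 * nfp_flower_ext_meta holds one __be32 (4 B); nfp_flower_vlan holds
 * four __be16 fields (8 B).
 */
static u32 example_qinq_key_cost_lw(void)
{
	u32 key_size = 0;

	key_size += 4;		/* struct nfp_flower_ext_meta */
	key_size += 8;		/* struct nfp_flower_vlan     */

	return key_size / 4;	/* 3 LW of the 32 (128 B) allowed */
}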
@@ -1075,10 +1102,13 @@ err_destroy_merge_flow: static int nfp_flower_validate_pre_tun_rule(struct nfp_app *app, struct nfp_fl_payload *flow, + struct nfp_fl_key_ls *key_ls, struct netlink_ext_ack *extack) { + struct nfp_flower_priv *priv = app->priv; struct nfp_flower_meta_tci *meta_tci; struct nfp_flower_mac_mpls *mac; + u8 *ext = flow->unmasked_data; struct nfp_fl_act_head *act; u8 *mask = flow->mask_data; bool vlan = false; @@ -1086,20 +1116,25 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, u8 key_layer; meta_tci = (struct nfp_flower_meta_tci *)flow->unmasked_data; - if (meta_tci->tci & cpu_to_be16(NFP_FLOWER_MASK_VLAN_PRESENT)) { - u16 vlan_tci = be16_to_cpu(meta_tci->tci); - - vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT; - flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci); - vlan = true; - } else { - flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff); + key_layer = key_ls->key_layer; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { + if (meta_tci->tci & cpu_to_be16(NFP_FLOWER_MASK_VLAN_PRESENT)) { + u16 vlan_tci = be16_to_cpu(meta_tci->tci); + + vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT; + flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci); + vlan = true; + } else { + flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff); + } } - key_layer = meta_tci->nfp_flow_key_layer; if (key_layer & ~NFP_FLOWER_PRE_TUN_RULE_FIELDS) { NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: too many match fields"); return -EOPNOTSUPP; + } else if (key_ls->key_layer_two & ~NFP_FLOWER_LAYER2_QINQ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: non-vlan in extended match fields"); + return -EOPNOTSUPP; } if (!(key_layer & NFP_FLOWER_LAYER_MAC)) { @@ -1109,7 +1144,13 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, /* Skip fields known to exist. */ mask += sizeof(struct nfp_flower_meta_tci); + ext += sizeof(struct nfp_flower_meta_tci); + if (key_ls->key_layer_two) { + mask += sizeof(struct nfp_flower_ext_meta); + ext += sizeof(struct nfp_flower_ext_meta); + } mask += sizeof(struct nfp_flower_in_port); + ext += sizeof(struct nfp_flower_in_port); /* Ensure destination MAC address is fully matched. */ mac = (struct nfp_flower_mac_mpls *)mask; @@ -1118,6 +1159,8 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } + mask += sizeof(struct nfp_flower_mac_mpls); + ext += sizeof(struct nfp_flower_mac_mpls); if (key_layer & NFP_FLOWER_LAYER_IPV4 || key_layer & NFP_FLOWER_LAYER_IPV6) { /* Flags and proto fields have same offset in IPv4 and IPv6. */ @@ -1130,7 +1173,6 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, sizeof(struct nfp_flower_ipv4) : sizeof(struct nfp_flower_ipv6); - mask += sizeof(struct nfp_flower_mac_mpls); /* Ensure proto and flags are the only IP layer fields. 
*/ for (i = 0; i < size; i++) @@ -1138,6 +1180,25 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header"); return -EOPNOTSUPP; } + ext += size; + mask += size; + } + + if ((priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_QINQ) { + struct nfp_flower_vlan *vlan_tags; + u16 vlan_tci; + + vlan_tags = (struct nfp_flower_vlan *)ext; + + vlan_tci = be16_to_cpu(vlan_tags->outer_tci); + + vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT; + flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci); + vlan = true; + } else { + flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff); + } } /* Action must be a single egress or pop_vlan and egress. */ @@ -1220,7 +1281,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, goto err_destroy_flow; if (flow_pay->pre_tun_rule.dev) { - err = nfp_flower_validate_pre_tun_rule(app, flow_pay, extack); + err = nfp_flower_validate_pre_tun_rule(app, flow_pay, key_layer, extack); if (err) goto err_destroy_flow; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index be52510d446b..97d2b03208de 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -329,12 +329,11 @@ err_close_nsp: } static int -nfp_devlink_flash_update(struct devlink *devlink, const char *path, - const char *component, struct netlink_ext_ack *extack) +nfp_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) { - if (component) - return -EOPNOTSUPP; - return nfp_flash_update_common(devlink_priv(devlink), path, extack); + return nfp_flash_update_common(devlink_priv(devlink), params->file_name, extack); } const struct devlink_ops nfp_devlink_ops = { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 21ea22694e47..b150da43adb2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2287,9 +2287,9 @@ static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) return budget; } -static void nfp_ctrl_poll(unsigned long arg) +static void nfp_ctrl_poll(struct tasklet_struct *t) { - struct nfp_net_r_vector *r_vec = (void *)arg; + struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet); spin_lock(&r_vec->lock); nfp_net_tx_complete(r_vec->tx_ring, 0); @@ -2337,8 +2337,7 @@ static void nfp_net_vecs_init(struct nfp_net *nn) __skb_queue_head_init(&r_vec->queue); spin_lock_init(&r_vec->lock); - tasklet_init(&r_vec->tasklet, nfp_ctrl_poll, - (unsigned long)r_vec); + tasklet_setup(&r_vec->tasklet, nfp_ctrl_poll); tasklet_disable(&r_vec->tasklet); } diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 4075f5e59955..a6861df9904f 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -787,9 +787,9 @@ out: return IRQ_HANDLED; } -static void nixge_dma_err_handler(unsigned long data) +static void nixge_dma_err_handler(struct tasklet_struct *t) { - struct nixge_priv *lp = (struct nixge_priv *)data; + struct nixge_priv *lp = from_tasklet(lp, t, dma_err_tasklet); struct nixge_hw_dma_bd *cur_p; struct nixge_tx_skb *tx_skb; u32 cr, i; @@ -879,8 +879,7 @@ static int nixge_open(struct net_device *ndev) phy_start(phy); /* Enable tasklets for Axi DMA error handling */ 
- tasklet_init(&priv->dma_err_tasklet, nixge_dma_err_handler, - (unsigned long)priv); + tasklet_setup(&priv->dma_err_tasklet, nixge_dma_err_handler); napi_enable(&priv->napi); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c index b36aa5bf3c5f..a58f14aca10c 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c @@ -8,7 +8,7 @@ #include "pch_gbe.h" #include "pch_gbe_phy.h" -/** +/* * pch_gbe_stats - Stats item information */ struct pch_gbe_stats { @@ -24,7 +24,7 @@ struct pch_gbe_stats { .offset = offsetof(struct pch_gbe_hw_stats, m), \ } -/** +/* * pch_gbe_gstrings_stats - ethtool information status name list */ static const struct pch_gbe_stats pch_gbe_gstrings_stats[] = { diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 23f7c76737c9..ade8c44c01cd 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -295,7 +295,7 @@ static s32 pch_gbe_mac_read_mac_addr(struct pch_gbe_hw *hw) /** * pch_gbe_wait_clr_bit - Wait to clear a bit * @reg: Pointer of register - * @busy: Busy bit + * @bit: Busy bit */ static void pch_gbe_wait_clr_bit(void *reg, u32 bit) { @@ -1034,7 +1034,7 @@ static void pch_gbe_set_mode(struct pch_gbe_adapter *adapter, u16 speed, /** * pch_gbe_watchdog - Watchdog process - * @data: Board private structure + * @t: timer list containing a Board private structure */ static void pch_gbe_watchdog(struct timer_list *t) { @@ -2270,6 +2270,7 @@ static int pch_gbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) /** * pch_gbe_tx_timeout - Respond to a Tx Hang * @netdev: Network interface device structure + * @txqueue: index of hanging queue */ static void pch_gbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c index dceec80fd642..81fc5a6e3221 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c @@ -13,7 +13,7 @@ #define OPTION_DISABLED 0 #define OPTION_ENABLED 1 -/** +/* * TxDescriptors - Transmit Descriptor Count * @Valid Range: PCH_GBE_MIN_TXD - PCH_GBE_MAX_TXD * @Default Value: PCH_GBE_DEFAULT_TXD @@ -22,7 +22,7 @@ static int TxDescriptors = OPTION_UNSET; module_param(TxDescriptors, int, 0); MODULE_PARM_DESC(TxDescriptors, "Number of transmit descriptors"); -/** +/* * RxDescriptors -Receive Descriptor Count * @Valid Range: PCH_GBE_MIN_RXD - PCH_GBE_MAX_RXD * @Default Value: PCH_GBE_DEFAULT_RXD @@ -31,7 +31,7 @@ static int RxDescriptors = OPTION_UNSET; module_param(RxDescriptors, int, 0); MODULE_PARM_DESC(RxDescriptors, "Number of receive descriptors"); -/** +/* * Speed - User Specified Speed Override * @Valid Range: 0, 10, 100, 1000 * - 0: auto-negotiate at all supported speeds @@ -44,7 +44,7 @@ static int Speed = OPTION_UNSET; module_param(Speed, int, 0); MODULE_PARM_DESC(Speed, "Speed setting"); -/** +/* * Duplex - User Specified Duplex Override * @Valid Range: 0-2 * - 0: auto-negotiate for duplex @@ -59,7 +59,7 @@ MODULE_PARM_DESC(Duplex, "Duplex setting"); #define HALF_DUPLEX 1 #define FULL_DUPLEX 2 -/** +/* * AutoNeg - Auto-negotiation Advertisement Override * @Valid Range: 0x01-0x0F, 0x20-0x2F * @@ -85,7 +85,7 @@ MODULE_PARM_DESC(AutoNeg, "Advertised 
auto-negotiation setting"); #define PHY_ADVERTISE_1000_FULL 0x0020 #define PCH_AUTONEG_ADVERTISE_DEFAULT 0x2F -/** +/* * FlowControl - User Specified Flow Control Override * @Valid Range: 0-3 * - 0: No Flow Control @@ -124,7 +124,7 @@ MODULE_PARM_DESC(XsumTX, "Disable or enable Transmit Checksum offload"); #define PCH_GBE_DEFAULT_TX_CSUM true /* trueorfalse */ -/** +/* * pch_gbe_option - Force the MAC's flow control settings * @hw: Pointer to the HW structure * Returns: diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 3da075307178..d1dd9bc1bc7f 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -1060,7 +1060,7 @@ static int yellowfin_rx(struct net_device *dev) struct sk_buff *rx_skb = yp->rx_skbuff[entry]; s16 frame_status; u16 desc_status; - int data_size, yf_size; + int data_size, __maybe_unused yf_size; u8 *buf_addr; if(!desc->result_status) diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig index 76f8cc502bf9..5f8b0bb3af6e 100644 --- a/drivers/net/ethernet/pensando/Kconfig +++ b/drivers/net/ethernet/pensando/Kconfig @@ -21,6 +21,7 @@ config IONIC tristate "Pensando Ethernet IONIC Support" depends on 64BIT && PCI select NET_DEVLINK + select DIMLIB help This enables the support for the Pensando family of Ethernet adapters. More specific information on this driver can be diff --git a/drivers/net/ethernet/pensando/ionic/Makefile b/drivers/net/ethernet/pensando/ionic/Makefile index 29f304d75261..8d3c2d3cb10d 100644 --- a/drivers/net/ethernet/pensando/ionic/Makefile +++ b/drivers/net/ethernet/pensando/ionic/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_IONIC) := ionic.o ionic-y := ionic_main.o ionic_bus_pci.o ionic_devlink.o ionic_dev.o \ ionic_debugfs.o ionic_lif.o ionic_rx_filter.o ionic_ethtool.o \ - ionic_txrx.o ionic_stats.o + ionic_txrx.o ionic_stats.o ionic_fw.o diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index f5a910c458ba..084a924431d5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -42,13 +42,11 @@ struct ionic { struct ionic_dev_bar bars[IONIC_BARS_MAX]; unsigned int num_bars; struct ionic_identity ident; - struct list_head lifs; - struct ionic_lif *master_lif; + struct ionic_lif *lif; unsigned int nnqs_per_lif; unsigned int neqs_per_lif; unsigned int ntxqs_per_lif; unsigned int nrxqs_per_lif; - DECLARE_BITMAP(lifbits, IONIC_LIFS_MAX); unsigned int nintrs; DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX); struct work_struct nb_work; @@ -66,9 +64,6 @@ struct ionic_admin_ctx { union ionic_adminq_comp comp; }; -int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb, - ionic_cq_done_cb done_cb, void *done_arg); - int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait); int ionic_set_dma_mask(struct ionic *ionic); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 85c686c16741..b0d8499d373b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -266,6 +266,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(dev, "Cannot identify device: %d, aborting\n", err); goto err_out_teardown; } + ionic_debugfs_add_ident(ionic); err = 
ionic_init(ionic); if (err) { @@ -286,29 +287,22 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_reset; } - /* Configure LIFs */ - err = ionic_lif_identify(ionic, IONIC_LIF_TYPE_CLASSIC, - &ionic->ident.lif); + /* Allocate and init the LIF */ + err = ionic_lif_size(ionic); if (err) { - dev_err(dev, "Cannot identify LIFs: %d, aborting\n", err); + dev_err(dev, "Cannot size LIF: %d, aborting\n", err); goto err_out_port_reset; } - err = ionic_lifs_size(ionic); + err = ionic_lif_alloc(ionic); if (err) { - dev_err(dev, "Cannot size LIFs: %d, aborting\n", err); - goto err_out_port_reset; - } - - err = ionic_lifs_alloc(ionic); - if (err) { - dev_err(dev, "Cannot allocate LIFs: %d, aborting\n", err); + dev_err(dev, "Cannot allocate LIF: %d, aborting\n", err); goto err_out_free_irqs; } - err = ionic_lifs_init(ionic); + err = ionic_lif_init(ionic->lif); if (err) { - dev_err(dev, "Cannot init LIFs: %d, aborting\n", err); + dev_err(dev, "Cannot init LIF: %d, aborting\n", err); goto err_out_free_lifs; } @@ -321,9 +315,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(dev, "Cannot enable existing VFs: %d\n", err); } - err = ionic_lifs_register(ionic); + err = ionic_lif_register(ionic->lif); if (err) { - dev_err(dev, "Cannot register LIFs: %d, aborting\n", err); + dev_err(dev, "Cannot register LIF: %d, aborting\n", err); goto err_out_deinit_lifs; } @@ -336,12 +330,13 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_out_deregister_lifs: - ionic_lifs_unregister(ionic); + ionic_lif_unregister(ionic->lif); err_out_deinit_lifs: ionic_vf_dealloc(ionic); - ionic_lifs_deinit(ionic); + ionic_lif_deinit(ionic->lif); err_out_free_lifs: - ionic_lifs_free(ionic); + ionic_lif_free(ionic->lif); + ionic->lif = NULL; err_out_free_irqs: ionic_bus_free_irq_vectors(ionic); err_out_port_reset: @@ -349,7 +344,7 @@ err_out_port_reset: err_out_reset: ionic_reset(ionic); err_out_teardown: - ionic_dev_teardown(ionic); + del_timer_sync(&ionic->watchdog_timer); pci_clear_master(pdev); /* Don't fail the probe for these errors, keep * the hw interface around for inspection @@ -377,17 +372,19 @@ static void ionic_remove(struct pci_dev *pdev) if (!ionic) return; - if (ionic->master_lif) { + del_timer_sync(&ionic->watchdog_timer); + + if (ionic->lif) { ionic_devlink_unregister(ionic); - ionic_lifs_unregister(ionic); - ionic_lifs_deinit(ionic); - ionic_lifs_free(ionic); + ionic_lif_unregister(ionic->lif); + ionic_lif_deinit(ionic->lif); + ionic_lif_free(ionic->lif); + ionic->lif = NULL; ionic_bus_free_irq_vectors(ionic); } ionic_port_reset(ionic); ionic_reset(ionic); - ionic_dev_teardown(ionic); pci_clear_master(pdev); ionic_unmap_bars(ionic); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c index 11621ccc1faf..39f59849720d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c @@ -76,7 +76,7 @@ static int q_tail_show(struct seq_file *seq, void *v) { struct ionic_queue *q = seq->private; - seq_printf(seq, "%d\n", q->tail->index); + seq_printf(seq, "%d\n", q->tail_idx); return 0; } @@ -86,7 +86,7 @@ static int q_head_show(struct seq_file *seq, void *v) { struct ionic_queue *q = seq->private; - seq_printf(seq, "%d\n", q->head->index); + seq_printf(seq, "%d\n", q->head_idx); return 0; } @@ -96,7 +96,7 @@ static int cq_tail_show(struct seq_file *seq, 
void *v) { struct ionic_cq *cq = seq->private; - seq_printf(seq, "%d\n", cq->tail->index); + seq_printf(seq, "%d\n", cq->tail_idx); return 0; } @@ -112,7 +112,8 @@ static const struct debugfs_reg32 intr_ctrl_regs[] = { void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) { - struct dentry *q_dentry, *cq_dentry, *intr_dentry, *stats_dentry; + struct dentry *qcq_dentry, *q_dentry, *cq_dentry; + struct dentry *intr_dentry, *stats_dentry; struct ionic_dev *idev = &lif->ionic->idev; struct debugfs_regset32 *intr_ctrl_regset; struct ionic_intr_info *intr = &qcq->intr; @@ -121,21 +122,21 @@ void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) struct ionic_queue *q = &qcq->q; struct ionic_cq *cq = &qcq->cq; - qcq->dentry = debugfs_create_dir(q->name, lif->dentry); + qcq_dentry = debugfs_create_dir(q->name, lif->dentry); + if (IS_ERR_OR_NULL(qcq_dentry)) + return; + qcq->dentry = qcq_dentry; - debugfs_create_x32("total_size", 0400, qcq->dentry, &qcq->total_size); - debugfs_create_x64("base_pa", 0400, qcq->dentry, &qcq->base_pa); + debugfs_create_x64("q_base_pa", 0400, qcq_dentry, &qcq->q_base_pa); + debugfs_create_x32("q_size", 0400, qcq_dentry, &qcq->q_size); + debugfs_create_x64("cq_base_pa", 0400, qcq_dentry, &qcq->cq_base_pa); + debugfs_create_x32("cq_size", 0400, qcq_dentry, &qcq->cq_size); + debugfs_create_x64("sg_base_pa", 0400, qcq_dentry, &qcq->sg_base_pa); + debugfs_create_x32("sg_size", 0400, qcq_dentry, &qcq->sg_size); q_dentry = debugfs_create_dir("q", qcq->dentry); debugfs_create_u32("index", 0400, q_dentry, &q->index); - debugfs_create_x64("base_pa", 0400, q_dentry, &q->base_pa); - if (qcq->flags & IONIC_QCQ_F_SG) { - debugfs_create_x64("sg_base_pa", 0400, q_dentry, - &q->sg_base_pa); - debugfs_create_u32("sg_desc_size", 0400, q_dentry, - &q->sg_desc_size); - } debugfs_create_u32("num_descs", 0400, q_dentry, &q->num_descs); debugfs_create_u32("desc_size", 0400, q_dentry, &q->desc_size); debugfs_create_u32("pid", 0400, q_dentry, &q->pid); @@ -188,6 +189,8 @@ void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) &intr->index); debugfs_create_u32("vector", 0400, intr_dentry, &intr->vector); + debugfs_create_u32("dim_coal_hw", 0400, intr_dentry, + &intr->dim_coal_hw); intr_ctrl_regset = devm_kzalloc(dev, sizeof(*intr_ctrl_regset), GFP_KERNEL); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index d83eff0ae0ac..545c99b15df8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -19,10 +19,13 @@ static void ionic_watchdog_cb(struct timer_list *t) mod_timer(&ionic->watchdog_timer, round_jiffies(jiffies + ionic->watchdog_period)); + if (!ionic->lif) + return; + hb = ionic_heartbeat_check(ionic); - if (hb >= 0 && ionic->master_lif) - ionic_link_status_check_request(ionic->master_lif); + if (hb >= 0) + ionic_link_status_check_request(ionic->lif, false); } void ionic_init_devinfo(struct ionic *ionic) @@ -98,11 +101,6 @@ int ionic_dev_setup(struct ionic *ionic) return 0; } -void ionic_dev_teardown(struct ionic *ionic) -{ - del_timer_sync(&ionic->watchdog_timer); -} - /* Devcmd Interface */ int ionic_heartbeat_check(struct ionic *ionic) { @@ -126,7 +124,7 @@ int ionic_heartbeat_check(struct ionic *ionic) /* is this a transition? 
*/ if (fw_status != idev->last_fw_status && idev->last_fw_status != 0xff) { - struct ionic_lif *lif = ionic->master_lif; + struct ionic_lif *lif = ionic->lif; bool trigger = false; if (!fw_status || fw_status == 0xff) { @@ -467,9 +465,7 @@ int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq, struct ionic_intr_info *intr, unsigned int num_descs, size_t desc_size) { - struct ionic_cq_info *cur; unsigned int ring_size; - unsigned int i; if (desc_size == 0 || !is_power_of_2(num_descs)) return -EINVAL; @@ -482,22 +478,9 @@ int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq, cq->bound_intr = intr; cq->num_descs = num_descs; cq->desc_size = desc_size; - cq->tail = cq->info; + cq->tail_idx = 0; cq->done_color = 1; - cur = cq->info; - - for (i = 0; i < num_descs; i++) { - if (i + 1 == num_descs) { - cur->next = cq->info; - cur->last = true; - } else { - cur->next = cur + 1; - } - cur->index = i; - cur++; - } - return 0; } @@ -522,15 +505,18 @@ unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do, ionic_cq_cb cb, ionic_cq_done_cb done_cb, void *done_arg) { + struct ionic_cq_info *cq_info; unsigned int work_done = 0; if (work_to_do == 0) return 0; - while (cb(cq, cq->tail)) { - if (cq->tail->last) + cq_info = &cq->info[cq->tail_idx]; + while (cb(cq, cq_info)) { + if (cq->tail_idx == cq->num_descs - 1) cq->done_color = !cq->done_color; - cq->tail = cq->tail->next; + cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1); + cq_info = &cq->info[cq->tail_idx]; DEBUG_STATS_CQE_CNT(cq); if (++work_done >= work_to_do) @@ -548,9 +534,7 @@ int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev, unsigned int num_descs, size_t desc_size, size_t sg_desc_size, unsigned int pid) { - struct ionic_desc_info *cur; unsigned int ring_size; - unsigned int i; if (desc_size == 0 || !is_power_of_2(num_descs)) return -EINVAL; @@ -565,24 +549,12 @@ int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev, q->num_descs = num_descs; q->desc_size = desc_size; q->sg_desc_size = sg_desc_size; - q->tail = q->info; - q->head = q->tail; + q->tail_idx = 0; + q->head_idx = 0; q->pid = pid; snprintf(q->name, sizeof(q->name), "L%d-%s%u", lif->index, name, index); - cur = q->info; - - for (i = 0; i < num_descs; i++) { - if (i + 1 == num_descs) - cur->next = q->info; - else - cur->next = cur + 1; - cur->index = i; - cur->left = num_descs - i; - cur++; - } - return 0; } @@ -614,19 +586,22 @@ void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb, void *cb_arg) { struct device *dev = q->lif->ionic->dev; + struct ionic_desc_info *desc_info; struct ionic_lif *lif = q->lif; - q->head->cb = cb; - q->head->cb_arg = cb_arg; - q->head = q->head->next; + desc_info = &q->info[q->head_idx]; + desc_info->cb = cb; + desc_info->cb_arg = cb_arg; + + q->head_idx = (q->head_idx + 1) & (q->num_descs - 1); dev_dbg(dev, "lif=%d qname=%s qid=%d qtype=%d p_index=%d ringdb=%d\n", q->lif->index, q->name, q->hw_type, q->hw_index, - q->head->index, ring_doorbell); + q->head_idx, ring_doorbell); if (ring_doorbell) ionic_dbell_ring(lif->kern_dbpage, q->hw_type, - q->dbval | q->head->index); + q->dbval | q->head_idx); } static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos) @@ -634,8 +609,8 @@ static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos) unsigned int mask, tail, head; mask = q->num_descs - 1; - tail = q->tail->index; - head = q->head->index; + tail = q->tail_idx; + head = q->head_idx; return ((pos - tail) & mask) < ((head - tail) & mask); } @@ 
-646,20 +621,22 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, struct ionic_desc_info *desc_info; ionic_desc_cb cb; void *cb_arg; + u16 index; /* check for empty queue */ - if (q->tail->index == q->head->index) + if (q->tail_idx == q->head_idx) return; /* stop index must be for a descriptor that is not yet completed */ if (unlikely(!ionic_q_is_posted(q, stop_index))) dev_err(q->lif->ionic->dev, "ionic stop is not posted %s stop %u tail %u head %u\n", - q->name, stop_index, q->tail->index, q->head->index); + q->name, stop_index, q->tail_idx, q->head_idx); do { - desc_info = q->tail; - q->tail = desc_info->next; + desc_info = &q->info[q->tail_idx]; + index = q->tail_idx; + q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); cb = desc_info->cb; cb_arg = desc_info->cb_arg; @@ -669,5 +646,5 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, if (cb) cb(q, desc_info, cq_info, cb_arg); - } while (desc_info->index != stop_index); + } while (index != stop_index); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index d5cba502abca..c109cd5a0471 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -149,10 +149,13 @@ struct ionic_dev { }; struct ionic_cq_info { - void *cq_desc; - struct ionic_cq_info *next; - unsigned int index; - bool last; + union { + void *cq_desc; + struct ionic_txq_comp *txcq; + struct ionic_rxq_comp *rxcq; + struct ionic_admin_comp *admincq; + struct ionic_notifyq_event *notifyq; + }; }; struct ionic_queue; @@ -169,11 +172,17 @@ struct ionic_page_info { }; struct ionic_desc_info { - void *desc; - void *sg_desc; - struct ionic_desc_info *next; - unsigned int index; - unsigned int left; + union { + void *desc; + struct ionic_txq_desc *txq_desc; + struct ionic_rxq_desc *rxq_desc; + struct ionic_admin_cmd *adminq_desc; + }; + union { + void *sg_desc; + struct ionic_txq_sg_desc *txq_sg_desc; + struct ionic_rxq_sg_desc *rxq_sgl_desc; + }; unsigned int npages; struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1]; ionic_desc_cb cb; @@ -183,25 +192,35 @@ struct ionic_desc_info { #define IONIC_QUEUE_NAME_MAX_SZ 32 struct ionic_queue { + struct device *dev; + struct ionic_lif *lif; + struct ionic_desc_info *info; + u16 head_idx; + u16 tail_idx; + unsigned int index; + unsigned int num_descs; u64 dbell_count; - u64 drop; u64 stop; u64 wake; - struct ionic_lif *lif; - struct ionic_desc_info *info; - struct ionic_desc_info *tail; - struct ionic_desc_info *head; + u64 drop; struct ionic_dev *idev; - unsigned int index; unsigned int type; unsigned int hw_index; unsigned int hw_type; u64 dbval; - void *base; - void *sg_base; + union { + void *base; + struct ionic_txq_desc *txq; + struct ionic_rxq_desc *rxq; + struct ionic_admin_cmd *adminq; + }; + union { + void *sg_base; + struct ionic_txq_sg_desc *txq_sgl; + struct ionic_rxq_sg_desc *rxq_sgl; + }; dma_addr_t base_pa; dma_addr_t sg_base_pa; - unsigned int num_descs; unsigned int desc_size; unsigned int sg_desc_size; unsigned int pid; @@ -218,20 +237,21 @@ struct ionic_intr_info { u64 rearm_count; unsigned int cpu; cpumask_t affinity_mask; + u32 dim_coal_hw; }; struct ionic_cq { - void *base; - dma_addr_t base_pa; struct ionic_lif *lif; struct ionic_cq_info *info; - struct ionic_cq_info *tail; struct ionic_queue *bound_q; struct ionic_intr_info *bound_intr; + u16 tail_idx; bool done_color; unsigned int num_descs; - u64 compl_count; unsigned int desc_size; + 
u64 compl_count; + void *base; + dma_addr_t base_pa; }; struct ionic; @@ -246,12 +266,12 @@ static inline void ionic_intr_init(struct ionic_dev *idev, static inline unsigned int ionic_q_space_avail(struct ionic_queue *q) { - unsigned int avail = q->tail->index; + unsigned int avail = q->tail_idx; - if (q->head->index >= avail) - avail += q->head->left - 1; + if (q->head_idx >= avail) + avail += q->num_descs - q->head_idx - 1; else - avail -= q->head->index + 1; + avail -= q->head_idx + 1; return avail; } @@ -263,7 +283,6 @@ static inline bool ionic_q_has_space(struct ionic_queue *q, unsigned int want) void ionic_init_devinfo(struct ionic *ionic); int ionic_dev_setup(struct ionic *ionic); -void ionic_dev_teardown(struct ionic *ionic); void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd); u8 ionic_dev_cmd_status(struct ionic_dev *idev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index c4f4fd469fe3..51d64718ed9f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -9,6 +9,15 @@ #include "ionic_lif.h" #include "ionic_devlink.h" +static int ionic_dl_flash_update(struct devlink *dl, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct ionic *ionic = devlink_priv(dl); + + return ionic_firmware_update(ionic->lif, params->file_name, extack); +} + static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req, struct netlink_ext_ack *extack) { @@ -48,6 +57,7 @@ static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req, static const struct devlink_ops ionic_dl_ops = { .info_get = ionic_dl_info_get, + .flash_update = ionic_dl_flash_update, }; struct ionic *ionic_devlink_alloc(struct device *dev) @@ -85,7 +95,7 @@ int ionic_devlink_register(struct ionic *ionic) dev_err(ionic->dev, "devlink_port_register failed: %d\n", err); else devlink_port_type_eth_set(&ionic->dl_port, - ionic->master_lif->netdev); + ionic->lif->netdev); return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.h b/drivers/net/ethernet/pensando/ionic/ionic_devlink.h index 0690172fc57a..5c01a9e306d8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.h @@ -6,6 +6,9 @@ #include <net/devlink.h> +int ionic_firmware_update(struct ionic_lif *lif, const char *fw_name, + struct netlink_ext_ack *extack); + struct ionic *ionic_devlink_alloc(struct device *dev); void ionic_devlink_free(struct ionic *ionic); int ionic_devlink_register(struct ionic *ionic); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 3c57c331729f..ed9808fc743b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -298,8 +298,8 @@ static void ionic_get_pauseparam(struct net_device *netdev, pause_type = lif->ionic->idev.port_info->config.pause_type; if (pause_type) { - pause->rx_pause = pause_type & IONIC_PAUSE_F_RX ? 1 : 0; - pause->tx_pause = pause_type & IONIC_PAUSE_F_TX ? 1 : 0; + pause->rx_pause = (pause_type & IONIC_PAUSE_F_RX) ? 1 : 0; + pause->tx_pause = (pause_type & IONIC_PAUSE_F_TX) ? 
1 : 0; } } @@ -406,6 +406,13 @@ static int ionic_get_coalesce(struct net_device *netdev, coalesce->tx_coalesce_usecs = lif->tx_coalesce_usecs; coalesce->rx_coalesce_usecs = lif->rx_coalesce_usecs; + if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) + coalesce->use_adaptive_tx_coalesce = test_bit(IONIC_LIF_F_TX_DIM_INTR, lif->state); + else + coalesce->use_adaptive_tx_coalesce = 0; + + coalesce->use_adaptive_rx_coalesce = test_bit(IONIC_LIF_F_RX_DIM_INTR, lif->state); + return 0; } @@ -414,10 +421,9 @@ static int ionic_set_coalesce(struct net_device *netdev, { struct ionic_lif *lif = netdev_priv(netdev); struct ionic_identity *ident; - struct ionic_qcq *qcq; + u32 rx_coal, rx_dim; + u32 tx_coal, tx_dim; unsigned int i; - u32 rx_coal; - u32 tx_coal; ident = &lif->ionic->ident; if (ident->dev.intr_coal_div == 0) { @@ -426,10 +432,11 @@ static int ionic_set_coalesce(struct net_device *netdev, return -EIO; } - /* Tx normally shares Rx interrupt, so only change Rx */ + /* Tx normally shares Rx interrupt, so only change Rx if not split */ if (!test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state) && - coalesce->tx_coalesce_usecs != lif->rx_coalesce_usecs) { - netdev_warn(netdev, "only the rx-usecs can be changed\n"); + (coalesce->tx_coalesce_usecs != lif->rx_coalesce_usecs || + coalesce->use_adaptive_tx_coalesce)) { + netdev_warn(netdev, "only rx parameters can be changed\n"); return -EINVAL; } @@ -449,32 +456,44 @@ static int ionic_set_coalesce(struct net_device *netdev, /* Save the new values */ lif->rx_coalesce_usecs = coalesce->rx_coalesce_usecs; - if (rx_coal != lif->rx_coalesce_hw) { - lif->rx_coalesce_hw = rx_coal; - - if (test_bit(IONIC_LIF_F_UP, lif->state)) { - for (i = 0; i < lif->nxqs; i++) { - qcq = lif->rxqcqs[i].qcq; - ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - qcq->intr.index, - lif->rx_coalesce_hw); - } - } - } + lif->rx_coalesce_hw = rx_coal; if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) lif->tx_coalesce_usecs = coalesce->tx_coalesce_usecs; else lif->tx_coalesce_usecs = coalesce->rx_coalesce_usecs; - if (tx_coal != lif->tx_coalesce_hw) { - lif->tx_coalesce_hw = tx_coal; + lif->tx_coalesce_hw = tx_coal; - if (test_bit(IONIC_LIF_F_UP, lif->state)) { - for (i = 0; i < lif->nxqs; i++) { - qcq = lif->txqcqs[i].qcq; + if (coalesce->use_adaptive_rx_coalesce) { + set_bit(IONIC_LIF_F_RX_DIM_INTR, lif->state); + rx_dim = rx_coal; + } else { + clear_bit(IONIC_LIF_F_RX_DIM_INTR, lif->state); + rx_dim = 0; + } + + if (coalesce->use_adaptive_tx_coalesce) { + set_bit(IONIC_LIF_F_TX_DIM_INTR, lif->state); + tx_dim = tx_coal; + } else { + clear_bit(IONIC_LIF_F_TX_DIM_INTR, lif->state); + tx_dim = 0; + } + + if (test_bit(IONIC_LIF_F_UP, lif->state)) { + for (i = 0; i < lif->nxqs; i++) { + if (lif->rxqcqs[i]->flags & IONIC_QCQ_F_INTR) { ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - qcq->intr.index, + lif->rxqcqs[i]->intr.index, + lif->rx_coalesce_hw); + lif->rxqcqs[i]->intr.dim_coal_hw = rx_dim; + } + + if (lif->txqcqs[i]->flags & IONIC_QCQ_F_INTR) { + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + lif->txqcqs[i]->intr.index, lif->tx_coalesce_hw); + lif->txqcqs[i]->intr.dim_coal_hw = tx_dim; } } } @@ -493,18 +512,14 @@ static void ionic_get_ringparam(struct net_device *netdev, ring->rx_pending = lif->nrxq_descs; } -static void ionic_set_ringsize(struct ionic_lif *lif, void *arg) -{ - struct ethtool_ringparam *ring = arg; - - lif->ntxq_descs = ring->tx_pending; - lif->nrxq_descs = ring->rx_pending; -} - static int ionic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam 
*ring) { struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_queue_params qparam; + int err; + + ionic_init_queue_params(lif, &qparam); if (ring->rx_mini_pending || ring->rx_jumbo_pending) { netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n"); @@ -522,7 +537,28 @@ static int ionic_set_ringparam(struct net_device *netdev, ring->rx_pending == lif->nrxq_descs) return 0; - return ionic_reset_queues(lif, ionic_set_ringsize, ring); + if (ring->tx_pending != lif->ntxq_descs) + netdev_info(netdev, "Changing Tx ring size from %d to %d\n", + lif->ntxq_descs, ring->tx_pending); + + if (ring->rx_pending != lif->nrxq_descs) + netdev_info(netdev, "Changing Rx ring size from %d to %d\n", + lif->nrxq_descs, ring->rx_pending); + + /* if we're not running, just set the values and return */ + if (!netif_running(lif->netdev)) { + lif->ntxq_descs = ring->tx_pending; + lif->nrxq_descs = ring->rx_pending; + return 0; + } + + qparam.ntxq_descs = ring->tx_pending; + qparam.nrxq_descs = ring->rx_pending; + err = ionic_reconfigure_queues(lif, &qparam); + if (err) + netdev_info(netdev, "Ring reconfiguration failed, changes canceled: %d\n", err); + + return err; } static void ionic_get_channels(struct net_device *netdev, @@ -544,32 +580,15 @@ static void ionic_get_channels(struct net_device *netdev, } } -static void ionic_set_queuecount(struct ionic_lif *lif, void *arg) -{ - struct ethtool_channels *ch = arg; - - if (ch->combined_count) { - lif->nxqs = ch->combined_count; - if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) { - clear_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); - lif->tx_coalesce_usecs = lif->rx_coalesce_usecs; - lif->tx_coalesce_hw = lif->rx_coalesce_hw; - netdev_info(lif->netdev, "Sharing queue interrupts\n"); - } - } else { - lif->nxqs = ch->rx_count; - if (!test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) { - set_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); - netdev_info(lif->netdev, "Splitting queue interrupts\n"); - } - } -} - static int ionic_set_channels(struct net_device *netdev, struct ethtool_channels *ch) { struct ionic_lif *lif = netdev_priv(netdev); - int new_cnt; + struct ionic_queue_params qparam; + int max_cnt; + int err; + + ionic_init_queue_params(lif, &qparam); if (ch->rx_count != ch->tx_count) { netdev_info(netdev, "The rx and tx count must be equal\n"); @@ -577,20 +596,63 @@ static int ionic_set_channels(struct net_device *netdev, } if (ch->combined_count && ch->rx_count) { - netdev_info(netdev, "Use either combined_count or rx/tx_count, not both\n"); + netdev_info(netdev, "Use either combined or rx and tx, not both\n"); return -EINVAL; } - if (ch->combined_count) - new_cnt = ch->combined_count; - else - new_cnt = ch->rx_count; + max_cnt = lif->ionic->ntxqs_per_lif; + if (ch->combined_count) { + if (ch->combined_count > max_cnt) + return -EINVAL; + + if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) + netdev_info(lif->netdev, "Sharing queue interrupts\n"); + else if (ch->combined_count == lif->nxqs) + return 0; - if (lif->nxqs != new_cnt) - netdev_info(netdev, "Changing queue count from %d to %d\n", - lif->nxqs, new_cnt); + if (lif->nxqs != ch->combined_count) + netdev_info(netdev, "Changing queue count from %d to %d\n", + lif->nxqs, ch->combined_count); - return ionic_reset_queues(lif, ionic_set_queuecount, ch); + qparam.nxqs = ch->combined_count; + qparam.intr_split = 0; + } else { + max_cnt /= 2; + if (ch->rx_count > max_cnt) + return -EINVAL; + + if (!test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) + netdev_info(lif->netdev, "Splitting queue interrupts\n"); + 
else if (ch->rx_count == lif->nxqs) + return 0; + + if (lif->nxqs != ch->rx_count) + netdev_info(netdev, "Changing queue count from %d to %d\n", + lif->nxqs, ch->rx_count); + + qparam.nxqs = ch->rx_count; + qparam.intr_split = 1; + } + + /* if we're not running, just set the values and return */ + if (!netif_running(lif->netdev)) { + lif->nxqs = qparam.nxqs; + + if (qparam.intr_split) { + set_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); + } else { + clear_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); + lif->tx_coalesce_usecs = lif->rx_coalesce_usecs; + lif->tx_coalesce_hw = lif->rx_coalesce_hw; + } + return 0; + } + + err = ionic_reconfigure_queues(lif, &qparam); + if (err) + netdev_info(netdev, "Queue reconfiguration failed, changes canceled: %d\n", err); + + return err; } static u32 ionic_get_priv_flags(struct net_device *netdev) @@ -807,7 +869,9 @@ static int ionic_nway_reset(struct net_device *netdev) } static const struct ethtool_ops ionic_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX | + ETHTOOL_COALESCE_USE_ADAPTIVE_TX, .get_drvinfo = ionic_get_drvinfo, .get_regs_len = ionic_get_regs_len, .get_regs = ionic_get_regs, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_fw.c b/drivers/net/ethernet/pensando/ionic/ionic_fw.c new file mode 100644 index 000000000000..f492ae406a60 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_fw.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 Pensando Systems, Inc */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/firmware.h> + +#include "ionic.h" +#include "ionic_dev.h" +#include "ionic_lif.h" +#include "ionic_devlink.h" + +/* The worst case wait for the install activity is about 25 minutes when + * installing a new CPLD, which is very seldom. Normal is about 30-35 + * seconds. Since the driver can't tell if a CPLD update will happen we + * set the timeout for the ugly case. 
+ */ +#define IONIC_FW_INSTALL_TIMEOUT (25 * 60) +#define IONIC_FW_SELECT_TIMEOUT 30 + +/* Number of periodic log updates during fw file download */ +#define IONIC_FW_INTERVAL_FRACTION 32 + +static void ionic_dev_cmd_firmware_download(struct ionic_dev *idev, u64 addr, + u32 offset, u32 length) +{ + union ionic_dev_cmd cmd = { + .fw_download.opcode = IONIC_CMD_FW_DOWNLOAD, + .fw_download.offset = offset, + .fw_download.addr = addr, + .fw_download.length = length + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +static void ionic_dev_cmd_firmware_install(struct ionic_dev *idev) +{ + union ionic_dev_cmd cmd = { + .fw_control.opcode = IONIC_CMD_FW_CONTROL, + .fw_control.oper = IONIC_FW_INSTALL_ASYNC + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +static void ionic_dev_cmd_firmware_activate(struct ionic_dev *idev, u8 slot) +{ + union ionic_dev_cmd cmd = { + .fw_control.opcode = IONIC_CMD_FW_CONTROL, + .fw_control.oper = IONIC_FW_ACTIVATE_ASYNC, + .fw_control.slot = slot + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +static int ionic_fw_status_long_wait(struct ionic *ionic, + const char *label, + unsigned long timeout, + u8 fw_cmd, + struct netlink_ext_ack *extack) +{ + union ionic_dev_cmd cmd = { + .fw_control.opcode = IONIC_CMD_FW_CONTROL, + .fw_control.oper = fw_cmd, + }; + unsigned long start_time; + unsigned long end_time; + int err; + + start_time = jiffies; + end_time = start_time + (timeout * HZ); + do { + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_go(&ionic->idev, &cmd); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + + msleep(20); + } while (time_before(jiffies, end_time) && (err == -EAGAIN || err == -ETIMEDOUT)); + + if (err == -EAGAIN || err == -ETIMEDOUT) { + NL_SET_ERR_MSG_MOD(extack, "Firmware wait timed out"); + dev_err(ionic->dev, "DEV_CMD firmware wait %s timed out\n", label); + } else if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware wait failed"); + } + + return err; +} + +int ionic_firmware_update(struct ionic_lif *lif, const char *fw_name, + struct netlink_ext_ack *extack) +{ + struct ionic_dev *idev = &lif->ionic->idev; + struct net_device *netdev = lif->netdev; + struct ionic *ionic = lif->ionic; + union ionic_dev_cmd_comp comp; + u32 buf_sz, copy_sz, offset; + const struct firmware *fw; + struct devlink *dl; + int next_interval; + int err = 0; + u8 fw_slot; + + netdev_info(netdev, "Installing firmware %s\n", fw_name); + + dl = priv_to_devlink(ionic); + devlink_flash_update_begin_notify(dl); + devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); + + err = request_firmware(&fw, fw_name, ionic->dev); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to find firmware file"); + goto err_out; + } + + buf_sz = sizeof(idev->dev_cmd_regs->data); + + netdev_dbg(netdev, + "downloading firmware - size %d part_sz %d nparts %lu\n", + (int)fw->size, buf_sz, DIV_ROUND_UP(fw->size, buf_sz)); + + offset = 0; + next_interval = 0; + while (offset < fw->size) { + if (offset >= next_interval) { + devlink_flash_update_status_notify(dl, "Downloading", NULL, + offset, fw->size); + next_interval = offset + (fw->size / IONIC_FW_INTERVAL_FRACTION); + } + + copy_sz = min_t(unsigned int, buf_sz, fw->size - offset); + mutex_lock(&ionic->dev_cmd_lock); + memcpy_toio(&idev->dev_cmd_regs->data, fw->data + offset, copy_sz); + ionic_dev_cmd_firmware_download(idev, + offsetof(union ionic_dev_cmd_regs, data), + offset, copy_sz); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + if (err) { + 
netdev_err(netdev, + "download failed offset 0x%x addr 0x%lx len 0x%x\n", + offset, offsetof(union ionic_dev_cmd_regs, data), + copy_sz); + NL_SET_ERR_MSG_MOD(extack, "Segment download failed"); + goto err_out; + } + offset += copy_sz; + } + devlink_flash_update_status_notify(dl, "Downloading", NULL, + fw->size, fw->size); + + devlink_flash_update_timeout_notify(dl, "Installing", NULL, + IONIC_FW_INSTALL_TIMEOUT); + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_firmware_install(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + ionic_dev_cmd_comp(idev, (union ionic_dev_cmd_comp *)&comp); + fw_slot = comp.fw_control.slot; + mutex_unlock(&ionic->dev_cmd_lock); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to start firmware install"); + goto err_out; + } + + err = ionic_fw_status_long_wait(ionic, "Installing", + IONIC_FW_INSTALL_TIMEOUT, + IONIC_FW_INSTALL_STATUS, + extack); + if (err) + goto err_out; + + devlink_flash_update_timeout_notify(dl, "Selecting", NULL, + IONIC_FW_SELECT_TIMEOUT); + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_firmware_activate(idev, fw_slot); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to start firmware select"); + goto err_out; + } + + err = ionic_fw_status_long_wait(ionic, "Selecting", + IONIC_FW_SELECT_TIMEOUT, + IONIC_FW_ACTIVATE_STATUS, + extack); + if (err) + goto err_out; + + netdev_info(netdev, "Firmware update completed\n"); + +err_out: + if (err) + devlink_flash_update_status_notify(dl, "Flash failed", NULL, 0, 0); + else + devlink_flash_update_status_notify(dl, "Flash done", NULL, 0, 0); + release_firmware(fw); + devlink_flash_update_end_notify(dl); + return err; +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index acc94b244cf3..31ccfcdc2b0a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -63,8 +63,10 @@ enum ionic_cmd_opcode { IONIC_CMD_QOS_RESET = 245, /* Firmware commands */ - IONIC_CMD_FW_DOWNLOAD = 254, - IONIC_CMD_FW_CONTROL = 255, + IONIC_CMD_FW_DOWNLOAD = 252, + IONIC_CMD_FW_CONTROL = 253, + IONIC_CMD_FW_DOWNLOAD_V1 = 254, + IONIC_CMD_FW_CONTROL_V1 = 255, }; /** @@ -94,6 +96,7 @@ enum ionic_status_code { IONIC_RC_ERROR = 29, /* Generic error */ IONIC_RC_ERDMA = 30, /* Generic RDMA error */ IONIC_RC_EVFID = 31, /* VF ID does not exist */ + IONIC_RC_EBAD_FW = 32, /* FW file is invalid or corrupted */ }; enum ionic_notifyq_opcode { @@ -2069,14 +2072,23 @@ typedef struct ionic_admin_comp ionic_fw_download_comp; /** * enum ionic_fw_control_oper - FW control operations - * @IONIC_FW_RESET: Reset firmware - * @IONIC_FW_INSTALL: Install firmware - * @IONIC_FW_ACTIVATE: Activate firmware + * @IONIC_FW_RESET: Reset firmware + * @IONIC_FW_INSTALL: Install firmware + * @IONIC_FW_ACTIVATE: Activate firmware + * @IONIC_FW_INSTALL_ASYNC: Install firmware asynchronously + * @IONIC_FW_INSTALL_STATUS: Firmware installation status + * @IONIC_FW_ACTIVATE_ASYNC: Activate firmware asynchronously + * @IONIC_FW_ACTIVATE_STATUS: Firmware activate status */ enum ionic_fw_control_oper { - IONIC_FW_RESET = 0, - IONIC_FW_INSTALL = 1, - IONIC_FW_ACTIVATE = 2, + IONIC_FW_RESET = 0, + IONIC_FW_INSTALL = 1, + IONIC_FW_ACTIVATE = 2, + IONIC_FW_INSTALL_ASYNC = 3, + IONIC_FW_INSTALL_STATUS = 4, + IONIC_FW_ACTIVATE_ASYNC = 5, + IONIC_FW_ACTIVATE_STATUS = 6, + IONIC_FW_UPDATE_CLEANUP = 7, }; /** @@ -2689,6 +2701,9 @@ union 
ionic_dev_cmd { struct ionic_q_identify_cmd q_identify; struct ionic_q_init_cmd q_init; struct ionic_q_control_cmd q_control; + + struct ionic_fw_download_cmd fw_download; + struct ionic_fw_control_cmd fw_control; }; union ionic_dev_cmd_comp { @@ -2722,6 +2737,9 @@ union ionic_dev_cmd_comp { struct ionic_q_identify_comp q_identify; struct ionic_q_init_comp q_init; + + ionic_fw_download_comp fw_download; + struct ionic_fw_control_comp fw_control; }; /** diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 26988ad7ec97..d655a7ae3058 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -36,25 +36,44 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif); static void ionic_lif_handle_fw_up(struct ionic_lif *lif); static void ionic_lif_set_netdev_info(struct ionic_lif *lif); +static void ionic_txrx_deinit(struct ionic_lif *lif); +static int ionic_txrx_init(struct ionic_lif *lif); static int ionic_start_queues(struct ionic_lif *lif); static void ionic_stop_queues(struct ionic_lif *lif); static void ionic_lif_queue_identify(struct ionic_lif *lif); +static void ionic_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct dim_cq_moder cur_moder; + struct ionic_qcq *qcq; + u32 new_coal; + + cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + qcq = container_of(dim, struct ionic_qcq, dim); + new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec); + qcq->intr.dim_coal_hw = new_coal ? new_coal : 1; + dim->state = DIM_START_MEASURE; +} + static void ionic_lif_deferred_work(struct work_struct *work) { struct ionic_lif *lif = container_of(work, struct ionic_lif, deferred.work); struct ionic_deferred *def = &lif->deferred; struct ionic_deferred_work *w = NULL; - spin_lock_bh(&def->lock); - if (!list_empty(&def->list)) { - w = list_first_entry(&def->list, - struct ionic_deferred_work, list); - list_del(&w->list); - } - spin_unlock_bh(&def->lock); + do { + spin_lock_bh(&def->lock); + if (!list_empty(&def->list)) { + w = list_first_entry(&def->list, + struct ionic_deferred_work, list); + list_del(&w->list); + } + spin_unlock_bh(&def->lock); + + if (!w) + break; - if (w) { switch (w->type) { case IONIC_DW_TYPE_RX_MODE: ionic_lif_rx_mode(lif, w->rx_mode); @@ -78,8 +97,8 @@ static void ionic_lif_deferred_work(struct work_struct *work) break; } kfree(w); - schedule_work(&def->work); - } + w = NULL; + } while (true); } void ionic_lif_deferred_enqueue(struct ionic_deferred *def, @@ -135,7 +154,7 @@ static void ionic_link_status_check(struct ionic_lif *lif) clear_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state); } -void ionic_link_status_check_request(struct ionic_lif *lif) +void ionic_link_status_check_request(struct ionic_lif *lif, bool can_sleep) { struct ionic_deferred_work *work; @@ -143,10 +162,12 @@ void ionic_link_status_check_request(struct ionic_lif *lif) if (test_and_set_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state)) return; - if (in_interrupt()) { + if (!can_sleep) { work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) + if (!work) { + clear_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state); return; + } work->type = IONIC_DW_TYPE_LINK_STATUS; ionic_lif_deferred_enqueue(&lif->deferred, work); @@ -243,31 +264,30 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) return ionic_adminq_post_wait(lif, &ctx); } -static int ionic_qcq_disable(struct ionic_qcq *qcq) +static int 
ionic_qcq_disable(struct ionic_qcq *qcq, bool send_to_hw) { - struct ionic_queue *q = &qcq->q; - struct ionic_lif *lif = q->lif; - struct ionic_dev *idev; - struct device *dev; + struct ionic_queue *q; + struct ionic_lif *lif; + int err = 0; struct ionic_admin_ctx ctx = { .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), .cmd.q_control = { .opcode = IONIC_CMD_Q_CONTROL, - .lif_index = cpu_to_le16(lif->index), - .type = q->type, - .index = cpu_to_le32(q->index), .oper = IONIC_Q_DISABLE, }, }; - idev = &lif->ionic->idev; - dev = lif->ionic->dev; + if (!qcq) + return -ENXIO; - dev_dbg(dev, "q_disable.index %d q_disable.qtype %d\n", - ctx.cmd.q_control.index, ctx.cmd.q_control.type); + q = &qcq->q; + lif = q->lif; if (qcq->flags & IONIC_QCQ_F_INTR) { + struct ionic_dev *idev = &lif->ionic->idev; + + cancel_work_sync(&qcq->dim.work); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_SET); synchronize_irq(qcq->intr.vector); @@ -275,7 +295,17 @@ static int ionic_qcq_disable(struct ionic_qcq *qcq) napi_disable(&qcq->napi); } - return ionic_adminq_post_wait(lif, &ctx); + if (send_to_hw) { + ctx.cmd.q_control.lif_index = cpu_to_le16(lif->index); + ctx.cmd.q_control.type = q->type; + ctx.cmd.q_control.index = cpu_to_le32(q->index); + dev_dbg(lif->ionic->dev, "q_disable.index %d q_disable.qtype %d\n", + ctx.cmd.q_control.index, ctx.cmd.q_control.type); + + err = ionic_adminq_post_wait(lif, &ctx); + } + + return err; } static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq) @@ -297,6 +327,18 @@ static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq) qcq->flags &= ~IONIC_QCQ_F_INITED; } +static void ionic_qcq_intr_free(struct ionic_lif *lif, struct ionic_qcq *qcq) +{ + if (!(qcq->flags & IONIC_QCQ_F_INTR) || qcq->intr.vector == 0) + return; + + irq_set_affinity_hint(qcq->intr.vector, NULL); + devm_free_irq(lif->ionic->dev, qcq->intr.vector, &qcq->napi); + qcq->intr.vector = 0; + ionic_intr_free(lif->ionic, qcq->intr.index); + qcq->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED; +} + static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq) { struct device *dev = lif->ionic->dev; @@ -306,51 +348,62 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq) ionic_debugfs_del_qcq(qcq); - dma_free_coherent(dev, qcq->total_size, qcq->base, qcq->base_pa); - qcq->base = NULL; - qcq->base_pa = 0; + if (qcq->q_base) { + dma_free_coherent(dev, qcq->q_size, qcq->q_base, qcq->q_base_pa); + qcq->q_base = NULL; + qcq->q_base_pa = 0; + } - if (qcq->flags & IONIC_QCQ_F_INTR) { - irq_set_affinity_hint(qcq->intr.vector, NULL); - devm_free_irq(dev, qcq->intr.vector, &qcq->napi); - qcq->intr.vector = 0; - ionic_intr_free(lif->ionic, qcq->intr.index); + if (qcq->cq_base) { + dma_free_coherent(dev, qcq->cq_size, qcq->cq_base, qcq->cq_base_pa); + qcq->cq_base = NULL; + qcq->cq_base_pa = 0; + } + + if (qcq->sg_base) { + dma_free_coherent(dev, qcq->sg_size, qcq->sg_base, qcq->sg_base_pa); + qcq->sg_base = NULL; + qcq->sg_base_pa = 0; } - devm_kfree(dev, qcq->cq.info); - qcq->cq.info = NULL; - devm_kfree(dev, qcq->q.info); - qcq->q.info = NULL; - devm_kfree(dev, qcq); + ionic_qcq_intr_free(lif, qcq); + + if (qcq->cq.info) { + devm_kfree(dev, qcq->cq.info); + qcq->cq.info = NULL; + } + if (qcq->q.info) { + devm_kfree(dev, qcq->q.info); + qcq->q.info = NULL; + } } static void ionic_qcqs_free(struct ionic_lif *lif) { struct device *dev = lif->ionic->dev; - unsigned int i; if (lif->notifyqcq) { ionic_qcq_free(lif, lif->notifyqcq); + 
devm_kfree(dev, lif->notifyqcq); lif->notifyqcq = NULL; } if (lif->adminqcq) { ionic_qcq_free(lif, lif->adminqcq); + devm_kfree(dev, lif->adminqcq); lif->adminqcq = NULL; } if (lif->rxqcqs) { - for (i = 0; i < lif->nxqs; i++) - if (lif->rxqcqs[i].stats) - devm_kfree(dev, lif->rxqcqs[i].stats); + devm_kfree(dev, lif->rxqstats); + lif->rxqstats = NULL; devm_kfree(dev, lif->rxqcqs); lif->rxqcqs = NULL; } if (lif->txqcqs) { - for (i = 0; i < lif->nxqs; i++) - if (lif->txqcqs[i].stats) - devm_kfree(dev, lif->txqcqs[i].stats); + devm_kfree(dev, lif->txqstats); + lif->txqstats = NULL; devm_kfree(dev, lif->txqcqs); lif->txqcqs = NULL; } @@ -368,6 +421,53 @@ static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq, n_qcq->intr.index = src_qcq->intr.index; } +static int ionic_alloc_qcq_interrupt(struct ionic_lif *lif, struct ionic_qcq *qcq) +{ + int err; + + if (!(qcq->flags & IONIC_QCQ_F_INTR)) { + qcq->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED; + return 0; + } + + err = ionic_intr_alloc(lif, &qcq->intr); + if (err) { + netdev_warn(lif->netdev, "no intr for %s: %d\n", + qcq->q.name, err); + goto err_out; + } + + err = ionic_bus_get_irq(lif->ionic, qcq->intr.index); + if (err < 0) { + netdev_warn(lif->netdev, "no vector for %s: %d\n", + qcq->q.name, err); + goto err_out_free_intr; + } + qcq->intr.vector = err; + ionic_intr_mask_assert(lif->ionic->idev.intr_ctrl, qcq->intr.index, + IONIC_INTR_MASK_SET); + + err = ionic_request_irq(lif, qcq); + if (err) { + netdev_warn(lif->netdev, "irq request failed %d\n", err); + goto err_out_free_intr; + } + + /* try to get the irq on the local numa node first */ + qcq->intr.cpu = cpumask_local_spread(qcq->intr.index, + dev_to_node(lif->ionic->dev)); + if (qcq->intr.cpu != -1) + cpumask_set_cpu(qcq->intr.cpu, &qcq->intr.affinity_mask); + + netdev_dbg(lif->netdev, "%s: Interrupt index %d\n", qcq->q.name, qcq->intr.index); + return 0; + +err_out_free_intr: + ionic_intr_free(lif->ionic, qcq->intr.index); +err_out: + return err; +} + static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, unsigned int index, const char *name, unsigned int flags, @@ -377,7 +477,6 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, unsigned int pid, struct ionic_qcq **qcq) { struct ionic_dev *idev = &lif->ionic->idev; - u32 q_size, cq_size, sg_size, total_size; struct device *dev = lif->ionic->dev; void *q_base, *cq_base, *sg_base; dma_addr_t cq_base_pa = 0; @@ -388,21 +487,6 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, *qcq = NULL; - q_size = num_descs * desc_size; - cq_size = num_descs * cq_desc_size; - sg_size = num_descs * sg_desc_size; - - total_size = ALIGN(q_size, PAGE_SIZE) + ALIGN(cq_size, PAGE_SIZE); - /* Note: aligning q_size/cq_size is not enough due to cq_base - * address aligning as q_base could be not aligned to the page. - * Adding PAGE_SIZE. 
- */ - total_size += PAGE_SIZE; - if (flags & IONIC_QCQ_F_SG) { - total_size += ALIGN(sg_size, PAGE_SIZE); - total_size += PAGE_SIZE; - } - new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL); if (!new) { netdev_err(lif->netdev, "Cannot allocate queue structure\n"); @@ -417,7 +501,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, if (!new->q.info) { netdev_err(lif->netdev, "Cannot allocate queue info\n"); err = -ENOMEM; - goto err_out; + goto err_out_free_qcq; } new->q.type = type; @@ -426,41 +510,12 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, desc_size, sg_desc_size, pid); if (err) { netdev_err(lif->netdev, "Cannot initialize queue\n"); - goto err_out; + goto err_out_free_q_info; } - if (flags & IONIC_QCQ_F_INTR) { - err = ionic_intr_alloc(lif, &new->intr); - if (err) { - netdev_warn(lif->netdev, "no intr for %s: %d\n", - name, err); - goto err_out; - } - - err = ionic_bus_get_irq(lif->ionic, new->intr.index); - if (err < 0) { - netdev_warn(lif->netdev, "no vector for %s: %d\n", - name, err); - goto err_out_free_intr; - } - new->intr.vector = err; - ionic_intr_mask_assert(idev->intr_ctrl, new->intr.index, - IONIC_INTR_MASK_SET); - - err = ionic_request_irq(lif, new); - if (err) { - netdev_warn(lif->netdev, "irq request failed %d\n", err); - goto err_out_free_intr; - } - - new->intr.cpu = cpumask_local_spread(new->intr.index, - dev_to_node(dev)); - if (new->intr.cpu != -1) - cpumask_set_cpu(new->intr.cpu, - &new->intr.affinity_mask); - } else { - new->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED; - } + err = ionic_alloc_qcq_interrupt(lif, new); + if (err) + goto err_out; new->cq.info = devm_kcalloc(dev, num_descs, sizeof(*new->cq.info), GFP_KERNEL); @@ -473,46 +528,95 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, err = ionic_cq_init(lif, &new->cq, &new->intr, num_descs, cq_desc_size); if (err) { netdev_err(lif->netdev, "Cannot initialize completion queue\n"); - goto err_out_free_irq; + goto err_out_free_cq_info; } - new->base = dma_alloc_coherent(dev, total_size, &new->base_pa, - GFP_KERNEL); - if (!new->base) { - netdev_err(lif->netdev, "Cannot allocate queue DMA memory\n"); - err = -ENOMEM; - goto err_out_free_irq; - } - - new->total_size = total_size; + if (flags & IONIC_QCQ_F_NOTIFYQ) { + int q_size, cq_size; - q_base = new->base; - q_base_pa = new->base_pa; + /* q & cq need to be contiguous in case of notifyq */ + q_size = ALIGN(num_descs * desc_size, PAGE_SIZE); + cq_size = ALIGN(num_descs * cq_desc_size, PAGE_SIZE); - cq_base = (void *)ALIGN((uintptr_t)q_base + q_size, PAGE_SIZE); - cq_base_pa = ALIGN(q_base_pa + q_size, PAGE_SIZE); + new->q_size = PAGE_SIZE + q_size + cq_size; + new->q_base = dma_alloc_coherent(dev, new->q_size, + &new->q_base_pa, GFP_KERNEL); + if (!new->q_base) { + netdev_err(lif->netdev, "Cannot allocate qcq DMA memory\n"); + err = -ENOMEM; + goto err_out_free_cq_info; + } + q_base = PTR_ALIGN(new->q_base, PAGE_SIZE); + q_base_pa = ALIGN(new->q_base_pa, PAGE_SIZE); + ionic_q_map(&new->q, q_base, q_base_pa); + + cq_base = PTR_ALIGN(q_base + q_size, PAGE_SIZE); + cq_base_pa = ALIGN(new->q_base_pa + q_size, PAGE_SIZE); + ionic_cq_map(&new->cq, cq_base, cq_base_pa); + ionic_cq_bind(&new->cq, &new->q); + } else { + new->q_size = PAGE_SIZE + (num_descs * desc_size); + new->q_base = dma_alloc_coherent(dev, new->q_size, &new->q_base_pa, + GFP_KERNEL); + if (!new->q_base) { + netdev_err(lif->netdev, "Cannot allocate queue DMA memory\n"); + err = -ENOMEM; + goto err_out_free_cq_info; + } + q_base = 
PTR_ALIGN(new->q_base, PAGE_SIZE); + q_base_pa = ALIGN(new->q_base_pa, PAGE_SIZE); + ionic_q_map(&new->q, q_base, q_base_pa); + + new->cq_size = PAGE_SIZE + (num_descs * cq_desc_size); + new->cq_base = dma_alloc_coherent(dev, new->cq_size, &new->cq_base_pa, + GFP_KERNEL); + if (!new->cq_base) { + netdev_err(lif->netdev, "Cannot allocate cq DMA memory\n"); + err = -ENOMEM; + goto err_out_free_q; + } + cq_base = PTR_ALIGN(new->cq_base, PAGE_SIZE); + cq_base_pa = ALIGN(new->cq_base_pa, PAGE_SIZE); + ionic_cq_map(&new->cq, cq_base, cq_base_pa); + ionic_cq_bind(&new->cq, &new->q); + } if (flags & IONIC_QCQ_F_SG) { - sg_base = (void *)ALIGN((uintptr_t)cq_base + cq_size, - PAGE_SIZE); - sg_base_pa = ALIGN(cq_base_pa + cq_size, PAGE_SIZE); + new->sg_size = PAGE_SIZE + (num_descs * sg_desc_size); + new->sg_base = dma_alloc_coherent(dev, new->sg_size, &new->sg_base_pa, + GFP_KERNEL); + if (!new->sg_base) { + netdev_err(lif->netdev, "Cannot allocate sg DMA memory\n"); + err = -ENOMEM; + goto err_out_free_cq; + } + sg_base = PTR_ALIGN(new->sg_base, PAGE_SIZE); + sg_base_pa = ALIGN(new->sg_base_pa, PAGE_SIZE); ionic_q_sg_map(&new->q, sg_base, sg_base_pa); } - ionic_q_map(&new->q, q_base, q_base_pa); - ionic_cq_map(&new->cq, cq_base, cq_base_pa); - ionic_cq_bind(&new->cq, &new->q); + INIT_WORK(&new->dim.work, ionic_dim_work); + new->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; *qcq = new; return 0; +err_out_free_cq: + dma_free_coherent(dev, new->cq_size, new->cq_base, new->cq_base_pa); +err_out_free_q: + dma_free_coherent(dev, new->q_size, new->q_base, new->q_base_pa); +err_out_free_cq_info: + devm_kfree(dev, new->cq.info); err_out_free_irq: - if (flags & IONIC_QCQ_F_INTR) + if (flags & IONIC_QCQ_F_INTR) { devm_free_irq(dev, new->intr.vector, &new->napi); -err_out_free_intr: - if (flags & IONIC_QCQ_F_INTR) ionic_intr_free(lif->ionic, new->intr.index); + } +err_out_free_q_info: + devm_kfree(dev, new->q.info); +err_out_free_qcq: + devm_kfree(dev, new); err_out: dev_err(dev, "qcq alloc of %s%d failed %d\n", name, index, err); return err; @@ -521,10 +625,8 @@ err_out: static int ionic_qcqs_alloc(struct ionic_lif *lif) { struct device *dev = lif->ionic->dev; - unsigned int q_list_size; unsigned int flags; int err; - int i; flags = IONIC_QCQ_F_INTR; err = ionic_qcq_alloc(lif, IONIC_QTYPE_ADMINQ, 0, "admin", flags, @@ -544,63 +646,50 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif) sizeof(union ionic_notifyq_comp), 0, lif->kern_pid, &lif->notifyqcq); if (err) - goto err_out_free_adminqcq; + goto err_out; ionic_debugfs_add_qcq(lif, lif->notifyqcq); /* Let the notifyq ride on the adminq interrupt */ ionic_link_qcq_interrupts(lif->adminqcq, lif->notifyqcq); } - q_list_size = sizeof(*lif->txqcqs) * lif->nxqs; err = -ENOMEM; - lif->txqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL); + lif->txqcqs = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif, + sizeof(struct ionic_qcq *), GFP_KERNEL); if (!lif->txqcqs) - goto err_out_free_notifyqcq; - for (i = 0; i < lif->nxqs; i++) { - lif->txqcqs[i].stats = devm_kzalloc(dev, - sizeof(struct ionic_q_stats), - GFP_KERNEL); - if (!lif->txqcqs[i].stats) - goto err_out_free_tx_stats; - } - - lif->rxqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL); + goto err_out; + lif->rxqcqs = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif, + sizeof(struct ionic_qcq *), GFP_KERNEL); if (!lif->rxqcqs) - goto err_out_free_tx_stats; - for (i = 0; i < lif->nxqs; i++) { - lif->rxqcqs[i].stats = devm_kzalloc(dev, - sizeof(struct ionic_q_stats), - GFP_KERNEL); - if (!lif->rxqcqs[i].stats) - goto 
err_out_free_rx_stats; - } + goto err_out; - return 0; + lif->txqstats = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif, + sizeof(struct ionic_tx_stats), GFP_KERNEL); + if (!lif->txqstats) + goto err_out; + lif->rxqstats = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif, + sizeof(struct ionic_rx_stats), GFP_KERNEL); + if (!lif->rxqstats) + goto err_out; -err_out_free_rx_stats: - for (i = 0; i < lif->nxqs; i++) - if (lif->rxqcqs[i].stats) - devm_kfree(dev, lif->rxqcqs[i].stats); - devm_kfree(dev, lif->rxqcqs); - lif->rxqcqs = NULL; -err_out_free_tx_stats: - for (i = 0; i < lif->nxqs; i++) - if (lif->txqcqs[i].stats) - devm_kfree(dev, lif->txqcqs[i].stats); - devm_kfree(dev, lif->txqcqs); - lif->txqcqs = NULL; -err_out_free_notifyqcq: - if (lif->notifyqcq) { - ionic_qcq_free(lif, lif->notifyqcq); - lif->notifyqcq = NULL; - } -err_out_free_adminqcq: - ionic_qcq_free(lif, lif->adminqcq); - lif->adminqcq = NULL; + return 0; +err_out: + ionic_qcqs_free(lif); return err; } +static void ionic_qcq_sanitize(struct ionic_qcq *qcq) +{ + qcq->q.tail_idx = 0; + qcq->q.head_idx = 0; + qcq->cq.tail_idx = 0; + qcq->cq.done_color = 1; + memset(qcq->q_base, 0, qcq->q_size); + memset(qcq->cq_base, 0, qcq->cq_size); + memset(qcq->sg_base, 0, qcq->sg_size); +} + static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) { struct device *dev = lif->ionic->dev; @@ -626,10 +715,10 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) unsigned int intr_index; int err; - if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) + if (qcq->flags & IONIC_QCQ_F_INTR) intr_index = qcq->intr.index; else - intr_index = lif->rxqcqs[q->index].qcq->intr.index; + intr_index = lif->rxqcqs[q->index]->intr.index; ctx.cmd.q_init.intr_index = cpu_to_le16(intr_index); dev_dbg(dev, "txq_init.pid %d\n", ctx.cmd.q_init.pid); @@ -640,9 +729,7 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) dev_dbg(dev, "txq_init.ver %d\n", ctx.cmd.q_init.ver); dev_dbg(dev, "txq_init.intr_index %d\n", ctx.cmd.q_init.intr_index); - q->tail = q->info; - q->head = q->tail; - cq->tail = cq->info; + ionic_qcq_sanitize(qcq); err = ionic_adminq_post_wait(lif, &ctx); if (err) @@ -697,9 +784,7 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) dev_dbg(dev, "rxq_init.ver %d\n", ctx.cmd.q_init.ver); dev_dbg(dev, "rxq_init.intr_index %d\n", ctx.cmd.q_init.intr_index); - q->tail = q->info; - q->head = q->tail; - cq->tail = cq->info; + ionic_qcq_sanitize(qcq); err = ionic_adminq_post_wait(lif, &ctx); if (err) @@ -751,7 +836,7 @@ static bool ionic_notifyq_service(struct ionic_cq *cq, switch (le16_to_cpu(comp->event.ecode)) { case IONIC_EVENT_LINK_CHANGE: - ionic_link_status_check_request(lif); + ionic_link_status_check_request(lif, false); break; case IONIC_EVENT_RESET: work = kzalloc(sizeof(*work), GFP_ATOMIC); @@ -771,21 +856,6 @@ static bool ionic_notifyq_service(struct ionic_cq *cq, return true; } -static int ionic_notifyq_clean(struct ionic_lif *lif, int budget) -{ - struct ionic_dev *idev = &lif->ionic->idev; - struct ionic_cq *cq = &lif->notifyqcq->cq; - u32 work_done; - - work_done = ionic_cq_service(cq, budget, ionic_notifyq_service, - NULL, NULL); - if (work_done) - ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index, - work_done, IONIC_INTR_CRED_RESET_COALESCE); - - return work_done; -} - static bool ionic_adminq_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) { @@ -801,15 +871,36 @@ static bool ionic_adminq_service(struct ionic_cq *cq, static int 
ionic_adminq_napi(struct napi_struct *napi, int budget) { + struct ionic_intr_info *intr = napi_to_cq(napi)->bound_intr; struct ionic_lif *lif = napi_to_cq(napi)->lif; + struct ionic_dev *idev = &lif->ionic->idev; + unsigned int flags = 0; int n_work = 0; int a_work = 0; + int work_done; - if (likely(lif->notifyqcq && lif->notifyqcq->flags & IONIC_QCQ_F_INITED)) - n_work = ionic_notifyq_clean(lif, budget); - a_work = ionic_napi(napi, budget, ionic_adminq_service, NULL, NULL); + if (lif->notifyqcq && lif->notifyqcq->flags & IONIC_QCQ_F_INITED) + n_work = ionic_cq_service(&lif->notifyqcq->cq, budget, + ionic_notifyq_service, NULL, NULL); + + if (lif->adminqcq && lif->adminqcq->flags & IONIC_QCQ_F_INITED) + a_work = ionic_cq_service(&lif->adminqcq->cq, budget, + ionic_adminq_service, NULL, NULL); + + work_done = max(n_work, a_work); + if (work_done < budget && napi_complete_done(napi, work_done)) { + flags |= IONIC_INTR_CRED_UNMASK; + lif->adminqcq->cq.bound_intr->rearm_count++; + } + + if (work_done || flags) { + flags |= IONIC_INTR_CRED_RESET_COALESCE; + ionic_intr_credits(idev->intr_ctrl, + intr->index, + n_work + a_work, flags); + } - return max(n_work, a_work); + return work_done; } void ionic_get_stats64(struct net_device *netdev, @@ -928,9 +1019,9 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) return 0; } -static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add) +static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add, + bool can_sleep) { - struct ionic *ionic = lif->ionic; struct ionic_deferred_work *work; unsigned int nmfilters; unsigned int nufilters; @@ -940,8 +1031,8 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add) * here before checking the need for deferral so that we * can return an overflow error to the stack. 
*/ - nmfilters = le32_to_cpu(ionic->ident.lif.eth.max_mcast_filters); - nufilters = le32_to_cpu(ionic->ident.lif.eth.max_ucast_filters); + nmfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); + nufilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); if ((is_multicast_ether_addr(addr) && lif->nmcast < nmfilters)) lif->nmcast++; @@ -957,7 +1048,7 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add) lif->nucast--; } - if (in_interrupt()) { + if (!can_sleep) { work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { netdev_err(lif->netdev, "%s OOM\n", __func__); @@ -983,12 +1074,22 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add) static int ionic_addr_add(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, true); + return ionic_lif_addr(netdev_priv(netdev), addr, true, true); +} + +static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr) +{ + return ionic_lif_addr(netdev_priv(netdev), addr, true, false); } static int ionic_addr_del(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, false); + return ionic_lif_addr(netdev_priv(netdev), addr, false, true); +} + +static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr) +{ + return ionic_lif_addr(netdev_priv(netdev), addr, false, false); } static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) @@ -1028,11 +1129,12 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) lif->rx_mode = rx_mode; } -static void _ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) +static void _ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode, + bool from_ndo) { struct ionic_deferred_work *work; - if (in_interrupt()) { + if (from_ndo) { work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { netdev_err(lif->netdev, "%s OOM\n", __func__); @@ -1047,15 +1149,21 @@ static void _ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) } } -static void ionic_set_rx_mode(struct net_device *netdev) +static void ionic_dev_uc_sync(struct net_device *netdev, bool from_ndo) +{ + if (from_ndo) + __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); + else + __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); + +} + +static void ionic_set_rx_mode(struct net_device *netdev, bool from_ndo) { struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_identity *ident; unsigned int nfilters; unsigned int rx_mode; - ident = &lif->ionic->ident; - rx_mode = IONIC_RX_MODE_F_UNICAST; rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; rx_mode |= (netdev->flags & IFF_BROADCAST) ? 
IONIC_RX_MODE_F_BROADCAST : 0; @@ -1069,8 +1177,8 @@ static void ionic_set_rx_mode(struct net_device *netdev) * we remove our overflow flag and check the netdev flags * to see if we can disable NIC PROMISC */ - __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); - nfilters = le32_to_cpu(ident->lif.eth.max_ucast_filters); + ionic_dev_uc_sync(netdev, from_ndo); + nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); if (netdev_uc_count(netdev) + 1 > nfilters) { rx_mode |= IONIC_RX_MODE_F_PROMISC; lif->uc_overflow = true; @@ -1081,8 +1189,8 @@ static void ionic_set_rx_mode(struct net_device *netdev) } /* same for multicast */ - __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); - nfilters = le32_to_cpu(ident->lif.eth.max_mcast_filters); + ionic_dev_uc_sync(netdev, from_ndo); + nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); if (netdev_mc_count(netdev) > nfilters) { rx_mode |= IONIC_RX_MODE_F_ALLMULTI; lif->mc_overflow = true; @@ -1093,7 +1201,12 @@ static void ionic_set_rx_mode(struct net_device *netdev) } if (lif->rx_mode != rx_mode) - _ionic_lif_rx_mode(lif, rx_mode); + _ionic_lif_rx_mode(lif, rx_mode, from_ndo); +} + +static void ionic_ndo_set_rx_mode(struct net_device *netdev) +{ + ionic_set_rx_mode(netdev, true); } static __le64 ionic_netdev_features_to_nic(netdev_features_t features) @@ -1315,6 +1428,35 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa) return ionic_addr_add(netdev, mac); } +static void ionic_stop_queues_reconfig(struct ionic_lif *lif) +{ + /* Stop and clean the queues before reconfiguration */ + mutex_lock(&lif->queue_lock); + netif_device_detach(lif->netdev); + ionic_stop_queues(lif); + ionic_txrx_deinit(lif); +} + +static int ionic_start_queues_reconfig(struct ionic_lif *lif) +{ + int err; + + /* Re-init the queues after reconfiguration */ + + /* The only way txrx_init can fail here is if communication + * with FW is suddenly broken. There's not much we can do + * at this point - error messages have already been printed, + * so we can continue on and the user can eventually do a + * DOWN and UP to try to reset and clear the issue. + */ + err = ionic_txrx_init(lif); + mutex_unlock(&lif->queue_lock); + ionic_link_status_check_request(lif, true); + netif_device_attach(lif->netdev); + + return err; +} + static int ionic_change_mtu(struct net_device *netdev, int new_mtu) { struct ionic_lif *lif = netdev_priv(netdev); @@ -1334,9 +1476,12 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu) return err; netdev->mtu = new_mtu; - err = ionic_reset_queues(lif, NULL, NULL); + /* if we're not running, nothing more to do */ + if (!netif_running(netdev)) + return 0; - return err; + ionic_stop_queues_reconfig(lif); + return ionic_start_queues_reconfig(lif); } static void ionic_tx_timeout_work(struct work_struct *ws) @@ -1345,9 +1490,14 @@ static void ionic_tx_timeout_work(struct work_struct *ws) netdev_info(lif->netdev, "Tx Timeout recovery\n"); - rtnl_lock(); - ionic_reset_queues(lif, NULL, NULL); - rtnl_unlock(); + /* if we were stopped before this scheduled job was launched, + * don't bother the queues as they are already stopped. 
+ */ + if (!netif_running(lif->netdev)) + return; + + ionic_stop_queues_reconfig(lif); + ionic_start_queues_reconfig(lif); } static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue) @@ -1478,22 +1628,16 @@ static void ionic_lif_rss_deinit(struct ionic_lif *lif) static void ionic_txrx_disable(struct ionic_lif *lif) { unsigned int i; - int err; + int err = 0; if (lif->txqcqs) { - for (i = 0; i < lif->nxqs; i++) { - err = ionic_qcq_disable(lif->txqcqs[i].qcq); - if (err == -ETIMEDOUT) - break; - } + for (i = 0; i < lif->nxqs; i++) + err = ionic_qcq_disable(lif->txqcqs[i], (err != -ETIMEDOUT)); } if (lif->rxqcqs) { - for (i = 0; i < lif->nxqs; i++) { - err = ionic_qcq_disable(lif->rxqcqs[i].qcq); - if (err == -ETIMEDOUT) - break; - } + for (i = 0; i < lif->nxqs; i++) + err = ionic_qcq_disable(lif->rxqcqs[i], (err != -ETIMEDOUT)); } } @@ -1502,18 +1646,18 @@ static void ionic_txrx_deinit(struct ionic_lif *lif) unsigned int i; if (lif->txqcqs) { - for (i = 0; i < lif->nxqs; i++) { - ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq); - ionic_tx_flush(&lif->txqcqs[i].qcq->cq); - ionic_tx_empty(&lif->txqcqs[i].qcq->q); + for (i = 0; i < lif->nxqs && lif->txqcqs[i]; i++) { + ionic_lif_qcq_deinit(lif, lif->txqcqs[i]); + ionic_tx_flush(&lif->txqcqs[i]->cq); + ionic_tx_empty(&lif->txqcqs[i]->q); } } if (lif->rxqcqs) { - for (i = 0; i < lif->nxqs; i++) { - ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq); - ionic_rx_flush(&lif->rxqcqs[i].qcq->cq); - ionic_rx_empty(&lif->rxqcqs[i].qcq->q); + for (i = 0; i < lif->nxqs && lif->rxqcqs[i]; i++) { + ionic_lif_qcq_deinit(lif, lif->rxqcqs[i]); + ionic_rx_flush(&lif->rxqcqs[i]->cq); + ionic_rx_empty(&lif->rxqcqs[i]->q); } } lif->rx_mode = 0; @@ -1524,16 +1668,18 @@ static void ionic_txrx_free(struct ionic_lif *lif) unsigned int i; if (lif->txqcqs) { - for (i = 0; i < lif->nxqs; i++) { - ionic_qcq_free(lif, lif->txqcqs[i].qcq); - lif->txqcqs[i].qcq = NULL; + for (i = 0; i < lif->ionic->ntxqs_per_lif && lif->txqcqs[i]; i++) { + ionic_qcq_free(lif, lif->txqcqs[i]); + devm_kfree(lif->ionic->dev, lif->txqcqs[i]); + lif->txqcqs[i] = NULL; } } if (lif->rxqcqs) { - for (i = 0; i < lif->nxqs; i++) { - ionic_qcq_free(lif, lif->rxqcqs[i].qcq); - lif->rxqcqs[i].qcq = NULL; + for (i = 0; i < lif->ionic->nrxqs_per_lif && lif->rxqcqs[i]; i++) { + ionic_qcq_free(lif, lif->rxqcqs[i]); + devm_kfree(lif->ionic->dev, lif->rxqcqs[i]); + lif->rxqcqs[i] = NULL; } } } @@ -1561,17 +1707,19 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) sizeof(struct ionic_txq_desc), sizeof(struct ionic_txq_comp), sg_desc_sz, - lif->kern_pid, &lif->txqcqs[i].qcq); + lif->kern_pid, &lif->txqcqs[i]); if (err) goto err_out; - if (flags & IONIC_QCQ_F_INTR) + if (flags & IONIC_QCQ_F_INTR) { ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->txqcqs[i].qcq->intr.index, + lif->txqcqs[i]->intr.index, lif->tx_coalesce_hw); + if (test_bit(IONIC_LIF_F_TX_DIM_INTR, lif->state)) + lif->txqcqs[i]->intr.dim_coal_hw = lif->tx_coalesce_hw; + } - lif->txqcqs[i].qcq->stats = lif->txqcqs[i].stats; - ionic_debugfs_add_qcq(lif, lif->txqcqs[i].qcq); + ionic_debugfs_add_qcq(lif, lif->txqcqs[i]); } flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_SG | IONIC_QCQ_F_INTR; @@ -1581,20 +1729,21 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) sizeof(struct ionic_rxq_desc), sizeof(struct ionic_rxq_comp), sizeof(struct ionic_rxq_sg_desc), - lif->kern_pid, &lif->rxqcqs[i].qcq); + lif->kern_pid, &lif->rxqcqs[i]); if (err) goto err_out; ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - 
lif->rxqcqs[i].qcq->intr.index, + lif->rxqcqs[i]->intr.index, lif->rx_coalesce_hw); + if (test_bit(IONIC_LIF_F_RX_DIM_INTR, lif->state)) + lif->rxqcqs[i]->intr.dim_coal_hw = lif->rx_coalesce_hw; if (!test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) - ionic_link_qcq_interrupts(lif->rxqcqs[i].qcq, - lif->txqcqs[i].qcq); + ionic_link_qcq_interrupts(lif->rxqcqs[i], + lif->txqcqs[i]); - lif->rxqcqs[i].qcq->stats = lif->rxqcqs[i].stats; - ionic_debugfs_add_qcq(lif, lif->rxqcqs[i].qcq); + ionic_debugfs_add_qcq(lif, lif->rxqcqs[i]); } return 0; @@ -1611,13 +1760,13 @@ static int ionic_txrx_init(struct ionic_lif *lif) int err; for (i = 0; i < lif->nxqs; i++) { - err = ionic_lif_txq_init(lif, lif->txqcqs[i].qcq); + err = ionic_lif_txq_init(lif, lif->txqcqs[i]); if (err) goto err_out; - err = ionic_lif_rxq_init(lif, lif->rxqcqs[i].qcq); + err = ionic_lif_rxq_init(lif, lif->rxqcqs[i]); if (err) { - ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq); + ionic_lif_qcq_deinit(lif, lif->txqcqs[i]); goto err_out; } } @@ -1625,14 +1774,14 @@ static int ionic_txrx_init(struct ionic_lif *lif) if (lif->netdev->features & NETIF_F_RXHASH) ionic_lif_rss_init(lif); - ionic_set_rx_mode(lif->netdev); + ionic_set_rx_mode(lif->netdev, false); return 0; err_out: while (i--) { - ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq); - ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq); + ionic_lif_qcq_deinit(lif, lif->txqcqs[i]); + ionic_lif_qcq_deinit(lif, lif->rxqcqs[i]); } return err; @@ -1640,18 +1789,24 @@ err_out: static int ionic_txrx_enable(struct ionic_lif *lif) { + int derr = 0; int i, err; for (i = 0; i < lif->nxqs; i++) { - ionic_rx_fill(&lif->rxqcqs[i].qcq->q); - err = ionic_qcq_enable(lif->rxqcqs[i].qcq); + if (!(lif->rxqcqs[i] && lif->txqcqs[i])) { + dev_err(lif->ionic->dev, "%s: bad qcq %d\n", __func__, i); + err = -ENXIO; + goto err_out; + } + + ionic_rx_fill(&lif->rxqcqs[i]->q); + err = ionic_qcq_enable(lif->rxqcqs[i]); if (err) goto err_out; - err = ionic_qcq_enable(lif->txqcqs[i].qcq); + err = ionic_qcq_enable(lif->txqcqs[i]); if (err) { - if (err != -ETIMEDOUT) - ionic_qcq_disable(lif->rxqcqs[i].qcq); + derr = ionic_qcq_disable(lif->rxqcqs[i], (err != -ETIMEDOUT)); goto err_out; } } @@ -1660,12 +1815,8 @@ static int ionic_txrx_enable(struct ionic_lif *lif) err_out: while (i--) { - err = ionic_qcq_disable(lif->txqcqs[i].qcq); - if (err == -ETIMEDOUT) - break; - err = ionic_qcq_disable(lif->rxqcqs[i].qcq); - if (err == -ETIMEDOUT) - break; + derr = ionic_qcq_disable(lif->txqcqs[i], (derr != -ETIMEDOUT)); + derr = ionic_qcq_disable(lif->rxqcqs[i], (derr != -ETIMEDOUT)); } return err; @@ -1688,7 +1839,7 @@ static int ionic_start_queues(struct ionic_lif *lif) return 0; } -int ionic_open(struct net_device *netdev) +static int ionic_open(struct net_device *netdev) { struct ionic_lif *lif = netdev_priv(netdev); int err; @@ -1734,7 +1885,7 @@ static void ionic_stop_queues(struct ionic_lif *lif) ionic_txrx_disable(lif); } -int ionic_stop(struct net_device *netdev) +static int ionic_stop(struct net_device *netdev) { struct ionic_lif *lif = netdev_priv(netdev); @@ -1998,7 +2149,7 @@ static const struct net_device_ops ionic_netdev_ops = { .ndo_stop = ionic_stop, .ndo_start_xmit = ionic_start_xmit, .ndo_get_stats64 = ionic_get_stats64, - .ndo_set_rx_mode = ionic_set_rx_mode, + .ndo_set_rx_mode = ionic_ndo_set_rx_mode, .ndo_set_features = ionic_set_features, .ndo_set_mac_address = ionic_set_mac_address, .ndo_validate_addr = eth_validate_addr, @@ -2016,35 +2167,227 @@ static const struct net_device_ops ionic_netdev_ops = { 
.ndo_get_vf_stats = ionic_get_vf_stats, }; -int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg) -{ - bool running; - int err = 0; +static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b) +{ + /* only swapping the queues, not the napi, flags, or other stuff */ + swap(a->q.num_descs, b->q.num_descs); + swap(a->q.base, b->q.base); + swap(a->q.base_pa, b->q.base_pa); + swap(a->q.info, b->q.info); + swap(a->q_base, b->q_base); + swap(a->q_base_pa, b->q_base_pa); + swap(a->q_size, b->q_size); + + swap(a->q.sg_base, b->q.sg_base); + swap(a->q.sg_base_pa, b->q.sg_base_pa); + swap(a->sg_base, b->sg_base); + swap(a->sg_base_pa, b->sg_base_pa); + swap(a->sg_size, b->sg_size); + + swap(a->cq.num_descs, b->cq.num_descs); + swap(a->cq.base, b->cq.base); + swap(a->cq.base_pa, b->cq.base_pa); + swap(a->cq.info, b->cq.info); + swap(a->cq_base, b->cq_base); + swap(a->cq_base_pa, b->cq_base_pa); + swap(a->cq_size, b->cq_size); +} + +int ionic_reconfigure_queues(struct ionic_lif *lif, + struct ionic_queue_params *qparam) +{ + struct ionic_qcq **tx_qcqs = NULL; + struct ionic_qcq **rx_qcqs = NULL; + unsigned int sg_desc_sz; + unsigned int flags; + int err = -ENOMEM; + unsigned int i; - mutex_lock(&lif->queue_lock); - running = netif_running(lif->netdev); - if (running) { - netif_device_detach(lif->netdev); - err = ionic_stop(lif->netdev); + /* allocate temporary qcq arrays to hold new queue structs */ + if (qparam->nxqs != lif->nxqs || qparam->ntxq_descs != lif->ntxq_descs) { + tx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->ntxqs_per_lif, + sizeof(struct ionic_qcq *), GFP_KERNEL); + if (!tx_qcqs) + goto err_out; + } + if (qparam->nxqs != lif->nxqs || qparam->nrxq_descs != lif->nrxq_descs) { + rx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->nrxqs_per_lif, + sizeof(struct ionic_qcq *), GFP_KERNEL); + if (!rx_qcqs) + goto err_out; + } + + /* allocate new desc_info and rings, but leave the interrupt setup + * until later so as to not mess with the still-running queues + */ + if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 && + lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz == + sizeof(struct ionic_txq_sg_desc_v1)) + sg_desc_sz = sizeof(struct ionic_txq_sg_desc_v1); + else + sg_desc_sz = sizeof(struct ionic_txq_sg_desc); + + if (tx_qcqs) { + for (i = 0; i < qparam->nxqs; i++) { + flags = lif->txqcqs[i]->flags & ~IONIC_QCQ_F_INTR; + err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, + qparam->ntxq_descs, + sizeof(struct ionic_txq_desc), + sizeof(struct ionic_txq_comp), + sg_desc_sz, + lif->kern_pid, &tx_qcqs[i]); + if (err) + goto err_out; + } + } + + if (rx_qcqs) { + for (i = 0; i < qparam->nxqs; i++) { + flags = lif->rxqcqs[i]->flags & ~IONIC_QCQ_F_INTR; + err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, + qparam->nrxq_descs, + sizeof(struct ionic_rxq_desc), + sizeof(struct ionic_rxq_comp), + sizeof(struct ionic_rxq_sg_desc), + lif->kern_pid, &rx_qcqs[i]); + if (err) + goto err_out; + } + } + + /* stop and clean the queues */ + ionic_stop_queues_reconfig(lif); + + if (qparam->nxqs != lif->nxqs) { + err = netif_set_real_num_tx_queues(lif->netdev, qparam->nxqs); if (err) - goto reset_out; + goto err_out_reinit_unlock; + err = netif_set_real_num_rx_queues(lif->netdev, qparam->nxqs); + if (err) { + netif_set_real_num_tx_queues(lif->netdev, lif->nxqs); + goto err_out_reinit_unlock; + } } - if (cb) - cb(lif, arg); + /* swap new desc_info and rings, keeping existing interrupt config */ + if (tx_qcqs) { + lif->ntxq_descs = qparam->ntxq_descs; + for (i = 
0; i < qparam->nxqs; i++) + ionic_swap_queues(lif->txqcqs[i], tx_qcqs[i]); + } - if (running) { - err = ionic_open(lif->netdev); - netif_device_attach(lif->netdev); + if (rx_qcqs) { + lif->nrxq_descs = qparam->nrxq_descs; + for (i = 0; i < qparam->nxqs; i++) + ionic_swap_queues(lif->rxqcqs[i], rx_qcqs[i]); } -reset_out: - mutex_unlock(&lif->queue_lock); + /* if we need to change the interrupt layout, this is the time */ + if (qparam->intr_split != test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state) || + qparam->nxqs != lif->nxqs) { + if (qparam->intr_split) { + set_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); + } else { + clear_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); + lif->tx_coalesce_usecs = lif->rx_coalesce_usecs; + lif->tx_coalesce_hw = lif->rx_coalesce_hw; + } + + /* clear existing interrupt assignments */ + for (i = 0; i < lif->ionic->ntxqs_per_lif; i++) { + ionic_qcq_intr_free(lif, lif->txqcqs[i]); + ionic_qcq_intr_free(lif, lif->rxqcqs[i]); + } + + /* re-assign the interrupts */ + for (i = 0; i < qparam->nxqs; i++) { + lif->rxqcqs[i]->flags |= IONIC_QCQ_F_INTR; + err = ionic_alloc_qcq_interrupt(lif, lif->rxqcqs[i]); + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + lif->rxqcqs[i]->intr.index, + lif->rx_coalesce_hw); + + if (qparam->intr_split) { + lif->txqcqs[i]->flags |= IONIC_QCQ_F_INTR; + err = ionic_alloc_qcq_interrupt(lif, lif->txqcqs[i]); + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + lif->txqcqs[i]->intr.index, + lif->tx_coalesce_hw); + if (test_bit(IONIC_LIF_F_TX_DIM_INTR, lif->state)) + lif->txqcqs[i]->intr.dim_coal_hw = lif->tx_coalesce_hw; + } else { + lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; + ionic_link_qcq_interrupts(lif->rxqcqs[i], lif->txqcqs[i]); + } + } + } + + /* now we can rework the debugfs mappings */ + if (tx_qcqs) { + for (i = 0; i < qparam->nxqs; i++) { + ionic_debugfs_del_qcq(lif->txqcqs[i]); + ionic_debugfs_add_qcq(lif, lif->txqcqs[i]); + } + } + + if (rx_qcqs) { + for (i = 0; i < qparam->nxqs; i++) { + ionic_debugfs_del_qcq(lif->rxqcqs[i]); + ionic_debugfs_add_qcq(lif, lif->rxqcqs[i]); + } + } + + swap(lif->nxqs, qparam->nxqs); + +err_out_reinit_unlock: + /* re-init the queues, but don't lose an error code */ + if (err) + ionic_start_queues_reconfig(lif); + else + err = ionic_start_queues_reconfig(lif); + +err_out: + /* free old allocs without cleaning intr */ + for (i = 0; i < qparam->nxqs; i++) { + if (tx_qcqs && tx_qcqs[i]) { + tx_qcqs[i]->flags &= ~IONIC_QCQ_F_INTR; + ionic_qcq_free(lif, tx_qcqs[i]); + devm_kfree(lif->ionic->dev, tx_qcqs[i]); + tx_qcqs[i] = NULL; + } + if (rx_qcqs && rx_qcqs[i]) { + rx_qcqs[i]->flags &= ~IONIC_QCQ_F_INTR; + ionic_qcq_free(lif, rx_qcqs[i]); + devm_kfree(lif->ionic->dev, rx_qcqs[i]); + rx_qcqs[i] = NULL; + } + } + + /* free q array */ + if (rx_qcqs) { + devm_kfree(lif->ionic->dev, rx_qcqs); + rx_qcqs = NULL; + } + if (tx_qcqs) { + devm_kfree(lif->ionic->dev, tx_qcqs); + tx_qcqs = NULL; + } + + /* clean the unused dma and info allocations when new set is smaller + * than the full array, but leave the qcq shells in place + */ + for (i = lif->nxqs; i < lif->ionic->ntxqs_per_lif; i++) { + lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; + ionic_qcq_free(lif, lif->txqcqs[i]); + + lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; + ionic_qcq_free(lif, lif->rxqcqs[i]); + } return err; } -static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index) +int ionic_lif_alloc(struct ionic *ionic) { struct device *dev = ionic->dev; union ionic_lif_identity *lid; @@ -2055,7 +2398,7 @@ static struct ionic_lif
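Condensed, ionic_reconfigure_queues() above is an allocate/swap/free pattern: replacement rings are built while traffic still flows, the queues are stopped only long enough to swap ring state into the live qcqs, and the old rings are freed after restart. A sketch under that reading, with allocation details, error paths and the interrupt re-layout elided; example_alloc_shadow() and example_free_shadow() are hypothetical stand-ins for the ionic_qcq_alloc()/ionic_qcq_free() loops, and only the tx side is shown (rx is handled the same way):

static int example_swap_reconfig(struct ionic_lif *lif,
				 struct ionic_queue_params *qparam)
{
	struct ionic_qcq **shadow;
	unsigned int i;
	int err;

	shadow = example_alloc_shadow(lif, qparam); /* rings built while live */
	if (!shadow)
		return -ENOMEM;

	ionic_stop_queues_reconfig(lif);	/* quiesce only for the swap */
	for (i = 0; i < qparam->nxqs; i++)
		ionic_swap_queues(lif->txqcqs[i], shadow[i]);
	err = ionic_start_queues_reconfig(lif);	/* restart on the new rings */

	example_free_shadow(lif, shadow);	/* old ring memory now in shadow */
	return err;
}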
*ionic_lif_alloc(struct ionic *ionic, unsigned int index lid = kzalloc(sizeof(*lid), GFP_KERNEL); if (!lid) - return ERR_PTR(-ENOMEM); + return -ENOMEM; netdev = alloc_etherdev_mqs(sizeof(*lif), ionic->ntxqs_per_lif, ionic->ntxqs_per_lif); @@ -2069,7 +2412,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index lif = netdev_priv(netdev); lif->netdev = netdev; - ionic->master_lif = lif; + ionic->lif = lif; netdev->netdev_ops = &ionic_netdev_ops; ionic_ethtool_set_ops(netdev); @@ -2078,8 +2421,14 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index lif->identity = lid; lif->lif_type = IONIC_LIF_TYPE_CLASSIC; - ionic_lif_identify(ionic, lif->lif_type, lif->identity); - lif->netdev->min_mtu = le32_to_cpu(lif->identity->eth.min_frame_size); + err = ionic_lif_identify(ionic, lif->lif_type, lif->identity); + if (err) { + dev_err(ionic->dev, "Cannot identify type %d: %d\n", + lif->lif_type, err); + goto err_out_free_netdev; + } + lif->netdev->min_mtu = max_t(unsigned int, ETH_MIN_MTU, + le32_to_cpu(lif->identity->eth.min_frame_size)); lif->netdev->max_mtu = le32_to_cpu(lif->identity->eth.max_frame_size) - ETH_HLEN - VLAN_HLEN; @@ -2087,7 +2436,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index lif->nxqs = ionic->ntxqs_per_lif; lif->ionic = ionic; - lif->index = index; + lif->index = 0; lif->ntxq_descs = IONIC_DEF_TXRX_DESC; lif->nrxq_descs = IONIC_DEF_TXRX_DESC; lif->tx_budget = IONIC_TX_BUDGET_DEFAULT; @@ -2098,8 +2447,10 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index lif->rx_coalesce_usecs); lif->tx_coalesce_usecs = lif->rx_coalesce_usecs; lif->tx_coalesce_hw = lif->rx_coalesce_hw; + set_bit(IONIC_LIF_F_RX_DIM_INTR, lif->state); + set_bit(IONIC_LIF_F_TX_DIM_INTR, lif->state); - snprintf(lif->name, sizeof(lif->name), "lif%u", index); + snprintf(lif->name, sizeof(lif->name), "lif%u", lif->index); spin_lock_init(&lif->adminq_lock); @@ -2119,7 +2470,8 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index ionic_debugfs_add_lif(lif); - /* allocate queues */ + /* allocate control queues and txrx queue arrays */ + ionic_lif_queue_identify(lif); err = ionic_qcqs_alloc(lif); if (err) goto err_out_free_lif_info; @@ -2138,9 +2490,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index } netdev_rss_key_fill(lif->rss_hash_key, IONIC_RSS_HASH_KEY_SIZE); - list_add_tail(&lif->list, &ionic->lifs); - - return lif; + return 0; err_out_free_qcqs: ionic_qcqs_free(lif); @@ -2154,27 +2504,7 @@ err_out_free_netdev: err_out_free_lid: kfree(lid); - return ERR_PTR(err); -} - -int ionic_lifs_alloc(struct ionic *ionic) -{ - struct ionic_lif *lif; - - INIT_LIST_HEAD(&ionic->lifs); - - /* only build the first lif, others are for later features */ - set_bit(0, ionic->lifbits); - - lif = ionic_lif_alloc(ionic, 0); - if (IS_ERR_OR_NULL(lif)) { - clear_bit(0, ionic->lifbits); - return -ENOMEM; - } - - ionic_lif_queue_identify(lif); - - return 0; + return err; } static void ionic_lif_reset(struct ionic_lif *lif) @@ -2209,7 +2539,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif) ionic_txrx_deinit(lif); ionic_txrx_free(lif); } - ionic_lifs_deinit(ionic); + ionic_lif_deinit(lif); ionic_reset(ionic); ionic_qcqs_free(lif); @@ -2227,12 +2557,20 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) dev_info(ionic->dev, "FW Up: restarting LIFs\n"); ionic_init_devinfo(ionic); - ionic_port_init(ionic); + err = 
ionic_identify(ionic); + if (err) + goto err_out; + err = ionic_port_identify(ionic); + if (err) + goto err_out; + err = ionic_port_init(ionic); + if (err) + goto err_out; err = ionic_qcqs_alloc(lif); if (err) goto err_out; - err = ionic_lifs_init(ionic); + err = ionic_lif_init(lif); if (err) goto err_qcqs_free; @@ -2252,7 +2590,7 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) } clear_bit(IONIC_LIF_F_FW_RESET, lif->state); - ionic_link_status_check_request(lif); + ionic_link_status_check_request(lif, true); netif_device_attach(lif->netdev); dev_info(ionic->dev, "FW Up: LIFs restarted\n"); @@ -2261,14 +2599,14 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) err_txrx_free: ionic_txrx_free(lif); err_lifs_deinit: - ionic_lifs_deinit(ionic); + ionic_lif_deinit(lif); err_qcqs_free: ionic_qcqs_free(lif); err_out: dev_err(ionic->dev, "FW Up: LIFs restart failed - err %d\n", err); } -static void ionic_lif_free(struct ionic_lif *lif) +void ionic_lif_free(struct ionic_lif *lif) { struct device *dev = lif->ionic->dev; @@ -2297,23 +2635,10 @@ static void ionic_lif_free(struct ionic_lif *lif) /* free netdev & lif */ ionic_debugfs_del_lif(lif); - list_del(&lif->list); free_netdev(lif->netdev); } -void ionic_lifs_free(struct ionic *ionic) -{ - struct list_head *cur, *tmp; - struct ionic_lif *lif; - - list_for_each_safe(cur, tmp, &ionic->lifs) { - lif = list_entry(cur, struct ionic_lif, list); - - ionic_lif_free(lif); - } -} - -static void ionic_lif_deinit(struct ionic_lif *lif) +void ionic_lif_deinit(struct ionic_lif *lif) { if (!test_and_clear_bit(IONIC_LIF_F_INITED, lif->state)) return; @@ -2334,17 +2659,6 @@ static void ionic_lif_deinit(struct ionic_lif *lif) ionic_lif_reset(lif); } -void ionic_lifs_deinit(struct ionic *ionic) -{ - struct list_head *cur, *tmp; - struct ionic_lif *lif; - - list_for_each_safe(cur, tmp, &ionic->lifs) { - lif = list_entry(cur, struct ionic_lif, list); - ionic_lif_deinit(lif); - } -} - static int ionic_lif_adminq_init(struct ionic_lif *lif) { struct device *dev = lif->ionic->dev; @@ -2468,7 +2782,7 @@ static int ionic_station_set(struct ionic_lif *lif) */ if (!ether_addr_equal(ctx.comp.lif_getattr.mac, netdev->dev_addr)) - ionic_lif_addr(lif, netdev->dev_addr, true); + ionic_lif_addr(lif, netdev->dev_addr, true, true); } else { /* Update the netdev mac with the device's mac */ memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); @@ -2485,12 +2799,12 @@ static int ionic_station_set(struct ionic_lif *lif) netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, true); + ionic_lif_addr(lif, netdev->dev_addr, true, true); return 0; } -static int ionic_lif_init(struct ionic_lif *lif) +int ionic_lif_init(struct ionic_lif *lif) { struct ionic_dev *idev = &lif->ionic->idev; struct device *dev = lif->ionic->dev; @@ -2580,22 +2894,6 @@ err_out_free_dbid: return err; } -int ionic_lifs_init(struct ionic *ionic) -{ - struct list_head *cur, *tmp; - struct ionic_lif *lif; - int err; - - list_for_each_safe(cur, tmp, &ionic->lifs) { - lif = list_entry(cur, struct ionic_lif, list); - err = ionic_lif_init(lif); - if (err) - return err; - } - - return 0; -} - static void ionic_lif_notify_work(struct work_struct *ws) { } @@ -2644,45 +2942,41 @@ static int ionic_lif_notify(struct notifier_block *nb, return NOTIFY_DONE; } -int ionic_lifs_register(struct ionic *ionic) +int ionic_lif_register(struct ionic_lif *lif) { int err; - INIT_WORK(&ionic->nb_work, ionic_lif_notify_work); + 
INIT_WORK(&lif->ionic->nb_work, ionic_lif_notify_work); - ionic->nb.notifier_call = ionic_lif_notify; + lif->ionic->nb.notifier_call = ionic_lif_notify; - err = register_netdevice_notifier(&ionic->nb); + err = register_netdevice_notifier(&lif->ionic->nb); if (err) - ionic->nb.notifier_call = NULL; + lif->ionic->nb.notifier_call = NULL; /* only register LIF0 for now */ - err = register_netdev(ionic->master_lif->netdev); + err = register_netdev(lif->netdev); if (err) { - dev_err(ionic->dev, "Cannot register net device, aborting\n"); + dev_err(lif->ionic->dev, "Cannot register net device, aborting\n"); return err; } - ionic->master_lif->registered = true; - ionic_lif_set_netdev_info(ionic->master_lif); + lif->registered = true; + ionic_lif_set_netdev_info(lif); return 0; } -void ionic_lifs_unregister(struct ionic *ionic) +void ionic_lif_unregister(struct ionic_lif *lif) { - if (ionic->nb.notifier_call) { - unregister_netdevice_notifier(&ionic->nb); - cancel_work_sync(&ionic->nb_work); - ionic->nb.notifier_call = NULL; + if (lif->ionic->nb.notifier_call) { + unregister_netdevice_notifier(&lif->ionic->nb); + cancel_work_sync(&lif->ionic->nb_work); + lif->ionic->nb.notifier_call = NULL; } - /* There is only one lif ever registered in the - * current model, so don't bother searching the - * ionic->lif for candidates to unregister - */ - if (ionic->master_lif && - ionic->master_lif->netdev->reg_state == NETREG_REGISTERED) - unregister_netdev(ionic->master_lif->netdev); + if (lif->netdev->reg_state == NETREG_REGISTERED) + unregister_netdev(lif->netdev); + lif->registered = false; } static void ionic_lif_queue_identify(struct ionic_lif *lif) @@ -2801,7 +3095,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type, return 0; } -int ionic_lifs_size(struct ionic *ionic) +int ionic_lif_size(struct ionic *ionic) { struct ionic_identity *ident = &ionic->ident; unsigned int nintrs, dev_nintrs; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 1ee3b14c8d50..0224dfd24b8a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -4,6 +4,7 @@ #ifndef _IONIC_LIF_H_ #define _IONIC_LIF_H_ +#include <linux/dim.h> #include <linux/pci.h> #include "ionic_rx_filter.h" @@ -16,32 +17,32 @@ #define IONIC_TX_BUDGET_DEFAULT 256 struct ionic_tx_stats { - u64 dma_map_err; u64 pkts; u64 bytes; - u64 clean; - u64 linearize; u64 csum_none; u64 csum; - u64 crc32_csum; u64 tso; u64 tso_bytes; u64 frags; u64 vlan_inserted; + u64 clean; + u64 linearize; + u64 crc32_csum; u64 sg_cntr[IONIC_MAX_NUM_SG_CNTR]; + u64 dma_map_err; }; struct ionic_rx_stats { - u64 dma_map_err; - u64 alloc_err; u64 pkts; u64 bytes; u64 csum_none; u64 csum_complete; - u64 csum_error; u64 buffers_posted; u64 dropped; u64 vlan_stripped; + u64 csum_error; + u64 dma_map_err; + u64 alloc_err; }; #define IONIC_QCQ_F_INITED BIT(0) @@ -56,35 +57,29 @@ struct ionic_napi_stats { u64 work_done_cntr[IONIC_MAX_NUM_NAPI_CNTR]; }; -struct ionic_q_stats { - union { - struct ionic_tx_stats tx; - struct ionic_rx_stats rx; - }; -}; - struct ionic_qcq { - void *base; - dma_addr_t base_pa; - unsigned int total_size; + void *q_base; + dma_addr_t q_base_pa; + u32 q_size; + void *cq_base; + dma_addr_t cq_base_pa; + u32 cq_size; + void *sg_base; + dma_addr_t sg_base_pa; + u32 sg_size; + struct dim dim; struct ionic_queue q; struct ionic_cq cq; struct ionic_intr_info intr; struct napi_struct napi; struct ionic_napi_stats napi_stats; - struct 
ionic_q_stats *stats; unsigned int flags; struct dentry *dentry; }; -struct ionic_qcqst { - struct ionic_qcq *qcq; - struct ionic_q_stats *stats; -}; - #define q_to_qcq(q) container_of(q, struct ionic_qcq, q) -#define q_to_tx_stats(q) (&q_to_qcq(q)->stats->tx) -#define q_to_rx_stats(q) (&q_to_qcq(q)->stats->rx) +#define q_to_tx_stats(q) (&(q)->lif->txqstats[(q)->index]) +#define q_to_rx_stats(q) (&(q)->lif->rxqstats[(q)->index]) #define napi_to_qcq(napi) container_of(napi, struct ionic_qcq, napi) #define napi_to_cq(napi) (&napi_to_qcq(napi)->cq) @@ -138,6 +133,8 @@ enum ionic_lif_state_flags { IONIC_LIF_F_LINK_CHECK_REQUESTED, IONIC_LIF_F_FW_RESET, IONIC_LIF_F_SPLIT_INTR, + IONIC_LIF_F_TX_DIM_INTR, + IONIC_LIF_F_RX_DIM_INTR, /* leave this as last */ IONIC_LIF_F_STATE_SIZE @@ -170,8 +167,10 @@ struct ionic_lif { spinlock_t adminq_lock; /* lock for AdminQ operations */ struct ionic_qcq *adminqcq; struct ionic_qcq *notifyqcq; - struct ionic_qcqst *txqcqs; - struct ionic_qcqst *rxqcqs; + struct ionic_qcq **txqcqs; + struct ionic_tx_stats *txqstats; + struct ionic_qcq **rxqcqs; + struct ionic_rx_stats *rxqstats; u64 last_eid; unsigned int neqs; unsigned int nxqs; @@ -212,12 +211,21 @@ struct ionic_lif { struct work_struct tx_timeout_work; }; -#define lif_to_txqcq(lif, i) ((lif)->txqcqs[i].qcq) -#define lif_to_rxqcq(lif, i) ((lif)->rxqcqs[i].qcq) -#define lif_to_txstats(lif, i) ((lif)->txqcqs[i].stats->tx) -#define lif_to_rxstats(lif, i) ((lif)->rxqcqs[i].stats->rx) -#define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q) -#define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q) +struct ionic_queue_params { + unsigned int nxqs; + unsigned int ntxq_descs; + unsigned int nrxq_descs; + unsigned int intr_split; +}; + +static inline void ionic_init_queue_params(struct ionic_lif *lif, + struct ionic_queue_params *qparam) +{ + qparam->nxqs = lif->nxqs; + qparam->ntxq_descs = lif->ntxq_descs; + qparam->nrxq_descs = lif->nrxq_descs; + qparam->intr_split = test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); +} static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs) { @@ -237,39 +245,38 @@ static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs) typedef void (*ionic_reset_cb)(struct ionic_lif *lif, void *arg); -void ionic_link_status_check_request(struct ionic_lif *lif); +void ionic_link_status_check_request(struct ionic_lif *lif, bool can_sleep); void ionic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *ns); void ionic_lif_deferred_enqueue(struct ionic_deferred *def, struct ionic_deferred_work *work); -int ionic_lifs_alloc(struct ionic *ionic); -void ionic_lifs_free(struct ionic *ionic); -void ionic_lifs_deinit(struct ionic *ionic); -int ionic_lifs_init(struct ionic *ionic); -int ionic_lifs_register(struct ionic *ionic); -void ionic_lifs_unregister(struct ionic *ionic); +int ionic_lif_alloc(struct ionic *ionic); +int ionic_lif_init(struct ionic_lif *lif); +void ionic_lif_free(struct ionic_lif *lif); +void ionic_lif_deinit(struct ionic_lif *lif); +int ionic_lif_register(struct ionic_lif *lif); +void ionic_lif_unregister(struct ionic_lif *lif); int ionic_lif_identify(struct ionic *ionic, u8 lif_type, union ionic_lif_identity *lif_ident); -int ionic_lifs_size(struct ionic *ionic); +int ionic_lif_size(struct ionic *ionic); int ionic_lif_rss_config(struct ionic_lif *lif, u16 types, const u8 *key, const u32 *indir); +int ionic_reconfigure_queues(struct ionic_lif *lif, + struct ionic_queue_params *qparam); -int ionic_open(struct net_device *netdev); -int 
ionic_stop(struct net_device *netdev); -int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg); - -static inline void debug_stats_txq_post(struct ionic_qcq *qcq, - struct ionic_txq_desc *desc, bool dbell) +static inline void debug_stats_txq_post(struct ionic_queue *q, bool dbell) { - u8 num_sg_elems = ((le64_to_cpu(desc->cmd) >> IONIC_TXQ_DESC_NSGE_SHIFT) - & IONIC_TXQ_DESC_NSGE_MASK); + struct ionic_txq_desc *desc = &q->txq[q->head_idx]; + u8 num_sg_elems; - qcq->q.dbell_count += dbell; + q->dbell_count += dbell; + num_sg_elems = ((le64_to_cpu(desc->cmd) >> IONIC_TXQ_DESC_NSGE_SHIFT) + & IONIC_TXQ_DESC_NSGE_MASK); if (num_sg_elems > (IONIC_MAX_NUM_SG_CNTR - 1)) num_sg_elems = IONIC_MAX_NUM_SG_CNTR - 1; - qcq->stats->tx.sg_cntr[num_sg_elems]++; + q->lif->txqstats[q->index].sg_cntr[num_sg_elems]++; } static inline void debug_stats_napi_poll(struct ionic_qcq *qcq, @@ -284,10 +291,8 @@ static inline void debug_stats_napi_poll(struct ionic_qcq *qcq, } #define DEBUG_STATS_CQE_CNT(cq) ((cq)->compl_count++) -#define DEBUG_STATS_RX_BUFF_CNT(qcq) ((qcq)->stats->rx.buffers_posted++) -#define DEBUG_STATS_INTR_REARM(intr) ((intr)->rearm_count++) -#define DEBUG_STATS_TXQ_POST(qcq, txdesc, dbell) \ - debug_stats_txq_post(qcq, txdesc, dbell) +#define DEBUG_STATS_RX_BUFF_CNT(q) ((q)->lif->rxqstats[q->index].buffers_posted++) +#define DEBUG_STATS_TXQ_POST(q, dbell) debug_stats_txq_post(q, dbell) #define DEBUG_STATS_NAPI_POLL(qcq, work_done) \ debug_stats_napi_poll(qcq, work_done) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index df5b9bcc3aba..ee0740881af3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -64,6 +64,8 @@ static const char *ionic_error_to_str(enum ionic_status_code code) return "IONIC_RC_ERROR"; case IONIC_RC_ERDMA: return "IONIC_RC_ERDMA"; + case IONIC_RC_EBAD_FW: + return "IONIC_RC_EBAD_FW"; default: return "IONIC_RC_UNKNOWN"; } @@ -170,6 +172,10 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) return "IONIC_CMD_FW_DOWNLOAD"; case IONIC_CMD_FW_CONTROL: return "IONIC_CMD_FW_CONTROL"; + case IONIC_CMD_FW_DOWNLOAD_V1: + return "IONIC_CMD_FW_DOWNLOAD_V1"; + case IONIC_CMD_FW_CONTROL_V1: + return "IONIC_CMD_FW_CONTROL_V1"; case IONIC_CMD_VF_GETATTR: return "IONIC_CMD_VF_GETATTR"; case IONIC_CMD_VF_SETATTR: @@ -181,15 +187,17 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) static void ionic_adminq_flush(struct ionic_lif *lif) { - struct ionic_queue *adminq = &lif->adminqcq->q; + struct ionic_queue *q = &lif->adminqcq->q; + struct ionic_desc_info *desc_info; spin_lock(&lif->adminq_lock); - while (adminq->tail != adminq->head) { - memset(adminq->tail->desc, 0, sizeof(union ionic_adminq_cmd)); - adminq->tail->cb = NULL; - adminq->tail->cb_arg = NULL; - adminq->tail = adminq->tail->next; + while (q->tail_idx != q->head_idx) { + desc_info = &q->info[q->tail_idx]; + memset(desc_info->desc, 0, sizeof(union ionic_adminq_cmd)); + desc_info->cb = NULL; + desc_info->cb_arg = NULL; + q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); } spin_unlock(&lif->adminq_lock); } @@ -245,18 +253,17 @@ static void ionic_adminq_cb(struct ionic_queue *q, static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) { - struct ionic_queue *adminq; + struct ionic_desc_info *desc_info; + struct ionic_queue *q; int err = 0; - WARN_ON(in_interrupt()); - if (!lif->adminqcq) return -EIO; - adminq = 
&lif->adminqcq->q; + q = &lif->adminqcq->q; spin_lock(&lif->adminq_lock); - if (!ionic_q_has_space(adminq, 1)) { + if (!ionic_q_has_space(q, 1)) { err = -ENOSPC; goto err_out; } @@ -265,13 +272,14 @@ static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) if (err) goto err_out; - memcpy(adminq->head->desc, &ctx->cmd, sizeof(ctx->cmd)); + desc_info = &q->info[q->head_idx]; + memcpy(desc_info->desc, &ctx->cmd, sizeof(ctx->cmd)); dev_dbg(&lif->netdev->dev, "post admin queue command:\n"); dynamic_hex_dump("cmd ", DUMP_PREFIX_OFFSET, 16, 1, &ctx->cmd, sizeof(ctx->cmd), true); - ionic_q_post(adminq, true, ionic_adminq_cb, ctx); + ionic_q_post(q, true, ionic_adminq_cb, ctx); err_out: spin_unlock(&lif->adminq_lock); @@ -301,32 +309,6 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) return ionic_adminq_check_err(lif, ctx, (remaining == 0)); } -int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb, - ionic_cq_done_cb done_cb, void *done_arg) -{ - struct ionic_qcq *qcq = napi_to_qcq(napi); - struct ionic_cq *cq = &qcq->cq; - u32 work_done, flags = 0; - - work_done = ionic_cq_service(cq, budget, cb, done_cb, done_arg); - - if (work_done < budget && napi_complete_done(napi, work_done)) { - flags |= IONIC_INTR_CRED_UNMASK; - DEBUG_STATS_INTR_REARM(cq->bound_intr); - } - - if (work_done || flags) { - flags |= IONIC_INTR_CRED_RESET_COALESCE; - ionic_intr_credits(cq->lif->ionic->idev.intr_ctrl, - cq->bound_intr->index, - work_done, flags); - } - - DEBUG_STATS_NAPI_POLL(qcq, work_done); - - return work_done; -} - static void ionic_dev_cmd_clean(struct ionic *ionic) { union ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs; @@ -346,24 +328,27 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) int done; int err; - WARN_ON(in_interrupt()); - /* Wait for dev cmd to complete, retrying if we get EAGAIN, * but don't wait any longer than max_seconds. */ max_wait = jiffies + (max_seconds * HZ); try_again: + opcode = idev->dev_cmd_regs->cmd.cmd.opcode; start_time = jiffies; do { done = ionic_dev_cmd_done(idev); if (done) break; - msleep(5); - hb = ionic_heartbeat_check(ionic); + usleep_range(100, 200); + + /* Don't check the heartbeat on FW_CONTROL commands as they are + * notorious for interrupting the firmware's heartbeat update. 
+ */ + if (opcode != IONIC_CMD_FW_CONTROL) + hb = ionic_heartbeat_check(ionic); } while (!done && !hb && time_before(jiffies, max_wait)); duration = jiffies - start_time; - opcode = idev->dev_cmd_regs->cmd.cmd.opcode; dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n", ionic_opcode_to_str(opcode), opcode, done, duration / HZ, duration); @@ -387,8 +372,9 @@ try_again: err = ionic_dev_cmd_status(&ionic->idev); if (err) { - if (err == IONIC_RC_EAGAIN && !time_after(jiffies, max_wait)) { - dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) retrying...\n", + if (err == IONIC_RC_EAGAIN && + time_before(jiffies, (max_wait - HZ))) { + dev_dbg(ionic->dev, "DEV_CMD %s (%d), %s (%d) retrying...\n", ionic_opcode_to_str(opcode), opcode, ionic_error_to_str(err), err); @@ -398,9 +384,10 @@ try_again: goto try_again; } - dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n", - ionic_opcode_to_str(opcode), opcode, - ionic_error_to_str(err), err); + if (!(opcode == IONIC_CMD_FW_CONTROL && err == IONIC_RC_EAGAIN)) + dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n", + ionic_opcode_to_str(opcode), opcode, + ionic_error_to_str(err), err); return ionic_error_to_errno(err); } @@ -444,17 +431,23 @@ int ionic_identify(struct ionic *ionic) sz = min(sizeof(ident->dev), sizeof(idev->dev_cmd_regs->data)); memcpy_fromio(&ident->dev, &idev->dev_cmd_regs->data, sz); } - mutex_unlock(&ionic->dev_cmd_lock); - if (err) - goto err_out_unmap; + if (err) { + dev_err(ionic->dev, "Cannot identify ionic: %d\n", err); + goto err_out; + } - ionic_debugfs_add_ident(ionic); + err = ionic_lif_identify(ionic, IONIC_LIF_TYPE_CLASSIC, + &ionic->ident.lif); + if (err) { + dev_err(ionic->dev, "Cannot identify LIFs: %d\n", err); + goto err_out; + } return 0; -err_out_unmap: +err_out: return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c index 2a1885da58a6..ff20a2ac4c2f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -179,36 +179,28 @@ static const struct ionic_stat_desc ionic_dbg_napi_stats_desc[] = { static void ionic_get_lif_stats(struct ionic_lif *lif, struct ionic_lif_sw_stats *stats) { - struct ionic_tx_stats *tstats; - struct ionic_rx_stats *rstats; + struct ionic_tx_stats *txstats; + struct ionic_rx_stats *rxstats; struct rtnl_link_stats64 ns; - struct ionic_qcq *txqcq; - struct ionic_qcq *rxqcq; int q_num; memset(stats, 0, sizeof(*stats)); for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - txqcq = lif_to_txqcq(lif, q_num); - if (txqcq && txqcq->stats) { - tstats = &txqcq->stats->tx; - stats->tx_packets += tstats->pkts; - stats->tx_bytes += tstats->bytes; - stats->tx_tso += tstats->tso; - stats->tx_tso_bytes += tstats->tso_bytes; - stats->tx_csum_none += tstats->csum_none; - stats->tx_csum += tstats->csum; - } - - rxqcq = lif_to_rxqcq(lif, q_num); - if (rxqcq && rxqcq->stats) { - rstats = &rxqcq->stats->rx; - stats->rx_packets += rstats->pkts; - stats->rx_bytes += rstats->bytes; - stats->rx_csum_none += rstats->csum_none; - stats->rx_csum_complete += rstats->csum_complete; - stats->rx_csum_error += rstats->csum_error; - } + txstats = &lif->txqstats[q_num]; + stats->tx_packets += txstats->pkts; + stats->tx_bytes += txstats->bytes; + stats->tx_tso += txstats->tso; + stats->tx_tso_bytes += txstats->tso_bytes; + stats->tx_csum_none += txstats->csum_none; + stats->tx_csum += txstats->csum; + + rxstats = &lif->rxqstats[q_num]; + stats->rx_packets +=
rxstats->pkts; + stats->rx_bytes += rxstats->bytes; + stats->rx_csum_none += rxstats->csum_none; + stats->rx_csum_complete += rxstats->csum_complete; + stats->rx_csum_error += rxstats->csum_error; } ionic_get_stats64(lif->netdev, &ns); @@ -371,7 +363,7 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) } for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - txstats = &lif_to_txstats(lif, q_num); + txstats = &lif->txqstats[q_num]; for (i = 0; i < IONIC_NUM_TX_STATS; i++) { **buf = IONIC_READ_STAT64(txstats, @@ -381,7 +373,7 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) if (test_bit(IONIC_LIF_F_UP, lif->state) && test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { - txqcq = lif_to_txqcq(lif, q_num); + txqcq = lif->txqcqs[q_num]; for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { **buf = IONIC_READ_STAT64(&txqcq->q, &ionic_txq_stats_desc[i]); @@ -405,7 +397,7 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) } for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - rxstats = &lif_to_rxstats(lif, q_num); + rxstats = &lif->rxqstats[q_num]; for (i = 0; i < IONIC_NUM_RX_STATS; i++) { **buf = IONIC_READ_STAT64(rxstats, @@ -415,7 +407,7 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) if (test_bit(IONIC_LIF_F_UP, lif->state) && test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { - rxqcq = lif_to_rxqcq(lif, q_num); + rxqcq = lif->rxqcqs[q_num]; for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { **buf = IONIC_READ_STAT64(&rxqcq->cq, &ionic_dbg_cq_stats_desc[i]); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index def65fee27b5..169ac4f54640 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -22,7 +22,7 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell, ionic_desc_cb cb_func, void *cb_arg) { - DEBUG_STATS_TXQ_POST(q_to_qcq(q), q->head->desc, ring_dbell); + DEBUG_STATS_TXQ_POST(q, ring_dbell); ionic_q_post(q, ring_dbell, cb_func, cb_arg); } @@ -32,7 +32,7 @@ static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell, { ionic_q_post(q, ring_dbell, cb_func, cb_arg); - DEBUG_STATS_RX_BUFF_CNT(q_to_qcq(q)); + DEBUG_STATS_RX_BUFF_CNT(q); } static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q) @@ -49,7 +49,7 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q, struct sk_buff *skb; netdev = lif->netdev; - stats = q_to_rx_stats(q); + stats = &q->lif->rxqstats[q->index]; if (frags) skb = napi_get_frags(&q_to_qcq(q)->napi); @@ -235,14 +235,14 @@ static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) return false; /* check for empty queue */ - if (q->tail->index == q->head->index) + if (q->tail_idx == q->head_idx) return false; - desc_info = q->tail; - if (desc_info->index != le16_to_cpu(comp->comp_index)) + if (q->tail_idx != le16_to_cpu(comp->comp_index)) return false; - q->tail = desc_info->next; + desc_info = &q->info[q->tail_idx]; + q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); /* clean the related q entry, only one per qc completion */ ionic_rx_clean(q, desc_info, cq_info, desc_info->cb_arg); @@ -266,40 +266,49 @@ void ionic_rx_flush(struct ionic_cq *cq) work_done, IONIC_INTR_CRED_RESET_COALESCE); } -static struct page *ionic_rx_page_alloc(struct ionic_queue *q, - dma_addr_t *dma_addr) +static int 
ionic_rx_page_alloc(struct ionic_queue *q, + struct ionic_page_info *page_info) { struct ionic_lif *lif = q->lif; struct ionic_rx_stats *stats; struct net_device *netdev; struct device *dev; - struct page *page; netdev = lif->netdev; dev = lif->ionic->dev; stats = q_to_rx_stats(q); - page = alloc_page(GFP_ATOMIC); - if (unlikely(!page)) { - net_err_ratelimited("%s: Page alloc failed on %s!\n", + + if (unlikely(!page_info)) { + net_err_ratelimited("%s: %s invalid page_info in alloc\n", + netdev->name, q->name); + return -EINVAL; + } + + page_info->page = dev_alloc_page(); + if (unlikely(!page_info->page)) { + net_err_ratelimited("%s: %s page alloc failed\n", netdev->name, q->name); stats->alloc_err++; - return NULL; + return -ENOMEM; } - *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, *dma_addr))) { - __free_page(page); - net_err_ratelimited("%s: DMA single map failed on %s!\n", + page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) { + put_page(page_info->page); + page_info->dma_addr = 0; + page_info->page = NULL; + net_err_ratelimited("%s: %s dma map failed\n", netdev->name, q->name); stats->dma_map_err++; - return NULL; + return -EIO; } - return page; + return 0; } -static void ionic_rx_page_free(struct ionic_queue *q, struct page *page, - dma_addr_t dma_addr) +static void ionic_rx_page_free(struct ionic_queue *q, + struct ionic_page_info *page_info) { struct ionic_lif *lif = q->lif; struct net_device *netdev; @@ -308,15 +317,23 @@ static void ionic_rx_page_free(struct ionic_queue *q, struct page *page, netdev = lif->netdev; dev = lif->ionic->dev; - if (unlikely(!page)) { - net_err_ratelimited("%s: Trying to free unallocated buffer on %s!\n", + if (unlikely(!page_info)) { + net_err_ratelimited("%s: %s invalid page_info in free\n", + netdev->name, q->name); + return; + } + + if (unlikely(!page_info->page)) { + net_err_ratelimited("%s: %s invalid page in free\n", netdev->name, q->name); return; } - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE); - __free_page(page); + put_page(page_info->page); + page_info->dma_addr = 0; + page_info->page = NULL; } void ionic_rx_fill(struct ionic_queue *q) @@ -338,7 +355,7 @@ void ionic_rx_fill(struct ionic_queue *q) for (i = ionic_q_space_avail(q); i; i--) { remain_len = len; - desc_info = q->head; + desc_info = &q->info[q->head_idx]; desc = desc_info->desc; sg_desc = desc_info->sg_desc; page_info = &desc_info->pages[0]; @@ -352,8 +369,7 @@ void ionic_rx_fill(struct ionic_queue *q) desc->opcode = (nfrags > 1) ? 
IONIC_RXQ_DESC_OPCODE_SG : IONIC_RXQ_DESC_OPCODE_SIMPLE; desc_info->npages = nfrags; - page_info->page = ionic_rx_page_alloc(q, &page_info->dma_addr); - if (unlikely(!page_info->page)) { + if (unlikely(ionic_rx_page_alloc(q, page_info))) { desc->addr = 0; desc->len = 0; return; @@ -370,8 +386,7 @@ void ionic_rx_fill(struct ionic_queue *q) continue; sg_elem = &sg_desc->elems[j]; - page_info->page = ionic_rx_page_alloc(q, &page_info->dma_addr); - if (unlikely(!page_info->page)) { + if (unlikely(ionic_rx_page_alloc(q, page_info))) { sg_elem->addr = 0; sg_elem->len = 0; return; @@ -387,7 +402,7 @@ void ionic_rx_fill(struct ionic_queue *q) } ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type, - q->dbval | q->head->index); + q->dbval | q->head_idx); } static void ionic_rx_fill_cb(void *arg) @@ -397,28 +412,50 @@ static void ionic_rx_fill_cb(void *arg) void ionic_rx_empty(struct ionic_queue *q) { - struct ionic_desc_info *cur; + struct ionic_desc_info *desc_info; struct ionic_rxq_desc *desc; unsigned int i; + u16 idx; - for (cur = q->tail; cur != q->head; cur = cur->next) { - desc = cur->desc; + idx = q->tail_idx; + while (idx != q->head_idx) { + desc_info = &q->info[idx]; + desc = desc_info->desc; desc->addr = 0; desc->len = 0; - for (i = 0; i < cur->npages; i++) { - if (likely(cur->pages[i].page)) { - ionic_rx_page_free(q, cur->pages[i].page, - cur->pages[i].dma_addr); - cur->pages[i].page = NULL; - cur->pages[i].dma_addr = 0; - } - } + for (i = 0; i < desc_info->npages; i++) + ionic_rx_page_free(q, &desc_info->pages[i]); - cur->cb_arg = NULL; + desc_info->cb_arg = NULL; + idx = (idx + 1) & (q->num_descs - 1); } } +static void ionic_dim_update(struct ionic_qcq *qcq) +{ + struct dim_sample dim_sample; + struct ionic_lif *lif; + unsigned int qi; + + if (!qcq->intr.dim_coal_hw) + return; + + lif = qcq->q.lif; + qi = qcq->cq.bound_q->index; + + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + lif->rxqcqs[qi]->intr.index, + qcq->intr.dim_coal_hw); + + dim_update_sample(qcq->cq.bound_intr->rearm_count, + lif->txqstats[qi].pkts, + lif->txqstats[qi].bytes, + &dim_sample); + + net_dim(&qcq->dim, dim_sample); +} + int ionic_tx_napi(struct napi_struct *napi, int budget) { struct ionic_qcq *qcq = napi_to_qcq(napi); @@ -435,8 +472,9 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) ionic_tx_service, NULL, NULL); if (work_done < budget && napi_complete_done(napi, work_done)) { + ionic_dim_update(qcq); flags |= IONIC_INTR_CRED_UNMASK; - DEBUG_STATS_INTR_REARM(cq->bound_intr); + cq->bound_intr->rearm_count++; } if (work_done || flags) { @@ -470,8 +508,9 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(cq->bound_q); if (work_done < budget && napi_complete_done(napi, work_done)) { + ionic_dim_update(qcq); flags |= IONIC_INTR_CRED_UNMASK; - DEBUG_STATS_INTR_REARM(cq->bound_intr); + cq->bound_intr->rearm_count++; } if (work_done || flags) { @@ -500,7 +539,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) lif = rxcq->bound_q->lif; idev = &lif->ionic->idev; - txcq = &lif->txqcqs[qi].qcq->cq; + txcq = &lif->txqcqs[qi]->cq; tx_work_done = ionic_cq_service(txcq, lif->tx_budget, ionic_tx_service, NULL, NULL); @@ -511,8 +550,9 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) ionic_rx_fill_cb(rxcq->bound_q); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { + ionic_dim_update(qcq); flags |= IONIC_INTR_CRED_UNMASK; - DEBUG_STATS_INTR_REARM(rxcq->bound_intr); + rxcq->bound_intr->rearm_count++; } if (rx_work_done || flags) { @@ -615,6 +655,7 @@ 
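ionic_dim_update() above only feeds net_dim() a traffic sample; when the algorithm picks a new moderation profile, net_dim() schedules dim->work, and the worker that applies the profile is outside this excerpt. A sketch of what that worker plausibly looks like, built from the names visible here (qcq->dim, intr.dim_coal_hw, ionic_coal_usec_to_hw()) and the generic <linux/dim.h> API; treat it as illustrative, not as the patch's exact code:

static void example_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct dim_cq_moder cur_moder;
	struct ionic_qcq *qcq;
	u32 new_coal;

	/* profile chosen by net_dim() from the samples fed in above */
	cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
	qcq = container_of(dim, struct ionic_qcq, dim);
	new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec);
	qcq->intr.dim_coal_hw = new_coal ? new_coal : 1; /* never program 0 */
	dim->state = DIM_START_MEASURE; /* open the next sample window */
}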
static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) struct ionic_txq_comp *comp = cq_info->cq_desc; struct ionic_queue *q = cq->bound_q; struct ionic_desc_info *desc_info; + u16 index; if (!color_match(comp->color, cq->done_color)) return false; @@ -623,12 +664,13 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) * several q entries completed for each cq completion */ do { - desc_info = q->tail; - q->tail = desc_info->next; - ionic_tx_clean(q, desc_info, cq->tail, desc_info->cb_arg); + desc_info = &q->info[q->tail_idx]; + index = q->tail_idx; + q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); + ionic_tx_clean(q, desc_info, cq_info, desc_info->cb_arg); desc_info->cb = NULL; desc_info->cb_arg = NULL; - } while (desc_info->index != le16_to_cpu(comp->comp_index)); + } while (index != le16_to_cpu(comp->comp_index)); return true; } @@ -648,16 +690,14 @@ void ionic_tx_flush(struct ionic_cq *cq) void ionic_tx_empty(struct ionic_queue *q) { struct ionic_desc_info *desc_info; - int done = 0; /* walk the not completed tx entries, if any */ - while (q->head != q->tail) { - desc_info = q->tail; - q->tail = desc_info->next; + while (q->head_idx != q->tail_idx) { + desc_info = &q->info[q->tail_idx]; + q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); ionic_tx_clean(q, desc_info, NULL, desc_info->cb_arg); desc_info->cb = NULL; desc_info->cb_arg = NULL; - done++; } } @@ -741,8 +781,8 @@ static void ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q, struct ionic_txq_sg_elem **elem) { - struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc; - struct ionic_txq_desc *desc = q->head->desc; + struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc; + struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; *elem = sg_desc->elems; return desc; @@ -751,13 +791,13 @@ static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q, static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct ionic_desc_info *abort = q->head; + struct ionic_desc_info *rewind_desc_info; struct device *dev = q->lif->ionic->dev; - struct ionic_desc_info *rewind = abort; struct ionic_txq_sg_elem *elem; struct ionic_txq_desc *desc; unsigned int frag_left = 0; unsigned int offset = 0; + u16 abort = q->head_idx; unsigned int len_left; dma_addr_t desc_addr; unsigned int hdrlen; @@ -765,6 +805,7 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) unsigned int seglen; u64 total_bytes = 0; u64 total_pkts = 0; + u16 rewind = abort; unsigned int left; unsigned int len; unsigned int mss; @@ -909,19 +950,20 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) return 0; err_out_abort: - while (rewind->desc != q->head->desc) { - ionic_tx_clean(q, rewind, NULL, NULL); - rewind = rewind->next; + while (rewind != q->head_idx) { + rewind_desc_info = &q->info[rewind]; + ionic_tx_clean(q, rewind_desc_info, NULL, NULL); + rewind = (rewind + 1) & (q->num_descs - 1); } - q->head = abort; + q->head_idx = abort; return -ENOMEM; } static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb) { + struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct ionic_txq_desc *desc = q->head->desc; struct device *dev = q->lif->ionic->dev; dma_addr_t dma_addr; bool has_vlan; @@ -960,8 +1002,8 @@ static int 
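The completion loops just shown (and the adminq flush earlier) all step a bare index with (idx + 1) & (q->num_descs - 1) instead of chasing desc_info->next pointers; the mask is a correct modulo only because the ring sizes are powers of two. A tiny illustrative walk, not taken from the driver:

/* Illustration only: walking a power-of-two descriptor ring with a
 * masked index, as the converted loops above do.
 */
static void example_ring_walk(struct ionic_queue *q)
{
	u16 idx = q->tail_idx;

	while (idx != q->head_idx) {
		/* process q->info[idx] here */
		idx = (idx + 1) & (q->num_descs - 1); /* wraps at num_descs */
	}
}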
ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb) static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb) { + struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct ionic_txq_desc *desc = q->head->desc; struct device *dev = q->lif->ionic->dev; dma_addr_t dma_addr; bool has_vlan; @@ -995,7 +1037,7 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb) static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb) { - struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc; + struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc; unsigned int len_left = skb->len - skb_headlen(skb); struct ionic_txq_sg_elem *elem = sg_desc->elems; struct ionic_tx_stats *stats = q_to_tx_stats(q); @@ -1104,9 +1146,9 @@ netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - if (unlikely(!lif_to_txqcq(lif, queue_index))) + if (unlikely(queue_index >= lif->nxqs)) queue_index = 0; - q = lif_to_txq(lif, queue_index); + q = &lif->txqcqs[queue_index]->q; ndescs = ionic_tx_descs_needed(q, skb); if (ndescs < 0) diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 8f743d80760b..4366c7a8de95 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -80,7 +80,7 @@ config QED select CRC8 select NET_DEVLINK help - This enables the support for ... + This enables support for the Marvell FastLinQ adapter family. config QED_LL2 bool @@ -100,7 +100,8 @@ config QEDE depends on QED imply PTP_1588_CLOCK help - This enables the support for ... + This enables support for the Marvell FastLinQ adapter + family ethernet driver. config QED_RDMA bool diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h index 86153660d245..e5c51256243a 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -1189,9 +1189,6 @@ typedef struct { #define NX_FORCE_FW_RESET 0xdeaddead -/* Fw dump levels */ -static const u32 FW_DUMP_LEVELS[] = { 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff }; /* Flash read/write address */ #define NX_FW_DUMP_REG1 0x00130060 #define NX_FW_DUMP_REG2 0x001e0000 diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c index c3f50ddbe824..dd22cb056d03 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c @@ -814,6 +814,9 @@ netxen_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump) return 0; } +/* Fw dump levels */ +static const u32 FW_DUMP_LEVELS[] = { 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff }; + static int netxen_set_dump(struct net_device *netdev, struct ethtool_dump *val) { diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index f947b105cf14..8251755ec18c 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -9,6 +9,7 @@ qed-y := \ qed_dcbx.o \ qed_debug.o \ qed_dev.o \ + qed_devlink.o \ qed_hw.o \ qed_init_fw_funcs.o \ qed_init_ops.o \ diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index b2a7b53ee760..a20cb8a0c377 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -572,7 +572,7 @@ struct qed_hwfn { struct
qed_consq *p_consq; /* Slow-Path definitions */ - struct tasklet_struct *sp_dpc; + struct tasklet_struct sp_dpc; bool b_sp_dpc_enabled; struct qed_ptt *p_main_ptt; @@ -807,6 +807,7 @@ struct qed_dev { struct qed_llh_info *p_llh_info; /* Linux specific here */ + struct qed_dev_info common_dev_info; struct qede_dev *edev; struct pci_dev *pdev; u32 flags; @@ -849,7 +850,6 @@ struct qed_dev { u32 rdma_max_srq_sge; u16 tunn_feature_mask; - struct devlink *dl; bool iwarp_cmt; }; @@ -981,6 +981,7 @@ void qed_bw_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); +int qed_recovery_process(struct qed_dev *cdev); void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn); void qed_hw_error_occurred(struct qed_hwfn *p_hwfn, enum qed_hw_err_type err_type); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 3db181f3617a..d2f5855b2ea7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3973,6 +3973,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) struct qed_mcp_link_speed_params *ext_speed; struct qed_mcp_link_capabilities *p_caps; struct qed_mcp_link_params *link; + int i; /* Read global nvm_cfg address */ nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0); @@ -4299,6 +4300,14 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) __set_bit(QED_DEV_CAP_ROCE, &p_hwfn->hw_info.device_capabilities); + /* Read device serial number information from shmem */ + addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, glob) + + offsetof(struct nvm_cfg1_glob, serial_number); + + for (i = 0; i < 4; i++) + p_hwfn->hw_info.part_num[i] = qed_rd(p_hwfn, p_ptt, addr + i * 4); + return qed_mcp_fill_shmem_func_info(p_hwfn, p_ptt); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c new file mode 100644 index 000000000000..cf7f4da68e69 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Marvell/Qlogic FastLinQ NIC driver + * + * Copyright (C) 2020 Marvell International Ltd. + */ + +#include <linux/kernel.h> +#include <linux/qed/qed_if.h> +#include <linux/vmalloc.h> +#include "qed.h" +#include "qed_devlink.h" + +enum qed_devlink_param_id { + QED_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + QED_DEVLINK_PARAM_ID_IWARP_CMT, +}; + +struct qed_fw_fatal_ctx { + enum qed_hw_err_type err_type; +}; + +int qed_report_fatal_error(struct devlink *devlink, enum qed_hw_err_type err_type) +{ + struct qed_devlink *qdl = devlink_priv(devlink); + struct qed_fw_fatal_ctx fw_fatal_ctx = { + .err_type = err_type, + }; + + if (qdl->fw_reporter) + devlink_health_report(qdl->fw_reporter, + "Fatal error occurred", &fw_fatal_ctx); + + return 0; +} + +static int +qed_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct qed_devlink *qdl = devlink_health_reporter_priv(reporter); + struct qed_fw_fatal_ctx *fw_fatal_ctx = priv_ctx; + struct qed_dev *cdev = qdl->cdev; + u32 dbg_data_buf_size; + u8 *p_dbg_data_buf; + int err; + + /* Having context means that was a dump request after fatal, + * so we enable extra debugging while gathering the dump, + * just in case + */ + cdev->print_dbg_data = fw_fatal_ctx ? 
true : false; + + dbg_data_buf_size = qed_dbg_all_data_size(cdev); + p_dbg_data_buf = vzalloc(dbg_data_buf_size); + if (!p_dbg_data_buf) { + DP_NOTICE(cdev, + "Failed to allocate memory for a debug data buffer\n"); + return -ENOMEM; + } + + err = qed_dbg_all_data(cdev, p_dbg_data_buf); + if (err) { + DP_NOTICE(cdev, "Failed to obtain debug data\n"); + vfree(p_dbg_data_buf); + return err; + } + + err = devlink_fmsg_binary_pair_put(fmsg, "dump_data", + p_dbg_data_buf, dbg_data_buf_size); + + vfree(p_dbg_data_buf); + + return err; +} + +static int +qed_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct qed_devlink *qdl = devlink_health_reporter_priv(reporter); + struct qed_dev *cdev = qdl->cdev; + + qed_recovery_process(cdev); + + return 0; +} + +static const struct devlink_health_reporter_ops qed_fw_fatal_reporter_ops = { + .name = "fw_fatal", + .recover = qed_fw_fatal_reporter_recover, + .dump = qed_fw_fatal_reporter_dump, +}; + +#define QED_REPORTER_FW_GRACEFUL_PERIOD 1200000 + +void qed_fw_reporters_create(struct devlink *devlink) +{ + struct qed_devlink *dl = devlink_priv(devlink); + + dl->fw_reporter = devlink_health_reporter_create(devlink, &qed_fw_fatal_reporter_ops, + QED_REPORTER_FW_GRACEFUL_PERIOD, dl); + if (IS_ERR(dl->fw_reporter)) { + DP_NOTICE(dl->cdev, "Failed to create fw reporter, err = %ld\n", + PTR_ERR(dl->fw_reporter)); + dl->fw_reporter = NULL; + } +} + +void qed_fw_reporters_destroy(struct devlink *devlink) +{ + struct qed_devlink *dl = devlink_priv(devlink); + struct devlink_health_reporter *rep; + + rep = dl->fw_reporter; + + if (!IS_ERR_OR_NULL(rep)) + devlink_health_reporter_destroy(rep); +} + +static int qed_dl_param_get(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct qed_devlink *qed_dl = devlink_priv(dl); + struct qed_dev *cdev; + + cdev = qed_dl->cdev; + ctx->val.vbool = cdev->iwarp_cmt; + + return 0; +} + +static int qed_dl_param_set(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct qed_devlink *qed_dl = devlink_priv(dl); + struct qed_dev *cdev; + + cdev = qed_dl->cdev; + cdev->iwarp_cmt = ctx->val.vbool; + + return 0; +} + +static const struct devlink_param qed_devlink_params[] = { + DEVLINK_PARAM_DRIVER(QED_DEVLINK_PARAM_ID_IWARP_CMT, + "iwarp_cmt", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + qed_dl_param_get, qed_dl_param_set, NULL), +}; + +static int qed_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct qed_devlink *qed_dl = devlink_priv(devlink); + struct qed_dev *cdev = qed_dl->cdev; + struct qed_dev_info *dev_info; + char buf[100]; + int err; + + dev_info = &cdev->common_dev_info; + + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (err) + return err; + + memcpy(buf, cdev->hwfns[0].hw_info.part_num, sizeof(cdev->hwfns[0].hw_info.part_num)); + buf[sizeof(cdev->hwfns[0].hw_info.part_num)] = 0; + + if (buf[0]) { + err = devlink_info_board_serial_number_put(req, buf); + if (err) + return err; + } + + snprintf(buf, sizeof(buf), "%d.%d.%d.%d", + GET_MFW_FIELD(dev_info->mfw_rev, QED_MFW_VERSION_3), + GET_MFW_FIELD(dev_info->mfw_rev, QED_MFW_VERSION_2), + GET_MFW_FIELD(dev_info->mfw_rev, QED_MFW_VERSION_1), + GET_MFW_FIELD(dev_info->mfw_rev, QED_MFW_VERSION_0)); + + err = devlink_info_version_stored_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, buf); + if (err) + return err; + + snprintf(buf, sizeof(buf), "%d.%d.%d.%d", + 
dev_info->fw_major, + dev_info->fw_minor, + dev_info->fw_rev, + dev_info->fw_eng); + + return devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_APP, buf); +} + +static const struct devlink_ops qed_dl_ops = { + .info_get = qed_devlink_info_get, +}; + +struct devlink *qed_devlink_register(struct qed_dev *cdev) +{ + union devlink_param_value value; + struct qed_devlink *qdevlink; + struct devlink *dl; + int rc; + + dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink)); + if (!dl) + return ERR_PTR(-ENOMEM); + + qdevlink = devlink_priv(dl); + qdevlink->cdev = cdev; + + rc = devlink_register(dl, &cdev->pdev->dev); + if (rc) + goto err_free; + + rc = devlink_params_register(dl, qed_devlink_params, + ARRAY_SIZE(qed_devlink_params)); + if (rc) + goto err_unregister; + + value.vbool = false; + devlink_param_driverinit_value_set(dl, + QED_DEVLINK_PARAM_ID_IWARP_CMT, + value); + + devlink_params_publish(dl); + cdev->iwarp_cmt = false; + + qed_fw_reporters_create(dl); + + return dl; + +err_unregister: + devlink_unregister(dl); + +err_free: + devlink_free(dl); + + return ERR_PTR(rc); +} + +void qed_devlink_unregister(struct devlink *devlink) +{ + if (!devlink) + return; + + qed_fw_reporters_destroy(devlink); + + devlink_params_unregister(devlink, qed_devlink_params, + ARRAY_SIZE(qed_devlink_params)); + + devlink_unregister(devlink); + devlink_free(devlink); +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.h b/drivers/net/ethernet/qlogic/qed/qed_devlink.h new file mode 100644 index 000000000000..ccc7d1d1bfd4 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Marvell/Qlogic FastLinQ NIC driver + * + * Copyright (C) 2020 Marvell International Ltd. 
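The fw_fatal reporter wired up above is driven through devlink_health_report(). The body of qed_report_fatal_error(), declared just below in the new qed_devlink.h, is not part of these hunks, but under this generation of the devlink health API it would reduce to roughly the following sketch (function name and error string are illustrative):

static int example_report_fatal_error(struct devlink *devlink,
				      enum qed_hw_err_type err_type)
{
	struct qed_devlink *qdl = devlink_priv(devlink);

	if (!qdl->fw_reporter)
		return -ENODEV;

	/* Bumps the reporter's error counter, captures a snapshot via
	 * the ->dump() callback above and, once the graceful period
	 * allows, invokes ->recover().
	 */
	return devlink_health_report(qdl->fw_reporter, "Fatal HW error",
				     &err_type);
}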
+ */ +#ifndef _QED_DEVLINK_H +#define _QED_DEVLINK_H + +#include <linux/qed/qed_if.h> +#include <net/devlink.h> + +struct devlink *qed_devlink_register(struct qed_dev *cdev); +void qed_devlink_unregister(struct devlink *devlink); + +void qed_fw_reporters_create(struct devlink *devlink); +void qed_fw_reporters_destroy(struct devlink *devlink); + +int qed_report_fatal_error(struct devlink *dl, enum qed_hw_err_type err_type); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index f8c5a864812d..578935f643b8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -1216,9 +1216,9 @@ static void qed_sb_ack_attn(struct qed_hwfn *p_hwfn, barrier(); } -void qed_int_sp_dpc(unsigned long hwfn_cookie) +void qed_int_sp_dpc(struct tasklet_struct *t) { - struct qed_hwfn *p_hwfn = (struct qed_hwfn *)hwfn_cookie; + struct qed_hwfn *p_hwfn = from_tasklet(p_hwfn, t, sp_dpc); struct qed_pi_info *pi_info = NULL; struct qed_sb_attn_info *sb_attn; struct qed_sb_info *sb_info; @@ -2285,34 +2285,14 @@ u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn) static void qed_int_sp_dpc_setup(struct qed_hwfn *p_hwfn) { - tasklet_init(p_hwfn->sp_dpc, - qed_int_sp_dpc, (unsigned long)p_hwfn); + tasklet_setup(&p_hwfn->sp_dpc, qed_int_sp_dpc); p_hwfn->b_sp_dpc_enabled = true; } -static int qed_int_sp_dpc_alloc(struct qed_hwfn *p_hwfn) -{ - p_hwfn->sp_dpc = kmalloc(sizeof(*p_hwfn->sp_dpc), GFP_KERNEL); - if (!p_hwfn->sp_dpc) - return -ENOMEM; - - return 0; -} - -static void qed_int_sp_dpc_free(struct qed_hwfn *p_hwfn) -{ - kfree(p_hwfn->sp_dpc); - p_hwfn->sp_dpc = NULL; -} - int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { int rc = 0; - rc = qed_int_sp_dpc_alloc(p_hwfn); - if (rc) - return rc; - rc = qed_int_sp_sb_alloc(p_hwfn, p_ptt); if (rc) return rc; @@ -2326,7 +2306,6 @@ void qed_int_free(struct qed_hwfn *p_hwfn) { qed_int_sp_sb_free(p_hwfn); qed_int_sb_attn_free(p_hwfn); - qed_int_sp_dpc_free(p_hwfn); } void qed_int_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h index 86809d7bc2de..c5550e96bbe1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.h +++ b/drivers/net/ethernet/qlogic/qed/qed_int.h @@ -140,7 +140,7 @@ int qed_int_sb_release(struct qed_hwfn *p_hwfn, * @param p_hwfn - pointer to hwfn * */ -void qed_int_sp_dpc(unsigned long hwfn_cookie); +void qed_int_sp_dpc(struct tasklet_struct *t); /** * @brief qed_int_get_num_sbs - get the number of status diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 0452b728c527..49783f365079 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1185,7 +1185,7 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, .elem_size = sizeof(struct core_tx_bd), }; struct qed_ll2_tx_packet *p_descq; - u32 desc_size; + size_t desc_size; u32 capacity; int rc = 0; @@ -1198,10 +1198,9 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, goto out; capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain); - /* First element is part of the packet, rest are flexibly added */ - desc_size = (sizeof(*p_descq) + - (p_ll2_info->input.tx_max_bds_per_packet - 1) * - sizeof(p_descq->bds_set)); + /* All bds_set elements are flexibily added. 
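Two details are worth unpacking from the qed_int.c hunks above: embedding the tasklet in the hwfn removes the kmalloc()/kfree() pair outright, and tasklet_setup()/from_tasklet() replace the old unsigned-long cookie with type-safe container_of() arithmetic. The idiom in miniature, with a hypothetical owner struct:

struct my_hwfn {
	struct tasklet_struct sp_dpc;	/* embedded: no separate allocation */
};

static void my_sp_dpc(struct tasklet_struct *t)
{
	/* from_tasklet() is container_of(): it recovers the structure
	 * embedding the tasklet from the callback argument.
	 */
	struct my_hwfn *hwfn = from_tasklet(hwfn, t, sp_dpc);

	/* ... service the slowpath for hwfn ... */
}

static void my_sp_dpc_setup(struct my_hwfn *hwfn)
{
	tasklet_setup(&hwfn->sp_dpc, my_sp_dpc);
}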
*/ + desc_size = struct_size(p_descq, bds_set, + p_ll2_info->input.tx_max_bds_per_packet); p_descq = kcalloc(capacity, desc_size, GFP_KERNEL); if (!p_descq) { @@ -1524,7 +1523,7 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) struct qed_ptt *p_ptt; int rc = -EINVAL; u32 i, capacity; - u32 desc_size; + size_t desc_size; u8 qid; p_ptt = qed_ptt_acquire(p_hwfn); @@ -1558,10 +1557,9 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) INIT_LIST_HEAD(&p_tx->sending_descq); spin_lock_init(&p_tx->lock); capacity = qed_chain_get_capacity(&p_tx->txq_chain); - /* First element is part of the packet, rest are flexibly added */ - desc_size = (sizeof(*p_pkt) + - (p_ll2_conn->input.tx_max_bds_per_packet - 1) * - sizeof(p_pkt->bds_set)); + /* All bds_set elements are flexibily added. */ + desc_size = struct_size(p_pkt, bds_set, + p_ll2_conn->input.tx_max_bds_per_packet); for (i = 0; i < capacity; i++) { p_pkt = p_tx->descq_mem + desc_size * i; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index 500d0c4f8077..df88d00053a2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -56,7 +56,7 @@ struct qed_ll2_tx_packet { struct core_tx_bd *txq_bd; dma_addr_t tx_frag; u16 frag_len; - } bds_set[1]; + } bds_set[]; }; struct qed_ll2_rx_queue { @@ -86,9 +86,6 @@ struct qed_ll2_tx_queue { struct list_head active_descq; struct list_head free_descq; struct list_head sending_descq; - void *descq_mem; /* memory for variable sized qed_ll2_tx_packet*/ - struct qed_ll2_tx_packet *cur_send_packet; - struct qed_ll2_tx_packet cur_completing_packet; u16 cur_completing_bd_idx; void __iomem *doorbell_addr; struct core_db_data db_msg; @@ -96,6 +93,9 @@ struct qed_ll2_tx_queue { u16 cur_send_frag_num; u16 cur_completing_frag_num; bool b_completing_packet; + void *descq_mem; /* memory for variable sized qed_ll2_tx_packet*/ + struct qed_ll2_tx_packet *cur_send_packet; + struct qed_ll2_tx_packet cur_completing_packet; }; struct qed_ll2_info { diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 50e5eb22e60a..5bd58c65e163 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -39,6 +39,7 @@ #include "qed_hw.h" #include "qed_selftest.h" #include "qed_debug.h" +#include "qed_devlink.h" #define QED_ROCE_QPS (8192) #define QED_ROCE_DPIS (8) @@ -480,6 +481,7 @@ int qed_fill_dev_info(struct qed_dev *cdev, } dev_info->mtu = hw_info->mtu; + cdev->common_dev_info = *dev_info; return 0; } @@ -512,107 +514,6 @@ static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state) return 0; } -struct qed_devlink { - struct qed_dev *cdev; -}; - -enum qed_devlink_param_id { - QED_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, - QED_DEVLINK_PARAM_ID_IWARP_CMT, -}; - -static int qed_dl_param_get(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct qed_devlink *qed_dl; - struct qed_dev *cdev; - - qed_dl = devlink_priv(dl); - cdev = qed_dl->cdev; - ctx->val.vbool = cdev->iwarp_cmt; - - return 0; -} - -static int qed_dl_param_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct qed_devlink *qed_dl; - struct qed_dev *cdev; - - qed_dl = devlink_priv(dl); - cdev = qed_dl->cdev; - cdev->iwarp_cmt = ctx->val.vbool; - - return 0; -} - -static const struct devlink_param qed_devlink_params[] = { - DEVLINK_PARAM_DRIVER(QED_DEVLINK_PARAM_ID_IWARP_CMT, - 
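The struct_size() conversions above go hand in hand with the qed_ll2.h switch from the deprecated bds_set[1] trick to a C99 flexible array member: the open-coded 'sizeof(*p) + (n - 1) * sizeof(elem)' arithmetic disappears, and the helper saturates instead of wrapping if n is ever hostile. The general shape, with made-up names:

#include <linux/overflow.h>
#include <linux/slab.h>

struct pkt_desc {
	u16 nfrags;
	struct {
		dma_addr_t addr;
		u16 len;
	} frags[];			/* flexible array member */
};

static struct pkt_desc *pkt_desc_alloc(unsigned int nfrags)
{
	struct pkt_desc *p;

	/* struct_size(p, frags, nfrags) evaluates to sizeof(*p) +
	 * nfrags * sizeof(p->frags[0]), with overflow checking; only
	 * the type of p is used, so p may be uninitialized here.
	 */
	p = kzalloc(struct_size(p, frags, nfrags), GFP_KERNEL);
	return p;
}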
"iwarp_cmt", DEVLINK_PARAM_TYPE_BOOL, - BIT(DEVLINK_PARAM_CMODE_RUNTIME), - qed_dl_param_get, qed_dl_param_set, NULL), -}; - -static const struct devlink_ops qed_dl_ops; - -static int qed_devlink_register(struct qed_dev *cdev) -{ - union devlink_param_value value; - struct qed_devlink *qed_dl; - struct devlink *dl; - int rc; - - dl = devlink_alloc(&qed_dl_ops, sizeof(*qed_dl)); - if (!dl) - return -ENOMEM; - - qed_dl = devlink_priv(dl); - - cdev->dl = dl; - qed_dl->cdev = cdev; - - rc = devlink_register(dl, &cdev->pdev->dev); - if (rc) - goto err_free; - - rc = devlink_params_register(dl, qed_devlink_params, - ARRAY_SIZE(qed_devlink_params)); - if (rc) - goto err_unregister; - - value.vbool = false; - devlink_param_driverinit_value_set(dl, - QED_DEVLINK_PARAM_ID_IWARP_CMT, - value); - - devlink_params_publish(dl); - cdev->iwarp_cmt = false; - - return 0; - -err_unregister: - devlink_unregister(dl); - -err_free: - cdev->dl = NULL; - devlink_free(dl); - - return rc; -} - -static void qed_devlink_unregister(struct qed_dev *cdev) -{ - if (!cdev->dl) - return; - - devlink_params_unregister(cdev->dl, qed_devlink_params, - ARRAY_SIZE(qed_devlink_params)); - - devlink_unregister(cdev->dl); - devlink_free(cdev->dl); -} - /* probing */ static struct qed_dev *qed_probe(struct pci_dev *pdev, struct qed_probe_params *params) @@ -641,12 +542,6 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev, } DP_INFO(cdev, "PCI init completed successfully\n"); - rc = qed_devlink_register(cdev); - if (rc) { - DP_INFO(cdev, "Failed to register devlink.\n"); - goto err2; - } - rc = qed_hw_prepare(cdev, QED_PCI_DEFAULT); if (rc) { DP_ERR(cdev, "hw prepare failed\n"); @@ -676,8 +571,6 @@ static void qed_remove(struct qed_dev *cdev) qed_set_power_state(cdev, PCI_D3hot); - qed_devlink_unregister(cdev); - qed_free_cdev(cdev); } @@ -843,7 +736,7 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance) /* Slowpath interrupt */ if (unlikely(status & 0x1)) { - tasklet_schedule(hwfn->sp_dpc); + tasklet_schedule(&hwfn->sp_dpc); status &= ~0x1; rc = IRQ_HANDLED; } @@ -889,7 +782,7 @@ int qed_slowpath_irq_req(struct qed_hwfn *hwfn) id, cdev->pdev->bus->number, PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id); rc = request_irq(cdev->int_params.msix_table[id].vector, - qed_msix_sp_int, 0, hwfn->name, hwfn->sp_dpc); + qed_msix_sp_int, 0, hwfn->name, &hwfn->sp_dpc); } else { unsigned long flags = 0; @@ -921,8 +814,8 @@ static void qed_slowpath_tasklet_flush(struct qed_hwfn *p_hwfn) * enable function makes this sequence a flush-like operation. 
*/ if (p_hwfn->b_sp_dpc_enabled) { - tasklet_disable(p_hwfn->sp_dpc); - tasklet_enable(p_hwfn->sp_dpc); + tasklet_disable(&p_hwfn->sp_dpc); + tasklet_enable(&p_hwfn->sp_dpc); } } @@ -951,7 +844,7 @@ static void qed_slowpath_irq_free(struct qed_dev *cdev) break; synchronize_irq(cdev->int_params.msix_table[i].vector); free_irq(cdev->int_params.msix_table[i].vector, - cdev->hwfns[i].sp_dpc); + &cdev->hwfns[i].sp_dpc); } } else { if (QED_LEADING_HWFN(cdev)->b_int_requested) @@ -970,11 +863,11 @@ static int qed_nic_stop(struct qed_dev *cdev) struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; if (p_hwfn->b_sp_dpc_enabled) { - tasklet_disable(p_hwfn->sp_dpc); + tasklet_disable(&p_hwfn->sp_dpc); p_hwfn->b_sp_dpc_enabled = false; DP_VERBOSE(cdev, NETIF_MSG_IFDOWN, "Disabled sp tasklet [hwfn %d] at %p\n", - i, p_hwfn->sp_dpc); + i, &p_hwfn->sp_dpc); } } @@ -2926,7 +2819,7 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) return status; } -static int qed_recovery_process(struct qed_dev *cdev) +int qed_recovery_process(struct qed_dev *cdev) { struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); struct qed_ptt *p_ptt; @@ -3114,6 +3007,9 @@ const struct qed_common_ops qed_common_ops_pass = { .get_link = &qed_get_current_link, .drain = &qed_drain, .update_msglvl = &qed_init_dp, + .devlink_register = qed_devlink_register, + .devlink_unregister = qed_devlink_unregister, + .report_fatal_error = qed_report_fatal_error, .dbg_all_data = &qed_dbg_all_data, .dbg_all_data_size = &qed_dbg_all_data_size, .chain_alloc = &qed_chain_alloc, diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index a4bcde522cdf..d3136556a1e9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1151,7 +1151,6 @@ qed_rdma_destroy_cq(void *rdma_cxt, DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid); p_ramrod_res = - (struct rdma_destroy_cq_output_params *) dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(struct rdma_destroy_cq_output_params), &ramrod_res_phys, GFP_KERNEL); @@ -1463,14 +1462,14 @@ static int qed_rdma_modify_qp(void *rdma_cxt, switch (qp->qp_type) { case QED_RDMA_QP_TYPE_XRC_INI: - qp->has_req = 1; + qp->has_req = true; break; case QED_RDMA_QP_TYPE_XRC_TGT: - qp->has_resp = 1; + qp->has_resp = true; break; default: - qp->has_req = 1; - qp->has_resp = 1; + qp->has_req = true; + qp->has_resp = true; } if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 803c1fcca8ad..3efc5899f656 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -172,6 +172,7 @@ struct qede_dev { struct qed_dev *cdev; struct net_device *ndev; struct pci_dev *pdev; + struct devlink *devlink; u32 dp_module; u8 dp_level; @@ -263,6 +264,7 @@ struct qede_dev { struct bpf_prog *xdp_prog; + enum qed_hw_err_type last_err_type; unsigned long err_flags; #define QEDE_ERR_IS_HANDLED 31 #define QEDE_ERR_ATTN_CLR_EN 0 diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 9e1f41ba766c..05e3a3b60269 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1170,10 +1170,23 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, rc = -ENOMEM; goto err2; } + + edev->devlink = qed_ops->common->devlink_register(cdev); + if (IS_ERR(edev->devlink)) { + DP_NOTICE(edev, "Cannot 
register devlink\n"); + edev->devlink = NULL; + /* Go on, we can live without devlink */ + } } else { struct net_device *ndev = pci_get_drvdata(pdev); edev = netdev_priv(ndev); + + if (edev->devlink) { + struct qed_devlink *qdl = devlink_priv(edev->devlink); + + qdl->cdev = cdev; + } edev->cdev = cdev; memset(&edev->stats, 0, sizeof(edev->stats)); memcpy(&edev->dev_info, &dev_info, sizeof(dev_info)); @@ -1225,7 +1238,10 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, err4: qede_rdma_dev_remove(edev, (mode == QEDE_PROBE_RECOVERY)); err3: - free_netdev(edev->ndev); + if (mode != QEDE_PROBE_RECOVERY) + free_netdev(edev->ndev); + else + edev->cdev = NULL; err2: qed_ops->common->slowpath_stop(cdev); err1: @@ -1296,6 +1312,11 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) qed_ops->common->slowpath_stop(cdev); if (system_state == SYSTEM_POWER_OFF) return; + + if (mode != QEDE_REMOVE_RECOVERY && edev->devlink) { + qed_ops->common->devlink_unregister(edev->devlink); + edev->devlink = NULL; + } qed_ops->common->remove(cdev); edev->cdev = NULL; @@ -2454,7 +2475,8 @@ static int qede_close(struct net_device *ndev) qede_unload(edev, QEDE_UNLOAD_NORMAL, false); - edev->ops->common->update_drv_state(edev->cdev, false); + if (edev->cdev) + edev->ops->common->update_drv_state(edev->cdev, false); return 0; } @@ -2576,19 +2598,12 @@ static void qede_atomic_hw_err_handler(struct qede_dev *edev) static void qede_generic_hw_err_handler(struct qede_dev *edev) { - struct qed_dev *cdev = edev->cdev; - DP_NOTICE(edev, "Generic sleepable HW error handling started - err_flags 0x%lx\n", edev->err_flags); - /* Trigger a recovery process. - * This is placed in the sleep requiring section just to make - * sure it is the last one, and that all the other operations - * were completed. 
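The recovery-mode guards scattered through __qede_probe()/__qede_remove() above all protect one invariant: across a QEDE_PROBE_RECOVERY cycle the net_device and its devlink instance stay registered while the qed core underneath is torn down and rebuilt, so on a failed recovery probe only the stale cdev reference may be dropped. Condensed into a hypothetical unwind helper:

static void my_probe_unwind(struct qede_dev *edev, bool recovery)
{
	if (!recovery) {
		/* A normal probe owns the netdev and may free it. */
		free_netdev(edev->ndev);
	} else {
		/* Recovery: the netdev (and devlink) stay registered;
		 * just forget the qed core that failed to come back.
		 */
		edev->cdev = NULL;
	}
}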
- */ - if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags)) - edev->ops->common->recovery_process(cdev); + if (edev->devlink) + edev->ops->common->report_fatal_error(edev->devlink, edev->last_err_type); clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags); @@ -2642,6 +2657,7 @@ static void qede_schedule_hw_err_handler(void *dev, return; } + edev->last_err_type = err_type; qede_set_hw_err_flags(edev, err_type); qede_atomic_hw_err_handler(edev); set_bit(QEDE_SP_HW_ERR, &edev->sp_flags); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 31ad3a5cd128..d8882d0b6b49 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -657,11 +657,10 @@ int qlcnic_83xx_cam_lock(struct qlcnic_adapter *adapter) void qlcnic_83xx_cam_unlock(struct qlcnic_adapter *adapter) { void __iomem *addr; - u32 val; struct qlcnic_hardware_context *ahw = adapter->ahw; addr = ahw->pci_base0 + QLC_83XX_SEM_UNLOCK_FUNC(ahw->pci_func); - val = readl(addr); + readl(addr); } void qlcnic_83xx_read_crb(struct qlcnic_adapter *adapter, char *buf, @@ -3812,7 +3811,6 @@ static int qlcnic_83xx_shutdown(struct pci_dev *pdev) { struct qlcnic_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; - int retval; netif_device_detach(netdev); qlcnic_cancel_idc_work(adapter); @@ -3823,11 +3821,7 @@ static int qlcnic_83xx_shutdown(struct pci_dev *pdev) qlcnic_83xx_disable_mbx_intr(adapter); cancel_delayed_work_sync(&adapter->idc_aen_work); - retval = pci_save_state(pdev); - if (retval) - return retval; - - return 0; + return pci_save_state(pdev); } static int qlcnic_83xx_resume(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 1166b98d8bb2..8543bf3c3484 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -292,6 +292,7 @@ static void emac_tx_timeout(struct net_device *netdev, unsigned int txqueue) /** * emac_update_hw_stats - read the EMAC stat registers + * @adpt: pointer to adapter struct * * Reads the stats registers and write the values to adpt->stats. 
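In the qlcnic_83xx_cam_unlock() cleanup above, the readl() is kept even though its result is now discarded: judging by the register name, the read access itself releases the hardware semaphore. The pattern, isolated:

static void my_sem_unlock(void __iomem *unlock_reg)
{
	/* Read-to-act register: the access performs the unlock, so
	 * the returned value is intentionally unused.
	 */
	readl(unlock_reg);
}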
* diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 375a844cd27c..362b4f5c162c 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -167,7 +167,7 @@ static void qca_tty_wakeup(struct serdev_device *serdev) schedule_work(&qca->tx_work); } -static struct serdev_device_ops qca_serdev_ops = { +static const struct serdev_device_ops qca_serdev_ops = { .receive_buf = qca_tty_receive, .write_wakeup = qca_tty_wakeup, }; diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index e291e6ac40cb..4e44313b7651 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1239,7 +1239,7 @@ static void cp_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cp_private *cp = netdev_priv(dev); unsigned long flags; - int rc, i; + int i; netdev_warn(dev, "Transmit timeout, status %2x %4x %4x %4x\n", cpr8(Cmd), cpr16(CpCmd), @@ -1260,7 +1260,7 @@ static void cp_tx_timeout(struct net_device *dev, unsigned int txqueue) cp_stop_hw(cp); cp_clean_rings(cp); - rc = cp_init_rings(cp); + cp_init_rings(cp); cp_start_hw(cp); __cp_set_rx_mode(dev); cpw16_f(IntrMask, cp_norx_intr_mask); diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 227139d42227..1e5a453dea14 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -978,7 +978,7 @@ static int rtl8139_init_one(struct pci_dev *pdev, pdev->subsystem_vendor == PCI_VENDOR_ID_ATHEROS && pdev->subsystem_device == PCI_DEVICE_ID_REALTEK_8139) { pr_info("OQO Model 2 detected. Forcing PIO\n"); - use_io = 1; + use_io = true; } dev = rtl8139_init_board (pdev); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 11e6962a18e4..7d366b0362cb 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -617,7 +617,6 @@ struct rtl8169_private { struct work_struct work; } wk; - unsigned irq_enabled:1; unsigned supports_gmii:1; unsigned aspm_manageable:1; dma_addr_t counters_phys_addr; @@ -701,6 +700,27 @@ static bool rtl_supports_eee(struct rtl8169_private *tp) tp->mac_version != RTL_GIGA_MAC_VER_39; } +static void rtl_get_priv_stats(struct rtl8169_stats *stats, + u64 *pkts, u64 *bytes) +{ + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + *pkts = stats->packets; + *bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); +} + +static void rtl_inc_priv_stats(struct rtl8169_stats *stats, + u64 pkts, u64 bytes) +{ + u64_stats_update_begin(&stats->syncp); + stats->packets += pkts; + stats->bytes += bytes; + u64_stats_update_end(&stats->syncp); +} + static void rtl_read_mac_from_reg(struct rtl8169_private *tp, u8 *mac, int reg) { int i; @@ -1280,12 +1300,10 @@ static void rtl_irq_disable(struct rtl8169_private *tp) RTL_W32(tp, IntrMask_8125, 0); else RTL_W16(tp, IntrMask, 0); - tp->irq_enabled = 0; } static void rtl_irq_enable(struct rtl8169_private *tp) { - tp->irq_enabled = 1; if (rtl_is_8125(tp)) RTL_W32(tp, IntrMask_8125, tp->irq_mask); else @@ -4399,10 +4417,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, if (tp->dirty_tx != dirty_tx) { netdev_completed_queue(dev, pkts_compl, bytes_compl); - u64_stats_update_begin(&tp->tx_stats.syncp); - tp->tx_stats.packets += pkts_compl; - tp->tx_stats.bytes += bytes_compl; - 
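The new rtl_get_priv_stats()/rtl_inc_priv_stats() helpers above encapsulate the u64_stats_sync protocol: on 32-bit kernels a u64 counter cannot be loaded in one instruction, so writers bracket updates with a seqcount and readers retry until they observe a writer-free window (on 64-bit builds the mechanism compiles away). The writer half, in generic form with hypothetical names:

struct ring_stats {
	u64 packets;
	u64 bytes;
	struct u64_stats_sync syncp;
};

static void ring_stats_add(struct ring_stats *s, u64 pkts, u64 bytes)
{
	/* The begin/end pair lets a concurrent 32-bit reader detect
	 * that it may have seen torn 64-bit values and retry.
	 */
	u64_stats_update_begin(&s->syncp);
	s->packets += pkts;
	s->bytes += bytes;
	u64_stats_update_end(&s->syncp);
}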
u64_stats_update_end(&tp->tx_stats.syncp); + rtl_inc_priv_stats(&tp->tx_stats, pkts_compl, bytes_compl); tp->dirty_tx = dirty_tx; /* Sync with rtl8169_start_xmit: @@ -4524,11 +4539,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget napi_gro_receive(&tp->napi, skb); - u64_stats_update_begin(&tp->rx_stats.syncp); - tp->rx_stats.packets++; - tp->rx_stats.bytes += pkt_size; - u64_stats_update_end(&tp->rx_stats.syncp); - + rtl_inc_priv_stats(&tp->rx_stats, 1, pkt_size); release_descriptor: rtl8169_mark_to_asic(desc); } @@ -4544,8 +4555,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) struct rtl8169_private *tp = dev_instance; u32 status = rtl_get_events(tp); - if (!tp->irq_enabled || (status & 0xffff) == 0xffff || - !(status & tp->irq_mask)) + if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask)) return IRQ_NONE; if (unlikely(status & SYSErr)) { @@ -4599,10 +4609,8 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) rtl_tx(dev, tp, budget); - if (work_done < budget) { - napi_complete_done(napi, work_done); + if (work_done < budget && napi_complete_done(napi, work_done)) rtl_irq_enable(tp); - } return work_done; } @@ -4778,23 +4786,13 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) struct rtl8169_private *tp = netdev_priv(dev); struct pci_dev *pdev = tp->pci_dev; struct rtl8169_counters *counters = tp->counters; - unsigned int start; pm_runtime_get_noresume(&pdev->dev); netdev_stats_to_stats64(stats, &dev->stats); - do { - start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp); - stats->rx_packets = tp->rx_stats.packets; - stats->rx_bytes = tp->rx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&tp->rx_stats.syncp, start)); - - do { - start = u64_stats_fetch_begin_irq(&tp->tx_stats.syncp); - stats->tx_packets = tp->tx_stats.packets; - stats->tx_bytes = tp->tx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&tp->tx_stats.syncp, start)); + rtl_get_priv_stats(&tp->rx_stats, &stats->rx_packets, &stats->rx_bytes); + rtl_get_priv_stats(&tp->tx_stats, &stats->tx_packets, &stats->tx_bytes); /* * Fetch additional counter values missing in stats collected by driver diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 9f88b5db4f89..7453b17a37a2 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1036,7 +1036,10 @@ struct ravb_private { unsigned no_avb_link:1; unsigned avb_link_active_low:1; unsigned wol_enabled:1; - int num_tx_desc; /* TX descriptors per packet */ + unsigned rxcidm:1; /* RX Clock Internal Delay Mode */ + unsigned txcidm:1; /* TX Clock Internal Delay Mode */ + unsigned rgmii_override:1; /* Deprecated rgmii-*id behavior */ + int num_tx_desc; /* TX descriptors per packet */ }; static inline u32 ravb_read(struct net_device *ndev, enum ravb_reg reg) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 99f7aae102ce..9c4df4ede011 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -162,7 +162,7 @@ static int ravb_get_mdio_data(struct mdiobb_ctrl *ctrl) } /* MDIO bus control struct */ -static struct mdiobb_ops bb_ops = { +static const struct mdiobb_ops bb_ops = { .owner = THIS_MODULE, .set_mdc = ravb_set_mdc, .set_mdio_dir = ravb_set_mdio_dir, @@ -1034,11 +1034,8 @@ static int ravb_phy_init(struct net_device *ndev) pn = of_node_get(np); } - iface = priv->phy_interface; - if (priv->chip_id != RCAR_GEN2 && 
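Checking the return value of napi_complete_done(), as the r8169 poll loop above now does, closes a subtle race: the function returns false when the poll was re-armed (by busy polling, for instance), and unmasking the device interrupt in that case could double-fire against the still-scheduled NAPI context. The canonical poll shape, helper names hypothetical:

static int my_poll(struct napi_struct *napi, int budget)
{
	int work_done = my_clean_rings(napi, budget);

	/* Only re-enable the IRQ if NAPI really completed. */
	if (work_done < budget && napi_complete_done(napi, work_done))
		my_irq_enable(napi);

	return work_done;
}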
phy_interface_mode_is_rgmii(iface)) { - /* ravb_set_delay_mode() takes care of internal delay mode */ - iface = PHY_INTERFACE_MODE_RGMII; - } + iface = priv->rgmii_override ? PHY_INTERFACE_MODE_RGMII + : priv->phy_interface; phydev = of_phy_connect(ndev, pn, ravb_adjust_link, 0, iface); of_node_put(pn); if (!phydev) { @@ -1989,23 +1986,53 @@ static const struct soc_device_attribute ravb_delay_mode_quirk_match[] = { }; /* Set tx and rx clock internal delay modes */ -static void ravb_set_delay_mode(struct net_device *ndev) +static void ravb_parse_delay_mode(struct device_node *np, struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); - int set = 0; + bool explicit_delay = false; + u32 delay; + + if (!of_property_read_u32(np, "rx-internal-delay-ps", &delay)) { + /* Valid values are 0 and 1800, according to DT bindings */ + priv->rxcidm = !!delay; + explicit_delay = true; + } + if (!of_property_read_u32(np, "tx-internal-delay-ps", &delay)) { + /* Valid values are 0 and 2000, according to DT bindings */ + priv->txcidm = !!delay; + explicit_delay = true; + } + if (explicit_delay) + return; + + /* Fall back to legacy rgmii-*id behavior */ if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID || - priv->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) - set |= APSR_DM_RDM; + priv->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) { + priv->rxcidm = 1; + priv->rgmii_override = 1; + } if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID || priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) { if (!WARN(soc_device_match(ravb_delay_mode_quirk_match), "phy-mode %s requires TX clock internal delay mode which is not supported by this hardware revision. Please update device tree", - phy_modes(priv->phy_interface))) - set |= APSR_DM_TDM; + phy_modes(priv->phy_interface))) { + priv->txcidm = 1; + priv->rgmii_override = 1; + } } +} + +static void ravb_set_delay_mode(struct net_device *ndev) +{ + struct ravb_private *priv = netdev_priv(ndev); + u32 set = 0; + if (priv->rxcidm) + set |= APSR_DM_RDM; + if (priv->txcidm) + set |= APSR_DM_TDM; ravb_modify(ndev, APSR, APSR_DM, set); } @@ -2138,8 +2165,10 @@ static int ravb_probe(struct platform_device *pdev) /* Request GTI loading */ ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI); - if (priv->chip_id != RCAR_GEN2) + if (priv->chip_id != RCAR_GEN2) { + ravb_parse_delay_mode(np, ndev); ravb_set_delay_mode(ndev); + } /* Allocate descriptor base address table */ priv->desc_bat_size = sizeof(struct ravb_desc) * DBAT_ENTRY_NUM; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index f45331ed90b0..c63304632935 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -45,6 +45,15 @@ #define SH_ETH_OFFSET_DEFAULTS \ [0 ... SH_ETH_MAX_REGISTER_OFFSET - 1] = SH_ETH_OFFSET_INVALID +/* use some intentionally tricky logic here to initialize the whole struct to + * 0xffff, but then override certain fields, requiring us to indicate that we + * "know" that there are overrides in this structure, and we'll need to disable + * that warning from W=1 builds. GCC has supported this option since 4.2.X, but + * the macros available to do this only define GCC 8. 
+ */ +__diag_push(); +__diag_ignore(GCC, 8, "-Woverride-init", + "logic to initialize all and then override some is OK"); static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { SH_ETH_OFFSET_DEFAULTS, @@ -332,6 +341,7 @@ static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = { [TSU_ADRH0] = 0x0100, }; +__diag_pop(); static void sh_eth_rcv_snd_disable(struct net_device *ndev); static struct net_device_stats *sh_eth_get_stats(struct net_device *ndev); @@ -1202,7 +1212,7 @@ static void sh_mdc_ctrl(struct mdiobb_ctrl *ctrl, int bit) } /* mdio bus control struct */ -static struct mdiobb_ops bb_ops = { +static const struct mdiobb_ops bb_ops = { .owner = THIS_MODULE, .set_mdc = sh_mdc_ctrl, .set_mdio_dir = sh_mmd_ctrl, diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 9cc31f7e0df1..dd0bc7f0aaee 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -200,9 +200,9 @@ static int rocker_dma_test_offset(const struct rocker *rocker, buf = alloc + offset; expect = buf + ROCKER_TEST_DMA_BUF_SIZE; - dma_handle = pci_map_single(pdev, buf, ROCKER_TEST_DMA_BUF_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(pdev, dma_handle)) { + dma_handle = dma_map_single(&pdev->dev, buf, ROCKER_TEST_DMA_BUF_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, dma_handle)) { err = -EIO; goto free_alloc; } @@ -234,8 +234,8 @@ static int rocker_dma_test_offset(const struct rocker *rocker, goto unmap; unmap: - pci_unmap_single(pdev, dma_handle, ROCKER_TEST_DMA_BUF_SIZE, - PCI_DMA_BIDIRECTIONAL); + dma_unmap_single(&pdev->dev, dma_handle, ROCKER_TEST_DMA_BUF_SIZE, + DMA_BIDIRECTIONAL); free_alloc: kfree(alloc); @@ -441,9 +441,9 @@ static int rocker_dma_ring_create(const struct rocker *rocker, if (!info->desc_info) return -ENOMEM; - info->desc = pci_alloc_consistent(rocker->pdev, - info->size * sizeof(*info->desc), - &info->mapaddr); + info->desc = dma_alloc_coherent(&rocker->pdev->dev, + info->size * sizeof(*info->desc), + &info->mapaddr, GFP_KERNEL); if (!info->desc) { kfree(info->desc_info); return -ENOMEM; @@ -465,9 +465,9 @@ static void rocker_dma_ring_destroy(const struct rocker *rocker, { rocker_write64(rocker, DMA_DESC_ADDR(info->type), 0); - pci_free_consistent(rocker->pdev, - info->size * sizeof(struct rocker_desc), - info->desc, info->mapaddr); + dma_free_coherent(&rocker->pdev->dev, + info->size * sizeof(struct rocker_desc), info->desc, + info->mapaddr); kfree(info->desc_info); } @@ -506,8 +506,9 @@ static int rocker_dma_ring_bufs_alloc(const struct rocker *rocker, goto rollback; } - dma_handle = pci_map_single(pdev, buf, buf_size, direction); - if (pci_dma_mapping_error(pdev, dma_handle)) { + dma_handle = dma_map_single(&pdev->dev, buf, buf_size, + direction); + if (dma_mapping_error(&pdev->dev, dma_handle)) { kfree(buf); err = -EIO; goto rollback; @@ -526,7 +527,8 @@ rollback: for (i--; i >= 0; i--) { const struct rocker_desc_info *desc_info = &info->desc_info[i]; - pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr), + dma_unmap_single(&pdev->dev, + dma_unmap_addr(desc_info, mapaddr), desc_info->data_size, direction); kfree(desc_info->data); } @@ -546,7 +548,8 @@ static void rocker_dma_ring_bufs_free(const struct rocker *rocker, desc->buf_addr = 0; desc->buf_size = 0; - pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr), + dma_unmap_single(&pdev->dev, + dma_unmap_addr(desc_info, mapaddr), desc_info->data_size, direction); 
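The __diag_push()/__diag_ignore()/__diag_pop() trio used for sh_eth above is the supported way to scope a warning exception to a single definition; a minimal reproduction of the initialize-everything-then-override idiom it shields:

__diag_push();
__diag_ignore(GCC, 8, "-Woverride-init",
	      "blanket default below is intentionally overridden");
static const u16 reg_offset[8] = {
	[0 ... 7] = 0xffff,	/* default: mark every entry invalid */
	[3]	  = 0x0100,	/* the override that trips -Woverride-init */
};
__diag_pop();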
kfree(desc_info->data); } @@ -615,7 +618,7 @@ static int rocker_dma_rings_init(struct rocker *rocker) spin_lock_init(&rocker->cmd_ring_lock); err = rocker_dma_ring_bufs_alloc(rocker, &rocker->cmd_ring, - PCI_DMA_BIDIRECTIONAL, PAGE_SIZE); + DMA_BIDIRECTIONAL, PAGE_SIZE); if (err) { dev_err(&pdev->dev, "failed to alloc command dma ring buffers\n"); goto err_dma_cmd_ring_bufs_alloc; @@ -636,7 +639,7 @@ static int rocker_dma_rings_init(struct rocker *rocker) } err = rocker_dma_ring_bufs_alloc(rocker, &rocker->event_ring, - PCI_DMA_FROMDEVICE, PAGE_SIZE); + DMA_FROM_DEVICE, PAGE_SIZE); if (err) { dev_err(&pdev->dev, "failed to alloc event dma ring buffers\n"); goto err_dma_event_ring_bufs_alloc; @@ -650,7 +653,7 @@ err_dma_event_ring_create: rocker_dma_cmd_ring_waits_free(rocker); err_dma_cmd_ring_waits_alloc: rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring, - PCI_DMA_BIDIRECTIONAL); + DMA_BIDIRECTIONAL); err_dma_cmd_ring_bufs_alloc: rocker_dma_ring_destroy(rocker, &rocker->cmd_ring); return err; @@ -659,11 +662,11 @@ err_dma_cmd_ring_bufs_alloc: static void rocker_dma_rings_fini(struct rocker *rocker) { rocker_dma_ring_bufs_free(rocker, &rocker->event_ring, - PCI_DMA_BIDIRECTIONAL); + DMA_BIDIRECTIONAL); rocker_dma_ring_destroy(rocker, &rocker->event_ring); rocker_dma_cmd_ring_waits_free(rocker); rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring, - PCI_DMA_BIDIRECTIONAL); + DMA_BIDIRECTIONAL); rocker_dma_ring_destroy(rocker, &rocker->cmd_ring); } @@ -675,9 +678,9 @@ static int rocker_dma_rx_ring_skb_map(const struct rocker_port *rocker_port, struct pci_dev *pdev = rocker->pdev; dma_addr_t dma_handle; - dma_handle = pci_map_single(pdev, skb->data, buf_len, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(pdev, dma_handle)) + dma_handle = dma_map_single(&pdev->dev, skb->data, buf_len, + DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, dma_handle)) return -EIO; if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_RX_FRAG_ADDR, dma_handle)) goto tlv_put_failure; @@ -686,7 +689,7 @@ static int rocker_dma_rx_ring_skb_map(const struct rocker_port *rocker_port, return 0; tlv_put_failure: - pci_unmap_single(pdev, dma_handle, buf_len, PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, dma_handle, buf_len, DMA_FROM_DEVICE); desc_info->tlv_size = 0; return -EMSGSIZE; } @@ -734,7 +737,7 @@ static void rocker_dma_rx_ring_skb_unmap(const struct rocker *rocker, return; dma_handle = rocker_tlv_get_u64(attrs[ROCKER_TLV_RX_FRAG_ADDR]); len = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FRAG_MAX_LEN]); - pci_unmap_single(pdev, dma_handle, len, PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, dma_handle, len, DMA_FROM_DEVICE); } static void rocker_dma_rx_ring_skb_free(const struct rocker *rocker, @@ -796,7 +799,7 @@ static int rocker_port_dma_rings_init(struct rocker_port *rocker_port) } err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->tx_ring, - PCI_DMA_TODEVICE, + DMA_TO_DEVICE, ROCKER_DMA_TX_DESC_SIZE); if (err) { netdev_err(rocker_port->dev, "failed to alloc tx dma ring buffers\n"); @@ -813,7 +816,7 @@ static int rocker_port_dma_rings_init(struct rocker_port *rocker_port) } err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->rx_ring, - PCI_DMA_BIDIRECTIONAL, + DMA_BIDIRECTIONAL, ROCKER_DMA_RX_DESC_SIZE); if (err) { netdev_err(rocker_port->dev, "failed to alloc rx dma ring buffers\n"); @@ -831,12 +834,12 @@ static int rocker_port_dma_rings_init(struct rocker_port *rocker_port) err_dma_rx_ring_skbs_alloc: rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring, - PCI_DMA_BIDIRECTIONAL); + 
DMA_BIDIRECTIONAL); err_dma_rx_ring_bufs_alloc: rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring); err_dma_rx_ring_create: rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); err_dma_tx_ring_bufs_alloc: rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring); return err; @@ -848,10 +851,10 @@ static void rocker_port_dma_rings_fini(struct rocker_port *rocker_port) rocker_dma_rx_ring_skbs_free(rocker_port); rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring, - PCI_DMA_BIDIRECTIONAL); + DMA_BIDIRECTIONAL); rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring); rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring); } @@ -1858,7 +1861,7 @@ static void rocker_tx_desc_frags_unmap(const struct rocker_port *rocker_port, continue; dma_handle = rocker_tlv_get_u64(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]); len = rocker_tlv_get_u16(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_LEN]); - pci_unmap_single(pdev, dma_handle, len, DMA_TO_DEVICE); + dma_unmap_single(&pdev->dev, dma_handle, len, DMA_TO_DEVICE); } } @@ -1871,8 +1874,8 @@ static int rocker_tx_desc_frag_map_put(const struct rocker_port *rocker_port, dma_addr_t dma_handle; struct rocker_tlv *frag; - dma_handle = pci_map_single(pdev, buf, buf_len, DMA_TO_DEVICE); - if (unlikely(pci_dma_mapping_error(pdev, dma_handle))) { + dma_handle = dma_map_single(&pdev->dev, buf, buf_len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&pdev->dev, dma_handle))) { if (net_ratelimit()) netdev_err(rocker_port->dev, "failed to dma map tx frag\n"); return -EIO; @@ -1892,7 +1895,7 @@ static int rocker_tx_desc_frag_map_put(const struct rocker_port *rocker_port, nest_cancel: rocker_tlv_nest_cancel(desc_info, frag); unmap_frag: - pci_unmap_single(pdev, dma_handle, buf_len, DMA_TO_DEVICE); + dma_unmap_single(&pdev->dev, dma_handle, buf_len, DMA_TO_DEVICE); return -EMSGSIZE; } @@ -2905,17 +2908,17 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_pci_request_regions; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); if (!err) { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - dev_err(&pdev->dev, "pci_set_consistent_dma_mask failed\n"); + dev_err(&pdev->dev, "dma_set_coherent_mask failed\n"); goto err_pci_set_dma_mask; } } else { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "pci_set_dma_mask failed\n"); + dev_err(&pdev->dev, "dma_set_mask failed\n"); goto err_pci_set_dma_mask; } } diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 2cc8184b7e6b..971f1e54b652 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -97,7 +97,7 @@ void sxgbe_disable_eee_mode(struct sxgbe_priv_data * const priv) /** * sxgbe_eee_ctrl_timer - * @arg : data hook + * @t: timer list containing a data * Description: * If there is no data transfer and if we are not in LPI state, * then MAC Transmitter can be moved to LPI state. 
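Back in the rocker probe above, the try-64-then-32 mask sequence survives the API conversion unchanged. Since the streaming and coherent masks end up identical in both branches, the combined helper would express the same thing more compactly; a sketch, assuming no device-specific reason to keep the two masks separate:

static int my_set_dma_masks(struct pci_dev *pdev)
{
	int err;

	/* dma_set_mask_and_coherent() sets both masks at once. */
	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err)
		err = dma_set_mask_and_coherent(&pdev->dev,
						DMA_BIT_MASK(32));
	return err;
}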
@@ -255,7 +255,7 @@ static void sxgbe_adjust_link(struct net_device *dev) /** * sxgbe_init_phy - PHY initialization - * @dev: net device structure + * @ndev: net device structure * Description: it initializes the driver's PHY state, and attaches the PHY * to the mac driver. * Return value: @@ -364,8 +364,11 @@ static int sxgbe_init_rx_buffers(struct net_device *dev, /** * sxgbe_free_rx_buffers - free what sxgbe_init_rx_buffers() allocated * @dev: net device structure + * @p: dec pointer + * @i: index + * @dma_buf_sz: size * @rx_ring: ring to be freed - * @rx_rsize: ring size + * * Description: this function initializes the DMA RX descriptor */ static void sxgbe_free_rx_buffers(struct net_device *dev, @@ -383,6 +386,7 @@ static void sxgbe_free_rx_buffers(struct net_device *dev, /** * init_tx_ring - init the TX descriptor ring * @dev: net device structure + * @queue_no: queue * @tx_ring: ring to be initialised * @tx_rsize: ring size * Description: this function initializes the DMA TX descriptor @@ -449,6 +453,7 @@ static void free_rx_ring(struct device *dev, struct sxgbe_rx_queue *rx_ring, /** * init_rx_ring - init the RX descriptor ring * @dev: net device structure + * @queue_no: queue * @rx_ring: ring to be initialised * @rx_rsize: ring size * Description: this function initializes the DMA RX descriptor @@ -548,7 +553,7 @@ static void free_tx_ring(struct device *dev, struct sxgbe_tx_queue *tx_ring, /** * init_dma_desc_rings - init the RX/TX descriptor rings - * @dev: net device structure + * @netd: net device structure * Description: this function initializes the DMA RX/TX descriptors * and allocates the socket buffers. It suppors the chained and ring * modes. @@ -724,7 +729,7 @@ static void sxgbe_mtl_operation_mode(struct sxgbe_priv_data *priv) /** * sxgbe_tx_queue_clean: - * @priv: driver private structure + * @tqueue: queue pointer * Description: it reclaims resources after transmission completes. */ static void sxgbe_tx_queue_clean(struct sxgbe_tx_queue *tqueue) @@ -807,6 +812,7 @@ static void sxgbe_tx_all_clean(struct sxgbe_priv_data * const priv) /** * sxgbe_restart_tx_queue: irq tx error mng function * @priv: driver private structure + * @queue_num: queue number * Description: it cleans the descriptors and restarts the transmission * in case of errors. */ @@ -1567,6 +1573,7 @@ static int sxgbe_poll(struct napi_struct *napi, int budget) /** * sxgbe_tx_timeout * @dev : Pointer to net device structure + * @txqueue: index of the hanging queue * Description: this function is called when a packet transmission fails to * complete within a reasonable time. The driver will mark the error in the * netdev structure and arrange for the device to be reset to a sane state diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 4b0b2cf026a5..da6886dcac37 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -601,10 +601,14 @@ static int efx_ef10_probe(struct efx_nic *efx) efx_ef10_read_licensed_features(efx); /* We can have one VI for each vi_stride-byte region. - * However, until we use TX option descriptors we need two TX queues - * per channel. + * However, until we use TX option descriptors we need up to four + * TX queues per channel for different checksumming combinations. 
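Four, because once encapsulation offloads exist the queue type is the cross product of outer and inner checksum offload; the low two label bits encode it, as the BUILD_BUG_ON() added further down insists. Spelled out with illustrative names (the driver's own constants appear later as EFX_TXQ_TYPE_OUTER_CSUM and EFX_TXQ_TYPE_INNER_CSUM):

enum {
	TXQ_TYPE_NONE	    = 0,	/* no checksum offload */
	TXQ_TYPE_OUTER_CSUM = 1,	/* outer (or only) L4 checksum */
	TXQ_TYPE_INNER_CSUM = 2,	/* inner checksum for encap frames */
	TXQ_TYPE_BOTH	    = 3,	/* outer and inner together */
};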
*/ - efx->tx_queues_per_channel = 2; + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)) + efx->tx_queues_per_channel = 4; + else + efx->tx_queues_per_channel = 2; efx->max_vis = efx_ef10_mem_map_size(efx) / efx->vi_stride; if (!efx->max_vis) { netif_err(efx, drv, efx->net_dev, "error determining max VIs\n"); @@ -1300,6 +1304,7 @@ static void efx_ef10_fini_nic(struct efx_nic *efx) static int efx_ef10_init_nic(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; + netdev_features_t hw_enc_features = 0; int rc; if (nic_data->must_check_datapath_caps) { @@ -1344,6 +1349,21 @@ static int efx_ef10_init_nic(struct efx_nic *efx) nic_data->must_restore_piobufs = false; } + /* add encapsulated checksum offload features */ + if (efx_has_cap(efx, VXLAN_NVGRE) && !efx_ef10_is_vf(efx)) + hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + /* add encapsulated TSO features */ + if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) { + netdev_features_t encap_tso_features; + + encap_tso_features = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM; + + hw_enc_features |= encap_tso_features | NETIF_F_TSO; + efx->net_dev->features |= encap_tso_features; + } + efx->net_dev->hw_enc_features = hw_enc_features; + /* don't fail init if RSS setup doesn't work */ rc = efx->type->rx_push_rss_config(efx, false, efx->rss_context.rx_indir_table, NULL); @@ -1851,18 +1871,9 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx) spin_unlock_bh(&efx->stats_lock); - if (in_interrupt()) { - /* If in atomic context, cannot update stats. Just update the - * software stats and return so the caller can continue. - */ - spin_lock_bh(&efx->stats_lock); - efx_update_sw_stats(efx, stats); - return 0; - } - efx_ef10_get_stat_mask(efx, mask); - rc = efx_nic_alloc_buffer(efx, &stats_buf, dma_len, GFP_ATOMIC); + rc = efx_nic_alloc_buffer(efx, &stats_buf, dma_len, GFP_KERNEL); if (rc) { spin_lock_bh(&efx->stats_lock); return rc; @@ -1918,6 +1929,18 @@ static size_t efx_ef10_update_stats_vf(struct efx_nic *efx, u64 *full_stats, return efx_ef10_update_stats_common(efx, full_stats, core_stats); } +static size_t efx_ef10_update_stats_atomic_vf(struct efx_nic *efx, u64 *full_stats, + struct rtnl_link_stats64 *core_stats) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + /* In atomic context, cannot update HW stats. Just update the + * software stats and return so the caller can continue. + */ + efx_update_sw_stats(efx, nic_data->stats); + return efx_ef10_update_stats_common(efx, full_stats, core_stats); +} + static void efx_ef10_push_irq_moderation(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; @@ -2146,6 +2169,9 @@ static int efx_ef10_irq_test_generate(struct efx_nic *efx) static int efx_ef10_tx_probe(struct efx_tx_queue *tx_queue) { + /* low two bits of label are what we want for type */ + BUILD_BUG_ON((EFX_TXQ_TYPE_OUTER_CSUM | EFX_TXQ_TYPE_INNER_CSUM) != 3); + tx_queue->type = tx_queue->label & 3; return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf, (tx_queue->ptr_mask + 1) * sizeof(efx_qword_t), @@ -2168,15 +2194,15 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue, /* Add Firmware-Assisted TSO v2 option descriptors to a queue. 
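The BUILD_BUG_ON() slipped into efx_ef10_tx_probe() above turns the "type lives in the low two label bits" assumption into a compile-time contract; the technique generalises to any implicit bit-layout dependency:

#include <linux/build_bug.h>

#define FLAG_OUTER	0x1
#define FLAG_INNER	0x2

static inline unsigned int label_to_type(unsigned int label)
{
	/* Fail the build, not the boot, if the flags ever stop
	 * occupying the two bits the mask below assumes.
	 */
	BUILD_BUG_ON((FLAG_OUTER | FLAG_INNER) != 3);
	return label & 3;
}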
*/ -static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, - struct sk_buff *skb, - bool *data_mapped) +int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + bool *data_mapped) { struct efx_tx_buffer *buffer; + u16 inner_ipv4_id = 0; + u16 outer_ipv4_id = 0; struct tcphdr *tcp; struct iphdr *ip; - - u16 ipv4_id; + u16 ip_tot_len; u32 seqnum; u32 mss; @@ -2189,21 +2215,43 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, return -EINVAL; } - ip = ip_hdr(skb); + if (skb->encapsulation) { + if (!tx_queue->tso_encap) + return -EINVAL; + ip = ip_hdr(skb); + if (ip->version == 4) + outer_ipv4_id = ntohs(ip->id); + + ip = inner_ip_hdr(skb); + tcp = inner_tcp_hdr(skb); + } else { + ip = ip_hdr(skb); + tcp = tcp_hdr(skb); + } + + /* 8000-series EF10 hardware requires that IP Total Length be + * greater than or equal to the value it will have in each segment + * (which is at most mss + 208 + TCP header length), but also less + * than (0x10000 - inner_network_header). Otherwise the TCP + * checksum calculation will be broken for encapsulated packets. + * We fill in ip->tot_len with 0xff30, which should satisfy the + * first requirement unless the MSS is ridiculously large (which + * should be impossible as the driver max MTU is 9216); it is + * guaranteed to satisfy the second as we only attempt TSO if + * inner_network_header <= 208. + */ + ip_tot_len = -EFX_TSO2_MAX_HDRLEN; + EFX_WARN_ON_ONCE_PARANOID(mss + EFX_TSO2_MAX_HDRLEN + + (tcp->doff << 2u) > ip_tot_len); + if (ip->version == 4) { - /* Modify IPv4 header if needed. */ - ip->tot_len = 0; + ip->tot_len = htons(ip_tot_len); ip->check = 0; - ipv4_id = ntohs(ip->id); + inner_ipv4_id = ntohs(ip->id); } else { - /* Modify IPv6 header if needed. */ - struct ipv6hdr *ipv6 = ipv6_hdr(skb); - - ipv6->payload_len = 0; - ipv4_id = 0; + ((struct ipv6hdr *)ip)->payload_len = htons(ip_tot_len); } - tcp = tcp_hdr(skb); seqnum = ntohl(tcp->seq); buffer = efx_tx_queue_get_insert_buffer(tx_queue); @@ -2216,7 +2264,7 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO, ESF_DZ_TX_TSO_OPTION_TYPE, ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A, - ESF_DZ_TX_TSO_IP_ID, ipv4_id, + ESF_DZ_TX_TSO_IP_ID, inner_ipv4_id, ESF_DZ_TX_TSO_TCP_SEQNO, seqnum ); ++tx_queue->insert_count; @@ -2226,11 +2274,12 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, buffer->flags = EFX_TX_BUF_OPTION; buffer->len = 0; buffer->unmap_len = 0; - EFX_POPULATE_QWORD_4(buffer->option, + EFX_POPULATE_QWORD_5(buffer->option, ESF_DZ_TX_DESC_IS_OPT, 1, ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO, ESF_DZ_TX_TSO_OPTION_TYPE, ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B, + ESF_DZ_TX_TSO_OUTER_IPID, outer_ipv4_id, ESF_DZ_TX_TSO_TCP_MSS, mss ); ++tx_queue->insert_count; @@ -2254,11 +2303,11 @@ static u32 efx_ef10_tso_versions(struct efx_nic *efx) static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) { - bool csum_offload = tx_queue->label & EFX_TXQ_TYPE_OFFLOAD; + bool csum_offload = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM; + bool inner_csum = tx_queue->type & EFX_TXQ_TYPE_INNER_CSUM; struct efx_channel *channel = tx_queue->channel; struct efx_nic *efx = tx_queue->efx; struct efx_ef10_nic_data *nic_data; - bool tso_v2 = false; efx_qword_t *txd; int rc; @@ -2281,15 +2330,18 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) * TSOv2 cannot be used with Hardware timestamping, and is never needed * for XDP tx. 
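One subtlety in the TSO rework above: "ip_tot_len = -EFX_TSO2_MAX_HDRLEN" leans on unsigned 16-bit wrap-around. The constant is defined outside these hunks, but the comment's 0xff30 implies it is 208, in which case the arithmetic works out as:

static u16 tso2_ip_tot_len(void)
{
	/* -208 as an int is 0xffffff30; truncated to u16 that is
	 * 0x10000 - 208 = 0xff30, the value the comment cites.
	 */
	return (u16)-208;
}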
*/ - if (csum_offload && (nic_data->datapath_caps2 & - (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) && - !tx_queue->timestamping && !tx_queue->xdp_tx) { - tso_v2 = true; - netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n", - channel->channel); + if (efx_has_cap(efx, TX_TSO_V2)) { + if ((csum_offload || inner_csum) && + !tx_queue->timestamping && !tx_queue->xdp_tx) { + tx_queue->tso_version = 2; + netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n", + channel->channel); + } + } else if (efx_has_cap(efx, TX_TSO)) { + tx_queue->tso_version = 1; } - rc = efx_mcdi_tx_init(tx_queue, tso_v2); + rc = efx_mcdi_tx_init(tx_queue); if (rc) goto fail; @@ -2302,22 +2354,19 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION; tx_queue->insert_count = 1; txd = efx_tx_desc(tx_queue, 0); - EFX_POPULATE_QWORD_5(*txd, + EFX_POPULATE_QWORD_7(*txd, ESF_DZ_TX_DESC_IS_OPT, true, ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_CRC_CSUM, ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload, - ESF_DZ_TX_OPTION_IP_CSUM, csum_offload, + ESF_DZ_TX_OPTION_IP_CSUM, csum_offload && tx_queue->tso_version != 2, + ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM, inner_csum, + ESF_DZ_TX_OPTION_INNER_IP_CSUM, inner_csum && tx_queue->tso_version != 2, ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping); tx_queue->write_count = 1; - if (tso_v2) { - tx_queue->handle_tso = efx_ef10_tx_tso_desc; - tx_queue->tso_version = 2; - } else if (nic_data->datapath_caps & - (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) { - tx_queue->tso_version = 1; - } + if (tx_queue->tso_version == 2 && efx_has_cap(efx, TX_TSO_V2_ENCAP)) + tx_queue->tso_encap = true; wmb(); efx_ef10_push_tx_desc(tx_queue, txd); @@ -2367,7 +2416,7 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) unsigned int write_ptr; efx_qword_t *txd; - tx_queue->xmit_more_available = false; + tx_queue->xmit_pending = false; if (unlikely(tx_queue->write_count == tx_queue->insert_count)) return; @@ -2880,7 +2929,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) /* Get the transmit queue */ tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL); tx_queue = efx_channel_get_tx_queue(channel, - tx_ev_q_label % EFX_TXQ_TYPES); + tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL); if (!tx_queue->timestamping) { /* Transmit completion */ @@ -3952,10 +4001,10 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .finish_flr = efx_port_dummy_op_void, .describe_stats = efx_ef10_describe_stats, .update_stats = efx_ef10_update_stats_vf, + .update_stats_atomic = efx_ef10_update_stats_atomic_vf, .start_stats = efx_port_dummy_op_void, .pull_stats = efx_port_dummy_op_void, .stop_stats = efx_port_dummy_op_void, - .set_id_led = efx_mcdi_set_id_led, .push_irq_moderation = efx_ef10_push_irq_moderation, .reconfigure_mac = efx_ef10_mac_reconfigure, .check_mac_fault = efx_mcdi_mac_check_fault, @@ -4066,7 +4115,6 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .start_stats = efx_mcdi_mac_start_stats, .pull_stats = efx_mcdi_mac_pull_stats, .stop_stats = efx_mcdi_mac_stop_stats, - .set_id_led = efx_mcdi_set_id_led, .push_irq_moderation = efx_ef10_push_irq_moderation, .reconfigure_mac = efx_ef10_mac_reconfigure, .check_mac_fault = efx_mcdi_mac_check_fault, diff --git a/drivers/net/ethernet/sfc/ef100_ethtool.c b/drivers/net/ethernet/sfc/ef100_ethtool.c index 729c425d0f78..835c838b7dfa 100644 --- a/drivers/net/ethernet/sfc/ef100_ethtool.c +++ b/drivers/net/ethernet/sfc/ef100_ethtool.c @@ -17,8 +17,49 @@ #include 
"ef100_ethtool.h" #include "mcdi_functions.h" +/* This is the maximum number of descriptor rings supported by the QDMA */ +#define EFX_EF100_MAX_DMAQ_SIZE 16384UL + +static void ef100_ethtool_get_ringparam(struct net_device *net_dev, + struct ethtool_ringparam *ring) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + ring->rx_max_pending = EFX_EF100_MAX_DMAQ_SIZE; + ring->tx_max_pending = EFX_EF100_MAX_DMAQ_SIZE; + ring->rx_pending = efx->rxq_entries; + ring->tx_pending = efx->txq_entries; +} + /* Ethtool options available */ const struct ethtool_ops ef100_ethtool_ops = { .get_drvinfo = efx_ethtool_get_drvinfo, + .get_msglevel = efx_ethtool_get_msglevel, + .set_msglevel = efx_ethtool_set_msglevel, + .get_pauseparam = efx_ethtool_get_pauseparam, + .set_pauseparam = efx_ethtool_set_pauseparam, + .get_sset_count = efx_ethtool_get_sset_count, + .self_test = efx_ethtool_self_test, + .get_strings = efx_ethtool_get_strings, + .get_link_ksettings = efx_ethtool_get_link_ksettings, + .set_link_ksettings = efx_ethtool_set_link_ksettings, + .get_link = ethtool_op_get_link, + .get_ringparam = ef100_ethtool_get_ringparam, + .get_fecparam = efx_ethtool_get_fecparam, + .set_fecparam = efx_ethtool_set_fecparam, + .get_ethtool_stats = efx_ethtool_get_stats, + .get_rxnfc = efx_ethtool_get_rxnfc, + .set_rxnfc = efx_ethtool_set_rxnfc, + .reset = efx_ethtool_reset, + + .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, + .get_rxfh_key_size = efx_ethtool_get_rxfh_key_size, + .get_rxfh = efx_ethtool_get_rxfh, + .set_rxfh = efx_ethtool_set_rxfh, + .get_rxfh_context = efx_ethtool_get_rxfh_context, + .set_rxfh_context = efx_ethtool_set_rxfh_context, + + .get_module_info = efx_ethtool_get_module_info, + .get_module_eeprom = efx_ethtool_get_module_eeprom, }; diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 63c311ba28b9..67fe44db6b61 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -217,9 +217,13 @@ static const struct net_device_ops ef100_netdev_ops = { .ndo_open = ef100_net_open, .ndo_stop = ef100_net_stop, .ndo_start_xmit = ef100_hard_start_xmit, + .ndo_tx_timeout = efx_watchdog, .ndo_get_stats64 = efx_net_stats, + .ndo_change_mtu = efx_change_mtu, .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = efx_set_mac_address, .ndo_set_rx_mode = efx_set_rx_mode, /* Lookout */ + .ndo_set_features = efx_set_features, .ndo_get_phys_port_id = efx_get_phys_port_id, .ndo_get_phys_port_name = efx_get_phys_port_name, #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 19fe86b3b316..3148fe770356 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -428,24 +428,12 @@ static int ef100_reset(struct efx_nic *efx, enum reset_type reset_type) __clear_bit(reset_type, &efx->reset_pending); rc = dev_open(efx->net_dev, NULL); } else if (reset_type == RESET_TYPE_ALL) { - /* A RESET_TYPE_ALL will cause filters to be removed, so we remove filters - * and reprobe after reset to avoid removing filters twice - */ - down_write(&efx->filter_sem); - ef100_filter_table_down(efx); - up_write(&efx->filter_sem); rc = efx_mcdi_reset(efx, reset_type); if (rc) return rc; netif_device_attach(efx->net_dev); - down_write(&efx->filter_sem); - rc = ef100_filter_table_up(efx); - up_write(&efx->filter_sem); - if (rc) - return rc; - rc = dev_open(efx->net_dev, NULL); } else { rc = 1; /* Leave the device closed */ @@ -696,7 +684,7 @@ 
static unsigned int ef100_check_caps(const struct efx_nic *efx, /* NIC level access functions */ #define EF100_OFFLOAD_FEATURES (NETIF_F_HW_CSUM | NETIF_F_RXCSUM | \ - NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_NTUPLE | \ NETIF_F_RXHASH | NETIF_F_RXFCS | NETIF_F_TSO_ECN | NETIF_F_RXALL | \ NETIF_F_TSO_MANGLEID | NETIF_F_HW_VLAN_CTAG_TX) @@ -769,6 +757,7 @@ const struct efx_nic_type ef100_pf_nic_type = { .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts, .reconfigure_mac = ef100_reconfigure_mac, + .reconfigure_port = efx_mcdi_port_reconfigure, .test_nvram = efx_new_mcdi_nvram_test_all, .describe_stats = ef100_describe_stats, .start_stats = efx_mcdi_mac_start_stats, @@ -1172,6 +1161,10 @@ static int ef100_probe_main(struct efx_nic *efx) rc = efx_mcdi_reset(efx, RESET_TYPE_ALL); if (rc) goto fail; + /* Enable event logging */ + rc = efx_mcdi_log_ctrl(efx, true, false, 0); + if (rc) + goto fail; rc = efx_get_pf_index(efx, &nic_data->pf_index); if (rc) @@ -1207,10 +1200,6 @@ static int ef100_probe_main(struct efx_nic *efx) if (rc) goto fail; - rc = efx_init_channels(efx); - if (rc) - goto fail; - down_write(&efx->filter_sem); rc = ef100_filter_table_probe(efx); up_write(&efx->filter_sem); diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c index a09546e43408..a90e5a9d2a37 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.c +++ b/drivers/net/ethernet/sfc/ef100_tx.c @@ -27,7 +27,6 @@ int ef100_tx_probe(struct efx_tx_queue *tx_queue) (tx_queue->ptr_mask + 2) * sizeof(efx_oword_t), GFP_KERNEL); - return 0; } void ef100_tx_init(struct efx_tx_queue *tx_queue) @@ -38,7 +37,14 @@ void ef100_tx_init(struct efx_tx_queue *tx_queue) tx_queue->channel->channel - tx_queue->efx->tx_channel_offset); - if (efx_mcdi_tx_init(tx_queue, false)) + /* This value is purely documentational; as EF100 never passes through + * the switch statement in tx.c:__efx_enqueue_skb(), that switch does + * not handle case 3. EF100's TSOv3 descriptors are generated by + * ef100_make_tso_desc(). + * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2. + */ + tx_queue->tso_version = 3; + if (efx_mcdi_tx_init(tx_queue)) netdev_WARN(tx_queue->efx->net_dev, "failed to initialise TXQ %d\n", tx_queue->queue); } @@ -117,11 +123,13 @@ static efx_oword_t *ef100_tx_desc(struct efx_tx_queue *tx_queue, unsigned int in return NULL; } -void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue) +static void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue) { unsigned int write_ptr; efx_dword_t reg; + tx_queue->xmit_pending = false; + if (unlikely(tx_queue->notify_count == tx_queue->write_count)) return; @@ -131,7 +139,6 @@ void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue) efx_writed_page(tx_queue->efx, ®, ER_GZ_TX_RING_DOORBELL, tx_queue->queue); tx_queue->notify_count = tx_queue->write_count; - tx_queue->xmit_more_available = false; } static void ef100_tx_push_buffers(struct efx_tx_queue *tx_queue) @@ -359,28 +366,31 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) goto err; ef100_tx_make_descriptors(tx_queue, skb, segments); - fill_level = efx_channel_tx_fill_level(tx_queue->channel); + fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); if (fill_level > efx->txq_stop_thresh) { + struct efx_tx_queue *txq2; + netif_tx_stop_queue(tx_queue->core_txq); /* Re-read after a memory barrier in case we've raced with * the completion path. 
Otherwise there's a danger we'll never * restart the queue if all completions have just happened. */ smp_mb(); - fill_level = efx_channel_tx_fill_level(tx_queue->channel); + efx_for_each_channel_tx_queue(txq2, tx_queue->channel) + txq2->old_read_count = READ_ONCE(txq2->read_count); + fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); if (fill_level < efx->txq_stop_thresh) netif_tx_start_queue(tx_queue->core_txq); } - if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more)) - tx_queue->xmit_more_available = false; /* push doorbell */ - else if (tx_queue->write_count - tx_queue->notify_count > 255) - /* Ensure we never push more than 256 packets at once */ - tx_queue->xmit_more_available = false; /* push */ - else - tx_queue->xmit_more_available = true; /* don't push yet */ + tx_queue->xmit_pending = true; - if (!tx_queue->xmit_more_available) + /* If xmit_more then we don't need to push the doorbell, unless there + * are 256 descriptors already queued in which case we have to push to + * ensure we never push more than 256 at once. + */ + if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) || + tx_queue->write_count - tx_queue->notify_count > 255) ef100_tx_push_buffers(tx_queue); if (segments) { @@ -399,10 +409,10 @@ err: /* If we're not expecting another transmit and we had something to push * on this queue then we need to push here to get the previous packets - * out. We only enter this branch from before the 'Update BQL' section - * above, so xmit_more_available still refers to the old state. + * out. We only enter this branch from before the xmit_more handling + * above, so xmit_pending still refers to the old state. */ - if (tx_queue->xmit_more_available && !xmit_more) + if (tx_queue->xmit_pending && !xmit_more) ef100_tx_push_buffers(tx_queue); return rc; } diff --git a/drivers/net/ethernet/sfc/ef100_tx.h b/drivers/net/ethernet/sfc/ef100_tx.h index fa23e430bdd7..ddc4b98fa6db 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.h +++ b/drivers/net/ethernet/sfc/ef100_tx.h @@ -17,7 +17,6 @@ int ef100_tx_probe(struct efx_tx_queue *tx_queue); void ef100_tx_init(struct efx_tx_queue *tx_queue); void ef100_tx_write(struct efx_tx_queue *tx_queue); -void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue); unsigned int ef100_tx_max_skb_descs(struct efx_nic *efx); void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index e06fa89f2d72..718308076341 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -33,7 +33,7 @@ #include "selftest.h" #include "sriov.h" -#include "mcdi.h" +#include "mcdi_port_common.h" #include "mcdi_pcol.h" #include "workarounds.h" @@ -149,23 +149,17 @@ static int efx_init_port(struct efx_nic *efx) mutex_lock(&efx->mac_lock); - rc = efx->phy_op->init(efx); - if (rc) - goto fail1; - efx->port_initialized = true; /* Ensure the PHY advertises the correct flow control settings */ - rc = efx->phy_op->reconfigure(efx); + rc = efx_mcdi_port_reconfigure(efx); if (rc && rc != -EPERM) - goto fail2; + goto fail; mutex_unlock(&efx->mac_lock); return 0; -fail2: - efx->phy_op->fini(efx); -fail1: +fail: mutex_unlock(&efx->mac_lock); return rc; } @@ -177,7 +171,6 @@ static void efx_fini_port(struct efx_nic *efx) if (!efx->port_initialized) return; - efx->phy_op->fini(efx); efx->port_initialized = false; efx->link_state.up = false; @@ -603,6 +596,7 @@ static const struct net_device_ops efx_netdev_ops = { .ndo_set_mac_address = 
efx_set_mac_address, .ndo_set_rx_mode = efx_set_rx_mode, .ndo_set_features = efx_set_features, + .ndo_features_check = efx_features_check, .ndo_vlan_rx_add_vid = efx_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = efx_vlan_rx_kill_vid, #ifdef CONFIG_SFC_SRIOV @@ -1229,7 +1223,7 @@ static int efx_pm_thaw(struct device *dev) goto fail; mutex_lock(&efx->mac_lock); - efx->phy_op->reconfigure(efx); + efx_mcdi_port_reconfigure(efx); mutex_unlock(&efx->mac_lock); efx_start_all(efx); @@ -1336,7 +1330,7 @@ static int __init efx_init_module(void) { int rc; - printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n"); + printk(KERN_INFO "Solarflare NET driver\n"); rc = register_netdevice_notifier(&efx_netdev_notifier); if (rc) @@ -1398,4 +1392,3 @@ MODULE_AUTHOR("Solarflare Communications and " MODULE_DESCRIPTION("Solarflare network driver"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(pci, efx_pci_table); -MODULE_VERSION(EFX_DRIVER_VERSION); diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index dd4f30ea48a8..a4a626e9cd9a 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -151,7 +151,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, */ n_xdp_tx = num_possible_cpus(); - n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES); + n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL); vec_count = pci_msix_vec_count(efx->pci_dev); if (vec_count < 0) @@ -179,7 +179,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, efx->xdp_tx_queue_count = 0; } else { efx->n_xdp_channels = n_xdp_ev; - efx->xdp_tx_per_channel = EFX_TXQ_TYPES; + efx->xdp_tx_per_channel = EFX_MAX_TXQ_PER_CHANNEL; efx->xdp_tx_queue_count = n_xdp_tx; n_channels += n_xdp_ev; netif_dbg(efx, drv, efx->net_dev, @@ -505,8 +505,7 @@ static void efx_filter_rfs_expire(struct work_struct *data) #endif /* Allocate and initialise a channel structure. */ -struct efx_channel * -efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) +static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i) { struct efx_rx_queue *rx_queue; struct efx_tx_queue *tx_queue; @@ -521,7 +520,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) channel->channel = i; channel->type = &efx_default_channel_type; - for (j = 0; j < EFX_TXQ_TYPES; j++) { + for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) { tx_queue = &channel->tx_queue[j]; tx_queue->efx = efx; tx_queue->queue = -1; @@ -545,7 +544,7 @@ int efx_init_channels(struct efx_nic *efx) unsigned int i; for (i = 0; i < EFX_MAX_CHANNELS; i++) { - efx->channel[i] = efx_alloc_channel(efx, i, NULL); + efx->channel[i] = efx_alloc_channel(efx, i); if (!efx->channel[i]) return -ENOMEM; efx->msi_context[i].efx = efx; @@ -595,7 +594,7 @@ struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel) channel->napi_str.state = 0; memset(&channel->eventq, 0, sizeof(channel->eventq)); - for (j = 0; j < EFX_TXQ_TYPES; j++) { + for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) { tx_queue = &channel->tx_queue[j]; if (tx_queue->channel) tx_queue->channel = channel; @@ -895,7 +894,7 @@ int efx_set_channels(struct efx_nic *efx) xdp_queue_number, tx_queue->queue); /* We may have a few left-over XDP TX * queues owing to xdp_tx_queue_count - * not dividing evenly by EFX_TXQ_TYPES. + * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL. * We still allocate and probe those * TXQs, but never use them. 
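 * As a worked illustration (numbers assumed for the example only):
 * with num_possible_cpus() = 10 and EFX_MAX_TXQ_PER_CHANNEL = 4,
 * efx_allocate_msix_channels() above computes
 * n_xdp_ev = DIV_ROUND_UP(10, 4) = 3 channels providing 12 TXQs,
 * so the last 2 TXQs are exactly such left-overs.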
*/ diff --git a/drivers/net/ethernet/sfc/efx_channels.h b/drivers/net/ethernet/sfc/efx_channels.h index 2d71dc9a33dd..d77ec1f77fb1 100644 --- a/drivers/net/ethernet/sfc/efx_channels.h +++ b/drivers/net/ethernet/sfc/efx_channels.h @@ -31,8 +31,6 @@ void efx_stop_eventq(struct efx_channel *channel); void efx_fini_eventq(struct efx_channel *channel); void efx_remove_eventq(struct efx_channel *channel); -struct efx_channel * -efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel); int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len); void efx_set_channel_names(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index dfc6032e75f4..72a3f0e09f52 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -11,6 +11,7 @@ #include "net_driver.h" #include <linux/module.h> #include <linux/netdevice.h> +#include <net/gre.h> #include "efx_common.h" #include "efx_channels.h" #include "efx.h" @@ -19,6 +20,7 @@ #include "rx_common.h" #include "tx_common.h" #include "nic.h" +#include "mcdi_port_common.h" #include "io.h" #include "mcdi_pcol.h" @@ -544,7 +546,7 @@ void efx_start_all(struct efx_nic *efx) * to poll now because we could have missed a change */ mutex_lock(&efx->mac_lock); - if (efx->phy_op->poll(efx)) + if (efx_mcdi_phy_poll(efx)) efx_link_status_changed(efx); mutex_unlock(&efx->mac_lock); @@ -600,7 +602,7 @@ void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats) struct efx_nic *efx = netdev_priv(net_dev); spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, stats); + efx_nic_update_stats_atomic(efx, NULL, stats); spin_unlock_bh(&efx->stats_lock); } @@ -714,9 +716,6 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) mutex_lock(&efx->mac_lock); down_write(&efx->filter_sem); mutex_lock(&efx->rss_lock); - if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && - method != RESET_TYPE_DATAPATH) - efx->phy_op->fini(efx); efx->type->fini(efx); } @@ -759,10 +758,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && method != RESET_TYPE_DATAPATH) { - rc = efx->phy_op->init(efx); - if (rc) - goto fail; - rc = efx->phy_op->reconfigure(efx); + rc = efx_mcdi_port_reconfigure(efx); if (rc && rc != -EPERM) netif_err(efx, drv, efx->net_dev, "could not restore PHY settings\n"); @@ -959,7 +955,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) /************************************************************************** * - * Dummy PHY/MAC operations + * Dummy NIC operations * * Can be used for some unimplemented operations * Needed so all function pointers are valid and do not have to be tested @@ -972,18 +968,6 @@ int efx_port_dummy_op_int(struct efx_nic *efx) } void efx_port_dummy_op_void(struct efx_nic *efx) {} -static bool efx_port_dummy_op_poll(struct efx_nic *efx) -{ - return false; -} - -static const struct efx_phy_operations efx_dummy_phy_operations = { - .init = efx_port_dummy_op_int, - .reconfigure = efx_port_dummy_op_int, - .poll = efx_port_dummy_op_poll, - .fini = efx_port_dummy_op_void, -}; - /************************************************************************** * * Data housekeeping @@ -1037,7 +1021,6 @@ int efx_init_struct(struct efx_nic *efx, efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, 
sizeof(*efx->rps_hash_table), GFP_KERNEL); #endif - efx->phy_op = &efx_dummy_phy_operations; efx->mdio.dev = net_dev; INIT_WORK(&efx->mac_work, efx_mac_work); init_waitqueue_head(&efx->flush_wq); @@ -1104,17 +1087,7 @@ int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, pci_set_master(pci_dev); - /* Set the PCI DMA mask. Try all possibilities from our - * genuine mask down to 32 bits, because some architectures - * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit - * masks event though they reject 46 bit masks. - */ - while (dma_mask > 0x7fffffffUL) { - rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); - if (rc == 0) - break; - dma_mask >>= 1; - } + rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); if (rc) { netif_err(efx, probe, efx->net_dev, "could not find a suitable DMA mask\n"); @@ -1315,6 +1288,89 @@ const struct pci_error_handlers efx_err_handlers = { .resume = efx_io_resume, }; +/* Determine whether the NIC will be able to handle TX offloads for a given + * encapsulated packet. + */ +static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb) +{ + struct gre_base_hdr *greh; + __be16 dst_port; + u8 ipproto; + + /* Does the NIC support encap offloads? + * If not, we should never get here, because we shouldn't have + * advertised encap offload feature flags in the first place. + */ + if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port)) + return false; + + /* Determine encapsulation protocol in use */ + switch (skb->protocol) { + case htons(ETH_P_IP): + ipproto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + /* If there are extension headers, this will cause us to + * think we can't offload something that we maybe could have. + */ + ipproto = ipv6_hdr(skb)->nexthdr; + break; + default: + /* Not IP, so can't offload it */ + return false; + } + switch (ipproto) { + case IPPROTO_GRE: + /* We support NVGRE but not IP over GRE or random gretaps. + * Specifically, the NIC will accept GRE as encapsulated if + * the inner protocol is Ethernet, but only handle it + * correctly if the GRE header is 8 bytes long. Moreover, + * it will not update the Checksum or Sequence Number fields + * if they are present. (The Routing Present flag, + * GRE_ROUTING, cannot be set else the header would be more + * than 8 bytes long; so we don't have to worry about it.) + */ + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER) + return false; + if (ntohs(skb->inner_protocol) != ETH_P_TEB) + return false; + if (skb_inner_mac_header(skb) - skb_transport_header(skb) != 8) + return false; + greh = (struct gre_base_hdr *)skb_transport_header(skb); + return !(greh->flags & (GRE_CSUM | GRE_SEQ)); + case IPPROTO_UDP: + /* If the port is registered for a UDP tunnel, we assume the + * packet is for that tunnel, and the NIC will handle it as + * such. If not, the NIC won't know what to do with it. + */ + dst_port = udp_hdr(skb)->dest; + return efx->type->udp_tnl_has_port(efx, dst_port); + default: + return false; + } +} + +netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features) +{ + struct efx_nic *efx = netdev_priv(dev); + + if (skb->encapsulation) { + if (features & NETIF_F_GSO_MASK) + /* Hardware can only do TSO with at most 208 bytes + * of headers. 
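+ * As an illustrative calculation (typical header sizes, no options
+ * assumed): a VXLAN-in-IPv4 packet has an inner transport offset of
+ * 14 (outer Eth) + 20 (outer IPv4) + 8 (UDP) + 8 (VXLAN) +
+ * 14 (inner Eth) + 20 (inner IPv4) = 84 bytes, well inside the limit;
+ * deeply stacked tunnels or long extension headers can exceed it, in
+ * which case GSO is cleared below.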
+ */ + if (skb_inner_transport_offset(skb) > + EFX_TSO2_MAX_HDRLEN) + features &= ~(NETIF_F_GSO_MASK); + if (features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK)) + if (!efx_can_encap_offloads(efx, skb)) + features &= ~(NETIF_F_GSO_MASK | + NETIF_F_CSUM_MASK); + } + return features; +} + int efx_get_phys_port_id(struct net_device *net_dev, struct netdev_phys_item_id *ppid) { diff --git a/drivers/net/ethernet/sfc/efx_common.h b/drivers/net/ethernet/sfc/efx_common.h index 4056f68f04e5..65513fd0cf6c 100644 --- a/drivers/net/ethernet/sfc/efx_common.h +++ b/drivers/net/ethernet/sfc/efx_common.h @@ -105,6 +105,9 @@ int efx_change_mtu(struct net_device *net_dev, int new_mtu); extern const struct pci_error_handlers efx_err_handlers; +netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features); + int efx_get_phys_port_id(struct net_device *net_dev, struct netdev_phys_item_id *ppid); diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 4ffda7782f68..12a91c559aa2 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -50,8 +50,7 @@ static int efx_ethtool_phys_id(struct net_device *net_dev, return 1; /* cycle on/off once per second */ } - efx->type->set_id_led(efx, mode); - return 0; + return efx_mcdi_set_id_led(efx, mode); } static int efx_ethtool_get_regs_len(struct net_device *net_dev) diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c index 05ac87807929..bf1443539a1a 100644 --- a/drivers/net/ethernet/sfc/ethtool_common.c +++ b/drivers/net/ethernet/sfc/ethtool_common.c @@ -15,6 +15,7 @@ #include "selftest.h" #include "rx_common.h" #include "ethtool_common.h" +#include "mcdi_port_common.h" struct efx_sw_stat_desc { const char *name; @@ -105,7 +106,6 @@ void efx_ethtool_get_drvinfo(struct net_device *net_dev, struct efx_nic *efx = netdev_priv(net_dev); strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); - strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version)); efx_mcdi_print_fwver(efx, info->fw_version, sizeof(info->fw_version)); strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info)); @@ -221,7 +221,7 @@ int efx_ethtool_set_pauseparam(struct net_device *net_dev, efx_link_set_wanted_fc(efx, wanted_fc); if (efx->link_advertising[0] != old_adv || (efx->wanted_fc ^ old_fc) & EFX_FC_AUTO) { - rc = efx->phy_op->reconfigure(efx); + rc = efx_mcdi_port_reconfigure(efx); if (rc) { netif_err(efx, drv, efx->net_dev, "Unable to advertise requested flow " @@ -372,20 +372,15 @@ int efx_ethtool_fill_self_tests(struct efx_nic *efx, efx_fill_test(n++, strings, data, &tests->registers, "core", 0, "registers", NULL); - if (efx->phy_op->run_tests != NULL) { - EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL); + for (i = 0; true; ++i) { + const char *name; - for (i = 0; true; ++i) { - const char *name; - - EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS); - name = efx->phy_op->test_name(efx, i); - if (name == NULL) - break; + EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS); + name = efx_mcdi_phy_test_name(efx, i); + if (name == NULL) + break; - efx_fill_test(n++, strings, data, &tests->phy_ext[i], - "phy", 0, name, NULL); - } + efx_fill_test(n++, strings, data, &tests->phy_ext[i], "phy", 0, name, NULL); } /* Loopback tests */ @@ -412,7 +407,7 @@ static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) snprintf(strings, ETH_GSTRING_LEN, "tx-%u.tx_packets", channel->tx_queue[0].queue / - EFX_TXQ_TYPES); 
+ EFX_MAX_TXQ_PER_CHANNEL); strings += ETH_GSTRING_LEN; } @@ -571,7 +566,7 @@ int efx_ethtool_get_link_ksettings(struct net_device *net_dev, u32 supported; mutex_lock(&efx->mac_lock); - efx->phy_op->get_link_ksettings(efx, cmd); + efx_mcdi_phy_get_link_ksettings(efx, cmd); mutex_unlock(&efx->mac_lock); /* Both MACs support pause frames (bidirectional and respond-only) */ @@ -607,7 +602,7 @@ int efx_ethtool_set_link_ksettings(struct net_device *net_dev, } mutex_lock(&efx->mac_lock); - rc = efx->phy_op->set_link_ksettings(efx, cmd); + rc = efx_mcdi_phy_set_link_ksettings(efx, cmd); mutex_unlock(&efx->mac_lock); return rc; } @@ -618,10 +613,8 @@ int efx_ethtool_get_fecparam(struct net_device *net_dev, struct efx_nic *efx = netdev_priv(net_dev); int rc; - if (!efx->phy_op || !efx->phy_op->get_fecparam) - return -EOPNOTSUPP; mutex_lock(&efx->mac_lock); - rc = efx->phy_op->get_fecparam(efx, fecparam); + rc = efx_mcdi_phy_get_fecparam(efx, fecparam); mutex_unlock(&efx->mac_lock); return rc; @@ -633,10 +626,8 @@ int efx_ethtool_set_fecparam(struct net_device *net_dev, struct efx_nic *efx = netdev_priv(net_dev); int rc; - if (!efx->phy_op || !efx->phy_op->get_fecparam) - return -EOPNOTSUPP; mutex_lock(&efx->mac_lock); - rc = efx->phy_op->set_fecparam(efx, fecparam); + rc = efx_mcdi_phy_set_fecparam(efx, fecparam); mutex_unlock(&efx->mac_lock); return rc; @@ -1332,11 +1323,8 @@ int efx_ethtool_get_module_eeprom(struct net_device *net_dev, struct efx_nic *efx = netdev_priv(net_dev); int ret; - if (!efx->phy_op || !efx->phy_op->get_module_eeprom) - return -EOPNOTSUPP; - mutex_lock(&efx->mac_lock); - ret = efx->phy_op->get_module_eeprom(efx, ee, data); + ret = efx_mcdi_phy_get_module_eeprom(efx, ee, data); mutex_unlock(&efx->mac_lock); return ret; @@ -1348,11 +1336,8 @@ int efx_ethtool_get_module_info(struct net_device *net_dev, struct efx_nic *efx = netdev_priv(net_dev); int ret; - if (!efx->phy_op || !efx->phy_op->get_module_info) - return -EOPNOTSUPP; - mutex_lock(&efx->mac_lock); - ret = efx->phy_op->get_module_info(efx, modinfo); + ret = efx_mcdi_phy_get_module_info(efx, modinfo); mutex_unlock(&efx->mac_lock); return ret; diff --git a/drivers/net/ethernet/sfc/falcon/farch.c b/drivers/net/ethernet/sfc/falcon/farch.c index fa1ade856b10..2c91792cec01 100644 --- a/drivers/net/ethernet/sfc/falcon/farch.c +++ b/drivers/net/ethernet/sfc/falcon/farch.c @@ -870,17 +870,12 @@ static u16 ef4_farch_handle_rx_not_ok(struct ef4_rx_queue *rx_queue, { struct ef4_channel *channel = ef4_rx_queue_channel(rx_queue); struct ef4_nic *efx = rx_queue->efx; - bool rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err; + bool __maybe_unused rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err; bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err; bool rx_ev_frm_trunc, rx_ev_drib_nib, rx_ev_tobe_disc; - bool rx_ev_other_err, rx_ev_pause_frm; - bool rx_ev_hdr_type, rx_ev_mcast_pkt; - unsigned rx_ev_pkt_type; + bool rx_ev_pause_frm; - rx_ev_hdr_type = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE); - rx_ev_mcast_pkt = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_PKT); rx_ev_tobe_disc = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_TOBE_DISC); - rx_ev_pkt_type = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_TYPE); rx_ev_buf_owner_id_err = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_BUF_OWNER_ID_ERR); rx_ev_ip_hdr_chksum_err = EF4_QWORD_FIELD(*event, @@ -893,10 +888,6 @@ static u16 ef4_farch_handle_rx_not_ok(struct ef4_rx_queue *rx_queue, 0 : EF4_QWORD_FIELD(*event, FSF_AA_RX_EV_DRIB_NIB)); rx_ev_pause_frm = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_PAUSE_FRM_ERR); - /* 
Every error apart from tobe_disc and pause_frm */ - rx_ev_other_err = (rx_ev_drib_nib | rx_ev_tcp_udp_chksum_err | - rx_ev_buf_owner_id_err | rx_ev_eth_crc_err | - rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err); /* Count errors that are not in MAC stats. Ignore expected * checksum errors during self-test. */ @@ -916,6 +907,13 @@ static u16 ef4_farch_handle_rx_not_ok(struct ef4_rx_queue *rx_queue, * to a FIFO overflow. */ #ifdef DEBUG + { + /* Every error apart from tobe_disc and pause_frm */ + + bool rx_ev_other_err = (rx_ev_drib_nib | rx_ev_tcp_udp_chksum_err | + rx_ev_buf_owner_id_err | rx_ev_eth_crc_err | + rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err); + if (rx_ev_other_err && net_ratelimit()) { netif_dbg(efx, rx_err, efx->net_dev, " RX queue %d unexpected RX event " @@ -932,6 +930,7 @@ static u16 ef4_farch_handle_rx_not_ok(struct ef4_rx_queue *rx_queue, rx_ev_tobe_disc ? " [TOBE_DISC]" : "", rx_ev_pause_frm ? " [PAUSE]" : ""); } + } #endif /* The frame must be discarded if any of these are true. */ @@ -1643,15 +1642,11 @@ void ef4_farch_rx_push_indir_table(struct ef4_nic *efx) */ void ef4_farch_dimension_resources(struct ef4_nic *efx, unsigned sram_lim_qw) { - unsigned vi_count, buftbl_min; + unsigned vi_count; /* Account for the buffer table entries backing the datapath channels * and the descriptor caches for those channels. */ - buftbl_min = ((efx->n_rx_channels * EF4_MAX_DMAQ_SIZE + - efx->n_tx_channels * EF4_TXQ_TYPES * EF4_MAX_DMAQ_SIZE + - efx->n_channels * EF4_MAX_EVQ_SIZE) - * sizeof(ef4_qword_t) / EF4_BUF_SIZE); vi_count = max(efx->n_channels, efx->n_tx_channels * EF4_TXQ_TYPES); efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES; @@ -2532,7 +2527,6 @@ int ef4_farch_filter_remove_safe(struct ef4_nic *efx, enum ef4_farch_filter_table_id table_id; struct ef4_farch_filter_table *table; unsigned int filter_idx; - struct ef4_farch_filter_spec *spec; int rc; table_id = ef4_farch_filter_id_table_id(filter_id); @@ -2543,7 +2537,6 @@ int ef4_farch_filter_remove_safe(struct ef4_nic *efx, filter_idx = ef4_farch_filter_id_index(filter_id); if (filter_idx >= table->size) return -ENOENT; - spec = &table->spec[filter_idx]; spin_lock_bh(&efx->filter_lock); rc = ef4_farch_filter_remove(efx, table, filter_idx, priority); diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 05ea3523890a..966f13e7475d 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -140,6 +140,7 @@ static struct page *ef4_reuse_page(struct ef4_rx_queue *rx_queue) * ef4_init_rx_buffers - create EF4_RX_BATCH page-based RX buffers * * @rx_queue: Efx RX queue + * @atomic: control memory allocation flags * * This allocates a batch of pages, maps them for DMA, and populates * struct ef4_rx_buffers for each one. Return a negative error code or @@ -316,6 +317,7 @@ static void ef4_discard_rx_packet(struct ef4_channel *channel, * This will aim to fill the RX descriptor queue up to * @rx_queue->@max_fill. If there is insufficient atomic * memory to do so, a slow fill will be scheduled. + * @atomic: control memory allocation flags * * The caller must provide serialisation (none is used here).
In practise, * this means this function must run from the NAPI handler, or be called diff --git a/drivers/net/ethernet/sfc/falcon/selftest.c b/drivers/net/ethernet/sfc/falcon/selftest.c index 147677c7c72f..6a454ac6f876 100644 --- a/drivers/net/ethernet/sfc/falcon/selftest.c +++ b/drivers/net/ethernet/sfc/falcon/selftest.c @@ -65,7 +65,7 @@ static const char *const ef4_interrupt_mode_names[] = { STRING_TABLE_LOOKUP(efx->interrupt_mode, ef4_interrupt_mode) /** - * ef4_loopback_state - persistent state during a loopback selftest + * struct ef4_loopback_state - persistent state during a loopback selftest * @flush: Drop all packets in ef4_loopback_rx_packet * @packet_count: Number of packets being used in this test * @skbs: An array of skbs transmitted diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 4002f9a3ae90..d75cf5ff5686 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -320,7 +320,7 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue) unsigned write_ptr; unsigned old_write_count = tx_queue->write_count; - tx_queue->xmit_more_available = false; + tx_queue->xmit_pending = false; if (unlikely(tx_queue->write_count == tx_queue->insert_count)) return; @@ -372,6 +372,8 @@ int efx_farch_tx_probe(struct efx_tx_queue *tx_queue) struct efx_nic *efx = tx_queue->efx; unsigned entries; + tx_queue->type = ((tx_queue->label & 1) ? EFX_TXQ_TYPE_OUTER_CSUM : 0) | + ((tx_queue->label & 2) ? EFX_TXQ_TYPE_HIGHPRI : 0); entries = tx_queue->ptr_mask + 1; return efx_alloc_special_buffer(efx, &tx_queue->txd, entries * sizeof(efx_qword_t)); @@ -379,7 +381,7 @@ int efx_farch_tx_probe(struct efx_tx_queue *tx_queue) void efx_farch_tx_init(struct efx_tx_queue *tx_queue) { - int csum = tx_queue->label & EFX_TXQ_TYPE_OFFLOAD; + int csum = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM; struct efx_nic *efx = tx_queue->efx; efx_oword_t reg; @@ -409,10 +411,12 @@ void efx_farch_tx_init(struct efx_tx_queue *tx_queue) EFX_POPULATE_OWORD_1(reg, FRF_BZ_TX_PACE, - (tx_queue->label & EFX_TXQ_TYPE_HIGHPRI) ? + (tx_queue->type & EFX_TXQ_TYPE_HIGHPRI) ? 
FFE_BZ_TX_PACE_OFF : FFE_BZ_TX_PACE_RESERVED); efx_writeo_table(efx, ®, FR_BZ_TX_PACE_TBL, tx_queue->queue); + + tx_queue->tso_version = 1; } static void efx_farch_flush_tx_queue(struct efx_tx_queue *tx_queue) @@ -832,13 +836,13 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR); tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL); tx_queue = efx_channel_get_tx_queue( - channel, tx_ev_q_label % EFX_TXQ_TYPES); + channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL); efx_xmit_done(tx_queue, tx_ev_desc_ptr); } else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) { /* Rewrite the FIFO write pointer */ tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL); tx_queue = efx_channel_get_tx_queue( - channel, tx_ev_q_label % EFX_TXQ_TYPES); + channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL); netif_tx_lock(efx->net_dev); efx_farch_notify_tx_desc(tx_queue); @@ -863,13 +867,8 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue, bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err; bool rx_ev_frm_trunc, rx_ev_tobe_disc; bool rx_ev_other_err, rx_ev_pause_frm; - bool rx_ev_hdr_type, rx_ev_mcast_pkt; - unsigned rx_ev_pkt_type; - rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE); - rx_ev_mcast_pkt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_PKT); rx_ev_tobe_disc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_TOBE_DISC); - rx_ev_pkt_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_TYPE); rx_ev_buf_owner_id_err = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BUF_OWNER_ID_ERR); rx_ev_ip_hdr_chksum_err = EFX_QWORD_FIELD(*event, @@ -918,6 +917,8 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue, rx_ev_tobe_disc ? " [TOBE_DISC]" : "", rx_ev_pause_frm ? " [PAUSE]" : ""); } +#else + (void) rx_ev_other_err; #endif if (efx->net_dev->features & NETIF_F_RXALL) @@ -1083,9 +1084,9 @@ efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) int qid; qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); - if (qid < EFX_TXQ_TYPES * (efx->n_tx_channels + efx->n_extra_tx_channels)) { - tx_queue = efx_get_tx_queue(efx, qid / EFX_TXQ_TYPES, - qid % EFX_TXQ_TYPES); + if (qid < EFX_MAX_TXQ_PER_CHANNEL * (efx->n_tx_channels + efx->n_extra_tx_channels)) { + tx_queue = efx_get_tx_queue(efx, qid / EFX_MAX_TXQ_PER_CHANNEL, + qid % EFX_MAX_TXQ_PER_CHANNEL); if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) { efx_farch_magic_event(tx_queue->channel, EFX_CHANNEL_MAGIC_TX_DRAIN(tx_queue)); @@ -1678,10 +1679,10 @@ void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw) * and the descriptor caches for those channels. 
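	 * As a worked example (constants assumed for illustration only:
	 * EFX_MAX_DMAQ_SIZE = 4096 descriptors, 8-byte efx_qword_t,
	 * EFX_BUF_SIZE = 4096): one maximally-sized queue needs
	 * 4096 * 8 / 4096 = 8 buffer table entries, and buftbl_min below
	 * simply scales that by the RX, TX and event queue counts.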
*/ buftbl_min = ((efx->n_rx_channels * EFX_MAX_DMAQ_SIZE + - total_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE + + total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL * EFX_MAX_DMAQ_SIZE + efx->n_channels * EFX_MAX_EVQ_SIZE) * sizeof(efx_qword_t) / EFX_BUF_SIZE); - vi_count = max(efx->n_channels, total_tx_channels * EFX_TXQ_TYPES); + vi_count = max(efx->n_channels, total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL); #ifdef CONFIG_SFC_SRIOV if (efx->type->sriov_wanted) { @@ -2592,7 +2593,6 @@ int efx_farch_filter_remove_safe(struct efx_nic *efx, enum efx_farch_filter_table_id table_id; struct efx_farch_filter_table *table; unsigned int filter_idx; - struct efx_farch_filter_spec *spec; int rc; table_id = efx_farch_filter_id_table_id(filter_id); @@ -2604,7 +2604,6 @@ int efx_farch_filter_remove_safe(struct efx_nic *efx, if (filter_idx >= table->size) return -ENOENT; down_write(&state->lock); - spec = &table->spec[filter_idx]; rc = efx_farch_filter_remove(efx, table, filter_idx, priority); up_write(&state->lock); diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 5467819aef6e..be6bfd6b7ec7 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1868,10 +1868,9 @@ int efx_mcdi_handle_assertion(struct efx_nic *efx) return efx_mcdi_exit_assertion(efx); } -void efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode) +int efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode) { MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_ID_LED_IN_LEN); - int rc; BUILD_BUG_ON(EFX_LED_OFF != MC_CMD_LED_OFF); BUILD_BUG_ON(EFX_LED_ON != MC_CMD_LED_ON); @@ -1881,8 +1880,7 @@ void efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode) MCDI_SET_DWORD(inbuf, SET_ID_LED_IN_STATE, mode); - rc = efx_mcdi_rpc(efx, MC_CMD_SET_ID_LED, inbuf, sizeof(inbuf), - NULL, 0, NULL); + return efx_mcdi_rpc(efx, MC_CMD_SET_ID_LED, inbuf, sizeof(inbuf), NULL, 0, NULL); } static int efx_mcdi_reset_func(struct efx_nic *efx) diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 658cf345420d..69c2924a147c 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -190,6 +190,7 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev); * 32-bit-aligned. Also, on Siena we must copy to the MC shared * memory strictly 32 bits at a time, so add any necessary padding. 
*/ +#define MCDI_TX_BUF_LEN(_len) DIV_ROUND_UP((_len), 4) #define _MCDI_DECLARE_BUF(_name, _len) \ efx_dword_t _name[DIV_ROUND_UP(_len, 4)] #define MCDI_DECLARE_BUF(_name, _len) \ @@ -348,14 +349,13 @@ int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type, int efx_new_mcdi_nvram_test_all(struct efx_nic *efx); int efx_mcdi_nvram_test_all(struct efx_nic *efx); int efx_mcdi_handle_assertion(struct efx_nic *efx); -void efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode); +int efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode); int efx_mcdi_wol_filter_set_magic(struct efx_nic *efx, const u8 *mac, int *id_out); int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out); int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id); int efx_mcdi_wol_filter_reset(struct efx_nic *efx); int efx_mcdi_flush_rxqs(struct efx_nic *efx); -int efx_mcdi_port_reconfigure(struct efx_nic *efx); void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); void efx_mcdi_mac_start_stats(struct efx_nic *efx); void efx_mcdi_mac_stop_stats(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/mcdi_functions.c b/drivers/net/ethernet/sfc/mcdi_functions.c index d8a3af86ef78..d3e6d8239f5c 100644 --- a/drivers/net/ethernet/sfc/mcdi_functions.c +++ b/drivers/net/ethernet/sfc/mcdi_functions.c @@ -160,11 +160,12 @@ fail: outbuf, outlen, rc); } -int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2) +int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue) { MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / EFX_BUF_SIZE)); - bool csum_offload = tx_queue->label & EFX_TXQ_TYPE_OFFLOAD; + bool csum_offload = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM; + bool inner_csum = tx_queue->type & EFX_TXQ_TYPE_INNER_CSUM; size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; struct efx_channel *channel = tx_queue->channel; struct efx_nic *efx = tx_queue->efx; @@ -194,22 +195,31 @@ int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2) inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); do { - MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS, + bool tso_v2 = tx_queue->tso_version == 2; + + /* TSOv2 implies IP header checksum offload for TSO frames, + * so we can safely disable IP header checksum offload for + * everything else. If we don't have TSOv2, then we have to + * enable IP header checksum offload, which is strictly + * incorrect but better than breaking TSO. + */ + MCDI_POPULATE_DWORD_6(inbuf, INIT_TXQ_IN_FLAGS, /* This flag was removed from mcdi_pcol.h for * the non-_EXT version of INIT_TXQ. However, * firmware still honours it. */ INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, - INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, + INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !(csum_offload && tso_v2), INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload, - INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, - tx_queue->timestamping); + INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, tx_queue->timestamping, + INIT_TXQ_IN_FLAG_INNER_IP_CSUM_EN, inner_csum && !tso_v2, + INIT_TXQ_IN_FLAG_INNER_TCP_CSUM_EN, inner_csum); rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen, NULL, 0, NULL); if (rc == -ENOSPC && tso_v2) { /* Retry without TSOv2 if we're short on contexts. */ - tso_v2 = false; + tx_queue->tso_version = 0; netif_warn(efx, probe, efx->net_dev, "TSOv2 context not available to segment in " "hardware. 
TCP performance may be reduced.\n" diff --git a/drivers/net/ethernet/sfc/mcdi_functions.h b/drivers/net/ethernet/sfc/mcdi_functions.h index 687be8b00cd8..b0e2f53a0d9b 100644 --- a/drivers/net/ethernet/sfc/mcdi_functions.h +++ b/drivers/net/ethernet/sfc/mcdi_functions.h @@ -19,7 +19,7 @@ int efx_mcdi_ev_probe(struct efx_channel *channel); int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2); void efx_mcdi_ev_remove(struct efx_channel *channel); void efx_mcdi_ev_fini(struct efx_channel *channel); -int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2); +int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue); void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue); void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue); int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue); diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index 98eeb404f68d..94c6a345c0b1 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -70,592 +70,6 @@ static int efx_mcdi_mdio_write(struct net_device *net_dev, return 0; } -static int efx_mcdi_phy_probe(struct efx_nic *efx) -{ - struct efx_mcdi_phy_data *phy_data; - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); - u32 caps; - int rc; - - /* Initialise and populate phy_data */ - phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL); - if (phy_data == NULL) - return -ENOMEM; - - rc = efx_mcdi_get_phy_cfg(efx, phy_data); - if (rc != 0) - goto fail; - - /* Read initial link advertisement */ - BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, - outbuf, sizeof(outbuf), NULL); - if (rc) - goto fail; - - /* Fill out nic state */ - efx->phy_data = phy_data; - efx->phy_type = phy_data->type; - - efx->mdio_bus = phy_data->channel; - efx->mdio.prtad = phy_data->port; - efx->mdio.mmds = phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22); - efx->mdio.mode_support = 0; - if (phy_data->mmd_mask & (1 << MC_CMD_MMD_CLAUSE22)) - efx->mdio.mode_support |= MDIO_SUPPORTS_C22; - if (phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22)) - efx->mdio.mode_support |= MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; - - caps = MCDI_DWORD(outbuf, GET_LINK_OUT_CAP); - if (caps & (1 << MC_CMD_PHY_CAP_AN_LBN)) - mcdi_to_ethtool_linkset(phy_data->media, caps, - efx->link_advertising); - else - phy_data->forced_cap = caps; - - /* Assert that we can map efx -> mcdi loopback modes */ - BUILD_BUG_ON(LOOPBACK_NONE != MC_CMD_LOOPBACK_NONE); - BUILD_BUG_ON(LOOPBACK_DATA != MC_CMD_LOOPBACK_DATA); - BUILD_BUG_ON(LOOPBACK_GMAC != MC_CMD_LOOPBACK_GMAC); - BUILD_BUG_ON(LOOPBACK_XGMII != MC_CMD_LOOPBACK_XGMII); - BUILD_BUG_ON(LOOPBACK_XGXS != MC_CMD_LOOPBACK_XGXS); - BUILD_BUG_ON(LOOPBACK_XAUI != MC_CMD_LOOPBACK_XAUI); - BUILD_BUG_ON(LOOPBACK_GMII != MC_CMD_LOOPBACK_GMII); - BUILD_BUG_ON(LOOPBACK_SGMII != MC_CMD_LOOPBACK_SGMII); - BUILD_BUG_ON(LOOPBACK_XGBR != MC_CMD_LOOPBACK_XGBR); - BUILD_BUG_ON(LOOPBACK_XFI != MC_CMD_LOOPBACK_XFI); - BUILD_BUG_ON(LOOPBACK_XAUI_FAR != MC_CMD_LOOPBACK_XAUI_FAR); - BUILD_BUG_ON(LOOPBACK_GMII_FAR != MC_CMD_LOOPBACK_GMII_FAR); - BUILD_BUG_ON(LOOPBACK_SGMII_FAR != MC_CMD_LOOPBACK_SGMII_FAR); - BUILD_BUG_ON(LOOPBACK_XFI_FAR != MC_CMD_LOOPBACK_XFI_FAR); - BUILD_BUG_ON(LOOPBACK_GPHY != MC_CMD_LOOPBACK_GPHY); - BUILD_BUG_ON(LOOPBACK_PHYXS != MC_CMD_LOOPBACK_PHYXS); - BUILD_BUG_ON(LOOPBACK_PCS != MC_CMD_LOOPBACK_PCS); - BUILD_BUG_ON(LOOPBACK_PMAPMD != MC_CMD_LOOPBACK_PMAPMD); - BUILD_BUG_ON(LOOPBACK_XPORT != MC_CMD_LOOPBACK_XPORT); - 
BUILD_BUG_ON(LOOPBACK_XGMII_WS != MC_CMD_LOOPBACK_XGMII_WS); - BUILD_BUG_ON(LOOPBACK_XAUI_WS != MC_CMD_LOOPBACK_XAUI_WS); - BUILD_BUG_ON(LOOPBACK_XAUI_WS_FAR != MC_CMD_LOOPBACK_XAUI_WS_FAR); - BUILD_BUG_ON(LOOPBACK_XAUI_WS_NEAR != MC_CMD_LOOPBACK_XAUI_WS_NEAR); - BUILD_BUG_ON(LOOPBACK_GMII_WS != MC_CMD_LOOPBACK_GMII_WS); - BUILD_BUG_ON(LOOPBACK_XFI_WS != MC_CMD_LOOPBACK_XFI_WS); - BUILD_BUG_ON(LOOPBACK_XFI_WS_FAR != MC_CMD_LOOPBACK_XFI_WS_FAR); - BUILD_BUG_ON(LOOPBACK_PHYXS_WS != MC_CMD_LOOPBACK_PHYXS_WS); - - rc = efx_mcdi_loopback_modes(efx, &efx->loopback_modes); - if (rc != 0) - goto fail; - /* The MC indicates that LOOPBACK_NONE is a valid loopback mode, - * but by convention we don't */ - efx->loopback_modes &= ~(1 << LOOPBACK_NONE); - - /* Set the initial link mode */ - efx_mcdi_phy_decode_link( - efx, &efx->link_state, - MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), - MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), - MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); - - /* Record the initial FEC configuration (or nearest approximation - * representable in the ethtool configuration space) - */ - efx->fec_config = mcdi_fec_caps_to_ethtool(caps, - efx->link_state.speed == 25000 || - efx->link_state.speed == 50000); - - /* Default to Autonegotiated flow control if the PHY supports it */ - efx->wanted_fc = EFX_FC_RX | EFX_FC_TX; - if (phy_data->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) - efx->wanted_fc |= EFX_FC_AUTO; - efx_link_set_wanted_fc(efx, efx->wanted_fc); - - return 0; - -fail: - kfree(phy_data); - return rc; -} - -static void efx_mcdi_phy_remove(struct efx_nic *efx) -{ - struct efx_mcdi_phy_data *phy_data = efx->phy_data; - - efx->phy_data = NULL; - kfree(phy_data); -} - -static void efx_mcdi_phy_get_link_ksettings(struct efx_nic *efx, - struct ethtool_link_ksettings *cmd) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); - int rc; - - cmd->base.speed = efx->link_state.speed; - cmd->base.duplex = efx->link_state.fd; - cmd->base.port = mcdi_to_ethtool_media(phy_cfg->media); - cmd->base.phy_address = phy_cfg->port; - cmd->base.autoneg = !!(efx->link_advertising[0] & ADVERTISED_Autoneg); - cmd->base.mdio_support = (efx->mdio.mode_support & - (MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22)); - - mcdi_to_ethtool_linkset(phy_cfg->media, phy_cfg->supported_cap, - cmd->link_modes.supported); - memcpy(cmd->link_modes.advertising, efx->link_advertising, - sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); - - BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, - outbuf, sizeof(outbuf), NULL); - if (rc) - return; - mcdi_to_ethtool_linkset(phy_cfg->media, - MCDI_DWORD(outbuf, GET_LINK_OUT_LP_CAP), - cmd->link_modes.lp_advertising); -} - -static int -efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, - const struct ethtool_link_ksettings *cmd) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - u32 caps; - int rc; - - if (cmd->base.autoneg) { - caps = (ethtool_linkset_to_mcdi_cap(cmd->link_modes.advertising) | - 1 << MC_CMD_PHY_CAP_AN_LBN); - } else if (cmd->base.duplex) { - switch (cmd->base.speed) { - case 10: caps = 1 << MC_CMD_PHY_CAP_10FDX_LBN; break; - case 100: caps = 1 << MC_CMD_PHY_CAP_100FDX_LBN; break; - case 1000: caps = 1 << MC_CMD_PHY_CAP_1000FDX_LBN; break; - case 10000: caps = 1 << MC_CMD_PHY_CAP_10000FDX_LBN; break; - case 40000: caps = 1 << MC_CMD_PHY_CAP_40000FDX_LBN; break; - case 100000: caps = 1 << MC_CMD_PHY_CAP_100000FDX_LBN; break; - case 25000: caps = 1 << MC_CMD_PHY_CAP_25000FDX_LBN; 
break; - case 50000: caps = 1 << MC_CMD_PHY_CAP_50000FDX_LBN; break; - default: return -EINVAL; - } - } else { - switch (cmd->base.speed) { - case 10: caps = 1 << MC_CMD_PHY_CAP_10HDX_LBN; break; - case 100: caps = 1 << MC_CMD_PHY_CAP_100HDX_LBN; break; - case 1000: caps = 1 << MC_CMD_PHY_CAP_1000HDX_LBN; break; - default: return -EINVAL; - } - } - - caps |= ethtool_fec_caps_to_mcdi(efx->fec_config); - - rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx), - efx->loopback_mode, 0); - if (rc) - return rc; - - if (cmd->base.autoneg) { - efx_link_set_advertising(efx, cmd->link_modes.advertising); - phy_cfg->forced_cap = 0; - } else { - efx_link_clear_advertising(efx); - phy_cfg->forced_cap = caps; - } - return 0; -} - -static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, - const struct ethtool_fecparam *fec) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - u32 caps; - int rc; - - /* Work out what efx_mcdi_phy_set_link_ksettings() would produce from - * saved advertising bits - */ - if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, efx->link_advertising)) - caps = (ethtool_linkset_to_mcdi_cap(efx->link_advertising) | - 1 << MC_CMD_PHY_CAP_AN_LBN); - else - caps = phy_cfg->forced_cap; - - caps |= ethtool_fec_caps_to_mcdi(fec->fec); - rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx), - efx->loopback_mode, 0); - if (rc) - return rc; - - /* Record the new FEC setting for subsequent set_link calls */ - efx->fec_config = fec->fec; - return 0; -} - -static const char *const mcdi_sft9001_cable_diag_names[] = { - "cable.pairA.length", - "cable.pairB.length", - "cable.pairC.length", - "cable.pairD.length", - "cable.pairA.status", - "cable.pairB.status", - "cable.pairC.status", - "cable.pairD.status", -}; - -static int efx_mcdi_bist(struct efx_nic *efx, unsigned int bist_mode, - int *results) -{ - unsigned int retry, i, count = 0; - size_t outlen; - u32 status; - MCDI_DECLARE_BUF(inbuf, MC_CMD_START_BIST_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_POLL_BIST_OUT_SFT9001_LEN); - u8 *ptr; - int rc; - - BUILD_BUG_ON(MC_CMD_START_BIST_OUT_LEN != 0); - MCDI_SET_DWORD(inbuf, START_BIST_IN_TYPE, bist_mode); - rc = efx_mcdi_rpc(efx, MC_CMD_START_BIST, - inbuf, MC_CMD_START_BIST_IN_LEN, NULL, 0, NULL); - if (rc) - goto out; - - /* Wait up to 10s for BIST to finish */ - for (retry = 0; retry < 100; ++retry) { - BUILD_BUG_ON(MC_CMD_POLL_BIST_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_POLL_BIST, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - goto out; - - status = MCDI_DWORD(outbuf, POLL_BIST_OUT_RESULT); - if (status != MC_CMD_POLL_BIST_RUNNING) - goto finished; - - msleep(100); - } - - rc = -ETIMEDOUT; - goto out; - -finished: - results[count++] = (status == MC_CMD_POLL_BIST_PASSED) ? 
1 : -1; - - /* SFT9001 specific cable diagnostics output */ - if (efx->phy_type == PHY_TYPE_SFT9001B && - (bist_mode == MC_CMD_PHY_BIST_CABLE_SHORT || - bist_mode == MC_CMD_PHY_BIST_CABLE_LONG)) { - ptr = MCDI_PTR(outbuf, POLL_BIST_OUT_SFT9001_CABLE_LENGTH_A); - if (status == MC_CMD_POLL_BIST_PASSED && - outlen >= MC_CMD_POLL_BIST_OUT_SFT9001_LEN) { - for (i = 0; i < 8; i++) { - results[count + i] = - EFX_DWORD_FIELD(((efx_dword_t *)ptr)[i], - EFX_DWORD_0); - } - } - count += 8; - } - rc = count; - -out: - return rc; -} - -static int efx_mcdi_phy_run_tests(struct efx_nic *efx, int *results, - unsigned flags) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - u32 mode; - int rc; - - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_LBN)) { - rc = efx_mcdi_bist(efx, MC_CMD_PHY_BIST, results); - if (rc < 0) - return rc; - - results += rc; - } - - /* If we support both LONG and SHORT, then run each in response to - * break or not. Otherwise, run the one we support */ - mode = 0; - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_SHORT_LBN)) { - if ((flags & ETH_TEST_FL_OFFLINE) && - (phy_cfg->flags & - (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN))) - mode = MC_CMD_PHY_BIST_CABLE_LONG; - else - mode = MC_CMD_PHY_BIST_CABLE_SHORT; - } else if (phy_cfg->flags & - (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN)) - mode = MC_CMD_PHY_BIST_CABLE_LONG; - - if (mode != 0) { - rc = efx_mcdi_bist(efx, mode, results); - if (rc < 0) - return rc; - results += rc; - } - - return 0; -} - -static const char *efx_mcdi_phy_test_name(struct efx_nic *efx, - unsigned int index) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_LBN)) { - if (index == 0) - return "bist"; - --index; - } - - if (phy_cfg->flags & ((1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_SHORT_LBN) | - (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN))) { - if (index == 0) - return "cable"; - --index; - - if (efx->phy_type == PHY_TYPE_SFT9001B) { - if (index < ARRAY_SIZE(mcdi_sft9001_cable_diag_names)) - return mcdi_sft9001_cable_diag_names[index]; - index -= ARRAY_SIZE(mcdi_sft9001_cable_diag_names); - } - } - - return NULL; -} - -#define SFP_PAGE_SIZE 128 -#define SFF_DIAG_TYPE_OFFSET 92 -#define SFF_DIAG_ADDR_CHANGE BIT(2) -#define SFF_8079_NUM_PAGES 2 -#define SFF_8472_NUM_PAGES 4 -#define SFF_8436_NUM_PAGES 5 -#define SFF_DMT_LEVEL_OFFSET 94 - -/** efx_mcdi_phy_get_module_eeprom_page() - Get a single page of module eeprom - * @efx: NIC context - * @page: EEPROM page number - * @data: Destination data pointer - * @offset: Offset in page to copy from in to data - * @space: Space available in data - * - * Return: - * >=0 - amount of data copied - * <0 - error - */ -static int efx_mcdi_phy_get_module_eeprom_page(struct efx_nic *efx, - unsigned int page, - u8 *data, ssize_t offset, - ssize_t space) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX); - MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN); - size_t outlen; - unsigned int payload_len; - unsigned int to_copy; - int rc; - - if (offset > SFP_PAGE_SIZE) - return -EINVAL; - - to_copy = min(space, SFP_PAGE_SIZE - offset); - - MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page); - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_PHY_MEDIA_INFO, - inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), - &outlen); - - if (rc) - return rc; - - if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST + - SFP_PAGE_SIZE)) - return -EIO; - - payload_len = MCDI_DWORD(outbuf, 
GET_PHY_MEDIA_INFO_OUT_DATALEN); - if (payload_len != SFP_PAGE_SIZE) - return -EIO; - - memcpy(data, MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + offset, - to_copy); - - return to_copy; -} - -static int efx_mcdi_phy_get_module_eeprom_byte(struct efx_nic *efx, - unsigned int page, - u8 byte) -{ - int rc; - u8 data; - - rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, &data, byte, 1); - if (rc == 1) - return data; - - return rc; -} - -static int efx_mcdi_phy_diag_type(struct efx_nic *efx) -{ - /* Page zero of the EEPROM includes the diagnostic type at byte 92. */ - return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, - SFF_DIAG_TYPE_OFFSET); -} - -static int efx_mcdi_phy_sff_8472_level(struct efx_nic *efx) -{ - /* Page zero of the EEPROM includes the DMT level at byte 94. */ - return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, - SFF_DMT_LEVEL_OFFSET); -} - -static u32 efx_mcdi_phy_module_type(struct efx_nic *efx) -{ - struct efx_mcdi_phy_data *phy_data = efx->phy_data; - - if (phy_data->media != MC_CMD_MEDIA_QSFP_PLUS) - return phy_data->media; - - /* A QSFP+ NIC may actually have an SFP+ module attached. - * The ID is page 0, byte 0. - */ - switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) { - case 0x3: - return MC_CMD_MEDIA_SFP_PLUS; - case 0xc: - case 0xd: - return MC_CMD_MEDIA_QSFP_PLUS; - default: - return 0; - } -} - -static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, - struct ethtool_eeprom *ee, u8 *data) -{ - int rc; - ssize_t space_remaining = ee->len; - unsigned int page_off; - bool ignore_missing; - int num_pages; - int page; - - switch (efx_mcdi_phy_module_type(efx)) { - case MC_CMD_MEDIA_SFP_PLUS: - num_pages = efx_mcdi_phy_sff_8472_level(efx) > 0 ? - SFF_8472_NUM_PAGES : SFF_8079_NUM_PAGES; - page = 0; - ignore_missing = false; - break; - case MC_CMD_MEDIA_QSFP_PLUS: - num_pages = SFF_8436_NUM_PAGES; - page = -1; /* We obtain the lower page by asking for -1. */ - ignore_missing = true; /* Ignore missing pages after page 0. */ - break; - default: - return -EOPNOTSUPP; - } - - page_off = ee->offset % SFP_PAGE_SIZE; - page += ee->offset / SFP_PAGE_SIZE; - - while (space_remaining && (page < num_pages)) { - rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, - data, page_off, - space_remaining); - - if (rc > 0) { - space_remaining -= rc; - data += rc; - page_off = 0; - page++; - } else if (rc == 0) { - space_remaining = 0; - } else if (ignore_missing && (page > 0)) { - int intended_size = SFP_PAGE_SIZE - page_off; - - space_remaining -= intended_size; - if (space_remaining < 0) { - space_remaining = 0; - } else { - memset(data, 0, intended_size); - data += intended_size; - page_off = 0; - page++; - rc = 0; - } - } else { - return rc; - } - } - - return 0; -} - -static int efx_mcdi_phy_get_module_info(struct efx_nic *efx, - struct ethtool_modinfo *modinfo) -{ - int sff_8472_level; - int diag_type; - - switch (efx_mcdi_phy_module_type(efx)) { - case MC_CMD_MEDIA_SFP_PLUS: - sff_8472_level = efx_mcdi_phy_sff_8472_level(efx); - - /* If we can't read the diagnostics level we have none. */ - if (sff_8472_level < 0) - return -EOPNOTSUPP; - - /* Check if this module requires the (unsupported) address - * change operation. 
- */ - diag_type = efx_mcdi_phy_diag_type(efx); - - if ((sff_8472_level == 0) || - (diag_type & SFF_DIAG_ADDR_CHANGE)) { - modinfo->type = ETH_MODULE_SFF_8079; - modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; - } else { - modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; - } - break; - - case MC_CMD_MEDIA_QSFP_PLUS: - modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; - break; - - default: - return -EOPNOTSUPP; - } - - return 0; -} - -static const struct efx_phy_operations efx_mcdi_phy_ops = { - .probe = efx_mcdi_phy_probe, - .init = efx_port_dummy_op_int, - .reconfigure = efx_mcdi_port_reconfigure, - .poll = efx_mcdi_phy_poll, - .fini = efx_port_dummy_op_void, - .remove = efx_mcdi_phy_remove, - .get_link_ksettings = efx_mcdi_phy_get_link_ksettings, - .set_link_ksettings = efx_mcdi_phy_set_link_ksettings, - .get_fecparam = efx_mcdi_phy_get_fecparam, - .set_fecparam = efx_mcdi_phy_set_fecparam, - .test_alive = efx_mcdi_phy_test_alive, - .run_tests = efx_mcdi_phy_run_tests, - .test_name = efx_mcdi_phy_test_name, - .get_module_eeprom = efx_mcdi_phy_get_module_eeprom, - .get_module_info = efx_mcdi_phy_get_module_info, -}; - u32 efx_mcdi_phy_get_caps(struct efx_nic *efx) { struct efx_mcdi_phy_data *phy_data = efx->phy_data; @@ -683,16 +97,13 @@ int efx_mcdi_port_probe(struct efx_nic *efx) { int rc; - /* Hook in PHY operations table */ - efx->phy_op = &efx_mcdi_phy_ops; - /* Set up MDIO structure for PHY */ efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; efx->mdio.mdio_read = efx_mcdi_mdio_read; efx->mdio.mdio_write = efx_mcdi_mdio_write; /* Fill out MDIO structure, loopback modes, and initial link state */ - rc = efx->phy_op->probe(efx); + rc = efx_mcdi_phy_probe(efx); if (rc != 0) return rc; @@ -701,6 +112,6 @@ int efx_mcdi_port_probe(struct efx_nic *efx) void efx_mcdi_port_remove(struct efx_nic *efx) { - efx->phy_op->remove(efx); + efx_mcdi_phy_remove(efx); efx_mcdi_mac_fini_stats(efx); } diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c index 714d7f937212..4bd3ef8f3384 100644 --- a/drivers/net/ethernet/sfc/mcdi_port_common.c +++ b/drivers/net/ethernet/sfc/mcdi_port_common.c @@ -308,7 +308,7 @@ void efx_mcdi_phy_decode_link(struct efx_nic *efx, * Both RS and BASER (whether AUTO or not) means use FEC if cable and link * partner support it, preferring RS to BASER. 
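 * For example (under the reworked mapping below): ETHTOOL_FEC_RS on a
 * PHY whose supported_cap includes MC_CMD_PHY_CAP_RS_FEC_LBN sets both
 * the RS_FEC and RS_FEC_REQUESTED bits, while on a PHY without RS
 * support the supported_cap masking leaves them clear rather than
 * requesting a mode the link cannot use.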
*/ -u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap) +u32 ethtool_fec_caps_to_mcdi(u32 supported_cap, u32 ethtool_cap) { u32 ret = 0; @@ -316,17 +316,21 @@ u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap) return 0; if (ethtool_cap & ETHTOOL_FEC_AUTO) - ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_RS_FEC_LBN); - if (ethtool_cap & ETHTOOL_FEC_RS) + ret |= ((1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_RS_FEC_LBN)) & supported_cap; + if (ethtool_cap & ETHTOOL_FEC_RS && + supported_cap & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN)) ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) | (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN); - if (ethtool_cap & ETHTOOL_FEC_BASER) - ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN); + if (ethtool_cap & ETHTOOL_FEC_BASER) { + if (supported_cap & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN)) + ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN); + if (supported_cap & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)) + ret |= (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN); + } return ret; } @@ -404,6 +408,196 @@ bool efx_mcdi_phy_poll(struct efx_nic *efx) return !efx_link_state_equal(&efx->link_state, &old_state); } +int efx_mcdi_phy_probe(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data; + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); + u32 caps; + int rc; + + /* Initialise and populate phy_data */ + phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL); + if (phy_data == NULL) + return -ENOMEM; + + rc = efx_mcdi_get_phy_cfg(efx, phy_data); + if (rc != 0) + goto fail; + + /* Read initial link advertisement */ + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + goto fail; + + /* Fill out nic state */ + efx->phy_data = phy_data; + efx->phy_type = phy_data->type; + + efx->mdio_bus = phy_data->channel; + efx->mdio.prtad = phy_data->port; + efx->mdio.mmds = phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22); + efx->mdio.mode_support = 0; + if (phy_data->mmd_mask & (1 << MC_CMD_MMD_CLAUSE22)) + efx->mdio.mode_support |= MDIO_SUPPORTS_C22; + if (phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22)) + efx->mdio.mode_support |= MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; + + caps = MCDI_DWORD(outbuf, GET_LINK_OUT_CAP); + if (caps & (1 << MC_CMD_PHY_CAP_AN_LBN)) + mcdi_to_ethtool_linkset(phy_data->media, caps, + efx->link_advertising); + else + phy_data->forced_cap = caps; + + /* Assert that we can map efx -> mcdi loopback modes */ + BUILD_BUG_ON(LOOPBACK_NONE != MC_CMD_LOOPBACK_NONE); + BUILD_BUG_ON(LOOPBACK_DATA != MC_CMD_LOOPBACK_DATA); + BUILD_BUG_ON(LOOPBACK_GMAC != MC_CMD_LOOPBACK_GMAC); + BUILD_BUG_ON(LOOPBACK_XGMII != MC_CMD_LOOPBACK_XGMII); + BUILD_BUG_ON(LOOPBACK_XGXS != MC_CMD_LOOPBACK_XGXS); + BUILD_BUG_ON(LOOPBACK_XAUI != MC_CMD_LOOPBACK_XAUI); + BUILD_BUG_ON(LOOPBACK_GMII != MC_CMD_LOOPBACK_GMII); + BUILD_BUG_ON(LOOPBACK_SGMII != MC_CMD_LOOPBACK_SGMII); + BUILD_BUG_ON(LOOPBACK_XGBR != MC_CMD_LOOPBACK_XGBR); + BUILD_BUG_ON(LOOPBACK_XFI != MC_CMD_LOOPBACK_XFI); + BUILD_BUG_ON(LOOPBACK_XAUI_FAR != MC_CMD_LOOPBACK_XAUI_FAR); + BUILD_BUG_ON(LOOPBACK_GMII_FAR != MC_CMD_LOOPBACK_GMII_FAR); + BUILD_BUG_ON(LOOPBACK_SGMII_FAR != MC_CMD_LOOPBACK_SGMII_FAR); + 
BUILD_BUG_ON(LOOPBACK_XFI_FAR != MC_CMD_LOOPBACK_XFI_FAR); + BUILD_BUG_ON(LOOPBACK_GPHY != MC_CMD_LOOPBACK_GPHY); + BUILD_BUG_ON(LOOPBACK_PHYXS != MC_CMD_LOOPBACK_PHYXS); + BUILD_BUG_ON(LOOPBACK_PCS != MC_CMD_LOOPBACK_PCS); + BUILD_BUG_ON(LOOPBACK_PMAPMD != MC_CMD_LOOPBACK_PMAPMD); + BUILD_BUG_ON(LOOPBACK_XPORT != MC_CMD_LOOPBACK_XPORT); + BUILD_BUG_ON(LOOPBACK_XGMII_WS != MC_CMD_LOOPBACK_XGMII_WS); + BUILD_BUG_ON(LOOPBACK_XAUI_WS != MC_CMD_LOOPBACK_XAUI_WS); + BUILD_BUG_ON(LOOPBACK_XAUI_WS_FAR != MC_CMD_LOOPBACK_XAUI_WS_FAR); + BUILD_BUG_ON(LOOPBACK_XAUI_WS_NEAR != MC_CMD_LOOPBACK_XAUI_WS_NEAR); + BUILD_BUG_ON(LOOPBACK_GMII_WS != MC_CMD_LOOPBACK_GMII_WS); + BUILD_BUG_ON(LOOPBACK_XFI_WS != MC_CMD_LOOPBACK_XFI_WS); + BUILD_BUG_ON(LOOPBACK_XFI_WS_FAR != MC_CMD_LOOPBACK_XFI_WS_FAR); + BUILD_BUG_ON(LOOPBACK_PHYXS_WS != MC_CMD_LOOPBACK_PHYXS_WS); + + rc = efx_mcdi_loopback_modes(efx, &efx->loopback_modes); + if (rc != 0) + goto fail; + /* The MC indicates that LOOPBACK_NONE is a valid loopback mode, + * but by convention we don't + */ + efx->loopback_modes &= ~(1 << LOOPBACK_NONE); + + /* Set the initial link mode */ + efx_mcdi_phy_decode_link(efx, &efx->link_state, + MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), + MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), + MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); + + /* Record the initial FEC configuration (or nearest approximation + * representable in the ethtool configuration space) + */ + efx->fec_config = mcdi_fec_caps_to_ethtool(caps, + efx->link_state.speed == 25000 || + efx->link_state.speed == 50000); + + /* Default to Autonegotiated flow control if the PHY supports it */ + efx->wanted_fc = EFX_FC_RX | EFX_FC_TX; + if (phy_data->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + efx->wanted_fc |= EFX_FC_AUTO; + efx_link_set_wanted_fc(efx, efx->wanted_fc); + + return 0; + +fail: + kfree(phy_data); + return rc; +} + +void efx_mcdi_phy_remove(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data = efx->phy_data; + + efx->phy_data = NULL; + kfree(phy_data); +} + +void efx_mcdi_phy_get_link_ksettings(struct efx_nic *efx, struct ethtool_link_ksettings *cmd) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); + int rc; + + cmd->base.speed = efx->link_state.speed; + cmd->base.duplex = efx->link_state.fd; + cmd->base.port = mcdi_to_ethtool_media(phy_cfg->media); + cmd->base.phy_address = phy_cfg->port; + cmd->base.autoneg = !!(efx->link_advertising[0] & ADVERTISED_Autoneg); + cmd->base.mdio_support = (efx->mdio.mode_support & + (MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22)); + + mcdi_to_ethtool_linkset(phy_cfg->media, phy_cfg->supported_cap, + cmd->link_modes.supported); + memcpy(cmd->link_modes.advertising, efx->link_advertising, + sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + return; + mcdi_to_ethtool_linkset(phy_cfg->media, + MCDI_DWORD(outbuf, GET_LINK_OUT_LP_CAP), + cmd->link_modes.lp_advertising); +} + +int efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, const struct ethtool_link_ksettings *cmd) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 caps; + int rc; + + if (cmd->base.autoneg) { + caps = (ethtool_linkset_to_mcdi_cap(cmd->link_modes.advertising) | + 1 << MC_CMD_PHY_CAP_AN_LBN); + } else if (cmd->base.duplex) { + switch (cmd->base.speed) { + case 10: caps = 1 << MC_CMD_PHY_CAP_10FDX_LBN; break; + case 100: caps = 1 << 
MC_CMD_PHY_CAP_100FDX_LBN; break; + case 1000: caps = 1 << MC_CMD_PHY_CAP_1000FDX_LBN; break; + case 10000: caps = 1 << MC_CMD_PHY_CAP_10000FDX_LBN; break; + case 40000: caps = 1 << MC_CMD_PHY_CAP_40000FDX_LBN; break; + case 100000: caps = 1 << MC_CMD_PHY_CAP_100000FDX_LBN; break; + case 25000: caps = 1 << MC_CMD_PHY_CAP_25000FDX_LBN; break; + case 50000: caps = 1 << MC_CMD_PHY_CAP_50000FDX_LBN; break; + default: return -EINVAL; + } + } else { + switch (cmd->base.speed) { + case 10: caps = 1 << MC_CMD_PHY_CAP_10HDX_LBN; break; + case 100: caps = 1 << MC_CMD_PHY_CAP_100HDX_LBN; break; + case 1000: caps = 1 << MC_CMD_PHY_CAP_1000HDX_LBN; break; + default: return -EINVAL; + } + } + + caps |= ethtool_fec_caps_to_mcdi(phy_cfg->supported_cap, efx->fec_config); + + rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx), + efx->loopback_mode, 0); + if (rc) + return rc; + + if (cmd->base.autoneg) { + efx_link_set_advertising(efx, cmd->link_modes.advertising); + phy_cfg->forced_cap = 0; + } else { + efx_link_clear_advertising(efx); + phy_cfg->forced_cap = caps; + } + return 0; +} + int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN); @@ -455,6 +649,50 @@ int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec) return 0; } +/* Basic validation to ensure that the caps we are going to attempt to set are + * in fact supported by the adapter. Note that 'no FEC' is always supported. + */ +static int ethtool_fec_supported(u32 supported_cap, u32 ethtool_cap) +{ + if (ethtool_cap & ETHTOOL_FEC_OFF) + return 0; + + if (ethtool_cap && + !ethtool_fec_caps_to_mcdi(supported_cap, ethtool_cap)) + return -EINVAL; + return 0; +} + +int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, const struct ethtool_fecparam *fec) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 caps; + int rc; + + rc = ethtool_fec_supported(phy_cfg->supported_cap, fec->fec); + if (rc) + return rc; + + /* Work out what efx_mcdi_phy_set_link_ksettings() would produce from + * saved advertising bits + */ + if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, efx->link_advertising)) + caps = (ethtool_linkset_to_mcdi_cap(efx->link_advertising) | + 1 << MC_CMD_PHY_CAP_AN_LBN); + else + caps = phy_cfg->forced_cap; + + caps |= ethtool_fec_caps_to_mcdi(phy_cfg->supported_cap, fec->fec); + rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx), + efx->loopback_mode, 0); + if (rc) + return rc; + + /* Record the new FEC setting for subsequent set_link calls */ + efx->fec_config = fec->fec; + return 0; +} + int efx_mcdi_phy_test_alive(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN); @@ -483,12 +721,357 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx) ethtool_linkset_to_mcdi_cap(efx->link_advertising) : phy_cfg->forced_cap); - caps |= ethtool_fec_caps_to_mcdi(efx->fec_config); + caps |= ethtool_fec_caps_to_mcdi(phy_cfg->supported_cap, efx->fec_config); return efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx), efx->loopback_mode, 0); } +static const char *const mcdi_sft9001_cable_diag_names[] = { + "cable.pairA.length", + "cable.pairB.length", + "cable.pairC.length", + "cable.pairD.length", + "cable.pairA.status", + "cable.pairB.status", + "cable.pairC.status", + "cable.pairD.status", +}; + +static int efx_mcdi_bist(struct efx_nic *efx, unsigned int bist_mode, + int *results) +{ + unsigned int retry, i, count = 0; + size_t outlen; + u32 status; + MCDI_DECLARE_BUF(inbuf, 
MC_CMD_START_BIST_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_POLL_BIST_OUT_SFT9001_LEN); + u8 *ptr; + int rc; + + BUILD_BUG_ON(MC_CMD_START_BIST_OUT_LEN != 0); + MCDI_SET_DWORD(inbuf, START_BIST_IN_TYPE, bist_mode); + rc = efx_mcdi_rpc(efx, MC_CMD_START_BIST, + inbuf, MC_CMD_START_BIST_IN_LEN, NULL, 0, NULL); + if (rc) + goto out; + + /* Wait up to 10s for BIST to finish */ + for (retry = 0; retry < 100; ++retry) { + BUILD_BUG_ON(MC_CMD_POLL_BIST_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_POLL_BIST, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto out; + + status = MCDI_DWORD(outbuf, POLL_BIST_OUT_RESULT); + if (status != MC_CMD_POLL_BIST_RUNNING) + goto finished; + + msleep(100); + } + + rc = -ETIMEDOUT; + goto out; + +finished: + results[count++] = (status == MC_CMD_POLL_BIST_PASSED) ? 1 : -1; + + /* SFT9001 specific cable diagnostics output */ + if (efx->phy_type == PHY_TYPE_SFT9001B && + (bist_mode == MC_CMD_PHY_BIST_CABLE_SHORT || + bist_mode == MC_CMD_PHY_BIST_CABLE_LONG)) { + ptr = MCDI_PTR(outbuf, POLL_BIST_OUT_SFT9001_CABLE_LENGTH_A); + if (status == MC_CMD_POLL_BIST_PASSED && + outlen >= MC_CMD_POLL_BIST_OUT_SFT9001_LEN) { + for (i = 0; i < 8; i++) { + results[count + i] = + EFX_DWORD_FIELD(((efx_dword_t *)ptr)[i], + EFX_DWORD_0); + } + } + count += 8; + } + rc = count; + +out: + return rc; +} + +int efx_mcdi_phy_run_tests(struct efx_nic *efx, int *results, unsigned int flags) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 mode; + int rc; + + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_LBN)) { + rc = efx_mcdi_bist(efx, MC_CMD_PHY_BIST, results); + if (rc < 0) + return rc; + + results += rc; + } + + /* If we support both LONG and SHORT, then run each in response to + * break or not. Otherwise, run the one we support + */ + mode = 0; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_SHORT_LBN)) { + if ((flags & ETH_TEST_FL_OFFLINE) && + (phy_cfg->flags & + (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN))) + mode = MC_CMD_PHY_BIST_CABLE_LONG; + else + mode = MC_CMD_PHY_BIST_CABLE_SHORT; + } else if (phy_cfg->flags & + (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN)) + mode = MC_CMD_PHY_BIST_CABLE_LONG; + + if (mode != 0) { + rc = efx_mcdi_bist(efx, mode, results); + if (rc < 0) + return rc; + results += rc; + } + + return 0; +} + +const char *efx_mcdi_phy_test_name(struct efx_nic *efx, unsigned int index) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_LBN)) { + if (index == 0) + return "bist"; + --index; + } + + if (phy_cfg->flags & ((1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_SHORT_LBN) | + (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN))) { + if (index == 0) + return "cable"; + --index; + + if (efx->phy_type == PHY_TYPE_SFT9001B) { + if (index < ARRAY_SIZE(mcdi_sft9001_cable_diag_names)) + return mcdi_sft9001_cable_diag_names[index]; + index -= ARRAY_SIZE(mcdi_sft9001_cable_diag_names); + } + } + + return NULL; +} + +#define SFP_PAGE_SIZE 128 +#define SFF_DIAG_TYPE_OFFSET 92 +#define SFF_DIAG_ADDR_CHANGE BIT(2) +#define SFF_8079_NUM_PAGES 2 +#define SFF_8472_NUM_PAGES 4 +#define SFF_8436_NUM_PAGES 5 +#define SFF_DMT_LEVEL_OFFSET 94 + +/** efx_mcdi_phy_get_module_eeprom_page() - Get a single page of module eeprom + * @efx: NIC context + * @page: EEPROM page number + * @data: Destination data pointer + * @offset: Offset in page to copy from in to data + * @space: Space available in data + * + * Return: + * >=0 - amount of data copied + * <0 - error + */ 
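+/* A minimal usage sketch, mirroring efx_mcdi_phy_get_module_eeprom_byte()
+ * below: reading the SFF-8472 diagnostic-type byte from page 0 is
+ *
+ *	u8 byte;
+ *	int rc = efx_mcdi_phy_get_module_eeprom_page(efx, 0, &byte,
+ *						     SFF_DIAG_TYPE_OFFSET, 1);
+ *
+ * which returns 1 with the byte stored in byte, or a negative error code.
+ */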
+static int efx_mcdi_phy_get_module_eeprom_page(struct efx_nic *efx, + unsigned int page, + u8 *data, ssize_t offset, + ssize_t space) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX); + MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN); + unsigned int payload_len; + unsigned int to_copy; + size_t outlen; + int rc; + + if (offset > SFP_PAGE_SIZE) + return -EINVAL; + + to_copy = min(space, SFP_PAGE_SIZE - offset); + + MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_PHY_MEDIA_INFO, + inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), + &outlen); + + if (rc) + return rc; + + if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST + + SFP_PAGE_SIZE)) + return -EIO; + + payload_len = MCDI_DWORD(outbuf, GET_PHY_MEDIA_INFO_OUT_DATALEN); + if (payload_len != SFP_PAGE_SIZE) + return -EIO; + + memcpy(data, MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + offset, + to_copy); + + return to_copy; +} + +static int efx_mcdi_phy_get_module_eeprom_byte(struct efx_nic *efx, + unsigned int page, + u8 byte) +{ + u8 data; + int rc; + + rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, &data, byte, 1); + if (rc == 1) + return data; + + return rc; +} + +static int efx_mcdi_phy_diag_type(struct efx_nic *efx) +{ + /* Page zero of the EEPROM includes the diagnostic type at byte 92. */ + return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, + SFF_DIAG_TYPE_OFFSET); +} + +static int efx_mcdi_phy_sff_8472_level(struct efx_nic *efx) +{ + /* Page zero of the EEPROM includes the DMT level at byte 94. */ + return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, + SFF_DMT_LEVEL_OFFSET); +} + +static u32 efx_mcdi_phy_module_type(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data = efx->phy_data; + + if (phy_data->media != MC_CMD_MEDIA_QSFP_PLUS) + return phy_data->media; + + /* A QSFP+ NIC may actually have an SFP+ module attached. + * The ID is page 0, byte 0. + */ + switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) { + case 0x3: + return MC_CMD_MEDIA_SFP_PLUS; + case 0xc: + case 0xd: + return MC_CMD_MEDIA_QSFP_PLUS; + default: + return 0; + } +} + +int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, struct ethtool_eeprom *ee, u8 *data) +{ + int rc; + ssize_t space_remaining = ee->len; + unsigned int page_off; + bool ignore_missing; + int num_pages; + int page; + + switch (efx_mcdi_phy_module_type(efx)) { + case MC_CMD_MEDIA_SFP_PLUS: + num_pages = efx_mcdi_phy_sff_8472_level(efx) > 0 ? + SFF_8472_NUM_PAGES : SFF_8079_NUM_PAGES; + page = 0; + ignore_missing = false; + break; + case MC_CMD_MEDIA_QSFP_PLUS: + num_pages = SFF_8436_NUM_PAGES; + page = -1; /* We obtain the lower page by asking for -1. */ + ignore_missing = true; /* Ignore missing pages after page 0. 
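+ * Upper pages are optional on some modules; a missing upper page is filled with zeroes rather than treated as an error.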
*/ + break; + default: + return -EOPNOTSUPP; + } + + page_off = ee->offset % SFP_PAGE_SIZE; + page += ee->offset / SFP_PAGE_SIZE; + + while (space_remaining && (page < num_pages)) { + rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, + data, page_off, + space_remaining); + + if (rc > 0) { + space_remaining -= rc; + data += rc; + page_off = 0; + page++; + } else if (rc == 0) { + space_remaining = 0; + } else if (ignore_missing && (page > 0)) { + int intended_size = SFP_PAGE_SIZE - page_off; + + space_remaining -= intended_size; + if (space_remaining < 0) { + space_remaining = 0; + } else { + memset(data, 0, intended_size); + data += intended_size; + page_off = 0; + page++; + rc = 0; + } + } else { + return rc; + } + } + + return 0; +} + +int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *modinfo) +{ + int sff_8472_level; + int diag_type; + + switch (efx_mcdi_phy_module_type(efx)) { + case MC_CMD_MEDIA_SFP_PLUS: + sff_8472_level = efx_mcdi_phy_sff_8472_level(efx); + + /* If we can't read the diagnostics level we have none. */ + if (sff_8472_level < 0) + return -EOPNOTSUPP; + + /* Check if this module requires the (unsupported) address + * change operation. + */ + diag_type = efx_mcdi_phy_diag_type(efx); + + if (sff_8472_level == 0 || + (diag_type & SFF_DIAG_ADDR_CHANGE)) { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } + break; + + case MC_CMD_MEDIA_QSFP_PLUS: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + + default: + return -EOPNOTSUPP; + } + + return 0; +} + static unsigned int efx_calc_mac_mtu(struct efx_nic *efx) { return EFX_MAX_FRAME_LEN(efx->net_dev->mtu); diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.h b/drivers/net/ethernet/sfc/mcdi_port_common.h index 9dbeee83266f..ed31690e591c 100644 --- a/drivers/net/ethernet/sfc/mcdi_port_common.h +++ b/drivers/net/ethernet/sfc/mcdi_port_common.h @@ -41,13 +41,22 @@ u8 mcdi_to_ethtool_media(u32 media); void efx_mcdi_phy_decode_link(struct efx_nic *efx, struct efx_link_state *link_state, u32 speed, u32 flags, u32 fcntl); -u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap); +u32 ethtool_fec_caps_to_mcdi(u32 supported_cap, u32 ethtool_cap); u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g); void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa); bool efx_mcdi_phy_poll(struct efx_nic *efx); -int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, - struct ethtool_fecparam *fec); +int efx_mcdi_phy_probe(struct efx_nic *efx); +void efx_mcdi_phy_remove(struct efx_nic *efx); +void efx_mcdi_phy_get_link_ksettings(struct efx_nic *efx, struct ethtool_link_ksettings *cmd); +int efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, const struct ethtool_link_ksettings *cmd); +int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec); +int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, const struct ethtool_fecparam *fec); int efx_mcdi_phy_test_alive(struct efx_nic *efx); +int efx_mcdi_port_reconfigure(struct efx_nic *efx); +int efx_mcdi_phy_run_tests(struct efx_nic *efx, int *results, unsigned int flags); +const char *efx_mcdi_phy_test_name(struct efx_nic *efx, unsigned int index); +int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, struct ethtool_eeprom *ee, u8 *data); +int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *modinfo); int efx_mcdi_set_mac(struct efx_nic *efx); int 
efx_mcdi_set_mtu(struct efx_nic *efx); int efx_mcdi_mac_init_stats(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 062462a13847..9f7dfdf708cf 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -38,8 +38,6 @@ * **************************************************************************/ -#define EFX_DRIVER_VERSION "4.1" - #ifdef DEBUG #define EFX_WARN_ON_ONCE_PARANOID(x) WARN_ON_ONCE(x) #define EFX_WARN_ON_PARANOID(x) WARN_ON(x) @@ -65,10 +63,13 @@ * queues. */ #define EFX_MAX_TX_TC 2 #define EFX_MAX_CORE_TX_QUEUES (EFX_MAX_TX_TC * EFX_MAX_CHANNELS) -#define EFX_TXQ_TYPE_OFFLOAD 1 /* flag */ -#define EFX_TXQ_TYPE_HIGHPRI 2 /* flag */ -#define EFX_TXQ_TYPES 4 -#define EFX_MAX_TX_QUEUES (EFX_TXQ_TYPES * EFX_MAX_CHANNELS) +#define EFX_TXQ_TYPE_OUTER_CSUM 1 /* Outer checksum offload */ +#define EFX_TXQ_TYPE_INNER_CSUM 2 /* Inner checksum offload */ +#define EFX_TXQ_TYPE_HIGHPRI 4 /* High-priority (for TC) */ +#define EFX_TXQ_TYPES 8 +/* HIGHPRI is Siena-only, and INNER_CSUM is EF10, so no need for both */ +#define EFX_MAX_TXQ_PER_CHANNEL 4 +#define EFX_MAX_TX_QUEUES (EFX_MAX_TXQ_PER_CHANNEL * EFX_MAX_CHANNELS) /* Maximum possible MTU the driver supports */ #define EFX_MAX_MTU (9 * 1024) @@ -76,6 +77,9 @@ /* Minimum MTU, from RFC791 (IP) */ #define EFX_MIN_MTU 68 +/* Maximum total header length for TSOv2 */ +#define EFX_TSO2_MAX_HDRLEN 208 + /* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page, * and should be a multiple of the cache line size. */ @@ -192,7 +196,9 @@ struct efx_tx_buffer { * @queue: DMA queue number * @label: Label for TX completion events. * Is our index within @channel->tx_queue array. + * @type: configuration type of this TX queue. A bitmask of %EFX_TXQ_TYPE_* flags. * @tso_version: Version of TSO in use for this queue. + * @tso_encap: Is encapsulated TSO supported? Supported in TSOv2 on 8000 series. * @channel: The associated channel * @core_txq: The networking core TX queue structure * @buffer: The software buffer ring @@ -206,8 +212,6 @@ struct efx_tx_buffer { * @initialised: Has hardware queue been initialised? * @timestamping: Is timestamping enabled for this channel? * @xdp_tx: Is this an XDP tx queue? - * @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and - * may also map tx data, depending on the nature of the TSO implementation. * @read_count: Current read pointer. * This is the number of buffers that have been removed from both rings. * @old_write_count: The value of @write_count when last checked. 
@@ -244,7 +248,7 @@ struct efx_tx_buffer { * @tso_fallbacks: Number of times TSO fallback used * @pushes: Number of times the TX push feature has been used * @pio_packets: Number of times the TX PIO feature has been used - * @xmit_more_available: Are any packets waiting to be pushed to the NIC + * @xmit_pending: Are any packets waiting to be pushed to the NIC * @cb_packets: Number of times the TX copybreak feature has been used * @notify_count: Count of notified descriptors to the NIC * @empty_read_count: If the completion path has seen the queue as empty @@ -256,7 +260,9 @@ struct efx_tx_queue { struct efx_nic *efx ____cacheline_aligned_in_smp; unsigned int queue; unsigned int label; + unsigned int type; unsigned int tso_version; + bool tso_encap; struct efx_channel *channel; struct netdev_queue *core_txq; struct efx_tx_buffer *buffer; @@ -269,9 +275,6 @@ struct efx_tx_queue { bool timestamping; bool xdp_tx; - /* Function pointers used in the fast path. */ - int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *); - /* Members used mainly on the completion path */ unsigned int read_count ____cacheline_aligned_in_smp; unsigned int old_write_count; @@ -292,7 +295,7 @@ struct efx_tx_queue { unsigned int tso_fallbacks; unsigned int pushes; unsigned int pio_packets; - bool xmit_more_available; + bool xmit_pending; unsigned int cb_packets; unsigned int notify_count; /* Statistics to supplement MAC stats */ @@ -455,7 +458,7 @@ enum efx_sync_events_state { * were checked for expiry * @rfs_expire_index: next accelerated RFS filter ID to check for expiry * @n_rfs_succeeded: number of successful accelerated RFS filter insertions - * @n_rfs_failed; number of failed accelerated RFS filter insertions + * @n_rfs_failed: number of failed accelerated RFS filter insertions * @filter_work: Work item for efx_filter_rfs_expire() * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, * indexed by filter ID @@ -481,6 +484,7 @@ enum efx_sync_events_state { * @rx_list: list of SKBs from current RX, awaiting processing * @rx_queue: RX queue for this channel * @tx_queue: TX queues for this channel + * @tx_queue_by_type: pointers into @tx_queue, or %NULL, indexed by txq type * @sync_events_state: Current state of sync events on this channel * @sync_timestamp_major: Major part of the last ptp sync event * @sync_timestamp_minor: Minor part of the last ptp sync event @@ -542,7 +546,8 @@ struct efx_channel { struct list_head *rx_list; struct efx_rx_queue rx_queue; - struct efx_tx_queue tx_queue[EFX_TXQ_TYPES]; + struct efx_tx_queue tx_queue[EFX_MAX_TXQ_PER_CHANNEL]; + struct efx_tx_queue *tx_queue_by_type[EFX_TXQ_TYPES]; enum efx_sync_events_state sync_events_state; u32 sync_timestamp_major; @@ -658,51 +663,6 @@ static inline bool efx_link_state_equal(const struct efx_link_state *left, } /** - * struct efx_phy_operations - Efx PHY operations table - * @probe: Probe PHY and initialise efx->mdio.mode_support, efx->mdio.mmds, - * efx->loopback_modes. - * @init: Initialise PHY - * @fini: Shut down PHY - * @reconfigure: Reconfigure PHY (e.g. for new link parameters) - * @poll: Update @link_state and report whether it changed. - * Serialised by the mac_lock. - * @get_link_ksettings: Get ethtool settings. Serialised by the mac_lock. - * @set_link_ksettings: Set ethtool settings. Serialised by the mac_lock. - * @get_fecparam: Get Forward Error Correction settings. Serialised by mac_lock. - * @set_fecparam: Set Forward Error Correction settings. Serialised by mac_lock. 
- * @set_npage_adv: Set abilities advertised in (Extended) Next Page - * (only needed where AN bit is set in mmds) - * @test_alive: Test that PHY is 'alive' (online) - * @test_name: Get the name of a PHY-specific test/result - * @run_tests: Run tests and record results as appropriate (offline). - * Flags are the ethtool tests flags. - */ -struct efx_phy_operations { - int (*probe) (struct efx_nic *efx); - int (*init) (struct efx_nic *efx); - void (*fini) (struct efx_nic *efx); - void (*remove) (struct efx_nic *efx); - int (*reconfigure) (struct efx_nic *efx); - bool (*poll) (struct efx_nic *efx); - void (*get_link_ksettings)(struct efx_nic *efx, - struct ethtool_link_ksettings *cmd); - int (*set_link_ksettings)(struct efx_nic *efx, - const struct ethtool_link_ksettings *cmd); - int (*get_fecparam)(struct efx_nic *efx, struct ethtool_fecparam *fec); - int (*set_fecparam)(struct efx_nic *efx, - const struct ethtool_fecparam *fec); - void (*set_npage_adv) (struct efx_nic *efx, u32); - int (*test_alive) (struct efx_nic *efx); - const char *(*test_name) (struct efx_nic *efx, unsigned int index); - int (*run_tests) (struct efx_nic *efx, int *results, unsigned flags); - int (*get_module_eeprom) (struct efx_nic *efx, - struct ethtool_eeprom *ee, - u8 *data); - int (*get_module_info) (struct efx_nic *efx, - struct ethtool_modinfo *modinfo); -}; - -/** * enum efx_phy_mode - PHY operating mode flags * @PHY_MODE_NORMAL: on and should pass traffic * @PHY_MODE_TX_DISABLED: on with TX disabled @@ -920,7 +880,6 @@ struct efx_async_filter_insertion { * field of %MC_CMD_GET_CAPABILITIES_V4 response, or %MC_CMD_MAC_NSTATS) * @stats_buffer: DMA buffer for statistics * @phy_type: PHY type - * @phy_op: PHY interface * @phy_data: PHY private data (including PHY-specific stats) * @mdio: PHY MDIO interface * @mdio_bus: PHY MDIO bus ID (only used by Siena) @@ -1094,7 +1053,6 @@ struct efx_nic { bool rx_nodesc_drops_prev_state; unsigned int phy_type; - const struct efx_phy_operations *phy_op; void *phy_data; struct mdio_if_info mdio; unsigned int mdio_bus; @@ -1214,10 +1172,12 @@ struct efx_udp_tunnel { * @describe_stats: Describe statistics for ethtool * @update_stats: Update statistics not provided by event handling. * Either argument may be %NULL. + * @update_stats_atomic: Update statistics while in atomic context, if that + * is more limiting than @update_stats. Otherwise, leave %NULL and + * driver core will call @update_stats. * @start_stats: Start the regular fetching of statistics * @pull_stats: Pull stats from the NIC and wait until they arrive. * @stop_stats: Stop the regular fetching of statistics - * @set_id_led: Set state of identifying LED or revert to automatic function * @push_irq_moderation: Apply interrupt moderation value * @reconfigure_port: Push loopback/power/txdis changes to the MAC and PHY * @prepare_enable_fc_tx: Prepare MAC to enable pause frame TX (may be %NULL) @@ -1250,7 +1210,7 @@ struct efx_udp_tunnel { * a pointer to the &struct efx_msi_context for the channel. * @irq_handle_legacy: Handle legacy interrupt. The @dev_id argument * is a pointer to the &struct efx_nic. 
- * @tx_probe: Allocate resources for TX queue + * @tx_probe: Allocate resources for TX queue (and select TXQ type) * @tx_init: Initialise TX queue on the NIC * @tx_remove: Free resources for TX queue * @tx_write: Write TX descriptors and doorbell @@ -1359,10 +1319,11 @@ struct efx_nic_type { size_t (*describe_stats)(struct efx_nic *efx, u8 *names); size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats, struct rtnl_link_stats64 *core_stats); + size_t (*update_stats_atomic)(struct efx_nic *efx, u64 *full_stats, + struct rtnl_link_stats64 *core_stats); void (*start_stats)(struct efx_nic *efx); void (*pull_stats)(struct efx_nic *efx); void (*stop_stats)(struct efx_nic *efx); - void (*set_id_led)(struct efx_nic *efx, enum efx_led_mode mode); void (*push_irq_moderation)(struct efx_channel *channel); int (*reconfigure_port)(struct efx_nic *efx); void (*prepare_enable_fc_tx)(struct efx_nic *efx); @@ -1546,14 +1507,6 @@ efx_get_tx_channel(struct efx_nic *efx, unsigned int index) return efx->channel[efx->tx_channel_offset + index]; } -static inline struct efx_tx_queue * -efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type) -{ - EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_tx_channels || - type >= efx->tx_queues_per_channel); - return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type]; -} - static inline struct efx_channel * efx_get_xdp_channel(struct efx_nic *efx, unsigned int index) { @@ -1580,10 +1533,18 @@ static inline unsigned int efx_channel_num_tx_queues(struct efx_channel *channel } static inline struct efx_tx_queue * -efx_channel_get_tx_queue(struct efx_channel *channel, unsigned type) +efx_channel_get_tx_queue(struct efx_channel *channel, unsigned int type) +{ + EFX_WARN_ON_ONCE_PARANOID(type >= EFX_TXQ_TYPES); + return channel->tx_queue_by_type[type]; +} + +static inline struct efx_tx_queue * +efx_get_tx_queue(struct efx_nic *efx, unsigned int index, unsigned int type) { - EFX_WARN_ON_ONCE_PARANOID(type >= efx_channel_num_tx_queues(channel)); - return &channel->tx_queue[type]; + struct efx_channel *channel = efx_get_tx_channel(efx, index); + + return efx_channel_get_tx_queue(channel, type); } /* Iterate over all TX queues belonging to a channel */ @@ -1683,10 +1644,6 @@ efx_channel_tx_fill_level(struct efx_channel *channel) struct efx_tx_queue *tx_queue; unsigned int fill_level = 0; - /* This function is currently only used by EF100, which maybe - * could do something simpler and just compute the fill level - * of the single TXQ that's really in use. - */ efx_for_each_channel_tx_queue(tx_queue, channel) fill_level = max(fill_level, tx_queue->insert_count - tx_queue->read_count); @@ -1694,6 +1651,20 @@ efx_channel_tx_fill_level(struct efx_channel *channel) return fill_level; } +/* Conservative approximation of efx_channel_tx_fill_level using cached value */ +static inline unsigned int +efx_channel_tx_old_fill_level(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + unsigned int fill_level = 0; + + efx_for_each_channel_tx_queue(tx_queue, channel) + fill_level = max(fill_level, + tx_queue->insert_count - tx_queue->old_read_count); + + return fill_level; +} + /* Get all supported features. * If a feature is not fixed, it is present in hw_features. 
* If a feature is fixed, it does not present in hw_features, but diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 724e2776b585..5c2fe3ce3f4d 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -297,6 +297,10 @@ struct efx_ef10_nic_data { u64 licensed_features; }; +/* TSOv2 */ +int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + bool *data_mapped); + int efx_init_sriov(void); void efx_fini_sriov(void); diff --git a/drivers/net/ethernet/sfc/nic_common.h b/drivers/net/ethernet/sfc/nic_common.h index 974107354087..b9cafe9cd568 100644 --- a/drivers/net/ethernet/sfc/nic_common.h +++ b/drivers/net/ethernet/sfc/nic_common.h @@ -65,8 +65,7 @@ efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index) /* Report whether this TX queue would be empty for the given write_count. * May return false negative. */ -static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, - unsigned int write_count) +static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, unsigned int write_count) { unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count); @@ -76,41 +75,6 @@ static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0; } -/* Report whether the NIC considers this TX queue empty, using - * packet_write_count (the write count recorded for the last completable - * doorbell push). May return false negative. EF10 only, which is OK - * because only EF10 supports PIO. - */ -static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue) -{ - EFX_WARN_ON_ONCE_PARANOID(!tx_queue->efx->type->option_descriptors); - return __efx_nic_tx_is_empty(tx_queue, tx_queue->packet_write_count); -} - -/* Get partner of a TX queue, seen as part of the same net core queue */ -/* XXX is this a thing on EF100? */ -static inline struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue) -{ - if (tx_queue->label & EFX_TXQ_TYPE_OFFLOAD) - return tx_queue - EFX_TXQ_TYPE_OFFLOAD; - else - return tx_queue + EFX_TXQ_TYPE_OFFLOAD; -} - -/* Decide whether we can use TX PIO, ie. write packet data directly into - * a buffer on the device. This can reduce latency at the expense of - * throughput, so we only do this if both hardware and software TX rings - * are empty. This also ensures that only one packet at a time can be - * using the PIO buffer. 
- */ -static inline bool efx_nic_may_tx_pio(struct efx_tx_queue *tx_queue) -{ - struct efx_tx_queue *partner = efx_tx_queue_partner(tx_queue); - - return tx_queue->piobuf && efx_nic_tx_is_empty(tx_queue) && - efx_nic_tx_is_empty(partner); -} - int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb, bool *data_mapped); @@ -125,7 +89,7 @@ int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb, static inline bool efx_nic_may_push_tx_desc(struct efx_tx_queue *tx_queue, unsigned int write_count) { - bool was_empty = __efx_nic_tx_is_empty(tx_queue, write_count); + bool was_empty = efx_nic_tx_is_empty(tx_queue, write_count); tx_queue->empty_read_count = 0; return was_empty && tx_queue->write_count - write_count == 1; @@ -280,6 +244,13 @@ void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count, const unsigned long *mask, u64 *stats, const void *dma_buf, bool accumulate); void efx_nic_fix_nodesc_drop_stat(struct efx_nic *efx, u64 *stat); +static inline size_t efx_nic_update_stats_atomic(struct efx_nic *efx, u64 *full_stats, + struct rtnl_link_stats64 *core_stats) +{ + if (efx->type->update_stats_atomic) + return efx->type->update_stats_atomic(efx, full_stats, core_stats); + return efx->type->update_stats(efx, full_stats, core_stats); +} #define EFX_MAX_FLUSH_TIME 5000 diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index bea4725a4499..a39c5143b386 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -43,6 +43,7 @@ #include "mcdi_pcol.h" #include "io.h" #include "farch_regs.h" +#include "tx.h" #include "nic.h" /* indirectly includes ptp.h */ /* Maximum number of events expected to make up a PTP event */ @@ -172,9 +173,11 @@ struct efx_ptp_match { /** * struct efx_ptp_event_rx - A PTP receive event (from MC) + * @link: list of events * @seq0: First part of (PTP) UUID * @seq1: Second part of (PTP) UUID and sequence number * @hwtimestamp: Event timestamp + * @expiry: Time which the packet arrived */ struct efx_ptp_event_rx { struct list_head link; @@ -222,11 +225,13 @@ struct efx_ptp_timeset { * reset (disable, enable). * @rxfilter_event: Receive filter when operating * @rxfilter_general: Receive filter when operating + * @rxfilter_installed: Receive filter installed * @config: Current timestamp configuration * @enabled: PTP operation enabled * @mode: Mode in which PTP operating (PTP version) * @ns_to_nic_time: Function to convert from scalar nanoseconds to NIC time * @nic_to_kernel_time: Function to convert from NIC to kernel time + * @nic_time: contains time details * @nic_time.minor_max: Wrap point for NIC minor times * @nic_time.sync_event_diff_min: Minimum acceptable difference between time * in packet prefix and last MCDI time sync event i.e. how much earlier than @@ -238,6 +243,7 @@ struct efx_ptp_timeset { * field in MCDI time sync event. 
* @min_synchronisation_ns: Minimum acceptable corrected sync window * @capabilities: Capabilities flags from the NIC + * @ts_corrections: contains corrections details * @ts_corrections.ptp_tx: Required driver correction of PTP packet transmit * timestamps * @ts_corrections.ptp_rx: Required driver correction of PTP packet receive @@ -325,7 +331,7 @@ struct efx_ptp_data { struct work_struct pps_work; struct workqueue_struct *pps_workwq; bool nic_ts_enabled; - _MCDI_DECLARE_BUF(txbuf, MC_CMD_PTP_IN_TRANSMIT_LENMAX); + efx_dword_t txbuf[MCDI_TX_BUF_LEN(MC_CMD_PTP_IN_TRANSMIT_LENMAX)]; unsigned int good_syncs; unsigned int fast_syncs; @@ -1082,10 +1088,10 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) { struct efx_ptp_data *ptp_data = efx->ptp_data; + u8 type = efx_tx_csum_type_skb(skb); struct efx_tx_queue *tx_queue; - u8 type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0; - tx_queue = &ptp_data->channel->tx_queue[type]; + tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type); if (tx_queue && tx_queue->timestamping) { efx_enqueue_skb(tx_queue, skb); } else { diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index e71d6d37a317..3c5227afd497 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -21,6 +21,7 @@ #include "efx_common.h" #include "efx_channels.h" #include "nic.h" +#include "mcdi_port_common.h" #include "selftest.h" #include "workarounds.h" @@ -67,7 +68,7 @@ static const char *const efx_interrupt_mode_names[] = { STRING_TABLE_LOOKUP(efx->interrupt_mode, efx_interrupt_mode) /** - * efx_loopback_state - persistent state during a loopback selftest + * struct efx_loopback_state - persistent state during a loopback selftest * @flush: Drop all packets in efx_loopback_rx_packet * @packet_count: Number of packets being used in this test * @skbs: An array of skbs transmitted @@ -99,10 +100,8 @@ static int efx_test_phy_alive(struct efx_nic *efx, struct efx_self_tests *tests) { int rc = 0; - if (efx->phy_op->test_alive) { - rc = efx->phy_op->test_alive(efx); - tests->phy_alive = rc ? -1 : 1; - } + rc = efx_mcdi_phy_test_alive(efx); + tests->phy_alive = rc ? 
-1 : 1; return rc; } @@ -257,11 +256,8 @@ static int efx_test_phy(struct efx_nic *efx, struct efx_self_tests *tests, { int rc; - if (!efx->phy_op->run_tests) - return 0; - mutex_lock(&efx->mac_lock); - rc = efx->phy_op->run_tests(efx, tests->phy_ext, flags); + rc = efx_mcdi_phy_run_tests(efx, tests->phy_ext, flags); mutex_unlock(&efx->mac_lock); if (rc == -EPERM) rc = 0; @@ -660,8 +656,8 @@ static int efx_test_loopbacks(struct efx_nic *efx, struct efx_self_tests *tests, /* Test all enabled types of TX queue */ efx_for_each_channel_tx_queue(tx_queue, channel) { - state->offload_csum = (tx_queue->label & - EFX_TXQ_TYPE_OFFLOAD); + state->offload_csum = (tx_queue->type & + EFX_TXQ_TYPE_OUTER_CSUM); rc = efx_test_loopback(tx_queue, &tests->loopback[mode]); if (rc) diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h index ca88ebb4f6b1..a23f085bf298 100644 --- a/drivers/net/ethernet/sfc/selftest.h +++ b/drivers/net/ethernet/sfc/selftest.h @@ -15,8 +15,8 @@ */ struct efx_loopback_self_tests { - int tx_sent[EFX_TXQ_TYPES]; - int tx_done[EFX_TXQ_TYPES]; + int tx_sent[EFX_MAX_TXQ_PER_CHANNEL]; + int tx_done[EFX_MAX_TXQ_PER_CHANNEL]; int rx_good; int rx_bad; }; diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index a7ea630bb5e6..16347a6d0c47 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -994,7 +994,6 @@ const struct efx_nic_type siena_a0_nic_type = { .start_stats = efx_mcdi_mac_start_stats, .pull_stats = efx_mcdi_mac_pull_stats, .stop_stats = efx_mcdi_mac_stop_stats, - .set_id_led = efx_mcdi_set_id_led, .push_irq_moderation = siena_push_irq_moderation, .reconfigure_mac = siena_mac_reconfigure, .check_mac_fault = efx_mcdi_mac_check_fault, diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 727201d5eb24..1665529a7271 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -59,13 +59,12 @@ u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) { - /* We need to consider both queues that the net core sees as one */ - struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1); + /* We need to consider all queues that the net core sees as one */ struct efx_nic *efx = txq1->efx; + struct efx_tx_queue *txq2; unsigned int fill_level; - fill_level = max(txq1->insert_count - txq1->old_read_count, - txq2->insert_count - txq2->old_read_count); + fill_level = efx_channel_tx_old_fill_level(txq1->channel); if (likely(fill_level < efx->txq_stop_thresh)) return; @@ -85,11 +84,10 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) */ netif_tx_stop_queue(txq1->core_txq); smp_mb(); - txq1->old_read_count = READ_ONCE(txq1->read_count); - txq2->old_read_count = READ_ONCE(txq2->read_count); + efx_for_each_channel_tx_queue(txq2, txq1->channel) + txq2->old_read_count = READ_ONCE(txq2->read_count); - fill_level = max(txq1->insert_count - txq1->old_read_count, - txq2->insert_count - txq2->old_read_count); + fill_level = efx_channel_tx_old_fill_level(txq1->channel); EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries); if (likely(fill_level < efx->txq_stop_thresh)) { smp_mb(); @@ -266,8 +264,45 @@ static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, ++tx_queue->insert_count; return 0; } + +/* Decide whether we can use TX PIO, ie. write packet data directly into + * a buffer on the device. 
This can reduce latency at the expense of + * throughput, so we only do this if both hardware and software TX rings + * are empty, including all queues for the channel. This also ensures that + * only one packet at a time can be using the PIO buffer. If the xmit_more + * flag is set then we don't use this - there'll be another packet along + * shortly and we want to hold off the doorbell. + */ +static bool efx_tx_may_pio(struct efx_tx_queue *tx_queue) +{ + struct efx_channel *channel = tx_queue->channel; + + if (!tx_queue->piobuf) + return false; + + EFX_WARN_ON_ONCE_PARANOID(!channel->efx->type->option_descriptors); + + efx_for_each_channel_tx_queue(tx_queue, channel) + if (!efx_nic_tx_is_empty(tx_queue, tx_queue->packet_write_count)) + return false; + + return true; +} #endif /* EFX_USE_PIO */ +/* Send any pending traffic for a channel. xmit_more is shared across all + * queues for a channel, so we must check all of them. + */ +static void efx_tx_send_pending(struct efx_channel *channel) +{ + struct efx_tx_queue *q; + + efx_for_each_channel_tx_queue(q, channel) { + if (q->xmit_pending) + efx_nic_push_buffers(q); + } +} + /* * Add a socket buffer to a TX queue * @@ -303,8 +338,18 @@ netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb * size limit. */ if (segments) { - EFX_WARN_ON_ONCE_PARANOID(!tx_queue->handle_tso); - rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped); + switch (tx_queue->tso_version) { + case 1: + rc = efx_enqueue_skb_tso(tx_queue, skb, &data_mapped); + break; + case 2: + rc = efx_ef10_tx_tso_desc(tx_queue, skb, &data_mapped); + break; + case 0: /* No TSO on this queue, SW fallback needed */ + default: + rc = -EINVAL; + break; + } if (rc == -EINVAL) { rc = efx_tx_tso_fallback(tx_queue, skb); tx_queue->tso_fallbacks++; @@ -315,7 +360,7 @@ netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb goto err; #ifdef EFX_USE_PIO } else if (skb_len <= efx_piobuf_size && !xmit_more && - efx_nic_may_tx_pio(tx_queue)) { + efx_tx_may_pio(tx_queue)) { /* Use PIO for short packets with an empty queue. */ if (efx_enqueue_skb_pio(tx_queue, skb)) goto err; @@ -336,21 +381,11 @@ netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb efx_tx_maybe_stop_queue(tx_queue); - /* Pass off to hardware */ - if (__netdev_tx_sent_queue(tx_queue->core_txq, skb_len, xmit_more)) { - struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); + tx_queue->xmit_pending = true; - /* There could be packets left on the partner queue if - * xmit_more was set. If we do not push those they - * could be left for a long time and cause a netdev watchdog. - */ - if (txq2->xmit_more_available) - efx_nic_push_buffers(txq2); - - efx_nic_push_buffers(tx_queue); - } else { - tx_queue->xmit_more_available = xmit_more; - } + /* Pass off to hardware */ + if (__netdev_tx_sent_queue(tx_queue->core_txq, skb_len, xmit_more)) + efx_tx_send_pending(tx_queue->channel); if (segments) { tx_queue->tso_bursts++; @@ -371,14 +406,8 @@ err: * on this queue or a partner queue then we need to push here to get the * previous packets out. */ - if (!xmit_more) { - struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); - - if (txq2->xmit_more_available) - efx_nic_push_buffers(txq2); - - efx_nic_push_buffers(tx_queue); - } + if (!xmit_more) + efx_tx_send_pending(tx_queue->channel); return NETDEV_TX_OK; } @@ -472,13 +501,10 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, } /* Initiate a packet transmission. 
We use one channel per CPU - * (sharing when we have more CPUs than channels). On Falcon, the TX - * completion events will be directed back to the CPU that transmitted - * the packet, which should be cache-efficient. + * (sharing when we have more CPUs than channels). * * Context: non-blocking. - * Note that returning anything other than NETDEV_TX_OK will cause the - * OS to free the skb. + * Should always return NETDEV_TX_OK and consume the skb. */ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev) @@ -489,19 +515,39 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, EFX_WARN_ON_PARANOID(!netif_device_present(net_dev)); + index = skb_get_queue_mapping(skb); + type = efx_tx_csum_type_skb(skb); + if (index >= efx->n_tx_channels) { + index -= efx->n_tx_channels; + type |= EFX_TXQ_TYPE_HIGHPRI; + } + /* PTP "event" packet */ if (unlikely(efx_xmit_with_hwtstamp(skb)) && unlikely(efx_ptp_is_ptp_tx(efx, skb))) { + /* There may be existing transmits on the channel that are + * waiting for this packet to trigger the doorbell write. + * We need to send the packets at this point. + */ + efx_tx_send_pending(efx_get_tx_channel(efx, index)); return efx_ptp_tx(efx, skb); } - index = skb_get_queue_mapping(skb); - type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0; - if (index >= efx->n_tx_channels) { - index -= efx->n_tx_channels; - type |= EFX_TXQ_TYPE_HIGHPRI; - } tx_queue = efx_get_tx_queue(efx, index, type); + if (WARN_ON_ONCE(!tx_queue)) { + /* We don't have a TXQ of the right type. + * This should never happen, as we don't advertise offload + * features unless we can support them. + */ + dev_kfree_skb_any(skb); + /* If we're not expecting another transmit and we had something to push + * on this queue or a partner queue then we need to push here to get the + * previous packets out. + */ + if (!netdev_xmit_more()) + efx_tx_send_pending(efx_get_tx_channel(efx, index)); + return NETDEV_TX_OK; + } return __efx_enqueue_skb(tx_queue, skb); } @@ -552,7 +598,7 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue) tx_queue->core_txq = netdev_get_tx_queue(efx->net_dev, tx_queue->channel->channel + - ((tx_queue->label & EFX_TXQ_TYPE_HIGHPRI) ? + ((tx_queue->type & EFX_TXQ_TYPE_HIGHPRI) ? efx->n_tx_channels : 0)); } diff --git a/drivers/net/ethernet/sfc/tx.h b/drivers/net/ethernet/sfc/tx.h index a3cf06c5570d..f2c4d2f89919 100644 --- a/drivers/net/ethernet/sfc/tx.h +++ b/drivers/net/ethernet/sfc/tx.h @@ -18,4 +18,30 @@ unsigned int efx_tx_limit_len(struct efx_tx_queue *tx_queue, u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, size_t len); +/* What TXQ type will satisfy the checksum offloads required for this skb? */ +static inline unsigned int efx_tx_csum_type_skb(struct sk_buff *skb) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; /* no checksum offload */ + + if (skb->encapsulation && + skb_checksum_start_offset(skb) == skb_inner_transport_offset(skb)) { + /* we only advertise features for IPv4 and IPv6 checksums on + * encapsulated packets, so if the checksum is for the inner + * packet, it must be one of them; no further checking required. + */ + + /* Do we also need to offload the outer header checksum? 
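+	 * Only when the NIC itself generates the segments: for TSO over a
+	 * UDP tunnel (SKB_GSO_UDP_TUNNEL_CSUM without SKB_GSO_PARTIAL), each
+	 * segment emitted by the hardware needs its outer UDP checksum
+	 * rewritten.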
*/ + if (skb_shinfo(skb)->gso_segs > 1 && + !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + return EFX_TXQ_TYPE_OUTER_CSUM | EFX_TXQ_TYPE_INNER_CSUM; + return EFX_TXQ_TYPE_INNER_CSUM; + } + + /* similarly, we only advertise features for IPv4 and IPv6 checksums, + * so it must be one of them. No need for further checks. + */ + return EFX_TXQ_TYPE_OUTER_CSUM; +} #endif /* EFX_TX_H */ diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 793e234819a8..d530cde2b864 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -47,11 +47,12 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) goto fail1; } - /* Allocate hardware ring */ + /* Allocate hardware ring, determine TXQ type */ rc = efx_nic_probe_tx(tx_queue); if (rc) goto fail2; + tx_queue->channel->tx_queue_by_type[tx_queue->type] = tx_queue; return 0; fail2: @@ -78,18 +79,14 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->read_count = 0; tx_queue->old_read_count = 0; tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; - tx_queue->xmit_more_available = false; + tx_queue->xmit_pending = false; tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) && tx_queue->channel == efx_ptp_channel(efx)); tx_queue->completed_timestamp_major = 0; tx_queue->completed_timestamp_minor = 0; tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel); - - /* Set up default function pointers. These may get replaced by - * efx_nic_init_tx() based off NIC/queue capabilities. - */ - tx_queue->handle_tso = efx_enqueue_skb_tso; + tx_queue->tso_version = 0; /* Set up TX descriptor ring */ efx_nic_init_tx(tx_queue); @@ -116,7 +113,7 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) ++tx_queue->read_count; } - tx_queue->xmit_more_available = false; + tx_queue->xmit_pending = false; netdev_tx_reset_queue(tx_queue->core_txq); } @@ -141,6 +138,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) kfree(tx_queue->buffer); tx_queue->buffer = NULL; + tx_queue->channel->tx_queue_by_type[tx_queue->type] = NULL; } void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, @@ -242,7 +240,6 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) { unsigned int fill_level, pkts_compl = 0, bytes_compl = 0; struct efx_nic *efx = tx_queue->efx; - struct efx_tx_queue *txq2; EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask); @@ -261,9 +258,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && likely(efx->port_enabled) && likely(netif_device_present(efx->net_dev))) { - txq2 = efx_tx_queue_partner(tx_queue); - fill_level = max(tx_queue->insert_count - tx_queue->read_count, - txq2->insert_count - txq2->read_count); + fill_level = efx_channel_tx_fill_level(tx_queue->channel); if (fill_level <= efx->txq_wake_thresh) netif_tx_wake_queue(tx_queue->core_txq); } diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c index f94078f8ebe5..1fd08a04bd4e 100644 --- a/drivers/net/ethernet/silan/sc92031.c +++ b/drivers/net/ethernet/silan/sc92031.c @@ -301,6 +301,7 @@ struct sc92031_priv { /* for dev->get_stats */ long rx_value; + struct net_device *ndev; }; /* I don't know which registers can be safely read; however, I can guess @@ -829,10 +830,10 @@ static void _sc92031_link_tasklet(struct net_device *dev) } } -static void sc92031_tasklet(unsigned long data) +static void 
sc92031_tasklet(struct tasklet_struct *t) { - struct net_device *dev = (struct net_device *)data; - struct sc92031_priv *priv = netdev_priv(dev); + struct sc92031_priv *priv = from_tasklet(priv, t, tasklet); + struct net_device *dev = priv->ndev; void __iomem *port_base = priv->port_base; u32 intr_status, intr_mask; @@ -993,15 +994,15 @@ static int sc92031_open(struct net_device *dev) struct sc92031_priv *priv = netdev_priv(dev); struct pci_dev *pdev = priv->pdev; - priv->rx_ring = pci_alloc_consistent(pdev, RX_BUF_LEN, - &priv->rx_ring_dma_addr); + priv->rx_ring = dma_alloc_coherent(&pdev->dev, RX_BUF_LEN, + &priv->rx_ring_dma_addr, GFP_KERNEL); if (unlikely(!priv->rx_ring)) { err = -ENOMEM; goto out_alloc_rx_ring; } - priv->tx_bufs = pci_alloc_consistent(pdev, TX_BUF_TOT_LEN, - &priv->tx_bufs_dma_addr); + priv->tx_bufs = dma_alloc_coherent(&pdev->dev, TX_BUF_TOT_LEN, + &priv->tx_bufs_dma_addr, GFP_KERNEL); if (unlikely(!priv->tx_bufs)) { err = -ENOMEM; goto out_alloc_tx_bufs; @@ -1031,11 +1032,11 @@ static int sc92031_open(struct net_device *dev) return 0; out_request_irq: - pci_free_consistent(pdev, TX_BUF_TOT_LEN, priv->tx_bufs, - priv->tx_bufs_dma_addr); + dma_free_coherent(&pdev->dev, TX_BUF_TOT_LEN, priv->tx_bufs, + priv->tx_bufs_dma_addr); out_alloc_tx_bufs: - pci_free_consistent(pdev, RX_BUF_LEN, priv->rx_ring, - priv->rx_ring_dma_addr); + dma_free_coherent(&pdev->dev, RX_BUF_LEN, priv->rx_ring, + priv->rx_ring_dma_addr); out_alloc_rx_ring: return err; } @@ -1058,10 +1059,10 @@ static int sc92031_stop(struct net_device *dev) spin_unlock_bh(&priv->lock); free_irq(pdev->irq, dev); - pci_free_consistent(pdev, TX_BUF_TOT_LEN, priv->tx_bufs, - priv->tx_bufs_dma_addr); - pci_free_consistent(pdev, RX_BUF_LEN, priv->rx_ring, - priv->rx_ring_dma_addr); + dma_free_coherent(&pdev->dev, TX_BUF_TOT_LEN, priv->tx_bufs, + priv->tx_bufs_dma_addr); + dma_free_coherent(&pdev->dev, RX_BUF_LEN, priv->rx_ring, + priv->rx_ring_dma_addr); return 0; } @@ -1108,7 +1109,7 @@ static void sc92031_poll_controller(struct net_device *dev) disable_irq(irq); if (sc92031_interrupt(irq, dev) != IRQ_NONE) - sc92031_tasklet((unsigned long)dev); + sc92031_tasklet(&priv->tasklet); enable_irq(irq); } #endif @@ -1407,11 +1408,11 @@ static int sc92031_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (unlikely(err < 0)) goto out_set_dma_mask; - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (unlikely(err < 0)) goto out_set_dma_mask; @@ -1443,10 +1444,11 @@ static int sc92031_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev->ethtool_ops = &sc92031_ethtool_ops; priv = netdev_priv(dev); + priv->ndev = dev; spin_lock_init(&priv->lock); priv->port_base = port_base; priv->pdev = pdev; - tasklet_init(&priv->tasklet, sc92031_tasklet, (unsigned long)dev); + tasklet_setup(&priv->tasklet, sc92031_tasklet); /* Fudge tasklet count so the call to sc92031_enable_interrupts at * sc92031_open will work correctly */ tasklet_disable_nosync(&priv->tasklet); diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index cfa460c7db23..620c26f71be8 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -789,10 +789,9 @@ static u16 sis900_default_phy(struct net_device * net_dev) static void sis900_set_capability(struct net_device *net_dev, struct mii_phy *phy) { 
u16 cap; - u16 status; - status = mdio_read(net_dev, phy->phy_addr, MII_STATUS); - status = mdio_read(net_dev, phy->phy_addr, MII_STATUS); + mdio_read(net_dev, phy->phy_addr, MII_STATUS); + mdio_read(net_dev, phy->phy_addr, MII_STATUS); cap = MII_NWAY_CSMA_CD | ((phy->status & MII_STAT_CAN_TX_FDX)? MII_NWAY_TX_FDX:0) | @@ -1302,7 +1301,7 @@ static void sis630_set_eq(struct net_device *net_dev, u8 revision) /** * sis900_timer - sis900 timer routine - * @data: pointer to sis900 net device + * @t: timer list containing a pointer to sis900 net device * * On each timer ticks we check two things, * link status (ON/OFF) and link mode (10/100/Full/Half) @@ -1536,6 +1535,7 @@ static void sis900_read_mode(struct net_device *net_dev, int *speed, int *duplex /** * sis900_tx_timeout - sis900 transmit timeout routine * @net_dev: the net device to transmit + * @txqueue: index of hanging queue * * print transmit timeout status * disable interrupts and do some tasks diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index d950b312c418..51cd7dca91cd 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -374,13 +374,15 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ep->mii.phy_id_mask = 0x1f; ep->mii.reg_num_mask = 0x1f; - ring_space = pci_alloc_consistent(pdev, TX_TOTAL_SIZE, &ring_dma); + ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, &ring_dma, + GFP_KERNEL); if (!ring_space) goto err_out_iounmap; ep->tx_ring = ring_space; ep->tx_ring_dma = ring_dma; - ring_space = pci_alloc_consistent(pdev, RX_TOTAL_SIZE, &ring_dma); + ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE, &ring_dma, + GFP_KERNEL); if (!ring_space) goto err_out_unmap_tx; ep->rx_ring = ring_space; @@ -493,9 +495,11 @@ out: return ret; err_out_unmap_rx: - pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma); + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring, + ep->rx_ring_dma); err_out_unmap_tx: - pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma); + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring, + ep->tx_ring_dma); err_out_iounmap: pci_iounmap(pdev, ioaddr); err_out_free_netdev: @@ -918,8 +922,10 @@ static void epic_init_ring(struct net_device *dev) if (skb == NULL) break; skb_reserve(skb, 2); /* 16 byte align the IP header. */ - ep->rx_ring[i].bufaddr = pci_map_single(ep->pci_dev, - skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE); + ep->rx_ring[i].bufaddr = dma_map_single(&ep->pci_dev->dev, + skb->data, + ep->rx_buf_sz, + DMA_FROM_DEVICE); ep->rx_ring[i].rxstatus = DescOwn; } ep->dirty_rx = (unsigned int)(i - RX_RING_SIZE); @@ -955,8 +961,9 @@ static netdev_tx_t epic_start_xmit(struct sk_buff *skb, struct net_device *dev) entry = ep->cur_tx % TX_RING_SIZE; ep->tx_skbuff[entry] = skb; - ep->tx_ring[entry].bufaddr = pci_map_single(ep->pci_dev, skb->data, - skb->len, PCI_DMA_TODEVICE); + ep->tx_ring[entry].bufaddr = dma_map_single(&ep->pci_dev->dev, + skb->data, skb->len, + DMA_TO_DEVICE); if (free_count < TX_QUEUE_LEN/2) {/* Typical path */ ctrl_word = 0x100000; /* No interrupt */ } else if (free_count == TX_QUEUE_LEN/2) { @@ -1036,8 +1043,9 @@ static void epic_tx(struct net_device *dev, struct epic_private *ep) /* Free the original skb. 
*/ skb = ep->tx_skbuff[entry]; - pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, - skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&ep->pci_dev->dev, + ep->tx_ring[entry].bufaddr, skb->len, + DMA_TO_DEVICE); dev_consume_skb_irq(skb); ep->tx_skbuff[entry] = NULL; } @@ -1178,20 +1186,21 @@ static int epic_rx(struct net_device *dev, int budget) if (pkt_len < rx_copybreak && (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* 16 byte align the IP header */ - pci_dma_sync_single_for_cpu(ep->pci_dev, - ep->rx_ring[entry].bufaddr, - ep->rx_buf_sz, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&ep->pci_dev->dev, + ep->rx_ring[entry].bufaddr, + ep->rx_buf_sz, + DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, ep->rx_skbuff[entry]->data, pkt_len); skb_put(skb, pkt_len); - pci_dma_sync_single_for_device(ep->pci_dev, - ep->rx_ring[entry].bufaddr, - ep->rx_buf_sz, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&ep->pci_dev->dev, + ep->rx_ring[entry].bufaddr, + ep->rx_buf_sz, + DMA_FROM_DEVICE); } else { - pci_unmap_single(ep->pci_dev, - ep->rx_ring[entry].bufaddr, - ep->rx_buf_sz, PCI_DMA_FROMDEVICE); + dma_unmap_single(&ep->pci_dev->dev, + ep->rx_ring[entry].bufaddr, + ep->rx_buf_sz, + DMA_FROM_DEVICE); skb_put(skb = ep->rx_skbuff[entry], pkt_len); ep->rx_skbuff[entry] = NULL; } @@ -1213,8 +1222,10 @@ static int epic_rx(struct net_device *dev, int budget) if (skb == NULL) break; skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - ep->rx_ring[entry].bufaddr = pci_map_single(ep->pci_dev, - skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE); + ep->rx_ring[entry].bufaddr = dma_map_single(&ep->pci_dev->dev, + skb->data, + ep->rx_buf_sz, + DMA_FROM_DEVICE); work_done++; } /* AV: shouldn't we add a barrier here? */ @@ -1294,8 +1305,8 @@ static int epic_close(struct net_device *dev) ep->rx_ring[i].rxstatus = 0; /* Not owned by Epic chip. */ ep->rx_ring[i].buflength = 0; if (skb) { - pci_unmap_single(pdev, ep->rx_ring[i].bufaddr, - ep->rx_buf_sz, PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, ep->rx_ring[i].bufaddr, + ep->rx_buf_sz, DMA_FROM_DEVICE); dev_kfree_skb(skb); } ep->rx_ring[i].bufaddr = 0xBADF00D0; /* An invalid address. */ @@ -1305,8 +1316,8 @@ static int epic_close(struct net_device *dev) ep->tx_skbuff[i] = NULL; if (!skb) continue; - pci_unmap_single(pdev, ep->tx_ring[i].bufaddr, skb->len, - PCI_DMA_TODEVICE); + dma_unmap_single(&pdev->dev, ep->tx_ring[i].bufaddr, skb->len, + DMA_TO_DEVICE); dev_kfree_skb(skb); } @@ -1502,8 +1513,10 @@ static void epic_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct epic_private *ep = netdev_priv(dev); - pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma); - pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma); + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring, + ep->tx_ring_dma); + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring, + ep->rx_ring_dma); unregister_netdev(dev); pci_iounmap(pdev, ep->ioaddr); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 1c4fea9c3ec4..f6b73afd1879 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -535,10 +535,10 @@ static inline void smc_rcv(struct net_device *dev) /* * This is called to actually send a packet to the chip. 
*/ -static void smc_hardware_send_pkt(unsigned long data) +static void smc_hardware_send_pkt(struct tasklet_struct *t) { - struct net_device *dev = (struct net_device *)data; - struct smc_local *lp = netdev_priv(dev); + struct smc_local *lp = from_tasklet(lp, t, tx_task); + struct net_device *dev = lp->dev; void __iomem *ioaddr = lp->base; struct sk_buff *skb; unsigned int packet_no, len; @@ -688,7 +688,7 @@ smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * Allocation succeeded: push packet to the chip's own memory * immediately. */ - smc_hardware_send_pkt((unsigned long)dev); + smc_hardware_send_pkt(&lp->tx_task); } return NETDEV_TX_OK; @@ -1036,7 +1036,6 @@ static void smc_phy_configure(struct work_struct *work) int phyaddr = lp->mii.phy_id; int my_phy_caps; /* My PHY capabilities */ int my_ad_caps; /* My Advertised capabilities */ - int status; DBG(3, dev, "smc_program_phy()\n"); @@ -1110,7 +1109,7 @@ static void smc_phy_configure(struct work_struct *work) * auto-negotiation is restarted, sometimes it isn't ready and * the link does not come up. */ - status = smc_phy_read(dev, phyaddr, MII_ADVERTISE); + smc_phy_read(dev, phyaddr, MII_ADVERTISE); DBG(2, dev, "phy caps=%x\n", my_phy_caps); DBG(2, dev, "phy advertised caps=%x\n", my_ad_caps); @@ -1965,7 +1964,7 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, dev->netdev_ops = &smc_netdev_ops; dev->ethtool_ops = &smc_ethtool_ops; - tasklet_init(&lp->tx_task, smc_hardware_send_pkt, (unsigned long)dev); + tasklet_setup(&lp->tx_task, smc_hardware_send_pkt); INIT_WORK(&lp->phy_configure, smc_phy_configure); lp->dev = dev; lp->mii.phy_id_mask = 0x1f; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index fc168f85e7af..823d9a7184fe 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1196,9 +1196,8 @@ smsc911x_rx_fastforward(struct smsc911x_data *pdata, unsigned int pktwords) SMSC_WARN(pdata, hw, "Timed out waiting for " "RX FFWD to finish, RX_DP_CTRL: 0x%08X", val); } else { - unsigned int temp; while (pktwords--) - temp = smsc911x_reg_read(pdata, RX_DATA_FIFO); + smsc911x_reg_read(pdata, RX_DATA_FIFO); } } @@ -2055,7 +2054,6 @@ static int smsc911x_eeprom_write_location(struct smsc911x_data *pdata, u8 address, u8 data) { u32 op = E2P_CMD_EPC_CMD_ERASE_ | address; - u32 temp; int ret; SMSC_TRACE(pdata, drv, "address 0x%x, data 0x%x", address, data); @@ -2066,7 +2064,7 @@ static int smsc911x_eeprom_write_location(struct smsc911x_data *pdata, smsc911x_reg_write(pdata, E2P_DATA, (u32)data); /* Workaround for hardware read-after-write restriction */ - temp = smsc911x_reg_read(pdata, BYTE_TEST); + smsc911x_reg_read(pdata, BYTE_TEST); ret = smsc911x_eeprom_send_cmd(pdata, op); } diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index 42bef04d65ba..c1dab009415d 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -497,8 +497,9 @@ static void smsc9420_free_tx_ring(struct smsc9420_pdata *pd) if (skb) { BUG_ON(!pd->tx_buffers[i].mapping); - pci_unmap_single(pd->pdev, pd->tx_buffers[i].mapping, - skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&pd->pdev->dev, + pd->tx_buffers[i].mapping, skb->len, + DMA_TO_DEVICE); dev_kfree_skb_any(skb); } @@ -530,8 +531,9 @@ static void smsc9420_free_rx_ring(struct smsc9420_pdata *pd) dev_kfree_skb_any(pd->rx_buffers[i].skb); if (pd->rx_buffers[i].mapping) - pci_unmap_single(pd->pdev, pd->rx_buffers[i].mapping, - 
PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_unmap_single(&pd->pdev->dev, + pd->rx_buffers[i].mapping, + PKT_BUF_SZ, DMA_FROM_DEVICE); pd->rx_ring[i].status = 0; pd->rx_ring[i].length = 0; @@ -749,8 +751,8 @@ static void smsc9420_rx_handoff(struct smsc9420_pdata *pd, const int index, dev->stats.rx_packets++; dev->stats.rx_bytes += packet_length; - pci_unmap_single(pd->pdev, pd->rx_buffers[index].mapping, - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_unmap_single(&pd->pdev->dev, pd->rx_buffers[index].mapping, + PKT_BUF_SZ, DMA_FROM_DEVICE); pd->rx_buffers[index].mapping = 0; skb = pd->rx_buffers[index].skb; @@ -782,9 +784,9 @@ static int smsc9420_alloc_rx_buffer(struct smsc9420_pdata *pd, int index) if (unlikely(!skb)) return -ENOMEM; - mapping = pci_map_single(pd->pdev, skb_tail_pointer(skb), - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(pd->pdev, mapping)) { + mapping = dma_map_single(&pd->pdev->dev, skb_tail_pointer(skb), + PKT_BUF_SZ, DMA_FROM_DEVICE); + if (dma_mapping_error(&pd->pdev->dev, mapping)) { dev_kfree_skb_any(skb); netif_warn(pd, rx_err, pd->dev, "pci_map_single failed!\n"); return -ENOMEM; @@ -901,8 +903,10 @@ static void smsc9420_complete_tx(struct net_device *dev) BUG_ON(!pd->tx_buffers[index].skb); BUG_ON(!pd->tx_buffers[index].mapping); - pci_unmap_single(pd->pdev, pd->tx_buffers[index].mapping, - pd->tx_buffers[index].skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&pd->pdev->dev, + pd->tx_buffers[index].mapping, + pd->tx_buffers[index].skb->len, + DMA_TO_DEVICE); pd->tx_buffers[index].mapping = 0; dev_kfree_skb_any(pd->tx_buffers[index].skb); @@ -932,9 +936,9 @@ static netdev_tx_t smsc9420_hard_start_xmit(struct sk_buff *skb, BUG_ON(pd->tx_buffers[index].skb); BUG_ON(pd->tx_buffers[index].mapping); - mapping = pci_map_single(pd->pdev, skb->data, - skb->len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(pd->pdev, mapping)) { + mapping = dma_map_single(&pd->pdev->dev, skb->data, skb->len, + DMA_TO_DEVICE); + if (dma_mapping_error(&pd->pdev->dev, mapping)) { netif_warn(pd, tx_err, pd->dev, "pci_map_single failed, dropping packet\n"); return NETDEV_TX_BUSY; @@ -1522,7 +1526,7 @@ smsc9420_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto out_free_netdev_2; } - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) { netdev_err(dev, "No usable DMA configuration, aborting\n"); goto out_free_regions_3; } @@ -1540,10 +1544,9 @@ smsc9420_probe(struct pci_dev *pdev, const struct pci_device_id *id) pd = netdev_priv(dev); /* pci descriptors are created in the PCI consistent area */ - pd->rx_ring = pci_alloc_consistent(pdev, - sizeof(struct smsc9420_dma_desc) * RX_RING_SIZE + - sizeof(struct smsc9420_dma_desc) * TX_RING_SIZE, - &pd->rx_dma_addr); + pd->rx_ring = dma_alloc_coherent(&pdev->dev, + sizeof(struct smsc9420_dma_desc) * (RX_RING_SIZE + TX_RING_SIZE), + &pd->rx_dma_addr, GFP_KERNEL); if (!pd->rx_ring) goto out_free_io_4; @@ -1599,8 +1602,9 @@ smsc9420_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; out_free_dmadesc_5: - pci_free_consistent(pdev, sizeof(struct smsc9420_dma_desc) * - (RX_RING_SIZE + TX_RING_SIZE), pd->rx_ring, pd->rx_dma_addr); + dma_free_coherent(&pdev->dev, + sizeof(struct smsc9420_dma_desc) * (RX_RING_SIZE + TX_RING_SIZE), + pd->rx_ring, pd->rx_dma_addr); out_free_io_4: iounmap(virt_addr - LAN9420_CPSR_ENDIAN_OFFSET); out_free_regions_3: @@ -1632,8 +1636,9 @@ static void smsc9420_remove(struct pci_dev *pdev) BUG_ON(!pd->tx_ring); BUG_ON(!pd->rx_ring); - 
pci_free_consistent(pdev, sizeof(struct smsc9420_dma_desc) * - (RX_RING_SIZE + TX_RING_SIZE), pd->rx_ring, pd->rx_dma_addr); + dma_free_coherent(&pdev->dev, + sizeof(struct smsc9420_dma_desc) * (RX_RING_SIZE + TX_RING_SIZE), + pd->rx_ring, pd->rx_dma_addr); iounmap(pd->ioaddr - LAN9420_CPSR_ENDIAN_OFFSET); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 81b554dd7221..501b9c7aba56 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1585,7 +1585,7 @@ static int ave_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); - ndev = alloc_etherdev(sizeof(struct ave_private)); + ndev = devm_alloc_etherdev(dev, sizeof(struct ave_private)); if (!ndev) { dev_err(dev, "can't allocate ethernet device\n"); return -ENOMEM; @@ -1632,7 +1632,7 @@ static int ave_probe(struct platform_device *pdev) } ret = dma_set_mask(dev, dma_mask); if (ret) - goto out_free_netdev; + return ret; priv->tx.ndesc = AVE_NR_TXDESC; priv->rx.ndesc = AVE_NR_RXDESC; @@ -1645,10 +1645,8 @@ static int ave_probe(struct platform_device *pdev) if (!name) break; priv->clk[i] = devm_clk_get(dev, name); - if (IS_ERR(priv->clk[i])) { - ret = PTR_ERR(priv->clk[i]); - goto out_free_netdev; - } + if (IS_ERR(priv->clk[i])) + return PTR_ERR(priv->clk[i]); priv->nclks++; } @@ -1657,10 +1655,8 @@ static int ave_probe(struct platform_device *pdev) if (!name) break; priv->rst[i] = devm_reset_control_get_shared(dev, name); - if (IS_ERR(priv->rst[i])) { - ret = PTR_ERR(priv->rst[i]); - goto out_free_netdev; - } + if (IS_ERR(priv->rst[i])) + return PTR_ERR(priv->rst[i]); priv->nrsts++; } @@ -1669,26 +1665,23 @@ static int ave_probe(struct platform_device *pdev) 1, 0, &args); if (ret) { dev_err(dev, "can't get syscon-phy-mode property\n"); - goto out_free_netdev; + return ret; } priv->regmap = syscon_node_to_regmap(args.np); of_node_put(args.np); if (IS_ERR(priv->regmap)) { dev_err(dev, "can't map syscon-phy-mode\n"); - ret = PTR_ERR(priv->regmap); - goto out_free_netdev; + return PTR_ERR(priv->regmap); } ret = priv->data->get_pinmode(priv, phy_mode, args.args[0]); if (ret) { dev_err(dev, "invalid phy-mode setting\n"); - goto out_free_netdev; + return ret; } priv->mdio = devm_mdiobus_alloc(dev); - if (!priv->mdio) { - ret = -ENOMEM; - goto out_free_netdev; - } + if (!priv->mdio) + return -ENOMEM; priv->mdio->priv = ndev; priv->mdio->parent = dev; priv->mdio->read = ave_mdiobus_read; @@ -1725,8 +1718,6 @@ static int ave_probe(struct platform_device *pdev) out_del_napi: netif_napi_del(&priv->napi_rx); netif_napi_del(&priv->napi_tx); -out_free_netdev: - free_netdev(ndev); return ret; } @@ -1739,7 +1730,6 @@ static int ave_remove(struct platform_device *pdev) unregister_netdev(ndev); netif_napi_del(&priv->napi_rx); netif_napi_del(&priv->napi_tx); - free_netdev(ndev); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 9a47c5aec91a..53f14c5a9e02 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -3,7 +3,7 @@ config STMMAC_ETH tristate "STMicroelectronics Multi-Gigabit Ethernet driver" depends on HAS_IOMEM && HAS_DMA select MII - select MDIO_XPCS + select PCS_XPCS select PAGE_POOL select PHYLINK select CRC32 @@ -209,6 +209,16 @@ config DWMAC_IMX8 device driver. This driver is used for i.MX8 series like iMX8MP/iMX8DXL GMAC ethernet controller. 
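The conversions above are deliberately mechanical: each pci_map_single()/pci_alloc_consistent()/pci_set_dma_mask() call becomes the matching dma_map_single()/dma_alloc_coherent()/dma_set_mask() call on &pdev->dev with the direction and GFP flags spelled out, and each tasklet_init() callback that took an unsigned long is rebuilt around tasklet_setup() and from_tasklet(). The following minimal sketch condenses both patterns into one place; the foo_* names are hypothetical stand-ins, not code from any driver patched above.

/* Hedged sketch; foo_* is a made-up driver illustrating the two
 * recurring conversions: legacy PCI DMA wrappers to the generic DMA
 * API, and unsigned-long tasklets to tasklet_setup()/from_tasklet().
 */
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/pci.h>

struct foo_priv {
	struct pci_dev *pdev;
	struct net_device *ndev;	/* back-pointer the old cast used to provide */
	struct tasklet_struct tasklet;
	void *rx_ring;
	dma_addr_t rx_ring_dma;
};

/* New-style callback: from_tasklet() is container_of() on the tasklet
 * member, replacing the (struct net_device *)data cast used before.
 */
static void foo_tasklet(struct tasklet_struct *t)
{
	struct foo_priv *priv = from_tasklet(priv, t, tasklet);

	netdev_dbg(priv->ndev, "tasklet ran\n");
}

static int foo_init(struct foo_priv *priv, size_t ring_bytes)
{
	struct device *dev = &priv->pdev->dev;

	/* pci_set_dma_mask() and pci_set_consistent_dma_mask() collapse
	 * into one call on the underlying struct device.
	 */
	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))
		return -EIO;

	/* pci_alloc_consistent() implied GFP_ATOMIC; dma_alloc_coherent()
	 * makes the allocation class an explicit argument.
	 */
	priv->rx_ring = dma_alloc_coherent(dev, ring_bytes,
					   &priv->rx_ring_dma, GFP_KERNEL);
	if (!priv->rx_ring)
		return -ENOMEM;

	/* tasklet_init(&t, fn, (unsigned long)dev) becomes tasklet_setup() */
	tasklet_setup(&priv->tasklet, foo_tasklet);
	return 0;
}

Teardown mirrors this: dma_free_coherent(&pdev->dev, ...) replaces pci_free_consistent(pdev, ...), and PCI_DMA_TODEVICE/PCI_DMA_FROMDEVICE become DMA_TO_DEVICE/DMA_FROM_DEVICE in the map, unmap and sync calls.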
+config DWMAC_INTEL_PLAT
+	tristate "Intel dwmac support"
+	depends on OF && COMMON_CLK
+	depends on STMMAC_ETH
+	help
+	  Support for Ethernet controllers on Intel SoCs.
+
+	  This selects the Intel platform-specific glue layer support for
+	  the stmmac device driver. This driver is used for the Intel Keem Bay
+	  SoC.
 
 endif
 
 config DWMAC_INTEL
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 295615ab36a7..24e6145d4eae 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_DWMAC_STM32)	+= dwmac-stm32.o
 obj-$(CONFIG_DWMAC_SUNXI)	+= dwmac-sunxi.o
 obj-$(CONFIG_DWMAC_SUN8I)	+= dwmac-sun8i.o
 obj-$(CONFIG_DWMAC_DWC_QOS_ETH)	+= dwmac-dwc-qos-eth.o
+obj-$(CONFIG_DWMAC_INTEL_PLAT)	+= dwmac-intel-plat.o
 obj-$(CONFIG_DWMAC_GENERIC)	+= dwmac-generic.o
 obj-$(CONFIG_DWMAC_IMX8)	+= dwmac-imx.o
 stmmac-platform-objs:= stmmac_platform.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index 52971f5293aa..d2cdc02d9f94 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -46,7 +46,7 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
 
 	while (len != 0) {
 		tx_q->tx_skbuff[entry] = NULL;
-		entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+		entry = STMMAC_GET_ENTRY(entry, priv->dma_tx_size);
 		desc = tx_q->dma_tx + entry;
 
 		if (len > bmax) {
@@ -137,7 +137,7 @@ static void refill_desc3(void *priv_ptr, struct dma_desc *p)
 		 */
 		p->des3 = cpu_to_le32((unsigned int)(rx_q->dma_rx_phy +
 				      (((rx_q->dirty_rx) + 1) %
-				       DMA_RX_SIZE) *
+				       priv->dma_rx_size) *
 				      sizeof(struct dma_desc)));
 }
 
@@ -154,7 +154,8 @@ static void clean_desc3(void *priv_ptr, struct dma_desc *p)
 		 * to keep explicit chaining in the descriptor.
 		 */
 		p->des3 = cpu_to_le32((unsigned int)((tx_q->dma_tx_phy +
-				      ((tx_q->dirty_tx + 1) % DMA_TX_SIZE))
+				      ((tx_q->dirty_tx + 1) %
+				       priv->dma_tx_size))
 				      * sizeof(struct dma_desc)));
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 127f75862962..df7de50497a0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -15,7 +15,7 @@
 #include <linux/netdevice.h>
 #include <linux/stmmac.h>
 #include <linux/phy.h>
-#include <linux/mdio-xpcs.h>
+#include <linux/pcs/pcs-xpcs.h>
 #include <linux/module.h>
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define STMMAC_VLAN_TAG_USED
@@ -42,9 +42,16 @@
 
 #define STMMAC_CHAN0	0	/* Always supported and default for all chips */
 
-/* These need to be power of two, and >= 4 */
-#define DMA_TX_SIZE 512
-#define DMA_RX_SIZE 512
+/* TX and RX descriptor ring lengths; each must be a power of two.
+ * A TX ring shorter than 64 descriptors may cause transmit queue timeouts.
+ * An RX ring shorter than 64 descriptors may cause inconsistent Rx chain errors.
+ */
+#define DMA_MIN_TX_SIZE		64
+#define DMA_MAX_TX_SIZE		1024
+#define DMA_DEFAULT_TX_SIZE	512
+#define DMA_MIN_RX_SIZE		64
+#define DMA_MAX_RX_SIZE		1024
+#define DMA_DEFAULT_RX_SIZE	512
 #define STMMAC_GET_ENTRY(x, size)	((x + 1) & (size - 1))
 
 #undef FRAME_FILTER_DEBUG
@@ -474,6 +481,8 @@ struct mac_device_info {
 	unsigned int num_vlan;
 	u32 vlan_filter[32];
 	unsigned int promisc;
+	bool vlan_fail_q_en;
+	u8 vlan_fail_q;
 };
 
 struct stmmac_rx_routing {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 3c5df5eeed6c..efef5476a577 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -129,8 +129,7 @@ static void imx_dwmac_exit(struct platform_device *pdev, void *priv)
 {
 	struct imx_priv_data *dwmac = priv;
 
-	if (dwmac->clk_tx)
-		clk_disable_unprepare(dwmac->clk_tx);
+	clk_disable_unprepare(dwmac->clk_tx);
 	clk_disable_unprepare(dwmac->clk_mem);
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
new file mode 100644
index 000000000000..f61cb997a8f6
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Intel DWMAC platform driver
+ *
+ * Copyright(C) 2020 Intel Corporation
+ */
+
+#include <linux/ethtool.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/stmmac.h>
+
+#include "dwmac4.h"
+#include "stmmac.h"
+#include "stmmac_platform.h"
+
+struct intel_dwmac {
+	struct device *dev;
+	struct clk *tx_clk;
+	const struct intel_dwmac_data *data;
+};
+
+struct intel_dwmac_data {
+	void (*fix_mac_speed)(void *priv, unsigned int speed);
+	unsigned long ptp_ref_clk_rate;
+	unsigned long tx_clk_rate;
+	bool tx_clk_en;
+};
+
+static void kmb_eth_fix_mac_speed(void *priv, unsigned int speed)
+{
+	struct intel_dwmac *dwmac = priv;
+	unsigned long rate;
+	int ret;
+
+	rate = clk_get_rate(dwmac->tx_clk);
+
+	switch (speed) {
+	case SPEED_1000:
+		rate = 125000000;
+		break;
+
+	case SPEED_100:
+		rate = 25000000;
+		break;
+
+	case SPEED_10:
+		rate = 2500000;
+		break;
+
+	default:
+		dev_err(dwmac->dev, "Invalid speed\n");
+		break;
+	}
+
+	ret = clk_set_rate(dwmac->tx_clk, rate);
+	if (ret)
+		dev_err(dwmac->dev, "Failed to configure tx clock rate\n");
+}
+
+static const struct intel_dwmac_data kmb_data = {
+	.fix_mac_speed = kmb_eth_fix_mac_speed,
+	.ptp_ref_clk_rate = 200000000,
+	.tx_clk_rate = 125000000,
+	.tx_clk_en = true,
+};
+
+static const struct of_device_id intel_eth_plat_match[] = {
+	{ .compatible = "intel,keembay-dwmac", .data = &kmb_data },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, intel_eth_plat_match);
+
+static int intel_eth_plat_probe(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	const struct of_device_id *match;
+	struct intel_dwmac *dwmac;
+	unsigned long rate;
+	int ret;
+
+	plat_dat = priv->plat;
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		return ret;
+
+	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	if (IS_ERR(plat_dat)) {
+		dev_err(&pdev->dev, "dt configuration failed\n");
+		return PTR_ERR(plat_dat);
+	}
+
+	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+	if (!dwmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
+
+	dwmac->dev = &pdev->dev;
+	dwmac->tx_clk = NULL;
+
+	match = of_match_device(intel_eth_plat_match, &pdev->dev);
+	if (match && match->data) {
+		dwmac->data = (const struct intel_dwmac_data *)match->data;
+
+		if (dwmac->data->fix_mac_speed)
+			plat_dat->fix_mac_speed = dwmac->data->fix_mac_speed;
+
+		/* Enable TX clock */
+		if (dwmac->data->tx_clk_en) {
+			dwmac->tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
+			if (IS_ERR(dwmac->tx_clk))
+				goto err_remove_config_dt;
+
+			clk_prepare_enable(dwmac->tx_clk);
+
+			/* Check and configure TX clock rate */
+			rate = clk_get_rate(dwmac->tx_clk);
+			if (dwmac->data->tx_clk_rate &&
+			    rate != dwmac->data->tx_clk_rate) {
+				rate = dwmac->data->tx_clk_rate;
+				ret = clk_set_rate(dwmac->tx_clk, rate);
+				if (ret) {
+					dev_err(&pdev->dev,
+						"Failed to set tx_clk\n");
+					return ret;
+				}
+			}
+		}
+
+		/* Check and configure PTP ref clock rate */
+		rate = clk_get_rate(plat_dat->clk_ptp_ref);
+		if (dwmac->data->ptp_ref_clk_rate &&
+		    rate != dwmac->data->ptp_ref_clk_rate) {
+			rate = dwmac->data->ptp_ref_clk_rate;
+			ret = clk_set_rate(plat_dat->clk_ptp_ref, rate);
+			if (ret) {
+				dev_err(&pdev->dev,
+					"Failed to set clk_ptp_ref\n");
+				return ret;
+			}
+		}
+	}
+
+	plat_dat->bsp_priv = dwmac;
+	plat_dat->eee_usecs_rate = plat_dat->clk_ptp_rate;
+
+	if (plat_dat->eee_usecs_rate > 0) {
+		u32 tx_lpi_usec;
+
+		tx_lpi_usec = (plat_dat->eee_usecs_rate / 1000000) - 1;
+		writel(tx_lpi_usec, stmmac_res.addr + GMAC_1US_TIC_COUNTER);
+	}
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret) {
+		clk_disable_unprepare(dwmac->tx_clk);
+		goto err_remove_config_dt;
+	}
+
+	return 0;
+
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
+}
+
+static int intel_eth_plat_remove(struct platform_device *pdev)
+{
+	struct intel_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
+	int ret;
+
+	ret = stmmac_pltfr_remove(pdev);
+	clk_disable_unprepare(dwmac->tx_clk);
+
+	return ret;
+}
+
+static struct platform_driver intel_eth_plat_driver = {
+	.probe = intel_eth_plat_probe,
+	.remove = intel_eth_plat_remove,
+	.driver = {
+		.name = "intel-eth-plat",
+		.pm = &stmmac_pltfr_pm_ops,
+		.of_match_table = intel_eth_plat_match,
+	},
+};
+module_platform_driver(intel_eth_plat_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel DWMAC platform driver");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 9e6d60e75f85..b6e5e3e36b63 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -6,6 +6,7 @@
 #include <linux/pci.h>
 #include <linux/dmi.h>
 #include "dwmac-intel.h"
+#include "dwmac4.h"
 #include "stmmac.h"
 
 struct intel_priv_data {
@@ -295,6 +296,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	plat->axi->axi_blen[2] = 16;
 
 	plat->ptp_max_adj = plat->clk_ptp_rate;
+	plat->eee_usecs_rate = plat->clk_ptp_rate;
 
 	/* Set system clock */
 	plat->stmmac_clk = clk_register_fixed_rate(&pdev->dev,
@@ -321,6 +323,11 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	/* Set the maxmtu to a default of JUMBO_LEN */
 	plat->maxmtu = JUMBO_LEN;
 
+	plat->vlan_fail_q_en = true;
+
+	/* Use the last Rx queue */
+	plat->vlan_fail_q = plat->rx_queues_to_use - 1;
+
 	return 0;
 }
 
@@ -618,6 +625,13 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	if (ret)
 		return ret;
 
+	if (plat->eee_usecs_rate > 0) {
+		u32 tx_lpi_usec;
+
+		tx_lpi_usec = (plat->eee_usecs_rate / 1000000) - 1;
+		writel(tx_lpi_usec, res.addr +
GMAC_1US_TIC_COUNTER); + } + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 2d5573b3dee1..6ef30252bfe0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /** - * dwmac-rk.c - Rockchip RK3288 DWMAC specific glue layer + * DOC: dwmac-rk.c - Rockchip RK3288 DWMAC specific glue layer * * Copyright (C) 2014 Chen-Zhi (Roger Chen) * diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 61f3249bd724..592b043f9676 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -76,6 +76,7 @@ #define GMAC_PACKET_FILTER_HPF BIT(10) #define GMAC_PACKET_FILTER_VTFE BIT(16) #define GMAC_PACKET_FILTER_IPFE BIT(20) +#define GMAC_PACKET_FILTER_RA BIT(31) #define GMAC_MAX_PERFECT_ADDRESSES 128 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index ecd834e0e121..002791b77356 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -618,7 +618,18 @@ static void dwmac4_set_filter(struct mac_device_info *hw, value &= ~GMAC_PACKET_FILTER_PM; value &= ~GMAC_PACKET_FILTER_PR; if (dev->flags & IFF_PROMISC) { - value = GMAC_PACKET_FILTER_PR | GMAC_PACKET_FILTER_PCF; + /* VLAN Tag Filter Fail Packets Queuing */ + if (hw->vlan_fail_q_en) { + value = readl(ioaddr + GMAC_RXQ_CTRL4); + value &= ~GMAC_RXQCTRL_VFFQ_MASK; + value |= GMAC_RXQCTRL_VFFQE | + (hw->vlan_fail_q << GMAC_RXQCTRL_VFFQ_SHIFT); + writel(value, ioaddr + GMAC_RXQ_CTRL4); + value = GMAC_PACKET_FILTER_PR | GMAC_PACKET_FILTER_RA; + } else { + value = GMAC_PACKET_FILTER_PR | GMAC_PACKET_FILTER_PCF; + } + } else if ((dev->flags & IFF_ALLMULTI) || (netdev_mc_count(dev) > hw->multicast_filter_bins)) { /* Pass all multi */ @@ -680,7 +691,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, writel(value, ioaddr + GMAC_PACKET_FILTER); - if (dev->flags & IFF_PROMISC) { + if (dev->flags & IFF_PROMISC && !hw->vlan_fail_q_en) { if (!hw->promisc) { hw->promisc = 1; dwmac4_vlan_promisc_enable(dev, hw); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index eff82065a501..c6540b003b43 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -494,10 +494,9 @@ static void dwmac4_set_vlan(struct dma_desc *p, u32 type) p->des2 |= cpu_to_le32(type & TDES2_VLAN_TAG_MASK); } -static int dwmac4_get_rx_header_len(struct dma_desc *p, unsigned int *len) +static void dwmac4_get_rx_header_len(struct dma_desc *p, unsigned int *len) { *len = le32_to_cpu(p->des2) & RDES2_HL; - return 0; } static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 3e8faa96b4d4..56b0762c1276 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -92,6 +92,12 @@ #define TCEIE BIT(0) #define DMA_ECC_INT_STATUS 0x00001088 +/* EQoS version 5.xx VLAN Tag Filter Fail Packets Queuing */ +#define GMAC_RXQ_CTRL4 0x00000094 +#define GMAC_RXQCTRL_VFFQ_MASK GENMASK(19, 17) +#define 
GMAC_RXQCTRL_VFFQ_SHIFT 17 +#define GMAC_RXQCTRL_VFFQE BIT(16) + int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp); int dwmac5_safety_feat_irq_status(struct net_device *ndev, void __iomem *ioaddr, unsigned int asp, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index c3d654cfa9ef..0aaf19ab5672 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -286,11 +286,10 @@ static int dwxgmac2_get_rx_hash(struct dma_desc *p, u32 *hash, return -EINVAL; } -static int dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len) +static void dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len) { if (le32_to_cpu(p->des3) & XGMAC_RDES3_L34T) *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL; - return 0; } static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index ffe2d63389b8..e2dca9b6e992 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -90,7 +90,7 @@ struct stmmac_desc_ops { /* RSS */ int (*get_rx_hash)(struct dma_desc *p, u32 *hash, enum pkt_hash_types *type); - int (*get_rx_header_len)(struct dma_desc *p, unsigned int *len); + void (*get_rx_header_len)(struct dma_desc *p, unsigned int *len); void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr); void (*set_sarc)(struct dma_desc *p, u32 sarc_type); void (*set_vlan_tag)(struct dma_desc *p, u16 tag, u16 inner_tag, @@ -150,7 +150,7 @@ struct stmmac_desc_ops { #define stmmac_get_rx_hash(__priv, __args...) \ stmmac_do_callback(__priv, desc, get_rx_hash, __args) #define stmmac_get_rx_header_len(__priv, __args...) \ - stmmac_do_callback(__priv, desc, get_rx_header_len, __args) + stmmac_do_void_callback(__priv, desc, get_rx_header_len, __args) #define stmmac_set_desc_sec_addr(__priv, __args...) \ stmmac_do_void_callback(__priv, desc, set_sec_addr, __args) #define stmmac_set_desc_sarc(__priv, __args...) 
\ diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index 14bd5e7b9875..8ad900949dc8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -51,7 +51,7 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, STMMAC_RING_MODE, 0, false, skb->len); tx_q->tx_skbuff[entry] = NULL; - entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); + entry = STMMAC_GET_ENTRY(entry, priv->dma_tx_size); if (priv->extend_desc) desc = (struct dma_desc *)(tx_q->dma_etx + entry); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 545696971f65..727e68dfaf1c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -171,9 +171,11 @@ struct stmmac_priv { /* RX Queue */ struct stmmac_rx_queue rx_queue[MTL_MAX_RX_QUEUES]; + unsigned int dma_rx_size; /* TX Queue */ struct stmmac_tx_queue tx_queue[MTL_MAX_TX_QUEUES]; + unsigned int dma_tx_size; /* Generic channel for NAPI */ struct stmmac_channel channel[STMMAC_CH_MAX]; @@ -266,6 +268,8 @@ int stmmac_dvr_probe(struct device *device, struct stmmac_resources *res); void stmmac_disable_eee_mode(struct stmmac_priv *priv); bool stmmac_eee_init(struct stmmac_priv *priv); +int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt); +int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size); #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS) void stmmac_selftest_run(struct net_device *dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 814879f91f76..9e54f953634b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -440,6 +440,33 @@ static int stmmac_nway_reset(struct net_device *dev) return phylink_ethtool_nway_reset(priv->phylink); } +static void stmmac_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct stmmac_priv *priv = netdev_priv(netdev); + + ring->rx_max_pending = DMA_MAX_RX_SIZE; + ring->tx_max_pending = DMA_MAX_TX_SIZE; + ring->rx_pending = priv->dma_rx_size; + ring->tx_pending = priv->dma_tx_size; +} + +static int stmmac_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + if (ring->rx_mini_pending || ring->rx_jumbo_pending || + ring->rx_pending < DMA_MIN_RX_SIZE || + ring->rx_pending > DMA_MAX_RX_SIZE || + !is_power_of_2(ring->rx_pending) || + ring->tx_pending < DMA_MIN_TX_SIZE || + ring->tx_pending > DMA_MAX_TX_SIZE || + !is_power_of_2(ring->tx_pending)) + return -EINVAL; + + return stmmac_reinit_ringparam(netdev, ring->rx_pending, + ring->tx_pending); +} + static void stmmac_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) @@ -843,6 +870,30 @@ static int stmmac_set_rxfh(struct net_device *dev, const u32 *indir, priv->plat->rx_queues_to_use); } +static void stmmac_get_channels(struct net_device *dev, + struct ethtool_channels *chan) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + chan->rx_count = priv->plat->rx_queues_to_use; + chan->tx_count = priv->plat->tx_queues_to_use; + chan->max_rx = priv->dma_cap.number_rx_queues; + chan->max_tx = priv->dma_cap.number_tx_queues; +} + +static int stmmac_set_channels(struct net_device *dev, + struct ethtool_channels *chan) +{ + struct stmmac_priv *priv = netdev_priv(dev); + 
+ if (chan->rx_count > priv->dma_cap.number_rx_queues || + chan->tx_count > priv->dma_cap.number_tx_queues || + !chan->rx_count || !chan->tx_count) + return -EINVAL; + + return stmmac_reinit_queues(dev, chan->rx_count, chan->tx_count); +} + static int stmmac_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { @@ -926,6 +977,8 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .get_regs_len = stmmac_ethtool_get_regs_len, .get_link = ethtool_op_get_link, .nway_reset = stmmac_nway_reset, + .get_ringparam = stmmac_get_ringparam, + .set_ringparam = stmmac_set_ringparam, .get_pauseparam = stmmac_get_pauseparam, .set_pauseparam = stmmac_set_pauseparam, .self_test = stmmac_selftest_run, @@ -944,6 +997,8 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .get_ts_info = stmmac_get_ts_info, .get_coalesce = stmmac_get_coalesce, .set_coalesce = stmmac_set_coalesce, + .get_channels = stmmac_get_channels, + .set_channels = stmmac_set_channels, .get_tunable = stmmac_get_tunable, .set_tunable = stmmac_set_tunable, .get_link_ksettings = stmmac_ethtool_get_link_ksettings, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b56b13d64ab4..220626a8d499 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -63,8 +63,8 @@ static int phyaddr = -1; module_param(phyaddr, int, 0444); MODULE_PARM_DESC(phyaddr, "Physical device address"); -#define STMMAC_TX_THRESH (DMA_TX_SIZE / 4) -#define STMMAC_RX_THRESH (DMA_RX_SIZE / 4) +#define STMMAC_TX_THRESH(x) ((x)->dma_tx_size / 4) +#define STMMAC_RX_THRESH(x) ((x)->dma_rx_size / 4) static int flow_ctrl = FLOW_AUTO; module_param(flow_ctrl, int, 0644); @@ -176,32 +176,6 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv) } } -/** - * stmmac_stop_all_queues - Stop all queues - * @priv: driver private structure - */ -static void stmmac_stop_all_queues(struct stmmac_priv *priv) -{ - u32 tx_queues_cnt = priv->plat->tx_queues_to_use; - u32 queue; - - for (queue = 0; queue < tx_queues_cnt; queue++) - netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); -} - -/** - * stmmac_start_all_queues - Start all queues - * @priv: driver private structure - */ -static void stmmac_start_all_queues(struct stmmac_priv *priv) -{ - u32 tx_queues_cnt = priv->plat->tx_queues_to_use; - u32 queue; - - for (queue = 0; queue < tx_queues_cnt; queue++) - netif_tx_start_queue(netdev_get_tx_queue(priv->dev, queue)); -} - static void stmmac_service_event_schedule(struct stmmac_priv *priv) { if (!test_bit(STMMAC_DOWN, &priv->state) && @@ -297,7 +271,7 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue) if (tx_q->dirty_tx > tx_q->cur_tx) avail = tx_q->dirty_tx - tx_q->cur_tx - 1; else - avail = DMA_TX_SIZE - tx_q->cur_tx + tx_q->dirty_tx - 1; + avail = priv->dma_tx_size - tx_q->cur_tx + tx_q->dirty_tx - 1; return avail; } @@ -315,7 +289,7 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue) if (rx_q->dirty_rx <= rx_q->cur_rx) dirty = rx_q->cur_rx - rx_q->dirty_rx; else - dirty = DMA_RX_SIZE - rx_q->dirty_rx + rx_q->cur_rx; + dirty = priv->dma_rx_size - rx_q->dirty_rx + rx_q->cur_rx; return dirty; } @@ -360,7 +334,7 @@ void stmmac_disable_eee_mode(struct stmmac_priv *priv) /** * stmmac_eee_ctrl_timer - EEE TX SW timer. 
- * @arg : data hook + * @t: timer_list struct containing private info * Description: * if there is no data transfer and if we are not in LPI state, * then MAC Transmitter can be moved to LPI state. @@ -736,7 +710,7 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) * a proprietary structure used to pass information to the driver. * Description: * This function obtain the current hardware timestamping settings - as requested. + * as requested. */ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) { @@ -789,14 +763,14 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) static void stmmac_release_ptp(struct stmmac_priv *priv) { - if (priv->plat->clk_ptp_ref) - clk_disable_unprepare(priv->plat->clk_ptp_ref); + clk_disable_unprepare(priv->plat->clk_ptp_ref); stmmac_ptp_unregister(priv); } /** * stmmac_mac_flow_ctrl - Configure flow control in all queues * @priv: driver private structure + * @duplex: duplex passed to the next function * Description: It is used for configuring the flow control in all queues */ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex) @@ -1150,7 +1124,7 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv) head_rx = (void *)rx_q->dma_rx; /* Display RX ring */ - stmmac_display_ring(priv, head_rx, DMA_RX_SIZE, true); + stmmac_display_ring(priv, head_rx, priv->dma_rx_size, true); } } @@ -1173,7 +1147,7 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv) else head_tx = (void *)tx_q->dma_tx; - stmmac_display_ring(priv, head_tx, DMA_TX_SIZE, false); + stmmac_display_ring(priv, head_tx, priv->dma_tx_size, false); } } @@ -1217,16 +1191,16 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue) int i; /* Clear the RX descriptors */ - for (i = 0; i < DMA_RX_SIZE; i++) + for (i = 0; i < priv->dma_rx_size; i++) if (priv->extend_desc) stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic, priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1), + (i == priv->dma_rx_size - 1), priv->dma_buf_sz); else stmmac_init_rx_desc(priv, &rx_q->dma_rx[i], priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1), + (i == priv->dma_rx_size - 1), priv->dma_buf_sz); } @@ -1243,8 +1217,8 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, u32 queue) int i; /* Clear the TX descriptors */ - for (i = 0; i < DMA_TX_SIZE; i++) { - int last = (i == (DMA_TX_SIZE - 1)); + for (i = 0; i < priv->dma_tx_size; i++) { + int last = (i == (priv->dma_tx_size - 1)); struct dma_desc *p; if (priv->extend_desc) @@ -1398,7 +1372,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) stmmac_clear_rx_descriptors(priv, queue); - for (i = 0; i < DMA_RX_SIZE; i++) { + for (i = 0; i < priv->dma_rx_size; i++) { struct dma_desc *p; if (priv->extend_desc) @@ -1413,16 +1387,18 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) } rx_q->cur_rx = 0; - rx_q->dirty_rx = (unsigned int)(i - DMA_RX_SIZE); + rx_q->dirty_rx = (unsigned int)(i - priv->dma_rx_size); /* Setup the chained descriptor addresses */ if (priv->mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) stmmac_mode_init(priv, rx_q->dma_erx, - rx_q->dma_rx_phy, DMA_RX_SIZE, 1); + rx_q->dma_rx_phy, + priv->dma_rx_size, 1); else stmmac_mode_init(priv, rx_q->dma_rx, - rx_q->dma_rx_phy, DMA_RX_SIZE, 0); + rx_q->dma_rx_phy, + priv->dma_rx_size, 0); } } @@ -1436,7 +1412,7 @@ err_init_rx_buffers: if (queue == 0) break; - i = DMA_RX_SIZE; + i = priv->dma_rx_size; queue--; } @@ -1468,13 +1444,15 @@ static int 
init_dma_tx_desc_rings(struct net_device *dev) if (priv->mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) stmmac_mode_init(priv, tx_q->dma_etx, - tx_q->dma_tx_phy, DMA_TX_SIZE, 1); + tx_q->dma_tx_phy, + priv->dma_tx_size, 1); else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) stmmac_mode_init(priv, tx_q->dma_tx, - tx_q->dma_tx_phy, DMA_TX_SIZE, 0); + tx_q->dma_tx_phy, + priv->dma_tx_size, 0); } - for (i = 0; i < DMA_TX_SIZE; i++) { + for (i = 0; i < priv->dma_tx_size; i++) { struct dma_desc *p; if (priv->extend_desc) p = &((tx_q->dma_etx + i)->basic); @@ -1538,7 +1516,7 @@ static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue) { int i; - for (i = 0; i < DMA_RX_SIZE; i++) + for (i = 0; i < priv->dma_rx_size; i++) stmmac_free_rx_buffer(priv, queue, i); } @@ -1551,7 +1529,7 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue) { int i; - for (i = 0; i < DMA_TX_SIZE; i++) + for (i = 0; i < priv->dma_tx_size; i++) stmmac_free_tx_buffer(priv, queue, i); } @@ -1573,11 +1551,11 @@ static void free_dma_rx_desc_resources(struct stmmac_priv *priv) /* Free DMA regions of consistent memory previously allocated */ if (!priv->extend_desc) - dma_free_coherent(priv->device, - DMA_RX_SIZE * sizeof(struct dma_desc), + dma_free_coherent(priv->device, priv->dma_rx_size * + sizeof(struct dma_desc), rx_q->dma_rx, rx_q->dma_rx_phy); else - dma_free_coherent(priv->device, DMA_RX_SIZE * + dma_free_coherent(priv->device, priv->dma_rx_size * sizeof(struct dma_extended_desc), rx_q->dma_erx, rx_q->dma_rx_phy); @@ -1616,7 +1594,7 @@ static void free_dma_tx_desc_resources(struct stmmac_priv *priv) addr = tx_q->dma_tx; } - size *= DMA_TX_SIZE; + size *= priv->dma_tx_size; dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy); @@ -1649,7 +1627,7 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv) rx_q->priv_data = priv; pp_params.flags = PP_FLAG_DMA_MAP; - pp_params.pool_size = DMA_RX_SIZE; + pp_params.pool_size = priv->dma_rx_size; num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE); pp_params.order = ilog2(num_pages); pp_params.nid = dev_to_node(priv->device); @@ -1663,14 +1641,16 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv) goto err_dma; } - rx_q->buf_pool = kcalloc(DMA_RX_SIZE, sizeof(*rx_q->buf_pool), + rx_q->buf_pool = kcalloc(priv->dma_rx_size, + sizeof(*rx_q->buf_pool), GFP_KERNEL); if (!rx_q->buf_pool) goto err_dma; if (priv->extend_desc) { rx_q->dma_erx = dma_alloc_coherent(priv->device, - DMA_RX_SIZE * sizeof(struct dma_extended_desc), + priv->dma_rx_size * + sizeof(struct dma_extended_desc), &rx_q->dma_rx_phy, GFP_KERNEL); if (!rx_q->dma_erx) @@ -1678,7 +1658,8 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv) } else { rx_q->dma_rx = dma_alloc_coherent(priv->device, - DMA_RX_SIZE * sizeof(struct dma_desc), + priv->dma_rx_size * + sizeof(struct dma_desc), &rx_q->dma_rx_phy, GFP_KERNEL); if (!rx_q->dma_rx) @@ -1717,13 +1698,13 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) tx_q->queue_index = queue; tx_q->priv_data = priv; - tx_q->tx_skbuff_dma = kcalloc(DMA_TX_SIZE, + tx_q->tx_skbuff_dma = kcalloc(priv->dma_tx_size, sizeof(*tx_q->tx_skbuff_dma), GFP_KERNEL); if (!tx_q->tx_skbuff_dma) goto err_dma; - tx_q->tx_skbuff = kcalloc(DMA_TX_SIZE, + tx_q->tx_skbuff = kcalloc(priv->dma_tx_size, sizeof(struct sk_buff *), GFP_KERNEL); if (!tx_q->tx_skbuff) @@ -1736,7 +1717,7 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) else size = sizeof(struct dma_desc); - size *= DMA_TX_SIZE; + size *= 
priv->dma_tx_size; addr = dma_alloc_coherent(priv->device, size, &tx_q->dma_tx_phy, GFP_KERNEL); @@ -1965,6 +1946,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) /** * stmmac_tx_clean - to manage the transmission completion * @priv: driver private structure + * @budget: napi budget limiting this functions packet handling * @queue: TX queue index * Description: it reclaims the transmit resources after transmission completes. */ @@ -2046,7 +2028,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) stmmac_release_tx_desc(priv, p, priv->mode); - entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); + entry = STMMAC_GET_ENTRY(entry, priv->dma_tx_size); } tx_q->dirty_tx = entry; @@ -2055,7 +2037,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if (unlikely(netif_tx_queue_stopped(netdev_get_tx_queue(priv->dev, queue))) && - stmmac_tx_avail(priv, queue) > STMMAC_TX_THRESH) { + stmmac_tx_avail(priv, queue) > STMMAC_TX_THRESH(priv)) { netif_dbg(priv, tx_done, priv->dev, "%s: restart transmit\n", __func__); @@ -2328,7 +2310,8 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) rx_q->dma_rx_phy, chan); rx_q->rx_tail_addr = rx_q->dma_rx_phy + - (DMA_RX_SIZE * sizeof(struct dma_desc)); + (priv->dma_rx_size * + sizeof(struct dma_desc)); stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, chan); } @@ -2357,7 +2340,7 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue) /** * stmmac_tx_timer - mitigation sw timer for tx. - * @data: data pointer + * @t: data pointer * Description: * This is the timer handler to directly invoke the stmmac_tx_clean. */ @@ -2412,12 +2395,12 @@ static void stmmac_set_rings_length(struct stmmac_priv *priv) /* set TX ring length */ for (chan = 0; chan < tx_channels_count; chan++) stmmac_set_tx_ring_len(priv, priv->ioaddr, - (DMA_TX_SIZE - 1), chan); + (priv->dma_tx_size - 1), chan); /* set RX ring length */ for (chan = 0; chan < rx_channels_count; chan++) stmmac_set_rx_ring_len(priv, priv->ioaddr, - (DMA_RX_SIZE - 1), chan); + (priv->dma_rx_size - 1), chan); } /** @@ -2620,6 +2603,7 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. + * @init_ptp: initialize PTP if set * Description: * this is the main function to setup the HW in a usable state because the * dma engine is reset, the core registers are configured (e.g. AXI, @@ -2740,6 +2724,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) stmmac_enable_tbs(priv, priv->ioaddr, enable, chan); } + /* Configure real RX and TX queues */ + netif_set_real_num_rx_queues(dev, priv->plat->rx_queues_to_use); + netif_set_real_num_tx_queues(dev, priv->plat->tx_queues_to_use); + /* Start the ball rolling... 
*/ stmmac_start_all_dma(priv); @@ -2797,6 +2785,11 @@ static int stmmac_open(struct net_device *dev) priv->rx_copybreak = STMMAC_RX_COPYBREAK; + if (!priv->dma_tx_size) + priv->dma_tx_size = DMA_DEFAULT_TX_SIZE; + if (!priv->dma_rx_size) + priv->dma_rx_size = DMA_DEFAULT_RX_SIZE; + /* Earlier check for TBS */ for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; @@ -2868,7 +2861,7 @@ static int stmmac_open(struct net_device *dev) } stmmac_enable_all_queues(priv); - stmmac_start_all_queues(priv); + netif_tx_start_all_queues(priv->dev); return 0; @@ -2911,8 +2904,6 @@ static int stmmac_release(struct net_device *dev) phylink_stop(priv->phylink); phylink_disconnect_phy(priv->phylink); - stmmac_stop_all_queues(priv); - stmmac_disable_all_queues(priv); for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) @@ -2968,7 +2959,7 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, return false; stmmac_set_tx_owner(priv, p); - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); + tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_tx_size); return true; } @@ -2977,7 +2968,7 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, * @priv: driver private structure * @des: buffer start address * @total_len: total length to fill in descriptors - * @last_segmant: condition for the last descriptor + * @last_segment: condition for the last descriptor * @queue: TX queue index * Description: * This function fills descriptor and request new descriptors according to @@ -2996,7 +2987,8 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, while (tmp_len > 0) { dma_addr_t curr_addr; - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); + tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, + priv->dma_tx_size); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); if (tx_q->tbs & STMMAC_TBS_AVAIL) @@ -3103,7 +3095,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_set_mss(priv, mss_desc, mss); tx_q->mss = mss; - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); + tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, + priv->dma_tx_size); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); } @@ -3210,7 +3203,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) * ndo_start_xmit will fill this descriptor the next time it's * called and stmmac_tx_clean may clean up to this descriptor. */ - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); + tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_tx_size); if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) { netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n", @@ -3373,7 +3366,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int len = skb_frag_size(frag); bool last_segment = (i == (nfrags - 1)); - entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); + entry = STMMAC_GET_ENTRY(entry, priv->dma_tx_size); WARN_ON(tx_q->tx_skbuff[entry]); if (likely(priv->extend_desc)) @@ -3441,7 +3434,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * ndo_start_xmit will fill this descriptor the next time it's * called and stmmac_tx_clean may clean up to this descriptor. 
*/ - entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); + entry = STMMAC_GET_ENTRY(entry, priv->dma_tx_size); tx_q->cur_tx = entry; if (netif_msg_pktdata(priv)) { @@ -3626,7 +3619,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) dma_wmb(); stmmac_set_rx_owner(priv, p, use_rx_wd); - entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE); + entry = STMMAC_GET_ENTRY(entry, priv->dma_rx_size); } rx_q->dirty_rx = entry; rx_q->rx_tail_addr = rx_q->dma_rx_phy + @@ -3638,15 +3631,15 @@ static unsigned int stmmac_rx_buf1_len(struct stmmac_priv *priv, struct dma_desc *p, int status, unsigned int len) { - int ret, coe = priv->hw->rx_csum; unsigned int plen = 0, hlen = 0; + int coe = priv->hw->rx_csum; /* Not first descriptor, buffer is always zero */ if (priv->sph && len) return 0; /* First descriptor, get split header length */ - ret = stmmac_get_rx_header_len(priv, p, &hlen); + stmmac_get_rx_header_len(priv, p, &hlen); if (priv->sph && hlen) { priv->xstats.rx_split_hdr_pkt_n++; return hlen; @@ -3709,7 +3702,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) else rx_head = (void *)rx_q->dma_rx; - stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true); + stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true); } while (count < limit) { unsigned int buf1_len = 0, buf2_len = 0; @@ -3751,7 +3744,8 @@ read_again: if (unlikely(status & dma_own)) break; - rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, DMA_RX_SIZE); + rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, + priv->dma_rx_size); next_entry = rx_q->cur_rx; if (priv->extend_desc) @@ -3926,7 +3920,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) priv->xstats.napi_poll++; - work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan); + work_done = stmmac_tx_clean(priv, priv->dma_tx_size, chan); work_done = min(work_done, budget); if (work_done < budget && napi_complete_done(napi, work_done)) { @@ -3943,6 +3937,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) /** * stmmac_tx_timeout * @dev : Pointer to net device structure + * @txqueue: the index of the hanging transmit queue * Description: this function is called when a packet transmission fails to * complete within a reasonable time. 
The driver will mark the error in the * netdev structure and arrange for the device to be reset to a sane state @@ -4319,11 +4314,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v) if (priv->extend_desc) { seq_printf(seq, "Extended descriptor ring:\n"); sysfs_display_ring((void *)rx_q->dma_erx, - DMA_RX_SIZE, 1, seq); + priv->dma_rx_size, 1, seq); } else { seq_printf(seq, "Descriptor ring:\n"); sysfs_display_ring((void *)rx_q->dma_rx, - DMA_RX_SIZE, 0, seq); + priv->dma_rx_size, 0, seq); } } @@ -4335,11 +4330,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v) if (priv->extend_desc) { seq_printf(seq, "Extended descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_etx, - DMA_TX_SIZE, 1, seq); + priv->dma_tx_size, 1, seq); } else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) { seq_printf(seq, "Descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_tx, - DMA_TX_SIZE, 0, seq); + priv->dma_tx_size, 0, seq); } } @@ -4725,6 +4720,9 @@ static int stmmac_hw_init(struct stmmac_priv *priv) if (priv->dma_cap.tsoen) dev_info(priv->device, "TSO supported\n"); + priv->hw->vlan_fail_q_en = priv->plat->vlan_fail_q_en; + priv->hw->vlan_fail_q = priv->plat->vlan_fail_q; + /* Run HW quirks, if any */ if (priv->hwif_quirks) { ret = priv->hwif_quirks(priv); @@ -4747,6 +4745,86 @@ static int stmmac_hw_init(struct stmmac_priv *priv) return 0; } +static void stmmac_napi_add(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + u32 queue, maxq; + + maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use); + + for (queue = 0; queue < maxq; queue++) { + struct stmmac_channel *ch = &priv->channel[queue]; + + ch->priv_data = priv; + ch->index = queue; + + if (queue < priv->plat->rx_queues_to_use) { + netif_napi_add(dev, &ch->rx_napi, stmmac_napi_poll_rx, + NAPI_POLL_WEIGHT); + } + if (queue < priv->plat->tx_queues_to_use) { + netif_tx_napi_add(dev, &ch->tx_napi, + stmmac_napi_poll_tx, + NAPI_POLL_WEIGHT); + } + } +} + +static void stmmac_napi_del(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + u32 queue, maxq; + + maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use); + + for (queue = 0; queue < maxq; queue++) { + struct stmmac_channel *ch = &priv->channel[queue]; + + if (queue < priv->plat->rx_queues_to_use) + netif_napi_del(&ch->rx_napi); + if (queue < priv->plat->tx_queues_to_use) + netif_napi_del(&ch->tx_napi); + } +} + +int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int ret = 0; + + if (netif_running(dev)) + stmmac_release(dev); + + stmmac_napi_del(dev); + + priv->plat->rx_queues_to_use = rx_cnt; + priv->plat->tx_queues_to_use = tx_cnt; + + stmmac_napi_add(dev); + + if (netif_running(dev)) + ret = stmmac_open(dev); + + return ret; +} + +int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int ret = 0; + + if (netif_running(dev)) + stmmac_release(dev); + + priv->dma_rx_size = rx_size; + priv->dma_tx_size = tx_size; + + if (netif_running(dev)) + ret = stmmac_open(dev); + + return ret; +} + /** * stmmac_dvr_probe * @device: device pointer @@ -4763,7 +4841,7 @@ int stmmac_dvr_probe(struct device *device, { struct net_device *ndev = NULL; struct stmmac_priv *priv; - u32 queue, rxq, maxq; + u32 rxq; int i, ret = 0; ndev = devm_alloc_etherdev_mqs(device, sizeof(struct stmmac_priv), @@ -4827,10 +4905,6 @@ int stmmac_dvr_probe(struct device *device, 
stmmac_check_ether_addr(priv); - /* Configure real RX and TX queues */ - netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use); - netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use); - ndev->netdev_ops = &stmmac_netdev_ops; ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | @@ -4928,25 +5002,7 @@ int stmmac_dvr_probe(struct device *device, priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */ /* Setup channels NAPI */ - maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use); - - for (queue = 0; queue < maxq; queue++) { - struct stmmac_channel *ch = &priv->channel[queue]; - - spin_lock_init(&ch->lock); - ch->priv_data = priv; - ch->index = queue; - - if (queue < priv->plat->rx_queues_to_use) { - netif_napi_add(ndev, &ch->rx_napi, stmmac_napi_poll_rx, - NAPI_POLL_WEIGHT); - } - if (queue < priv->plat->tx_queues_to_use) { - netif_tx_napi_add(ndev, &ch->tx_napi, - stmmac_napi_poll_tx, - NAPI_POLL_WEIGHT); - } - } + stmmac_napi_add(ndev); mutex_init(&priv->lock); @@ -5011,14 +5067,7 @@ error_phy_setup: priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); error_mdio_register: - for (queue = 0; queue < maxq; queue++) { - struct stmmac_channel *ch = &priv->channel[queue]; - - if (queue < priv->plat->rx_queues_to_use) - netif_napi_del(&ch->rx_napi); - if (queue < priv->plat->tx_queues_to_use) - netif_napi_del(&ch->tx_napi); - } + stmmac_napi_del(ndev); error_hw_init: destroy_workqueue(priv->wq); @@ -5086,7 +5135,6 @@ int stmmac_suspend(struct device *dev) mutex_lock(&priv->lock); netif_device_detach(ndev); - stmmac_stop_all_queues(priv); stmmac_disable_all_queues(priv); @@ -5115,8 +5163,7 @@ int stmmac_suspend(struct device *dev) stmmac_mac_set(priv, priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); /* Disable clock in case of PWM is off */ - if (priv->plat->clk_ptp_ref) - clk_disable_unprepare(priv->plat->clk_ptp_ref); + clk_disable_unprepare(priv->plat->clk_ptp_ref); clk_disable_unprepare(priv->plat->pclk); clk_disable_unprepare(priv->plat->stmmac_clk); } @@ -5129,7 +5176,7 @@ EXPORT_SYMBOL_GPL(stmmac_suspend); /** * stmmac_reset_queues_param - reset queue parameters - * @dev: device pointer + * @priv: device pointer */ static void stmmac_reset_queues_param(struct stmmac_priv *priv) { @@ -5213,8 +5260,6 @@ int stmmac_resume(struct device *dev) stmmac_enable_all_queues(priv); - stmmac_start_all_queues(priv); - mutex_unlock(&priv->lock); if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index f32317fa75c8..af34a4cadbb0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -125,6 +125,7 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) /** * stmmac_mtl_setup - parse DT parameters for multiple queues configuration * @pdev: platform device + * @plat: enet data */ static int stmmac_mtl_setup(struct platform_device *pdev, struct plat_stmmacenet_data *plat) @@ -360,7 +361,7 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat, /** * stmmac_of_get_mac_mode - retrieves the interface of the MAC - * @np - device-tree node + * @np: - device-tree node * Description: * Similar to `of_get_phy_mode()`, this function will retrieve (from * the device-tree) the interface mode on the MAC side. 
This assumes diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index bf195adee393..0462dcc93e53 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -796,7 +796,7 @@ static int stmmac_test_flowctrl(struct stmmac_priv *priv) u32 tail; tail = priv->rx_queue[i].dma_rx_phy + - (DMA_RX_SIZE * sizeof(struct dma_desc)); + (priv->dma_rx_size * sizeof(struct dma_desc)); stmmac_set_rx_tail_ptr(priv, priv->ioaddr, tail, i); stmmac_start_rx(priv, priv->ioaddr, i); diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index b624e177ec71..9ff894ba8d3e 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -454,8 +454,8 @@ static int cas_page_free(struct cas *cp, cas_page_t *page) #define RX_USED_ADD(x, y) ((x)->used += (y)) #define RX_USED_SET(x, y) ((x)->used = (y)) #else -#define RX_USED_ADD(x, y) -#define RX_USED_SET(x, y) +#define RX_USED_ADD(x, y) do { } while(0) +#define RX_USED_SET(x, y) do { } while(0) #endif /* local page allocation routines for the receive buffers. jumbo pages diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 34fdbc6d6031..c646575e79d5 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -209,13 +209,13 @@ static void bigmac_clean_rings(struct bigmac *bp) } } -static void bigmac_init_rings(struct bigmac *bp, int from_irq) +static void bigmac_init_rings(struct bigmac *bp, bool non_blocking) { struct bmac_init_block *bb = bp->bmac_block; int i; gfp_t gfp_flags = GFP_KERNEL; - if (from_irq || in_interrupt()) + if (non_blocking) gfp_flags = GFP_ATOMIC; bp->rx_new = bp->rx_old = bp->tx_new = bp->tx_old = 0; @@ -489,7 +489,7 @@ static void bigmac_tcvr_init(struct bigmac *bp) } } -static int bigmac_init_hw(struct bigmac *, int); +static int bigmac_init_hw(struct bigmac *, bool); static int try_next_permutation(struct bigmac *bp, void __iomem *tregs) { @@ -549,7 +549,7 @@ static void bigmac_timer(struct timer_list *t) if (ret == -1) { printk(KERN_ERR "%s: Link down, cable problem?\n", bp->dev->name); - ret = bigmac_init_hw(bp, 0); + ret = bigmac_init_hw(bp, true); if (ret) { printk(KERN_ERR "%s: Error, cannot re-init the " "BigMAC.\n", bp->dev->name); @@ -617,7 +617,7 @@ static void bigmac_begin_auto_negotiation(struct bigmac *bp) add_timer(&bp->bigmac_timer); } -static int bigmac_init_hw(struct bigmac *bp, int from_irq) +static int bigmac_init_hw(struct bigmac *bp, bool non_blocking) { void __iomem *gregs = bp->gregs; void __iomem *cregs = bp->creg; @@ -635,7 +635,7 @@ static int bigmac_init_hw(struct bigmac *bp, int from_irq) qec_init(bp); /* Alloc and reset the tx/rx descriptor chains. */ - bigmac_init_rings(bp, from_irq); + bigmac_init_rings(bp, non_blocking); /* Initialize the PHY. */ bigmac_tcvr_init(bp); @@ -749,7 +749,7 @@ static void bigmac_is_medium_rare(struct bigmac *bp, u32 qec_status, u32 bmac_st } printk(" RESET\n"); - bigmac_init_hw(bp, 1); + bigmac_init_hw(bp, true); } /* BigMAC transmit complete service routines. 
*/ @@ -921,7 +921,7 @@ static int bigmac_open(struct net_device *dev) return ret; } timer_setup(&bp->bigmac_timer, bigmac_timer, 0); - ret = bigmac_init_hw(bp, 0); + ret = bigmac_init_hw(bp, false); if (ret) free_irq(dev->irq, bp); return ret; @@ -945,7 +945,7 @@ static void bigmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bigmac *bp = netdev_priv(dev); - bigmac_init_hw(bp, 0); + bigmac_init_hw(bp, true); netif_wake_queue(dev); } diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 8deb943ca5de..58f142ee78a3 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -2965,9 +2965,8 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* It is guaranteed that the returned buffer will be at least * PAGE_SIZE aligned. */ - gp->init_block = (struct gem_init_block *) - dma_alloc_coherent(&pdev->dev, sizeof(struct gem_init_block), - &gp->gblock_dvma, GFP_KERNEL); + gp->init_block = dma_alloc_coherent(&pdev->dev, sizeof(struct gem_init_block), + &gp->gblock_dvma, GFP_KERNEL); if (!gp->init_block) { pr_err("Cannot allocate init block, aborting\n"); err = -ENOMEM; diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-common.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-common.c index eb1c6b03c329..df26cea45904 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-common.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-common.c @@ -513,7 +513,7 @@ void xlgmac_get_all_hw_features(struct xlgmac_pdata *pdata) void xlgmac_print_all_hw_features(struct xlgmac_pdata *pdata) { - char *str = NULL; + char __maybe_unused *str = NULL; XLGMAC_PR("\n"); XLGMAC_PR("=====================================================\n"); diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index e28727297563..b8f4f419173f 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -138,7 +138,10 @@ static void print_eth_id(struct net_device *ndev) * @priv: NIC private structure * @f: fifo to initialize * @fsz_type: fifo size type: 0-4KB, 1-8KB, 2-16KB, 3-32KB - * @reg_XXX: offsets of registers relative to base address + * @reg_CFG0: offsets of registers relative to base address + * @reg_CFG1: offsets of registers relative to base address + * @reg_RPTR: offsets of registers relative to base address + * @reg_WPTR: offsets of registers relative to base address * * 1K extra space is allocated at the end of the fifo to simplify * processing of descriptors that wraps around fifo's end @@ -153,11 +156,11 @@ bdx_fifo_init(struct bdx_priv *priv, struct fifo *f, int fsz_type, u16 memsz = FIFO_SIZE * (1 << fsz_type); memset(f, 0, sizeof(struct fifo)); - /* pci_alloc_consistent gives us 4k-aligned memory */ - f->va = pci_alloc_consistent(priv->pdev, - memsz + FIFO_EXTRA_SPACE, &f->da); + /* dma_alloc_coherent gives us 4k-aligned memory */ + f->va = dma_alloc_coherent(&priv->pdev->dev, memsz + FIFO_EXTRA_SPACE, + &f->da, GFP_ATOMIC); if (!f->va) { - pr_err("pci_alloc_consistent failed\n"); + pr_err("dma_alloc_coherent failed\n"); RET(-ENOMEM); } f->reg_CFG0 = reg_CFG0; @@ -183,8 +186,8 @@ static void bdx_fifo_free(struct bdx_priv *priv, struct fifo *f) { ENTER; if (f->va) { - pci_free_consistent(priv->pdev, - f->memsz + FIFO_EXTRA_SPACE, f->va, f->da); + dma_free_coherent(&priv->pdev->dev, + f->memsz + FIFO_EXTRA_SPACE, f->va, f->da); f->va = NULL; } RET(); @@ -558,7 +561,7 @@ static int bdx_reset(struct bdx_priv *priv) /** * bdx_close - Disables a 
network interface - * @netdev: network interface device structure + * @ndev: network interface device structure * * Returns 0, this is not allowed to fail * @@ -585,7 +588,7 @@ static int bdx_close(struct net_device *ndev) /** * bdx_open - Called when a network interface is made active - * @netdev: network interface device structure + * @ndev: network interface device structure * * Returns 0 on success, negative value on failure * @@ -698,7 +701,7 @@ static int bdx_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) * __bdx_vlan_rx_vid - private helper for adding/killing VLAN vid * @ndev: network device * @vid: VLAN vid - * @op: add or kill operation + * @enable: enable or disable vlan * * Passes VLAN filter table to hardware */ @@ -729,6 +732,7 @@ static void __bdx_vlan_rx_vid(struct net_device *ndev, uint16_t vid, int enable) /** * bdx_vlan_rx_add_vid - kernel hook for adding VLAN vid to hw filtering table * @ndev: network device + * @proto: unused * @vid: VLAN vid to add */ static int bdx_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) @@ -740,6 +744,7 @@ static int bdx_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) /** * bdx_vlan_rx_kill_vid - kernel hook for killing VLAN vid in hw filtering table * @ndev: network device + * @proto: unused * @vid: VLAN vid to kill */ static int bdx_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) @@ -750,7 +755,7 @@ static int bdx_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) /** * bdx_change_mtu - Change the Maximum Transfer Unit - * @netdev: network interface device structure + * @ndev: network interface device structure * @new_mtu: new value for maximum frame size * * Returns 0 on success, negative on failure @@ -1033,9 +1038,8 @@ static void bdx_rx_free_skbs(struct bdx_priv *priv, struct rxf_fifo *f) for (i = 0; i < db->nelem; i++) { dm = bdx_rxdb_addr_elem(db, i); if (dm->dma) { - pci_unmap_single(priv->pdev, - dm->dma, f->m.pktsz, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&priv->pdev->dev, dm->dma, + f->m.pktsz, DMA_FROM_DEVICE); dev_kfree_skb(dm->skb); } } @@ -1097,9 +1101,8 @@ static void bdx_rx_alloc_skbs(struct bdx_priv *priv, struct rxf_fifo *f) idx = bdx_rxdb_alloc_elem(db); dm = bdx_rxdb_addr_elem(db, idx); - dm->dma = pci_map_single(priv->pdev, - skb->data, f->m.pktsz, - PCI_DMA_FROMDEVICE); + dm->dma = dma_map_single(&priv->pdev->dev, skb->data, + f->m.pktsz, DMA_FROM_DEVICE); dm->skb = skb; rxfd = (struct rxf_desc *)(f->m.va + f->m.wptr); rxfd->info = CPU_CHIP_SWAP32(0x10003); /* INFO=1 BC=3 */ @@ -1259,16 +1262,15 @@ static int bdx_rx_receive(struct bdx_priv *priv, struct rxd_fifo *f, int budget) (skb2 = netdev_alloc_skb(priv->ndev, len + NET_IP_ALIGN))) { skb_reserve(skb2, NET_IP_ALIGN); /*skb_put(skb2, len); */ - pci_dma_sync_single_for_cpu(priv->pdev, - dm->dma, rxf_fifo->m.pktsz, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&priv->pdev->dev, dm->dma, + rxf_fifo->m.pktsz, + DMA_FROM_DEVICE); memcpy(skb2->data, skb->data, len); bdx_recycle_skb(priv, rxdd); skb = skb2; } else { - pci_unmap_single(priv->pdev, - dm->dma, rxf_fifo->m.pktsz, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&priv->pdev->dev, dm->dma, + rxf_fifo->m.pktsz, DMA_FROM_DEVICE); bdx_rxdb_free_elem(db, rxdd->va_lo); } @@ -1478,8 +1480,8 @@ bdx_tx_map_skb(struct bdx_priv *priv, struct sk_buff *skb, int i; db->wptr->len = skb_headlen(skb); - db->wptr->addr.dma = pci_map_single(priv->pdev, skb->data, - db->wptr->len, PCI_DMA_TODEVICE); + db->wptr->addr.dma = dma_map_single(&priv->pdev->dev, 
skb->data, + db->wptr->len, DMA_TO_DEVICE); pbl->len = CPU_CHIP_SWAP32(db->wptr->len); pbl->pa_lo = CPU_CHIP_SWAP32(L32_64(db->wptr->addr.dma)); pbl->pa_hi = CPU_CHIP_SWAP32(H32_64(db->wptr->addr.dma)); @@ -1716,8 +1718,8 @@ static void bdx_tx_cleanup(struct bdx_priv *priv) BDX_ASSERT(db->rptr->len == 0); do { BDX_ASSERT(db->rptr->addr.dma == 0); - pci_unmap_page(priv->pdev, db->rptr->addr.dma, - db->rptr->len, PCI_DMA_TODEVICE); + dma_unmap_page(&priv->pdev->dev, db->rptr->addr.dma, + db->rptr->len, DMA_TO_DEVICE); bdx_tx_db_inc_rptr(db); } while (db->rptr->len > 0); tx_level -= db->rptr->len; /* '-' koz len is negative */ @@ -1756,6 +1758,8 @@ static void bdx_tx_cleanup(struct bdx_priv *priv) /** * bdx_tx_free_skbs - frees all skbs from TXD fifo. + * @priv: NIC private structure + * * It gets called when OS stops this dev, eg upon "ifconfig down" or rmmod */ static void bdx_tx_free_skbs(struct bdx_priv *priv) @@ -1765,8 +1769,8 @@ static void bdx_tx_free_skbs(struct bdx_priv *priv) ENTER; while (db->rptr != db->wptr) { if (likely(db->rptr->len)) - pci_unmap_page(priv->pdev, db->rptr->addr.dma, - db->rptr->len, PCI_DMA_TODEVICE); + dma_unmap_page(&priv->pdev->dev, db->rptr->addr.dma, + db->rptr->len, DMA_TO_DEVICE); else dev_kfree_skb(db->rptr->addr.skb); bdx_tx_db_inc_rptr(db); @@ -1902,12 +1906,12 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) /* it triggers interrupt, dunno why. */ goto err_pci; /* it's not a problem though */ - if (!(err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) && - !(err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))) { + if (!(err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) && + !(err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)))) { pci_using_dac = 1; } else { - if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) || - (err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))) { + if ((err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) || + (err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)))) { pr_err("No usable DMA configuration, aborting\n"); goto err_dma; } diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index 496dafb25128..6e4d4f9e32e0 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -572,13 +572,14 @@ static int am65_cpsw_nway_reset(struct net_device *ndev) static int am65_cpsw_get_regs_len(struct net_device *ndev) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); - u32 i, regdump_len = 0; + u32 ale_entries, i, regdump_len = 0; + ale_entries = cpsw_ale_get_num_entries(common->ale); for (i = 0; i < ARRAY_SIZE(am65_cpsw_regdump); i++) { if (am65_cpsw_regdump[i].hdr.module_id == AM65_CPSW_REGDUMP_MOD_CPSW_ALE_TBL) { regdump_len += sizeof(struct am65_cpsw_regdump_hdr); - regdump_len += common->ale->params.ale_entries * + regdump_len += ale_entries * ALE_ENTRY_WORDS * sizeof(u32); continue; } @@ -592,10 +593,11 @@ static void am65_cpsw_get_regs(struct net_device *ndev, struct ethtool_regs *regs, void *p) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); - u32 i, j, pos, *reg = p; + u32 ale_entries, i, j, pos, *reg = p; /* update CPSW IP version */ regs->version = AM65_CPSW_REGDUMP_VER; + ale_entries = cpsw_ale_get_num_entries(common->ale); pos = 0; for (i = 0; i < ARRAY_SIZE(am65_cpsw_regdump); i++) { @@ -603,7 +605,7 @@ static void am65_cpsw_get_regs(struct net_device *ndev, if (am65_cpsw_regdump[i].hdr.module_id == AM65_CPSW_REGDUMP_MOD_CPSW_ALE_TBL) { - u32 
ale_tbl_len = common->ale->params.ale_entries * + u32 ale_tbl_len = ale_entries * ALE_ENTRY_WORDS * sizeof(u32) + sizeof(struct am65_cpsw_regdump_hdr); reg[pos++] = ale_tbl_len; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 9baf3f3da91e..501d676fd88b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -5,6 +5,7 @@ * */ +#include <linux/clk.h> #include <linux/etherdevice.h> #include <linux/if_vlan.h> #include <linux/interrupt.h> @@ -2038,6 +2039,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) struct am65_cpsw_common *common; struct device_node *node; struct resource *res; + struct clk *clk; int ret, i; common = devm_kzalloc(dev, sizeof(struct am65_cpsw_common), GFP_KERNEL); @@ -2086,6 +2088,16 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) if (!common->ports) return -ENOMEM; + clk = devm_clk_get(dev, "fck"); + if (IS_ERR(clk)) { + ret = PTR_ERR(clk); + + if (ret != -EPROBE_DEFER) + dev_err(dev, "error getting fck clock %d\n", ret); + return ret; + } + common->bus_freq = clk_get_rate(clk); + pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); if (ret < 0) { @@ -2131,10 +2143,10 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) /* init common data */ ale_params.dev = dev; ale_params.ale_ageout = AM65_CPSW_ALE_AGEOUT_DEFAULT; - ale_params.ale_entries = 0; ale_params.ale_ports = common->port_num + 1; ale_params.ale_regs = common->cpsw_base + AM65_CPSW_NU_ALE_BASE; - ale_params.nu_switch_ale = true; + ale_params.dev_id = "am65x-cpsw2g"; + ale_params.bus_freq = common->bus_freq; common->ale = cpsw_ale_create(&ale_params); if (IS_ERR(common->ale)) { diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 94f666ea0e53..993e1d4d3222 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -106,6 +106,7 @@ struct am65_cpsw_common { u32 nuss_ver; u32 cpsw_ver; + unsigned long bus_freq; bool pf_p0_rx_ptype_rrobin; struct am65_cpts *cpts; int est_enabled; diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index c59a289e428c..75056c14b161 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -83,6 +83,8 @@ struct am65_cpts_regs { #define AM65_CPTS_CONTROL_HW8_TS_PUSH_EN BIT(15) #define AM65_CPTS_CONTROL_HW1_TS_PUSH_OFFSET (8) +#define AM65_CPTS_CONTROL_TX_GENF_CLR_EN BIT(17) + #define AM65_CPTS_CONTROL_TS_SYNC_SEL_MASK (0xF) #define AM65_CPTS_CONTROL_TS_SYNC_SEL_SHIFT (28) @@ -748,42 +750,23 @@ EXPORT_SYMBOL_GPL(am65_cpts_rx_enable); static int am65_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid) { unsigned int ptp_class = ptp_classify_raw(skb); - u8 *msgtype, *data = skb->data; - unsigned int offset = 0; - __be16 *seqid; + struct ptp_header *hdr; + u8 msgtype; + u16 seqid; if (ptp_class == PTP_CLASS_NONE) return 0; - if (ptp_class & PTP_CLASS_VLAN) - offset += VLAN_HLEN; - - switch (ptp_class & PTP_CLASS_PMASK) { - case PTP_CLASS_IPV4: - offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; - break; - case PTP_CLASS_IPV6: - offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; - break; - case PTP_CLASS_L2: - offset += ETH_HLEN; - break; - default: - return 0; - } - - if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid)) + hdr = ptp_parse_header(skb, ptp_class); + if (!hdr) return 0; - if (unlikely(ptp_class & PTP_CLASS_V1)) - msgtype = data + offset + 
OFF_PTP_CONTROL; - else - msgtype = data + offset; + msgtype = ptp_get_msgtype(hdr, ptp_class); + seqid = ntohs(hdr->sequence_id); - seqid = (__be16 *)(data + offset + OFF_PTP_SEQUENCE_ID); - *mtype_seqid = (*msgtype << AM65_CPTS_EVENT_1_MESSAGE_TYPE_SHIFT) & + *mtype_seqid = (msgtype << AM65_CPTS_EVENT_1_MESSAGE_TYPE_SHIFT) & AM65_CPTS_EVENT_1_MESSAGE_TYPE_MASK; - *mtype_seqid |= (ntohs(*seqid) & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK); + *mtype_seqid |= (seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK); return 1; } @@ -1005,7 +988,9 @@ struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, am65_cpts_set_add_val(cpts); - am65_cpts_write32(cpts, AM65_CPTS_CONTROL_EN | AM65_CPTS_CONTROL_64MODE, + am65_cpts_write32(cpts, AM65_CPTS_CONTROL_EN | + AM65_CPTS_CONTROL_64MODE | + AM65_CPTS_CONTROL_TX_GENF_CLR_EN, control); am65_cpts_write32(cpts, AM65_CPTS_INT_ENABLE_TS_PEND_EN, int_enable); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4a65edc5a375..9fd1f77190ad 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1278,12 +1278,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } data->channels = prop; - if (of_property_read_u32(node, "ale_entries", &prop)) { - dev_err(&pdev->dev, "Missing ale_entries property in the DT.\n"); - return -EINVAL; - } - data->ale_entries = prop; - if (of_property_read_u32(node, "bd_ram_size", &prop)) { dev_err(&pdev->dev, "Missing bd_ram_size property in the DT.\n"); return -EINVAL; @@ -1297,7 +1291,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, data->mac_control = prop; if (of_property_read_bool(node, "dual_emac")) - data->dual_emac = 1; + data->dual_emac = true; /* * Populate all the child nodes here... @@ -1596,7 +1590,7 @@ static int cpsw_probe(struct platform_device *pdev) soc = soc_device_match(cpsw_soc_devices); if (soc) - cpsw->quirk_irq = 1; + cpsw->quirk_irq = true; data = &cpsw->data; cpsw->slaves = devm_kcalloc(dev, diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 9ad872bfae3a..a6a455c32628 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -32,6 +32,7 @@ #define ALE_STATUS 0x04 #define ALE_CONTROL 0x08 #define ALE_PRESCALE 0x10 +#define ALE_AGING_TIMER 0x14 #define ALE_UNKNOWNVLAN 0x18 #define ALE_TABLE_CONTROL 0x20 #define ALE_TABLE 0x34 @@ -46,6 +47,46 @@ #define AM65_CPSW_ALE_THREAD_DEF_REG 0x134 +/* ALE_AGING_TIMER */ +#define ALE_AGING_TIMER_MASK GENMASK(23, 0) + +/** + * struct ale_entry_fld - The ALE tbl entry field description + * @start_bit: field start bit + * @num_bits: field bit length + * @flags: field flags + */ +struct ale_entry_fld { + u8 start_bit; + u8 num_bits; + u8 flags; +}; + +enum { + CPSW_ALE_F_STATUS_REG = BIT(0), /* Status register present */ + CPSW_ALE_F_HW_AUTOAGING = BIT(1), /* HW auto aging */ + + CPSW_ALE_F_COUNT +}; + +/** + * struct ale_dev_id - The ALE version/SoC specific configuration + * @dev_id: ALE version/SoC id + * @features: features supported by ALE + * @tbl_entries: number of ALE entries + * @major_ver_mask: mask of ALE Major Version Value in ALE_IDVER reg. 
+ * @nu_switch_ale: NU Switch ALE + * @vlan_entry_tbl: ALE vlan entry fields description tbl + */ +struct cpsw_ale_dev_id { + const char *dev_id; + u32 features; + u32 tbl_entries; + u32 major_ver_mask; + bool nu_switch_ale; + const struct ale_entry_fld *vlan_entry_tbl; +}; + #define ALE_TABLE_WRITE BIT(31) #define ALE_TYPE_FREE 0 @@ -60,7 +101,6 @@ #define ALE_TABLE_SIZE_MULTIPLIER 1024 #define ALE_STATUS_SIZE_MASK 0x1f -#define ALE_TABLE_SIZE_DEFAULT 64 static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) { @@ -106,6 +146,59 @@ static inline void cpsw_ale_set_##name(u32 *ale_entry, u32 value, \ cpsw_ale_set_field(ale_entry, start, bits, value); \ } +enum { + ALE_ENT_VID_MEMBER_LIST = 0, + ALE_ENT_VID_UNREG_MCAST_MSK, + ALE_ENT_VID_REG_MCAST_MSK, + ALE_ENT_VID_FORCE_UNTAGGED_MSK, + ALE_ENT_VID_UNREG_MCAST_IDX, + ALE_ENT_VID_REG_MCAST_IDX, + ALE_ENT_VID_LAST, +}; + +#define ALE_FLD_ALLOWED BIT(0) +#define ALE_FLD_SIZE_PORT_MASK_BITS BIT(1) +#define ALE_FLD_SIZE_PORT_NUM_BITS BIT(2) + +#define ALE_ENTRY_FLD(id, start, bits) \ +[id] = { \ + .start_bit = start, \ + .num_bits = bits, \ + .flags = ALE_FLD_ALLOWED, \ +} + +#define ALE_ENTRY_FLD_DYN_MSK_SIZE(id, start) \ +[id] = { \ + .start_bit = start, \ + .num_bits = 0, \ + .flags = ALE_FLD_ALLOWED | \ + ALE_FLD_SIZE_PORT_MASK_BITS, \ +} + +/* dm814x, am3/am4/am5, k2hk */ +static const struct ale_entry_fld vlan_entry_cpsw[ALE_ENT_VID_LAST] = { + ALE_ENTRY_FLD(ALE_ENT_VID_MEMBER_LIST, 0, 3), + ALE_ENTRY_FLD(ALE_ENT_VID_UNREG_MCAST_MSK, 8, 3), + ALE_ENTRY_FLD(ALE_ENT_VID_REG_MCAST_MSK, 16, 3), + ALE_ENTRY_FLD(ALE_ENT_VID_FORCE_UNTAGGED_MSK, 24, 3), +}; + +/* k2e/k2l, k3 am65/j721e cpsw2g */ +static const struct ale_entry_fld vlan_entry_nu[ALE_ENT_VID_LAST] = { + ALE_ENTRY_FLD_DYN_MSK_SIZE(ALE_ENT_VID_MEMBER_LIST, 0), + ALE_ENTRY_FLD(ALE_ENT_VID_UNREG_MCAST_IDX, 20, 3), + ALE_ENTRY_FLD_DYN_MSK_SIZE(ALE_ENT_VID_FORCE_UNTAGGED_MSK, 24), + ALE_ENTRY_FLD(ALE_ENT_VID_REG_MCAST_IDX, 44, 3), +}; + +/* K3 j721e/j7200 cpsw9g/5g, am64x cpsw3g */ +static const struct ale_entry_fld vlan_entry_k3_cpswxg[] = { + ALE_ENTRY_FLD_DYN_MSK_SIZE(ALE_ENT_VID_MEMBER_LIST, 0), + ALE_ENTRY_FLD_DYN_MSK_SIZE(ALE_ENT_VID_UNREG_MCAST_MSK, 12), + ALE_ENTRY_FLD_DYN_MSK_SIZE(ALE_ENT_VID_FORCE_UNTAGGED_MSK, 24), + ALE_ENTRY_FLD_DYN_MSK_SIZE(ALE_ENT_VID_REG_MCAST_MSK, 36), +}; + DEFINE_ALE_FIELD(entry_type, 60, 2) DEFINE_ALE_FIELD(vlan_id, 48, 12) DEFINE_ALE_FIELD(mcast_state, 62, 2) @@ -115,17 +208,76 @@ DEFINE_ALE_FIELD(ucast_type, 62, 2) DEFINE_ALE_FIELD1(port_num, 66) DEFINE_ALE_FIELD(blocked, 65, 1) DEFINE_ALE_FIELD(secure, 64, 1) -DEFINE_ALE_FIELD1(vlan_untag_force, 24) -DEFINE_ALE_FIELD1(vlan_reg_mcast, 16) -DEFINE_ALE_FIELD1(vlan_unreg_mcast, 8) -DEFINE_ALE_FIELD1(vlan_member_list, 0) DEFINE_ALE_FIELD(mcast, 40, 1) -/* ALE NetCP nu switch specific */ -DEFINE_ALE_FIELD(vlan_unreg_mcast_idx, 20, 3) -DEFINE_ALE_FIELD(vlan_reg_mcast_idx, 44, 3) #define NU_VLAN_UNREG_MCAST_IDX 1 +static int cpsw_ale_entry_get_fld(struct cpsw_ale *ale, + u32 *ale_entry, + const struct ale_entry_fld *entry_tbl, + int fld_id) +{ + const struct ale_entry_fld *entry_fld; + u32 bits; + + if (!ale || !ale_entry) + return -EINVAL; + + entry_fld = &entry_tbl[fld_id]; + if (!(entry_fld->flags & ALE_FLD_ALLOWED)) { + dev_err(ale->params.dev, "get: wrong ale fld id %d\n", fld_id); + return -ENOENT; + } + + bits = entry_fld->num_bits; + if (entry_fld->flags & ALE_FLD_SIZE_PORT_MASK_BITS) + bits = ale->port_mask_bits; + + return cpsw_ale_get_field(ale_entry, entry_fld->start_bit, bits); 
+} + +static void cpsw_ale_entry_set_fld(struct cpsw_ale *ale, + u32 *ale_entry, + const struct ale_entry_fld *entry_tbl, + int fld_id, + u32 value) +{ + const struct ale_entry_fld *entry_fld; + u32 bits; + + if (!ale || !ale_entry) + return; + + entry_fld = &entry_tbl[fld_id]; + if (!(entry_fld->flags & ALE_FLD_ALLOWED)) { + dev_err(ale->params.dev, "set: wrong ale fld id %d\n", fld_id); + return; + } + + bits = entry_fld->num_bits; + if (entry_fld->flags & ALE_FLD_SIZE_PORT_MASK_BITS) + bits = ale->port_mask_bits; + + cpsw_ale_set_field(ale_entry, entry_fld->start_bit, bits, value); +} + +static int cpsw_ale_vlan_get_fld(struct cpsw_ale *ale, + u32 *ale_entry, + int fld_id) +{ + return cpsw_ale_entry_get_fld(ale, ale_entry, + ale->vlan_entry_tbl, fld_id); +} + +static void cpsw_ale_vlan_set_fld(struct cpsw_ale *ale, + u32 *ale_entry, + int fld_id, + u32 value) +{ + cpsw_ale_entry_set_fld(ale, ale_entry, + ale->vlan_entry_tbl, fld_id, value); +} + /* The MAC address field in the ALE entry cannot be macroized as above */ static inline void cpsw_ale_get_addr(u32 *ale_entry, u8 *addr) { @@ -420,19 +572,22 @@ static void cpsw_ale_set_vlan_mcast(struct cpsw_ale *ale, u32 *ale_entry, int idx; /* Set VLAN registered multicast flood mask */ - idx = cpsw_ale_get_vlan_reg_mcast_idx(ale_entry); + idx = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_REG_MCAST_IDX); writel(reg_mcast, ale->params.ale_regs + ALE_VLAN_MASK_MUX(idx)); /* Set VLAN unregistered multicast flood mask */ - idx = cpsw_ale_get_vlan_unreg_mcast_idx(ale_entry); + idx = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_IDX); writel(unreg_mcast, ale->params.ale_regs + ALE_VLAN_MASK_MUX(idx)); } static void cpsw_ale_set_vlan_untag(struct cpsw_ale *ale, u32 *ale_entry, u16 vid, int untag_mask) { - cpsw_ale_set_vlan_untag_force(ale_entry, - untag_mask, ale->vlan_field_bits); + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_FORCE_UNTAGGED_MSK, + untag_mask); if (untag_mask & ALE_PORT_HOST) bitmap_set(ale->p0_untag_vid_mask, vid, 1); else @@ -454,17 +609,19 @@ int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port_mask, int untag, cpsw_ale_set_vlan_untag(ale, ale_entry, vid, untag); if (!ale->params.nu_switch_ale) { - cpsw_ale_set_vlan_reg_mcast(ale_entry, reg_mcast, - ale->vlan_field_bits); - cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast, - ale->vlan_field_bits); + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_REG_MCAST_MSK, reg_mcast); + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_MSK, unreg_mcast); } else { - cpsw_ale_set_vlan_unreg_mcast_idx(ale_entry, - NU_VLAN_UNREG_MCAST_IDX); + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_IDX, + NU_VLAN_UNREG_MCAST_IDX); cpsw_ale_set_vlan_mcast(ale, ale_entry, reg_mcast, unreg_mcast); } - cpsw_ale_set_vlan_member_list(ale_entry, port_mask, - ale->vlan_field_bits); + + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_MEMBER_LIST, port_mask); if (idx < 0) idx = cpsw_ale_match_free(ale); @@ -483,20 +640,20 @@ static void cpsw_ale_del_vlan_modify(struct cpsw_ale *ale, u32 *ale_entry, int reg_mcast, unreg_mcast; int members, untag; - members = cpsw_ale_get_vlan_member_list(ale_entry, - ale->vlan_field_bits); + members = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_MEMBER_LIST); members &= ~port_mask; if (!members) { cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); return; } - untag = cpsw_ale_get_vlan_untag_force(ale_entry, - ale->vlan_field_bits); - reg_mcast = cpsw_ale_get_vlan_reg_mcast(ale_entry, - 
ale->vlan_field_bits); - unreg_mcast = cpsw_ale_get_vlan_unreg_mcast(ale_entry, - ale->vlan_field_bits); + untag = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_FORCE_UNTAGGED_MSK); + reg_mcast = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_REG_MCAST_MSK); + unreg_mcast = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_MSK); untag &= members; reg_mcast &= members; unreg_mcast &= members; @@ -504,16 +661,16 @@ static void cpsw_ale_del_vlan_modify(struct cpsw_ale *ale, u32 *ale_entry, cpsw_ale_set_vlan_untag(ale, ale_entry, vid, untag); if (!ale->params.nu_switch_ale) { - cpsw_ale_set_vlan_reg_mcast(ale_entry, reg_mcast, - ale->vlan_field_bits); - cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast, - ale->vlan_field_bits); + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_REG_MCAST_MSK, reg_mcast); + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_MSK, unreg_mcast); } else { cpsw_ale_set_vlan_mcast(ale, ale_entry, reg_mcast, unreg_mcast); } - cpsw_ale_set_vlan_member_list(ale_entry, members, - ale->vlan_field_bits); + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_MEMBER_LIST, members); } int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask) @@ -551,15 +708,15 @@ int cpsw_ale_vlan_add_modify(struct cpsw_ale *ale, u16 vid, int port_mask, if (idx >= 0) cpsw_ale_read(ale, idx, ale_entry); - vlan_members = cpsw_ale_get_vlan_member_list(ale_entry, - ale->vlan_field_bits); - reg_mcast_members = cpsw_ale_get_vlan_reg_mcast(ale_entry, - ale->vlan_field_bits); + vlan_members = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_MEMBER_LIST); + reg_mcast_members = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_REG_MCAST_MSK); unreg_mcast_members = - cpsw_ale_get_vlan_unreg_mcast(ale_entry, - ale->vlan_field_bits); - untag_members = cpsw_ale_get_vlan_untag_force(ale_entry, - ale->vlan_field_bits); + cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_MSK); + untag_members = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_FORCE_UNTAGGED_MSK); vlan_members |= port_mask; untag_members = (untag_members & ~port_mask) | untag_mask; @@ -592,14 +749,15 @@ void cpsw_ale_set_unreg_mcast(struct cpsw_ale *ale, int unreg_mcast_mask, continue; unreg_members = - cpsw_ale_get_vlan_unreg_mcast(ale_entry, - ale->vlan_field_bits); + cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_MSK); if (add) unreg_members |= unreg_mcast_mask; else unreg_members &= ~unreg_mcast_mask; - cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_members, - ale->vlan_field_bits); + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_MSK, + unreg_members); cpsw_ale_write(ale, idx, ale_entry); } } @@ -609,15 +767,15 @@ static void cpsw_ale_vlan_set_unreg_mcast(struct cpsw_ale *ale, u32 *ale_entry, { int unreg_mcast; - unreg_mcast = - cpsw_ale_get_vlan_unreg_mcast(ale_entry, - ale->vlan_field_bits); + unreg_mcast = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_MSK); if (allmulti) unreg_mcast |= ALE_PORT_HOST; else unreg_mcast &= ~ALE_PORT_HOST; - cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast, - ale->vlan_field_bits); + + cpsw_ale_vlan_set_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_MSK, unreg_mcast); } static void @@ -627,7 +785,8 @@ cpsw_ale_vlan_set_unreg_mcast_idx(struct cpsw_ale *ale, u32 *ale_entry, int unreg_mcast; int idx; - idx = cpsw_ale_get_vlan_unreg_mcast_idx(ale_entry); + idx = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_UNREG_MCAST_IDX); unreg_mcast = readl(ale->params.ale_regs + 
ALE_VLAN_MASK_MUX(idx)); @@ -651,9 +810,9 @@ void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti, int port) type = cpsw_ale_get_entry_type(ale_entry); if (type != ALE_TYPE_VLAN) continue; - vlan_members = - cpsw_ale_get_vlan_member_list(ale_entry, - ale->vlan_field_bits); + + vlan_members = cpsw_ale_vlan_get_fld(ale, ale_entry, + ALE_ENT_VID_MEMBER_LIST); if (port != -1 && !(vlan_members & BIT(port))) continue; @@ -960,30 +1119,146 @@ static void cpsw_ale_timer(struct timer_list *t) } } +static void cpsw_ale_hw_aging_timer_start(struct cpsw_ale *ale) +{ + u32 aging_timer; + + aging_timer = ale->params.bus_freq / 1000000; + aging_timer *= ale->params.ale_ageout; + + if (aging_timer & ~ALE_AGING_TIMER_MASK) { + aging_timer = ALE_AGING_TIMER_MASK; + dev_warn(ale->params.dev, + "ALE aging timer overflow, set to max\n"); + } + + writel(aging_timer, ale->params.ale_regs + ALE_AGING_TIMER); +} + +static void cpsw_ale_hw_aging_timer_stop(struct cpsw_ale *ale) +{ + writel(0, ale->params.ale_regs + ALE_AGING_TIMER); +} + +static void cpsw_ale_aging_start(struct cpsw_ale *ale) +{ + if (!ale->params.ale_ageout) + return; + + if (ale->features & CPSW_ALE_F_HW_AUTOAGING) { + cpsw_ale_hw_aging_timer_start(ale); + return; + } + + timer_setup(&ale->timer, cpsw_ale_timer, 0); + ale->timer.expires = jiffies + ale->ageout; + add_timer(&ale->timer); +} + +static void cpsw_ale_aging_stop(struct cpsw_ale *ale) +{ + if (!ale->params.ale_ageout) + return; + + if (ale->features & CPSW_ALE_F_HW_AUTOAGING) { + cpsw_ale_hw_aging_timer_stop(ale); + return; + } + + del_timer_sync(&ale->timer); +} + void cpsw_ale_start(struct cpsw_ale *ale) { cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1); cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); - timer_setup(&ale->timer, cpsw_ale_timer, 0); - if (ale->ageout) { - ale->timer.expires = jiffies + ale->ageout; - add_timer(&ale->timer); - } + cpsw_ale_aging_start(ale); } void cpsw_ale_stop(struct cpsw_ale *ale) { - del_timer_sync(&ale->timer); + cpsw_ale_aging_stop(ale); cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); cpsw_ale_control_set(ale, 0, ALE_ENABLE, 0); } +static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { + { + /* am3/4/5, dra7. 
dm814x, 66ak2hk-gbe */ + .dev_id = "cpsw", + .tbl_entries = 1024, + .major_ver_mask = 0xff, + .vlan_entry_tbl = vlan_entry_cpsw, + }, + { + /* 66ak2h_xgbe */ + .dev_id = "66ak2h-xgbe", + .tbl_entries = 2048, + .major_ver_mask = 0xff, + .vlan_entry_tbl = vlan_entry_cpsw, + }, + { + .dev_id = "66ak2el", + .features = CPSW_ALE_F_STATUS_REG, + .major_ver_mask = 0x7, + .nu_switch_ale = true, + .vlan_entry_tbl = vlan_entry_nu, + }, + { + .dev_id = "66ak2g", + .features = CPSW_ALE_F_STATUS_REG, + .tbl_entries = 64, + .major_ver_mask = 0x7, + .nu_switch_ale = true, + .vlan_entry_tbl = vlan_entry_nu, + }, + { + .dev_id = "am65x-cpsw2g", + .features = CPSW_ALE_F_STATUS_REG | CPSW_ALE_F_HW_AUTOAGING, + .tbl_entries = 64, + .major_ver_mask = 0x7, + .nu_switch_ale = true, + .vlan_entry_tbl = vlan_entry_nu, + }, + { + .dev_id = "j721e-cpswxg", + .features = CPSW_ALE_F_STATUS_REG | CPSW_ALE_F_HW_AUTOAGING, + .major_ver_mask = 0x7, + .vlan_entry_tbl = vlan_entry_k3_cpswxg, + }, + { }, +}; + +static const struct +cpsw_ale_dev_id *cpsw_ale_match_id(const struct cpsw_ale_dev_id *id, + const char *dev_id) +{ + if (!dev_id) + return NULL; + + while (id->dev_id) { + if (strcmp(dev_id, id->dev_id) == 0) + return id; + id++; + } + return NULL; +} + struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) { + const struct cpsw_ale_dev_id *ale_dev_id; struct cpsw_ale *ale; u32 rev, ale_entries; + ale_dev_id = cpsw_ale_match_id(cpsw_ale_id_match, params->dev_id); + if (!ale_dev_id) + return ERR_PTR(-EINVAL); + + params->ale_entries = ale_dev_id->tbl_entries; + params->major_ver_mask = ale_dev_id->major_ver_mask; + params->nu_switch_ale = ale_dev_id->nu_switch_ale; + ale = devm_kzalloc(params->dev, sizeof(*ale), GFP_KERNEL); if (!ale) return ERR_PTR(-ENOMEM); @@ -997,10 +1272,10 @@ struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) ale->params = *params; ale->ageout = ale->params.ale_ageout * HZ; + ale->features = ale_dev_id->features; + ale->vlan_entry_tbl = ale_dev_id->vlan_entry_tbl; rev = readl_relaxed(ale->params.ale_regs + ALE_IDVER); - if (!ale->params.major_ver_mask) - ale->params.major_ver_mask = 0xff; ale->version = (ALE_VERSION_MAJOR(rev, ale->params.major_ver_mask) << 8) | ALE_VERSION_MINOR(rev); @@ -1008,7 +1283,8 @@ struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) ALE_VERSION_MAJOR(rev, ale->params.major_ver_mask), ALE_VERSION_MINOR(rev)); - if (!ale->params.ale_entries) { + if (ale->features & CPSW_ALE_F_STATUS_REG && + !ale->params.ale_entries) { ale_entries = readl_relaxed(ale->params.ale_regs + ALE_STATUS) & ALE_STATUS_SIZE_MASK; @@ -1017,16 +1293,12 @@ struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) * table which shows the size as a multiple of 1024 entries. * For these, params.ale_entries will be set to zero. So * read the register and update the value of ale_entries. - * ALE table on NetCP lite, is much smaller and is indicated - * by a value of zero in ALE_STATUS. So use a default value - * of ALE_TABLE_SIZE_DEFAULT for this. Caller is expected - * to set the value of ale_entries for all other versions - * of ALE. + * return error if ale_entries is zero in ALE_STATUS. 
*/ if (!ale_entries) - ale_entries = ALE_TABLE_SIZE_DEFAULT; - else - ale_entries *= ALE_TABLE_SIZE_MULTIPLIER; + return ERR_PTR(-EINVAL); + + ale_entries *= ALE_TABLE_SIZE_MULTIPLIER; ale->params.ale_entries = ale_entries; } dev_info(ale->params.dev, @@ -1079,3 +1351,8 @@ void cpsw_ale_dump(struct cpsw_ale *ale, u32 *data) data += ALE_ENTRY_WORDS; } } + +u32 cpsw_ale_get_num_entries(struct cpsw_ale *ale) +{ + return ale ? ale->params.ale_entries : 0; +} diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index 6a3cb6898728..5e4a69662c5f 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -24,18 +24,24 @@ struct cpsw_ale_params { * pass it from caller. */ u32 major_ver_mask; + const char *dev_id; + unsigned long bus_freq; }; +struct ale_entry_fld; + struct cpsw_ale { struct cpsw_ale_params params; struct timer_list timer; unsigned long ageout; u32 version; + u32 features; /* These bits are different on NetCP NU Switch ALE */ u32 port_mask_bits; u32 port_num_bits; u32 vlan_field_bits; unsigned long *p0_untag_vid_mask; + const struct ale_entry_fld *vlan_entry_tbl; }; enum cpsw_ale_control { @@ -119,6 +125,7 @@ int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control); int cpsw_ale_control_set(struct cpsw_ale *ale, int port, int control, int value); void cpsw_ale_dump(struct cpsw_ale *ale, u32 *data); +u32 cpsw_ale_get_num_entries(struct cpsw_ale *ale); static inline int cpsw_ale_get_vlan_p0_untag(struct cpsw_ale *ale, u16 vid) { diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index fa54efe3be63..4d02c5135611 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -339,7 +339,8 @@ int cpsw_get_regs_len(struct net_device *ndev) { struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - return cpsw->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32); + return cpsw_ale_get_num_entries(cpsw->ale) * + ALE_ENTRY_WORDS * sizeof(u32); } void cpsw_get_regs(struct net_device *ndev, struct ethtool_regs *regs, void *p) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 15672d0a4de6..f779d2e1b5c5 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1244,7 +1244,6 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw) data->active_slave = 0; data->channels = CPSW_MAX_QUEUES; - data->ale_entries = CPSW_ALE_NUM_ENTRIES; data->dual_emac = true; data->bd_ram_size = CPSW_BD_RAM_SIZE; data->mac_control = 0; @@ -1661,12 +1660,10 @@ static int cpsw_dl_switch_mode_set(struct devlink *dl, u32 id, for (i = 0; i < cpsw->data.slaves; i++) { struct cpsw_slave *slave = &cpsw->slaves[i]; struct net_device *sl_ndev = slave->ndev; - struct cpsw_priv *priv; if (!sl_ndev) continue; - priv = netdev_priv(sl_ndev); if (switch_en) vlan = cpsw->data.default_vlan; else diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 482a1a451e43..51cc29f39038 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -500,8 +500,8 @@ int cpsw_init_common(struct cpsw_common *cpsw, void __iomem *ss_regs, ale_params.dev = dev; ale_params.ale_ageout = ale_ageout; - ale_params.ale_entries = data->ale_entries; ale_params.ale_ports = CPSW_ALE_PORTS_NUM; + ale_params.dev_id = "cpsw"; cpsw->ale = cpsw_ale_create(&ale_params); if (IS_ERR(cpsw->ale)) { diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h 
index bf4e179b4ca4..7b7f3596b20d 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -117,7 +117,6 @@ do { \ #define CPSW_MAX_QUEUES 8 #define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256 #define CPSW_ALE_AGEOUT_DEFAULT 10 /* sec */ -#define CPSW_ALE_NUM_ENTRIES 1024 #define CPSW_FIFO_QUEUE_TYPE_SHIFT 16 #define CPSW_FIFO_SHAPE_EN_SHIFT 16 #define CPSW_FIFO_RATE_EN_SHIFT 20 @@ -294,7 +293,6 @@ struct cpsw_platform_data { u32 channels; /* number of cpdma channels (symmetric) */ u32 slaves; /* number of slave cpgmac ports */ u32 active_slave;/* time stamping, ethtool and SIOCGMIIPHY slave */ - u32 ale_entries; /* ale table size */ u32 bd_ram_size; /*buffer descriptor ram size */ u32 mac_control; /* Mac control register */ u16 default_vlan; /* Def VLAN for ALE lookup in VLAN aware mode*/ diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 7c55d395de2c..d1fc7955d422 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -446,41 +446,22 @@ static const struct ptp_clock_info cpts_info = { static int cpts_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid) { unsigned int ptp_class = ptp_classify_raw(skb); - u8 *msgtype, *data = skb->data; - unsigned int offset = 0; - u16 *seqid; + struct ptp_header *hdr; + u8 msgtype; + u16 seqid; if (ptp_class == PTP_CLASS_NONE) return 0; - if (ptp_class & PTP_CLASS_VLAN) - offset += VLAN_HLEN; - - switch (ptp_class & PTP_CLASS_PMASK) { - case PTP_CLASS_IPV4: - offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; - break; - case PTP_CLASS_IPV6: - offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; - break; - case PTP_CLASS_L2: - offset += ETH_HLEN; - break; - default: - return 0; - } - - if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid)) + hdr = ptp_parse_header(skb, ptp_class); + if (!hdr) return 0; - if (unlikely(ptp_class & PTP_CLASS_V1)) - msgtype = data + offset + OFF_PTP_CONTROL; - else - msgtype = data + offset; + msgtype = ptp_get_msgtype(hdr, ptp_class); + seqid = ntohs(hdr->sequence_id); - seqid = (u16 *)(data + offset + OFF_PTP_SEQUENCE_ID); - *mtype_seqid = (*msgtype & MESSAGE_TYPE_MASK) << MESSAGE_TYPE_SHIFT; - *mtype_seqid |= (ntohs(*seqid) & SEQUENCE_ID_MASK) << SEQUENCE_ID_SHIFT; + *mtype_seqid = (msgtype & MESSAGE_TYPE_MASK) << MESSAGE_TYPE_SHIFT; + *mtype_seqid |= (seqid & SEQUENCE_ID_MASK) << SEQUENCE_ID_SHIFT; return 1; } @@ -528,6 +509,11 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb) int ret; u64 ns; + /* cpts_rx_timestamp() is called before eth_type_trans(), so + * skb MAC Hdr properties are not configured yet. Hence need to + * reset skb MAC header here + */ + skb_reset_mac_header(skb); ret = cpts_skb_get_mtype_seqid(skb, &skb_cb->skb_mtype_seqid); if (!ret) return; diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 6614fa3089b2..d2eab5cd1e0c 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -718,7 +718,7 @@ static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr, most_chan->desc_num += desc_cnt; } -/** +/* * cpdma_chan_split_pool - Splits ctrl pool between all channels. 
* Has to be called under ctlr lock */ diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index de282531f68b..c7031e1960d4 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -671,7 +671,7 @@ static int emac_hash_del(struct emac_priv *priv, u8 *mac_addr) * emac_add_mcast - Set multicast address in the EMAC adapter (Internal) * @priv: The DaVinci EMAC private adapter structure * @action: multicast operation to perform - * mac_addr: mac address to set + * @mac_addr: mac address to set * * Set multicast addresses in EMAC adapter - internal function * @@ -977,6 +977,7 @@ fail_tx: /** * emac_dev_tx_timeout - EMAC Transmit timeout function * @ndev: The DaVinci EMAC network adapter + * @txqueue: the index of the hung transmit queue * * Called when system detects that a skb timeout period has expired * potentially due to a fault in the adapter in not being able to send @@ -1209,7 +1210,7 @@ static int emac_hw_enable(struct emac_priv *priv) /** * emac_poll - EMAC NAPI Poll function - * @ndev: The DaVinci EMAC network adapter + * @napi: pointer to the napi_struct containing The DaVinci EMAC network adapter * @budget: Number of receive packets to process (as told by NAPI layer) * * NAPI Poll function implemented to process packets as per budget. We check @@ -1227,7 +1228,7 @@ static int emac_poll(struct napi_struct *napi, int budget) struct net_device *ndev = priv->ndev; struct device *emac_dev = &ndev->dev; u32 status = 0; - u32 num_tx_pkts = 0, num_rx_pkts = 0; + u32 num_rx_pkts = 0; /* Check interrupt vectors and call packet processing */ status = emac_read(EMAC_MACINVECTOR); @@ -1238,8 +1239,7 @@ static int emac_poll(struct napi_struct *napi, int budget) mask = EMAC_DM646X_MAC_IN_VECTOR_TX_INT_VEC; if (status & mask) { - num_tx_pkts = cpdma_chan_process(priv->txchan, - EMAC_DEF_TX_MAX_SERVICE); + cpdma_chan_process(priv->txchan, EMAC_DEF_TX_MAX_SERVICE); } /* TX processing */ mask = EMAC_DM644X_MAC_IN_VECTOR_RX_INT_VEC; diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 28093923a7fb..33c1592d5381 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -51,7 +51,6 @@ #define GBE13_CPTS_OFFSET 0x500 #define GBE13_ALE_OFFSET 0x600 #define GBE13_HOST_PORT_NUM 0 -#define GBE13_NUM_ALE_ENTRIES 1024 /* 1G Ethernet NU SS defines */ #define GBENU_MODULE_NAME "netcp-gbenu" @@ -101,7 +100,6 @@ #define XGBE10_ALE_OFFSET 0x700 #define XGBE10_HW_STATS_OFFSET 0x800 #define XGBE10_HOST_PORT_NUM 0 -#define XGBE10_NUM_ALE_ENTRIES 2048 #define GBE_TIMER_INTERVAL (HZ / 2) @@ -711,7 +709,6 @@ struct gbe_priv { struct netcp_device *netcp_device; struct timer_list timer; u32 num_slaves; - u32 ale_entries; u32 ale_ports; bool enable_ale; u8 max_num_slaves; @@ -3309,7 +3306,6 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev, gbe_dev->cpts_reg = gbe_dev->switch_regs + XGBE10_CPTS_OFFSET; gbe_dev->ale_ports = gbe_dev->max_num_ports; gbe_dev->host_port = XGBE10_HOST_PORT_NUM; - gbe_dev->ale_entries = XGBE10_NUM_ALE_ENTRIES; gbe_dev->stats_en_mask = (1 << (gbe_dev->max_num_ports)) - 1; /* Subsystem registers */ @@ -3433,7 +3429,6 @@ static int set_gbe_ethss14_priv(struct gbe_priv *gbe_dev, gbe_dev->ale_reg = gbe_dev->switch_regs + GBE13_ALE_OFFSET; gbe_dev->ale_ports = gbe_dev->max_num_ports; gbe_dev->host_port = GBE13_HOST_PORT_NUM; - gbe_dev->ale_entries = GBE13_NUM_ALE_ENTRIES; gbe_dev->stats_en_mask = GBE13_REG_VAL_STAT_ENABLE_ALL; /* 
Subsystem registers */ @@ -3697,12 +3692,15 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, ale_params.dev = gbe_dev->dev; ale_params.ale_regs = gbe_dev->ale_reg; ale_params.ale_ageout = GBE_DEFAULT_ALE_AGEOUT; - ale_params.ale_entries = gbe_dev->ale_entries; ale_params.ale_ports = gbe_dev->ale_ports; - if (IS_SS_ID_MU(gbe_dev)) { - ale_params.major_ver_mask = 0x7; - ale_params.nu_switch_ale = true; - } + ale_params.dev_id = "cpsw"; + if (IS_SS_ID_NU(gbe_dev)) + ale_params.dev_id = "66ak2el"; + else if (IS_SS_ID_2U(gbe_dev)) + ale_params.dev_id = "66ak2g"; + else if (IS_SS_ID_XGBE(gbe_dev)) + ale_params.dev_id = "66ak2h-xgbe"; + gbe_dev->ale = cpsw_ale_create(&ale_params); if (IS_ERR(gbe_dev->ale)) { dev_err(gbe_dev->dev, "error initializing ale engine\n"); diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 76a342ea3797..267c080ee084 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -305,9 +305,8 @@ static void tlan_remove_one(struct pci_dev *pdev) unregister_netdev(dev); if (priv->dma_storage) { - pci_free_consistent(priv->pci_dev, - priv->dma_size, priv->dma_storage, - priv->dma_storage_dma); + dma_free_coherent(&priv->pci_dev->dev, priv->dma_size, + priv->dma_storage, priv->dma_storage_dma); } #ifdef CONFIG_PCI @@ -482,7 +481,7 @@ static int tlan_probe1(struct pci_dev *pdev, long ioaddr, int irq, int rev, priv->adapter = &board_info[ent->driver_data]; - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (rc) { pr_err("No suitable PCI mapping available\n"); goto err_out_free_dev; @@ -584,8 +583,8 @@ static int tlan_probe1(struct pci_dev *pdev, long ioaddr, int irq, int rev, return 0; err_out_uninit: - pci_free_consistent(priv->pci_dev, priv->dma_size, priv->dma_storage, - priv->dma_storage_dma); + dma_free_coherent(&priv->pci_dev->dev, priv->dma_size, + priv->dma_storage, priv->dma_storage_dma); err_out_free_dev: free_netdev(dev); err_out_regions: @@ -609,9 +608,9 @@ static void tlan_eisa_cleanup(void) dev = tlan_eisa_devices; priv = netdev_priv(dev); if (priv->dma_storage) { - pci_free_consistent(priv->pci_dev, priv->dma_size, - priv->dma_storage, - priv->dma_storage_dma); + dma_free_coherent(&priv->pci_dev->dev, priv->dma_size, + priv->dma_storage, + priv->dma_storage_dma); } release_region(dev->base_addr, 0x10); unregister_netdev(dev); @@ -654,7 +653,6 @@ module_exit(tlan_exit); static void __init tlan_eisa_probe(void) { long ioaddr; - int rc = -ENODEV; int irq; u16 device_id; @@ -719,8 +717,7 @@ static void __init tlan_eisa_probe(void) /* Setup the newly found eisa adapter */ - rc = tlan_probe1(NULL, ioaddr, irq, - 12, NULL); + tlan_probe1(NULL, ioaddr, irq, 12, NULL); continue; out: @@ -826,9 +823,8 @@ static int tlan_init(struct net_device *dev) dma_size = (TLAN_NUM_RX_LISTS + TLAN_NUM_TX_LISTS) * (sizeof(struct tlan_list)); - priv->dma_storage = pci_alloc_consistent(priv->pci_dev, - dma_size, - &priv->dma_storage_dma); + priv->dma_storage = dma_alloc_coherent(&priv->pci_dev->dev, dma_size, + &priv->dma_storage_dma, GFP_KERNEL); priv->dma_size = dma_size; if (priv->dma_storage == NULL) { @@ -1069,9 +1065,9 @@ static netdev_tx_t tlan_start_tx(struct sk_buff *skb, struct net_device *dev) tail_list->forward = 0; - tail_list->buffer[0].address = pci_map_single(priv->pci_dev, + tail_list->buffer[0].address = dma_map_single(&priv->pci_dev->dev, skb->data, txlen, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); tlan_store_skb(tail_list, skb); 
tail_list->frame_size = (u16) txlen; @@ -1365,10 +1361,10 @@ static u32 tlan_handle_tx_eof(struct net_device *dev, u16 host_int) struct sk_buff *skb = tlan_get_skb(head_list); ack++; - pci_unmap_single(priv->pci_dev, head_list->buffer[0].address, - max(skb->len, - (unsigned int)TLAN_MIN_FRAME_SIZE), - PCI_DMA_TODEVICE); + dma_unmap_single(&priv->pci_dev->dev, + head_list->buffer[0].address, + max(skb->len, (unsigned int)TLAN_MIN_FRAME_SIZE), + DMA_TO_DEVICE); dev_kfree_skb_any(skb); head_list->buffer[8].address = 0; head_list->buffer[9].address = 0; @@ -1511,8 +1507,8 @@ static u32 tlan_handle_rx_eof(struct net_device *dev, u16 host_int) goto drop_and_reuse; skb = tlan_get_skb(head_list); - pci_unmap_single(priv->pci_dev, frame_dma, - TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); + dma_unmap_single(&priv->pci_dev->dev, frame_dma, + TLAN_MAX_FRAME_SIZE, DMA_FROM_DEVICE); skb_put(skb, frame_size); dev->stats.rx_bytes += frame_size; @@ -1521,8 +1517,8 @@ static u32 tlan_handle_rx_eof(struct net_device *dev, u16 host_int) netif_rx(skb); head_list->buffer[0].address = - pci_map_single(priv->pci_dev, new_skb->data, - TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); + dma_map_single(&priv->pci_dev->dev, new_skb->data, + TLAN_MAX_FRAME_SIZE, DMA_FROM_DEVICE); tlan_store_skb(head_list, new_skb); drop_and_reuse: @@ -1923,10 +1919,10 @@ static void tlan_reset_lists(struct net_device *dev) if (!skb) break; - list->buffer[0].address = pci_map_single(priv->pci_dev, + list->buffer[0].address = dma_map_single(&priv->pci_dev->dev, skb->data, TLAN_MAX_FRAME_SIZE, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); tlan_store_skb(list, skb); list->buffer[1].count = 0; list->buffer[1].address = 0; @@ -1954,12 +1950,10 @@ static void tlan_free_lists(struct net_device *dev) list = priv->tx_list + i; skb = tlan_get_skb(list); if (skb) { - pci_unmap_single( - priv->pci_dev, - list->buffer[0].address, - max(skb->len, - (unsigned int)TLAN_MIN_FRAME_SIZE), - PCI_DMA_TODEVICE); + dma_unmap_single(&priv->pci_dev->dev, + list->buffer[0].address, + max(skb->len, (unsigned int)TLAN_MIN_FRAME_SIZE), + DMA_TO_DEVICE); dev_kfree_skb_any(skb); list->buffer[8].address = 0; list->buffer[9].address = 0; @@ -1970,10 +1964,9 @@ static void tlan_free_lists(struct net_device *dev) list = priv->rx_list + i; skb = tlan_get_skb(list); if (skb) { - pci_unmap_single(priv->pci_dev, + dma_unmap_single(&priv->pci_dev->dev, list->buffer[0].address, - TLAN_MAX_FRAME_SIZE, - PCI_DMA_FROMDEVICE); + TLAN_MAX_FRAME_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); list->buffer[8].address = 0; list->buffer[9].address = 0; @@ -2511,7 +2504,7 @@ static void tlan_phy_power_down(struct net_device *dev) } /* Wait for 50 ms and powerup - * This is abitrary. It is intended to make sure the + * This is arbitrary. It is intended to make sure the * transceiver settles. 
*/ tlan_set_timer(dev, msecs_to_jiffies(50), TLAN_TIMER_PHY_PUP); diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 6bcda20ed7e7..7a6e5ff8e5d4 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -454,9 +454,9 @@ static struct sk_buff *alloc_rxbuf_skb(struct net_device *dev, skb = netdev_alloc_skb(dev, RX_BUF_SIZE); if (!skb) return NULL; - *dma_handle = pci_map_single(hwdev, skb->data, RX_BUF_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(hwdev, *dma_handle)) { + *dma_handle = dma_map_single(&hwdev->dev, skb->data, RX_BUF_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(&hwdev->dev, *dma_handle)) { dev_kfree_skb_any(skb); return NULL; } @@ -466,8 +466,8 @@ static struct sk_buff *alloc_rxbuf_skb(struct net_device *dev, static void free_rxbuf_skb(struct pci_dev *hwdev, struct sk_buff *skb, dma_addr_t dma_handle) { - pci_unmap_single(hwdev, dma_handle, RX_BUF_SIZE, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&hwdev->dev, dma_handle, RX_BUF_SIZE, + DMA_FROM_DEVICE); dev_kfree_skb_any(skb); } @@ -876,9 +876,9 @@ tc35815_init_queues(struct net_device *dev) sizeof(struct TxFD) * TX_FD_NUM > PAGE_SIZE * FD_PAGE_NUM); - lp->fd_buf = pci_alloc_consistent(lp->pci_dev, - PAGE_SIZE * FD_PAGE_NUM, - &lp->fd_buf_dma); + lp->fd_buf = dma_alloc_coherent(&lp->pci_dev->dev, + PAGE_SIZE * FD_PAGE_NUM, + &lp->fd_buf_dma, GFP_ATOMIC); if (!lp->fd_buf) return -ENOMEM; for (i = 0; i < RX_BUF_NUM; i++) { @@ -892,10 +892,9 @@ tc35815_init_queues(struct net_device *dev) lp->rx_skbs[i].skb_dma); lp->rx_skbs[i].skb = NULL; } - pci_free_consistent(lp->pci_dev, - PAGE_SIZE * FD_PAGE_NUM, - lp->fd_buf, - lp->fd_buf_dma); + dma_free_coherent(&lp->pci_dev->dev, + PAGE_SIZE * FD_PAGE_NUM, + lp->fd_buf, lp->fd_buf_dma); lp->fd_buf = NULL; return -ENOMEM; } @@ -990,7 +989,9 @@ tc35815_clear_queues(struct net_device *dev) BUG_ON(lp->tx_skbs[i].skb != skb); #endif if (skb) { - pci_unmap_single(lp->pci_dev, lp->tx_skbs[i].skb_dma, skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&lp->pci_dev->dev, + lp->tx_skbs[i].skb_dma, skb->len, + DMA_TO_DEVICE); lp->tx_skbs[i].skb = NULL; lp->tx_skbs[i].skb_dma = 0; dev_kfree_skb_any(skb); @@ -1022,7 +1023,9 @@ tc35815_free_queues(struct net_device *dev) BUG_ON(lp->tx_skbs[i].skb != skb); #endif if (skb) { - pci_unmap_single(lp->pci_dev, lp->tx_skbs[i].skb_dma, skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(&lp->pci_dev->dev, + lp->tx_skbs[i].skb_dma, + skb->len, DMA_TO_DEVICE); dev_kfree_skb(skb); lp->tx_skbs[i].skb = NULL; lp->tx_skbs[i].skb_dma = 0; @@ -1044,8 +1047,8 @@ tc35815_free_queues(struct net_device *dev) } } if (lp->fd_buf) { - pci_free_consistent(lp->pci_dev, PAGE_SIZE * FD_PAGE_NUM, - lp->fd_buf, lp->fd_buf_dma); + dma_free_coherent(&lp->pci_dev->dev, PAGE_SIZE * FD_PAGE_NUM, + lp->fd_buf, lp->fd_buf_dma); lp->fd_buf = NULL; } } @@ -1292,7 +1295,10 @@ tc35815_send_packet(struct sk_buff *skb, struct net_device *dev) BUG_ON(lp->tx_skbs[lp->tfd_start].skb); #endif lp->tx_skbs[lp->tfd_start].skb = skb; - lp->tx_skbs[lp->tfd_start].skb_dma = pci_map_single(lp->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE); + lp->tx_skbs[lp->tfd_start].skb_dma = dma_map_single(&lp->pci_dev->dev, + skb->data, + skb->len, + DMA_TO_DEVICE); /*add to ring */ txfd = &lp->tfd_base[lp->tfd_start]; @@ -1500,9 +1506,9 @@ tc35815_rx(struct net_device *dev, int limit) skb = lp->rx_skbs[cur_bd].skb; prefetch(skb->data); lp->rx_skbs[cur_bd].skb = NULL; - pci_unmap_single(lp->pci_dev, + 
+			dma_unmap_single(&lp->pci_dev->dev,
 					 lp->rx_skbs[cur_bd].skb_dma,
-					 RX_BUF_SIZE, PCI_DMA_FROMDEVICE);
+					 RX_BUF_SIZE, DMA_FROM_DEVICE);
 			if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN != 0)
 				memmove(skb->data, skb->data - NET_IP_ALIGN,
 					pkt_len);
@@ -1756,7 +1762,9 @@ tc35815_txdone(struct net_device *dev)
 #endif
 		if (skb) {
 			dev->stats.tx_bytes += skb->len;
-			pci_unmap_single(lp->pci_dev, lp->tx_skbs[lp->tfd_end].skb_dma, skb->len, PCI_DMA_TODEVICE);
+			dma_unmap_single(&lp->pci_dev->dev,
+					 lp->tx_skbs[lp->tfd_end].skb_dma,
+					 skb->len, DMA_TO_DEVICE);
 			lp->tx_skbs[lp->tfd_end].skb = NULL;
 			lp->tx_skbs[lp->tfd_end].skb_dma = 0;
 			dev_kfree_skb_any(skb);
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 55b0ddab1776..73ca597ebd1b 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -1504,7 +1504,7 @@ static void rhine_init_cam_filter(struct net_device *dev)
 
 /**
  * rhine_update_vcam - update VLAN CAM filters
- * @rp: rhine_private data of this Rhine
+ * @dev: rhine_private data of this Rhine
  *
  * Update VLAN CAM filters to match configuration change.
  */
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 6d2a31488a74..b65767f9e499 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -372,7 +372,7 @@ static const struct pci_device_id velocity_pci_id_table[] = {
 
 MODULE_DEVICE_TABLE(pci, velocity_pci_id_table);
 
-/**
+/*
  * Describe the OF device identifiers that we support in this
  * device driver. Used for devicetree nodes.
  */
@@ -384,7 +384,7 @@ MODULE_DEVICE_TABLE(of, velocity_of_ids);
 
 /**
  * get_chip_name - identifier to name
- * @id: chip identifier
+ * @chip_id: chip identifier
  *
  * Given a chip identifier return a suitable description. Returns
  * a pointer a static string valid while the driver is loaded.
@@ -748,7 +748,7 @@ static u32 mii_check_media_mode(struct mac_regs __iomem *regs)
 /**
  * velocity_mii_write - write MII data
  * @regs: velocity registers
- * @index: MII register index
+ * @mii_addr: MII register index
  * @data: 16bit data for the MII register
  *
  * Perform a single write to an MII 16bit register. Returns zero
@@ -869,6 +869,7 @@ static u32 check_connection_type(struct mac_regs __iomem *regs)
 
 /**
  * velocity_set_media_mode - set media mode
+ * @vptr: velocity adapter
  * @mii_status: old MII link state
  *
  * Check the media link state and configure the flow control
@@ -877,26 +878,13 @@ static u32 check_connection_type(struct mac_regs __iomem *regs)
  */
 static int velocity_set_media_mode(struct velocity_info *vptr, u32 mii_status)
 {
-	u32 curr_status;
 	struct mac_regs __iomem *regs = vptr->mac_regs;
 
 	vptr->mii_status = mii_check_media_mode(vptr->mac_regs);
-	curr_status = vptr->mii_status & (~VELOCITY_LINK_FAIL);
 
 	/* Set mii link status */
 	set_mii_flow_control(vptr);
 
-	/*
-	   Check if new status is consistent with current status
-	   if (((mii_status & curr_status) & VELOCITY_AUTONEG_ENABLE) ||
-	       (mii_status==curr_status)) {
-		vptr->mii_status=mii_check_media_mode(vptr->mac_regs);
-		vptr->mii_status=check_connection_type(vptr->mac_regs);
-		netdev_info(vptr->netdev, "Velocity link no change\n");
-		return 0;
-	   }
-	 */
-
 	if (PHYID_GET_PHY_ID(vptr->phy_id) == PHYID_CICADA_CS8201)
 		MII_REG_BITS_ON(AUXCR_MDPPS, MII_NCONFIG, vptr->mac_regs);
@@ -1269,6 +1257,7 @@ static void mii_init(struct velocity_info *vptr, u32 mii_status)
 
 /**
  * setup_queue_timers - Setup interrupt timers
+ * @vptr: velocity adapter
  *
  * Setup interrupt frequency during suppression (timeout if the frame
  * count isn't filled).
@@ -1293,8 +1282,7 @@ static void setup_queue_timers(struct velocity_info *vptr)
 
 /**
  * setup_adaptive_interrupts - Setup interrupt suppression
- *
- * @vptr velocity adapter
+ * @vptr: velocity adapter
  *
  * The velocity is able to suppress interrupt during high interrupt load.
  * This function turns on that feature.
@@ -1735,6 +1723,7 @@ err_free_dma_rings_0:
  * velocity_free_tx_buf - free transmit buffer
  * @vptr: velocity
  * @tdinfo: buffer
+ * @td: transmit descriptor to free
  *
  * Release an transmit buffer. If the buffer was preallocated then
  * recycle it, if not then unmap the buffer.
@@ -1909,7 +1898,7 @@ static void velocity_error(struct velocity_info *vptr, int status)
 
 /**
  * tx_srv - transmit interrupt service
- * @vptr; Velocity
+ * @vptr: Velocity
  *
  * Scan the queues looking for transmitted packets that
 * we can complete and clean up. Update any statistics as
@@ -2003,8 +1992,7 @@ static inline void velocity_rx_csum(struct rx_desc *rd, struct sk_buff *skb)
  * velocity_rx_copy - in place Rx copy for small packets
  * @rx_skb: network layer packet buffer candidate
  * @pkt_size: received data size
- * @rd: receive packet descriptor
- * @dev: network device
+ * @vptr: velocity adapter
  *
  * Replace the current skb that is scheduled for Rx processing by a
  * shorter, immediately allocated skb, if the received packet is small
@@ -2110,6 +2098,7 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
 /**
  * velocity_rx_srv - service RX interrupt
  * @vptr: velocity
+ * @budget_left: remaining budget
  *
  * Walk the receive ring of the velocity adapter and remove
 * any received packets from the receive queue. Hand the ring
@@ -2658,7 +2647,6 @@ static const struct net_device_ops velocity_netdev_ops = {
 
 /**
  * velocity_init_info - init private data
- * @pdev: PCI device
  * @vptr: Velocity info
  * @info: Board type
  *
@@ -2677,7 +2665,6 @@ static void velocity_init_info(struct velocity_info *vptr,
 /**
  * velocity_get_pci_info - retrieve PCI info for device
  * @vptr: velocity device
- * @pdev: PCI device it matches
  *
  * Retrieve the PCI configuration space data that interests us from
  * the kernel PCI layer
@@ -2714,7 +2701,6 @@ static int velocity_get_pci_info(struct velocity_info *vptr)
 /**
  * velocity_get_platform_info - retrieve platform info for device
  * @vptr: velocity device
- * @pdev: platform device it matches
  *
  * Retrieve the Platform configuration data that interests us
@@ -2764,8 +2750,9 @@ static u32 velocity_get_link(struct net_device *dev)
 
 /**
  * velocity_probe - set up discovered velocity device
- * @pdev: PCI device
- * @ent: PCI device table entry that matched
+ * @dev: PCI device
+ * @info: table of match
+ * @irq: interrupt info
  * @bustype: bus that device is connected to
  *
  * Configure a discovered adapter from scratch. Return a negative
@@ -2982,6 +2969,7 @@ static int velocity_platform_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM_SLEEP
 /**
  * wol_calc_crc - WOL CRC
+ * @size: size of the wake mask
  * @pattern: data pattern
  * @mask_pattern: mask
  *
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 9a15f14daa47..60c199fcb91e 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -106,7 +106,7 @@ static bool hard_acs_rdy_or_timeout(struct temac_local *lp, ktime_t timeout)
  */
 #define HARD_ACS_RDY_POLL_NS (20 * NSEC_PER_MSEC)
 
-/**
+/*
  * temac_indirect_busywait - Wait for current indirect register access
  * to complete.
  */
@@ -121,7 +121,7 @@ int temac_indirect_busywait(struct temac_local *lp)
 	return 0;
 }
 
-/**
+/*
  * temac_indirect_in32 - Indirect register read access. This function
  * must be called without lp->indirect_lock being held.
  */
@@ -136,7 +136,7 @@ u32 temac_indirect_in32(struct temac_local *lp, int reg)
 	return val;
 }
 
-/**
+/*
  * temac_indirect_in32_locked - Indirect register read access. This
  * function must be called with lp->indirect_lock being held. Use
  * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid
@@ -164,7 +164,7 @@ u32 temac_indirect_in32_locked(struct temac_local *lp, int reg)
 	return temac_ior(lp, XTE_LSW0_OFFSET);
 }
 
-/**
+/*
  * temac_indirect_out32 - Indirect register write access. This function
  * must be called without lp->indirect_lock being held.
  */
@@ -177,7 +177,7 @@ void temac_indirect_out32(struct temac_local *lp, int reg, u32 value)
 	spin_unlock_irqrestore(lp->indirect_lock, flags);
 }
 
-/**
+/*
  * temac_indirect_out32_locked - Indirect register write access. This
  * function must be called with lp->indirect_lock being held. Use
  * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid
@@ -202,7 +202,7 @@ void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value)
 	WARN_ON(temac_indirect_busywait(lp));
 }
 
-/**
+/*
  * temac_dma_in32_* - Memory mapped DMA read, these function expects a
  * register input that is based on DCR word addresses which are then
 * converted to memory mapped byte addresses. To be assigned to
@@ -218,7 +218,7 @@ static u32 temac_dma_in32_le(struct temac_local *lp, int reg)
 	return ioread32(lp->sdma_regs + (reg << 2));
 }
 
-/**
+/*
  * temac_dma_out32_* - Memory mapped DMA read, these function expects
  * a register input that is based on DCR word addresses which are then
  * converted to memory mapped byte addresses. To be assigned to
@@ -240,7 +240,7 @@ static void temac_dma_out32_le(struct temac_local *lp, int reg, u32 value)
  */
#ifdef CONFIG_PPC_DCR
 
-/**
+/*
  * temac_dma_dcr_in32 - DCR based DMA read
  */
 static u32 temac_dma_dcr_in(struct temac_local *lp, int reg)
@@ -248,7 +248,7 @@ static u32 temac_dma_dcr_in(struct temac_local *lp, int reg)
 	return dcr_read(lp->sdma_dcrs, reg);
 }
 
-/**
+/*
  * temac_dma_dcr_out32 - DCR based DMA write
 */
 static void temac_dma_dcr_out(struct temac_local *lp, int reg, u32 value)
@@ -256,7 +256,7 @@ static void temac_dma_dcr_out(struct temac_local *lp, int reg, u32 value)
 	dcr_write(lp->sdma_dcrs, reg, value);
 }
 
-/**
+/*
  * temac_dcr_setup - If the DMA is DCR based, then setup the address and
  * I/O functions
 */
@@ -293,7 +293,7 @@ static int temac_dcr_setup(struct temac_local *lp, struct platform_device *op,
 
#endif
 
-/**
+/*
  * temac_dma_bd_release - Release buffer descriptor rings
 */
 static void temac_dma_bd_release(struct net_device *ndev)
@@ -323,7 +323,7 @@ static void temac_dma_bd_release(struct net_device *ndev)
 			  lp->tx_bd_v, lp->tx_bd_p);
 }
 
-/**
+/*
  * temac_dma_bd_init - Setup buffer descriptor rings
 */
 static int temac_dma_bd_init(struct net_device *ndev)
@@ -593,7 +593,7 @@ static struct temac_option {
 	{}
 };
 
-/**
+/*
  * temac_setoptions
 */
 static u32 temac_setoptions(struct net_device *ndev, u32 options)
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index fa5dc2993520..9aafd3ecdaa4 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -2038,8 +2037,7 @@ static int axienet_remove(struct platform_device *pdev)
 
 	axienet_mdio_teardown(lp);
 
-	if (lp->clk)
-		clk_disable_unprepare(lp->clk);
+	clk_disable_unprepare(lp->clk);
 
 	of_node_put(lp->phy_node);
 	lp->phy_node = NULL;
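
The hunks above all apply one mechanical pattern: the deprecated pci_*() DMA wrappers are replaced by the generic DMA API called on the struct device embedded in the pci_dev, the PCI_DMA_TODEVICE/PCI_DMA_FROMDEVICE direction flags become DMA_TO_DEVICE/DMA_FROM_DEVICE, and pci_alloc_consistent()'s implicit GFP_ATOMIC becomes an explicit gfp argument to dma_alloc_coherent() (kept as GFP_ATOMIC where the caller cannot sleep, as in tc35815_init_queues(), or relaxed to GFP_KERNEL in process context). A minimal sketch of the resulting style follows; the foo_* helpers are hypothetical and belong to none of the drivers touched here:

/* Sketch only: "foo" identifiers are placeholders, not from this series. */
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/skbuff.h>

static int foo_map_tx_skb(struct pci_dev *pdev, struct sk_buff *skb,
			  dma_addr_t *handle)
{
	/* Was: pci_map_single(pdev, skb->data, skb->len, PCI_DMA_TODEVICE).
	 * The generic API maps through the embedded struct device.
	 */
	*handle = dma_map_single(&pdev->dev, skb->data, skb->len,
				 DMA_TO_DEVICE);

	/* Mapping can fail; check before handing the address to hardware. */
	if (dma_mapping_error(&pdev->dev, *handle))
		return -ENOMEM;
	return 0;
}

static void foo_unmap_tx_skb(struct pci_dev *pdev, dma_addr_t handle,
			     size_t len)
{
	/* Was: pci_unmap_single(pdev, handle, len, PCI_DMA_TODEVICE). */
	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
}

Because pci_map_single() and friends were already thin wrappers around these dma_*() calls, the conversion changes no runtime behaviour; it only drops the legacy PCI-specific layer.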