author		David S. Miller <davem@davemloft.net>	2014-12-11 21:15:37 -0500
committer	David S. Miller <davem@davemloft.net>	2014-12-11 21:15:37 -0500
commit		697766df6b952f09b17eefda8b5ef746acb9c1eb (patch)
tree		a4962667802529c26231f4768c0233a15f9e9e4c /drivers/net
parent		c11a9009ae6a8c42a8cd69d885601e1aa6fbea04 (diff)
parent		124b74c18e0e31b24638d256afee7122a994e1b3 (diff)
Merge branch 'dma_mb'
Alexander Duyck says:

====================
arch: Add lightweight memory barriers for coherent memory access

These patches introduce two new primitives for synchronizing cache coherent
memory writes and reads. These two new primitives are:

	dma_rmb()
	dma_wmb()

The first patch cleans up some unnecessary overhead related to the definition
of read_barrier_depends, smp_read_barrier_depends, and comments related to
the barrier.

The second patch adds the primitives for the applicable architectures and
asm-generic.

The third patch adds the barriers to r8169, which turns out to be a good
example of where the new barriers might be useful, as it had been using full
rmb()/wmb() barriers to order accesses to the descriptors and the DescOwn
bit.

The fourth patch adds support for dma_rmb() to the Intel fm10k, igb, and
ixgbe drivers. Testing with the ixgbe driver has shown a processing time
reduction of at least 7ns per 64B frame on a Core i7-4930K.

This patch series is essentially the v7 for:
v4-7:	Add lightweight memory barriers for coherent memory access
v3:	Add lightweight memory barriers fast_rmb() and fast_wmb()
v2:	Introduce load_acquire() and store_release()
v1:	Introduce read_acquire()

The key changes in this patch series versus the earlier patches are:

v7 resubmit:
- Added Acked-by: Ben Herrenschmidt from v5 to dma_rmb/wmb patch
- No code changes from previous set, still applies cleanly and builds

v7:
- Dropped test/debug patch that was accidentally slipped in

v6:
- Replaced "memory based device I/O" with "consistent memory" in docs
- Added reference to DMA-API.txt to explain consistent memory

v5:
- Renamed barriers dma_rmb and dma_wmb
- Undid smp_wmb changes in x86 and PowerPC
- Defined smp_rmb as __lwsync for SMP case on PowerPC

v4:
- Renamed barriers coherent_rmb and coherent_wmb
- Added smp_lwsync for use in smp_load_acquire/smp_store_release

v3:
- Moved away from acquire()/store() and instead focused on barriers
- Added cleanup of read_barrier_depends
- Added change in r8169 to fix cur_tx/DescOwn ordering
- Simplified changes to just replacing/moving barriers in r8169
- Added update to documentation with code example

v2:
- Renamed read_acquire() to be consistent with smp_load_acquire()
- Changed barrier used to be consistent with smp_load_acquire()
- Updated PowerPC code to use __lwsync based on IBM article
- Added store_release() as this is a viable use case for drivers
- Added r8169 patch which is able to fully use primitives
- Added fm10k/igb/ixgbe patch which is able to test performance
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
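For readers new to these primitives, here is a minimal, self-contained sketch of the pattern the cover letter describes: poll a descriptor status field, use dma_rmb() before reading the rest of the descriptor, and use dma_wmb() before handing the descriptor back to the device. The struct demo_rx_desc layout, the DEMO_DESC_DD bit and the demo_* helpers are invented for illustration only; they are not taken from any driver in this diff.

/*
 * Illustrative sketch only -- not code from this series.  The descriptor
 * layout (struct demo_rx_desc) and the DEMO_DESC_DD bit are made up;
 * dma_rmb()/dma_wmb() are the primitives being introduced.
 */
#include <linux/kernel.h>
#include <asm/byteorder.h>
#include <asm/barrier.h>

struct demo_rx_desc {
	__le32 status;		/* written back last by the device */
	__le32 length;
	__le64 addr;
};

#define DEMO_DESC_DD	cpu_to_le32(0x1)	/* "descriptor done" bit */

/* Placeholder for the driver's real packet handling */
static void demo_process_buffer(u64 addr, u32 len) { }

/* Rx clean path: poll for writeback, then read the rest of the descriptor */
static bool demo_clean_one(struct demo_rx_desc *desc)
{
	if (!(desc->status & DEMO_DESC_DD))
		return false;

	/*
	 * Keep the CPU from reading length/addr before it has seen the
	 * status writeback.  A full rmb() would also work, but the ring
	 * lives in coherent DMA memory, so the lighter dma_rmb() suffices.
	 */
	dma_rmb();

	demo_process_buffer(le64_to_cpu(desc->addr), le32_to_cpu(desc->length));
	return true;
}

/* Refill path: publish buffer details before handing the descriptor back */
static void demo_give_to_hw(struct demo_rx_desc *desc, dma_addr_t mapping, u32 len)
{
	desc->addr = cpu_to_le64(mapping);
	desc->length = cpu_to_le32(len);

	/* Make the writes above visible before clearing the DD/ownership bit */
	dma_wmb();
	desc->status = 0;
}

The same split shows up in the hunks below: dma_rmb() replaces rmb() in the Intel Rx clean paths, and dma_wmb() takes over the descriptor-ordering role in r8169.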
Diffstat (limited to 'drivers/net')
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_main.c   6
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c       6
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   9
-rw-r--r--  drivers/net/ethernet/realtek/r8169.c           29
4 files changed, 31 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index ee1ecb146df7..eb088b129bc7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -615,14 +615,14 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD))
+ if (!rx_desc->d.staterr)
break;
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
- * RXD_STATUS_DD bit is set
+ * descriptor has been written back
*/
- rmb();
+ dma_rmb();
/* retrieve a buffer from the ring */
skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 2e526d4904a6..ff59897a9463 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6910,14 +6910,14 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
+ if (!rx_desc->wb.upper.status_error)
break;
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
- * RXD_STAT_DD bit is set
+ * descriptor has been written back
*/
- rmb();
+ dma_rmb();
/* retrieve a buffer from the ring */
skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 798b05556e1b..2ed2c7de2304 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2009,15 +2009,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD))
+ if (!rx_desc->wb.upper.status_error)
break;
- /*
- * This memory barrier is needed to keep us from reading
+ /* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
- * RXD_STAT_DD bit is set
+ * descriptor has been written back
*/
- rmb();
+ dma_rmb();
/* retrieve a buffer from the ring */
skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 3dad7e884952..088136b37ebe 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -6605,6 +6605,9 @@ static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
{
u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
+ /* Force memory writes to complete before releasing descriptor */
+ dma_wmb();
+
desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz);
}
@@ -6612,7 +6615,6 @@ static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping,
u32 rx_buf_sz)
{
desc->addr = cpu_to_le64(mapping);
- wmb();
rtl8169_mark_to_asic(desc, rx_buf_sz);
}
@@ -7073,16 +7075,18 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
skb_tx_timestamp(skb);
- wmb();
+ /* Force memory writes to complete before releasing descriptor */
+ dma_wmb();
/* Anti gcc 2.95.3 bugware (sic) */
status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC));
txd->opts1 = cpu_to_le32(status);
- tp->cur_tx += frags + 1;
-
+ /* Force all memory writes to complete before notifying device */
wmb();
+ tp->cur_tx += frags + 1;
+
RTL_W8(TxPoll, NPQ);
mmiowb();
@@ -7181,11 +7185,16 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
struct ring_info *tx_skb = tp->tx_skb + entry;
u32 status;
- rmb();
status = le32_to_cpu(tp->TxDescArray[entry].opts1);
if (status & DescOwn)
break;
+ /* This barrier is needed to keep us from reading
+ * any other fields out of the Tx descriptor until
+ * we know the status of DescOwn
+ */
+ dma_rmb();
+
rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
tp->TxDescArray + entry);
if (status & LastFrag) {
@@ -7280,11 +7289,16 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
struct RxDesc *desc = tp->RxDescArray + entry;
u32 status;
- rmb();
status = le32_to_cpu(desc->opts1) & tp->opts1_mask;
-
if (status & DescOwn)
break;
+
+ /* This barrier is needed to keep us from reading
+ * any other fields out of the Rx descriptor until
+ * we know the status of DescOwn
+ */
+ dma_rmb();
+
if (unlikely(status & RxRES)) {
netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n",
status);
@@ -7346,7 +7360,6 @@ process_pkt:
}
release_descriptor:
desc->opts2 = 0;
- wmb();
rtl8169_mark_to_asic(desc, rx_buf_sz);
}
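
A closing note on the r8169 Tx hunks above: the series keeps a full wmb() between the descriptor writes and the TxPoll doorbell because dma_wmb() only orders writes within coherent memory, not against the MMIO write that tells the chip to start fetching. Below is a minimal sketch of that shape; struct demo_tx_desc, DEMO_OWN and the doorbell pointer are invented for illustration and are not r8169's real layout, only the barrier placement mirrors the hunks above.

/*
 * Illustrative Tx sketch, not r8169 code.  The descriptor layout, DEMO_OWN
 * and the doorbell are made up; only the barrier placement is the point.
 */
#include <linux/kernel.h>
#include <linux/io.h>
#include <asm/byteorder.h>
#include <asm/barrier.h>

struct demo_tx_desc {
	__le32 opts;		/* ownership + length, written last */
	__le32 reserved;
	__le64 addr;
};

#define DEMO_OWN	0x80000000U	/* device owns the descriptor */

static void demo_xmit_one(struct demo_tx_desc *txd, void __iomem *doorbell,
			  dma_addr_t mapping, u32 len)
{
	txd->addr = cpu_to_le64(mapping);

	/* Descriptor payload must be visible before the device can own it */
	dma_wmb();
	txd->opts = cpu_to_le32(DEMO_OWN | len);

	/*
	 * Order the coherent-memory writes above against the MMIO doorbell
	 * write below; dma_wmb() alone does not guarantee that on every
	 * architecture, which is why the r8169 patch keeps a full wmb() here.
	 */
	wmb();
	writel(1, doorbell);
}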