summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-12-11 21:15:37 -0500
committerDavid S. Miller <davem@davemloft.net>2014-12-11 21:15:37 -0500
commit697766df6b952f09b17eefda8b5ef746acb9c1eb (patch)
treea4962667802529c26231f4768c0233a15f9e9e4c
parentc11a9009ae6a8c42a8cd69d885601e1aa6fbea04 (diff)
parent124b74c18e0e31b24638d256afee7122a994e1b3 (diff)
Merge branch 'dma_mb'
Alexander Duyck says: ==================== arch: Add lightweight memory barriers for coherent memory access These patches introduce two new primitives for synchronizing cache coherent memory writes and reads. These two new primitives are: dma_rmb() dma_wmb() The first patch cleans up some unnecessary overhead related to the definition of read_barrier_depends, smp_read_barrier_depends, and comments related to the barrier. The second patch adds the primitives for the applicable architectures and asm-generic. The third patch adds the barriers to r8169 which turns out to be a good example of where the new barriers might be useful as they have full rmb()/wmb() barriers ordering accesses to the descriptors and the DescOwn bit. The fourth patch adds support for coherent_rmb() to the Intel fm10k, igb, and ixgbe drivers. Testing with the ixgbe driver has shown a processing time reduction of at least 7ns per 64B frame on a Core i7-4930K. This patch series is essentially the v7 for: v4-7: Add lightweight memory barriers for coherent memory access v3: Add lightweight memory barriers fast_rmb() and fast_wmb() v2: Introduce load_acquire() and store_release() v1: Introduce read_acquire() The key changes in this patch series versus the earlier patches are: v7 resubmit: - Added Acked-by: Ben Herrenschmidt from v5 to dma_rmb/wmb patch - No code changes from previous set, still applies cleanly and builds. v7: - Dropped test/debug patch that was accidentally slipped in v6: - Replaced "memory based device I/O" with "consistent memory" in docs - Added reference to DMA-API.txt to explain consistent memory v5: - Renamed barriers dma_rmb and dma_wmb - Undid smp_wmb changes in x86 and PowerPC - Defined smp_rmb as __lwsync for SMP case on PowerPC v4: - Renamed barriers coherent_rmb and coherent_wmb - Added smp_lwsync for use in smp_load_acquire/smp_store_release v3: - Moved away from acquire()/store() and instead focused on barriers - Added cleanup of read_barrier_depends - Added change in r8169 to fix cur_tx/DescOwn ordering - Simplified changes to just replacing/moving barriers in r8169 - Added update to documentation with code example v2: - Renamed read_acquire() to be consistent with smp_load_acquire() - Changed barrier used to be consistent with smp_load_acquire() - Updated PowerPC code to use __lwsync based on IBM article - Added store_release() as this is a viable use case for drivers - Added r8169 patch which is able to fully use primitives - Added fm10k/igb/ixgbe patch which is able to test performance ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/memory-barriers.txt42
-rw-r--r--arch/alpha/include/asm/barrier.h51
-rw-r--r--arch/arm/include/asm/barrier.h4
-rw-r--r--arch/arm64/include/asm/barrier.h3
-rw-r--r--arch/blackfin/include/asm/barrier.h51
-rw-r--r--arch/ia64/include/asm/barrier.h25
-rw-r--r--arch/metag/include/asm/barrier.h19
-rw-r--r--arch/mips/include/asm/barrier.h61
-rw-r--r--arch/powerpc/include/asm/barrier.h19
-rw-r--r--arch/s390/include/asm/barrier.h7
-rw-r--r--arch/sparc/include/asm/barrier_64.h7
-rw-r--r--arch/x86/include/asm/barrier.h70
-rw-r--r--arch/x86/um/asm/barrier.h20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c6
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c6
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c9
-rw-r--r--drivers/net/ethernet/realtek/r8169.c29
-rw-r--r--include/asm-generic/barrier.h8
18 files changed, 258 insertions, 179 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 7ee2ae6d5451..70a09f8a0383 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1633,6 +1633,48 @@ There are some more advanced barrier functions:
operations" subsection for information on where to use these.
+ (*) dma_wmb();
+ (*) dma_rmb();
+
+ These are for use with consistent memory to guarantee the ordering
+ of writes or reads of shared memory accessible to both the CPU and a
+ DMA capable device.
+
+ For example, consider a device driver that shares memory with a device
+ and uses a descriptor status value to indicate if the descriptor belongs
+ to the device or the CPU, and a doorbell to notify it when new
+ descriptors are available:
+
+ if (desc->status != DEVICE_OWN) {
+ /* do not read data until we own descriptor */
+ dma_rmb();
+
+ /* read/modify data */
+ read_data = desc->data;
+ desc->data = write_data;
+
+ /* flush modifications before status update */
+ dma_wmb();
+
+ /* assign ownership */
+ desc->status = DEVICE_OWN;
+
+ /* force memory to sync before notifying device via MMIO */
+ wmb();
+
+ /* notify device of new descriptors */
+ writel(DESC_NOTIFY, doorbell);
+ }
+
+ The dma_rmb() allows us guarantee the device has released ownership
+ before we read the data from the descriptor, and he dma_wmb() allows
+ us to guarantee the data is written to the descriptor before the device
+ can see it now has ownership. The wmb() is needed to guarantee that the
+ cache coherent memory writes have completed before attempting a write to
+ the cache incoherent MMIO region.
+
+ See Documentation/DMA-API.txt for more information on consistent memory.
+
MMIO WRITE BARRIER
------------------
diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h
index 3832bdb794fe..77516c87255d 100644
--- a/arch/alpha/include/asm/barrier.h
+++ b/arch/alpha/include/asm/barrier.h
@@ -7,6 +7,57 @@
#define rmb() __asm__ __volatile__("mb": : :"memory")
#define wmb() __asm__ __volatile__("wmb": : :"memory")
+/**
+ * read_barrier_depends - Flush all pending reads that subsequents reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier. All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data return by
+ * any of the preceding reads. This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies. See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * b = 2;
+ * memory_barrier();
+ * p = &b; q = p;
+ * read_barrier_depends();
+ * d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends(). However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * a = 2;
+ * memory_barrier();
+ * b = 3; y = b;
+ * read_barrier_depends();
+ * x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b". Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory")
#ifdef CONFIG_SMP
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index c6a3e73a6e24..d2f81e6b8c1c 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -43,10 +43,14 @@
#define mb() do { dsb(); outer_sync(); } while (0)
#define rmb() dsb()
#define wmb() do { dsb(st); outer_sync(); } while (0)
+#define dma_rmb() dmb(osh)
+#define dma_wmb() dmb(oshst)
#else
#define mb() barrier()
#define rmb() barrier()
#define wmb() barrier()
+#define dma_rmb() barrier()
+#define dma_wmb() barrier()
#endif
#ifndef CONFIG_SMP
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 6389d60574d9..a5abb0062d6e 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -32,6 +32,9 @@
#define rmb() dsb(ld)
#define wmb() dsb(st)
+#define dma_rmb() dmb(oshld)
+#define dma_wmb() dmb(oshst)
+
#ifndef CONFIG_SMP
#define smp_mb() barrier()
#define smp_rmb() barrier()
diff --git a/arch/blackfin/include/asm/barrier.h b/arch/blackfin/include/asm/barrier.h
index 420006877998..dfb66fe88b34 100644
--- a/arch/blackfin/include/asm/barrier.h
+++ b/arch/blackfin/include/asm/barrier.h
@@ -22,6 +22,57 @@
# define mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0)
# define rmb() do { barrier(); smp_check_barrier(); } while (0)
# define wmb() do { barrier(); smp_mark_barrier(); } while (0)
+/*
+ * read_barrier_depends - Flush all pending reads that subsequents reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier. All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data return by
+ * any of the preceding reads. This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies. See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * b = 2;
+ * memory_barrier();
+ * p = &b; q = p;
+ * read_barrier_depends();
+ * d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends(). However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * a = 2;
+ * memory_barrier();
+ * b = 3; y = b;
+ * read_barrier_depends();
+ * x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b". Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
# define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0)
#endif
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index a48957c7b445..f6769eb2bbf9 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -35,26 +35,25 @@
* it's (presumably) much slower than mf and (b) mf.a is supported for
* sequential memory pages only.
*/
-#define mb() ia64_mf()
-#define rmb() mb()
-#define wmb() mb()
-#define read_barrier_depends() do { } while(0)
+#define mb() ia64_mf()
+#define rmb() mb()
+#define wmb() mb()
+
+#define dma_rmb() mb()
+#define dma_wmb() mb()
#ifdef CONFIG_SMP
# define smp_mb() mb()
-# define smp_rmb() rmb()
-# define smp_wmb() wmb()
-# define smp_read_barrier_depends() read_barrier_depends()
-
#else
-
# define smp_mb() barrier()
-# define smp_rmb() barrier()
-# define smp_wmb() barrier()
-# define smp_read_barrier_depends() do { } while(0)
-
#endif
+#define smp_rmb() smp_mb()
+#define smp_wmb() smp_mb()
+
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
#define smp_mb__before_atomic() barrier()
#define smp_mb__after_atomic() barrier()
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index c7591e80067c..d703d8e26a65 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -4,8 +4,6 @@
#include <asm/metag_mem.h>
#define nop() asm volatile ("NOP")
-#define mb() wmb()
-#define rmb() barrier()
#ifdef CONFIG_METAG_META21
@@ -41,13 +39,13 @@ static inline void wr_fence(void)
#endif /* !CONFIG_METAG_META21 */
-static inline void wmb(void)
-{
- /* flush writes through the write combiner */
- wr_fence();
-}
+/* flush writes through the write combiner */
+#define mb() wr_fence()
+#define rmb() barrier()
+#define wmb() mb()
-#define read_barrier_depends() do { } while (0)
+#define dma_rmb() rmb()
+#define dma_wmb() wmb()
#ifndef CONFIG_SMP
#define fence() do { } while (0)
@@ -82,7 +80,10 @@ static inline void fence(void)
#define smp_wmb() barrier()
#endif
#endif
-#define smp_read_barrier_depends() do { } while (0)
+
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
#define smp_store_release(p, v) \
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index d0101dd0575e..2b8bbbcb9be0 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -10,58 +10,6 @@
#include <asm/addrspace.h>
-/*
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- */
-
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
@@ -127,20 +75,21 @@
#include <asm/wbflush.h>
-#define wmb() fast_wmb()
-#define rmb() fast_rmb()
#define mb() wbflush()
#define iob() wbflush()
#else /* !CONFIG_CPU_HAS_WB */
-#define wmb() fast_wmb()
-#define rmb() fast_rmb()
#define mb() fast_mb()
#define iob() fast_iob()
#endif /* !CONFIG_CPU_HAS_WB */
+#define wmb() fast_wmb()
+#define rmb() fast_rmb()
+#define dma_wmb() fast_wmb()
+#define dma_rmb() fast_rmb()
+
#if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP)
# ifdef CONFIG_CPU_CAVIUM_OCTEON
# define smp_mb() __sync()
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index bab79a110c7b..a3bf5be111ff 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -33,12 +33,9 @@
#define mb() __asm__ __volatile__ ("sync" : : : "memory")
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define read_barrier_depends() do { } while(0)
#define set_mb(var, value) do { var = value; mb(); } while (0)
-#ifdef CONFIG_SMP
-
#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
#else
@@ -46,20 +43,26 @@
#endif
#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define dma_rmb() __lwsync()
+#define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
+
+#ifdef CONFIG_SMP
+#define smp_lwsync() __lwsync()
#define smp_mb() mb()
#define smp_rmb() __lwsync()
#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
-#define smp_read_barrier_depends() read_barrier_depends()
#else
-#define __lwsync() barrier()
+#define smp_lwsync() barrier()
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while(0)
#endif /* CONFIG_SMP */
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
/*
* This is a barrier which prevents following instructions from being
* started until the value of the argument x is known. For example, if
@@ -72,7 +75,7 @@
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
- __lwsync(); \
+ smp_lwsync(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
@@ -80,7 +83,7 @@ do { \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
- __lwsync(); \
+ smp_lwsync(); \
___p1; \
})
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index b5dce6544d76..8d724718ec21 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -24,11 +24,14 @@
#define rmb() mb()
#define wmb() mb()
-#define read_barrier_depends() do { } while(0)
+#define dma_rmb() rmb()
+#define dma_wmb() wmb()
#define smp_mb() mb()
#define smp_rmb() rmb()
#define smp_wmb() wmb()
-#define smp_read_barrier_depends() read_barrier_depends()
+
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 305dcc3dc721..76648941fea7 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -37,7 +37,9 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define rmb() __asm__ __volatile__("":::"memory")
#define wmb() __asm__ __volatile__("":::"memory")
-#define read_barrier_depends() do { } while(0)
+#define dma_rmb() rmb()
+#define dma_wmb() wmb()
+
#define set_mb(__var, __value) \
do { __var = __value; membar_safe("#StoreLoad"); } while(0)
@@ -51,7 +53,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define smp_wmb() __asm__ __volatile__("":::"memory")
#endif
-#define smp_read_barrier_depends() do { } while(0)
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 0f4460b5636d..2ab1eb33106e 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -24,78 +24,28 @@
#define wmb() asm volatile("sfence" ::: "memory")
#endif
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends() do { } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb() rmb()
+#define dma_rmb() rmb()
#else
-# define smp_rmb() barrier()
+#define dma_rmb() barrier()
#endif
+#define dma_wmb() barrier()
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#define smp_rmb() dma_rmb()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* SMP */
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
#if defined(CONFIG_X86_PPRO_FENCE)
/*
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index cc04e67bfd05..2d7d9a1f5b53 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -29,20 +29,18 @@
#endif /* CONFIG_X86_32 */
-#define read_barrier_depends() do { } while (0)
-
-#ifdef CONFIG_SMP
-
-#define smp_mb() mb()
#ifdef CONFIG_X86_PPRO_FENCE
-#define smp_rmb() rmb()
+#define dma_rmb() rmb()
#else /* CONFIG_X86_PPRO_FENCE */
-#define smp_rmb() barrier()
+#define dma_rmb() barrier()
#endif /* CONFIG_X86_PPRO_FENCE */
+#define dma_wmb() barrier()
-#define smp_wmb() barrier()
+#ifdef CONFIG_SMP
-#define smp_read_barrier_depends() read_barrier_depends()
+#define smp_mb() mb()
+#define smp_rmb() dma_rmb()
+#define smp_wmb() barrier()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* CONFIG_SMP */
@@ -50,11 +48,13 @@
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* CONFIG_SMP */
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
/*
* Stop RDTSC speculation. This is needed when you need to use RDTSC
* (or get_cycles or vread that possibly accesses the TSC) in a defined
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index ee1ecb146df7..eb088b129bc7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -615,14 +615,14 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD))
+ if (!rx_desc->d.staterr)
break;
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
- * RXD_STATUS_DD bit is set
+ * descriptor has been written back
*/
- rmb();
+ dma_rmb();
/* retrieve a buffer from the ring */
skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 2e526d4904a6..ff59897a9463 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6910,14 +6910,14 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
+ if (!rx_desc->wb.upper.status_error)
break;
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
- * RXD_STAT_DD bit is set
+ * descriptor has been written back
*/
- rmb();
+ dma_rmb();
/* retrieve a buffer from the ring */
skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 798b05556e1b..2ed2c7de2304 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2009,15 +2009,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD))
+ if (!rx_desc->wb.upper.status_error)
break;
- /*
- * This memory barrier is needed to keep us from reading
+ /* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
- * RXD_STAT_DD bit is set
+ * descriptor has been written back
*/
- rmb();
+ dma_rmb();
/* retrieve a buffer from the ring */
skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 3dad7e884952..088136b37ebe 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -6605,6 +6605,9 @@ static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
{
u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
+ /* Force memory writes to complete before releasing descriptor */
+ dma_wmb();
+
desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz);
}
@@ -6612,7 +6615,6 @@ static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping,
u32 rx_buf_sz)
{
desc->addr = cpu_to_le64(mapping);
- wmb();
rtl8169_mark_to_asic(desc, rx_buf_sz);
}
@@ -7073,16 +7075,18 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
skb_tx_timestamp(skb);
- wmb();
+ /* Force memory writes to complete before releasing descriptor */
+ dma_wmb();
/* Anti gcc 2.95.3 bugware (sic) */
status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC));
txd->opts1 = cpu_to_le32(status);
- tp->cur_tx += frags + 1;
-
+ /* Force all memory writes to complete before notifying device */
wmb();
+ tp->cur_tx += frags + 1;
+
RTL_W8(TxPoll, NPQ);
mmiowb();
@@ -7181,11 +7185,16 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
struct ring_info *tx_skb = tp->tx_skb + entry;
u32 status;
- rmb();
status = le32_to_cpu(tp->TxDescArray[entry].opts1);
if (status & DescOwn)
break;
+ /* This barrier is needed to keep us from reading
+ * any other fields out of the Tx descriptor until
+ * we know the status of DescOwn
+ */
+ dma_rmb();
+
rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
tp->TxDescArray + entry);
if (status & LastFrag) {
@@ -7280,11 +7289,16 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
struct RxDesc *desc = tp->RxDescArray + entry;
u32 status;
- rmb();
status = le32_to_cpu(desc->opts1) & tp->opts1_mask;
-
if (status & DescOwn)
break;
+
+ /* This barrier is needed to keep us from reading
+ * any other fields out of the Rx descriptor until
+ * we know the status of DescOwn
+ */
+ dma_rmb();
+
if (unlikely(status & RxRES)) {
netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n",
status);
@@ -7346,7 +7360,6 @@ process_pkt:
}
release_descriptor:
desc->opts2 = 0;
- wmb();
rtl8169_mark_to_asic(desc, rx_buf_sz);
}
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 1402fa855388..f5c40b0fadc2 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -42,6 +42,14 @@
#define wmb() mb()
#endif
+#ifndef dma_rmb
+#define dma_rmb() rmb()
+#endif
+
+#ifndef dma_wmb
+#define dma_wmb() wmb()
+#endif
+
#ifndef read_barrier_depends
#define read_barrier_depends() do { } while (0)
#endif