ARM: atomics: prefetch the destination word for write prior to strex

The cost of changing a cacheline from shared to exclusive state can be significant, especially when this is triggered by an exclusive store, since it may result in having to retry the transaction. This patch prefixes our atomic access implementations with pldw instructions (on CPUs which support them) to try and grab the line in exclusive state from the start. Only the barrier-less functions are updated, since memory barriers can limit the usefulness of prefetching data. Acked-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Will Deacon <will.deacon@arm.com>
author: Will Deacon <will.deacon@arm.com> 2013-07-04 11:43:18 +0100
committer: Will Deacon <will.deacon@arm.com> 2013-09-30 16:42:56 +0100
commit: f38d999c4d16fc0fce4270374f15fbb2d8713c09 (patch)
tree: c91a2a9fd5505a27ee0e8d03141842b07cc4e0c9 /arch/arm/include
parent: 9bb17be062de6f5a9c9643258951aa0935652ec3 (diff)
1 files changed, 7 insertions, 0 deletions
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index da1c77d39327..55ffc3b850f4 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -12,6 +12,7 @@
 #define __ASM_ARM_ATOMIC_H
 
 #include <linux/compiler.h>
+#include <linux/prefetch.h>
 #include <linux/types.h>
 #include <linux/irqflags.h>
 #include <asm/barrier.h>
@@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_add\n"
 "1:	ldrex	%0, [%3]\n"
 "	add	%0, %0, %4\n"
@@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_sub\n"
 "1:	ldrex	%0, [%3]\n"
 "	sub	%0, %0, %4\n"
@@ -138,6 +141,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 {
 	unsigned long tmp, tmp2;
 
+	prefetchw(addr);
 	__asm__ __volatile__("@ atomic_clear_mask\n"
 "1:	ldrex	%0, [%3]\n"
 "	bic	%0, %0, %4\n"
@@ -283,6 +287,7 @@ static inline void atomic64_set(atomic64_t *v, u64 i)
 {
 	u64 tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_set\n"
 "1:	ldrexd	%0, %H0, [%2]\n"
 "	strexd	%0, %3, %H3, [%2]\n"
@@ -299,6 +304,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_add\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	adds	%0, %0, %4\n"
@@ -339,6 +345,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_sub\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	subs	%0, %0, %4\n"
author	Will Deacon <will.deacon@arm.com>	2013-07-04 11:43:18 +0100
committer	Will Deacon <will.deacon@arm.com>	2013-09-30 16:42:56 +0100
commit	f38d999c4d16fc0fce4270374f15fbb2d8713c09 (patch)
tree	c91a2a9fd5505a27ee0e8d03141842b07cc4e0c9 /arch/arm/include
parent	9bb17be062de6f5a9c9643258951aa0935652ec3 (diff)