summaryrefslogtreecommitdiff
path: root/arch/arc/lib/memcmp.S
diff options
context:
space:
mode:
authorVineet Gupta <vgupta@synopsys.com>2013-01-18 15:12:18 +0530
committerVineet Gupta <vgupta@synopsys.com>2013-02-11 20:00:35 +0530
commit5210d1e6889c8183ecad269e86e2d9c524015b5f (patch)
tree77fcc0cfb1853c553eaf58a271256f13b860a528 /arch/arc/lib/memcmp.S
parent6e35fa2d430538cd0609e499c6f789beea9e9798 (diff)
ARC: String library
Hand optimised asm code for ARC700 pipeline. Originally written/optimized by Joern Rennecke Signed-off-by: Vineet Gupta <vgupta@synopsys.com> Cc: Joern Rennecke <joern.rennecke@embecosm.com>
Diffstat (limited to 'arch/arc/lib/memcmp.S')
-rw-r--r--arch/arc/lib/memcmp.S124
1 files changed, 124 insertions, 0 deletions
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
new file mode 100644
index 000000000000..bc813d55b6c3
--- /dev/null
+++ b/arch/arc/lib/memcmp.S
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+#ifdef __LITTLE_ENDIAN__
+#define WORD2 r2
+#define SHIFT r3
+#else /* BIG ENDIAN */
+#define WORD2 r3
+#define SHIFT r2
+#endif
+
+ARC_ENTRY memcmp
+ or r12,r0,r1
+ asl_s r12,r12,30
+ sub r3,r2,1
+ brls r2,r12,.Lbytewise
+ ld r4,[r0,0]
+ ld r5,[r1,0]
+ lsr.f lp_count,r3,3
+ lpne .Loop_end
+ ld_s WORD2,[r0,4]
+ ld_s r12,[r1,4]
+ brne r4,r5,.Leven
+ ld.a r4,[r0,8]
+ ld.a r5,[r1,8]
+ brne WORD2,r12,.Lodd
+.Loop_end:
+ asl_s SHIFT,SHIFT,3
+ bhs_s .Last_cmp
+ brne r4,r5,.Leven
+ ld r4,[r0,4]
+ ld r5,[r1,4]
+#ifdef __LITTLE_ENDIAN__
+ nop_s
+ ; one more load latency cycle
+.Last_cmp:
+ xor r0,r4,r5
+ bset r0,r0,SHIFT
+ sub_s r1,r0,1
+ bic_s r1,r1,r0
+ norm r1,r1
+ b.d .Leven_cmp
+ and r1,r1,24
+.Leven:
+ xor r0,r4,r5
+ sub_s r1,r0,1
+ bic_s r1,r1,r0
+ norm r1,r1
+ ; slow track insn
+ and r1,r1,24
+.Leven_cmp:
+ asl r2,r4,r1
+ asl r12,r5,r1
+ lsr_s r2,r2,1
+ lsr_s r12,r12,1
+ j_s.d [blink]
+ sub r0,r2,r12
+ .balign 4
+.Lodd:
+ xor r0,WORD2,r12
+ sub_s r1,r0,1
+ bic_s r1,r1,r0
+ norm r1,r1
+ ; slow track insn
+ and r1,r1,24
+ asl_s r2,r2,r1
+ asl_s r12,r12,r1
+ lsr_s r2,r2,1
+ lsr_s r12,r12,1
+ j_s.d [blink]
+ sub r0,r2,r12
+#else /* BIG ENDIAN */
+.Last_cmp:
+ neg_s SHIFT,SHIFT
+ lsr r4,r4,SHIFT
+ lsr r5,r5,SHIFT
+ ; slow track insn
+.Leven:
+ sub.f r0,r4,r5
+ mov.ne r0,1
+ j_s.d [blink]
+ bset.cs r0,r0,31
+.Lodd:
+ cmp_s WORD2,r12
+
+ mov_s r0,1
+ j_s.d [blink]
+ bset.cs r0,r0,31
+#endif /* ENDIAN */
+ .balign 4
+.Lbytewise:
+ breq r2,0,.Lnil
+ ldb r4,[r0,0]
+ ldb r5,[r1,0]
+ lsr.f lp_count,r3
+ lpne .Lbyte_end
+ ldb_s r3,[r0,1]
+ ldb r12,[r1,1]
+ brne r4,r5,.Lbyte_even
+ ldb.a r4,[r0,2]
+ ldb.a r5,[r1,2]
+ brne r3,r12,.Lbyte_odd
+.Lbyte_end:
+ bcc .Lbyte_even
+ brne r4,r5,.Lbyte_even
+ ldb_s r3,[r0,1]
+ ldb_s r12,[r1,1]
+.Lbyte_odd:
+ j_s.d [blink]
+ sub r0,r3,r12
+.Lbyte_even:
+ j_s.d [blink]
+ sub r0,r4,r5
+.Lnil:
+ j_s.d [blink]
+ mov r0,0
+ARC_EXIT memcmp