summaryrefslogtreecommitdiff
path: root/firmware
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2006-02-22 18:52:08 +0000
committerThom Johansen <thomj@rockbox.org>2006-02-22 18:52:08 +0000
commite94d2627957e1142c96577197288991debfa0a2d (patch)
tree543f162aa003631e9ca8f7925d9bfdf703854fb9 /firmware
parent32879b2323dfcf9d69859bd816397b0c89016f1f (diff)
ARM optimised memset from Linux.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8785 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r--firmware/SOURCES4
-rw-r--r--firmware/common/memcpy_a.S256
2 files changed, 260 insertions, 0 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 1a8335e61f..6b5627e4f4 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -40,6 +40,10 @@ common/timefuncs.c
common/memcpy_a.S
common/memmove_a.S
common/memset_a.S
+#elif defined(CPU_ARM)
+common/memcpy.c
+common/memmove.c
+common/memset_a.S
#else
common/memcpy.c
common/memmove.c
diff --git a/firmware/common/memcpy_a.S b/firmware/common/memcpy_a.S
index 9f6c813be3..3de319903e 100644
--- a/firmware/common/memcpy_a.S
+++ b/firmware/common/memcpy_a.S
@@ -18,7 +18,11 @@
****************************************************************************/
#include "config.h"
+#ifdef CPU_ARM
+ .section .icode,"ax",%progbits
+#else
.section .icode,"ax",@progbits
+#endif
#if CONFIG_CPU == SH7034
.align 2
@@ -875,4 +879,256 @@ __memcpy_fwd_entry:
.end:
.size memcpy,.end-memcpy
+#elif defined(CPU_ARM)
+/*
+ * linux/arch/arm/lib/memcpy.S and copy_template.S
+ *
+ * Author: Nicolas Pitre
+ * Created: Sep 28, 2005
+ * Copyright: MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ .macro ldr1w ptr reg abort
+ ldr \reg, [\ptr], #4
+ .endm
+
+ .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+ ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
+ .endm
+
+ .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+ .endm
+
+ .macro ldr1b ptr reg cond=al abort
+ ldr\cond\()b \reg, [\ptr], #1
+ .endm
+
+ .macro str1w ptr reg abort
+ str \reg, [\ptr], #4
+ .endm
+
+ .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+ .endm
+
+ .macro str1b ptr reg cond=al abort
+ str\cond\()b \reg, [\ptr], #1
+ .endm
+
+ .macro enter reg1 reg2
+ stmdb sp!, {r0, \reg1, \reg2}
+ .endm
+
+ .macro exit reg1 reg2
+ ldmfd sp!, {r0, \reg1, \reg2}
+ .endm
+
+ .text
+
+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
+
+ .global memcpy
+ .type memcpy,%function
+memcpy:
+
+/*
+ * This can be used to enable code to cacheline align the source pointer.
+ * Experiments on tested architectures (StrongARM and XScale) didn't show
+ * this a worthwhile thing to do. That might be different in the future.
+ */
+//#define CALGN(code...) code
+#define CALGN(code...)
+#define PLD(code...)
+
+ enter r4, lr
+
+ subs r2, r2, #4
+ blt 8f
+ ands ip, r0, #3
+ PLD( pld [r1, #0] )
+ bne 9f
+ ands ip, r1, #3
+ bne 10f
+
+1: subs r2, r2, #(28)
+ stmfd sp!, {r5 - r8}
+ blt 5f
+
+ CALGN( ands ip, r1, #31 )
+ CALGN( rsb r3, ip, #32 )
+ CALGN( sbcnes r4, r3, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+ CALGN( subs r2, r2, r3 ) @ C gets set
+ CALGN( add pc, r4, ip )
+
+ PLD( pld [r1, #0] )
+2: PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #28] )
+ PLD( blt 4f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
+
+3: PLD( pld [r1, #124] )
+4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+ subs r2, r2, #32
+ str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+ bge 3b
+ PLD( cmn r2, #96 )
+ PLD( bge 4b )
+
+5: ands ip, r2, #28
+ rsb ip, ip, #32
+ addne pc, pc, ip @ C is always clear here
+ b 7f
+6: nop
+ ldr1w r1, r3, abort=20f
+ ldr1w r1, r4, abort=20f
+ ldr1w r1, r5, abort=20f
+ ldr1w r1, r6, abort=20f
+ ldr1w r1, r7, abort=20f
+ ldr1w r1, r8, abort=20f
+ ldr1w r1, lr, abort=20f
+
+ add pc, pc, ip
+ nop
+ nop
+ str1w r0, r3, abort=20f
+ str1w r0, r4, abort=20f
+ str1w r0, r5, abort=20f
+ str1w r0, r6, abort=20f
+ str1w r0, r7, abort=20f
+ str1w r0, r8, abort=20f
+ str1w r0, lr, abort=20f
+
+ CALGN( bcs 2b )
+
+7: ldmfd sp!, {r5 - r8}
+
+8: movs r2, r2, lsl #31
+ ldr1b r1, r3, ne, abort=21f
+ ldr1b r1, r4, cs, abort=21f
+ ldr1b r1, ip, cs, abort=21f
+ str1b r0, r3, ne, abort=21f
+ str1b r0, r4, cs, abort=21f
+ str1b r0, ip, cs, abort=21f
+
+ exit r4, pc
+
+9: rsb ip, ip, #4
+ cmp ip, #2
+ ldr1b r1, r3, gt, abort=21f
+ ldr1b r1, r4, ge, abort=21f
+ ldr1b r1, lr, abort=21f
+ str1b r0, r3, gt, abort=21f
+ str1b r0, r4, ge, abort=21f
+ subs r2, r2, ip
+ str1b r0, lr, abort=21f
+ blt 8b
+ ands ip, r1, #3
+ beq 1b
+
+10: bic r1, r1, #3
+ cmp ip, #2
+ ldr1w r1, lr, abort=21f
+ beq 17f
+ bgt 18f
+
+
+ .macro forward_copy_shift pull push
+
+ subs r2, r2, #28
+ blt 14f
+
+ CALGN( ands ip, r1, #31 )
+ CALGN( rsb ip, ip, #32 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
+
+11: stmfd sp!, {r5 - r9}
+
+ PLD( pld [r1, #0] )
+ PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #28] )
+ PLD( blt 13f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
+
+12: PLD( pld [r1, #124] )
+13: ldr4w r1, r4, r5, r6, r7, abort=19f
+ mov r3, lr, pull #\pull
+ subs r2, r2, #32
+ ldr4w r1, r8, r9, ip, lr, abort=19f
+ orr r3, r3, r4, push #\push
+ mov r4, r4, pull #\pull
+ orr r4, r4, r5, push #\push
+ mov r5, r5, pull #\pull
+ orr r5, r5, r6, push #\push
+ mov r6, r6, pull #\pull
+ orr r6, r6, r7, push #\push
+ mov r7, r7, pull #\pull
+ orr r7, r7, r8, push #\push
+ mov r8, r8, pull #\pull
+ orr r8, r8, r9, push #\push
+ mov r9, r9, pull #\pull
+ orr r9, r9, ip, push #\push
+ mov ip, ip, pull #\pull
+ orr ip, ip, lr, push #\push
+ str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
+ bge 12b
+ PLD( cmn r2, #96 )
+ PLD( bge 13b )
+
+ ldmfd sp!, {r5 - r9}
+
+14: ands ip, r2, #28
+ beq 16f
+
+15: mov r3, lr, pull #\pull
+ ldr1w r1, lr, abort=21f
+ subs ip, ip, #4
+ orr r3, r3, lr, push #\push
+ str1w r0, r3, abort=21f
+ bgt 15b
+ CALGN( cmp r2, #0 )
+ CALGN( bge 11b )
+
+16: sub r1, r1, #(\push / 8)
+ b 8b
+
+ .endm
+
+
+ forward_copy_shift pull=8 push=24
+
+17: forward_copy_shift pull=16 push=16
+
+18: forward_copy_shift pull=24 push=8
+
+
+/*
+ * Abort preanble and completion macros.
+ * If a fixup handler is required then those macros must surround it.
+ * It is assumed that the fixup code will handle the private part of
+ * the exit macro.
+ */
+
+ .macro copy_abort_preamble
+19: ldmfd sp!, {r5 - r9}
+ b 21f
+20: ldmfd sp!, {r5 - r8}
+21:
+ .endm
+
+ .macro copy_abort_end
+ ldmfd sp!, {r4, pc}
+ .endm
+end:
+ .size memcpy,.end-memcpy
#endif