summary | refs | log | tree | commit | diff
path: root/apps/codecs
diff options
context:
space:
mode:
author Jens Arnold <amiconn@rockbox.org> 2008-11-24 18:40:43 +0000
committer Jens Arnold <amiconn@rockbox.org> 2008-11-24 18:40:43 +0000
commit 66c0cf2eb17158eec9d0cd2553481a2caf86e611 (patch)
tree 9a226b31d0c7c9ef216136586c7336b0acd1db7d /apps/codecs
parent 7bf4e7bda74fe392039d15c063e5c3a6ae3cca58 (diff)
Tweak the ARMv6 filter assembly a bit further.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19198 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_armv6.h 49
1 file changed, 22 insertions, 27 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index cd27b271af..61471103bd 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -22,7 +22,7 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
-*/
+*/
/* This version fetches data as 32 bit words, and *requires* v1 to be
* 32 bit aligned, otherwise it will result either in a data abort, or
@@ -44,9 +44,8 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
"beq 20f \n"
"10: \n"
- "ldrh r4, [%[v2]], #2 \n"
- "ldr r5, [%[v2]], #4 \n"
- "mov r4, r4, lsl #16 \n"
+ "bic %[v2], %[v2], #2 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
"1: \n"
".rept " ADD_SUB_BLOCKS "\n"
"ldmia %[v2]!, {r6-r7} \n"
@@ -114,9 +113,8 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
"beq 20f \n"
"10: \n"
- "ldrh r4, [%[v2]], #2 \n"
- "ldr r5, [%[v2]], #4 \n"
- "mov r4, r4, lsl #16 \n"
+ "bic %[v2], %[v2], #2 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
"1: \n"
".rept " ADD_SUB_BLOCKS "\n"
"ldmia %[v2]!, {r6-r7} \n"
@@ -194,51 +192,48 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
"beq 20f \n"
"10: \n"
- "ldrh r7, [%[v2]], #2 \n"
- "ldmia %[v2]!, {r4-r5} \n"
+ "bic %[v2], %[v2], #2 \n"
+ "ldmia %[v2]!, {r5-r7} \n"
"ldmia %[v1]!, {r0-r1} \n"
-#if ORDER > 32
- "mov r7, r7, lsl #16 \n"
"1: \n"
- "pkhbt r8, r4, r7 \n"
- "ldmia %[v2]!, {r6-r7} \n"
+ "pkhbt r8, r6, r5 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
+#if ORDER > 32
"smladx %[res], r0, r8, %[res] \n"
#else
- "pkhbt r8, r4, r7, lsl #16 \n"
- "ldmia %[v2]!, {r6-r7} \n"
"smuadx %[res], r0, r8 \n"
#endif
".rept " MLA_BLOCKS "\n"
- "pkhbt r8, r5, r4 \n"
+ "pkhbt r8, r7, r6 \n"
"ldmia %[v1]!, {r2-r3} \n"
"smladx %[res], r1, r8, %[res] \n"
- "pkhbt r8, r6, r5 \n"
- "ldmia %[v2]!, {r4-r5} \n"
+ "pkhbt r8, r4, r7 \n"
+ "ldmia %[v2]!, {r6-r7} \n"
"smladx %[res], r2, r8, %[res] \n"
- "pkhbt r8, r7, r6 \n"
+ "pkhbt r8, r5, r4 \n"
"ldmia %[v1]!, {r0-r1} \n"
"smladx %[res], r3, r8, %[res] \n"
- "pkhbt r8, r4, r7 \n"
- "ldmia %[v2]!, {r6-r7} \n"
+ "pkhbt r8, r6, r5 \n"
+ "ldmia %[v2]!, {r4-r5} \n"
"smladx %[res], r0, r8, %[res] \n"
".endr \n"
- "pkhbt r8, r5, r4 \n"
+ "pkhbt r8, r7, r6 \n"
"ldmia %[v1]!, {r2-r3} \n"
"smladx %[res], r1, r8, %[res] \n"
- "pkhbt r8, r6, r5 \n"
+ "pkhbt r8, r4, r7 \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
- "ldmneia %[v2]!, {r4-r5} \n"
+ "ldmneia %[v2]!, {r6-r7} \n"
"smladx %[res], r2, r8, %[res] \n"
- "pkhbt r8, r7, r6 \n"
+ "pkhbt r8, r5, r4 \n"
"ldmneia %[v1]!, {r0-r1} \n"
"smladx %[res], r3, r8, %[res] \n"
"bne 1b \n"
#else
- "pkhbt r7, r7, r6 \n"
+ "pkhbt r5, r5, r4 \n"
"smladx %[res], r2, r8, %[res] \n"
- "smladx %[res], r3, r7, %[res] \n"
+ "smladx %[res], r3, r5, %[res] \n"
#endif
"b 99f \n"