summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nils Wallménius <nils@rockbox.org>, 2011-09-15 08:31:39 +0000
committer: Nils Wallménius <nils@rockbox.org>, 2011-09-15 08:31:39 +0000
commit: 3f08357f2b89277d6196a46b099f7143ef14f223 (patch)
tree: 4a4f16e82507eadde1968179f613610351e5f7dc
parent: 454272cabfadeed04822525347800997ba71192b (diff)
codeclib: more cf asm for the mdct, saves ~0.7MHz decoding vorbis on h300.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30552 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/lib/mdct.c 116
1 file changed, 115 insertions, 1 deletion
diff --git a/apps/codecs/lib/mdct.c b/apps/codecs/lib/mdct.c
index 5524afbbc4..777aec4a55 100644
--- a/apps/codecs/lib/mdct.c
+++ b/apps/codecs/lib/mdct.c
@@ -41,7 +41,7 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
{
int n8, n4, n2, n, j;
const fixed32 *in1, *in2;
-
+ (void)j;
n = 1 << nbits;
n2 = n >> 1;
@@ -79,6 +79,62 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
const uint16_t * p_revtab=revtab;
{
const uint16_t * const p_revtab_end = p_revtab + n8;
+#ifdef CPU_COLDFIRE
+ asm volatile ("move.l (%[in2]), %%d0\n\t"
+ "move.l (%[in1]), %%d1\n\t"
+ "bra.s 1f\n\t"
+ "0:\n\t"
+ "movem.l (%[T]), %%d2-%%d3\n\t"
+
+ "addq.l #8, %[in1]\n\t"
+ "subq.l #8, %[in2]\n\t"
+
+ "lea (%[step]*4, %[T]), %[T]\n\t"
+
+ "mac.l %%d0, %%d3, (%[T]), %%d4, %%acc0;"
+ "msac.l %%d1, %%d2, (4, %[T]), %%d5, %%acc0;"
+ "mac.l %%d1, %%d3, (%[in1]), %%d1, %%acc1;"
+ "mac.l %%d0, %%d2, (%[in2]), %%d0, %%acc1;"
+
+ "addq.l #8, %[in1]\n\t"
+ "subq.l #8, %[in2]\n\t"
+
+ "mac.l %%d0, %%d5, %%acc2;"
+ "msac.l %%d1, %%d4, (%[p_revtab])+, %%d2, %%acc2;"
+ "mac.l %%d1, %%d5, (%[in1]), %%d1, %%acc3;"
+ "mac.l %%d0, %%d4, (%[in2]), %%d0, %%acc3;"
+
+ "clr.l %%d3\n\t"
+ "move.w %%d2, %%d3\n\t"
+ "eor.l %%d3, %%d2\n\t"
+ "swap %%d2\n\t"
+ "lsr.l %[revtab_shift], %%d2\n\t"
+
+ "movclr.l %%acc0, %%d4;"
+ "movclr.l %%acc1, %%d5;"
+ "lsl.l #3, %%d2\n\t"
+ "lea (%%d2, %[z]), %%a1\n\t"
+ "movem.l %%d4-%%d5, (%%a1)\n\t"
+
+ "lsr.l %[revtab_shift], %%d3\n\t"
+
+ "movclr.l %%acc2, %%d4;"
+ "movclr.l %%acc3, %%d5;"
+ "lsl.l #3, %%d3\n\t"
+ "lea (%%d3, %[z]), %%a1\n\t"
+ "movem.l %%d4-%%d5, (%%a1)\n\t"
+
+ "lea (%[step]*4, %[T]), %[T]\n\t"
+
+ "1:\n\t"
+ "cmp.l %[p_revtab_end], %[p_revtab]\n\t"
+ "bcs.s 0b\n\t"
+ : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T),
+ [p_revtab] "+a" (p_revtab)
+ : [z] "a" (z), [step] "d" (step), [revtab_shift] "d" (revtab_shift),
+ [p_revtab_end] "r" (p_revtab_end)
+ : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory");
+#else
while(LIKELY(p_revtab < p_revtab_end))
{
j = (*p_revtab)>>revtab_shift;
@@ -94,9 +150,66 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
in2 -= 2;
p_revtab++;
}
+#endif
}
{
const uint16_t * const p_revtab_end = p_revtab + n8;
+#ifdef CPU_COLDFIRE
+ asm volatile ("move.l (%[in2]), %%d0\n\t"
+ "move.l (%[in1]), %%d1\n\t"
+ "bra.s 1f\n\t"
+ "0:\n\t"
+ "movem.l (%[T]), %%d2-%%d3\n\t"
+
+ "addq.l #8, %[in1]\n\t"
+ "subq.l #8, %[in2]\n\t"
+
+ "lea (%[step]*4, %[T]), %[T]\n\t"
+
+ "mac.l %%d0, %%d2, (%[T]), %%d4, %%acc0;"
+ "msac.l %%d1, %%d3, (4, %[T]), %%d5, %%acc0;"
+ "mac.l %%d1, %%d2, (%[in1]), %%d1, %%acc1;"
+ "mac.l %%d0, %%d3, (%[in2]), %%d0, %%acc1;"
+
+ "addq.l #8, %[in1]\n\t"
+ "subq.l #8, %[in2]\n\t"
+
+ "mac.l %%d0, %%d4, %%acc2;"
+ "msac.l %%d1, %%d5, (%[p_revtab])+, %%d2, %%acc2;"
+ "mac.l %%d1, %%d4, (%[in1]), %%d1, %%acc3;"
+ "mac.l %%d0, %%d5, (%[in2]), %%d0, %%acc3;"
+
+ "clr.l %%d3\n\t"
+ "move.w %%d2, %%d3\n\t"
+ "eor.l %%d3, %%d2\n\t"
+ "swap %%d2\n\t"
+ "lsr.l %[revtab_shift], %%d2\n\t"
+
+ "movclr.l %%acc0, %%d4;"
+ "movclr.l %%acc1, %%d5;"
+ "lsl.l #3, %%d2\n\t"
+ "lea (%%d2, %[z]), %%a1\n\t"
+ "movem.l %%d4-%%d5, (%%a1)\n\t"
+
+ "lsr.l %[revtab_shift], %%d3\n\t"
+
+ "movclr.l %%acc2, %%d4;"
+ "movclr.l %%acc3, %%d5;"
+ "lsl.l #3, %%d3\n\t"
+ "lea (%%d3, %[z]), %%a1\n\t"
+ "movem.l %%d4-%%d5, (%%a1)\n\t"
+
+ "lea (%[step]*4, %[T]), %[T]\n\t"
+
+ "1:\n\t"
+ "cmp.l %[p_revtab_end], %[p_revtab]\n\t"
+ "bcs.s 0b\n\t"
+ : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T),
+ [p_revtab] "+a" (p_revtab)
+ : [z] "a" (z), [step] "d" (-step), [revtab_shift] "d" (revtab_shift),
+ [p_revtab_end] "r" (p_revtab_end)
+ : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory");
+#else
while(LIKELY(p_revtab < p_revtab_end))
{
j = (*p_revtab)>>revtab_shift;
@@ -112,6 +225,7 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
in2 -= 2;
p_revtab++;
}
+#endif
}