summary | refs | log | tree | commit | diff
path: root/apps
diff options
context:
space:
mode:
author: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-05-02 15:45:43 +0000
committer: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-05-02 15:45:43 +0000
commit: eec5eb9ecca46a4d202f7b41b3d0aa4a8626a892 (patch)
tree: 05a747e702ab0a0266c52094a325bf8d352b9f56 /apps
parent: 0fd111d4310c767828dd83d9cc23f108fe584750 (diff)
Refactor the ARM version of libmad's synthesis filter. Only two asm macros are left, and the asm implementation has been renamed for better clarity. No change in speed or precision.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25777 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- apps/codecs/libmad/synth.c 264
-rw-r--r-- apps/codecs/libmad/synth_full_arm.S 8
2 files changed, 30 insertions, 242 deletions
diff --git a/apps/codecs/libmad/synth.c b/apps/codecs/libmad/synth.c
index 9d1bdb7c91..15c1f9decc 100644
--- a/apps/codecs/libmad/synth.c
+++ b/apps/codecs/libmad/synth.c
@@ -828,60 +828,8 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
#elif defined(FPM_ARM)
-#define PROD_ODD_0(hi, lo, f, ptr) \
- do { \
- mad_fixed_t *__p = (f); \
- asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #4]\n\t" \
- "smull %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #60]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #52]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #44]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- "ldmia %2, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #36]\n\t" \
- "smlal %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #28]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #20]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #12]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- : "=&r" (lo), "=&r" (hi), "+r" (__p) \
- : "r" (ptr) \
- : "r0", "r1", "r2", "r3", "r4"); \
- } while (0)
-
-#define PROD_ODD_A(hi, lo, f, ptr) \
- do { \
- mad_fixed_t *__p = (f); \
- asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #4]\n\t" \
- "smlal %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #60]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #52]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #44]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- "ldmia %2, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #36]\n\t" \
- "smlal %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #28]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #20]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #12]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- : "+r" (lo), "+r" (hi), "+r" (__p) \
- : "r" (ptr) \
- : "r0", "r1", "r2", "r3", "r4"); \
- } while (0)
-
-#define PROD_EVEN_0(hi, lo, f, ptr) \
- do { \
+#define PROD_O(hi, lo, f, ptr) \
+ ({ \
mad_fixed_t *__p = (f); \
asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
"ldr r4, [%3, #0]\n\t" \
@@ -904,10 +852,10 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
: "=&r" (lo), "=&r" (hi), "+r" (__p) \
: "r" (ptr) \
: "r0", "r1", "r2", "r3", "r4"); \
- } while (0)
+ })
-#define PROD_EVEN_A(hi, lo, f, ptr) \
- do { \
+#define PROD_A(hi, lo, f, ptr) \
+ ({ \
mad_fixed_t *__p = (f); \
asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
"ldr r4, [%3, #0]\n\t" \
@@ -930,118 +878,18 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
: "+r" (lo), "+r" (hi), "+r" (__p) \
: "r" (ptr) \
: "r0", "r1", "r2", "r3", "r4"); \
- } while (0)
-
-#define PROD_EVENBACK_0(hi, lo, f, ptr) \
- do { \
- mad_fixed_t *__p = (f); \
- asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #60]\n\t" \
- "smull %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #68]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #76]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #84]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- "ldmia %2, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #92]\n\t" \
- "smlal %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #100]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #108]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #116]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- : "=&r" (lo), "=&r" (hi), "+r" (__p) \
- : "r" (ptr) \
- : "r0", "r1", "r2", "r3", "r4"); \
- } while (0)
-
-#define PROD_EVENBACK_A(hi, lo, f, ptr) \
- do { \
- mad_fixed_t *__p = (f); \
- asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #60]\n\t" \
- "smlal %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #68]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #76]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #84]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- "ldmia %2, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #92]\n\t" \
- "smlal %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #100]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #108]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #116]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- : "+r" (lo), "+r" (hi), "+r" (__p) \
- : "r" (ptr) \
- : "r0", "r1", "r2", "r3", "r4"); \
- } while (0)
-
-#define PROD_ODDBACK_0(hi, lo, f, ptr) \
- do { \
- mad_fixed_t *__p = (f); \
- asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #120]\n\t" \
- "smull %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #64]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #72]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #80]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- "ldmia %2, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #88]\n\t" \
- "smlal %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #96]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #104]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #112]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- : "=&r" (lo), "=&r" (hi), "+r" (__p) \
- : "r" (ptr) \
- : "r0", "r1", "r2", "r3", "r4"); \
- } while (0)
-
-#define PROD_ODDBACK_A(hi, lo, f, ptr) \
- do { \
- mad_fixed_t *__p = (f); \
- asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #120]\n\t" \
- "smlal %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #64]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #72]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #80]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- "ldmia %2, {r0, r1, r2, r3}\n\t" \
- "ldr r4, [%3, #88]\n\t" \
- "smlal %0, %1, r0, r4\n\t" \
- "ldr r4, [%3, #96]\n\t" \
- "smlal %0, %1, r1, r4\n\t" \
- "ldr r4, [%3, #104]\n\t" \
- "smlal %0, %1, r2, r4\n\t" \
- "ldr r4, [%3, #112]\n\t" \
- "smlal %0, %1, r3, r4\n\t" \
- : "+r" (lo), "+r" (hi), "+r" (__p) \
- : "r" (ptr) \
- : "r0", "r1", "r2", "r3", "r4"); \
- } while (0)
-
-void synth_full1(mad_fixed_t *pcm, mad_fixed_t (*fo)[8], mad_fixed_t (*fe)[8],
- mad_fixed_t const (*D0ptr)[32],
- mad_fixed_t const (*D1ptr)[32]);
-void synth_full2(mad_fixed_t *pcm, mad_fixed_t (*fo)[8], mad_fixed_t (*fe)[8],
- mad_fixed_t const (*D0ptr)[32],
- mad_fixed_t const (*D1ptr)[32]);
+ })
+
+void synth_full_odd_band (mad_fixed_t *pcm,
+ mad_fixed_t (*fo)[8],
+ mad_fixed_t (*fe)[8],
+ mad_fixed_t const (*D0ptr)[32],
+ mad_fixed_t const (*D1ptr)[32]);
+void synth_full_even_band(mad_fixed_t *pcm,
+ mad_fixed_t (*fo)[8],
+ mad_fixed_t (*fe)[8],
+ mad_fixed_t const (*D0ptr)[32],
+ mad_fixed_t const (*D1ptr)[32]);
static
void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
@@ -1083,99 +931,39 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
if(s & 1)
{
ptr = *D0ptr;
-/*
- ML0(hi, lo, (*fx)[0], ptr[ 1]);
- MLA(hi, lo, (*fx)[1], ptr[15]);
- MLA(hi, lo, (*fx)[2], ptr[13]);
- MLA(hi, lo, (*fx)[3], ptr[11]);
- MLA(hi, lo, (*fx)[4], ptr[ 9]);
- MLA(hi, lo, (*fx)[5], ptr[ 7]);
- MLA(hi, lo, (*fx)[6], ptr[ 5]);
- MLA(hi, lo, (*fx)[7], ptr[ 3]);
-*/
- PROD_ODD_0(hi, lo, *fx, ptr);
+ PROD_O(hi, lo, *fo, ptr+1);
MLN(hi, lo);
-/*
- MLA(hi, lo, (*fe)[0], ptr[ 0]);
- MLA(hi, lo, (*fe)[1], ptr[14]);
- MLA(hi, lo, (*fe)[2], ptr[12]);
- MLA(hi, lo, (*fe)[3], ptr[10]);
- MLA(hi, lo, (*fe)[4], ptr[ 8]);
- MLA(hi, lo, (*fe)[5], ptr[ 6]);
- MLA(hi, lo, (*fe)[6], ptr[ 4]);
- MLA(hi, lo, (*fe)[7], ptr[ 2]);
-*/
- PROD_EVEN_A(hi, lo, *fe, ptr);
+ PROD_A(hi, lo, *fe, ptr);
pcm[0] = SHIFT(MLZ(hi, lo));
pcm += 16;
- synth_full1(pcm, fo, fe, D0ptr, D1ptr);
+ synth_full_odd_band(pcm, fo, fe, D0ptr, D1ptr);
D0ptr += 15;
D1ptr += 15;
fo += 15;
fe += 15;
ptr = *(D0ptr + 1);
- PROD_ODD_0(hi, lo, *fo, ptr);
-/*
- ML0(hi, lo, (*fo)[0], ptr[ 1]);
- MLA(hi, lo, (*fo)[1], ptr[15]);
- MLA(hi, lo, (*fo)[2], ptr[13]);
- MLA(hi, lo, (*fo)[3], ptr[11]);
- MLA(hi, lo, (*fo)[4], ptr[ 9]);
- MLA(hi, lo, (*fo)[5], ptr[ 7]);
- MLA(hi, lo, (*fo)[6], ptr[ 5]);
- MLA(hi, lo, (*fo)[7], ptr[ 3]);
-*/
+ PROD_O(hi, lo, *fo, ptr+1);
pcm[0] = SHIFT(-MLZ(hi, lo));
}
else
{
ptr = *D0ptr;
-/*
- ML0(hi, lo, (*fx)[0], ptr[ 0]);
- MLA(hi, lo, (*fx)[1], ptr[14]);
- MLA(hi, lo, (*fx)[2], ptr[12]);
- MLA(hi, lo, (*fx)[3], ptr[10]);
- MLA(hi, lo, (*fx)[4], ptr[ 8]);
- MLA(hi, lo, (*fx)[5], ptr[ 6]);
- MLA(hi, lo, (*fx)[6], ptr[ 4]);
- MLA(hi, lo, (*fx)[7], ptr[ 2]);
-*/
- PROD_EVEN_0(hi, lo, *fx, ptr);
+ PROD_O(hi, lo, *fx, ptr);
MLN(hi, lo);
-/*
- MLA(hi, lo, (*fe)[0], ptr[ 1]);
- MLA(hi, lo, (*fe)[1], ptr[15]);
- MLA(hi, lo, (*fe)[2], ptr[13]);
- MLA(hi, lo, (*fe)[3], ptr[11]);
- MLA(hi, lo, (*fe)[4], ptr[ 9]);
- MLA(hi, lo, (*fe)[5], ptr[ 7]);
- MLA(hi, lo, (*fe)[6], ptr[ 5]);
- MLA(hi, lo, (*fe)[7], ptr[ 3]);
-*/
- PROD_ODD_A(hi, lo, *fe, ptr);
+ PROD_A(hi, lo, *fe, ptr+1);
pcm[0] = SHIFT(MLZ(hi, lo));
pcm += 16;
- synth_full2(pcm, fo, fe, D0ptr, D1ptr);
+ synth_full_even_band(pcm, fo, fe, D0ptr, D1ptr);
D0ptr += 15;
D1ptr += 15;
fo += 15;
fe += 15;
ptr = *(D0ptr + 1);
-/*
- ML0(hi, lo, (*fo)[0], ptr[ 0]);
- MLA(hi, lo, (*fo)[1], ptr[14]);
- MLA(hi, lo, (*fo)[2], ptr[12]);
- MLA(hi, lo, (*fo)[3], ptr[10]);
- MLA(hi, lo, (*fo)[4], ptr[ 8]);
- MLA(hi, lo, (*fo)[5], ptr[ 6]);
- MLA(hi, lo, (*fo)[6], ptr[ 4]);
- MLA(hi, lo, (*fo)[7], ptr[ 2]);
-*/
- PROD_EVEN_0(hi, lo, *fo, ptr);
+ PROD_O(hi, lo, *fo, ptr);
pcm[0] = SHIFT(-MLZ(hi, lo));
}
@@ -1185,7 +973,7 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
}
}
-# else
+# else /* not FPM_COLDFIRE_EMAC and not FPM_ARM */
static
void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S
index 419bf2b96e..c5848d3327 100644
--- a/apps/codecs/libmad/synth_full_arm.S
+++ b/apps/codecs/libmad/synth_full_arm.S
@@ -23,15 +23,15 @@
.section ICODE_SECTION_MPA_ARM,"ax",%progbits
- .global synth_full1
- .global synth_full2
+ .global synth_full_odd_band
+ .global synth_full_even_band
;; r0 = pcm
;; r1 = fo
;; r2 = fe
;; r3 = D0ptr
;; r4 = D1ptr
-synth_full1:
+synth_full_odd_band:
stmdb sp!, {r4-r11, lr}
ldr r4, [sp, #36]
ldr r5, =synth_full_sp
@@ -137,7 +137,7 @@ synth_full1:
ldr sp, [r5]
ldmia sp!, {r4-r11, pc}
-synth_full2:
+synth_full_even_band:
stmdb sp!, {r4-r11, lr}
ldr r4, [sp, #36]
ldr r5, =synth_full_sp