diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-02-13 22:01:24 +0000 |
---|---|---|
committer | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-02-13 22:01:24 +0000 |
commit | 35024bd54e0e9a75b80ab102c44da4b4f369aec5 (patch) | |
tree | 6fad29eb617ad9b92748a7d1658f95f089a6e4f3 /apps/codecs | |
parent | b4fd5d852a84f141bf5d34cdf33d2d969d051edd (diff) |
Speed up atrac codec for ARM through simple loop unrolling. Saves 9 MHz on PP5022 (14% speed up).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24637 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/libatrac/atrac3_arm.S | 127 |
1 files changed, 107 insertions, 20 deletions
diff --git a/apps/codecs/libatrac/atrac3_arm.S b/apps/codecs/libatrac/atrac3_arm.S
index be8b2a0e0e..0908d582ed 100644
--- a/apps/codecs/libatrac/atrac3_arm.S
+++ b/apps/codecs/libatrac/atrac3_arm.S
@@ -100,38 +100,125 @@ atrac3_iqmf_dewindowing:
/* r1 = input samples */
/* r2 = window coefficients */
/* r3 = counter */
- stmfd sp!, {r4-r10, lr} /* save non-scratch registers */
+ stmfd sp!, {r4-r9, lr} /* save non-scratch registers */
.iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */
-
- ldmia r2!, {r5, r6} /* load win[0..1] */
- ldmia r1!, {r7, r8} /* load in[0..1] */
- smull lr , r10, r5, r7 /* s1 = win[0] * in[0] */
- smull r12, r9 , r6, r8 /* s2 = win[1] * in[1] */
-
- mov r4, #46 /* r4 = 46 */
-.iqmf_dewindow_inner_loop: /* inner loop i=2...48 */
- ldmia r2!, {r5, r6} /* load win[i...i+1] */
- ldmia r1!, {r7, r8} /* load in[i...i+1] */
- smlal lr , r10, r5, r7 /* s1 = win[i ] * in[i ] */
- smlal r12, r9 , r6, r8 /* s2 = win[i+1] * in[i+1] */
-
- subs r4, r4, #2 /* inner loop -= 2*/
- bgt .iqmf_dewindow_inner_loop
+ /* 0.. 7 */
+ ldmia r2!, {r4, r5} /* load win[0..1] */
+ ldmia r1!, {r6, r7} /* load in[0..1] */
+ smull lr , r9, r4, r6 /* s1 = win[0] * in[0] */
+ smull r12, r8, r5, r7 /* s2 = win[1] * in[1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ /* 8..15 */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ /* 16..23 */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ /* 24..31 */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ /* 32..39 */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ /* 40..47 */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
+ ldmia r2!, {r4, r5} /* load win[i...i+1] */
+ ldmia r1!, {r6, r7} /* load in[i...i+1] */
+ smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
+ smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
mov lr , lr , lsr #31
- orr r10, lr , r10, lsl #1 /* s1 = low>>31 || hi<<1 */
+ orr r9, lr , r9, lsl #1 /* s1 = low>>31 || hi<<1 */
mov r12, r12, lsr #31
- orr r9 , r12, r9 , lsl #1 /* s2 = low>>31 || hi<<1 */
+ orr r8, r12, r8, lsl #1 /* s2 = low>>31 || hi<<1 */
- stmia r0!, {r9, r10} /* store result out[0]=s2, out[1]=s1 */
+ stmia r0!, {r8, r9} /* store result out[0]=s2, out[1]=s1 */
sub r1, r1, #184 /* roll back 46 entries = 184 bytes (48 read, so net advance = 2 entries) */
sub r2, r2, #192 /* roll back 48 entries = 192 bytes = win[0] */
subs r3, r3, #1 /* outer loop -= 1 */
bgt .iqmf_dewindow_outer_loop
- ldmfd sp!, {r4-r10, pc} /* restore registers */
+ ldmfd sp!, {r4-r9, pc} /* restore registers */
.atrac3_iqmf_dewindowing_end:
.size atrac3_iqmf_dewindowing,.atrac3_iqmf_dewindowing_end-atrac3_iqmf_dewindowing
|