summary | refs | log | tree | commit | diff
path: root/apps/codecs/libfaad
diff options
context:
space:
mode:
author: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-07-11 14:40:05 +0000
committer: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-07-11 14:40:05 +0000
commit: cef3681bae290763aa0ec378783dbaeffcf59fe3 (patch)
tree: 1863e5d25a81da108224a0a6d18058c122965dd1 /apps/codecs/libfaad
parent: b1f5e5688c9ccc94cf2767d045b20df33b8a3d9d (diff)
Further changes to AAC-HE decoding. Refactor and rearrange the DCT tables so that consecutive addresses can be used (this allows additional optimization for ARM through ldm usage). Define a macro for identical butterfly operations and replace 2 butterflies with less complex operations.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27384 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libfaad')
-rw-r--r-- apps/codecs/libfaad/sbr_dct.c 222
1 files changed, 94 insertions, 128 deletions
diff --git a/apps/codecs/libfaad/sbr_dct.c b/apps/codecs/libfaad/sbr_dct.c
index 123514f226..f471745813 100644
--- a/apps/codecs/libfaad/sbr_dct.c
+++ b/apps/codecs/libfaad/sbr_dct.c
@@ -1452,103 +1452,76 @@ void DCT2_32_unscaled(real_t *y, real_t *x)
#else /* #ifdef SBR_LOW_POWER */
-static const real_t dct4_64_tab[] ICONST_ATTR = {
- COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507),
- COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537),
- COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708),
- COEF_CONST(0.953306019306183), COEF_CONST(0.937339007854462),
- COEF_CONST(0.919113874435425), COEF_CONST(0.898674488067627),
- COEF_CONST(0.876070082187653), COEF_CONST(0.851355195045471),
- COEF_CONST(0.824589252471924), COEF_CONST(0.795836925506592),
- COEF_CONST(0.765167236328125), COEF_CONST(0.732654273509979),
- COEF_CONST(0.698376238346100), COEF_CONST(0.662415742874146),
- COEF_CONST(0.624859452247620), COEF_CONST(0.585797846317291),
- COEF_CONST(0.545324981212616), COEF_CONST(0.503538429737091),
- COEF_CONST(0.460538715124130), COEF_CONST(0.416429549455643),
- COEF_CONST(0.371317148208618), COEF_CONST(0.325310230255127),
- COEF_CONST(0.278519600629807), COEF_CONST(0.231058135628700),
- COEF_CONST(0.183039888739586), COEF_CONST(0.134580686688423),
- COEF_CONST(0.085797272622585), COEF_CONST(0.036807164549828),
- COEF_CONST(-1.012196302413940), COEF_CONST(-1.059438824653626),
- COEF_CONST(-1.104129195213318), COEF_CONST(-1.146159529685974),
- COEF_CONST(-1.185428738594055), COEF_CONST(-1.221842169761658),
- COEF_CONST(-1.255311965942383), COEF_CONST(-1.285757660865784),
- COEF_CONST(-1.313105940818787), COEF_CONST(-1.337290763854981),
- COEF_CONST(-1.358253836631775), COEF_CONST(-1.375944852828980),
- COEF_CONST(-1.390321016311646), COEF_CONST(-1.401347875595093),
- COEF_CONST(-1.408998727798462), COEF_CONST(-1.413255214691162),
- COEF_CONST(-1.414107084274292), COEF_CONST(-1.411552190780640),
- COEF_CONST(-1.405596733093262), COEF_CONST(-1.396255016326904),
- COEF_CONST(-1.383549690246582), COEF_CONST(-1.367511272430420),
- COEF_CONST(-1.348178386688232), COEF_CONST(-1.325597524642944),
- COEF_CONST(-1.299823284149170), COEF_CONST(-1.270917654037476),
- COEF_CONST(-1.238950133323669), COEF_CONST(-1.203998088836670),
- COEF_CONST(-1.166145324707031), COEF_CONST(-1.125483393669128),
- COEF_CONST(-1.082109928131104), COEF_CONST(-1.036129593849182),
- COEF_CONST(-0.987653195858002), COEF_CONST(-0.936797380447388),
- COEF_CONST(-0.883684754371643), COEF_CONST(-0.828443288803101),
- COEF_CONST(-0.771206021308899), COEF_CONST(-0.712110757827759),
- COEF_CONST(-0.651300072669983), COEF_CONST(-0.588920354843140),
- COEF_CONST(-0.525121808052063), COEF_CONST(-0.460058242082596),
- COEF_CONST(-0.393886327743530), COEF_CONST(-0.326765477657318),
- COEF_CONST(-0.258857429027557), COEF_CONST(-0.190325915813446),
- COEF_CONST(-0.121335685253143), COEF_CONST(-0.052053272724152),
- COEF_CONST(0.017354607582092), COEF_CONST(0.086720645427704),
- COEF_CONST(0.155877828598022), COEF_CONST(0.224659323692322),
- COEF_CONST(0.292899727821350), COEF_CONST(0.360434412956238),
- COEF_CONST(0.427100926637650), COEF_CONST(0.492738455533981),
- COEF_CONST(0.557188928127289), COEF_CONST(0.620297133922577),
- COEF_CONST(0.681910991668701), COEF_CONST(0.741881847381592),
- COEF_CONST(0.800065577030182), COEF_CONST(0.856321990489960),
- COEF_CONST(0.910515367984772), COEF_CONST(0.962515234947205),
- COEF_CONST(1.000000000000000), COEF_CONST(0.998795449733734),
- COEF_CONST(0.995184719562531), COEF_CONST(0.989176511764526),
- COEF_CONST(0.980785250663757), COEF_CONST(0.970031261444092),
- COEF_CONST(0.956940352916718), COEF_CONST(0.941544055938721),
- COEF_CONST(0.923879504203796), COEF_CONST(0.903989315032959),
- COEF_CONST(0.881921231746674), COEF_CONST(0.857728600502014),
- COEF_CONST(0.831469595432281), COEF_CONST(0.803207516670227),
- COEF_CONST(0.773010432720184), COEF_CONST(0.740951120853424),
- COEF_CONST(0.707106769084930), COEF_CONST(0.671558916568756),
- COEF_CONST(0.634393274784088), COEF_CONST(0.595699310302734),
- COEF_CONST(0.555570185184479), COEF_CONST(0.514102697372437),
- COEF_CONST(0.471396654844284), COEF_CONST(0.427555114030838),
- COEF_CONST(0.382683426141739), COEF_CONST(0.336889833211899),
- COEF_CONST(0.290284633636475), COEF_CONST(0.242980122566223),
- COEF_CONST(0.195090234279633), COEF_CONST(0.146730497479439),
- COEF_CONST(0.098017133772373), COEF_CONST(0.049067649990320),
- COEF_CONST(-1.000000000000000), COEF_CONST(-1.047863125801086),
- COEF_CONST(-1.093201875686646), COEF_CONST(-1.135906934738159),
- COEF_CONST(-1.175875544548035), COEF_CONST(-1.213011503219605),
- COEF_CONST(-1.247225046157837), COEF_CONST(-1.278433918952942),
- COEF_CONST(-1.306562900543213), COEF_CONST(-1.331544399261475),
- COEF_CONST(-1.353317975997925), COEF_CONST(-1.371831417083740),
- COEF_CONST(-1.387039899826050), COEF_CONST(-1.398906826972961),
- COEF_CONST(-1.407403707504273), COEF_CONST(-1.412510156631470),
- COEF_CONST(0), COEF_CONST(-1.412510156631470),
- COEF_CONST(-1.407403707504273), COEF_CONST(-1.398906826972961),
- COEF_CONST(-1.387039899826050), COEF_CONST(-1.371831417083740),
- COEF_CONST(-1.353317975997925), COEF_CONST(-1.331544399261475),
- COEF_CONST(-1.306562900543213), COEF_CONST(-1.278433918952942),
- COEF_CONST(-1.247225046157837), COEF_CONST(-1.213011384010315),
- COEF_CONST(-1.175875544548035), COEF_CONST(-1.135907053947449),
- COEF_CONST(-1.093201875686646), COEF_CONST(-1.047863125801086),
- COEF_CONST(-1.000000000000000), COEF_CONST(-0.949727773666382),
- COEF_CONST(-0.897167563438416), COEF_CONST(-0.842446029186249),
- COEF_CONST(-0.785694956779480), COEF_CONST(-0.727051079273224),
- COEF_CONST(-0.666655659675598), COEF_CONST(-0.604654192924500),
- COEF_CONST(-0.541196048259735), COEF_CONST(-0.476434230804443),
- COEF_CONST(-0.410524487495422), COEF_CONST(-0.343625843524933),
- COEF_CONST(-0.275899350643158), COEF_CONST(-0.207508206367493),
- COEF_CONST(-0.138617098331451), COEF_CONST(-0.069392144680023),
- COEF_CONST(0), COEF_CONST(0.069392263889313),
- COEF_CONST(0.138617157936096), COEF_CONST(0.207508206367493),
- COEF_CONST(0.275899469852448), COEF_CONST(0.343625962734222),
- COEF_CONST(0.410524636507034), COEF_CONST(0.476434201002121),
- COEF_CONST(0.541196107864380), COEF_CONST(0.604654192924500),
- COEF_CONST(0.666655719280243), COEF_CONST(0.727051138877869),
- COEF_CONST(0.785695075988770), COEF_CONST(0.842446029186249),
- COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382)
+/* table for pre-processing within dct4_kernel() */
+static const real_t dct4_pre_tab[] ICONST_ATTR = {
+ COEF_CONST(0.999924719333649), COEF_CONST(-1.01219630241394), COEF_CONST(-0.987653195858002),
+ COEF_CONST(0.998118102550507), COEF_CONST(-1.05943882465363), COEF_CONST(-0.936797380447388),
+ COEF_CONST(0.993906974792480), COEF_CONST(-1.10412919521332), COEF_CONST(-0.883684754371643),
+ COEF_CONST(0.987301409244537), COEF_CONST(-1.14615952968597), COEF_CONST(-0.828443288803101),
+ COEF_CONST(0.978317379951477), COEF_CONST(-1.18542873859405), COEF_CONST(-0.771206021308899),
+ COEF_CONST(0.966976463794708), COEF_CONST(-1.22184216976166), COEF_CONST(-0.712110757827759),
+ COEF_CONST(0.953306019306183), COEF_CONST(-1.25531196594238), COEF_CONST(-0.651300072669983),
+ COEF_CONST(0.937339007854462), COEF_CONST(-1.28575766086578), COEF_CONST(-0.588920354843140),
+ COEF_CONST(0.919113874435425), COEF_CONST(-1.31310594081879), COEF_CONST(-0.525121808052063),
+ COEF_CONST(0.898674488067627), COEF_CONST(-1.33729076385498), COEF_CONST(-0.460058242082596),
+ COEF_CONST(0.876070082187653), COEF_CONST(-1.35825383663177), COEF_CONST(-0.393886327743530),
+ COEF_CONST(0.851355195045471), COEF_CONST(-1.37594485282898), COEF_CONST(-0.326765477657318),
+ COEF_CONST(0.824589252471924), COEF_CONST(-1.39032101631165), COEF_CONST(-0.258857429027557),
+ COEF_CONST(0.795836925506592), COEF_CONST(-1.40134787559509), COEF_CONST(-0.190325915813446),
+ COEF_CONST(0.765167236328125), COEF_CONST(-1.40899872779846), COEF_CONST(-0.121335685253143),
+ COEF_CONST(0.732654273509979), COEF_CONST(-1.41325521469116), COEF_CONST(-0.052053272724152),
+ COEF_CONST(0.698376238346100), COEF_CONST(-1.41410708427429), COEF_CONST( 0.017354607582092),
+ COEF_CONST(0.662415742874146), COEF_CONST(-1.41155219078064), COEF_CONST( 0.086720645427704),
+ COEF_CONST(0.624859452247620), COEF_CONST(-1.40559673309326), COEF_CONST( 0.155877828598022),
+ COEF_CONST(0.585797846317291), COEF_CONST(-1.39625501632690), COEF_CONST( 0.224659323692322),
+ COEF_CONST(0.545324981212616), COEF_CONST(-1.38354969024658), COEF_CONST( 0.292899727821350),
+ COEF_CONST(0.503538429737091), COEF_CONST(-1.36751127243042), COEF_CONST( 0.360434412956238),
+ COEF_CONST(0.460538715124130), COEF_CONST(-1.34817838668823), COEF_CONST( 0.427100926637650),
+ COEF_CONST(0.416429549455643), COEF_CONST(-1.32559752464294), COEF_CONST( 0.492738455533981),
+ COEF_CONST(0.371317148208618), COEF_CONST(-1.29982328414917), COEF_CONST( 0.557188928127289),
+ COEF_CONST(0.325310230255127), COEF_CONST(-1.27091765403748), COEF_CONST( 0.620297133922577),
+ COEF_CONST(0.278519600629807), COEF_CONST(-1.23895013332367), COEF_CONST( 0.681910991668701),
+ COEF_CONST(0.231058135628700), COEF_CONST(-1.20399808883667), COEF_CONST( 0.741881847381592),
+ COEF_CONST(0.183039888739586), COEF_CONST(-1.16614532470703), COEF_CONST( 0.800065577030182),
+ COEF_CONST(0.134580686688423), COEF_CONST(-1.12548339366913), COEF_CONST( 0.856321990489960),
+ COEF_CONST(0.085797272622585), COEF_CONST(-1.08210992813110), COEF_CONST( 0.910515367984772),
+ COEF_CONST(0.036807164549828), COEF_CONST(-1.03612959384918), COEF_CONST( 0.962515234947205)
+};
+
+/* table for post-processing within dct4_kernel() */
+static const real_t dct4_post_tab[] ICONST_ATTR = {
+ COEF_CONST(1 ), COEF_CONST(-1 ), COEF_CONST(-1 ),
+ COEF_CONST(0.998795449733734), COEF_CONST(-1.04786312580109), COEF_CONST(-0.949727773666382),
+ COEF_CONST(0.995184719562531), COEF_CONST(-1.09320187568665), COEF_CONST(-0.897167563438416),
+ COEF_CONST(0.989176511764526), COEF_CONST(-1.13590693473816), COEF_CONST(-0.842446029186249),
+ COEF_CONST(0.980785250663757), COEF_CONST(-1.17587554454803), COEF_CONST(-0.785694956779480),
+ COEF_CONST(0.970031261444092), COEF_CONST(-1.21301150321960), COEF_CONST(-0.727051079273224),
+ COEF_CONST(0.956940352916718), COEF_CONST(-1.24722504615784), COEF_CONST(-0.666655659675598),
+ COEF_CONST(0.941544055938721), COEF_CONST(-1.27843391895294), COEF_CONST(-0.604654192924500),
+ COEF_CONST(0.923879504203796), COEF_CONST(-1.30656290054321), COEF_CONST(-0.541196048259735),
+ COEF_CONST(0.903989315032959), COEF_CONST(-1.33154439926148), COEF_CONST(-0.476434230804443),
+ COEF_CONST(0.881921231746674), COEF_CONST(-1.35331797599793), COEF_CONST(-0.410524487495422),
+ COEF_CONST(0.857728600502014), COEF_CONST(-1.37183141708374), COEF_CONST(-0.343625843524933),
+ COEF_CONST(0.831469595432281), COEF_CONST(-1.38703989982605), COEF_CONST(-0.275899350643158),
+ COEF_CONST(0.803207516670227), COEF_CONST(-1.39890682697296), COEF_CONST(-0.207508206367493),
+ COEF_CONST(0.773010432720184), COEF_CONST(-1.40740370750427), COEF_CONST(-0.138617098331451),
+ COEF_CONST(0.740951120853424), COEF_CONST(-1.41251015663147), COEF_CONST(-0.069392144680023),
+ COEF_CONST(0.707106769084930), COEF_CONST( 0 ), COEF_CONST( 0 ),
+ COEF_CONST(0.671558916568756), COEF_CONST(-1.41251015663147), COEF_CONST( 0.069392263889313),
+ COEF_CONST(0.634393274784088), COEF_CONST(-1.40740370750427), COEF_CONST( 0.138617157936096),
+ COEF_CONST(0.595699310302734), COEF_CONST(-1.39890682697296), COEF_CONST( 0.207508206367493),
+ COEF_CONST(0.555570185184479), COEF_CONST(-1.38703989982605), COEF_CONST( 0.275899469852448),
+ COEF_CONST(0.514102697372437), COEF_CONST(-1.37183141708374), COEF_CONST( 0.343625962734222),
+ COEF_CONST(0.471396654844284), COEF_CONST(-1.35331797599793), COEF_CONST( 0.410524636507034),
+ COEF_CONST(0.427555114030838), COEF_CONST(-1.33154439926148), COEF_CONST( 0.476434201002121),
+ COEF_CONST(0.382683426141739), COEF_CONST(-1.30656290054321), COEF_CONST( 0.541196107864380),
+ COEF_CONST(0.336889833211899), COEF_CONST(-1.27843391895294), COEF_CONST( 0.604654192924500),
+ COEF_CONST(0.290284633636475), COEF_CONST(-1.24722504615784), COEF_CONST( 0.666655719280243),
+ COEF_CONST(0.242980122566223), COEF_CONST(-1.21301138401031), COEF_CONST( 0.727051138877869),
+ COEF_CONST(0.195090234279633), COEF_CONST(-1.17587554454803), COEF_CONST( 0.785695075988770),
+ COEF_CONST(0.146730497479439), COEF_CONST(-1.13590705394745), COEF_CONST( 0.842446029186249),
+ COEF_CONST(0.098017133772373), COEF_CONST(-1.09320187568665), COEF_CONST( 0.897167563438416),
+ COEF_CONST(0.049067649990320), COEF_CONST(-1.04786312580109), COEF_CONST( 0.949727773666382)
};
// Table adapted from codeclib to fit into IRAM
@@ -1556,59 +1529,52 @@ const uint32_t dct4_revtab[32] ICONST_ATTR = {
0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17,
1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16};
+// Butterfly used in dct4_kernel()'s pre- and post-processing
+#define BUTTERFLY_DCT4(out1, out2, real_part, imag_part, tab, tabidx) \
+ x_re = real_part; \
+ x_im = imag_part; \
+ tmp = MUL_C(x_re + x_im, tab[tabidx++]); \
+ out1 = MUL_C(x_re , tab[tabidx++]) + tmp; \
+ out2 = MUL_C(x_im , tab[tabidx++]) + tmp; \
+
/* size 64 only! */
void dct4_kernel(real_t *real, real_t *imag)
{
- uint32_t i, idx;
+ uint32_t i, idx, tabidx;
real_t x_re, x_im, tmp;
FFTComplex xc[32]; /* used for calling codeclib's fft implementation */
/* Step 2: modulate and pre-rotate for codeclib's fft implementation */
// 3*32=96 multiplications
// 3*32=96 additions
- for (i = 0; i < 32; i++)
+ for (i = 0, tabidx = 0; i < 32; i++)
{
- idx = dct4_revtab[i];
- x_re = real[i];
- x_im = imag[i];
- tmp = MUL_C(x_re + x_im, dct4_64_tab[i ]);
- xc[idx].re = MUL_C(x_im , dct4_64_tab[i + 64]) + tmp;
- xc[idx].im = MUL_C(x_re , dct4_64_tab[i + 32]) + tmp;
+ idx = dct4_revtab[i];
+ BUTTERFLY_DCT4(xc[idx].im, xc[idx].re, real[i], imag[i], dct4_pre_tab, tabidx);
}
/* Step 3: FFT (codeclib's implementation) */
ff_fft_calc_c(5, xc);
/* Step 4: modulate + reordering */
- // 3*31+2=95 multiplications
- // 3*31+2=95 additions
- x_re = xc[0].re;
- x_im = xc[0].im;
- tmp = MUL_C(x_re + x_im, dct4_64_tab[0 + 3*32]);
- real[0] = MUL_C(x_im , dct4_64_tab[0 + 5*32]) + tmp;
- imag[0] = MUL_C(x_re , dct4_64_tab[0 + 4*32]) + tmp;
- for (i = 1; i < 16; i++)
+ // 3*30+2=92 multiplications
+ // 3*30+2=92 additions
+ imag[0] = xc[0].im;
+ real[0] = xc[0].re;
+ for (i = 1, tabidx = 3*1; i < 16; i++)
{
- idx = 32-i;
- x_re = xc[idx].re;
- x_im = xc[idx].im;
- tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]);
- real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp;
- imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp;
+ idx = 32-i;
+ BUTTERFLY_DCT4(imag[i], real[i], xc[idx].re, xc[idx].im, dct4_post_tab, tabidx);
}
// i = 16, idx = 16 = reorder_tab[16];
x_re = xc[16].re;
x_im = xc[16].im;
- imag[16] = MUL_C(x_im - x_re, dct4_64_tab[16 + 3*32]);
- real[16] = MUL_C(x_re + x_im, dct4_64_tab[16 + 3*32]);
- for (i = 17; i < 32; i++)
+ imag[16] = MUL_C(x_im - x_re, COEF_CONST(0.707106769084930));
+ real[16] = MUL_C(x_re + x_im, COEF_CONST(0.707106769084930));
+ for (i = 17, tabidx = 3*17; i < 32; i++)
{
- idx = 32-i;
- x_re = xc[idx].re;
- x_im = xc[idx].im;
- tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]);
- real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp;
- imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp;
+ idx = 32-i;
+ BUTTERFLY_DCT4(imag[i], real[i], xc[idx].re, xc[idx].im, dct4_post_tab, tabidx);
}
}