diff options
author | Marcoen Hirschberg <marcoen@gmail.com> | 2007-09-20 12:27:24 +0000 |
---|---|---|
committer | Marcoen Hirschberg <marcoen@gmail.com> | 2007-09-20 12:27:24 +0000 |
commit | 1ddef375df58c96327ec05f386f544d2c98007cf (patch) | |
tree | b1a50148ed750d26f32860267f087a1433ba641a /apps/codecs/libwma/wmadeci.c | |
parent | ab7975b1c9b1c28120cd1470ae3df349e001e97d (diff) |
move (i)MDCT and FFT code to separate files
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14776 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libwma/wmadeci.c')
-rw-r--r-- | apps/codecs/libwma/wmadeci.c | 331 |
1 files changed, 3 insertions, 328 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c index 99a098ea1f..bc3c11f9f2 100644 --- a/apps/codecs/libwma/wmadeci.c +++ b/apps/codecs/libwma/wmadeci.c @@ -28,6 +28,7 @@ #include "wmadec.h" #include "wmafixed.h" #include "bitstream.h" +#include "mdct.h" #define VLCBITS 7 /*7 is the lowest without glitching*/ @@ -40,71 +41,6 @@ #define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS) -#ifdef CPU_ARM -static inline -void CMUL(fixed32 *x, fixed32 *y, - fixed32 a, fixed32 b, - fixed32 t, fixed32 v) -{ - /* This version loses one bit of precision. Could be solved at the cost - * of 2 extra cycles if it becomes an issue. */ - int x1, y1, l; - asm( - "smull %[l], %[y1], %[b], %[t] \n" - "smlal %[l], %[y1], %[a], %[v] \n" - "rsb %[b], %[b], #0 \n" - "smull %[l], %[x1], %[a], %[t] \n" - "smlal %[l], %[x1], %[b], %[v] \n" - : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b) - : [a] "r" (a), [t] "r" (t), [v] "r" (v) - : "cc" - ); - *x = x1 << 1; - *y = y1 << 1; -} -#elif defined CPU_COLDFIRE -static inline -void CMUL(fixed32 *x, fixed32 *y, - fixed32 a, fixed32 b, - fixed32 t, fixed32 v) -{ - asm volatile ("mac.l %[a], %[t], %%acc0;" - "msac.l %[b], %[v], %%acc0;" - "mac.l %[b], %[t], %%acc1;" - "mac.l %[a], %[v], %%acc1;" - "movclr.l %%acc0, %[a];" - "move.l %[a], (%[x]);" - "movclr.l %%acc1, %[a];" - "move.l %[a], (%[y]);" - : [a] "+&r" (a) - : [x] "a" (x), [y] "a" (y), - [b] "r" (b), [t] "r" (t), [v] "r" (v) - : "cc", "memory"); -} -#else -// PJJ : reinstate macro -void CMUL(fixed32 *pre, - fixed32 *pim, - fixed32 are, - fixed32 aim, - fixed32 bre, - fixed32 bim) -{ - //int64_t x,y; - fixed32 _aref = are; - fixed32 _aimf = aim; - fixed32 _bref = bre; - fixed32 _bimf = bim; - fixed32 _r1 = fixmul32b(_bref, _aref); - fixed32 _r2 = fixmul32b(_bimf, _aimf); - fixed32 _r3 = fixmul32b(_bref, _aimf); - fixed32 _r4 = fixmul32b(_bimf, _aref); - *pre = _r1 - _r2; - *pim = _r3 + _r4; - -} -#endif - typedef struct CoefVLCTable { int n; /* total number of codes */ @@ -121,13 +57,6 @@ fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR; //static variables that replace malloced stuff fixed32 stat0[2048], stat1[1024], stat2[512], stat3[256], stat4[128]; //these are the MDCT reconstruction windows -fixed32 *tcosarray[5], *tsinarray[5]; -fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64]; //these are the sin and cos rotations used by the MDCT -fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64]; - -FFTComplex exptab0[512] IBSS_ATTR; -uint16_t revtab0[1024]; - uint16_t *runtabarray[2], *levtabarray[2]; //these are VLC lookup tables uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336 @@ -146,225 +75,6 @@ VLC_TYPE vlcbuf4[540][2]; #include "wmadata.h" // PJJ -/* butter fly op */ -#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ -{\ - fixed32 ax, ay, bx, by;\ - bx=pre1;\ - by=pim1;\ - ax=qre1;\ - ay=qim1;\ - pre = (bx + ax);\ - pim = (by + ay);\ - qre = (bx - ax);\ - qim = (by - ay);\ -} - - -int fft_calc_unscaled(FFTContext *s, FFTComplex *z) -{ - int ln = s->nbits; - int j, np, np2; - int nblocks, nloops; - register FFTComplex *p, *q; - // FFTComplex *exptab = s->exptab; - int l; - fixed32 tmp_re, tmp_im; - int tabshift = 10-ln; - - np = 1 << ln; - - - /* pass 0 */ - - p=&z[0]; - j=(np >> 1); - do - { - BF(p[0].re, p[0].im, p[1].re, p[1].im, - p[0].re, p[0].im, p[1].re, p[1].im); - p+=2; - } - while (--j != 0); - - /* pass 1 */ - - - p=&z[0]; - j=np >> 2; - if (s->inverse) - { - do - { - BF(p[0].re, p[0].im, p[2].re, p[2].im, - p[0].re, p[0].im, p[2].re, p[2].im); - BF(p[1].re, p[1].im, p[3].re, p[3].im, - p[1].re, p[1].im, -p[3].im, p[3].re); - p+=4; - } - while (--j != 0); - } - else - { - do - { - BF(p[0].re, p[0].im, p[2].re, p[2].im, - p[0].re, p[0].im, p[2].re, p[2].im); - BF(p[1].re, p[1].im, p[3].re, p[3].im, - p[1].re, p[1].im, p[3].im, -p[3].re); - p+=4; - } - while (--j != 0); - } - /* pass 2 .. ln-1 */ - - nblocks = np >> 3; - nloops = 1 << 2; - np2 = np >> 1; - do - { - p = z; - q = z + nloops; - for (j = 0; j < nblocks; ++j) - { - BF(p->re, p->im, q->re, q->im, - p->re, p->im, q->re, q->im); - - p++; - q++; - for(l = nblocks; l < np2; l += nblocks) - { - CMUL(&tmp_re, &tmp_im, exptab0[(l<<tabshift)].re, exptab0[(l<<tabshift)].im, q->re, q->im); - //CMUL(&tmp_re, &tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); - BF(p->re, p->im, q->re, q->im, - p->re, p->im, tmp_re, tmp_im); - p++; - q++; - } - - p += nloops; - q += nloops; - } - nblocks = nblocks >> 1; - nloops = nloops << 1; - } - while (nblocks != 0); - return 0; -} - -/** - * init MDCT or IMDCT computation. - */ -int ff_mdct_init(MDCTContext *s, int nbits, int inverse) -{ - int n, n4, i; - // fixed32 alpha; - - - memset(s, 0, sizeof(*s)); - n = 1 << nbits; //nbits ranges from 12 to 8 inclusive - s->nbits = nbits; - s->n = n; - n4 = n >> 2; - s->tcos = tcosarray[12-nbits]; - s->tsin = tsinarray[12-nbits]; - for(i=0;i<n4;i++) - { - //fixed32 pi2 = fixmul32(0x20000, M_PI_F); - fixed32 ip = itofix32(i) + 0x2000; - ip = ip >> nbits; - //ip = fixdiv32(ip,itofix32(n)); // PJJ optimize - //alpha = fixmul32(TWO_M_PI_F, ip); - //s->tcos[i] = -fixcos32(alpha); //alpha between 0 and pi/2 - //s->tsin[i] = -fixsin32(alpha); - - s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i])); //I can't remember why this works, but it seems to agree for ~24 bits, maybe more! - s->tcos[i] *=-1; - } - (&s->fft)->nbits = nbits-2; - - (&s->fft)->inverse = inverse; - - return 0; - -} - -/** - * Compute inverse MDCT of size N = 2^nbits - * @param output N samples - * @param input N/2 samples - * @param tmp N/2 samples - */ -void ff_imdct_calc(MDCTContext *s, - fixed32 *output, - fixed32 *input) -{ - int k, n8, n4, n2, n, j,scale; - const fixed32 *tcos = s->tcos; - const fixed32 *tsin = s->tsin; - const fixed32 *in1, *in2; - FFTComplex *z1 = (FFTComplex *)output; - FFTComplex *z2 = (FFTComplex *)input; - int revtabshift = 12 - s->nbits; - - n = 1 << s->nbits; - - n2 = n >> 1; - n4 = n >> 2; - n8 = n >> 3; - - - /* pre rotation */ - in1 = input; - in2 = input + n2 - 1; - - for(k = 0; k < n4; k++) - { - j=revtab0[k<<revtabshift]; - CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]); - in1 += 2; - in2 -= 2; - } - - scale = fft_calc_unscaled(&s->fft, z1); - - /* post rotation + reordering */ - - for(k = 0; k < n4; k++) - { - CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]); - } - - for(k = 0; k < n8; k++) - { - fixed32 r1,r2,r3,r4,r1n,r2n,r3n; - - r1 = z2[n8 + k].im; - r1n = r1 * -1; - r2 = z2[n8-1-k].re; - r2n = r2 * -1; - r3 = z2[k+n8].re; - r3n = r3 * -1; - r4 = z2[n8-k-1].im; - - output[2*k] = r1n; - output[n2-1-2*k] = r1; - - output[2*k+1] = r2; - output[n2-1-2*k-1] = r2n; - - output[n2 + 2*k]= r3n; - output[n-1- 2*k]= r3n; - - output[n2 + 2*k+1]= r4; - output[n-2 - 2 * k] = r4; - } - - - - -} - /* * Helper functions for wma_window. @@ -524,7 +234,7 @@ static void init_coef_vlc(VLC *vlc, int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) { //WMADecodeContext *s = avctx->priv_data; - int i, m, j, flags1, flags2; + int i, flags1, flags2; fixed32 *window; uint8_t *extradata; fixed64 bps1; @@ -800,10 +510,7 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) } } - /* init MDCT */ - /*TODO: figure out how to fold this up into one array*/ - tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4; - tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4; + mdct_init_global(); s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */ for(i = 0; i < s->nb_block_sizes; ++i) @@ -811,38 +518,6 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); } - { - int i, n; - fixed32 c1, s1, s2; - - n=1<<10; - s2 = 1 ? 1 : -1; - for(i=0;i<(n/2);++i) - { - fixed32 ifix = itofix32(i); - fixed32 nfix = itofix32(n); - fixed32 res = fixdiv32(ifix,nfix); - - s1 = fsincos(res<<16, &c1); - - exptab0[i].re = c1; - exptab0[i].im = s1*s2; - } - } - - /* init the MDCT bit reverse table here rather then in fft_init */ - - for(i=0;i<1024;i++) /*hard coded to a 2048 bit rotation*/ - { /*smaller sizes can reuse the largest*/ - m=0; - for(j=0;j<10;j++) - { - m |= ((i >> j) & 1) << (10-j-1); - } - - revtab0[i]=m; - } - /*ffmpeg uses malloc to only allocate as many window sizes as needed. However, we're really only interested in the worst case memory usage. * In the worst case you can have 5 window sizes, 128 doubling up 2048 * Smaller windows are handled differently. |