move (i)MDCT and FFT code to separate files

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14776 a1c6a512-1295-4272-9138-f99709370657
author: Marcoen Hirschberg <marcoen@gmail.com> 2007-09-20 12:27:24 +0000
committer: Marcoen Hirschberg <marcoen@gmail.com> 2007-09-20 12:27:24 +0000
commit: 1ddef375df58c96327ec05f386f544d2c98007cf (patch)
tree: b1a50148ed750d26f32860267f087a1433ba641a /apps/codecs/libwma/wmadeci.c
parent: ab7975b1c9b1c28120cd1470ae3df349e001e97d (diff)
1 file changed, 3 insertions, 328 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index 99a098ea1f..bc3c11f9f2 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -28,6 +28,7 @@
 #include "wmadec.h"
 #include "wmafixed.h"
 #include "bitstream.h"
+#include "mdct.h"
 
 
 #define VLCBITS 7       /*7 is the lowest without glitching*/
@@ -40,71 +41,6 @@
 #define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS)
 
 
-#ifdef CPU_ARM
-static inline
-void CMUL(fixed32 *x, fixed32 *y,
-          fixed32  a, fixed32  b,
-          fixed32  t, fixed32  v)
-{
-    /* This version loses one bit of precision. Could be solved at the cost
-     * of 2 extra cycles if it becomes an issue. */
-    int x1, y1, l;
-    asm(
-        "smull    %[l], %[y1], %[b], %[t] \n"
-        "smlal    %[l], %[y1], %[a], %[v] \n"
-        "rsb      %[b], %[b], #0          \n"
-        "smull    %[l], %[x1], %[a], %[t] \n"
-        "smlal    %[l], %[x1], %[b], %[v] \n"
-        : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b)
-        : [a] "r" (a),   [t] "r" (t),    [v] "r" (v)
-        : "cc"
-    );
-    *x = x1 << 1;
-    *y = y1 << 1;
-}
-#elif defined CPU_COLDFIRE
-static inline
-void CMUL(fixed32 *x, fixed32 *y,
-          fixed32  a, fixed32  b,
-          fixed32  t, fixed32  v)
-{
-  asm volatile ("mac.l %[a], %[t], %%acc0;"
-                "msac.l %[b], %[v], %%acc0;"
-                "mac.l %[b], %[t], %%acc1;"
-                "mac.l %[a], %[v], %%acc1;"
-                "movclr.l %%acc0, %[a];"
-                "move.l %[a], (%[x]);"
-                "movclr.l %%acc1, %[a];"
-                "move.l %[a], (%[y]);"
-                : [a] "+&r" (a)
-                : [x] "a" (x), [y] "a" (y),
-                  [b] "r" (b), [t] "r" (t), [v] "r" (v)
-                : "cc", "memory");
-}
-#else
-// PJJ : reinstate macro
-void CMUL(fixed32 *pre,
-          fixed32 *pim,
-          fixed32 are,
-          fixed32 aim,
-          fixed32 bre,
-          fixed32 bim)
-{
-    //int64_t x,y;
-    fixed32 _aref = are;
-    fixed32 _aimf = aim;
-    fixed32 _bref = bre;
-    fixed32 _bimf = bim;
-    fixed32 _r1 = fixmul32b(_bref, _aref);
-    fixed32 _r2 = fixmul32b(_bimf, _aimf);
-    fixed32 _r3 = fixmul32b(_bref, _aimf);
-    fixed32 _r4 = fixmul32b(_bimf, _aref);
-    *pre = _r1 - _r2;
-    *pim = _r3 + _r4;
-
-}
-#endif
-
 typedef struct CoefVLCTable
 {
     int n; /* total number of codes */
@@ -121,13 +57,6 @@ fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR;
 //static variables that replace malloced stuff
 fixed32 stat0[2048], stat1[1024], stat2[512], stat3[256], stat4[128];    //these are the MDCT reconstruction windows
 
-fixed32 *tcosarray[5], *tsinarray[5];
-fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64];        //these are the sin and cos rotations used by the MDCT
-fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
-
-FFTComplex  exptab0[512] IBSS_ATTR;
-uint16_t revtab0[1024];
-
 uint16_t *runtabarray[2], *levtabarray[2];                                        //these are VLC lookup tables
 
 uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336];                //these could be made smaller since only one can be 1336
@@ -146,225 +75,6 @@ VLC_TYPE vlcbuf4[540][2];
 #include "wmadata.h" // PJJ
 
 
-/* butter fly op */
-#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
-{\
-  fixed32 ax, ay, bx, by;\
-  bx=pre1;\
-  by=pim1;\
-  ax=qre1;\
-  ay=qim1;\
-  pre = (bx + ax);\
-  pim = (by + ay);\
-  qre = (bx - ax);\
-  qim = (by - ay);\
-}
-
-
-int fft_calc_unscaled(FFTContext *s, FFTComplex *z)
-{
-    int ln = s->nbits;
-    int j, np, np2;
-    int nblocks, nloops;
-    register FFTComplex *p, *q;
-   // FFTComplex *exptab = s->exptab;
-    int l;
-    fixed32 tmp_re, tmp_im;
-    int tabshift = 10-ln;
-
-    np = 1 << ln;
-
-
-    /* pass 0 */
-
-    p=&z[0];
-    j=(np >> 1);
-    do
-    {
-        BF(p[0].re, p[0].im, p[1].re, p[1].im,
-           p[0].re, p[0].im, p[1].re, p[1].im);
-        p+=2;
-    }
-    while (--j != 0);
-
-    /* pass 1 */
-
-
-    p=&z[0];
-    j=np >> 2;
-    if (s->inverse)
-    {
-        do
-        {
-            BF(p[0].re, p[0].im, p[2].re, p[2].im,
-               p[0].re, p[0].im, p[2].re, p[2].im);
-            BF(p[1].re, p[1].im, p[3].re, p[3].im,
-               p[1].re, p[1].im, -p[3].im, p[3].re);
-            p+=4;
-        }
-        while (--j != 0);
-    }
-    else
-    {
-        do
-        {
-            BF(p[0].re, p[0].im, p[2].re, p[2].im,
-               p[0].re, p[0].im, p[2].re, p[2].im);
-            BF(p[1].re, p[1].im, p[3].re, p[3].im,
-               p[1].re, p[1].im, p[3].im, -p[3].re);
-            p+=4;
-        }
-        while (--j != 0);
-    }
-    /* pass 2 .. ln-1 */
-
-    nblocks = np >> 3;
-    nloops = 1 << 2;
-    np2 = np >> 1;
-    do
-    {
-        p = z;
-        q = z + nloops;
-        for (j = 0; j < nblocks; ++j)
-        {
-            BF(p->re, p->im, q->re, q->im,
-               p->re, p->im, q->re, q->im);
-
-            p++;
-            q++;
-            for(l = nblocks; l < np2; l += nblocks)
-            {
-                CMUL(&tmp_re, &tmp_im, exptab0[(l<<tabshift)].re, exptab0[(l<<tabshift)].im, q->re, q->im);
-                //CMUL(&tmp_re, &tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
-                BF(p->re, p->im, q->re, q->im,
-                   p->re, p->im, tmp_re, tmp_im);
-                p++;
-                q++;
-            }
-
-            p += nloops;
-            q += nloops;
-        }
-        nblocks = nblocks >> 1;
-        nloops = nloops << 1;
-    }
-    while (nblocks != 0);
-    return 0;
-}
-
-/**
- * init MDCT or IMDCT computation.
- */
-int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
-{
-    int n, n4, i;
-   // fixed32 alpha;
-
-
-    memset(s, 0, sizeof(*s));
-    n = 1 << nbits;            //nbits ranges from 12 to 8 inclusive
-    s->nbits = nbits;
-    s->n = n;
-    n4 = n >> 2;
-    s->tcos = tcosarray[12-nbits];
-    s->tsin = tsinarray[12-nbits];
-    for(i=0;i<n4;i++)
-    {
-        //fixed32 pi2 = fixmul32(0x20000, M_PI_F);
-        fixed32 ip = itofix32(i) + 0x2000;
-        ip = ip >> nbits;
-        //ip = fixdiv32(ip,itofix32(n)); // PJJ optimize
-        //alpha = fixmul32(TWO_M_PI_F, ip);
-        //s->tcos[i] = -fixcos32(alpha);        //alpha between 0 and pi/2
-        //s->tsin[i] = -fixsin32(alpha);
-
-    s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i]));            //I can't remember why this works, but it seems to agree for ~24 bits, maybe more!
-    s->tcos[i] *=-1;
-  }
-    (&s->fft)->nbits = nbits-2;
-
-    (&s->fft)->inverse = inverse;
-
-    return 0;
-
-}
-
-/**
- * Compute inverse MDCT of size N = 2^nbits
- * @param output N samples
- * @param input N/2 samples
- * @param tmp N/2 samples
- */
-void ff_imdct_calc(MDCTContext *s,
-                   fixed32 *output,
-                   fixed32 *input)
-{
-    int k, n8, n4, n2, n, j,scale;
-    const fixed32 *tcos = s->tcos;
-    const fixed32 *tsin = s->tsin;
-    const fixed32 *in1, *in2;
-    FFTComplex *z1 = (FFTComplex *)output;
-    FFTComplex *z2 = (FFTComplex *)input;
-    int revtabshift = 12 - s->nbits;
-
-    n = 1 << s->nbits;
-
-    n2 = n >> 1;
-    n4 = n >> 2;
-    n8 = n >> 3;
-
-
-    /* pre rotation */
-    in1 = input;
-    in2 = input + n2 - 1;
-
-    for(k = 0; k < n4; k++)
-    {
-        j=revtab0[k<<revtabshift];
-        CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]);
-        in1 += 2;
-        in2 -= 2;
-    }
-
-        scale = fft_calc_unscaled(&s->fft, z1);
-
-    /* post rotation + reordering */
-
-    for(k = 0; k < n4; k++)
-    {
-        CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]);
-    }
-
-    for(k = 0; k < n8; k++)
-    {
-        fixed32 r1,r2,r3,r4,r1n,r2n,r3n;
-
-        r1 = z2[n8 + k].im;
-        r1n = r1 * -1;
-        r2 = z2[n8-1-k].re;
-        r2n = r2 * -1;
-        r3 = z2[k+n8].re;
-        r3n = r3 * -1;
-        r4 = z2[n8-k-1].im;
-
-        output[2*k] = r1n;
-        output[n2-1-2*k] = r1;
-
-        output[2*k+1] = r2;
-        output[n2-1-2*k-1] = r2n;
-
-        output[n2 + 2*k]= r3n;
-        output[n-1- 2*k]= r3n;
-
-        output[n2 + 2*k+1]= r4;
-        output[n-2 - 2 * k] = r4;
-    }
-
-
-
-
-}
-
 
 /*
  * Helper functions for wma_window.
@@ -524,7 +234,7 @@ static void init_coef_vlc(VLC *vlc,
 int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
 {
     //WMADecodeContext *s = avctx->priv_data;
-    int i, m, j, flags1, flags2;
+    int i, flags1, flags2;
     fixed32 *window;
     uint8_t *extradata;
     fixed64 bps1;
@@ -800,10 +510,7 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
         }
     }
 
-    /* init MDCT */
-    /*TODO:  figure out how to fold this up into one array*/
-    tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4;
-    tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4;
+    mdct_init_global();
 
     s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */
     for(i = 0; i < s->nb_block_sizes; ++i)
@@ -811,38 +518,6 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
         ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
     }
 
-    {
-        int i, n;
-        fixed32 c1, s1, s2;
-
-        n=1<<10;
-        s2 = 1 ? 1 : -1;
-        for(i=0;i<(n/2);++i)
-        {
-            fixed32 ifix = itofix32(i);
-            fixed32 nfix = itofix32(n);
-            fixed32 res = fixdiv32(ifix,nfix);
-
-            s1 = fsincos(res<<16, &c1);
-
-            exptab0[i].re = c1;
-            exptab0[i].im = s1*s2;
-        }
-    }
-
-    /* init the MDCT bit reverse table here rather then in fft_init */
-
-    for(i=0;i<1024;i++)           /*hard coded to a 2048 bit rotation*/
-    {                             /*smaller sizes can reuse the largest*/
-        m=0;
-        for(j=0;j<10;j++)
-        {
-            m |= ((i >> j) & 1) << (10-j-1);
-        }
-
-       revtab0[i]=m;
-    }
-
     /*ffmpeg uses malloc to only allocate as many window sizes as needed.  However, we're really only interested in the worst case memory usage.
     * In the worst case you can have 5 window sizes, 128 doubling up 2048
     * Smaller windows are handled differently.
author	Marcoen Hirschberg <marcoen@gmail.com>	2007-09-20 12:27:24 +0000
committer	Marcoen Hirschberg <marcoen@gmail.com>	2007-09-20 12:27:24 +0000
commit	1ddef375df58c96327ec05f386f544d2c98007cf (patch)
tree	b1a50148ed750d26f32860267f087a1433ba641a /apps/codecs/libwma/wmadeci.c
parent	ab7975b1c9b1c28120cd1470ae3df349e001e97d (diff)