Revert r14786, which resulted in a substantial reduction in accuracy to save 7.6KB of RAM. Accuracy should be greatly improved now, and if we want to make that trade-off again, I can think of better ways to do it.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17783 a1c6a512-1295-4272-9138-f99709370657
author: Michael Giacomelli <giac2000@hotmail.com> 2008-06-24 15:39:44 +0000
committer: Michael Giacomelli <giac2000@hotmail.com> 2008-06-24 15:39:44 +0000
commit: ff9f3f8c04bf0ca85ff75fc7aac7518c152bc001 (patch)
tree: d54ccfe796ffac588a5b78cc9e46e29126cfe88a /apps
parent: dab9fa1ef7a547b0f7f3d4605100cd4579570e78 (diff)
2 files changed, 40 insertions, 25 deletions
diff --git a/apps/codecs/libwma/mdct.c b/apps/codecs/libwma/mdct.c
index e66be0e825..a764f47eed 100644
--- a/apps/codecs/libwma/mdct.c
+++ b/apps/codecs/libwma/mdct.c
@@ -20,7 +20,14 @@
 #include "wmafixed.h"
 #include "mdct.h"
 
-fixed32 tcos0[1024], tsin0[1024]; //these are the sin and cos rotations used by the MDCT
+/*these are the sin and cos rotations used by the MDCT*/
+
+/*accessed too infrequently to give much speedup in IRAM*/
+
+fixed32 *tcosarray[5], *tsinarray[5];
+fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64];
+fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
+
 uint16_t revtab0[1024];
 
 /**
@@ -28,16 +35,28 @@ uint16_t revtab0[1024];
  */
 int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
 {
-    int n;
-   // fixed32 alpha;
+    int n, n4, i;
 
     memset(s, 0, sizeof(*s));
     n = 1 << nbits;            //nbits ranges from 12 to 8 inclusive
-
     s->nbits = nbits;
     s->n = n;
+    n4 = n >> 2;
+    s->tcos = tcosarray[12-nbits];
+    s->tsin = tsinarray[12-nbits];
+    for(i=0;i<n4;i++)
+    {
+
+        fixed32 ip = itofix32(i) + 0x2000;
+        ip = ip >> nbits;
+
+       /*I can't remember why this works, but it seems to agree for ~24 bits, maybe more!*/
+        s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i]));
+        s->tcos[i] *=-1;
+    }
 
     (&s->fft)->nbits = nbits-2;
+
     (&s->fft)->inverse = inverse;
 
     return 0;
@@ -55,6 +74,8 @@ void ff_imdct_calc(MDCTContext *s,
                    fixed32 *input)
 {
     int k, n8, n4, n2, n, j,scale;
+    const fixed32 *tcos = s->tcos;
+    const fixed32 *tsin = s->tsin;
     const fixed32 *in1, *in2;
     FFTComplex *z1 = (FFTComplex *)output;
     FFTComplex *z2 = (FFTComplex *)input;
@@ -73,21 +94,19 @@ void ff_imdct_calc(MDCTContext *s,
 
     for(k = 0; k < n4; k++)
     {
-        int kshift = k<<revtabshift;
-        j=revtab0[kshift];
-        CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos0[kshift], tsin0[kshift]);
+        j=revtab0[k<<revtabshift];
+        CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]);
         in1 += 2;
         in2 -= 2;
     }
 
-    scale = fft_calc_unscaled(&s->fft, z1);
+        scale = fft_calc_unscaled(&s->fft, z1);
 
     /* post rotation + reordering */
 
     for(k = 0; k < n4; k++)
     {
-        int kshift = k<<revtabshift;
-        CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos0[kshift], tsin0[kshift]);
+        CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]);
     }
 
     for(k = 0; k < n8; k++)
@@ -116,9 +135,18 @@ void ff_imdct_calc(MDCTContext *s,
     }
 }
 
+/* init MDCT */
+
 int mdct_init_global(void)
 {
     int i,j,m;
+
+    /* although seemingly degenerate, these cannot actually be merged together without
+       a substantial increase in error which is unjustified by the tiny memory savings*/
+
+    tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4;
+    tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4;
+
     /* init the MDCT bit reverse table here rather then in fft_init */
 
     for(i=0;i<1024;i++)           /*hard coded to a 2048 bit rotation*/
@@ -132,21 +160,6 @@ int mdct_init_global(void)
        revtab0[i]=m;
     }
 
-    for(i=0;i<1024;i++)
-    {
-        //fixed32 pi2 = fixmul32(0x20000, M_PI_F);
-        fixed32 ip = itofix32(i) + 0x2000;
-        ip = ip >> 12;
-        //ip = fixdiv32(ip,itofix32(n)); // PJJ optimize
-        //alpha = fixmul32(TWO_M_PI_F, ip);
-        //s->tcos[i] = -fixcos32(alpha);        //alpha between 0 and pi/2
-        //s->tsin[i] = -fixsin32(alpha);
-
-        //I can't remember why this works, but it seems to agree for ~24 bits, maybe more!
-        tsin0[i] = - fsincos(ip<<16, &(tcos0[i]));
-        tcos0[i] *=-1;
-  }
-
     fft_init_global();
 
     return 0;
diff --git a/apps/codecs/libwma/mdct.h b/apps/codecs/libwma/mdct.h
index 67f510164b..57d65ae9a7 100644
--- a/apps/codecs/libwma/mdct.h
+++ b/apps/codecs/libwma/mdct.h
@@ -25,6 +25,8 @@ typedef struct MDCTContext
     int n;  /* size of MDCT (i.e. number of input data * 2) */
     int nbits; /* n = 2^nbits */
     /* pre/post rotation tables */
+    fixed32 *tcos;
+    fixed32 *tsin;
     FFTContext fft;
 }
 MDCTContext;
author	Michael Giacomelli <giac2000@hotmail.com>	2008-06-24 15:39:44 +0000
committer	Michael Giacomelli <giac2000@hotmail.com>	2008-06-24 15:39:44 +0000
commit	ff9f3f8c04bf0ca85ff75fc7aac7518c152bc001 (patch)
tree	d54ccfe796ffac588a5b78cc9e46e29126cfe88a /apps
parent	dab9fa1ef7a547b0f7f3d4605100cd4579570e78 (diff)