diff options
-rw-r--r-- | apps/codecs/Tremor/asm_mcf5249.h | 117 | ||||
-rw-r--r-- | apps/codecs/Tremor/block.c | 163 | ||||
-rw-r--r-- | apps/codecs/Tremor/mapping0.c | 12 | ||||
-rw-r--r-- | apps/codecs/Tremor/mdct.c | 4 | ||||
-rw-r--r-- | apps/codecs/Tremor/synthesis.c | 6 | ||||
-rw-r--r-- | apps/codecs/Tremor/window_lookup.h | 8 | ||||
-rw-r--r-- | docs/CREDITS | 1 |
7 files changed, 204 insertions, 107 deletions
diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h index 811148a8c8..9844cc05a4 100644 --- a/apps/codecs/Tremor/asm_mcf5249.h +++ b/apps/codecs/Tremor/asm_mcf5249.h @@ -21,6 +21,9 @@ #if CONFIG_CPU == MCF5249 && !defined(SIMULATOR) +/* attribute for 16-byte alignment */ +#define LINE_ATTR __attribute__ ((aligned (16))) + #ifndef _V_WIDE_MATH #define _V_WIDE_MATH @@ -107,15 +110,14 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b, } - - -#if 1 /* Canonical definition */ +#if 1 +/* canonical definition */ #define XPROD32(_a, _b, _t, _v, _x, _y) \ { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \ (_y)=MULT32(_b,_t)-MULT32(_a,_v); } #else -/* Thom Johansen suggestion; this could loose the lsb by overflow - but does it matter in practice? */ +/* Thom Johansen's suggestion; this could loose the LSB by overflow; + Does it matter in practice? */ #define XPROD32(_a, _b, _t, _v, _x, _y) \ asm volatile ("mac.l %[a], %[t], %%acc0;" \ "mac.l %[b], %[v], %%acc0;" \ @@ -129,14 +131,82 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b, : [a] "r" (_a), [b] "r" (_b), \ [t] "r" (_t), [v] "r" (_v) \ : "cc"); -#endif +#endif -/* asm versions of vector multiplication for window.c */ +/* asm versions of vector operations for block.c, window.c */ /* assumes MAC is initialized & accumulators cleared */ static inline +void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + /* align to 16 bytes */ + while(n>0 && (int)x&16) { + *x++ += *y++; + n--; + } + asm volatile ("bra 1f;" + "0:" /* loop start */ + "movem.l (%[x]), %%d0-%%d3;" /* fetch values */ + "movem.l (%[y]), %%a0-%%a3;" + /* add */ + "add.l %%a0, %%d0;" + "add.l %%a1, %%d1;" + "add.l %%a2, %%d2;" + "add.l %%a3, %%d3;" + /* store and advance */ + "movem.l %%d0-%%d3, (%[x]);" + "lea.l (4*4, %[x]), %[x];" + "lea.l (4*4, %[y]), %[y];" + "subq.l #4, %[n];" /* done 4 elements */ + "1: cmpi.l #4, %[n];" + "bge 0b;" + : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y) + : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", + "cc", "memory"); + /* add final elements */ + while (n>0) { + *x++ += *y++; + n--; + } +} + +static inline +void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + /* align to 16 bytes */ + while(n>0 && (int)x&16) { + *x++ = *y++; + n--; + } + asm volatile ("bra 1f;" + "0:" /* loop start */ + "movem.l (%[y]), %%d0-%%d3;" /* fetch values */ + "movem.l %%d0-%%d3, (%[x]);" /* store */ + "lea.l (4*4, %[x]), %[x];" /* advance */ + "lea.l (4*4, %[y]), %[y];" + "subq.l #4, %[n];" /* done 4 elements */ + "1: cmpi.l #4, %[n];" + "bge 0b;" + : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y) + : : "%d0", "%d1", "%d2", "%d3", "cc", "memory"); + /* copy final elements */ + while (n>0) { + *x++ = *y++; + n--; + } +} + + +static inline void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) { + /* ensure data is aligned to 16-bytes */ + while(n>0 && (int)data%16) { + *data = MULT31(*data, *window); + data++; + window++; + n--; + } asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */ "movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */ "lea.l (4*4, %[w]), %[w];" @@ -184,6 +254,13 @@ void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) static inline void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) { + /* ensure at least data is aligned to 16-bytes */ + while(n>0 && (int)data%16) { + *data = MULT31(*data, *window); + data++; + window--; + n--; + } asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */ "movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */ "movem.l (%[w]), %%a0-%%a3;" @@ -232,6 +309,11 @@ void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) static inline void mcf5249_vect_zero(ogg_int32_t *ptr, int n) { + /* ensure ptr is aligned to 16-bytes */ + while(n>0 && (int)ptr%16) { + *ptr++ = 0; + n--; + } asm volatile ("clr.l %%d0;" "clr.l %%d1;" "clr.l %%d2;" @@ -241,23 +323,16 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n) "bra 1f;" "0: movem.l %%d0-%%d3, (%[ptr]);" "lea (4*4, %[ptr]), %[ptr];" - "subq.l #4, %[n];" + "subq.l #4, %[n];" /* done 4 elements */ "1: bgt 0b;" - /* remaing elements */ - "tst.l %[n];" - "beq 1f;" /* n=0 */ - "clr.l (%[ptr])+;" - "subq.l #1, %[n];" - "beq 1f;" /* n=1 */ - "clr.l (%[ptr])+;" - "subq.l #1, %[n];" - "beq 1f;" /* n=2 */ - /* otherwise n = 3 */ - "clr.l (%[ptr])+;" - "1:" : [n] "+d" (n), [ptr] "+a" (ptr) : : "%d0","%d1","%d2","%d3","cc","memory"); + /* clear remaining elements */ + while(n>0) { + *ptr++ = 0; + n--; + } } #endif @@ -272,4 +347,6 @@ static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) { } #endif +#else +#define LINE_ATTR #endif diff --git a/apps/codecs/Tremor/block.c b/apps/codecs/Tremor/block.c index 6f88fb812c..f51622b5ed 100644 --- a/apps/codecs/Tremor/block.c +++ b/apps/codecs/Tremor/block.c @@ -70,8 +70,8 @@ static int ilog(unsigned int v){ | | |endSr | |beginSr | |endSl - |beginSl - |beginW + |beginSl + |beginW */ /* block abstraction setup *********************************************/ @@ -173,10 +173,8 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm)); v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret)); - // pbv: allow for extra padding for windowing for(i=0;i<vi->channels;i++) v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i])); - // v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i])); /* all 1 (large block) or 0 (small block) */ @@ -190,7 +188,7 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ int mapnum=ci->mode_param[i]->mapping; int maptype=ci->map_type[mapnum]; b->mode[i]=_mapping_P[maptype]->look(v,ci->mode_param[i], - ci->map_param[mapnum]); + ci->map_param[mapnum]); } return(0); } @@ -231,7 +229,7 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){ if(v->pcm){ for(i=0;i<vi->channels;i++) - if(v->pcm[i])_ogg_free(v->pcm[i]); + if(v->pcm[i])_ogg_free(v->pcm[i]); _ogg_free(v->pcm); if(v->pcmret)_ogg_free(v->pcmret); } @@ -239,9 +237,9 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){ /* free mode lookups; these are actually vorbis_look_mapping structs */ if(ci){ for(i=0;i<ci->modes;i++){ - int mapnum=ci->mode_param[i]->mapping; - int maptype=ci->map_type[mapnum]; - if(b && b->mode)_mapping_P[maptype]->free_look(b->mode[i]); + int mapnum=ci->mode_param[i]->mapping; + int maptype=ci->map_type[mapnum]; + if(b && b->mode)_mapping_P[maptype]->free_look(b->mode[i]); } } @@ -262,7 +260,11 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ vorbis_info *vi=v->vi; codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; private_state *b=v->backend_state; +#if CONFIG_CPU == MCF5249 + int j; +#else int i,j; +#endif if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL); @@ -304,43 +306,64 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ for(j=0;j<vi->channels;j++){ /* the overlap/add section */ if(v->lW){ - if(v->W){ - /* large/large */ - ogg_int32_t *pcm=v->pcm[j]+prevCenter; - ogg_int32_t *p=vb->pcm[j]; - for(i=0;i<n1;i++) - pcm[i]+=p[i]; - }else{ - /* large/small */ - ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; - ogg_int32_t *p=vb->pcm[j]; - for(i=0;i<n0;i++) - pcm[i]+=p[i]; - } + if(v->W){ + /* large/large */ + ogg_int32_t *pcm=v->pcm[j]+prevCenter; + ogg_int32_t *p=vb->pcm[j]; +#if CONFIG_CPU == MCF5249 + mcf5249_vect_add(pcm, p, n1); +#else + for(i=0;i<n1;i++) + pcm[i]+=p[i]; +#endif + }else{ + /* large/small */ + ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; + ogg_int32_t *p=vb->pcm[j]; +#if CONFIG_CPU == MCF5249 + mcf5249_vect_add(pcm, p, n0); +#else + for(i=0;i<n0;i++) + pcm[i]+=p[i]; +#endif + } }else{ - if(v->W){ - /* small/large */ - ogg_int32_t *pcm=v->pcm[j]+prevCenter; - ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; - for(i=0;i<n0;i++) - pcm[i]+=p[i]; - for(;i<n1/2+n0/2;i++) - pcm[i]=p[i]; - }else{ - /* small/small */ - ogg_int32_t *pcm=v->pcm[j]+prevCenter; - ogg_int32_t *p=vb->pcm[j]; - for(i=0;i<n0;i++) - pcm[i]+=p[i]; - } + if(v->W){ + /* small/large */ + ogg_int32_t *pcm=v->pcm[j]+prevCenter; + ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; +#if CONFIG_CPU == MCF5249 + mcf5249_vect_add(pcm, p, n0); + mcf5249_vect_copy(&pcm[n0], &p[n0], n1/2-n0/2); +#else + for(i=0;i<n0;i++) + pcm[i]+=p[i]; + for(;i<n1/2+n0/2;i++) + pcm[i]=p[i]; +#endif + }else{ + /* small/small */ + ogg_int32_t *pcm=v->pcm[j]+prevCenter; + ogg_int32_t *p=vb->pcm[j]; +#if CONFIG_CPU == MCF5249 + mcf5249_vect_add(pcm, p, n0); +#else + for(i=0;i<n0;i++) + pcm[i]+=p[i]; +#endif + } } /* the copy section */ { - ogg_int32_t *pcm=v->pcm[j]+thisCenter; - ogg_int32_t *p=vb->pcm[j]+n; - for(i=0;i<n;i++) - pcm[i]=p[i]; + ogg_int32_t *pcm=v->pcm[j]+thisCenter; + ogg_int32_t *p=vb->pcm[j]+n; +#if CONFIG_CPU == MCF5249 + mcf5249_vect_copy(pcm, p, n); +#else + for(i=0;i<n;i++) + pcm[i]=p[i]; +#endif } } @@ -359,8 +382,8 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ }else{ v->pcm_returned=prevCenter; v->pcm_current=prevCenter+ - ci->blocksizes[v->lW]/4+ - ci->blocksizes[v->W]/4; + ci->blocksizes[v->lW]/4+ + ci->blocksizes[v->W]/4; } } @@ -389,23 +412,23 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ /* is this a short page? */ if(b->sample_count>v->granulepos){ - /* corner case; if this is both the first and last audio page, - then spec says the end is cut, not beginning */ - if(vb->eofflag){ - /* trim the end */ - /* no preceeding granulepos; assume we started at zero (we'd - have to in a short single-page stream) */ - /* granulepos could be -1 due to a seek, but that would result - in a long coun`t, not short count */ - - v->pcm_current-=(b->sample_count-v->granulepos); - }else{ - /* trim the beginning */ - v->pcm_returned+=(b->sample_count-v->granulepos); - if(v->pcm_returned>v->pcm_current) - v->pcm_returned=v->pcm_current; - } - + /* corner case; if this is both the first and last audio page, + then spec says the end is cut, not beginning */ + if(vb->eofflag){ + /* trim the end */ + /* no preceeding granulepos; assume we started at zero (we'd + have to in a short single-page stream) */ + /* granulepos could be -1 due to a seek, but that would result + in a long coun`t, not short count */ + + v->pcm_current-=(b->sample_count-v->granulepos); + }else{ + /* trim the beginning */ + v->pcm_returned+=(b->sample_count-v->granulepos); + if(v->pcm_returned>v->pcm_current) + v->pcm_returned=v->pcm_current; + } + } } @@ -414,16 +437,16 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){ if(v->granulepos>vb->granulepos){ - long extra=v->granulepos-vb->granulepos; - - if(extra) - if(vb->eofflag){ - /* partial last frame. Strip the extra samples off */ - v->pcm_current-=extra; - } /* else {Shouldn't happen *unless* the bitstream is out of - spec. Either way, believe the bitstream } */ + long extra=v->granulepos-vb->granulepos; + + if(extra) + if(vb->eofflag){ + /* partial last frame. Strip the extra samples off */ + v->pcm_current-=extra; + } /* else {Shouldn't happen *unless* the bitstream is out of + spec. Either way, believe the bitstream } */ } /* else {Shouldn't happen *unless* the bitstream is out of - spec. Either way, believe the bitstream } */ + spec. Either way, believe the bitstream } */ v->granulepos=vb->granulepos; } } @@ -441,7 +464,7 @@ int vorbis_synthesis_pcmout(vorbis_dsp_state *v,ogg_int32_t ***pcm){ if(pcm){ int i; for(i=0;i<vi->channels;i++) - v->pcmret[i]=v->pcm[i]+v->pcm_returned; + v->pcmret[i]=v->pcm[i]+v->pcm_returned; *pcm=v->pcmret; } return(v->pcm_current-v->pcm_returned); diff --git a/apps/codecs/Tremor/mapping0.c b/apps/codecs/Tremor/mapping0.c index c53383de8f..6154f5de6b 100644 --- a/apps/codecs/Tremor/mapping0.c +++ b/apps/codecs/Tremor/mapping0.c @@ -202,10 +202,6 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){ int nonzero[CHANNELS]; void *floormemo[CHANNELS]; - /* test for too many channels; - (maybe this is can be checked at the stream level?) */ - if (vi->channels > CHANNELS) return (-1); - /* time domain information decode (note that applying the information would have to happen later; we'll probably add a function entry to the harness for that later */ @@ -286,13 +282,14 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){ //_analysis_output("residue",seq+j,vb->pcm[j],-8,n/2,0,0); /* compute and apply spectral envelope */ +#if 0 for(i=0;i<vi->channels;i++){ ogg_int32_t *pcm=vb->pcm[i]; int submap=info->chmuxlist[i]; look->floor_func[submap]-> inverse2(vb,look->floor_look[submap],floormemo[i],pcm); } - +#endif //for(j=0;j<vi->channels;j++) //_analysis_output("mdct",seq+j,vb->pcm[j],-24,n/2,0,1); @@ -301,8 +298,11 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){ for(i=0;i<vi->channels;i++){ ogg_int32_t *pcm=vb->pcm[i]; + int submap=info->chmuxlist[i]; - if(nonzero[i]) { + if(nonzero[i]) { + look->floor_func[submap]-> + inverse2(vb,look->floor_look[submap],floormemo[i],pcm); mdct_backward(n, pcm, pcm); /* window the data */ _vorbis_apply_window(pcm,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW); diff --git a/apps/codecs/Tremor/mdct.c b/apps/codecs/Tremor/mdct.c index 27a340bcad..9bdfdce2e2 100644 --- a/apps/codecs/Tremor/mdct.c +++ b/apps/codecs/Tremor/mdct.c @@ -341,10 +341,6 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out) { int shift; int step; -#if CONFIG_CPU == MCF5249 - /* mcf5249_init_mac(); */ /* should be redundant */ -#endif - for (shift=6;!(n&(1<<shift));shift++); shift=13-shift; step=2<<shift; diff --git a/apps/codecs/Tremor/synthesis.c b/apps/codecs/Tremor/synthesis.c index db178e7e9f..d01a7aaa64 100644 --- a/apps/codecs/Tremor/synthesis.c +++ b/apps/codecs/Tremor/synthesis.c @@ -33,7 +33,7 @@ #define CHANNELS 2 static ogg_int32_t *ipcm_vect[CHANNELS] IDATA_ATTR; -static ogg_int32_t ipcm_buff[CHANNELS*IRAM_PCM_END] IDATA_ATTR; +static ogg_int32_t ipcm_buff[CHANNELS*IRAM_PCM_END] IDATA_ATTR LINE_ATTR; int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){ vorbis_dsp_state *vd=vb->vd; @@ -73,10 +73,10 @@ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){ vb->sequence=op->packetno-3; /* first block is third packet */ vb->eofflag=op->e_o_s; - if(decodep){ + if(decodep && vi->channels<=CHANNELS){ /* alloc pcm passback storage */ vb->pcmend=ci->blocksizes[vb->W]; - if (vi->channels <= CHANNELS && vb->pcmend<=IRAM_PCM_END) { + if (vb->pcmend<=IRAM_PCM_END) { /* use statically allocated iram buffer */ vb->pcm = ipcm_vect; for(i=0; i<CHANNELS; i++) diff --git a/apps/codecs/Tremor/window_lookup.h b/apps/codecs/Tremor/window_lookup.h index 71a413bcd5..64350d8fba 100644 --- a/apps/codecs/Tremor/window_lookup.h +++ b/apps/codecs/Tremor/window_lookup.h @@ -32,7 +32,7 @@ static LOOKUP_T vwin64[32] = { X(0x7fdd78a5), X(0x7ff6ec6d), X(0x7ffed0e9), X(0x7ffffc3f), }; -static LOOKUP_T vwin128[64] IDATA_ATTR = { +static LOOKUP_T vwin128[64] IDATA_ATTR LINE_ATTR = { X(0x0007c04d), X(0x0045bb89), X(0x00c18b87), X(0x017ae294), X(0x02714a4e), X(0x03a4217a), X(0x05129952), X(0x06bbb24f), X(0x089e38a1), X(0x0ab8c073), X(0x0d09a228), X(0x0f8ef6bd), @@ -51,7 +51,7 @@ static LOOKUP_T vwin128[64] IDATA_ATTR = { X(0x7ffdcf39), X(0x7fff6dac), X(0x7fffed01), X(0x7fffffc4), }; -static LOOKUP_T vwin256[128] IDATA_ATTR = { +static LOOKUP_T vwin256[128] IDATA_ATTR LINE_ATTR = { X(0x0001f018), X(0x00117066), X(0x00306e9e), X(0x005ee5f1), X(0x009ccf26), X(0x00ea208b), X(0x0146cdea), X(0x01b2c87f), X(0x022dfedf), X(0x02b85ced), X(0x0351cbbd), X(0x03fa317f), @@ -86,7 +86,7 @@ static LOOKUP_T vwin256[128] IDATA_ATTR = { X(0x7fffdcd2), X(0x7ffff6d6), X(0x7ffffed0), X(0x7ffffffc), }; -static LOOKUP_T vwin512[256] IDATA_ATTR = { +static LOOKUP_T vwin512[256] IDATA_ATTR LINE_ATTR = { X(0x00007c06), X(0x00045c32), X(0x000c1c62), X(0x0017bc4c), X(0x00273b7a), X(0x003a9955), X(0x0051d51c), X(0x006cede7), X(0x008be2a9), X(0x00aeb22a), X(0x00d55b0d), X(0x00ffdbcc), @@ -284,7 +284,7 @@ static LOOKUP_T vwin1024[512] = { X(0x7fffffdd), X(0x7ffffff7), X(0x7fffffff), X(0x7fffffff), }; -static LOOKUP_T vwin2048[1024] IDATA_ATTR = { +static LOOKUP_T vwin2048[1024] IDATA_ATTR LINE_ATTR = { X(0x000007c0), X(0x000045c4), X(0x0000c1ca), X(0x00017bd3), X(0x000273de), X(0x0003a9eb), X(0x00051df9), X(0x0006d007), X(0x0008c014), X(0x000aee1e), X(0x000d5a25), X(0x00100428), diff --git a/docs/CREDITS b/docs/CREDITS index 0167b9df08..d531078910 100644 --- a/docs/CREDITS +++ b/docs/CREDITS @@ -115,3 +115,4 @@ Alessio Lenzi David Bryant Martin Arver Alexander Spyridakis +Pedro Baltazar Vasconcelos |