From 024db0a766f9590baea73babfefff278c28a5b2c Mon Sep 17 00:00:00 2001 From: Pedro Vasconcelos Date: Fri, 27 May 2005 09:14:00 +0000 Subject: Basic optimizations of the Vorbis decoder for the ihp-1x0. Capable of real-time decoding with cpu freq at 120mhz. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6527 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/Tremor/Makefile | 2 +- apps/codecs/Tremor/asm_mcf5249.h | 257 +++++++++++++++++++++++++++++++++++++ apps/codecs/Tremor/bitwise.c | 2 +- apps/codecs/Tremor/block.c | 4 + apps/codecs/Tremor/codebook.c | 2 +- apps/codecs/Tremor/config-tremor.h | 7 +- apps/codecs/Tremor/floor1.c | 20 +-- apps/codecs/Tremor/framing.c | 2 +- apps/codecs/Tremor/lsp_lookup.h | 10 +- apps/codecs/Tremor/mapping0.c | 90 +++++++++---- apps/codecs/Tremor/mdct.c | 38 +++--- apps/codecs/Tremor/mdct.h | 4 +- apps/codecs/Tremor/mdct_lookup.h | 6 +- apps/codecs/Tremor/misc.h | 29 ++--- apps/codecs/Tremor/os.h | 6 +- apps/codecs/Tremor/os_types.h | 2 +- apps/codecs/Tremor/registry.c | 6 +- apps/codecs/Tremor/res012.c | 6 +- apps/codecs/Tremor/vorbisfile.c | 4 + apps/codecs/Tremor/window.c | 12 +- apps/codecs/Tremor/window_lookup.h | 17 ++- 21 files changed, 422 insertions(+), 104 deletions(-) create mode 100644 apps/codecs/Tremor/asm_mcf5249.h (limited to 'apps/codecs') diff --git a/apps/codecs/Tremor/Makefile b/apps/codecs/Tremor/Makefile index 20e30bbbf6..2b4adf146f 100644 --- a/apps/codecs/Tremor/Makefile +++ b/apps/codecs/Tremor/Makefile @@ -14,7 +14,7 @@ ifdef APPEXTRA INCLUDES += -I$(APPSDIR)/$(APPEXTRA) endif -TREMOROPTS = +TREMOROPTS = -O2 CFLAGS = $(GCCOPTS) $(TREMOROPTS) $(INCLUDES) $(TARGET) $(EXTRA_DEFINES) \ -DMEM=${MEMORYSIZE} diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h new file mode 100644 index 0000000000..09c74671bc --- /dev/null +++ b/apps/codecs/Tremor/asm_mcf5249.h @@ -0,0 +1,257 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * + * Copyright (C) 2005 by Pedro Vasconcelos + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ +/* asm routines for wide math on the MCF5249 */ + +#include "os_types.h" + +#if CONFIG_CPU == MCF5249 && !defined(SIMULATOR) + +#ifndef _V_WIDE_MATH +#define _V_WIDE_MATH + +//#define MB() asm volatile ("" : : : "memory") +#define MB() + +static inline void mcf5249_init_mac(void) { + int r; + asm volatile ("move.l #0x20, %%macsr;" // frac, truncate, no saturation + "movclr.l %%acc0, %[r];" // clear accumulators + "move.l %%acc0, %%acc1;" + "move.l %%acc0, %%acc2;" + "move.l %%acc0, %%acc3;" + : [r] "=r" (r)); +} + +static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { + ogg_int32_t r; + asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply into acc + "movclr.l %%acc0, %[r];" // move & clear acc + "asr.l #1, %[r];" // no overflow test + : [r] "=d" (r) + : [x] "r" (x), [y] "r" (y) + : "cc"); + return r; +} + +static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { + ogg_int32_t r; + asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply + "movclr.l %%acc0, %[r];" // move and clear + : [r] "=r" (r) + : [x] "r" (x), [y] "r" (y) + : "cc"); + return r; +} + + +static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { + ogg_int32_t r; + asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply + "movclr.l %%acc0, %[r];" // get higher half + "mulu.l %[y], %[x];" // get lower half + "asl.l #8, %[r];" // hi << 17 + "asl.l #8, %[r];" + "lsr.l #8, %[x];" // (unsigned)lo >> 15 + "lsr.l #7, %[x];" + "or.l %[x], %[r];" // or + : [r] "=&d" (r), [x] "+d" (x) + : [y] "d" (y) + : "cc"); + return r; +} + + +static inline +void XPROD31(ogg_int32_t a, ogg_int32_t b, + ogg_int32_t t, ogg_int32_t v, + ogg_int32_t *x, ogg_int32_t *y) +{ + ogg_int32_t r; + asm volatile ("mac.l %[a], %[t], %%acc0;" + "mac.l %[b], %[v], %%acc0;" + "mac.l %[b], %[t], %%acc1;" + "msac.l %[a], %[v], %%acc1;" + "movclr.l %%acc0, %[r];" + "move.l %[r], (%[x]);" + "movclr.l %%acc1, %[r];" + "move.l %[r], (%[y]);" + : [r] "=&r" (r) + : [x] "a" (x), [y] "a" (y), + [a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v) + : "cc", "memory"); +} + + +static inline +void XNPROD31(ogg_int32_t a, ogg_int32_t b, + ogg_int32_t t, ogg_int32_t v, + ogg_int32_t *x, ogg_int32_t *y) +{ + ogg_int32_t r; + asm volatile ("mac.l %[a], %[t], %%acc0;" + "msac.l %[b], %[v], %%acc0;" + "mac.l %[b], %[t], %%acc1;" + "mac.l %[a], %[v], %%acc1;" + "movclr.l %%acc0, %[r];" + "move.l %[r], (%[x]);" + "movclr.l %%acc1, %[r];" + "move.l %[r], (%[y]);" + : [r] "=&r" (r) + : [x] "a" (x), [y] "a" (y), + [a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v) + : "cc", "memory"); +} + + +/* no faster way of doing this using the MAC? */ +#define XPROD32(_a, _b, _t, _v, _x, _y) \ + { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \ + (_y)=MULT32(_b,_t)-MULT32(_a,_v); } + + +/* asm versions of vector multiplication for window.c */ +/* assumes MAC is initialized & accumulators cleared */ +static inline +void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + asm volatile ("movem.l (%[d]), %%d0-%%d3;" // loop start + "movem.l (%[w]), %%a0-%%a3;" // pre-fetch registers + "lea.l (4*4, %[w]), %[w];" + "bra 1f;" // jump to loop condition + "0:" // loop body + // multiply and load next window values + "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;" + "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;" + "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;" + "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;" + "movclr.l %%acc0, %%d0;" // get the products + "movclr.l %%acc1, %%d1;" + "movclr.l %%acc2, %%d2;" + "movclr.l %%acc3, %%d3;" + // store and advance + "movem.l %%d0-%%d3, (%[d]);" + "lea.l (4*4, %[d]), %[d];" + "movem.l (%[d]), %%d0-%%d3;" + "subq.l #4, %[n];" // done 4 elements + "1: cmpi.l #4, %[n];" + "bge 0b;" + // multiply final elements + "tst.l %[n];" + "beq 1f;" // n=0 + "mac.l %%d0, %%a0, %%acc0;" + "movclr.l %%acc0, %%d0;" + "move.l %%d0, (%[d])+;" + "subq.l #1, %[n];" + "beq 1f;" // n=1 + "mac.l %%d1, %%a1, %%acc0;" + "movclr.l %%acc0, %%d1;" + "move.l %%d1, (%[d])+;" + "subq.l #1, %[n];" + "beq 1f;" // n=2 + // otherwise n = 3 + "mac.l %%d2, %%a2, %%acc0;" + "movclr.l %%acc0, %%d2;" + "move.l %%d2, (%[d])+;" + "1:" + : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window) + : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", + "cc", "memory"); +} + +static inline +void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + asm volatile ("lea.l (-3*4, %[w]), %[w];" // loop start + "movem.l (%[d]), %%d0-%%d3;" // pre-fetch registers + "movem.l (%[w]), %%a0-%%a3;" + "bra 1f;" // jump to loop condition + "0:" // loop body + // multiply and load next window value + "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;" + "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;" + "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;" + "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;" + "movclr.l %%acc0, %%d0;" // get the products + "movclr.l %%acc1, %%d1;" + "movclr.l %%acc2, %%d2;" + "movclr.l %%acc3, %%d3;" + // store and advance + "movem.l %%d0-%%d3, (%[d]);" + "lea.l (4*4, %[d]), %[d];" + "movem.l (%[d]), %%d0-%%d3;" + "subq.l #4, %[n];" // done 4 elements + "1: cmpi.l #4, %[n];" + "bge 0b;" + // multiply final elements + "tst.l %[n];" + "beq 1f;" // n=0 + "mac.l %%d0, %%a3, %%acc0;" + "movclr.l %%acc0, %%d0;" + "move.l %%d0, (%[d])+;" + "subq.l #1, %[n];" + "beq 1f;" // n=1 + "mac.l %%d1, %%a2, %%acc0;" + "movclr.l %%acc0, %%d1;" + "move.l %%d1, (%[d])+;" + "subq.l #1, %[n];" + "beq 1f;" // n=2 + // otherwise n = 3 + "mac.l %%d2, %%a1, %%acc0;" + "movclr.l %%acc0, %%d2;" + "move.l %%d2, (%[d])+;" + "1:" + : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window) + : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", + "cc", "memory"); +} + + +static inline +void mcf5249_vect_zero(ogg_int32_t *ptr, int n) +{ + asm volatile ("clr.l %%d0;" + "clr.l %%d1;" + "clr.l %%d2;" + "clr.l %%d3;" + // loop start + "tst.l %[n];" + "bra 1f;" + "0: movem.l %%d0-%%d3, (%[ptr]);" + "lea (4*4, %[ptr]), %[ptr];" + "subq.l #4, %[n];" + "1: bgt 0b;" + // remaing elements + "tst.l %[n];" + "beq 1f;" // n=0 + "clr.l (%[ptr])+;" + "subq.l #1, %[n];" + "beq 1f;" // n=1 + "clr.l (%[ptr])+;" + "subq.l #1, %[n];" + "beq 1f;" // n=2 + // otherwise n = 3 + "clr.l (%[ptr])+;" + "1:" + : [n] "+d" (n), [ptr] "+a" (ptr) + : + : "%d0","%d1","%d2","%d3","cc","memory"); +} + + +#endif +#endif diff --git a/apps/codecs/Tremor/bitwise.c b/apps/codecs/Tremor/bitwise.c index b75ac408b8..fa9dcd6eff 100644 --- a/apps/codecs/Tremor/bitwise.c +++ b/apps/codecs/Tremor/bitwise.c @@ -22,7 +22,7 @@ #include #include "ogg.h" -static unsigned long mask[]= +static const unsigned long mask[] IDATA_ATTR = {0x00000000,0x00000001,0x00000003,0x00000007,0x0000000f, 0x0000001f,0x0000003f,0x0000007f,0x000000ff,0x000001ff, 0x000003ff,0x000007ff,0x00000fff,0x00001fff,0x00003fff, diff --git a/apps/codecs/Tremor/block.c b/apps/codecs/Tremor/block.c index a4571c37d4..6f88fb812c 100644 --- a/apps/codecs/Tremor/block.c +++ b/apps/codecs/Tremor/block.c @@ -172,8 +172,12 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ v->pcm_storage=ci->blocksizes[1]; v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm)); v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret)); + + // pbv: allow for extra padding for windowing for(i=0;ichannels;i++) v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i])); + // v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i])); + /* all 1 (large block) or 0 (small block) */ /* explicitly set for the sake of clarity */ diff --git a/apps/codecs/Tremor/codebook.c b/apps/codecs/Tremor/codebook.c index bb99487feb..4da7641553 100644 --- a/apps/codecs/Tremor/codebook.c +++ b/apps/codecs/Tremor/codebook.c @@ -306,7 +306,7 @@ long vorbis_book_decodev_set(codebook *book,ogg_int32_t *a, return(0); } -long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a,\ +long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a, long offset,int ch, oggpack_buffer *b,int n,int point){ long i,j,entry; diff --git a/apps/codecs/Tremor/config-tremor.h b/apps/codecs/Tremor/config-tremor.h index 81aa5bffab..8307d87b77 100644 --- a/apps/codecs/Tremor/config-tremor.h +++ b/apps/codecs/Tremor/config-tremor.h @@ -1,5 +1,4 @@ -#include "../codec.h" - +#include "../codec.h" #ifdef ROCKBOX_BIG_ENDIAN #define BIG_ENDIAN 1 #define LITTLE_ENDIAN 0 @@ -9,4 +8,6 @@ #define LITTLE_ENDIAN 1 #define BIG_ENDIAN 0 #endif -#define _LOW_ACCURACY_ + + +// #define _LOW_ACCURACY_ diff --git a/apps/codecs/Tremor/floor1.c b/apps/codecs/Tremor/floor1.c index 9bfec7e803..7f54aa76ea 100644 --- a/apps/codecs/Tremor/floor1.c +++ b/apps/codecs/Tremor/floor1.c @@ -124,7 +124,7 @@ static int icomp(const void *a,const void *b){ static vorbis_look_floor *floor1_look(vorbis_dsp_state *vd,vorbis_info_mode *mi, vorbis_info_floor *in){ - int *sortpointer[VIF_POSIT+2]; + static int *sortpointer[VIF_POSIT+2] IDATA_ATTR; vorbis_info_floor1 *info=(vorbis_info_floor1 *)in; vorbis_look_floor1 *look=(vorbis_look_floor1 *)_ogg_calloc(1,sizeof(*look)); int i,j,n=0; @@ -216,7 +216,7 @@ static int render_point(int x0,int x1,int y0,int y1,int x){ # define XdB(n) (n) #endif -static const ogg_int32_t FLOOR_fromdB_LOOKUP[256]={ +static ogg_int32_t FLOOR_fromdB_LOOKUP[256] IDATA_ATTR ={ XdB(0x000000e5), XdB(0x000000f4), XdB(0x00000103), XdB(0x00000114), XdB(0x00000126), XdB(0x00000139), XdB(0x0000014e), XdB(0x00000163), XdB(0x0000017a), XdB(0x00000193), XdB(0x000001ad), XdB(0x000001c9), @@ -283,14 +283,14 @@ static const ogg_int32_t FLOOR_fromdB_LOOKUP[256]={ XdB(0x69f80e9a), XdB(0x70dafda8), XdB(0x78307d76), XdB(0x7fffffff), }; -static void render_line(int x0,int x1,int y0,int y1,ogg_int32_t *d){ +static void render_line(int x0,register int x1,int y0,int y1,ogg_int32_t *d){ int dy=y1-y0; - int adx=x1-x0; - int ady=abs(dy); - int base=dy/adx; - int sy=(dy<0?base-1:base+1); - int x=x0; - int y=y0; + register int x=x0; + register int y=y0; + register int adx=x1-x0; + register int ady=abs(dy); + register int base=dy/adx; + register int sy=(dy<0?base-1:base+1); int err=0; ady-=abs(base*adx); @@ -433,7 +433,7 @@ static int floor1_inverse2(vorbis_block *vb,vorbis_look_floor *in,void *memo, } /* export hooks */ -vorbis_func_floor floor1_exportbundle={ +vorbis_func_floor floor1_exportbundle = { &floor1_unpack,&floor1_look,&floor1_free_info, &floor1_free_look,&floor1_inverse1,&floor1_inverse2 }; diff --git a/apps/codecs/Tremor/framing.c b/apps/codecs/Tremor/framing.c index 419c7dc8a3..9133333bda 100644 --- a/apps/codecs/Tremor/framing.c +++ b/apps/codecs/Tremor/framing.c @@ -501,7 +501,7 @@ int ogg_page_packets(ogg_page *og){ /* Static CRC calculation table. See older code in CVS for dead run-time initialization code. */ -static ogg_uint32_t crc_lookup[256]={ +static ogg_uint32_t crc_lookup[256] IDATA_ATTR = { 0x00000000,0x04c11db7,0x09823b6e,0x0d4326d9, 0x130476dc,0x17c56b6b,0x1a864db2,0x1e475005, 0x2608edb8,0x22c9f00f,0x2f8ad6d6,0x2b4bcb61, diff --git a/apps/codecs/Tremor/lsp_lookup.h b/apps/codecs/Tremor/lsp_lookup.h index fa84851887..cced7c9282 100644 --- a/apps/codecs/Tremor/lsp_lookup.h +++ b/apps/codecs/Tremor/lsp_lookup.h @@ -26,7 +26,7 @@ #define FROMdB2_SHIFT 3 #define FROMdB2_MASK 31 -static const ogg_int32_t FROMdB_LOOKUP[FROMdB_LOOKUP_SZ]={ +static const ogg_int32_t FROMdB_LOOKUP[FROMdB_LOOKUP_SZ] ={ 0x003fffff, 0x0028619b, 0x00197a96, 0x0010137a, 0x000a24b0, 0x00066666, 0x000409c3, 0x00028c42, 0x00019b8c, 0x000103ab, 0x0000a3d7, 0x00006760, @@ -37,7 +37,7 @@ static const ogg_int32_t FROMdB_LOOKUP[FROMdB_LOOKUP_SZ]={ 0x0000000b, 0x00000007, 0x00000004, 0x00000003, 0x00000002, 0x00000001, 0x00000001}; -static const ogg_int32_t FROMdB2_LOOKUP[FROMdB2_LOOKUP_SZ]={ +static const ogg_int32_t FROMdB2_LOOKUP[FROMdB2_LOOKUP_SZ] ={ 0x000001fc, 0x000001f5, 0x000001ee, 0x000001e7, 0x000001e0, 0x000001d9, 0x000001d2, 0x000001cc, 0x000001c5, 0x000001bf, 0x000001b8, 0x000001b2, @@ -50,7 +50,7 @@ static const ogg_int32_t FROMdB2_LOOKUP[FROMdB2_LOOKUP_SZ]={ #define INVSQ_LOOKUP_I_SHIFT 10 #define INVSQ_LOOKUP_I_MASK 1023 -static const long INVSQ_LOOKUP_I[64+1]={ +static const long INVSQ_LOOKUP_I[64+1] ={ 92682, 91966, 91267, 90583, 89915, 89261, 88621, 87995, 87381, 86781, 86192, 85616, @@ -70,7 +70,7 @@ static const long INVSQ_LOOKUP_I[64+1]={ 65536, }; -static const long INVSQ_LOOKUP_IDel[64]={ +static const long INVSQ_LOOKUP_IDel[64] ={ 716, 699, 684, 668, 654, 640, 626, 614, 600, 589, 576, 565, @@ -92,7 +92,7 @@ static const long INVSQ_LOOKUP_IDel[64]={ #define COS_LOOKUP_I_SHIFT 9 #define COS_LOOKUP_I_MASK 511 #define COS_LOOKUP_I_SZ 128 -static const ogg_int32_t COS_LOOKUP_I[COS_LOOKUP_I_SZ+1]={ +static const ogg_int32_t COS_LOOKUP_I[COS_LOOKUP_I_SZ+1] ={ 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, 16069, 15986, 15893, 15791, diff --git a/apps/codecs/Tremor/mapping0.c b/apps/codecs/Tremor/mapping0.c index d4ab3a6a29..26d6289d4e 100644 --- a/apps/codecs/Tremor/mapping0.c +++ b/apps/codecs/Tremor/mapping0.c @@ -28,6 +28,8 @@ #include "registry.h" #include "misc.h" + + /* simplistic, wasteful way of doing this (unique lookup for each mode/submapping); there should be a central repository for identical lookups. That will require minor work, so I'm putting it @@ -124,6 +126,7 @@ static int ilog(unsigned int v){ return(ret); } + /* also responsible for range checking */ static vorbis_info_mapping *mapping0_unpack(vorbis_info *vi,oggpack_buffer *opb){ int i; @@ -176,7 +179,17 @@ static vorbis_info_mapping *mapping0_unpack(vorbis_info *vi,oggpack_buffer *opb) return(NULL); } -static int seq=0; + +/* IRAM buffer keep the pcm data; only for windows size upto 2048 + for space restrictions. No real compromise, larger window sizes + are only used for very low quality settings (q<0?) */ +#define IRAM_PCM_SIZE 2048 +static ogg_int32_t pcm_iram[IRAM_PCM_SIZE] IDATA_ATTR; + +static int seq = 0; + +#define CHANNELS 2 /* max 2 channels on the ihp-1xx (stereo) */ + static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){ vorbis_dsp_state *vd=vb->vd; vorbis_info *vi=vd->vi; @@ -188,12 +201,16 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){ int i,j; long n=vb->pcmend=ci->blocksizes[vb->W]; - ogg_int32_t **pcmbundle=(ogg_int32_t **)alloca(sizeof(*pcmbundle)*vi->channels); - int *zerobundle=(int *)alloca(sizeof(*zerobundle)*vi->channels); - - int *nonzero =(int *)alloca(sizeof(*nonzero)*vi->channels); - void **floormemo=(void **)alloca(sizeof(*floormemo)*vi->channels); - + /* statically allocate mapping structures in IRAM */ + static ogg_int32_t *pcmbundle[CHANNELS] IDATA_ATTR; + static int zerobundle[CHANNELS] IDATA_ATTR; + static int nonzero[CHANNELS] IDATA_ATTR; + static void *floormemo[CHANNELS] IDATA_ATTR; + + /* test for too many channels; + (maybe this is can be checked at the stream level?) */ + if (vi->channels > CHANNELS) return (-1); + /* time domain information decode (note that applying the information would have to happen later; we'll probably add a function entry to the harness for that later */ @@ -272,38 +289,64 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){ //for(j=0;jchannels;j++) //_analysis_output("residue",seq+j,vb->pcm[j],-8,n/2,0,0); + +/* pbv: removed this loop by fusion with the following one + to avoid recopying data to/from the IRAM */ +#if 0 /* compute and apply spectral envelope */ for(i=0;ichannels;i++){ ogg_int32_t *pcm=vb->pcm[i]; int submap=info->chmuxlist[i]; look->floor_func[submap]-> inverse2(vb,look->floor_look[submap],floormemo[i],pcm); - } + } +#endif //for(j=0;jchannels;j++) //_analysis_output("mdct",seq+j,vb->pcm[j],-24,n/2,0,1); /* transform the PCM data; takes PCM vector, vb; modifies PCM vector */ /* only MDCT right now.... */ - for(i=0;ichannels;i++){ - ogg_int32_t *pcm=vb->pcm[i]; - mdct_backward(n,pcm,pcm); + + /* check if we can do this in IRAM */ + if(n <= IRAM_PCM_SIZE) { /* normal window size: yes */ + for(i=0;ichannels;i++){ + ogg_int32_t *pcm=vb->pcm[i]; + int submap=info->chmuxlist[i]; + + if(nonzero[i]) { + memcpy(pcm_iram, pcm, sizeof(ogg_int32_t)*n); + look->floor_func[submap]-> + inverse2(vb,look->floor_look[submap],floormemo[i],pcm_iram); + mdct_backward(n, pcm_iram, pcm_iram); + /* window the data */ + _vorbis_apply_window(pcm_iram,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW); + memcpy(pcm, pcm_iram, sizeof(ogg_int32_t)*n); + } + else + memset(pcm, 0, sizeof(ogg_int32_t)*n); + } + } + else { /* large window: no, do it in the normal memory */ + for(i=0;ichannels;i++){ + ogg_int32_t *pcm=vb->pcm[i]; + int submap=info->chmuxlist[i]; + + look->floor_func[submap]-> + inverse2(vb,look->floor_look[submap],floormemo[i],pcm); + if(nonzero[i]) { + mdct_backward(n, pcm, pcm); + /* window the data */ + _vorbis_apply_window(pcm,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW); + } + else + memset(pcm, 0, sizeof(ogg_int32_t)*n); + } } //for(j=0;jchannels;j++) //_analysis_output("imdct",seq+j,vb->pcm[j],-24,n,0,0); - /* window the data */ - for(i=0;ichannels;i++){ - ogg_int32_t *pcm=vb->pcm[i]; - if(nonzero[i]) - _vorbis_apply_window(pcm,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW); - else - for(j=0;jchannels;j++) //_analysis_output("window",seq+j,vb->pcm[j],-24,n,0,0); @@ -313,10 +356,11 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){ } /* export hooks */ -vorbis_func_mapping mapping0_exportbundle={ +vorbis_func_mapping mapping0_exportbundle ={ &mapping0_unpack, &mapping0_look, &mapping0_free_info, &mapping0_free_look, &mapping0_inverse }; + diff --git a/apps/codecs/Tremor/mdct.c b/apps/codecs/Tremor/mdct.c index cc201b2106..2d2564f196 100644 --- a/apps/codecs/Tremor/mdct.c +++ b/apps/codecs/Tremor/mdct.c @@ -94,7 +94,7 @@ STIN void mdct_butterfly_16(DATA_TYPE *x){ } /* 32 point butterfly (in place, 4 register) */ -STIN void mdct_butterfly_32(DATA_TYPE *x){ +STIN void mdct_butterfly_32(DATA_TYPE *x){ REG_TYPE r0, r1; @@ -152,7 +152,7 @@ STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){ DATA_TYPE *x2 = x + (points>>1) - 8; REG_TYPE r0; REG_TYPE r1; - + do{ r0 = x1[6] - x2[6]; x1[6] += x2[6]; r1 = x2[7] - x1[7]; x1[7] += x2[7]; @@ -180,7 +180,7 @@ STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){ r0 = x1[4] - x2[4]; x1[4] += x2[4]; r1 = x1[5] - x2[5]; x1[5] += x2[5]; XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step; - + r0 = x1[2] - x2[2]; x1[2] += x2[2]; r1 = x1[3] - x2[3]; x1[3] += x2[3]; XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step; @@ -231,7 +231,7 @@ STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){ }while(T>sincos_lookup0); } -STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){ +STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift) { int stages=8-shift; int i,j; @@ -243,33 +243,34 @@ STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){ for(j=0;j>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8); } -STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){ +STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift) { int bit = 0; DATA_TYPE *w0 = x; DATA_TYPE *w1 = x = w0+(n>>1); LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; LOOKUP_T *Ttop = T+1024; - DATA_TYPE r2; + REG_TYPE r2; do{ - DATA_TYPE r3 = bitrev12(bit++); + REG_TYPE r3 = bitrev12(bit++); DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1; DATA_TYPE *x1 = x + (r3>>shift); REG_TYPE r0 = x0[0] + x1[0]; REG_TYPE r1 = x1[1] - x0[1]; - XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step; + XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step; w1 -= 4; @@ -287,7 +288,7 @@ STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){ r0 = x0[0] + x1[0]; r1 = x1[1] - x0[1]; - XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step; + XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step; r0 = (x0[1] + x1[1])>>1; r1 = (x0[0] - x1[0])>>1; @@ -299,14 +300,14 @@ STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){ w0 += 4; }while(T>shift) -1; DATA_TYPE *x1 = x + (r3>>shift); REG_TYPE r0 = x0[0] + x1[0]; REG_TYPE r1 = x1[1] - x0[1]; - T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 ); + T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 ); w1 -= 4; @@ -324,7 +325,7 @@ STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){ r0 = x0[0] + x1[0]; r1 = x1[1] - x0[1]; - T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 ); + T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 ); r0 = (x0[1] + x1[1])>>1; r1 = (x0[0] - x1[0])>>1; @@ -337,7 +338,8 @@ STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){ }while(w0>1; int n4=n>>2; DATA_TYPE *iX; @@ -347,6 +349,10 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){ int shift; int step; +#if CONFIG_CPU == MCF5249 + mcf5249_init_mac(); /* should be redundant */ +#endif + for (shift=6;!(n&(1<>=2; @@ -507,4 +512,3 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){ }while(oX1>oX2); } } - diff --git a/apps/codecs/Tremor/mdct.h b/apps/codecs/Tremor/mdct.h index 6d8890720c..bdfe0e7304 100644 --- a/apps/codecs/Tremor/mdct.h +++ b/apps/codecs/Tremor/mdct.h @@ -34,8 +34,10 @@ #define cPI1_8 (0x7641af3d) #endif -extern void mdct_forward(int n, DATA_TYPE *in, DATA_TYPE *out); +//extern void mdct_forward(int n, DATA_TYPE *in, DATA_TYPE *out); extern void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out); +//extern void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift); +//extern void mdct_butterflies(DATA_TYPE *x,int points,int shift); #endif diff --git a/apps/codecs/Tremor/mdct_lookup.h b/apps/codecs/Tremor/mdct_lookup.h index 970e199f7f..8a88997376 100644 --- a/apps/codecs/Tremor/mdct_lookup.h +++ b/apps/codecs/Tremor/mdct_lookup.h @@ -17,8 +17,10 @@ #include "os_types.h" +/* keep lookup tables in fast IRAM */ + /* {sin(2*i*PI/4096), cos(2*i*PI/4096)}, with i = 0 to 512 */ -static LOOKUP_T sincos_lookup0[1026] = { +static LOOKUP_T sincos_lookup0[1026] IDATA_ATTR = { X(0x00000000), X(0x7fffffff), X(0x003243f5), X(0x7ffff621), X(0x006487e3), X(0x7fffd886), X(0x0096cbc1), X(0x7fffa72c), X(0x00c90f88), X(0x7fff6216), X(0x00fb5330), X(0x7fff0943), @@ -279,7 +281,7 @@ static LOOKUP_T sincos_lookup0[1026] = { }; /* {sin((2*i+1)*PI/4096), cos((2*i+1)*PI/4096)}, with i = 0 to 511 */ -static LOOKUP_T sincos_lookup1[1024] = { +static LOOKUP_T sincos_lookup1[1024] IDATA_ATTR = { X(0x001921fb), X(0x7ffffd88), X(0x004b65ee), X(0x7fffe9cb), X(0x007da9d4), X(0x7fffc251), X(0x00afeda8), X(0x7fff8719), X(0x00e23160), X(0x7fff3824), X(0x011474f6), X(0x7ffed572), diff --git a/apps/codecs/Tremor/misc.h b/apps/codecs/Tremor/misc.h index ed7617d0e0..dc2dccf116 100644 --- a/apps/codecs/Tremor/misc.h +++ b/apps/codecs/Tremor/misc.h @@ -23,6 +23,7 @@ #include "os_types.h" #include "asm_arm.h" +#include "asm_mcf5249.h" /* Some prototypes that were not defined elsewhere */ @@ -36,7 +37,6 @@ void* alloca(size_t size); #ifndef _LOW_ACCURACY_ /* 64 bit multiply */ - //#include #if BYTE_ORDER==LITTLE_ENDIAN @@ -47,9 +47,7 @@ union magic { } halves; ogg_int64_t whole; }; -#endif - -#if BYTE_ORDER==BIG_ENDIAN +#elif BYTE_ORDER==BIG_ENDIAN union magic { struct { ogg_int32_t hi; @@ -64,7 +62,6 @@ static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { magic.whole = (ogg_int64_t)x * y; return magic.halves.hi; } - static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { return MULT32(x,y)<<1; } @@ -102,7 +99,6 @@ static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { return (x >> 6) * y; /* y preshifted >>9 */ } - #endif /* @@ -122,11 +118,15 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { * macros. */ +/* replaced XPROD32 with a macro to avoid memory reference + _x, _y are the results (must be l-values) */ +#define XPROD32(_a, _b, _t, _v, _x, _y) \ + { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \ + (_y)=MULT32(_b,_t)-MULT32(_a,_v); } + + #ifdef __i386__ -#define XPROD32(_a, _b, _t, _v, _x, _y) \ - { *(_x)=MULT32(_a,_t)+MULT32(_b,_v); \ - *(_y)=MULT32(_b,_t)-MULT32(_a,_v); } #define XPROD31(_a, _b, _t, _v, _x, _y) \ { *(_x)=MULT31(_a,_t)+MULT31(_b,_v); \ *(_y)=MULT31(_b,_t)-MULT31(_a,_v); } @@ -136,14 +136,6 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { #else -static inline void XPROD32(ogg_int32_t a, ogg_int32_t b, - ogg_int32_t t, ogg_int32_t v, - ogg_int32_t *x, ogg_int32_t *y) -{ - *x = MULT32(a, t) + MULT32(b, v); - *y = MULT32(b, t) - MULT32(a, v); -} - static inline void XPROD31(ogg_int32_t a, ogg_int32_t b, ogg_int32_t t, ogg_int32_t v, ogg_int32_t *x, ogg_int32_t *y) @@ -159,9 +151,7 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, *x = MULT31(a, t) - MULT31(b, v); *y = MULT31(b, t) + MULT31(a, v); } - #endif - #endif #ifndef _V_CLIP_MATH @@ -245,4 +235,3 @@ static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap, - diff --git a/apps/codecs/Tremor/os.h b/apps/codecs/Tremor/os.h index 65a4992ffc..bb4b867e3d 100644 --- a/apps/codecs/Tremor/os.h +++ b/apps/codecs/Tremor/os.h @@ -24,12 +24,12 @@ # define _V_IFDEFJAIL_H_ # ifdef __GNUC__ -# define STIN static __inline__ +# define STIN static inline # elif _WIN32 -# define STIN static __inline +# define STIN static __inline__ # endif #else -# define STIN static +# define STIN static inline #endif #ifndef M_PI diff --git a/apps/codecs/Tremor/os_types.h b/apps/codecs/Tremor/os_types.h index fe0331770f..1e0cb1332f 100644 --- a/apps/codecs/Tremor/os_types.h +++ b/apps/codecs/Tremor/os_types.h @@ -21,7 +21,7 @@ #ifdef _LOW_ACCURACY_ # define X(n) (((((n)>>22)+1)>>1) - ((((n)>>22)+1)>>9)) -# define LOOKUP_T const unsigned char +# define LOOKUP_T const unsigned char #else # define X(n) (n) # define LOOKUP_T const ogg_int32_t diff --git a/apps/codecs/Tremor/registry.c b/apps/codecs/Tremor/registry.c index c0b5fec0cc..0882309b32 100644 --- a/apps/codecs/Tremor/registry.c +++ b/apps/codecs/Tremor/registry.c @@ -31,18 +31,18 @@ extern vorbis_func_residue residue1_exportbundle; extern vorbis_func_residue residue2_exportbundle; extern vorbis_func_mapping mapping0_exportbundle; -vorbis_func_floor *_floor_P[]={ +vorbis_func_floor *_floor_P[] = { &floor0_exportbundle, &floor1_exportbundle, }; -vorbis_func_residue *_residue_P[]={ +vorbis_func_residue *_residue_P[] = { &residue0_exportbundle, &residue1_exportbundle, &residue2_exportbundle, }; -vorbis_func_mapping *_mapping_P[]={ +vorbis_func_mapping *_mapping_P[] = { &mapping0_exportbundle, }; diff --git a/apps/codecs/Tremor/res012.c b/apps/codecs/Tremor/res012.c index 1ead185d8f..3d69cee73b 100644 --- a/apps/codecs/Tremor/res012.c +++ b/apps/codecs/Tremor/res012.c @@ -309,7 +309,7 @@ int res2_inverse(vorbis_block *vb,vorbis_look_residue *vl, } -vorbis_func_residue residue0_exportbundle={ +const vorbis_func_residue residue0_exportbundle ={ &res0_unpack, &res0_look, &res0_free_info, @@ -317,7 +317,7 @@ vorbis_func_residue residue0_exportbundle={ &res0_inverse }; -vorbis_func_residue residue1_exportbundle={ +const vorbis_func_residue residue1_exportbundle ={ &res0_unpack, &res0_look, &res0_free_info, @@ -325,7 +325,7 @@ vorbis_func_residue residue1_exportbundle={ &res1_inverse }; -vorbis_func_residue residue2_exportbundle={ +const vorbis_func_residue residue2_exportbundle ={ &res0_unpack, &res0_look, &res0_free_info, diff --git a/apps/codecs/Tremor/vorbisfile.c b/apps/codecs/Tremor/vorbisfile.c index d9eeed3945..74bff6a3fb 100644 --- a/apps/codecs/Tremor/vorbisfile.c +++ b/apps/codecs/Tremor/vorbisfile.c @@ -669,6 +669,10 @@ static int _ov_open1(void *f,OggVorbis_File *vf,char *initial, int offsettest=(f?callbacks.seek_func(f,0,SEEK_CUR):-1); int ret; +#if CONFIG_CPU == MCF5249 + mcf5249_init_mac(); /* initialize the Coldfire MAC unit */ +#endif + memset(vf,0,sizeof(*vf)); vf->datasource=f; vf->callbacks = callbacks; diff --git a/apps/codecs/Tremor/window.c b/apps/codecs/Tremor/window.c index b50e36f413..b447ce5ad9 100644 --- a/apps/codecs/Tremor/window.c +++ b/apps/codecs/Tremor/window.c @@ -56,7 +56,6 @@ const void *_vorbis_window(int type, int left){ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2], long *blocksizes, int lW,int W,int nW){ - LOOKUP_T *window[2]={window_p[0],window_p[1]}; long n=blocksizes[W]; long ln=blocksizes[lW]; @@ -67,7 +66,14 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2], long rightbegin=n/2+n/4-rn/4; long rightend=rightbegin+rn/2; - + +#if CONFIG_CPU == MCF5249 + mcf5249_init_mac(); /* shouldn't be needed, but just in case */ + mcf5249_vect_zero(&d[0], leftbegin); + mcf5249_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin); + mcf5249_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin); + mcf5249_vect_zero(&d[rightend], n-rightend); +#else int i,p; for(i=0;i