summary | refs | log | tree | commit | diff
path: root/apps
diff options
context:
space:
mode:
author Karl Kurbjun <kkurbjun@gmail.com> 2006-04-20 19:39:56 +0000
committer Karl Kurbjun <kkurbjun@gmail.com> 2006-04-20 19:39:56 +0000
commit 29ab31e8f1c68dd89dad1e9a92fe3c8a8dd223a9 (patch)
tree 304b247322f0adefca4c27d1bf7c5a8cfe1a44e8 /apps
parent 9e9921b08731367ebcaf19976d8e4c6c2534a016 (diff)
Optimizations for doom: coldfire asm drawspan routine = not much, fixed point multiply changes = not much, H300 asm lcd update = some, IRAM sound updates and simplifications = more
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9747 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- apps/plugins/doom/i_sound.c 44
-rw-r--r-- apps/plugins/doom/i_video.c 48
-rw-r--r-- apps/plugins/doom/m_fixed.h 12
-rw-r--r-- apps/plugins/doom/r_draw.c 53
4 files changed, 107 insertions, 50 deletions
diff --git a/apps/plugins/doom/i_sound.c b/apps/plugins/doom/i_sound.c
index 7579ada5b5..271444527f 100644
--- a/apps/plugins/doom/i_sound.c
+++ b/apps/plugins/doom/i_sound.c
@@ -48,11 +48,11 @@
// mixing buffer, and the samplerate of the raw data.
// Needed for calling the actual sound output.
-#define SAMPLECOUNT 512
+#define SAMPLECOUNT 512
-#define NUM_CHANNELS 16
+#define NUM_CHANNELS 16
// It is 2 for 16bit, and 2 for two channels.
-#define BUFMUL 4
+#define BUFMUL 2
#define MIXBUFFERSIZE (SAMPLECOUNT*BUFMUL)
#if (CONFIG_KEYPAD == IPOD_3G_PAD) || (CONFIG_KEYPAD == IPOD_4G_PAD)
@@ -66,7 +66,7 @@
// Basically, samples from all active internal channels
// are modifed and added, and stored in the buffer
// that is submitted to the audio device.
-signed short *mixbuffer=NULL;
+signed short mixbuffer[MIXBUFFERSIZE] IBSS_ATTR;
typedef struct {
// SFX id of the playing sound effect.
@@ -91,7 +91,7 @@ typedef struct {
int *rightvol_lookup;
} channel_info_t;
-channel_info_t channelinfo[NUM_CHANNELS];
+channel_info_t channelinfo[NUM_CHANNELS] IBSS_ATTR;
int *vol_lookup; // Volume lookups.
@@ -355,13 +355,6 @@ int I_SoundIsPlaying(int handle)
// This function currently supports only 16bit.
//
-bool swap=0;
-bool lastswap=1;
- // Pointers in global mixbuffer, left, right, end.
- signed short* leftout;
- signed short* rightout;
- signed short* leftend;
-
void I_UpdateSound( void )
{
// Mix current sound data.
@@ -370,25 +363,26 @@ void I_UpdateSound( void )
register int dl;
register int dr;
+ // Pointers in global mixbuffer, left, right, end.
+ signed short* leftout;
+ signed short* rightout;
+ signed short* leftend;
+
// Step in mixbuffer, left and right, thus two.
int step;
// Mixing channel index.
int chan;
- if(lastswap==swap)
- return;
- lastswap=swap;
-
// Left and right channel
// are in global mixbuffer, alternating.
- leftout = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2);
- rightout = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2)+1;
+ leftout = mixbuffer;
+ rightout = mixbuffer +1;
step = 2;
// Determine end, for left channel only
// (right channel is implicit).
- leftend = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2) + SAMPLECOUNT*step;
+ leftend = mixbuffer + SAMPLECOUNT*step;
// Mix sounds into the mixing buffer.
// Loop over step*SAMPLECOUNT,
@@ -467,15 +461,10 @@ void I_UpdateSound( void )
void get_more(unsigned char** start, size_t* size)
{
- // This code works fine, the only problem is that doom runs slower then the sound
- // updates (sometimes). This code forces the update if the sound hasn't been
- // remixed.
- if(lastswap!=swap)
- I_UpdateSound(); // Force sound update (We don't want stutters)
+ I_UpdateSound(); // Force sound update
- *start = (unsigned char*)((swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2));
+ *start = (unsigned char*)(mixbuffer);
*size = SAMPLECOUNT*2*sizeof(short);
- swap=!swap;
}
@@ -520,9 +509,6 @@ void I_InitSound()
printf( " pre-cached all sound data\n");
- if(mixbuffer==NULL)
- mixbuffer=malloc(sizeof(short)*MIXBUFFERSIZE);
-
// Now initialize mixbuffer with zero.
for ( i = 0; i< MIXBUFFERSIZE; i++ )
mixbuffer[i] = 0;
diff --git a/apps/plugins/doom/i_video.c b/apps/plugins/doom/i_video.c
index a4db5f2672..db6adfa75e 100644
--- a/apps/plugins/doom/i_video.c
+++ b/apps/plugins/doom/i_video.c
@@ -16,7 +16,10 @@
* GNU General Public License for more details.
*
* $Log$
- * Revision 1.15 2006/04/16 23:14:04 kkurbjun
+ * Revision 1.16 2006/04/20 19:39:56 kkurbjun
+ * Optimizations for doom: coldfire asm drawspan routine = not much, fixed point multiply changes = not much, H300 asm lcd update = some, IRAM sound updates and simplifications = more
+ *
+ * Revision 1.15 2006-04-16 23:14:04 kkurbjun
* Fix run so that it stays enabled across level loads. Removed some unused code and added some back in for hopeful future use.
*
* Revision 1.14 2006-04-15 22:08:36 kkurbjun
@@ -359,10 +362,43 @@ static void I_UploadNewPalette(int pal)
void I_FinishUpdate (void)
{
#if (CONFIG_LCD == LCD_H300) && !defined(SIMULATOR)
- /*
- Lookup tables are no longer needed (H300 specific, decreases timedemo
- by about 500 tics)
- */
+
+#if 1
+ /* ASM screen update (drops 600 tics (100 asm)) */
+ asm (
+ "move.w #33,(%[LCD]) \n" /* Setup the LCD controller */
+ "clr.w (%[LCD2]) \n"
+ "move.w #34,(%[LCD]) \n" /* End LCD controller setup */
+ "move.l #220,%%d2 \n"
+ "move.l #176,%%d3 \n"
+ "clr.l %%d1 \n"
+ "widthloop: \n"
+ "move.b (%[screenptr])+, %%d1 \n" /* Unrolled by 5 */
+ "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
+ "move.b (%[screenptr])+, %%d1 \n"
+ "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
+ "move.b (%[screenptr])+, %%d1 \n"
+ "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
+ "move.b (%[screenptr])+, %%d1 \n"
+ "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
+ "move.b (%[screenptr])+, %%d1 \n"
+ "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
+ "subq.l #5,%%d2 \n"
+ "bne widthloop \n"
+ "move.w #220,%%d2 \n"
+ "subq.l #1,%%d3 \n"
+ "bne widthloop \n"
+ : /* outputs */
+ : /* inputs */
+ [screenptr] "a" (d_screens[0]),
+ [palette] "a" (palette),
+ [LCD] "a" (0xf0000000),
+ [LCD2] "a" (0xf0000002)
+ : /* clobbers */
+ "d1", "d2", "d3"
+ );
+#else
+ /* C version of above (drops 500 tics) */
// Start the write
*(volatile unsigned short *) 0xf0000000 = 0x21; // register
@@ -383,6 +419,8 @@ void I_FinishUpdate (void)
wcnt=0;
hcnt++;
}
+#endif
+
#else
unsigned char paletteIndex;
int x, y;
diff --git a/apps/plugins/doom/m_fixed.h b/apps/plugins/doom/m_fixed.h
index 3c922e8f50..e29933befd 100644
--- a/apps/plugins/doom/m_fixed.h
+++ b/apps/plugins/doom/m_fixed.h
@@ -47,15 +47,15 @@ inline static int FixedMul( int a, int b )
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
// Code contributed by Thom Johansen
register int result;
- asm volatile (
+ asm (
"mac.l %[x],%[y],%%acc0 \n" /* multiply */
"move.l %[y],%%d2 \n"
"mulu.l %[x],%%d2 \n" /* get lower half, avoid emac stall */
"movclr.l %%acc0,%[result] \n" /* get higher half */
- "moveq.l #15,%%d1 \n"
- "asl.l %%d1,%[result] \n" /* hi <<= 15, plus one free */
- "moveq.l #16,%%d1 \n"
- "lsr.l %%d1,%%d2 \n" /* (unsigned)lo >>= 16 */
+ "asl.l #8,%[result] \n" /* hi <<= 8 (first part of the 15-bit shift) */
+ "asl.l #7,%[result] \n" /* hi <<= 7 more: 15 in total, plus one free */
+ "lsr.l #8,%%d2 \n" /* (unsigned)lo >>= 8 (first part of the 16-bit shift) */
+ "lsr.l #8,%%d2 \n" /* (unsigned)lo >>= 8 more: 16 in total */
"or.l %%d2 ,%[result] \n" /* combine result */
: /* outputs */
[result]"=&d"(result)
@@ -63,7 +63,7 @@ inline static int FixedMul( int a, int b )
[x] "d" (a),
[y] "d" (b)
: /* clobbers */
- "d1", "d2"
+ "d2"
);
return result;
#else
diff --git a/apps/plugins/doom/r_draw.c b/apps/plugins/doom/r_draw.c
index a6bc21e420..5f45323a36 100644
--- a/apps/plugins/doom/r_draw.c
+++ b/apps/plugins/doom/r_draw.c
@@ -526,16 +526,48 @@ byte *ds_source IBSS_ATTR;
void R_DrawSpan (void)
{
- register unsigned count,xfrac = ds_xfrac,yfrac = ds_yfrac;
-
- byte *source;
- byte *colormap;
- byte *dest;
-
- source = ds_source;
- colormap = ds_colormap;
- dest = topleft + ds_y*SCREENWIDTH + ds_x1;
- count = ds_x2 - ds_x1 + 1;
+#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
+ // only slightly faster
+ asm volatile (
+ "tst %[count] \n"
+ "beq endspanloop \n"
+ "clr.l %%d4 \n"
+ "spanloop: \n"
+ "move.l %[xfrac], %%d1 \n"
+ "move.l %[yfrac], %%d2 \n"
+ "lsr.l #8,%%d1 \n"
+ "lsr.l #8,%%d2 \n"
+ "lsr.l #8,%%d1 \n"
+ "lsr.l #2,%%d2 \n"
+ "and.l #63,%%d1 \n"
+ "and.l #4032,%%d2 \n"
+ "or.l %%d2, %%d1 \n"
+ "move.b (%[source], %%d1), %%d4 \n"
+ "add.l %[ds_xstep], %[xfrac] \n"
+ "add.l %[ds_ystep], %[yfrac] \n"
+ "move.b (%[colormap],%%d4.l), (%[dest])+ \n"
+ "subq.l #1, %[count] \n"
+ "bne spanloop \n"
+ "endspanloop: \n"
+ : /* outputs */
+ : /* inputs */
+ [count] "d" (ds_x2-ds_x1+1),
+ [xfrac] "d" (ds_xfrac),
+ [yfrac] "d" (ds_yfrac),
+ [source] "a" (ds_source),
+ [colormap] "a" (ds_colormap),
+ [dest] "a" (topleft+ds_y*SCREENWIDTH +ds_x1),
+ [ds_xstep] "d" (ds_xstep),
+ [ds_ystep] "d" (ds_ystep)
+ : /* clobbers */
+ "d1", "d2", "d4"
+ );
+#else
+ register unsigned count = ds_x2 - ds_x1 + 1,xfrac = ds_xfrac,yfrac = ds_yfrac;
+
+ register byte *source = ds_source;
+ register byte *colormap = ds_colormap;
+ register byte *dest = topleft + ds_y*SCREENWIDTH + ds_x1;
while (count)
{
@@ -550,6 +582,7 @@ void R_DrawSpan (void)
*dest++ = colormap[source[spot]];
count--;
}
+#endif
}
//