diff options
author | Jens Arnold <amiconn@rockbox.org> | 2006-02-19 12:43:50 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2006-02-19 12:43:50 +0000 |
commit | 2ea75fdbec5a5df403e8fd18a7cb0b16b7cf69c5 (patch) | |
tree | 586678d390eb0b7bf2b9cb9584476ce85e7d94bf /apps/plugins/jpeg.c | |
parent | da5cef63301a2b13bbbcd36bf409a921fbb5402d (diff) |
Replace the range_limit lookup table with an inline function, asm optimised for SH1 and coldfire. Slight speedup on SH1, up to 15% speedup on coldfire. Saves almost 1KB.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8736 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/plugins/jpeg.c')
-rw-r--r-- | apps/plugins/jpeg.c | 183 |
1 files changed, 71 insertions, 112 deletions
diff --git a/apps/plugins/jpeg.c b/apps/plugins/jpeg.c index 43c1ce111a..d08765db33 100644 --- a/apps/plugins/jpeg.c +++ b/apps/plugins/jpeg.c @@ -143,78 +143,47 @@ static struct plugin_api* rb; /**************** begin JPEG code ********************/ -/* LUT for IDCT, this could also be used for gamma correction */ -const unsigned char range_limit[1024] = +INLINE unsigned range_limit(int value) { - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, - - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - - 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, - 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, - 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, - 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, - 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, - 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127 -}; +#if CONFIG_CPU == SH7034 + unsigned tmp; + asm ( /* Note: Uses knowledge that only the low byte of the result is used */ + 
"mov #-128,%[t] \n" + "sub %[t],%[v] \n" /* value -= -128; equals value += 128; */ + "extu.b %[v],%[t] \n" + "cmp/eq %[v],%[t] \n" /* low byte == whole number ? */ + "bt 1f \n" /* yes: no overflow */ + "cmp/pz %[v] \n" /* overflow: positive? */ + "subc %[v],%[v] \n" /* %[v] now either 0 or 0xffffffff */ + "1: \n" + : /* outputs */ + [v]"+r"(value), + [t]"=&r"(tmp) + ); + return value; +#elif defined(CPU_COLDFIRE) + asm ( /* Note: Uses knowledge that only the low byte of the result is used */ + "add.l #128,%[v] \n" /* value += 128; */ + "cmp.l #255,%[v] \n" /* overflow? */ + "bls.b 1f \n" /* no: return value */ + "spl.b %[v] \n" /* yes: set low byte to appropriate boundary */ + "1: \n" + : /* outputs */ + [v]"+r"(value) + ); + return value; +#else + value += 128; + if ((unsigned)value <= 255) + return value; + + if (value < 0) + return 0; + + return 255; +#endif +} /* IDCT implementation */ @@ -266,8 +235,6 @@ const unsigned char range_limit[1024] = */ #define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n)) -#define RANGE_MASK (255 * 4 + 3) /* 2 bits wider than legal samples */ - /* @@ -277,7 +244,7 @@ const unsigned char range_limit[1024] = void idct1x1(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line) { (void)skip_line; /* unused */ - *p_byte = range_limit[(inptr[0] * quantptr[0] >> 3) & RANGE_MASK]; + *p_byte = range_limit(inptr[0] * quantptr[0] >> 3); } @@ -312,18 +279,14 @@ void idct2x2(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line) /* Row 0 */ outptr = p_byte; - outptr[0] = range_limit[(int) DESCALE(tmp0 + tmp1, 3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp0 - tmp1, 3) - & RANGE_MASK]; + outptr[0] = range_limit((int) DESCALE(tmp0 + tmp1, 3)); + outptr[1] = range_limit((int) DESCALE(tmp0 - tmp1, 3)); /* Row 1 */ outptr = p_byte + skip_line; - outptr[0] = range_limit[(int) DESCALE(tmp2 + tmp3, 3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp2 - tmp3, 3) - & RANGE_MASK]; + outptr[0] = 
range_limit((int) DESCALE(tmp2 + tmp3, 3)); + outptr[1] = range_limit((int) DESCALE(tmp2 - tmp3, 3)); } @@ -398,18 +361,14 @@ void idct4x4(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line) /* Final output stage */ - outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit((int) DESCALE(tmp10 + tmp2, + CONST_BITS+PASS1_BITS+3)); + outptr[3] = range_limit((int) DESCALE(tmp10 - tmp2, + CONST_BITS+PASS1_BITS+3)); + outptr[1] = range_limit((int) DESCALE(tmp12 + tmp0, + CONST_BITS+PASS1_BITS+3)); + outptr[2] = range_limit((int) DESCALE(tmp12 - tmp0, + CONST_BITS+PASS1_BITS+3)); wsptr += 4; /* advance pointer to next row */ } @@ -549,8 +508,8 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line) | wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0) { /* AC terms all zero */ - unsigned char dcval = range_limit[(int) DESCALE((long) wsptr[0], - PASS1_BITS+3) & RANGE_MASK]; + unsigned char dcval = range_limit((int) DESCALE((long) wsptr[0], + PASS1_BITS+3)); outptr[0] = dcval; outptr[1] = dcval; @@ -617,22 +576,22 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line) /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ - outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, - CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; - outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, - CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, - CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; - outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, - CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; - outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, - 
CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; - outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, - CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; - outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, - CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; - outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, - CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; + outptr[0] = range_limit((int) DESCALE(tmp10 + tmp3, + CONST_BITS+PASS1_BITS+3)); + outptr[7] = range_limit((int) DESCALE(tmp10 - tmp3, + CONST_BITS+PASS1_BITS+3)); + outptr[1] = range_limit((int) DESCALE(tmp11 + tmp2, + CONST_BITS+PASS1_BITS+3)); + outptr[6] = range_limit((int) DESCALE(tmp11 - tmp2, + CONST_BITS+PASS1_BITS+3)); + outptr[2] = range_limit((int) DESCALE(tmp12 + tmp1, + CONST_BITS+PASS1_BITS+3)); + outptr[5] = range_limit((int) DESCALE(tmp12 - tmp1, + CONST_BITS+PASS1_BITS+3)); + outptr[3] = range_limit((int) DESCALE(tmp13 + tmp0, + CONST_BITS+PASS1_BITS+3)); + outptr[4] = range_limit((int) DESCALE(tmp13 - tmp0, + CONST_BITS+PASS1_BITS+3)); wsptr += 8; /* advance pointer to next row */ } @@ -2273,7 +2232,7 @@ int jpegmem(struct jpeg *p_jpg, int ds) { int size; - size = (p_jpg->x_phys/ds/p_jpg->subsample_x[0]) + size = (p_jpg->x_phys/ds/p_jpg->subsample_x[0]) * (p_jpg->y_phys/ds/p_jpg->subsample_y[0]); #ifdef HAVE_LCD_COLOR if (p_jpg->blocks > 1) /* colour, add requirements for chroma */ |