summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Sevakis <jethead71@rockbox.org>2007-02-19 02:49:26 +0000
committerMichael Sevakis <jethead71@rockbox.org>2007-02-19 02:49:26 +0000
commit36175ac9453999d2d079c521126ecc5ac7a8d984 (patch)
treea37e87b5fd7283d1456b7a346e16c1a5ed590a2c
parent2801a87d543f38cadd076330f329c84e23852997 (diff)
SWCODEC: DSP optimizations for conversion to internal format and resampling. Assembly resampling for Coldfire. Word has it ARM will get that soon.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12399 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/dsp.c388
-rw-r--r--apps/dsp.h10
-rw-r--r--apps/dsp_asm.h8
-rw-r--r--apps/dsp_cf.S145
4 files changed, 391 insertions, 160 deletions
diff --git a/apps/dsp.c b/apps/dsp.c
index c7eed8bd76..c062f2c088 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -46,6 +46,18 @@
#define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/
#define DEFAULT_GAIN 0x01000000
+
+enum /* indices into the convert_to_internal function table */
+{
+ CONVERT_LE_NATIVE_I_STEREO = STEREO_INTERLEAVED, /* depth <= NATIVE_DEPTH, interleaved stereo */
+ CONVERT_LE_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED, /* depth <= NATIVE_DEPTH, noninterleaved stereo */
+ CONVERT_LE_NATIVE_MONO = STEREO_MONO, /* depth <= NATIVE_DEPTH, mono */
+ CONVERT_GT_NATIVE_I_STEREO = STEREO_INTERLEAVED + STEREO_NUM_MODES, /* depth > NATIVE_DEPTH, interleaved stereo */
+ CONVERT_GT_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED + STEREO_NUM_MODES, /* depth > NATIVE_DEPTH, noninterleaved stereo */
+ CONVERT_GT_NATIVE_MONO = STEREO_MONO + STEREO_NUM_MODES, /* depth > NATIVE_DEPTH, mono */
+ CONVERT_GT_NATIVE_1ST_INDEX = STEREO_NUM_MODES /* offset added to the stereo mode when depth > NATIVE_DEPTH */
+};
+
struct dsp_config
{
long codec_frequency; /* Sample rate of data coming from the codec */
@@ -60,6 +72,7 @@ struct dsp_config
int sample_depth;
int sample_bytes;
int stereo_mode;
+ int num_channels;
int frac_bits;
bool dither_enabled;
long dither_bias;
@@ -69,11 +82,13 @@ struct dsp_config
bool eq_enabled;
long eq_precut;
long gain; /* Note that this is in S8.23 format. */
+ int (*convert_to_internal)(const char* src[], int32_t* dst[], int count);
};
struct resample_data
{
- long phase, delta;
+ long phase;
+ long delta;
int32_t last_sample[2];
};
@@ -139,88 +154,157 @@ void sound_set_pitch(int permille)
* consume. Note that for mono, dst[0] equals dst[1], as there is no point
* in processing the same data twice.
*/
-static int convert_to_internal(const char* src[], int count, int32_t* dst[])
+
+/* convert count 16-bit mono to 32-bit mono
+ *
+ * Reads 16-bit samples from src[0], shifts each left by WORD_SHIFT and
+ * stores the results in sample_buf. Both dst[0] and dst[1] are pointed at
+ * sample_buf. src[0] is advanced past the consumed samples and the number
+ * of samples converted (count clamped to SAMPLE_BUF_COUNT/2) is returned.
+ */
+static int convert_lte_native_mono(
+ const char *src[], int32_t *dst[], int count)
{
- count = MIN(SAMPLE_BUF_COUNT / 2, count);
+ count = MIN(SAMPLE_BUF_COUNT/2, count); /* never convert more than half of sample_buf */
- if ((dsp->sample_depth <= NATIVE_DEPTH)
- || (dsp->stereo_mode == STEREO_INTERLEAVED))
- {
- dst[0] = &sample_buf[0];
- dst[1] = (dsp->stereo_mode == STEREO_MONO)
- ? dst[0] : &sample_buf[SAMPLE_BUF_COUNT / 2];
- }
- else
+ const short *s = (short*) src[0];
+ const short * const send = s + count; /* one past the last sample to convert */
+ int32_t *d = dst[0] = dst[1] = sample_buf; /* mono: both channels share one buffer */
+ const int scale = WORD_SHIFT;
+
+ do /* body runs at least once; dsp_process only calls with count > 0 */
{
- dst[0] = (int32_t*) src[0];
- dst[1] = (int32_t*) ((dsp->stereo_mode == STEREO_MONO) ? src[0] : src[1]);
+ *d++ = *s++ << scale;
}
+ while (s < send);
- if (dsp->sample_depth <= NATIVE_DEPTH)
- {
- short* s0 = (short*) src[0];
- int32_t* d0 = dst[0];
- int32_t* d1 = dst[1];
- int scale = WORD_SHIFT;
- int i;
+ src[0] = (char *)s; /* tell the caller how far the input was consumed */
- if (dsp->stereo_mode == STEREO_INTERLEAVED)
- {
- for (i = 0; i < count; i++)
- {
- *d0++ = *s0++ << scale;
- *d1++ = *s0++ << scale;
- }
- }
- else if (dsp->stereo_mode == STEREO_NONINTERLEAVED)
- {
- short* s1 = (short*) src[1];
+ return count;
+}
- for (i = 0; i < count; i++)
- {
- *d0++ = *s0++ << scale;
- *d1++ = *s1++ << scale;
- }
- }
- else
- {
- for (i = 0; i < count; i++)
- {
- *d0++ = *s0++ << scale;
- }
- }
- }
- else if (dsp->stereo_mode == STEREO_INTERLEAVED)
- {
- int32_t* s0 = (int32_t*) src[0];
- int32_t* d0 = dst[0];
- int32_t* d1 = dst[1];
- int i;
+/* convert count 16-bit interleaved stereo to 32-bit noninterleaved
+ *
+ * src[0] points at L/R pairs of 16-bit samples. Each pair is fetched with a
+ * single 32-bit load and split into its two halves, which are shifted left
+ * by WORD_SHIFT and written to the left (dst[0]) and right (dst[1]) halves
+ * of sample_buf. src[0] is advanced past the consumed pairs. Returns the
+ * number of stereo samples converted (count clamped to SAMPLE_BUF_COUNT/2).
+ * The loop body runs at least once, so count must be > 0.
+ */
+static int convert_lte_native_interleaved_stereo(
+ const char *src[], int32_t *dst[], int count)
+{
+ count = MIN(SAMPLE_BUF_COUNT/2, count);
- for (i = 0; i < count; i++)
- {
- *d0++ = *s0++;
- *d1++ = *s0++;
- }
- }
+ const int32_t *s = (int32_t *) src[0];
+ const int32_t * const send = s + count;
+ int32_t *dl = dst[0] = sample_buf;
+ int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2;
+ const int scale = WORD_SHIFT;
- if (dsp->stereo_mode == STEREO_NONINTERLEAVED)
+ do
{
- src[0] += count * dsp->sample_bytes;
- src[1] += count * dsp->sample_bytes;
+ /* Must hold the whole 32-bit pair: a short would keep only one
+ * 16-bit half and make (slr >> 16) collapse to 0 or -1. */
+ int32_t slr = *s++;
+ /* The left sample comes first in memory, so it is the low half of
+ * the word on little endian and the high half on big endian. */
+#ifdef ROCKBOX_LITTLE_ENDIAN
+ *dl++ = (int32_t)(short)slr << scale;
+ *dr++ = (slr >> 16) << scale;
+#else /* ROCKBOX_BIG_ENDIAN */
+ *dl++ = (slr >> 16) << scale;
+ *dr++ = (int32_t)(short)slr << scale;
+#endif
}
- else if (dsp->stereo_mode == STEREO_INTERLEAVED)
+ while (s < send);
+
+ src[0] = (char *)s;
+
+ return count;
+}
+
+/* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved
+ *
+ * Reads 16-bit left samples from src[0] and right samples from src[1],
+ * shifts each left by WORD_SHIFT and writes them to the left (dst[0]) and
+ * right (dst[1]) halves of sample_buf. Both source pointers are advanced
+ * past the consumed samples. Returns the number of samples converted per
+ * channel (count clamped to SAMPLE_BUF_COUNT/2). The loop body runs at
+ * least once, so count must be > 0.
+ */
+static int convert_lte_native_noninterleaved_stereo(
+ const char *src[], int32_t *dst[], int count)
+{
+ /* Each channel owns only half of sample_buf; clamp like every other
+ * converter does so a large count cannot overrun it. */
+ count = MIN(SAMPLE_BUF_COUNT/2, count);
+
+ const short *sl = (short *) src[0];
+ const short *sr = (short *) src[1];
+ const short * const slend = sl + count;
+ int32_t *dl = dst[0] = sample_buf;
+ int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2;
+ const int scale = WORD_SHIFT;
+
+ do
{
- src[0] += count * dsp->sample_bytes * 2;
+ *dl++ = *sl++ << scale;
+ *dr++ = *sr++ << scale;
}
- else
+ while (sl < slend);
+
+ src[0] = (char *)sl;
+ src[1] = (char *)sr;
+
+ return count;
+}
+
+/* 32-bit mono input needs no conversion: hand the source buffer itself to
+ * both output channels, advance src[0] past the samples taken and return
+ * how many were taken (count clamped to SAMPLE_BUF_COUNT/2).
+ */
+static int convert_gt_native_mono(
+ const char *src[], int32_t *dst[], int count)
+{
+ int32_t * const in = (int32_t *)src[0];
+ const int taken = MIN(SAMPLE_BUF_COUNT/2, count);
+
+ dst[0] = in;
+ dst[1] = in;
+ src[0] = (char *)(in + taken);
+
+ return taken;
+}
+
+/* Split 32-bit interleaved stereo from src[0] into the left (dst[0]) and
+ * right (dst[1]) halves of sample_buf without rescaling. src[0] is advanced
+ * by two words per stereo sample. Returns the number of stereo samples
+ * taken (count clamped to SAMPLE_BUF_COUNT/2).
+ */
+static int convert_gt_native_interleaved_stereo(
+ const char *src[], int32_t *dst[], int count)
+{
+ const int32_t * const in = (int32_t *)src[0];
+ int32_t * const left = sample_buf;
+ int32_t * const right = sample_buf + SAMPLE_BUF_COUNT/2;
+ int i = 0;
+
+ count = MIN(SAMPLE_BUF_COUNT/2, count);
+
+ dst[0] = left;
+ dst[1] = right;
+
+ /* Copies at least one pair, exactly like a pointer-compare do/while. */
+ do
+ {
+ left[i] = in[2*i];
+ right[i] = in[2*i + 1];
+ i++;
+ }
+ while (i < count);
+
+ src[0] = (char *)(in + 2*count);
+
+ return count;
+}
+
+/* 32-bit noninterleaved stereo is already in the internal layout: point
+ * dst[0]/dst[1] straight at the two source buffers, advance both source
+ * pointers and return the number of samples taken per channel (count
+ * clamped to SAMPLE_BUF_COUNT/2).
+ */
+static int convert_gt_native_noninterleaved_stereo(
+ const char *src[], int32_t *dst[], int count)
+{
+ int32_t * const left = (int32_t *)src[0];
+ int32_t * const right = (int32_t *)src[1];
+
+ count = MIN(SAMPLE_BUF_COUNT/2, count);
+
+ dst[0] = left;
+ dst[1] = right;
+ src[0] = (char *)(left + count);
+ src[1] = (char *)(right + count);
return count;
}
+/* set the to-native sample conversion function based on dsp sample parameters
+ *
+ * Uses dsp->stereo_mode as the table index, offset by
+ * CONVERT_GT_NATIVE_1ST_INDEX when dsp->sample_depth exceeds NATIVE_DEPTH,
+ * and stores the selected function in dsp->convert_to_internal.
+ */
+static void new_sample_conversion(void)
+{
+ /* Read-only lookup table, so keep it const. */
+ static int (* const convert_to_internal_functions[])(
+ const char* src[], int32_t *dst[], int count) =
+ {
+ [CONVERT_LE_NATIVE_MONO] = convert_lte_native_mono,
+ [CONVERT_LE_NATIVE_I_STEREO] = convert_lte_native_interleaved_stereo,
+ [CONVERT_LE_NATIVE_NI_STEREO] = convert_lte_native_noninterleaved_stereo,
+ [CONVERT_GT_NATIVE_MONO] = convert_gt_native_mono,
+ [CONVERT_GT_NATIVE_I_STEREO] = convert_gt_native_interleaved_stereo,
+ [CONVERT_GT_NATIVE_NI_STEREO] = convert_gt_native_noninterleaved_stereo,
+ };
+
+ int convert = dsp->stereo_mode;
+
+ /* stereo_mode is stored unchecked by dsp_configure(); never index past
+ * the table - keep whatever converter is already installed instead. */
+ if (convert < 0 || convert >= STEREO_NUM_MODES)
+ return;
+
+ if (dsp->sample_depth > NATIVE_DEPTH)
+ convert += CONVERT_GT_NATIVE_1ST_INDEX;
+
+ dsp->convert_to_internal = convert_to_internal_functions[convert];
+}
+
static void resampler_set_delta(int frequency)
{
resample_data[current_codec].delta = (unsigned long)
@@ -230,124 +314,118 @@ static void resampler_set_delta(int frequency)
/* Linear interpolation resampling that introduces a one sample delay because
* of our inability to look into the future at the end of a frame.
*/
-
-/* TODO: we really should have a separate set of resample functions for both
- mono and stereo to avoid all this internal branching and looping. */
-static int downsample(int32_t **dst, int32_t **src, int count,
- struct resample_data *r)
+#ifndef DSP_HAVE_ASM_RESAMPLING
+/* Downsample count samples of each of the channels source buffers in src
+ * into the matching dst buffers by linear interpolation, stepping the 16.16
+ * fixed point position r->phase by r->delta per output sample. The last
+ * input sample of every channel is saved in r->last_sample for the next
+ * frame and r->phase is rewound by count samples. Returns the number of
+ * samples written per channel (measured on channel 0, which is processed
+ * last). Samples are only ever read at positions below count.
+ */
+static int dsp_downsample(int channels, int count, struct resample_data *r,
+ int32_t **src, int32_t **dst)
{
- long phase = r->phase;
long delta = r->delta;
- int32_t last_sample;
- int32_t *d[2] = { dst[0], dst[1] };
- int pos = phase >> 16;
- int i = 1, j;
- int num_channels = dsp->stereo_mode == STEREO_MONO ? 1 : 2;
-
- for (j = 0; j < num_channels; j++) {
- last_sample = r->last_sample[j];
+ long phase, pos;
+ int32_t *d;
+
+ /* Rolled channel loop actually showed slightly faster. */
+ do
+ {
+ /* Just initialize things and not worry too much about the relatively
+ * uncommon case of not being able to spit out a sample for the frame.
+ */
+ int32_t *s = src[--channels];
+ int32_t last = r->last_sample[channels];
+
+ r->last_sample[channels] = s[count - 1];
+ d = dst[channels];
+ phase = r->phase;
+ pos = phase >> 16;
+
/* Do we need last sample of previous frame for interpolation? */
- if (pos > 0)
- last_sample = src[j][pos - 1];
+ /* Only fetch it when a sample will really be produced; a start
+ * position past the frame must not read beyond the input. */
+ if (pos > 0 && pos < count)
+ last = s[pos - 1];
- /* Be sure starting position isn't passed the available data */
- if (pos < count)
- *d[j]++ = last_sample + FRACMUL((phase & 0xffff) << 15,
- src[j][pos] - last_sample);
- else
+ while (pos < count)
{
- /* No samples can be output here since were already passed the
- end. Keep phase, save the last sample and return nothing. */
- i = 0;
- goto done;
+ *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
+ phase += delta;
+ pos = phase >> 16;
+ /* pos may have stepped past the end of the frame; only load the
+ * next base sample if another iteration will use it. */
+ if (pos < count)
+ last = s[pos - 1];
}
}
-
- phase += delta;
-
- while ((pos = phase >> 16) < count)
- {
- for (j = 0; j < num_channels; j++)
- *d[j]++ = src[j][pos - 1] + FRACMUL((phase & 0xffff) << 15,
- src[j][pos] - src[j][pos - 1]);
- phase += delta;
- i++;
- }
+ while (channels > 0);
/* Wrap phase accumulator back to start of next frame. */
-done:
r->phase = phase - (count << 16);
- r->last_sample[0] = src[0][count - 1];
- r->last_sample[1] = src[1][count - 1];
- return i;
+ return d - dst[0];
}
-static long upsample(int32_t **dst, int32_t **src, int count, struct resample_data *r)
+static int dsp_upsample(int channels, int count, struct resample_data *r,
+ int32_t **src, int32_t **dst) /* Upsample count samples of each of the
+ * channels buffers in src into dst by linear interpolation, stepping the
+ * 16.16 position r->phase by r->delta. Saves the last input sample of every
+ * channel in r->last_sample and returns the number of samples written per
+ * channel (measured on channel 0, which is processed last). */
{
- long phase = r->phase;
long delta = r->delta;
- int32_t *d[2] = { dst[0], dst[1] };
- int i = 0, j;
- int pos;
- int num_channels = dsp->stereo_mode == STEREO_MONO ? 1 : 2;
-
- while ((phase >> 16) == 0)
- {
- for (j = 0; j < num_channels; j++)
- *d[j]++ = r->last_sample[j] + FRACMUL((phase & 0xffff) << 15,
- src[j][0] - r->last_sample[j]);
- phase += delta;
- i++;
- }
+ long phase, pos;
+ int32_t *d;
- while ((pos = phase >> 16) < count)
+ /* Rolled channel loop actually showed slightly faster. */
+ do
{
- for (j = 0; j < num_channels; j++)
- *d[j]++ = src[j][pos - 1] + FRACMUL((phase & 0xffff) << 15,
- src[j][pos] - src[j][pos - 1]);
- phase += delta;
- i++;
+ /* Should always be able to output a sample for a ratio up to
+ RESAMPLE_BUF_COUNT / SAMPLE_BUF_COUNT. */
+ int32_t *s = src[--channels];
+ int32_t last = r->last_sample[channels];
+
+ r->last_sample[channels] = s[count - 1];
+ d = dst[channels];
+ phase = r->phase; /* every channel restarts from the saved phase */
+ pos = phase >> 16;
+
+ while (pos == 0) /* interpolate between previous frame's last sample and s[0] */
+ {
+ *d++ = last + FRACMUL((phase & 0xffff) << 15, s[0] - last);
+ phase += delta;
+ pos = phase >> 16;
+ }
+
+ while (pos < count) /* interpolate between s[pos - 1] and s[pos] */
+ {
+ last = s[pos - 1];
+ *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
+ phase += delta;
+ pos = phase >> 16;
+ }
}
+ while (channels > 0);
/* Wrap phase accumulator back to start of next frame. */
- r->phase = phase - (count << 16);
- r->last_sample[0] = src[0][count - 1];
- r->last_sample[1] = src[1][count - 1];
- return i;
+ r->phase = phase & 0xffff; /* NOTE(review): keeps only the fraction; this matches phase - (count << 16) only when the loops exit with pos == count - confirm for a start position already >= count */
+ return d - dst[0];
}
+#endif /* DSP_HAVE_ASM_RESAMPLING */
/* Resample count stereo samples. Updates the src array, if resampling is
* done, to refer to the resampled data. Returns number of stereo samples
* for further processing.
*/
-static inline int resample(int32_t* src[], int count)
+static inline int resample(int32_t *src[], int count)
{
- long new_count;
+ long new_count = count;
if (dsp->frequency != NATIVE_FREQUENCY)
{
- int32_t* dst[2] = {&resample_buf[0], &resample_buf[RESAMPLE_BUF_COUNT / 2]};
+ int32_t *dst[2] =
+ {
+ resample_buf,
+ resample_buf + RESAMPLE_BUF_COUNT/2,
+ };
+ int channels = dsp->num_channels;
if (dsp->frequency < NATIVE_FREQUENCY)
- {
- new_count = upsample(dst, src, count,
- &resample_data[current_codec]);
- }
+ new_count = dsp_upsample(channels, count,
+ &resample_data[current_codec],
+ src, dst);
else
- {
- new_count = downsample(dst, src, count,
- &resample_data[current_codec]);
- }
+ new_count = dsp_downsample(channels, count,
+ &resample_data[current_codec],
+ src, dst);
src[0] = dst[0];
- if (dsp->stereo_mode != STEREO_MONO)
- src[1] = dst[1];
- else
- src[1] = dst[0];
- }
- else
- {
- new_count = count;
+ src[1] = dst[channels - 1];
}
return new_count;
@@ -378,8 +456,7 @@ void dsp_dither_enable(bool enable)
static void dither_init(void) /* reset dither state and derive bias/mask from dsp->frac_bits */
{
- memset(&dither_data[0], 0, sizeof(struct dither_data));
- memset(&dither_data[1], 0, sizeof(struct dither_data));
+ memset(dither_data, 0, sizeof(dither_data)); /* clears every element at once; assumes dither_data is an array, not a pointer - TODO confirm */
dsp->dither_bias = (1L << (dsp->frac_bits - NATIVE_DEPTH)); /* single bit at position frac_bits - NATIVE_DEPTH */
dsp->dither_mask = (1L << (dsp->frac_bits + 1 - NATIVE_DEPTH)) - 1; /* all bits below position frac_bits + 1 - NATIVE_DEPTH */
}
@@ -592,7 +669,7 @@ void dsp_set_eq_coefs(int band)
static void eq_process(int32_t **x, unsigned num)
{
int i;
- unsigned int channels = dsp->stereo_mode != STEREO_MONO ? 2 : 1;
+ unsigned int channels = dsp->num_channels;
unsigned shift;
/* filter configuration currently is 1 low shelf filter, 3 band peaking
@@ -772,7 +849,7 @@ int dsp_process(char *dst, const char *src[], int count)
while (count > 0)
{
- samples = convert_to_internal(src, count, tmp);
+ samples = dsp->convert_to_internal(src, tmp, count);
count -= samples;
apply_gain(tmp, samples);
samples = resample(tmp, samples);
@@ -886,7 +963,7 @@ bool dsp_configure(int setting, intptr_t value)
case DSP_SET_SAMPLE_DEPTH:
dsp->sample_depth = value;
-
+
if (dsp->sample_depth <= NATIVE_DEPTH)
{
dsp->frac_bits = WORD_FRACBITS;
@@ -902,15 +979,19 @@ bool dsp_configure(int setting, intptr_t value)
dsp->clip_min = -(1 << value);
}
+ new_sample_conversion();
dither_init();
break;
case DSP_SET_STEREO_MODE:
- dsp->stereo_mode = (long) value;
+ dsp->stereo_mode = value;
+ dsp->num_channels = value == STEREO_MONO ? 1 : 2;
+ new_sample_conversion();
break;
case DSP_RESET:
dsp->stereo_mode = STEREO_NONINTERLEAVED;
+ dsp->num_channels = 2;
dsp->clip_max = ((1 << WORD_FRACBITS) - 1);
dsp->clip_min = -((1 << WORD_FRACBITS));
dsp->track_gain = 0;
@@ -921,6 +1002,7 @@ bool dsp_configure(int setting, intptr_t value)
dsp->sample_depth = NATIVE_DEPTH;
dsp->frac_bits = WORD_FRACBITS;
dsp->new_gain = true;
+ new_sample_conversion();
break;
case DSP_FLUSH:
diff --git a/apps/dsp.h b/apps/dsp.h
index 8e82b6118d..b99ac213ab 100644
--- a/apps/dsp.h
+++ b/apps/dsp.h
@@ -24,9 +24,13 @@
#include <stdbool.h>
#define NATIVE_FREQUENCY 44100
-#define STEREO_INTERLEAVED 0
-#define STEREO_NONINTERLEAVED 1
-#define STEREO_MONO 2
+enum /* stereo modes; values match the #defines this enum replaces */
+{
+ STEREO_INTERLEAVED = 0,
+ STEREO_NONINTERLEAVED, /* 1 */
+ STEREO_MONO, /* 2 */
+ STEREO_NUM_MODES, /* number of modes above; not itself a mode */
+};
enum {
CODEC_SET_FILEBUF_WATERMARK = 1,
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index 04c2848a98..add76a07f8 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -27,5 +27,13 @@
void apply_crossfeed(int32_t* src[], int count);
#endif
+#if defined (CPU_COLDFIRE)
+#define DSP_HAVE_ASM_RESAMPLING /* makes dsp.c leave out its C dsp_downsample/dsp_upsample */
+int dsp_downsample(int channels, int count, void *resample_data,
+ int32_t **src, int32_t **dst); /* assembly version in dsp_cf.S; resample_data is the struct resample_data for the codec */
+int dsp_upsample(int channels, int count, void *resample_data,
+ int32_t **src, int32_t **dst); /* assembly version in dsp_cf.S; same argument order as dsp_downsample */
#endif
+#endif /* _DSP_ASM_H */
+
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
index 719d1db1d5..233be82860 100644
--- a/apps/dsp_cf.S
+++ b/apps/dsp_cf.S
@@ -17,8 +17,11 @@
*
****************************************************************************/
- .section .text
- .global apply_crossfeed
+/****************************************************************************
+ * apply_crossfeed(int32_t* src[], int count)
+ */
+ .section .text
+ .global apply_crossfeed
apply_crossfeed:
lea.l (-44, %sp), %sp
movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
@@ -67,11 +70,11 @@ apply_crossfeed:
addq.l #1, %d4 | index++
moveq.l #13, %d6
cmp.l %d6, %d4 | wrap index to 0 if it overflows
- jlt .nowrap
+ jlt .cfnowrap
moveq.l #13*8, %d4
sub.l %d4, %a0 | wrap back delay line ptr as well
clr.l %d4
-.nowrap:
+.cfnowrap:
subq.l #1, %d7
jne .cfloop
| save data back to struct
@@ -81,4 +84,138 @@ apply_crossfeed:
movem.l (%sp), %d2-%d7/%a2-%a6
lea.l (44, %sp), %sp
rts
+.cfend:
+ .size apply_crossfeed,.cfend-apply_crossfeed
+/****************************************************************************
+ * dsp_downsample(int channels, int count, struct resample_data *r,
+ * int32_t **src, int32_t **dst)
+ *
+ * Linear interpolation downsampler. Channels are processed from the last
+ * one down to channel 0; the return value is the number of samples written
+ * for channel 0. Relies on r->phase at offset 0, r->delta at offset 4 and
+ * r->last_sample[] starting at offset 8.
+ */
+ .section .text
+ .global dsp_downsample
+dsp_downsample:
+ lea.l -40(%sp), %sp | save non-clobberables
+ movem.l %d2-%d7/%a2-%a5, (%sp) |
+ movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels
+ | %d3 = count
+ | %a0 = r
+ | %a1 = src
+ | %a2 = dst
+ move.l 4(%a0), %d4 | %d4 = delta = r->delta
+ move.l #16, %d7 | %d7 = shift
+.dschannel_loop:
+ move.l (%a0), %d5 | %d5 = phase = r->phase
+ move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1]
+ move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1]
+ lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1]
+ move.l (%a5), %d0 | %d0 = last = r->last_sample[ch-1]
+ move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1]
+ move.l %d1, (%a5) |
+ move.l %d5, %d6 | %d6 = pos = phase >> 16
+ lsr.l %d7, %d6 |
+ cmp.l %d3, %d6 | past end of samples?
+ bge.b .dsloop_skip | yes? skip loop
+ tst.l %d6 | need last sample of prev. frame?
+ bne.b .dsloop | no? start main loop
+ move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
+ bra.b .dsuse_last_start | start with last (last in %d0)
+.dsloop:
+ lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
+ movem.l (%a5), %d0-%d1 |
+.dsuse_last_start:
+ sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
+ move.l %d0, %acc0 | %acc0 = previous sample
+ move.l %d5, %d0 | frac = (phase << 16) >> 1
+ lsl.l %d7, %d0 |
+ lsr.l #1, %d0 |
+ mac.l %d0, %d1, %acc0 | %acc0 += frac * diff
+ move.l %acc0, %d0 |
+ add.l %d4, %d5 | phase += delta
+ move.l %d5, %d6 | pos = phase >> 16
+ lsr.l %d7, %d6 |
+ move.l %d0, (%a4)+ | *d++ = %d0
+ cmp.l %d3, %d6 | pos < count?
+ blt.b .dsloop | yes? continue resampling
+.dsloop_skip:
+ subq.l #1, %d2 | ch > 0?
+ bgt.b .dschannel_loop | yes? process next channel
+ asl.l %d7, %d3 | wrap phase to start of next frame
+ sub.l %d3, %d5 | r->phase = phase - (count << 16)
+ move.l %d5, (%a0) |
+ move.l %a4, %d0 | return d - dst[0]
+ sub.l (%a2), %d0 |
+ asr.l #2, %d0 | convert bytes->samples
+ movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
+ move.l %acc1, %acc0 | copy %acc1 to %acc0 (clears %acc0 if %acc1 is zero - TODO confirm)
+ lea.l 40(%sp), %sp | cleanup stack
+ rts | buh-bye
+.dsend:
+ .size dsp_downsample,.dsend-dsp_downsample
+
+/****************************************************************************
+ * dsp_upsample(int channels, int count, struct resample_data *r,
+ * int32_t **src, int32_t **dst)
+ *
+ * Linear interpolation upsampler. Phase fraction and delta are kept in the
+ * high word of their registers so that the carry out of the addition marks
+ * a step to the next input sample. Channels are processed from the last one
+ * down to channel 0; the return value is the number of samples written for
+ * channel 0.
+ */
+ .section .text
+ .global dsp_upsample
+dsp_upsample:
+ lea.l -40(%sp), %sp | save non-clobberables
+ movem.l %d2-%d7/%a2-%a5, (%sp) |
+ movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels
+ | %d3 = count
+ | %a0 = r
+ | %a1 = src
+ | %a2 = dst
+ move.l 4(%a0), %d4 | %d4 = delta = r->delta
+ swap %d4 | swap delta to high word to use
+ | carries to increment position
+.uschannel_loop:
+ move.l (%a0), %d5 | %d5 = phase = r->phase
+ move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1]
+ move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1]
+ lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1]
+ move.l (%a5), %d0 | %d0 = last = r->last_sample[ch-1]
+ move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1]
+ move.l %d1, (%a5) |
+ moveq.l #16, %d1 | %d1 = shift
+ move.l %d5, %d6 | %d6 = pos = phase >> 16
+ lsl.l %d1, %d5 | swap phase to high word to use
+ | carries to increment position
+ lsr.l %d1, %d6 | pos == 0?
+ bne.b .usstart_1 | no? transition from down
+ move.l (%a3), %d1 | %d1 = s[0]
+ sub.l %d0, %d1 | diff = s[pos] - last
+ bra.b .usloop_0 | jump to typical start point
+.usstart_1:
+ cmp.l %d3, %d6 | past end of samples?
+ bge.b .usloop_skip | yes? skip loop
+.usloop_1:
+ lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
+ movem.l (%a5), %d0-%d1 |
+ sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
+.usloop_0:
+ move.l %d0, %acc0 | %acc0 = previous sample
+ lsr.l #1, %d5 | make phase into frac
+ mac.l %d1, %d5, %acc0 | %acc0 += diff * frac
+ move.l %acc0, %d7 |
+ lsl.l #1, %d5 | restore frac to phase
+ move.l %d7, (%a4)+ | *d++ = %d7 (%d0/%d1 stay live for reuse)
+ add.l %d4, %d5 | phase += delta
+ bcc.b .usloop_0 | no carry? same pos, reuse %d0/%d1
+ addq.l #1, %d6 | increment position
+ cmp.l %d3, %d6 | pos < count?
+ blt.b .usloop_1 | yes? continue resampling
+.usloop_skip:
+ subq.l #1, %d2 | ch > 0?
+ bgt.b .uschannel_loop | yes? process next channel
+ swap %d5 | wrap phase to start of next frame
+ move.l %d5, (%a0) | ...and save in r->phase
+ move.l %a4, %d0 | return d - dst[0]
+ sub.l (%a2), %d0 |
+ asr.l #2, %d0 | convert bytes->samples
+ movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
+ move.l %acc1, %acc0 | copy %acc1 to %acc0 (clears %acc0 if %acc1 is zero - TODO confirm)
+ lea.l 40(%sp), %sp | cleanup stack
+ rts | buh-bye
+.usend:
+ .size dsp_upsample,.usend-dsp_upsample