summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apps/dsp.c354
-rw-r--r--apps/dsp_asm.h59
-rw-r--r--apps/dsp_cf.S424
3 files changed, 468 insertions, 369 deletions
diff --git a/apps/dsp.c b/apps/dsp.c
index be851e2305..3b95145b39 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -38,9 +38,14 @@
#define WORD_FRACBITS 27
#define NATIVE_DEPTH 16
+/* If the buffer sizes change, check the assembly code! */
#define SAMPLE_BUF_COUNT 256
#define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/
#define DEFAULT_GAIN 0x01000000
+#define SAMPLE_BUF_LEFT_CHANNEL 0
+#define SAMPLE_BUF_RIGHT_CHANNEL (SAMPLE_BUF_COUNT/2)
+#define RESAMPLE_BUF_LEFT_CHANNEL 0
+#define RESAMPLE_BUF_RIGHT_CHANNEL (RESAMPLE_BUF_COUNT/2)
/* enums to index conversion properly with stereo mode and other settings */
enum
@@ -66,11 +71,10 @@ enum
* NOTE: Any assembly routines that use these structures must be updated
* if current data members are moved or changed.
*/
- /* 32-bit achitecture offset */
struct resample_data
{
- long delta; /* 00h */
- long phase; /* 04h */
+ uint32_t delta; /* 00h */
+ uint32_t phase; /* 04h */
int32_t last_sample[2]; /* 08h */
/* 10h */
};
@@ -93,9 +97,10 @@ struct dsp_data
int output_scale; /* 00h */
int num_channels; /* 04h */
struct resample_data resample_data; /* 08h */
- int clip_min; /* 18h */
- int clip_max; /* 2ch */
- /* 30h */
+ int32_t clip_min; /* 18h */
+ int32_t clip_max; /* 1ch */
+ int32_t gain; /* 20h - Note that this is in S8.23 format. */
+ /* 24h */
};
/* No asm...yet */
@@ -132,13 +137,18 @@ struct eq_state
#include <dsp_asm.h>
/* Typedefs keep things much neater in this case */
-typedef int (*sample_input_fn_type)(int count, const char *src[],
- int32_t *dst[]);
+typedef void (*sample_input_fn_type)(int count, const char *src[],
+ int32_t *dst[]);
typedef int (*resample_fn_type)(int count, struct dsp_data *data,
int32_t *src[], int32_t *dst[]);
typedef void (*sample_output_fn_type)(int count, struct dsp_data *data,
int32_t *src[], int16_t *dst);
+/* Single-DSP channel processing in place */
typedef void (*channels_process_fn_type)(int count, int32_t *buf[]);
+/* DSP local channel processing in place */
+typedef void (*channels_process_dsp_fn_type)(int count, struct dsp_data *data,
+ int32_t *buf[]);
+
/*
***************************************************************************/
@@ -152,16 +162,16 @@ struct dsp_config
int sample_bytes;
int stereo_mode;
int frac_bits;
- long gain; /* Note that this is in S8.23 format. */
/* Functions that change depending upon settings - NULL if stage is
disabled */
- sample_input_fn_type input_samples;
- resample_fn_type resample;
- sample_output_fn_type output_samples;
+ sample_input_fn_type input_samples;
+ resample_fn_type resample;
+ sample_output_fn_type output_samples;
/* These will be NULL for the voice codec and is more economical that
way */
- channels_process_fn_type apply_crossfeed;
- channels_process_fn_type channels_process;
+ channels_process_dsp_fn_type apply_gain;
+ channels_process_fn_type apply_crossfeed;
+ channels_process_fn_type channels_process;
};
/* General DSP config */
@@ -211,7 +221,7 @@ static struct dsp_config *dsp IDATA_ATTR = audio_dsp;
* of copying needed is minimized for that case.
*/
-static int32_t sample_buf[SAMPLE_BUF_COUNT] IBSS_ATTR;
+int32_t sample_buf[SAMPLE_BUF_COUNT] IBSS_ATTR;
static int32_t resample_buf[RESAMPLE_BUF_COUNT] IBSS_ATTR;
/* set a new dsp and return old one */
@@ -258,23 +268,20 @@ void sound_set_pitch(int permille)
dsp_configure(DSP_SWITCH_FREQUENCY, dsp->codec_frequency);
}
-/* Convert at most count samples to the internal format, if needed. Returns
- * number of samples ready for further processing. Updates src to point
- * past the samples "consumed" and dst is set to point to the samples to
- * consume. Note that for mono, dst[0] equals dst[1], as there is no point
- * in processing the same data twice.
+/* Convert count samples to the internal format, if needed. Updates src
+ * to point past the samples "consumed" and dst is set to point to the
+ * samples to consume. Note that for mono, dst[0] equals dst[1], as there
+ * is no point in processing the same data twice.
*/
/* convert count 16-bit mono to 32-bit mono */
-static int sample_input_lte_native_mono(
+static void sample_input_lte_native_mono(
int count, const char *src[], int32_t *dst[])
{
- count = MIN(SAMPLE_BUF_COUNT/2, count);
-
const int16_t *s = (int16_t *) src[0];
const int16_t * const send = s + count;
- int32_t *d = dst[0] = dst[1] = sample_buf;
- const int scale = WORD_SHIFT;
+ int32_t *d = dst[0] = dst[1] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
+ int scale = WORD_SHIFT;
do
{
@@ -283,21 +290,17 @@ static int sample_input_lte_native_mono(
while (s < send);
src[0] = (char *)s;
-
- return count;
}
/* convert count 16-bit interleaved stereo to 32-bit noninterleaved */
-static int sample_input_lte_native_i_stereo(
+static void sample_input_lte_native_i_stereo(
int count, const char *src[], int32_t *dst[])
{
- count = MIN(SAMPLE_BUF_COUNT/2, count);
-
const int32_t *s = (int32_t *) src[0];
const int32_t * const send = s + count;
- int32_t *dl = dst[0] = sample_buf;
- int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2;
- const int scale = WORD_SHIFT;
+ int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
+ int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL];
+ int scale = WORD_SHIFT;
do
{
@@ -313,22 +316,18 @@ static int sample_input_lte_native_i_stereo(
while (s < send);
src[0] = (char *)s;
-
- return count;
}
/* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */
-static int sample_input_lte_native_ni_stereo(
+static void sample_input_lte_native_ni_stereo(
int count, const char *src[], int32_t *dst[])
{
- count = MIN(SAMPLE_BUF_COUNT/2, count);
-
const int16_t *sl = (int16_t *) src[0];
const int16_t *sr = (int16_t *) src[1];
const int16_t * const slend = sl + count;
- int32_t *dl = dst[0] = sample_buf;
- int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2;
- const int scale = WORD_SHIFT;
+ int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
+ int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL];
+ int scale = WORD_SHIFT;
do
{
@@ -339,35 +338,24 @@ static int sample_input_lte_native_ni_stereo(
src[0] = (char *)sl;
src[1] = (char *)sr;
-
- return count;
}
/* convert count 32-bit mono to 32-bit mono */
-static int sample_input_gt_native_mono(
+static void sample_input_gt_native_mono(
int count, const char *src[], int32_t *dst[])
{
- count = MIN(SAMPLE_BUF_COUNT/2, count);
-
dst[0] = dst[1] = (int32_t *)src[0];
src[0] = (char *)(dst[0] + count);
-
- return count;
}
/* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */
-static int sample_input_gt_native_i_stereo(
+static void sample_input_gt_native_i_stereo(
int count, const char *src[], int32_t *dst[])
{
- count = MIN(SAMPLE_BUF_COUNT/2, count);
-
const int32_t *s = (int32_t *)src[0];
const int32_t * const send = s + 2*count;
- int32_t *dl = sample_buf;
- int32_t *dr = sample_buf + SAMPLE_BUF_COUNT/2;
-
- dst[0] = dl;
- dst[1] = dr;
+ int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
+ int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL];
do
{
@@ -377,22 +365,16 @@ static int sample_input_gt_native_i_stereo(
while (s < send);
src[0] = (char *)send;
-
- return count;
}
/* convert 32 bit-noninterleaved stereo to 32-bit noninterleaved stereo */
-static int sample_input_gt_native_ni_stereo(
+static void sample_input_gt_native_ni_stereo(
int count, const char *src[], int32_t *dst[])
{
- count = MIN(SAMPLE_BUF_COUNT/2, count);
-
dst[0] = (int32_t *)src[0];
dst[1] = (int32_t *)src[1];
src[0] = (char *)(dst[0] + count);
src[1] = (char *)(dst[1] + count);
-
- return count;
}
/**
@@ -573,12 +555,6 @@ static void sample_output_new_format(void)
dsp->output_samples = sample_output_functions[out];
}
-static void resampler_set_delta(int frequency)
-{
- dsp->data.resample_data.delta = (unsigned long)
- frequency * 65536LL / NATIVE_FREQUENCY;
-}
-
/**
* Linear interpolation resampling that introduces a one sample delay because
* of our inability to look into the future at the end of a frame.
@@ -587,9 +563,9 @@ static void resampler_set_delta(int frequency)
static int dsp_downsample(int count, struct dsp_data *data,
int32_t *src[], int32_t *dst[])
{
- int ch = data->num_channels - 1;
- long delta = data->resample_data.delta;
- long phase, pos;
+ int ch = data->num_channels - 1;
+ uint32_t delta = data->resample_data.delta;
+ uint32_t phase, pos;
int32_t *d;
/* Rolled channel loop actually showed slightly faster. */
@@ -610,7 +586,7 @@ static int dsp_downsample(int count, struct dsp_data *data,
if (pos > 0)
last = s[pos - 1];
- while (pos < count)
+ while (pos < (uint32_t)count)
{
*d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
phase += delta;
@@ -625,12 +601,12 @@ static int dsp_downsample(int count, struct dsp_data *data,
return d - dst[0];
}
-static int dsp_upsample(int count, struct dsp_data *data,
+static int dsp_upsample(int count, struct dsp_data *data,
int32_t *src[], int32_t *dst[])
{
int ch = data->num_channels - 1;
- long delta = data->resample_data.delta;
- long phase, pos;
+ uint32_t delta = data->resample_data.delta;
+ uint32_t phase, pos;
int32_t *d;
/* Rolled channel loop actually showed slightly faster. */
@@ -653,7 +629,7 @@ static int dsp_upsample(int count, struct dsp_data *data,
pos = phase >> 16;
}
- while (pos < count)
+ while (pos < (uint32_t)count)
{
last = s[pos - 1];
*d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
@@ -669,24 +645,43 @@ static int dsp_upsample(int count, struct dsp_data *data,
}
#endif /* DSP_HAVE_ASM_RESAMPLING */
+static void resampler_new_delta(void)
+{
+ dsp->data.resample_data.delta = (unsigned long)
+ dsp->frequency * 65536LL / NATIVE_FREQUENCY; /* 16.16 fixed-point phase step */
+
+ if (dsp->frequency == NATIVE_FREQUENCY)
+ {
+ /* NOTE: If fully glitch-free transitions from no resampling to
+ resampling are desired, last_sample history should be maintained
+ even when not resampling. */
+ dsp->resample = NULL;
+ dsp->data.resample_data.phase = 0;
+ dsp->data.resample_data.last_sample[0] = 0;
+ dsp->data.resample_data.last_sample[1] = 0;
+ }
+ else if (dsp->frequency < NATIVE_FREQUENCY)
+ dsp->resample = dsp_upsample;
+ else
+ dsp->resample = dsp_downsample;
+}
+
/* Resample count stereo samples. Updates the src array, if resampling is
* done, to refer to the resampled data. Returns number of stereo samples
* for further processing.
*/
static inline int resample(int count, int32_t *src[])
{
- if (dsp->resample)
+ int32_t *dst[2] =
{
- int32_t *dst[2] =
- {
- resample_buf,
- resample_buf + RESAMPLE_BUF_COUNT/2,
- };
+ &resample_buf[RESAMPLE_BUF_LEFT_CHANNEL],
+ &resample_buf[RESAMPLE_BUF_RIGHT_CHANNEL],
+ };
- count = dsp->resample(count, &dsp->data, src, dst);
- src[0] = dst[0];
- src[1] = dst[dsp->data.num_channels - 1];
- }
+ count = dsp->resample(count, &dsp->data, src, dst);
+
+ src[0] = dst[0];
+ src[1] = dst[dsp->data.num_channels - 1];
return count;
}
@@ -810,30 +805,59 @@ void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff)
c[2] <<= 4;
}
+/* Apply a constant gain to the samples (e.g., for ReplayGain).
+ * Note that this must be called before the resampler.
+ */
+#ifndef DSP_HAVE_ASM_APPLY_GAIN
+static void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
+{
+ const int32_t gain = data->gain; /* S8.23 fixed point, per struct dsp_data */
+ int ch = data->num_channels - 1; /* start at the last channel, count down to 0 */
+
+ do
+ {
+ int32_t *s = buf[ch];
+ int32_t *d = buf[ch]; /* same buffer as s: samples are processed in place */
+ int32_t samp = *s++; /* first sample is loaded ahead of the loop */
+ int i = 0;
+
+ do
+ {
+ FRACMUL_8_LOOP(samp, gain, s, d); /* macro defined outside this view; presumably scales samp, stores via d, reloads via s - confirm */
+ }
+ while (++i < count); /* body runs at least once, even when count < 1 */
+ }
+ while (--ch >= 0);
+}
+#endif /* DSP_HAVE_ASM_APPLY_GAIN */
+
/* Combine all gains to a global gain. */
static void set_gain(struct dsp_config *dsp)
{
- dsp->gain = DEFAULT_GAIN;
+ dsp->data.gain = DEFAULT_GAIN;
/* Replay gain not relevant to voice */
if (dsp == audio_dsp && replaygain)
{
- dsp->gain = replaygain;
+ dsp->data.gain = replaygain;
}
if (eq_enabled && eq_precut)
{
- dsp->gain = (long) (((int64_t) dsp->gain * eq_precut) >> 24);
+ dsp->data.gain =
+ (long) (((int64_t) dsp->data.gain * eq_precut) >> 24);
}
- if (dsp->gain == DEFAULT_GAIN)
+ if (dsp->data.gain == DEFAULT_GAIN)
{
- dsp->gain = 0;
+ dsp->data.gain = 0;
}
else
{
- dsp->gain >>= 1;
+ dsp->data.gain >>= 1;
}
+
+ dsp->apply_gain = dsp->data.gain != 0 ? dsp_apply_gain : NULL;
}
/**
@@ -927,50 +951,6 @@ static void eq_process(int count, int32_t *buf[])
}
}
-/* Apply a constant gain to the samples (e.g., for ReplayGain). May update
- * the src array if gain was applied.
- * Note that this must be called before the resampler.
- */
-static void apply_gain(int count, int32_t *buf[])
-{
- int32_t *sl, *sr;
- int32_t s, *d;
- long gain;
- int i;
-
- if (new_gain)
- {
- /* Gain has changed */
- dsp_set_replaygain();
- if (dsp->gain == 0)
- return; /* No gain to apply now */
- }
-
- sl = buf[0], sr = buf[1];
- gain = dsp->gain;
-
- if (sl != sr)
- {
- d = &sample_buf[SAMPLE_BUF_COUNT / 2];
- buf[1] = d;
- s = *sr++;
-
- for (i = 0; i < count; i++)
- FRACMUL_8_LOOP(s, gain, sr, d);
- }
- else
- {
- buf[1] = &sample_buf[0];
- }
-
- d = &sample_buf[0];
- buf[0] = d;
- s = *sl++;
-
- for (i = 0; i < count; i++)
- FRACMUL_8_LOOP(s, gain, sl, d);
-}
-
void dsp_set_stereo_width(int value)
{
long width, straight, cross;
@@ -993,35 +973,6 @@ void dsp_set_stereo_width(int value)
dsp_sw_cross = cross << 8;
}
-/**
- * Implements the different channel configurations and stereo width.
- */
-
-/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for
- * completeness. */
-#if 0
-static void channels_process_sound_chan_stereo(int count, int32_t *buf[])
-{
- /* The channels are each just themselves */
- (void)count; (void)buf;
-}
-#endif
-
-#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO
-static void channels_process_sound_chan_mono(int count, int32_t *buf[])
-{
- int32_t *sl = buf[0], *sr = buf[1];
-
- do
- {
- int32_t lr = *sl/2 + *sr/2;
- *sl++ = lr;
- *sr++ = lr;
- }
- while (--count > 0);
-}
-#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */
-
#if CONFIG_CODEC == SWCODEC
#ifdef HAVE_SW_TONE_CONTROLS
@@ -1063,6 +1014,35 @@ int dsp_callback(int msg, intptr_t param)
}
#endif
+/**
+ * Implements the different channel configurations and stereo width.
+ */
+
+/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for
+ * completeness. */
+#if 0
+static void channels_process_sound_chan_stereo(int count, int32_t *buf[])
+{
+ /* The channels are each just themselves */
+ (void)count; (void)buf;
+}
+#endif
+
+#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO
+static void channels_process_sound_chan_mono(int count, int32_t *buf[])
+{
+ int32_t *sl = buf[0], *sr = buf[1];
+
+ do
+ {
+ int32_t lr = *sl/2 + *sr/2;
+ *sl++ = lr;
+ *sr++ = lr;
+ }
+ while (--count > 0);
+}
+#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */
+
#ifndef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
static void channels_process_sound_chan_custom(int count, int32_t *buf[])
{
@@ -1151,30 +1131,47 @@ int dsp_process(char *dst, const char *src[], int count)
coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
#endif
+ if (new_gain)
+ dsp_set_replaygain(); /* Gain has changed */
+
+ /* Testing function pointers for NULL is preferred since the pointer
+ will be preloaded to be used for the call if not. */
while (count > 0)
{
- samples = dsp->input_samples(count, src, tmp);
+ samples = MIN(SAMPLE_BUF_COUNT/2, count);
count -= samples;
- if (dsp->gain != 0)
- apply_gain(samples, tmp);
- if ((samples = resample(samples, tmp)) <= 0)
+
+ dsp->input_samples(samples, src, tmp);
+
+ if (dsp->apply_gain)
+ dsp->apply_gain(samples, &dsp->data, tmp);
+
+ if (dsp->resample && (samples = resample(samples, tmp)) <= 0)
break; /* I'm pretty sure we're downsampling here */
+
if (dsp->apply_crossfeed)
dsp->apply_crossfeed(samples, tmp);
+
/* TODO: EQ and tone controls need separate structs for audio and voice
* DSP processing thanks to filter history. isn't really audible now, but
- * might be the day we start handling voice more delicately.
+ * might be the day we start handling voice more delicately. Planned
+ * changes may well run all relevant channels through the same EQ so
+ * perhaps not.
*/
if (eq_enabled)
eq_process(samples, tmp);
+
#ifdef HAVE_SW_TONE_CONTROLS
if ((bass | treble) != 0)
eq_filter(tmp, &tone_filter, samples, dsp->data.num_channels,
FILTER_BISHELF_SHIFT);
#endif
+
if (dsp->channels_process)
dsp->channels_process(samples, tmp);
+
dsp->output_samples(samples, &dsp->data, tmp, (int16_t *)dst);
+
written += samples;
dst += samples * sizeof (int16_t) * 2;
yield();
@@ -1245,9 +1242,6 @@ bool dsp_configure(int setting, intptr_t value)
if (dsp == audio_dsp)
{
*var = value;
- /* In case current gain is zero, force at least one call
- to apply_gain or apply_gain won't pick up on new_gain */
- audio_dsp->gain = -1;
new_gain = true;
}
}
@@ -1282,15 +1276,7 @@ bool dsp_configure(int setting, intptr_t value)
else
dsp->frequency = dsp->codec_frequency;
- resampler_set_delta(dsp->frequency);
-
- if (dsp->frequency == NATIVE_FREQUENCY)
- dsp->resample = NULL;
- else if (dsp->frequency < NATIVE_FREQUENCY)
- dsp->resample = dsp_upsample;
- else
- dsp->resample = dsp_downsample;
-
+ resampler_new_delta();
break;
case DSP_SET_SAMPLE_DEPTH:
@@ -1348,7 +1334,7 @@ bool dsp_configure(int setting, intptr_t value)
case DSP_FLUSH:
memset(&dsp->data.resample_data, 0,
sizeof (dsp->data.resample_data));
- resampler_set_delta(dsp->frequency);
+ resampler_new_delta();
dither_init();
break;
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index f8df337b37..14875d21d8 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -22,32 +22,61 @@
#ifndef _DSP_ASM_H
#define _DSP_ASM_H
+/* Set the appropriate #defines based on CPU or whatever matters */
#ifndef SIMULATOR
-#if defined(CPU_COLDFIRE) || defined(CPU_ARM)
+#if defined(CPU_ARM)
+#define DSP_HAVE_ASM_RESAMPLING
#define DSP_HAVE_ASM_CROSSFEED
-void apply_crossfeed(int count, int32_t *buf[]);
+#elif defined (CPU_COLDFIRE)
+#define DSP_HAVE_ASM_APPLY_GAIN
#define DSP_HAVE_ASM_RESAMPLING
-int dsp_downsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]);
-int dsp_upsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]);
-#endif /* defined(CPU_COLDFIRE) || defined(CPU_ARM) */
-
-#if defined (CPU_COLDFIRE)
+#define DSP_HAVE_ASM_CROSSFEED
#define DSP_HAVE_ASM_SOUND_CHAN_MONO
-void channels_process_sound_chan_mono(int count, int32_t *buf[]);
#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
-void channels_process_sound_chan_custom(int count, int32_t *buf[]);
#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
-void channels_process_sound_chan_karaoke(int count, int32_t *buf[]);
-
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
-void sample_output_mono(int count, struct dsp_data *data,
- int32_t *src[], int16_t *dst);
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
-void sample_output_stereo(int count, struct dsp_data *data,
- int32_t *src[], int16_t *dst);
#endif /* CPU_COLDFIRE */
#endif /* SIMULATOR */
+/* Declare prototypes based upon what's #defined above */
+#ifdef DSP_HAVE_ASM_CROSSFEED
+void apply_crossfeed(int count, int32_t *buf[]);
+#endif
+
+#ifdef DSP_HAVE_ASM_APPLY_GAIN
+void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]);
+#endif /* DSP_HAVE_ASM_APPLY_GAIN */
+
+#ifdef DSP_HAVE_ASM_RESAMPLING
+int dsp_upsample(int count, struct dsp_data *data,
+ int32_t *src[], int32_t *dst[]);
+int dsp_downsample(int count, struct dsp_data *data,
+ int32_t *src[], int32_t *dst[]);
+#endif /* DSP_HAVE_ASM_RESAMPLING */
+
+#ifdef DSP_HAVE_ASM_SOUND_CHAN_MONO
+void channels_process_sound_chan_mono(int count, int32_t *buf[]);
+#endif
+
+#ifdef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
+void channels_process_sound_chan_custom(int count, int32_t *buf[]);
+#endif
+
+#ifdef DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
+void channels_process_sound_chan_karaoke(int count, int32_t *buf[]);
+#endif
+
+#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
+void sample_output_stereo(int count, struct dsp_data *data,
+ int32_t *src[], int16_t *dst);
+#endif
+
+#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
+void sample_output_mono(int count, struct dsp_data *data,
+ int32_t *src[], int16_t *dst);
+#endif
+
#endif /* _DSP_ASM_H */
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
index af9ac1fa4b..e5d3ee8c55 100644
--- a/apps/dsp_cf.S
+++ b/apps/dsp_cf.S
@@ -19,68 +19,117 @@
****************************************************************************/
/****************************************************************************
- * void apply_crossfeed(int count, int32_t *src[])
+ * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
*/
.section .text
+ .align 2
+ .global dsp_apply_gain
+dsp_apply_gain:
+ lea.l -20(%sp), %sp | save registers
+ movem.l %d2-%d4/%a2-%a3, (%sp) |
+ movem.l 28(%sp), %a0-%a1 | %a0 = data,
+ | %a1 = buf
+ move.l 4(%a0), %d1 | %d1 = data->num_channels
+ move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23)
+10: | channel loop |
+ move.l 24(%sp), %d0 | %d0 = count
+ move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1]
+ move.l %a2, %a3 | %a3 = d = s
+ move.l (%a2)+, %d2 | %d2 = *s++,
+ mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
+ subq.l #1, %d0 | --count > 0 ? : effectively n++
+ ble.b 30f | loop done | no? finish up
+20: | loop |
+ move.l %accext01, %d4 | fetch S(n-1)[7:0]
+ movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
+ asl.l #8, %d3 | *d++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
+ mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
+ move.b %d4, %d3 |
+ move.l %d3, (%a3)+ |
+ subq.l #1, %d0 | --count > 0 ? : effectively n++
+ bgt.b 20b | loop | yes? do more samples
+30: | loop done |
+ move.l %accext01, %d4 | fetch S(n-1)[7:0]
+ movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
+ asl.l #8, %d3 | *d = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
+ move.b %d4, %d3 |
+ move.l %d3, (%a3) |
+ subq.l #1, %d1 | next channel
+ bgt.b 10b | channel loop |
+ movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
+ lea.l 20(%sp), %sp | cleanup stack
+ rts |
+ .size dsp_apply_gain,.-dsp_apply_gain
+
+/****************************************************************************
+ * void apply_crossfeed(int count, int32_t *buf[])
+ */
+ .section .text
+ .align 2
.global apply_crossfeed
apply_crossfeed:
- lea.l -44(%sp), %sp
+ lea.l -44(%sp), %sp |
movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src
movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1]
- lea.l crossfeed_data, %a1
- move.l (%a1)+, %a6 | a6 = direct gain
+ lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data
+ move.l (%a1)+, %d6 | %d6 = direct gain
movem.l 12(%a1), %d0-%d3 | fetch filter history samples
move.l 132(%a1), %a0 | fetch delay line address
movem.l (%a1), %a1-%a3 | load filter coefs
+ lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit
+ bra.b 20f | loop start | go to loop start point
/* Register usage in loop:
* %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
- * %a4 = src[0], %a5 = src[1], %a6 = direct gain,
+ * %a4 = buf[0], %a5 = buf[1],
+ * %a6 = delay line pointer wrap limit,
* %d0..%d3 = history
- * %d4..%d6 = temp.
+ * %d4..%d5 = temp.
+ * %d6 = direct gain,
* %d7 = count
*/
-.cfloop:
- mac.l %a2, %d0, 4(%a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n]
- mac.l %a1, %d0 , %acc0 | acc += b0*dr[n]
- mac.l %a3, %d1, (%a4), %d4, %acc0 | acc += a1*y_l[n - 1], load L
- move.l %acc0, %d1 | get filtered delayed sample
- mac.l %a6, %d4, %acc0 | acc += gain*x_l[n]
- movclr.l %acc0, %d6 |
- move.l %d6, (%a4)+ | write result
-
- mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n]
- mac.l %a1, %d2 , %acc0 | acc += b0*dl[n]
- mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load R
- movem.l %d4-%d5, (%a0) | save left & right inputs to delay line
- move.l %acc0, %d3 | get filtered delayed sample
- mac.l %a6, %d5, %acc0 | acc += gain*x_r[n]
- lea.l 8(%a0), %a0 | increment delay pointer
- movclr.l %acc0, %d6 |
- move.l %d6, (%a5)+ | write result
-
- cmpa.l #crossfeed_data+136, %a0| wrap a0 if passed end
- bge.b .cfwrap |
- .word 0x51fb | tpf.l - trap the buffer wrap
-.cfwrap:
- lea.l -104(%a0), %a0 | wrap
- subq.l #1, %d7 | --count < 0 ?
- bgt.b .cfloop |
+10: | loop |
+ movclr.l %acc0, %d4 | write outputs
+ move.l %d4, (%a4)+ | .
+ movclr.l %acc1, %d5 | .
+ move.l %d5, (%a5)+ | .
+20: | loop start |
+ mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n]
+ mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n]
+ mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R
+ mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n]
+ mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n]
+ mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L
+ movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line
+ move.l %acc0, %d3 | get filtered delayed left sample (y_l[n])
+ move.l %acc1, %d1 | get filtered delayed right sample (y_r[n])
+ mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n]
+ mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n]
+ cmp.l %a6, %a0 | wrap %a0 if passed end
+ bhs.b 30f | wrap buffer |
+ .word 0x51fb | tpf.l | trap the buffer wrap
+30: | wrap buffer | ...fwd taken branches more costly
+ lea.l -104(%a0), %a0 | wrap it up
+ subq.l #1, %d7 | --count > 0 ?
+ bgt.b 10b | loop | yes? do more
+ movclr.l %acc0, %d4 | write last outputs
+ move.l %d4, (%a4) | .
+ movclr.l %acc1, %d5 | .
+ move.l %d5, (%a5) | .
lea.l crossfeed_data+16, %a1 | save data back to struct
movem.l %d0-%d3, (%a1) | ...history
move.l %a0, 120(%a1) | ...delay_p
movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
- lea.l 44(%sp), %sp
- rts
-.cfend:
- .size apply_crossfeed,.cfend-apply_crossfeed
-
+ lea.l 44(%sp), %sp |
+ rts |
+ .size apply_crossfeed,.-apply_crossfeed
/****************************************************************************
* int dsp_downsample(int count, struct dsp_data *data,
* in32_t *src[], int32_t *dst[])
*/
.section .text
+ .align 2
.global dsp_downsample
dsp_downsample:
lea.l -40(%sp), %sp | save non-clobberables
@@ -92,7 +141,7 @@ dsp_downsample:
movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels
| %d4 = delta = data->resample_data.delta
moveq.l #16, %d7 | %d7 = shift
-.dschannel_loop:
+10: | channel loop |
move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
@@ -102,15 +151,15 @@ dsp_downsample:
move.l %d5, %d6 | %d6 = pos = phase >> 16
lsr.l %d7, %d6 |
cmp.l %d2, %d6 | past end of samples?
- bge.b .dsloop_skip | yes? skip loop
+ bge.b 40f | skip resample loop| yes? skip loop
tst.l %d6 | need last sample of prev. frame?
- bne.b .dsloop | no? start main loop
+ bne.b 20f | resample loop | no? start main loop
move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
- bra.b .dsuse_last_start | start with last (last in %d0)
-.dsloop:
+ bra.b 30f | resample start last | start with last (last in %d0)
+20: | resample loop |
lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
movem.l (%a5), %d0-%d1 |
-.dsuse_last_start:
+30: | resample start last |
sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
move.l %d0, %acc0 | %acc0 = previous sample
move.l %d5, %d0 | frac = (phase << 16) >> 1
@@ -123,11 +172,11 @@ dsp_downsample:
movclr.l %acc0, %d0 |
move.l %d0, (%a4)+ | *d++ = %d0
cmp.l %d2, %d6 | pos < count?
- blt.b .dsloop | yes? continue resampling
-.dsloop_skip:
+ blt.b 20b | resample loop | yes? continue resampling
+40: | skip resample loop |
subq.l #1, %d3 | ch > 0?
- bgt.b .dschannel_loop | yes? process next channel
- asl.l %d7, %d2 | wrap phase to start of next frame
+ bgt.b 10b | channel loop | yes? process next channel
+ lsl.l %d7, %d2 | wrap phase to start of next frame
sub.l %d2, %d5 | data->resample_data.phase =
move.l %d5, 12(%a0) | ... phase - (count << 16)
move.l %a4, %d0 | return d - d[0]
@@ -136,14 +185,14 @@ dsp_downsample:
movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
lea.l 40(%sp), %sp | cleanup stack
rts | buh-bye
-.dsend:
- .size dsp_downsample,.dsend-dsp_downsample
+ .size dsp_downsample,.-dsp_downsample
/****************************************************************************
* int dsp_upsample(int count, struct dsp_data *dsp,
- * in32_t *src[], int32_t *dst[])
+ * int32_t *src[], int32_t *dst[])
*/
.section .text
+ .align 2
.global dsp_upsample
dsp_upsample:
lea.l -40(%sp), %sp | save non-clobberables
@@ -154,47 +203,55 @@ dsp_upsample:
| %a2 = dst
movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
| %d4 = delta = data->resample_data.delta
- swap %d4 | swap delta to high word to use
- | carries to increment position
-.uschannel_loop:
+ swap %d4 | swap delta to high word to use...
+ | ...carries to increment position
+10: | channel loop |
move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
- lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count]
+ lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1]
move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
- move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
+ move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
+ move.l (%a3)+, %d1 | fetch first sample - might throw this...
+ | ...away later but we'll be preincremented
+ move.l %d1, %d6 | save sample value
+ sub.l %d0, %d1 | %d1 = diff = s[0] - last
swap %d5 | swap phase to high word to use
| carries to increment position
- move.l %d5, %d6 | %d6 = pos = phase >> 16
+ move.l %d5, %d7 | %d7 = pos = phase >> 16
clr.w %d5 |
- eor.l %d5, %d6 | pos == 0?
- beq.b .usstart_0 | no? transistion from down
- cmp.l %d2, %d6 | past end of samples?
- bge.b .usloop_skip | yes? skip loop
- lea.l -4(%a3, %d6.l*4), %a3 | %a3 = s = &s[pos-1] (previous)
- move.l (%a3)+, %d0 | %d0 = *s++
- .word 0x51fa | tpf.w - trap next instruction
-.usloop_1:
+ eor.l %d5, %d7 | pos == 0?
+ beq.b 40f | loop start | yes? start loop
+ cmp.l %d2, %d7 | past end of samples?
+ bge.b 50f | skip resample loop| yes? go to next channel and collect info
+ lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1]
+ movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos]
+ move.l %d1, %d6 | save sample value
+ sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
+ bra.b 40f | loop start |
+20: | next sample loop |
move.l %d6, %d0 | move previous sample to %d0
-.usstart_0:
move.l (%a3)+, %d1 | fetch next sample
move.l %d1, %d6 | save sample value
sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
-.usloop_0:
+30: | same sample loop |
+ movclr.l %acc0, %d7 | %d7 = result
+ move.l %d7, (%a4)+ | *d++ = %d7
+40: | loop start |
lsr.l #1, %d5 | make phase into frac
+ move.l %d0, %acc0 | %acc0 = s[pos-1]
mac.l %d1, %d5, %acc0 | %acc0 = diff * frac
lsl.l #1, %d5 | restore frac to phase
- movclr.l %acc0, %d7 | %d7 = product
- add.l %d0, %d7 | %d7 = last + product
- move.l %d7, (%a4)+ | *d++ = %d7
add.l %d4, %d5 | phase += delta
- bcc.b .usloop_0 | load next values?
+ bcc.b 30b | same sample loop | load next values?
cmp.l %a5, %a3 | src <= src_end?
- ble.b .usloop_1 | yes? continue resampling
-.usloop_skip:
+ bls.b 20b | next sample loop | yes? continue resampling
+ movclr.l %acc0, %d7 | %d7 = result
+ move.l %d7, (%a4)+ | *d++ = %d7
+50: | skip resample loop |
subq.l #1, %d3 | ch > 0?
- bgt.b .uschannel_loop | yes? process next channel
+ bgt.b 10b | channel loop | yes? process next channel
swap %d5 | wrap phase to start of next frame
move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
move.l %a4, %d0 | return d - d[0]
@@ -203,12 +260,7 @@ dsp_upsample:
asr.l #2, %d0 | convert bytes->samples
lea.l 40(%sp), %sp | cleanup stack
rts | buh-bye
-.usend:
- .size dsp_upsample,.usend-dsp_upsample
-
-/* These routines might benefit from burst transfers but we'll keep them
- * small for now since they're rather light weight
- */
+ .size dsp_upsample,.-dsp_upsample
/****************************************************************************
* void channels_process_sound_chan_mono(int count, int32_t *buf[])
@@ -216,31 +268,39 @@ dsp_upsample:
* Mix left and right channels 50/50 into a center channel.
*/
.section .text
+ .align 2
.global channels_process_sound_chan_mono
channels_process_sound_chan_mono:
movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
- lea.l -12(%sp), %sp | save registers
- move.l %macsr, %d1 |
- movem.l %d1-%d3, (%sp) |
- move.l #0xb0, %macsr | put emac in rounding fractional mode
+ lea.l -20(%sp), %sp | save registers (5 longwords = 20 bytes)
+ movem.l %d2-%d4/%a2-%a3, (%sp) | ...%d2-%d4 and %a2-%a3
movem.l (%a0), %a0-%a1 | get channel pointers
+ move.l %a0, %a2 | use separate dst pointers since read
+ move.l %a1, %a3 | pointers run one ahead of write
move.l #0x40000000, %d3 | %d3 = 0.5
-1:
- move.l (%a0), %d1 | L = R = l/2 + r/2
- mac.l %d1, %d3, (%a1), %d2, %acc0 |
- mac.l %d2, %d3, %acc0 |
- movclr.l %acc0, %d1 |
- move.l %d1, (%a0)+ | output to original buffer
- move.l %d1, (%a1)+ |
- subq.l #1, %d0 |
- bgt.s 1b |
- movem.l (%sp), %d1-%d3 | restore registers
- move.l %d1, %macsr |
- lea.l 12(%sp), %sp | cleanup
- rts
-.cpmono_end:
- .size channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono
-
+ move.l (%a0)+, %d1 | prime the input registers: %d1 = first l
+ move.l (%a1)+, %d2 | %d2 = first r
+ mac.l %d1, %d3, (%a0)+, %d1, %acc0 | %acc0 += l * %d3, fetch next l (read before count check)
+ mac.l %d2, %d3, (%a1)+, %d2, %acc0 | %acc0 += r * %d3, fetch next r (read before count check)
+ subq.l #1, %d0 | count the sample now held in %acc0
+ ble.s 20f | loop done | none left after it? just flush it
+10: | loop |
+ movclr.l %acc0, %d4 | L = R = l/2 + r/2 (take result, clear %acc0)
+ mac.l %d1, %d3, (%a0)+, %d1, %acc0 | start next sample: %acc0 += l * %d3, fetch next l
+ mac.l %d2, %d3, (%a1)+, %d2, %acc0 | %acc0 += r * %d3, fetch next r
+ move.l %d4, (%a2)+ | output to original buffer
+ move.l %d4, (%a3)+ | same value goes to both channels
+ subq.l #1, %d0 | one more sample pending in %acc0
+ bgt.s 10b | loop | count > 0? keep going
+20: | loop done |
+ movclr.l %acc0, %d4 | output last sample
+ move.l %d4, (%a2) | no post-increment: dst pointers are not used again
+ move.l %d4, (%a3) |
+ movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
+ lea.l 20(%sp), %sp | cleanup
+ rts |
+ .size channels_process_sound_chan_mono, \
+ .-channels_process_sound_chan_mono
/****************************************************************************
* void channels_process_sound_chan_custom(int count, int32_t *buf[])
@@ -248,34 +308,47 @@ channels_process_sound_chan_mono:
* Apply stereo width (narrowing/expanding) effect.
*/
.section .text
+ .align 2
.global channels_process_sound_chan_custom
channels_process_sound_chan_custom:
movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
- lea.l -16(%sp), %sp | save registers
- move.l %macsr, %d1 |
- movem.l %d1-%d4, (%sp) |
- move.l #0xb0, %macsr | put emac in rounding fractional mode
+ lea.l -28(%sp), %sp | save registers (7 longwords = 28 bytes)
+ movem.l %d2-%d6/%a2-%a3, (%sp) | ...%d2-%d6 and %a2-%a3
movem.l (%a0), %a0-%a1 | get channel pointers
+ move.l %a0, %a2 | use separate dst pointers since read
+ move.l %a1, %a3 | pointers run one ahead of write
move.l dsp_sw_gain, %d3 | load straight (mid) gain
move.l dsp_sw_cross, %d4 | load cross (side) gain
-1:
- move.l (%a0), %d1 |
- mac.l %d1, %d3, (%a1), %d2, %acc0 | L = l*gain + r*cross
- mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross
- mac.l %d2, %d4 , %acc0 |
- mac.l %d2, %d3 , %acc1 |
- movclr.l %acc0, %d1 |
- movclr.l %acc1, %d2 |
- move.l %d1, (%a0)+ |
- move.l %d2, (%a1)+ |
+ move.l (%a0)+, %d1 | prime the input registers: %d1 = first l
+ move.l (%a1)+, %d2 | %d2 = first r
+ mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
+ mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross (also fetches next l)
+ mac.l %d2, %d4 , %acc0 | %acc0 += r*cross
+ mac.l %d2, %d3, (%a1)+, %d2, %acc1 | %acc1 += r*gain, fetch next r
subq.l #1, %d0 |
- bgt.s 1b |
- movem.l (%sp), %d1-%d4 | restore registers
- move.l %d1, %macsr |
- lea.l 16(%sp), %sp | cleanup
- rts
-.cpcustom_end:
- .size channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom
+ ble.b 20f | loop done | none left after the primed one? just flush it
+10: | loop |
+ movclr.l %acc0, %d5 | %d5 = finished L (clears %acc0)
+ movclr.l %acc1, %d6 | %d6 = finished R (clears %acc1)
+15: | loop start | (not targeted by any branch in this routine)
+ mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
+ mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross (also fetches next l)
+ mac.l %d2, %d4 , %acc0 | %acc0 += r*cross
+ mac.l %d2, %d3, (%a1)+, %d2, %acc1 | %acc1 += r*gain, fetch next r
+ move.l %d5, (%a2)+ | store previous L
+ move.l %d6, (%a3)+ | store previous R
+ subq.l #1, %d0 | one more sample pending in the accumulators
+ bgt.s 10b | loop | count > 0? keep going
+20: | loop done |
+ movclr.l %acc0, %d5 | output last sample
+ movclr.l %acc1, %d6 |
+ move.l %d5, (%a2) | no post-increment: dst pointers are not used again
+ move.l %d6, (%a3) |
+ movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers
+ lea.l 28(%sp), %sp | cleanup
+ rts |
+ .size channels_process_sound_chan_custom, \
+ .-channels_process_sound_chan_custom
/****************************************************************************
* void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
@@ -283,31 +356,42 @@ channels_process_sound_chan_custom:
* Separate channels into side channels.
*/
.section .text
+ .align 2
.global channels_process_sound_chan_karaoke
channels_process_sound_chan_karaoke:
movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
- lea.l -16(%sp), %sp | save registers
- move.l %macsr, %d1 |
- movem.l %d1-%d4, (%sp) |
- move.l #0xb0, %macsr | put emac in rounding fractional mode
- movem.l (%a0), %a0-%a1 | get channel pointers
- move.l #0x40000000, %d4 | %d3 = 0.5
-1:
- move.l (%a0), %d1 |
- msac.l %d1, %d4, (%a1), %d2, %acc0 | R = r/2 - l/2
- mac.l %d2, %d4 , %acc0 |
- movclr.l %acc0, %d1 |
- move.l %d1, (%a1)+ |
- neg.l %d1 | L = -R = -(r/2 - l/2) = l/2 - r/2
- move.l %d1, (%a0)+ |
- subq.l #1, %d0 |
- bgt.s 1b |
- movem.l (%sp), %d1-%d4 | restore registers
- move.l %d1, %macsr |
- lea.l 16(%sp), %sp | cleanup
- rts
-.cpkaraoke_end:
- .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke
+ lea.l -20(%sp), %sp | save registers (5 longwords = 20 bytes)
+ movem.l %d2-%d4/%a2-%a3, (%sp) | ...%d2-%d4 and %a2-%a3
+ movem.l (%a0), %a0-%a1 | get channel src pointers
+ move.l %a0, %a2 | use separate dst pointers since read
+ move.l %a1, %a3 | pointers run one ahead of write
+ move.l #0x40000000, %d3 | %d3 = 0.5
+ move.l (%a0)+, %d1 | prime the input registers: %d1 = first l
+ move.l (%a1)+, %d2 | %d2 = first r
+ mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2 (also fetches next l)
+ msac.l %d2, %d3, (%a1)+, %d2, %acc0 | %acc0 -= r * %d3, fetch next r
+ subq.l #1, %d0 | count the sample now held in %acc0
+ ble.b 20f | loop done | none left after it? just flush it
+10: | loop |
+ movclr.l %acc0, %d4 | %d4 = finished L (clears %acc0)
+ mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2 (also fetches next l)
+ msac.l %d2, %d3, (%a1)+, %d2, %acc0 | %acc0 -= r * %d3, fetch next r
+ move.l %d4, (%a2)+ | store previous L
+ neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
+ move.l %d4, (%a3)+ | store previous R
+ subq.l #1, %d0 | one more sample pending in %acc0
+ bgt.s 10b | loop | count > 0? keep going
+20: | loop done |
+ movclr.l %acc0, %d4 | output last sample
+ move.l %d4, (%a2) | L; no post-increment, pointer not used again
+ neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
+ move.l %d4, (%a3) | R
+ movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
+ lea.l 20(%sp), %sp | cleanup
+ rts |
+ .size channels_process_sound_chan_karaoke, \
+ .-channels_process_sound_chan_karaoke
+
/****************************************************************************
* void sample_output_stereo(int count, struct dsp_data *data,
* int32_t *src[], int16_t *dst)
@@ -329,6 +413,7 @@ channels_process_sound_chan_karaoke:
*
*/
.section .text
+ .align 2
.global sample_output_stereo
sample_output_stereo:
lea.l -44(%sp), %sp | save registers
@@ -348,11 +433,11 @@ sample_output_stereo:
add.l %a4, %d0 |
and.l #0xfffffff0, %d0 |
cmp.l %a0, %d0 | at least a full line?
- bhi.w .sos_longloop_1_start | no? jump to trailing longword
+ bhi.w 40f | long loop 1 start | no? do as trailing longwords
sub.l #16, %d0 | %d1 = first line bound
cmp.l %a4, %d0 | any leading longwords?
- bls.b .sos_lineloop_start | no? jump to line loop
-.sos_longloop_0:
+ bls.b 20f | line loop start | no? start line loop
+10: | long loop 0 |
move.l (%a2)+, %d1 | read longword from L and R
mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
mac.l %d2, %a1, %acc1 | shift R to high word
@@ -362,10 +447,10 @@ sample_output_stereo:
move.w %d2, %d1 | interleave MS 16 bits of each
move.l %d1, (%a4)+ | ...and write both
cmp.l %a4, %d0 |
- bhi.b .sos_longloop_0 |
-.sos_lineloop_start:
+ bhi.b 10b | long loop 0 |
+20: | line loop start |
lea.l -12(%a0), %a5 | %a5 = at or just before last line bound
-.sos_lineloop:
+30: | line loop |
move.l (%a3)+, %d4 | get next 4 R samples and scale
mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
mac.l %d5, %a1, (%a3)+, %d6, %acc1 |
@@ -394,11 +479,11 @@ sample_output_stereo:
move.w %d7, %d3 |
movem.l %d0-%d3, -16(%a4) | write four stereo samples
cmp.l %a4, %a5 |
- bhi.b .sos_lineloop |
-.sos_longloop_1_start:
+ bhi.b 30b | line loop |
+40: | long loop 1 start |
cmp.l %a4, %a0 | any longwords left?
- bls.b .sos_done | no? finished.
-.sos_longloop_1:
+ bls.b 60f | output end | no? stop
+50: | long loop 1 |
move.l (%a2)+, %d1 | handle trailing longwords
mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
mac.l %d2, %a1, %acc1 |
@@ -408,14 +493,13 @@ sample_output_stereo:
move.w %d2, %d1 |
move.l %d1, (%a4)+ |
cmp.l %a4, %a0 |
- bhi.b .sos_longloop_1 |
-.sos_done:
+ bhi.b 50b | long loop 1
+60: | output end |
movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers
move.l %d1, %macsr |
lea.l 44(%sp), %sp | cleanup
rts |
-.sos_end:
- .size sample_output_stereo, .sos_end-sample_output_stereo
+ .size sample_output_stereo, .-sample_output_stereo
/****************************************************************************
* void sample_output_mono(int count, struct dsp_data *data,
@@ -424,6 +508,7 @@ sample_output_stereo:
* Same treatment as sample_output_stereo but for one channel.
*/
.section .text
+ .align 2
.global sample_output_mono
sample_output_mono:
lea.l -28(%sp), %sp | save registers
@@ -442,11 +527,11 @@ sample_output_mono:
add.l %a3, %d0 |
and.l #0xfffffff0, %d0 |
cmp.l %a0, %d0 | at least a full line?
- bhi.w .som_longloop_1_start | no? jump to trailing longword
+ bhi.w 40f | long loop 1 start | no? do as trailing longwords
sub.l #16, %d0 | %d1 = first line bound
cmp.l %a3, %d0 | any leading longwords?
- bls.b .som_lineloop_start | no? jump to line loop
-.som_longloop_0:
+ bls.b 20f | line loop start | no? start line loop
+10: | long loop 0 |
move.l (%a2)+, %d1 | read longword from L and R
mac.l %d1, %d5, %acc0 | shift L to high word
movclr.l %acc0, %d1 | get possibly saturated results
@@ -455,10 +540,10 @@ sample_output_mono:
move.w %d2, %d1 | duplicate single channel into
move.l %d1, (%a3)+ | L and R
cmp.l %a3, %d0 |
- bhi.b .som_longloop_0 |
-.som_lineloop_start:
+ bhi.b 10b | long loop 0 |
+20: | line loop start |
lea.l -12(%a0), %a1 | %a1 = at or just before last line bound
-.som_lineloop:
+30: | line loop |
move.l (%a2)+, %d0 | get next 4 L samples and scale
mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
mac.l %d1, %d5, (%a2)+, %d2, %acc1 |
@@ -483,11 +568,11 @@ sample_output_mono:
move.w %d4, %d3 |
movem.l %d0-%d3, -16(%a3) | write four stereo samples
cmp.l %a3, %a1 |
- bhi.b .som_lineloop |
-.som_longloop_1_start:
+ bhi.b 30b | line loop |
+40: | long loop 1 start |
cmp.l %a3, %a0 | any longwords left?
- bls.b .som_done | no? finished.
-.som_longloop_1:
+ bls.b 60f | output end | no? stop
+50: | long loop 1 |
move.l (%a2)+, %d1 | handle trailing longwords
mac.l %d1, %d5, %acc0 | the same way as leading ones
movclr.l %acc0, %d1 |
@@ -496,11 +581,10 @@ sample_output_mono:
move.w %d2, %d1 |
move.l %d1, (%a3)+ |
cmp.l %a3, %a0 |
- bhi.b .som_longloop_1 |
-.som_done:
+ bhi.b 50b | long loop 1 |
+60: | output end |
movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers
move.l %d1, %macsr |
lea.l 28(%sp), %sp | cleanup
rts |
-.som_end:
- .size sample_output_mono, .som_end-sample_output_mono
+ .size sample_output_mono, .-sample_output_mono