diff options
-rw-r--r-- | apps/dsp.c | 354 | ||||
-rw-r--r-- | apps/dsp_asm.h | 59 | ||||
-rw-r--r-- | apps/dsp_cf.S | 424 |
3 files changed, 468 insertions, 369 deletions
diff --git a/apps/dsp.c b/apps/dsp.c index be851e2305..3b95145b39 100644 --- a/apps/dsp.c +++ b/apps/dsp.c @@ -38,9 +38,14 @@ #define WORD_FRACBITS 27 #define NATIVE_DEPTH 16 +/* If the buffer sizes change, check the assembly code! */ #define SAMPLE_BUF_COUNT 256 #define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/ #define DEFAULT_GAIN 0x01000000 +#define SAMPLE_BUF_LEFT_CHANNEL 0 +#define SAMPLE_BUF_RIGHT_CHANNEL (SAMPLE_BUF_COUNT/2) +#define RESAMPLE_BUF_LEFT_CHANNEL 0 +#define RESAMPLE_BUF_RIGHT_CHANNEL (RESAMPLE_BUF_COUNT/2) /* enums to index conversion properly with stereo mode and other settings */ enum @@ -66,11 +71,10 @@ enum * NOTE: Any assembly routines that use these structures must be updated * if current data members are moved or changed. */ - /* 32-bit achitecture offset */ struct resample_data { - long delta; /* 00h */ - long phase; /* 04h */ + uint32_t delta; /* 00h */ + uint32_t phase; /* 04h */ int32_t last_sample[2]; /* 08h */ /* 10h */ }; @@ -93,9 +97,10 @@ struct dsp_data int output_scale; /* 00h */ int num_channels; /* 04h */ struct resample_data resample_data; /* 08h */ - int clip_min; /* 18h */ - int clip_max; /* 2ch */ - /* 30h */ + int32_t clip_min; /* 18h */ + int32_t clip_max; /* 1ch */ + int32_t gain; /* 20h - Note that this is in S8.23 format. 
*/ + /* 24h */ }; /* No asm...yet */ @@ -132,13 +137,18 @@ struct eq_state #include <dsp_asm.h> /* Typedefs keep things much neater in this case */ -typedef int (*sample_input_fn_type)(int count, const char *src[], - int32_t *dst[]); +typedef void (*sample_input_fn_type)(int count, const char *src[], + int32_t *dst[]); typedef int (*resample_fn_type)(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); typedef void (*sample_output_fn_type)(int count, struct dsp_data *data, int32_t *src[], int16_t *dst); +/* Single-DSP channel processing in place */ typedef void (*channels_process_fn_type)(int count, int32_t *buf[]); +/* DSP local channel processing in place */ +typedef void (*channels_process_dsp_fn_type)(int count, struct dsp_data *data, + int32_t *buf[]); + /* ***************************************************************************/ @@ -152,16 +162,16 @@ struct dsp_config int sample_bytes; int stereo_mode; int frac_bits; - long gain; /* Note that this is in S8.23 format. */ /* Functions that change depending upon settings - NULL if stage is disabled */ - sample_input_fn_type input_samples; - resample_fn_type resample; - sample_output_fn_type output_samples; + sample_input_fn_type input_samples; + resample_fn_type resample; + sample_output_fn_type output_samples; /* These will be NULL for the voice codec and is more economical that way */ - channels_process_fn_type apply_crossfeed; - channels_process_fn_type channels_process; + channels_process_dsp_fn_type apply_gain; + channels_process_fn_type apply_crossfeed; + channels_process_fn_type channels_process; }; /* General DSP config */ @@ -211,7 +221,7 @@ static struct dsp_config *dsp IDATA_ATTR = audio_dsp; * of copying needed is minimized for that case. 
*/ -static int32_t sample_buf[SAMPLE_BUF_COUNT] IBSS_ATTR; +int32_t sample_buf[SAMPLE_BUF_COUNT] IBSS_ATTR; static int32_t resample_buf[RESAMPLE_BUF_COUNT] IBSS_ATTR; /* set a new dsp and return old one */ @@ -258,23 +268,20 @@ void sound_set_pitch(int permille) dsp_configure(DSP_SWITCH_FREQUENCY, dsp->codec_frequency); } -/* Convert at most count samples to the internal format, if needed. Returns - * number of samples ready for further processing. Updates src to point - * past the samples "consumed" and dst is set to point to the samples to - * consume. Note that for mono, dst[0] equals dst[1], as there is no point - * in processing the same data twice. +/* Convert count samples to the internal format, if needed. Updates src + * to point past the samples "consumed" and dst is set to point to the + * samples to consume. Note that for mono, dst[0] equals dst[1], as there + * is no point in processing the same data twice. */ /* convert count 16-bit mono to 32-bit mono */ -static int sample_input_lte_native_mono( +static void sample_input_lte_native_mono( int count, const char *src[], int32_t *dst[]) { - count = MIN(SAMPLE_BUF_COUNT/2, count); - const int16_t *s = (int16_t *) src[0]; const int16_t * const send = s + count; - int32_t *d = dst[0] = dst[1] = sample_buf; - const int scale = WORD_SHIFT; + int32_t *d = dst[0] = dst[1] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL]; + int scale = WORD_SHIFT; do { @@ -283,21 +290,17 @@ static int sample_input_lte_native_mono( while (s < send); src[0] = (char *)s; - - return count; } /* convert count 16-bit interleaved stereo to 32-bit noninterleaved */ -static int sample_input_lte_native_i_stereo( +static void sample_input_lte_native_i_stereo( int count, const char *src[], int32_t *dst[]) { - count = MIN(SAMPLE_BUF_COUNT/2, count); - const int32_t *s = (int32_t *) src[0]; const int32_t * const send = s + count; - int32_t *dl = dst[0] = sample_buf; - int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2; - const int scale = 
WORD_SHIFT; + int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL]; + int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL]; + int scale = WORD_SHIFT; do { @@ -313,22 +316,18 @@ static int sample_input_lte_native_i_stereo( while (s < send); src[0] = (char *)s; - - return count; } /* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */ -static int sample_input_lte_native_ni_stereo( +static void sample_input_lte_native_ni_stereo( int count, const char *src[], int32_t *dst[]) { - count = MIN(SAMPLE_BUF_COUNT/2, count); - const int16_t *sl = (int16_t *) src[0]; const int16_t *sr = (int16_t *) src[1]; const int16_t * const slend = sl + count; - int32_t *dl = dst[0] = sample_buf; - int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2; - const int scale = WORD_SHIFT; + int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL]; + int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL]; + int scale = WORD_SHIFT; do { @@ -339,35 +338,24 @@ static int sample_input_lte_native_ni_stereo( src[0] = (char *)sl; src[1] = (char *)sr; - - return count; } /* convert count 32-bit mono to 32-bit mono */ -static int sample_input_gt_native_mono( +static void sample_input_gt_native_mono( int count, const char *src[], int32_t *dst[]) { - count = MIN(SAMPLE_BUF_COUNT/2, count); - dst[0] = dst[1] = (int32_t *)src[0]; src[0] = (char *)(dst[0] + count); - - return count; } /* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */ -static int sample_input_gt_native_i_stereo( +static void sample_input_gt_native_i_stereo( int count, const char *src[], int32_t *dst[]) { - count = MIN(SAMPLE_BUF_COUNT/2, count); - const int32_t *s = (int32_t *)src[0]; const int32_t * const send = s + 2*count; - int32_t *dl = sample_buf; - int32_t *dr = sample_buf + SAMPLE_BUF_COUNT/2; - - dst[0] = dl; - dst[1] = dr; + int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL]; + int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL]; do { @@ -377,22 
+365,16 @@ static int sample_input_gt_native_i_stereo( while (s < send); src[0] = (char *)send; - - return count; } /* convert 32 bit-noninterleaved stereo to 32-bit noninterleaved stereo */ -static int sample_input_gt_native_ni_stereo( +static void sample_input_gt_native_ni_stereo( int count, const char *src[], int32_t *dst[]) { - count = MIN(SAMPLE_BUF_COUNT/2, count); - dst[0] = (int32_t *)src[0]; dst[1] = (int32_t *)src[1]; src[0] = (char *)(dst[0] + count); src[1] = (char *)(dst[1] + count); - - return count; } /** @@ -573,12 +555,6 @@ static void sample_output_new_format(void) dsp->output_samples = sample_output_functions[out]; } -static void resampler_set_delta(int frequency) -{ - dsp->data.resample_data.delta = (unsigned long) - frequency * 65536LL / NATIVE_FREQUENCY; -} - /** * Linear interpolation resampling that introduces a one sample delay because * of our inability to look into the future at the end of a frame. @@ -587,9 +563,9 @@ static void resampler_set_delta(int frequency) static int dsp_downsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]) { - int ch = data->num_channels - 1; - long delta = data->resample_data.delta; - long phase, pos; + int ch = data->num_channels - 1; + uint32_t delta = data->resample_data.delta; + uint32_t phase, pos; int32_t *d; /* Rolled channel loop actually showed slightly faster. 
*/ @@ -610,7 +586,7 @@ static int dsp_downsample(int count, struct dsp_data *data, if (pos > 0) last = s[pos - 1]; - while (pos < count) + while (pos < (uint32_t)count) { *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); phase += delta; @@ -625,12 +601,12 @@ static int dsp_downsample(int count, struct dsp_data *data, return d - dst[0]; } -static int dsp_upsample(int count, struct dsp_data *data, +static int dsp_upsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]) { int ch = data->num_channels - 1; - long delta = data->resample_data.delta; - long phase, pos; + uint32_t delta = data->resample_data.delta; + uint32_t phase, pos; int32_t *d; /* Rolled channel loop actually showed slightly faster. */ @@ -653,7 +629,7 @@ static int dsp_upsample(int count, struct dsp_data *data, pos = phase >> 16; } - while (pos < count) + while (pos < (uint32_t)count) { last = s[pos - 1]; *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); @@ -669,24 +645,43 @@ static int dsp_upsample(int count, struct dsp_data *data, } #endif /* DSP_HAVE_ASM_RESAMPLING */ +static void resampler_new_delta(void) +{ + dsp->data.resample_data.delta = (unsigned long) + dsp->frequency * 65536LL / NATIVE_FREQUENCY; + + if (dsp->frequency == NATIVE_FREQUENCY) + { + /* NOTE: If fully glitch-free transistions from no resampling to + resampling are desired, last_sample history should be maintained + even when not resampling. */ + dsp->resample = NULL; + dsp->data.resample_data.phase = 0; + dsp->data.resample_data.last_sample[0] = 0; + dsp->data.resample_data.last_sample[1] = 0; + } + else if (dsp->frequency < NATIVE_FREQUENCY) + dsp->resample = dsp_upsample; + else + dsp->resample = dsp_downsample; +} + /* Resample count stereo samples. Updates the src array, if resampling is * done, to refer to the resampled data. Returns number of stereo samples * for further processing. 
*/ static inline int resample(int count, int32_t *src[]) { - if (dsp->resample) + int32_t *dst[2] = { - int32_t *dst[2] = - { - resample_buf, - resample_buf + RESAMPLE_BUF_COUNT/2, - }; + &resample_buf[RESAMPLE_BUF_LEFT_CHANNEL], + &resample_buf[RESAMPLE_BUF_RIGHT_CHANNEL], + }; - count = dsp->resample(count, &dsp->data, src, dst); - src[0] = dst[0]; - src[1] = dst[dsp->data.num_channels - 1]; - } + count = dsp->resample(count, &dsp->data, src, dst); + + src[0] = dst[0]; + src[1] = dst[dsp->data.num_channels - 1]; return count; } @@ -810,30 +805,59 @@ void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff) c[2] <<= 4; } +/* Apply a constant gain to the samples (e.g., for ReplayGain). + * Note that this must be called before the resampler. + */ +#ifndef DSP_HAVE_ASM_APPLY_GAIN +static void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) +{ + const int32_t gain = data->gain; + int ch = data->num_channels - 1; + + do + { + int32_t *s = buf[ch]; + int32_t *d = buf[ch]; + int32_t samp = *s++; + int i = 0; + + do + { + FRACMUL_8_LOOP(samp, gain, s, d); + } + while (++i < count); + } + while (--ch >= 0); +} +#endif /* DSP_HAVE_ASM_APPLY_GAIN */ + /* Combine all gains to a global gain. */ static void set_gain(struct dsp_config *dsp) { - dsp->gain = DEFAULT_GAIN; + dsp->data.gain = DEFAULT_GAIN; /* Replay gain not relevant to voice */ if (dsp == audio_dsp && replaygain) { - dsp->gain = replaygain; + dsp->data.gain = replaygain; } if (eq_enabled && eq_precut) { - dsp->gain = (long) (((int64_t) dsp->gain * eq_precut) >> 24); + dsp->data.gain = + (long) (((int64_t) dsp->data.gain * eq_precut) >> 24); } - if (dsp->gain == DEFAULT_GAIN) + if (dsp->data.gain == DEFAULT_GAIN) { - dsp->gain = 0; + dsp->data.gain = 0; } else { - dsp->gain >>= 1; + dsp->data.gain >>= 1; } + + dsp->apply_gain = dsp->data.gain != 0 ? 
dsp_apply_gain : NULL; } /** @@ -927,50 +951,6 @@ static void eq_process(int count, int32_t *buf[]) } } -/* Apply a constant gain to the samples (e.g., for ReplayGain). May update - * the src array if gain was applied. - * Note that this must be called before the resampler. - */ -static void apply_gain(int count, int32_t *buf[]) -{ - int32_t *sl, *sr; - int32_t s, *d; - long gain; - int i; - - if (new_gain) - { - /* Gain has changed */ - dsp_set_replaygain(); - if (dsp->gain == 0) - return; /* No gain to apply now */ - } - - sl = buf[0], sr = buf[1]; - gain = dsp->gain; - - if (sl != sr) - { - d = &sample_buf[SAMPLE_BUF_COUNT / 2]; - buf[1] = d; - s = *sr++; - - for (i = 0; i < count; i++) - FRACMUL_8_LOOP(s, gain, sr, d); - } - else - { - buf[1] = &sample_buf[0]; - } - - d = &sample_buf[0]; - buf[0] = d; - s = *sl++; - - for (i = 0; i < count; i++) - FRACMUL_8_LOOP(s, gain, sl, d); -} - void dsp_set_stereo_width(int value) { long width, straight, cross; @@ -993,35 +973,6 @@ void dsp_set_stereo_width(int value) dsp_sw_cross = cross << 8; } -/** - * Implements the different channel configurations and stereo width. - */ - -/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for - * completeness. */ -#if 0 -static void channels_process_sound_chan_stereo(int count, int32_t *buf[]) -{ - /* The channels are each just themselves */ - (void)count; (void)buf; -} -#endif - -#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO -static void channels_process_sound_chan_mono(int count, int32_t *buf[]) -{ - int32_t *sl = buf[0], *sr = buf[1]; - - do - { - int32_t lr = *sl/2 + *sr/2; - *sl++ = lr; - *sr++ = lr; - } - while (--count > 0); -} -#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */ - #if CONFIG_CODEC == SWCODEC #ifdef HAVE_SW_TONE_CONTROLS @@ -1063,6 +1014,35 @@ int dsp_callback(int msg, intptr_t param) } #endif +/** + * Implements the different channel configurations and stereo width. 
+ */ + +/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for + * completeness. */ +#if 0 +static void channels_process_sound_chan_stereo(int count, int32_t *buf[]) +{ + /* The channels are each just themselves */ + (void)count; (void)buf; +} +#endif + +#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO +static void channels_process_sound_chan_mono(int count, int32_t *buf[]) +{ + int32_t *sl = buf[0], *sr = buf[1]; + + do + { + int32_t lr = *sl/2 + *sr/2; + *sl++ = lr; + *sr++ = lr; + } + while (--count > 0); +} +#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */ + #ifndef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM static void channels_process_sound_chan_custom(int count, int32_t *buf[]) { @@ -1151,30 +1131,47 @@ int dsp_process(char *dst, const char *src[], int count) coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE); #endif + if (new_gain) + dsp_set_replaygain(); /* Gain has changed */ + + /* Testing function pointers for NULL is preferred since the pointer + will be preloaded to be used for the call if not. */ while (count > 0) { - samples = dsp->input_samples(count, src, tmp); + samples = MIN(SAMPLE_BUF_COUNT/2, count); count -= samples; - if (dsp->gain != 0) - apply_gain(samples, tmp); - if ((samples = resample(samples, tmp)) <= 0) + + dsp->input_samples(samples, src, tmp); + + if (dsp->apply_gain) + dsp->apply_gain(samples, &dsp->data, tmp); + + if (dsp->resample && (samples = resample(samples, tmp)) <= 0) break; /* I'm pretty sure we're downsampling here */ + if (dsp->apply_crossfeed) dsp->apply_crossfeed(samples, tmp); + /* TODO: EQ and tone controls need separate structs for audio and voice * DSP processing thanks to filter history. isn't really audible now, but - * might be the day we start handling voice more delicately. + * might be the day we start handling voice more delicately. Planned + * changes may well run all relevent channels through the same EQ so + * perhaps not. 
*/ if (eq_enabled) eq_process(samples, tmp); + #ifdef HAVE_SW_TONE_CONTROLS if ((bass | treble) != 0) eq_filter(tmp, &tone_filter, samples, dsp->data.num_channels, FILTER_BISHELF_SHIFT); #endif + if (dsp->channels_process) dsp->channels_process(samples, tmp); + dsp->output_samples(samples, &dsp->data, tmp, (int16_t *)dst); + written += samples; dst += samples * sizeof (int16_t) * 2; yield(); @@ -1245,9 +1242,6 @@ bool dsp_configure(int setting, intptr_t value) if (dsp == audio_dsp) { *var = value; - /* In case current gain is zero, force at least one call - to apply_gain or apply_gain won't pick up on new_gain */ - audio_dsp->gain = -1; new_gain = true; } } @@ -1282,15 +1276,7 @@ bool dsp_configure(int setting, intptr_t value) else dsp->frequency = dsp->codec_frequency; - resampler_set_delta(dsp->frequency); - - if (dsp->frequency == NATIVE_FREQUENCY) - dsp->resample = NULL; - else if (dsp->frequency < NATIVE_FREQUENCY) - dsp->resample = dsp_upsample; - else - dsp->resample = dsp_downsample; - + resampler_new_delta(); break; case DSP_SET_SAMPLE_DEPTH: @@ -1348,7 +1334,7 @@ bool dsp_configure(int setting, intptr_t value) case DSP_FLUSH: memset(&dsp->data.resample_data, 0, sizeof (dsp->data.resample_data)); - resampler_set_delta(dsp->frequency); + resampler_new_delta(); dither_init(); break; diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h index f8df337b37..14875d21d8 100644 --- a/apps/dsp_asm.h +++ b/apps/dsp_asm.h @@ -22,32 +22,61 @@ #ifndef _DSP_ASM_H #define _DSP_ASM_H +/* Set the appropriate #defines based on CPU or whatever matters */ #ifndef SIMULATOR -#if defined(CPU_COLDFIRE) || defined(CPU_ARM) +#if defined(CPU_ARM) +#define DSP_HAVE_ASM_RESAMPLING #define DSP_HAVE_ASM_CROSSFEED -void apply_crossfeed(int count, int32_t *buf[]); +#elif defined (CPU_COLDFIRE) +#define DSP_HAVE_ASM_APPLY_GAIN #define DSP_HAVE_ASM_RESAMPLING -int dsp_downsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); -int dsp_upsample(int count, struct dsp_data *data, 
int32_t *src[], int32_t *dst[]); -#endif /* defined(CPU_COLDFIRE) || defined(CPU_ARM) */ - -#if defined (CPU_COLDFIRE) +#define DSP_HAVE_ASM_CROSSFEED #define DSP_HAVE_ASM_SOUND_CHAN_MONO -void channels_process_sound_chan_mono(int count, int32_t *buf[]); #define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM -void channels_process_sound_chan_custom(int count, int32_t *buf[]); #define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE -void channels_process_sound_chan_karaoke(int count, int32_t *buf[]); - #define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO -void sample_output_mono(int count, struct dsp_data *data, - int32_t *src[], int16_t *dst); #define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO -void sample_output_stereo(int count, struct dsp_data *data, - int32_t *src[], int16_t *dst); #endif /* CPU_COLDFIRE */ #endif /* SIMULATOR */ +/* Declare prototypes based upon what's #defined above */ +#ifdef DSP_HAVE_ASM_CROSSFEED +void apply_crossfeed(int count, int32_t *buf[]); +#endif + +#ifdef DSP_HAVE_ASM_APPLY_GAIN +void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]); +#endif /* DSP_HAVE_ASM_APPLY_GAIN* */ + +#ifdef DSP_HAVE_ASM_RESAMPLING +int dsp_upsample(int count, struct dsp_data *data, + int32_t *src[], int32_t *dst[]); +int dsp_downsample(int count, struct dsp_data *data, + int32_t *src[], int32_t *dst[]); +#endif /* DSP_HAVE_ASM_RESAMPLING */ + +#ifdef DSP_HAVE_ASM_SOUND_CHAN_MONO +void channels_process_sound_chan_mono(int count, int32_t *buf[]); +#endif + +#ifdef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM +void channels_process_sound_chan_custom(int count, int32_t *buf[]); +#endif + +#ifdef DSP_HAVE_ASM_SOUND_CHAN_KARAOKE +void channels_process_sound_chan_karaoke(int count, int32_t *buf[]); +#endif + +#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO +void sample_output_stereo(int count, struct dsp_data *data, + int32_t *src[], int16_t *dst); +#endif + +#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO +void sample_output_mono(int count, struct dsp_data *data, + int32_t *src[], int16_t *dst); +#endif + #endif /* _DSP_ASM_H */ 
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S index af9ac1fa4b..e5d3ee8c55 100644 --- a/apps/dsp_cf.S +++ b/apps/dsp_cf.S @@ -19,68 +19,117 @@ ****************************************************************************/ /**************************************************************************** - * void apply_crossfeed(int count, int32_t *src[]) + * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) */ .section .text + .align 2 + .global dsp_apply_gain +dsp_apply_gain: + lea.l -20(%sp), %sp | save registers + movem.l %d2-%d4/%a2-%a3, (%sp) | + movem.l 28(%sp), %a0-%a1 | %a0 = data, + | %a1 = buf + move.l 4(%a0), %d1 | %d1 = data->num_channels + move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23) +10: | channel loop | + move.l 24(%sp), %d0 | %d0 = count + move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1] + move.l %a2, %a3 | %a3 = d = s + move.l (%a2)+, %d2 | %d2 = *s++, + mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1) + subq.l #1, %d0 | --count > 0 ? : effectively n++ + ble.b 30f | loop done | no? finish up +20: | loop | + move.l %accext01, %d4 | fetch S(n-1)[7:0] + movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d5[31:0] + asl.l #8, %d3 | *s++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0] + mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1) + move.b %d4, %d3 | + move.l %d3, (%a3)+ | + subq.l #1, %d0 | --count > 0 ? : effectively n++ + bgt.b 20b | loop | yes? 
do more samples +30: | loop done | + move.l %accext01, %d4 | fetch S(n-1)[7:0] + movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d5[31:0] + asl.l #8, %d3 | *s = (S(n-1)[40:8] << 8) | S(n-1)[7:0] + move.b %d4, %d3 | + move.l %d3, (%a3) | + subq.l #1, %d1 | next channel + bgt.b 10b | channel loop | + movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers + lea.l 20(%sp), %sp | cleanup stack + rts | + .size dsp_apply_gain,.-dsp_apply_gain + +/**************************************************************************** + * void apply_crossfeed(int count, int32_t *buf[]) + */ + .section .text + .align 2 .global apply_crossfeed apply_crossfeed: - lea.l -44(%sp), %sp + lea.l -44(%sp), %sp | movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] - lea.l crossfeed_data, %a1 - move.l (%a1)+, %a6 | a6 = direct gain + lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data + move.l (%a1)+, %d6 | %d6 = direct gain movem.l 12(%a1), %d0-%d3 | fetch filter history samples move.l 132(%a1), %a0 | fetch delay line address movem.l (%a1), %a1-%a3 | load filter coefs + lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit + bra.b 20f | loop start | go to loop start point /* Register usage in loop: * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), - * %a4 = src[0], %a5 = src[1], %a6 = direct gain, + * %a4 = buf[0], %a5 = buf[1], + * %a6 = delay line pointer wrap limit, * %d0..%d3 = history - * %d4..%d6 = temp. + * %d4..%d5 = temp. 
+ * %d6 = direct gain, * %d7 = count */ -.cfloop: - mac.l %a2, %d0, 4(%a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n] - mac.l %a1, %d0 , %acc0 | acc += b0*dr[n] - mac.l %a3, %d1, (%a4), %d4, %acc0 | acc += a1*y_l[n - 1], load L - move.l %acc0, %d1 | get filtered delayed sample - mac.l %a6, %d4, %acc0 | acc += gain*x_l[n] - movclr.l %acc0, %d6 | - move.l %d6, (%a4)+ | write result - - mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n] - mac.l %a1, %d2 , %acc0 | acc += b0*dl[n] - mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load R - movem.l %d4-%d5, (%a0) | save left & right inputs to delay line - move.l %acc0, %d3 | get filtered delayed sample - mac.l %a6, %d5, %acc0 | acc += gain*x_r[n] - lea.l 8(%a0), %a0 | increment delay pointer - movclr.l %acc0, %d6 | - move.l %d6, (%a5)+ | write result - - cmpa.l #crossfeed_data+136, %a0| wrap a0 if passed end - bge.b .cfwrap | - .word 0x51fb | tpf.l - trap the buffer wrap -.cfwrap: - lea.l -104(%a0), %a0 | wrap - subq.l #1, %d7 | --count < 0 ? - bgt.b .cfloop | +10: | loop | + movclr.l %acc0, %d4 | write outputs + move.l %d4, (%a4)+ | . + movclr.l %acc1, %d5 | . + move.l %d5, (%a5)+ | . 
+20: | loop start | + mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n] + mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n] + mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R + mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n] + mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n] + mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L + movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line + move.l %acc0, %d3 | get filtered delayed left sample (y_l[n]) + move.l %acc1, %d1 | get filtered delayed right sample (y_r[n]) + mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n] + mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n] + cmp.l %a6, %a0 | wrap %a0 if passed end + bhs.b 30f | wrap buffer | + .word 0x51fb | tpf.l | trap the buffer wrap +30: | wrap buffer | ...fwd taken branches more costly + lea.l -104(%a0), %a0 | wrap it up + subq.l #1, %d7 | --count > 0 ? + bgt.b 10b | loop | yes? do more + movclr.l %acc0, %d4 | write last outputs + move.l %d4, (%a4) | . + movclr.l %acc1, %d5 | . + move.l %d5, (%a5) | . 
lea.l crossfeed_data+16, %a1 | save data back to struct movem.l %d0-%d3, (%a1) | ...history move.l %a0, 120(%a1) | ...delay_p movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs - lea.l 44(%sp), %sp - rts -.cfend: - .size apply_crossfeed,.cfend-apply_crossfeed - + lea.l 44(%sp), %sp | + rts | + .size apply_crossfeed,.-apply_crossfeed /**************************************************************************** * int dsp_downsample(int count, struct dsp_data *data, * in32_t *src[], int32_t *dst[]) */ .section .text + .align 2 .global dsp_downsample dsp_downsample: lea.l -40(%sp), %sp | save non-clobberables @@ -92,7 +141,7 @@ dsp_downsample: movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels | %d4 = delta = data->resample_data.delta moveq.l #16, %d7 | %d7 = shift -.dschannel_loop: +10: | channel loop | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] @@ -102,15 +151,15 @@ dsp_downsample: move.l %d5, %d6 | %d6 = pos = phase >> 16 lsr.l %d7, %d6 | cmp.l %d2, %d6 | past end of samples? - bge.b .dsloop_skip | yes? skip loop + bge.b 40f | skip resample loop| yes? skip loop tst.l %d6 | need last sample of prev. frame? - bne.b .dsloop | no? start main loop + bne.b 20f | resample loop | no? start main loop move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] - bra.b .dsuse_last_start | start with last (last in %d0) -.dsloop: + bra.b 30f | resample start last | start with last (last in %d0) +20: | resample loop | lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] movem.l (%a5), %d0-%d1 | -.dsuse_last_start: +30: | resample start last | sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] move.l %d0, %acc0 | %acc0 = previous sample move.l %d5, %d0 | frac = (phase << 16) >> 1 @@ -123,11 +172,11 @@ dsp_downsample: movclr.l %acc0, %d0 | move.l %d0, (%a4)+ | *d++ = %d0 cmp.l %d2, %d6 | pos < count? - blt.b .dsloop | yes? 
continue resampling -.dsloop_skip: + blt.b 20b | resample loop | yes? continue resampling +40: | skip resample loop | subq.l #1, %d3 | ch > 0? - bgt.b .dschannel_loop | yes? process next channel - asl.l %d7, %d2 | wrap phase to start of next frame + bgt.b 10b | channel loop | yes? process next channel + lsl.l %d7, %d2 | wrap phase to start of next frame sub.l %d2, %d5 | data->resample_data.phase = move.l %d5, 12(%a0) | ... phase - (count << 16) move.l %a4, %d0 | return d - d[0] @@ -136,14 +185,14 @@ dsp_downsample: movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables lea.l 40(%sp), %sp | cleanup stack rts | buh-bye -.dsend: - .size dsp_downsample,.dsend-dsp_downsample + .size dsp_downsample,.-dsp_downsample /**************************************************************************** * int dsp_upsample(int count, struct dsp_data *dsp, - * in32_t *src[], int32_t *dst[]) + * int32_t *src[], int32_t *dst[]) */ .section .text + .align 2 .global dsp_upsample dsp_upsample: lea.l -40(%sp), %sp | save non-clobberables @@ -154,47 +203,55 @@ dsp_upsample: | %a2 = dst movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels | %d4 = delta = data->resample_data.delta - swap %d4 | swap delta to high word to use - | carries to increment position -.uschannel_loop: + swap %d4 | swap delta to high word to use... 
+ | ...carries to increment position +10: | channel loop | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] - lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count] + lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1] move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] - move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] + move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] + move.l (%a3)+, %d1 | fetch first sample - might throw this... + | ...away later but we'll be preincremented + move.l %d1, %d6 | save sample value + sub.l %d0, %d1 | %d1 = diff = s[0] - last swap %d5 | swap phase to high word to use | carries to increment position - move.l %d5, %d6 | %d6 = pos = phase >> 16 + move.l %d5, %d7 | %d7 = pos = phase >> 16 clr.w %d5 | - eor.l %d5, %d6 | pos == 0? - beq.b .usstart_0 | no? transistion from down - cmp.l %d2, %d6 | past end of samples? - bge.b .usloop_skip | yes? skip loop - lea.l -4(%a3, %d6.l*4), %a3 | %a3 = s = &s[pos-1] (previous) - move.l (%a3)+, %d0 | %d0 = *s++ - .word 0x51fa | tpf.w - trap next instruction -.usloop_1: + eor.l %d5, %d7 | pos == 0? + beq.b 40f | loop start | yes? start loop + cmp.l %d2, %d7 | past end of samples? + bge.b 50f | skip resample loop| yes? 
go to next channel and collect info + lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1] + movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos] + move.l %d1, %d6 | save sample value + sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] + bra.b 40f | loop start | +20: | next sample loop | move.l %d6, %d0 | move previous sample to %d0 -.usstart_0: move.l (%a3)+, %d1 | fetch next sample move.l %d1, %d6 | save sample value sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] -.usloop_0: +30: | same sample loop | + movclr.l %acc0, %d7 | %d7 = result + move.l %d7, (%a4)+ | *d++ = %d7 +40: | loop start | lsr.l #1, %d5 | make phase into frac + move.l %d0, %acc0 | %acc0 = s[pos-1] mac.l %d1, %d5, %acc0 | %acc0 = diff * frac lsl.l #1, %d5 | restore frac to phase - movclr.l %acc0, %d7 | %d7 = product - add.l %d0, %d7 | %d7 = last + product - move.l %d7, (%a4)+ | *d++ = %d7 add.l %d4, %d5 | phase += delta - bcc.b .usloop_0 | load next values? + bcc.b 30b | same sample loop | load next values? cmp.l %a5, %a3 | src <= src_end? - ble.b .usloop_1 | yes? continue resampling -.usloop_skip: + bls.b 20b | next sample loop | yes? continue resampling + movclr.l %acc0, %d7 | %d7 = result + move.l %d7, (%a4)+ | *d++ = %d7 +50: | skip resample loop | subq.l #1, %d3 | ch > 0? - bgt.b .uschannel_loop | yes? process next channel + bgt.b 10b | channel loop | yes? 
process next channel swap %d5 | wrap phase to start of next frame move.l %d5, 12(%a0) | ...and save in data->resample_data.phase move.l %a4, %d0 | return d - d[0] @@ -203,12 +260,7 @@ dsp_upsample: asr.l #2, %d0 | convert bytes->samples lea.l 40(%sp), %sp | cleanup stack rts | buh-bye -.usend: - .size dsp_upsample,.usend-dsp_upsample - -/* These routines might benefit from burst transfers but we'll keep them - * small for now since they're rather light weight - */ + .size dsp_upsample,.-dsp_upsample /**************************************************************************** * void channels_process_sound_chan_mono(int count, int32_t *buf[]) @@ -216,31 +268,39 @@ dsp_upsample: * Mix left and right channels 50/50 into a center channel. */ .section .text + .align 2 .global channels_process_sound_chan_mono channels_process_sound_chan_mono: movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf - lea.l -12(%sp), %sp | save registers - move.l %macsr, %d1 | - movem.l %d1-%d3, (%sp) | - move.l #0xb0, %macsr | put emac in rounding fractional mode + lea.l -20(%sp), %sp | save registers + movem.l %d2-%d4/%a2-%a3, (%sp) | movem.l (%a0), %a0-%a1 | get channel pointers + move.l %a0, %a2 | use separate dst pointers since read + move.l %a1, %a3 | pointers run one ahead of write move.l #0x40000000, %d3 | %d3 = 0.5 -1: - move.l (%a0), %d1 | L = R = l/2 + r/2 - mac.l %d1, %d3, (%a1), %d2, %acc0 | - mac.l %d2, %d3, %acc0 | - movclr.l %acc0, %d1 | - move.l %d1, (%a0)+ | output to original buffer - move.l %d1, (%a1)+ | - subq.l #1, %d0 | - bgt.s 1b | - movem.l (%sp), %d1-%d3 | restore registers - move.l %d1, %macsr | - lea.l 12(%sp), %sp | cleanup - rts -.cpmono_end: - .size channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono - + move.l (%a0)+, %d1 | prime the input registers + move.l (%a1)+, %d2 | + mac.l %d1, %d3, (%a0)+, %d1, %acc0 | + mac.l %d2, %d3, (%a1)+, %d2, %acc0 | + subq.l #1, %d0 | + ble.s 20f | loop done | +10: | loop | + movclr.l %acc0, %d4 | L = R = 
l/2 + r/2 + mac.l %d1, %d3, (%a0)+, %d1, %acc0 | + mac.l %d2, %d3, (%a1)+, %d2, %acc0 | + move.l %d4, (%a2)+ | output to original buffer + move.l %d4, (%a3)+ | + subq.l #1, %d0 | + bgt.s 10b | loop | +20: | loop done | + movclr.l %acc0, %d4 | output last sample + move.l %d4, (%a2) | + move.l %d4, (%a3) | + movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers + lea.l 20(%sp), %sp | cleanup + rts | + .size channels_process_sound_chan_mono, \ + .-channels_process_sound_chan_mono /**************************************************************************** * void channels_process_sound_chan_custom(int count, int32_t *buf[]) @@ -248,34 +308,47 @@ channels_process_sound_chan_mono: * Apply stereo width (narrowing/expanding) effect. */ .section .text + .align 2 .global channels_process_sound_chan_custom channels_process_sound_chan_custom: movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf - lea.l -16(%sp), %sp | save registers - move.l %macsr, %d1 | - movem.l %d1-%d4, (%sp) | - move.l #0xb0, %macsr | put emac in rounding fractional mode + lea.l -28(%sp), %sp | save registers + movem.l %d2-%d6/%a2-%a3, (%sp) | movem.l (%a0), %a0-%a1 | get channel pointers + move.l %a0, %a2 | use separate dst pointers since read + move.l %a1, %a3 | pointers run one ahead of write move.l dsp_sw_gain, %d3 | load straight (mid) gain move.l dsp_sw_cross, %d4 | load cross (side) gain -1: - move.l (%a0), %d1 | - mac.l %d1, %d3, (%a1), %d2, %acc0 | L = l*gain + r*cross - mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross - mac.l %d2, %d4 , %acc0 | - mac.l %d2, %d3 , %acc1 | - movclr.l %acc0, %d1 | - movclr.l %acc1, %d2 | - move.l %d1, (%a0)+ | - move.l %d2, (%a1)+ | + move.l (%a0)+, %d1 | prime the input registers + move.l (%a1)+, %d2 | + mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross + mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross + mac.l %d2, %d4 , %acc0 | + mac.l %d2, %d3, (%a1)+, %d2, %acc1 | subq.l #1, %d0 | - bgt.s 1b | - movem.l (%sp), %d1-%d4 | restore registers - move.l %d1, %macsr | - 
lea.l 16(%sp), %sp | cleanup - rts -.cpcustom_end: - .size channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom + ble.b 20f | loop done | +10: | loop | + movclr.l %acc0, %d5 | + movclr.l %acc1, %d6 | +15: | loop start | + mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross + mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross + mac.l %d2, %d4 , %acc0 | + mac.l %d2, %d3, (%a1)+, %d2, %acc1 | + move.l %d5, (%a2)+ | + move.l %d6, (%a3)+ | + subq.l #1, %d0 | + bgt.s 10b | loop | +20: | loop done | + movclr.l %acc0, %d5 | output last sample + movclr.l %acc1, %d6 | + move.l %d5, (%a2) | + move.l %d6, (%a3) | + movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers + lea.l 28(%sp), %sp | cleanup + rts | + .size channels_process_sound_chan_custom, \ + .-channels_process_sound_chan_custom /**************************************************************************** * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) @@ -283,31 +356,42 @@ channels_process_sound_chan_custom: * Separate channels into side channels. 
*/ .section .text + .align 2 .global channels_process_sound_chan_karaoke channels_process_sound_chan_karaoke: movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf - lea.l -16(%sp), %sp | save registers - move.l %macsr, %d1 | - movem.l %d1-%d4, (%sp) | - move.l #0xb0, %macsr | put emac in rounding fractional mode - movem.l (%a0), %a0-%a1 | get channel pointers - move.l #0x40000000, %d4 | %d3 = 0.5 -1: - move.l (%a0), %d1 | - msac.l %d1, %d4, (%a1), %d2, %acc0 | R = r/2 - l/2 - mac.l %d2, %d4 , %acc0 | - movclr.l %acc0, %d1 | - move.l %d1, (%a1)+ | - neg.l %d1 | L = -R = -(r/2 - l/2) = l/2 - r/2 - move.l %d1, (%a0)+ | - subq.l #1, %d0 | - bgt.s 1b | - movem.l (%sp), %d1-%d4 | restore registers - move.l %d1, %macsr | - lea.l 16(%sp), %sp | cleanup - rts -.cpkaraoke_end: - .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke + lea.l -20(%sp), %sp | save registers + movem.l %d2-%d4/%a2-%a3, (%sp) | + movem.l (%a0), %a0-%a1 | get channel src pointers + move.l %a0, %a2 | use separate dst pointers since read + move.l %a1, %a3 | pointers run one ahead of write + move.l #0x40000000, %d3 | %d3 = 0.5 + move.l (%a0)+, %d1 | prime the input registers + move.l (%a1)+, %d2 | + mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2 + msac.l %d2, %d3, (%a1)+, %d2, %acc0 | + subq.l #1, %d0 | + ble.b 20f | loop done | +10: | loop | + movclr.l %acc0, %d4 | + mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2 + msac.l %d2, %d3, (%a1)+, %d2, %acc0 | + move.l %d4, (%a2)+ | + neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2 + move.l %d4, (%a3)+ | + subq.l #1, %d0 | + bgt.s 10b | loop | +20: | loop done | + movclr.l %acc0, %d4 | output last sample + move.l %d4, (%a2) | + neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2 + move.l %d4, (%a3) | + movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers + lea.l 20(%sp), %sp | cleanup + rts | + .size channels_process_sound_chan_karaoke, \ + .-channels_process_sound_chan_karaoke + 
/**************************************************************************** * void sample_output_stereo(int count, struct dsp_data *data, * int32_t *src[], int16_t *dst) @@ -329,6 +413,7 @@ channels_process_sound_chan_karaoke: * */ .section .text + .align 2 .global sample_output_stereo sample_output_stereo: lea.l -44(%sp), %sp | save registers @@ -348,11 +433,11 @@ sample_output_stereo: add.l %a4, %d0 | and.l #0xfffffff0, %d0 | cmp.l %a0, %d0 | at least a full line? - bhi.w .sos_longloop_1_start | no? jump to trailing longword + bhi.w 40f | long loop 1 start | no? do as trailing longwords sub.l #16, %d0 | %d1 = first line bound cmp.l %a4, %d0 | any leading longwords? - bls.b .sos_lineloop_start | no? jump to line loop -.sos_longloop_0: + bls.b 20f | line loop start | no? start line loop +10: | long loop 0 | move.l (%a2)+, %d1 | read longword from L and R mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word mac.l %d2, %a1, %acc1 | shift R to high word @@ -362,10 +447,10 @@ sample_output_stereo: move.w %d2, %d1 | interleave MS 16 bits of each move.l %d1, (%a4)+ | ...and write both cmp.l %a4, %d0 | - bhi.b .sos_longloop_0 | -.sos_lineloop_start: + bhi.b 10b | long loop 0 | +20: | line loop start | lea.l -12(%a0), %a5 | %a5 = at or just before last line bound -.sos_lineloop: +30: | line loop | move.l (%a3)+, %d4 | get next 4 R samples and scale mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation mac.l %d5, %a1, (%a3)+, %d6, %acc1 | @@ -394,11 +479,11 @@ sample_output_stereo: move.w %d7, %d3 | movem.l %d0-%d3, -16(%a4) | write four stereo samples cmp.l %a4, %a5 | - bhi.b .sos_lineloop | -.sos_longloop_1_start: + bhi.b 30b | line loop | +40: | long loop 1 start | cmp.l %a4, %a0 | any longwords left? - bls.b .sos_done | no? finished. -.sos_longloop_1: + bls.b 60f | output end | no? 
stop +50: | long loop 1 | move.l (%a2)+, %d1 | handle trailing longwords mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones mac.l %d2, %a1, %acc1 | @@ -408,14 +493,13 @@ sample_output_stereo: move.w %d2, %d1 | move.l %d1, (%a4)+ | cmp.l %a4, %a0 | - bhi.b .sos_longloop_1 | -.sos_done: + bhi.b 50b | long loop 1 +60: | output end | movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers move.l %d1, %macsr | lea.l 44(%sp), %sp | cleanup rts | -.sos_end: - .size sample_output_stereo, .sos_end-sample_output_stereo + .size sample_output_stereo, .-sample_output_stereo /**************************************************************************** * void sample_output_mono(int count, struct dsp_data *data, @@ -424,6 +508,7 @@ sample_output_stereo: * Same treatment as sample_output_stereo but for one channel. */ .section .text + .align 2 .global sample_output_mono sample_output_mono: lea.l -28(%sp), %sp | save registers @@ -442,11 +527,11 @@ sample_output_mono: add.l %a3, %d0 | and.l #0xfffffff0, %d0 | cmp.l %a0, %d0 | at least a full line? - bhi.w .som_longloop_1_start | no? jump to trailing longword + bhi.w 40f | long loop 1 start | no? do as trailing longwords sub.l #16, %d0 | %d1 = first line bound cmp.l %a3, %d0 | any leading longwords? - bls.b .som_lineloop_start | no? jump to line loop -.som_longloop_0: + bls.b 20f | line loop start | no? 
start line loop +10: | long loop 0 | move.l (%a2)+, %d1 | read longword from L and R mac.l %d1, %d5, %acc0 | shift L to high word movclr.l %acc0, %d1 | get possibly saturated results @@ -455,10 +540,10 @@ sample_output_mono: move.w %d2, %d1 | duplicate single channel into move.l %d1, (%a3)+ | L and R cmp.l %a3, %d0 | - bhi.b .som_longloop_0 | -.som_lineloop_start: + bhi.b 10b | long loop 0 | +20: | line loop start | lea.l -12(%a0), %a1 | %a1 = at or just before last line bound -.som_lineloop: +30: | line loop | move.l (%a2)+, %d0 | get next 4 L samples and scale mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation mac.l %d1, %d5, (%a2)+, %d2, %acc1 | @@ -483,11 +568,11 @@ sample_output_mono: move.w %d4, %d3 | movem.l %d0-%d3, -16(%a3) | write four stereo samples cmp.l %a3, %a1 | - bhi.b .som_lineloop | -.som_longloop_1_start: + bhi.b 30b | line loop | +40: | long loop 1 start | cmp.l %a3, %a0 | any longwords left? - bls.b .som_done | no? finished. -.som_longloop_1: + bls.b 60f | output end | no? stop +50: | long loop 1 | move.l (%a2)+, %d1 | handle trailing longwords mac.l %d1, %d5, %acc0 | the same way as leading ones movclr.l %acc0, %d1 | @@ -496,11 +581,10 @@ sample_output_mono: move.w %d2, %d1 | move.l %d1, (%a3)+ | cmp.l %a3, %a0 | - bhi.b .som_longloop_1 | -.som_done: + bhi.b 50b | long loop 1 | +60: | output end | movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers move.l %d1, %macsr | lea.l 28(%sp), %sp | cleanup rts | -.som_end: - .size sample_output_mono, .som_end-sample_output_mono + .size sample_output_mono, .-sample_output_mono |