1
0
mirror of http://mpg123.de/trunk/.git synced 2025-10-23 16:48:31 +03:00

mpg123: Revamped audio output logic for resampling and pitching.

Now the integration of the libsyn123 resampler and handling of the
NtoM decoder are hopefully correct in the again-enlarged audio.c,
which became a hollow shell with the advent of libout123, and now
is strong again.

Settling decoder and output formats has historically been the most
confusing aspect of libmpg123, and I make it worse by integrating
post-decoder resampling, with pitching applied in the decoder, in the
output device, or in said resampling between these two. Insanity!

The new resampler is the default for forcing output rates now. If
you wonder why, try this in the current mpg123 source tree with
your fresh build:

for resampler in ntom dirty fine
do
  src/mpg123 -r 22040 --resample $resampler --pitch -0.72 \
    src/tests/sweep.mp3
done

With a pure sine sweep like that (generated via out123), you
can appreciate

a) the quality differences between resamplers, and
b) how little those seem to matter when you just listen
   to music.

Really, without a detailed comparison and some noisy pop/rock
music, it is surprisingly hard to tell how bad the drop-sample
resampling of the NtoM decoder really is.

But this has a price: Even though I took great pains in designing
the syn123 resampler, it needs more computing time than the
MPEG decoder itself. That's life. You can make resamplers that
are faster, but at the cost of increased latency which makes
programming tedious. Here, I just ignored that aspect, as the
syn123 resampling latency is just a handful of samples, well
below the farts you get from ALSA on closing a device.




git-svn-id: svn://scm.orgis.org/mpg123/trunk@4662 35dc7657-300d-0410-a2e5-dc2837fedb53
This commit is contained in:
thor
2020-04-26 16:33:28 +00:00
parent 01ea1fb42d
commit 8e8da9974c
4 changed files with 166 additions and 108 deletions

View File

@@ -254,13 +254,25 @@ change this if you need a constant bitrate independent of
the mpeg stream rate. mpg123 automagically converts the
rate. You should then combine this with \-\-stereo or \-\-mono.
.TP
\fB\-\^\-resample \fImethod
Set resampling method to employ if forcing an output rate. Choices (case-insensitive) are NtoM,
dirty, and fine. The fine resampler is the default. It employs libsyn123's low-latency, fairly
efficient resampler to postprocess the output from libmpg123 instead of the fast but very crude
NtoM decoder (drop sample method) that mpg123 has offered for decades. If you are really low on
CPU time, choose NtoM, as the resampler usually needs more time than the MPEG decoder itself.
The mpg123 program is smart enough to combine the 2to1 or 4to1 downsampling modes with the
postprocessing for extreme downsampling.
.TP
.BR \-2 ", " \-\^\-2to1 "; " \-4 ", " \-\^\-4to1
Performs a downsampling of ratio 2:1 (22 kHz) or 4:1 (11 kHz)
on the output stream, respectively. Saves some CPU cycles, but
at least the 4:1 ratio sounds ugly.
Performs a downsampling of ratio 2:1 (22 kHz from 44.1 kHz) or 4:1 (11 kHz)
on the output stream, respectively. Saves some CPU cycles, but of course throws away
the high frequencies, as the decoder does not bother producing them.
.TP
.BR \-\-pitch\ \fIvalue
Set hardware pitch (speedup/down, 0 is neutral; 0.05 is 5%). This changes the output sampling rate, so it only works in the range your audio system/hardware supports.
Set a pitch change (speedup/down, 0 is neutral; 0.05 is 5% speedup). When not enforcing an
output rate, this changes the output sampling rate, so it only works in the range your audio
system/hardware supports. When you combine this with a fixed output rate, it modifies a
software resampling ratio instead.
.TP
.BR \-\-8bit
Forces 8bit output

View File

@@ -47,6 +47,7 @@
static syn123_handle *sh = NULL;
static struct mpg123_fmt outfmt = { .encoding=0, .rate=0, .channels=0 };
static int outch = 0; // currently used number of output channels
// A convoluted way to say outch*4, for semantic clarity.
#define RESAMPLE_FRAMESIZE(ch) ((ch)*MPG123_SAMPLESIZE(MPG123_ENC_FLOAT_32))
#define OUTPUT_FRAMESIZE(ch) ((ch)*MPG123_SAMPLESIZE(outfmt.encoding))
@@ -61,20 +62,28 @@ static size_t resample_block = 0;
// 1152*48/44.1*2*4 = 10032 ... let's go 16K.
// This should work for final output data, too.
// We'll loop over pieces if the buffer size is not enough for upsampling.
static size_t resample_bytes = 1<<14;
static size_t resample_bytes = 1<<16;
int do_resample = 0;
int do_resample_now = 0; // really apply resampler for current stream.
/* Quick-shot paired table setup with remembering search in it.
this is for storing pairs of output sampling rate and decoding
sampling rate. */
struct ratepair { long a; long b; };
static long *outrates = NULL;
static struct ratepair *unpitch = NULL;
static int audio_capabilities(out123_handle *ao, mpg123_handle *mh);
#define CLEAN_POINTER(p, func) if(p) func(p); p = NULL;
void audio_cleanup(void)
{
if(sh)
syn123_del(sh);
if(resample_outbuf)
free(resample_outbuf);
if(resample_buffer)
free(resample_buffer);
CLEAN_POINTER(outrates, free)
CLEAN_POINTER(unpitch, free)
CLEAN_POINTER(sh, syn123_del)
CLEAN_POINTER(resample_outbuf, free)
CLEAN_POINTER(resample_buffer, free)
}
int audio_setup(out123_handle *ao, mpg123_handle *mh)
@@ -96,8 +105,8 @@ int audio_setup(out123_handle *ao, mpg123_handle *mh)
merror("Cannot initialize syn123: %s\n", syn123_strerror(err));
return -1;
}
resample_buffer = malloc(resample_bytes);
resample_outbuf = malloc(resample_bytes);
resample_buffer = malloc(resample_bytes*10);
resample_outbuf = malloc(resample_bytes*10);
if(!resample_buffer || !resample_outbuf)
return -1;
}
@@ -106,9 +115,9 @@ int audio_setup(out123_handle *ao, mpg123_handle *mh)
int audio_prepare(out123_handle *ao, long rate, int channels, int encoding)
{
mdebug( "audio_prepare %ld Hz / %ld Hz, %i ch, enc %i"
, rate, outfmt.rate, channels, encoding );
if(do_resample && rate == outfmt.rate)
mdebug( "audio_prepare %ld Hz / %ld Hz, %i ch, enc %s"
, rate, outfmt.rate, channels, out123_enc_name(encoding) );
if(do_resample && param.pitch == 0. && rate == outfmt.rate)
{
do_resample_now = 0;
debug("disabled resampler for native rate");
@@ -116,11 +125,11 @@ int audio_prepare(out123_handle *ao, long rate, int channels, int encoding)
{
do_resample_now = 1;
// Smooth option could be considered once pitching is implemented with the
// resampler.The exiting state might fit the coming data if this is two
// resampler. The existing state might fit the coming data if this is two
// seamless tracks. If not, it's just the first few samples that differ
// significantly depending on which data went through the resampler
// previously.
int err = syn123_setup_resample( sh, rate, outfmt.rate, channels
int err = syn123_setup_resample( sh, pitch_rate(rate), outfmt.rate, channels
, (param.resample < 2), 0 );
if(err)
{
@@ -135,14 +144,17 @@ int audio_prepare(out123_handle *ao, long rate, int channels, int encoding)
? RESAMPLE_FRAMESIZE(channels)
: OUTPUT_FRAMESIZE(channels) );
// Minimum amount of input samples to fill the buffer.
resample_block = syn123_resample_fillcount(rate, outfmt.rate, frames);
resample_block = syn123_resample_fillcount(pitch_rate(rate), outfmt.rate, frames);
if(!resample_block)
return -1; // WTF? No comment.
mdebug("resampler setup %ld -> %ld, block %zu", rate, outfmt.rate, resample_block);
mdebug("resampler setup %ld -> %ld, block %zu", pitch_rate(rate), outfmt.rate, resample_block);
rate = outfmt.rate;
encoding = outfmt.encoding;
}
return out123_start(ao, pitch_rate(rate), channels, encoding);
} else if(outfmt.rate)
rate = outfmt.rate; // That's pitching with NtoM.
else
rate = pitch_rate(rate); // That's plain hardware pitching.
return out123_start(ao, rate, channels, encoding);
}
// Loop over blocks with the resampler, think about intflag.
@@ -216,7 +228,7 @@ static void capline(mpg123_handle *mh, long rate, struct mpg123_fmt *outfmt)
const int *encs;
size_t num_encs;
mpg123_encodings(&encs, &num_encs);
fprintf(stderr," %5ld |", pitch_rate(outfmt ? outfmt->rate : rate));
fprintf(stderr," %5ld |", outfmt ? outfmt->rate : rate);
for(enci=0; enci<num_encs; ++enci)
{
int fmt = outfmt
@@ -272,22 +284,29 @@ void print_capabilities(out123_handle *ao, mpg123_handle *mh)
if(do_resample)
capline(mh, 0, &outfmt);
else
capline(mh, param.force_rate, NULL);
capline(mh, bpitch_rate(param.force_rate), NULL);
}
fprintf(stderr,"\n");
if(do_resample)
{
if(param.pitch != 0.)
fprintf( stderr, "Resampler with pitch: %g\n"
, param.pitch );
else
fprintf(stderr, "Resampler configured.\n");
fprintf( stderr, "%s\n%s\n"
, "Resampler configured. Decoding to f32 as intermediate if needed."
, "Decoding to f32 as intermediate if needed."
, "Resampler output format is in the last line." );
}
else if(param.force_rate)
fprintf( stderr, "%s\n"
, "Decoder rate forced. Resulting format support shown in last line." );
fprintf( stderr
, "%s rate forced. Resulting format support shown in last line.\n"
, param.pitch != 0. ? "Pitched decoder" : "Decoder" );
else if(param.pitch != 0.)
fprintf( stderr, "Actual output rates adjusted by pitch value %g.\n"
, param.pitch );
}
/* Quick-shot paired table setup with remembering search in it.
this is for storing pairs of output sampling rate and decoding
sampling rate. */
struct ratepair { long a; long b; };
long brate(struct ratepair *table, long arate, int count, int *last)
{
@@ -330,12 +349,12 @@ static int audio_capabilities(out123_handle *ao, mpg123_handle *mh)
/* Pitching introduces a difference between decoder rate and playback rate. */
long decode_rate;
const long *rates;
long *outrates;
struct ratepair *unpitch;
struct mpg123_fmt *outfmts = NULL;
int fmtcount;
size_t num_rates, rlimit;
long ntom_rate = do_resample ? 0 : param.force_rate;
if(param.pitch < -0.99)
param.pitch = -0.99;
long ntom_rate = do_resample ? 0 : bpitch_rate(param.force_rate);
outfmt.rate = param.force_rate;
outfmt.channels = 0;
outfmt.encoding = 0;
@@ -371,14 +390,18 @@ static int audio_capabilities(out123_handle *ao, mpg123_handle *mh)
, (unsigned)force_fmt, out123_enc_name(force_fmt));
}
// A possible optimization for resampling mode is to keep existing output
// format support configured and don't even interrupt the output device at
// all. If you change pitch, you just change a number for the resampler.
// But currently, the idea of re-opening the output device on format
// changes is rather ingrained in mpg123.
if(do_resample)
{
if(param.pitch != 0)
fprintf(stderr, "WARNING: interaction of pitch and resampler not yet settled\n");
// If really doing the extra resampling, output will always run with
// this setup, regardless of decoder.
int enc1 = out123_encodings(ao, param.force_rate, 1);
int enc2 = out123_encodings(ao, param.force_rate, 2);
int enc1 = out123_encodings(ao, outfmt.rate, 1);
int enc2 = out123_encodings(ao, outfmt.rate, 2);
if(force_fmt)
{
enc1 &= force_fmt;
@@ -431,27 +454,80 @@ static int audio_capabilities(out123_handle *ao, mpg123_handle *mh)
error("Perhaps your forced output encoding is not supported.");
return -1;
}
const char *encname = out123_enc_name(outfmt.encoding);
if(param.verbose > 1)
for(int ch=MPG123_MONO; ch<=MPG123_STEREO; ++ch)
if(outfmt.channels & ch)
fprintf(stderr, "Note: output format %li Hz, %s, %s\n"
, outfmt.rate, ch==MPG123_MONO ? "mono" : "stereo"
, encname ? encname : "???" );
}
/* Lots of preparation of rate lists. */
rlimit = ntom_rate > 0 ? num_rates+1 : num_rates;
outrates = malloc(sizeof(*rates)*rlimit);
unpitch = malloc(sizeof(*unpitch)*rlimit);
// Either enable or disable rate forcing, with ntom_rate non-zero or not.
if(mpg123_param(mh, MPG123_FORCE_RATE, ntom_rate, 0) != MPG123_OK)
{
merror("Cannot force NtoM rate: %s", mpg123_strerror(mh));
return -1;
}
if(ntom_rate)
{
// Only that one rate is enforced. Nothing else needs to be checked.
// For pitching, ntom_rate has been adjusted. The output uses outfmt.rate.
// Need to tell mpg123 about the forced rate to make it work.
for(int ch=1; ch<=2; ++ch)
{
int fmts = out123_encodings(ao, outfmt.rate, ch);
if(param.verbose > 2)
fprintf( stderr
, "Note: output support for %li Hz, %s: 0x%x\n"
, outfmt.rate, ch==MPG123_MONO ? "mono" : "stereo", fmts );
if(force_fmt)
fmts = ((fmts & force_fmt) == force_fmt) ? force_fmt : 0;
mpg123_format(mh, ntom_rate, ch, fmts);
}
} else if(do_resample)
{
// Support any decoding rate with float output for the resampler and also
// direct decoding to configured output format.
// One twist: Disable high rates with signal that the resampler will throw
// away anyway. This includes pitch. 22040 Hz output rate with pitch 0.5
// still wants the full 44100 Hz input data, as original signal up to
// 22040 Hz will be heard as up to 11020 Hz. So we want pitch_rate()
// to be above outfmt.rate. Final resampling ratio not above 2.
for(ri=0; ri<num_rates; ++ri)
{
if(rates[ri] > 12000 && pitch_rate(rates[ri]) > outfmt.rate*2)
break;
int fmt = (param.pitch == 0. && rates[ri] == outfmt.rate)
? outfmt.encoding
: MPG123_ENC_FLOAT_32;
mpg123_format(mh, rates[ri], outfmt.channels, fmt);
}
} else
{
// Finally, the old style, direct decoding to possibly pitched output.
if(!outrates)
outrates = malloc(sizeof(*rates)*num_rates);
if(!unpitch)
unpitch = malloc(sizeof(*unpitch)*num_rates);
if(!outrates || !unpitch)
{
CLEAN_POINTER(outrates, free)
CLEAN_POINTER(unpitch, free)
error("DOOM");
return -1;
}
for(ri = 0; ri<rlimit; ri++)
for(ri = 0; ri<num_rates; ri++)
{
decode_rate = ri < num_rates ? rates[ri] : ntom_rate;
decode_rate = rates[ri];
outrates[ri] = pitch_rate(decode_rate);
unpitch[ri].a = outrates[ri];
unpitch[ri].b = decode_rate;
}
/* Actually query formats possible with given rates. */
fmtcount = out123_formats(ao, outrates, rlimit, 1, 2, &outfmts);
free(outrates);
fmtcount = out123_formats(ao, outrates, num_rates, 1, 2, &outfmts);
// Remember: First one is a default format, then come my rates.
if(fmtcount > 0)
{
int fi;
@@ -471,41 +547,13 @@ static int audio_capabilities(out123_handle *ao, mpg123_handle *mh)
, "Note: output support for %li Hz, %i channels: 0x%x\n"
, outfmts[fi].rate, outfmts[fi].channels, outfmts[fi].encoding );
if(force_fmt)
{ /* Filter for forced encoding. */
if((fmts & force_fmt) == force_fmt)
fmts = force_fmt;
else /* Nothing else! */
fmts = 0;
}
// Support the resampler or native playback. Condition for the resampler
// to work is decoding to float and keeping a channel count compatible
// with configured output (in a case that might differ for various encodings).
long decode_rate = brate(unpitch, outfmts[fi].rate, rlimit, &unpitch_i);
if(do_resample && decode_rate != outfmt.rate)
{
fmts = 0;
// Only enable float outupt for resampler if needed and channel
// count supported for real output format.
if((outfmts[fi].channels & outfmt.channels) == outfmts[fi].channels)
fmts = MPG123_ENC_FLOAT_32;
// Also, be smart and let the internal downsampling work for small output
// rates. If target is 22050, decoding to 44100 and 48000 is not sensible,
// rather do 22050 or 24000. We have a factor of 4 to play with.
// So any input file, with max rate of 48000, can be decoded down to
// 12000 at least, actually saving computing time, if not in the decoder,
// then in the resampler.
// Every rate above 12000 can be halved to still get a valid MPEG rate.
// Output of 12001 Hz needs decoding to 16000 up to 23999, but not more.
// My resampler does not care about very small resampling steps; the less
// samples, the less work.
if(decode_rate > 12000 && decode_rate > outfmt.rate*2)
fmts = 0;
}
fmts = ((fmts & force_fmt) == force_fmt) ? force_fmt : 0;
decode_rate = brate(unpitch, outfmts[fi].rate, num_rates, &unpitch_i);
mpg123_format(mh, decode_rate, outfmts[fi].channels, fmts);
}
}
free(outfmts);
free(unpitch);
}
if(param.verbose > 1) print_capabilities(ao, mh);
@@ -534,7 +582,6 @@ int set_pitch(mpg123_handle *fr, out123_handle *ao, double new_pitch)
}
param.pitch = new_pitch;
if(param.pitch < -0.99) param.pitch = -0.99;
if(channels == 1) smode = MPG123_MONO;
if(channels == 2) smode = MPG123_STEREO;
@@ -552,7 +599,7 @@ int set_pitch(mpg123_handle *fr, out123_handle *ao, double new_pitch)
param.pitch = old_pitch;
audio_capabilities(ao, fr);
}
return out123_start(ao, pitch_rate(rate), channels, format);
return audio_prepare(ao, rate, channels, format);
}
int set_mute(out123_handle *ao, int mutestate)

View File

@@ -21,6 +21,8 @@
#include "out123.h"
#define pitch_rate(rate) (param.pitch == 0 ? (rate) : (long) ((param.pitch+1.0)*(rate)))
#define bpitch_rate(rate) (param.pitch == 0 ? (rate) : (long) ((rate)/(param.pitch+1.0)))
void audio_cleanup(void);
int audio_setup(out123_handle *ao, mpg123_handle *mh);

View File

@@ -1124,7 +1124,6 @@ int main(int sys_argc, char ** sys_argv)
if(dnow != 0) param.cpu = (dnow == SET_3DNOW) ? "3dnow" : "i586";
#endif
if(param.cpu != NULL && (!strcmp(param.cpu, "auto") || !strcmp(param.cpu, ""))) param.cpu = NULL;
long ntom_rate = param.resample ? 0 : param.force_rate;
if(!( MPG123_OK == (result = mpg123_par(mp, MPG123_VERBOSE, param.verbose, 0))
&& ++libpar
&& MPG123_OK == (result = mpg123_par(mp, MPG123_FLAGS, param.flags, 0))
@@ -1133,8 +1132,6 @@ int main(int sys_argc, char ** sys_argv)
&& ++libpar
&& MPG123_OK == (result = mpg123_par(mp, MPG123_RVA, param.rva, 0))
&& ++libpar
&& MPG123_OK == (result = mpg123_par(mp, MPG123_FORCE_RATE, ntom_rate, 0))
&& ++libpar
&& MPG123_OK == (result = mpg123_par(mp, MPG123_DOWNSPEED, param.halfspeed, 0))
&& ++libpar
&& MPG123_OK == (result = mpg123_par(mp, MPG123_UPSPEED, param.doublespeed, 0))
@@ -1632,11 +1629,11 @@ static void long_usage(int err)
fprintf(o," -m --mono --mix mix stereo to mono\n");
fprintf(o," --stereo duplicate mono channel\n");
fprintf(o," -r --rate force a specific audio output rate\n");
fprintf(o," --resample choose resampling mode for forced rate:\n"
fprintf(o," --resample <s> choose resampling mode for forced rate:\n"
" NtoM, dirty, fine (default)\n");
fprintf(o," -2 --2to1 2:1 downsampling\n");
fprintf(o," -4 --4to1 4:1 downsampling\n");
fprintf(o," --pitch <value> set hardware pitch (speedup/down, 0 is neutral; 0.05 is 5%%)\n");
fprintf(o," --pitch <value> set pitch (speedup/down, 0 is neutral; 0.05 is 5%%)\n");
fprintf(o," --8bit force 8 bit output\n");
fprintf(o," --float force floating point output (internal precision)\n");
fprintf(o," -e <c> --encoding <c> force a specific encoding (%s)\n"