diff --git a/man1/mpg123.1 b/man1/mpg123.1 index f73d73c8..ce1b0bb5 100644 --- a/man1/mpg123.1 +++ b/man1/mpg123.1 @@ -254,13 +254,25 @@ change this if you need a constant bitrate independent of the mpeg stream rate. mpg123 automagically converts the rate. You should then combine this with \-\-stereo or \-\-mono. .TP +\fB\-\^\-resample \fImethod +Set resampling method to employ if forcing an output rate. Choices (case-insensitive) are NtoM, +dirty, and fine. The fine resampler is the default. It employs libsyn123's low-latency fairly +efficient resampler to postprocess the output from libmpg123 instead of the fast but very crude +NtoM decoder (drop sample method) that mpg123 has offered for decades. If you are really low on +CPU time, choose NtoM, as the resampler usually needs more time than the MPEG decoder itself. +The mpg123 program is smart enough to combine the 2to1 or 4to1 downsampling modes with the +postprocessing for extreme downsampling. +.TP .BR \-2 ", " \-\^\-2to1 "; " \-4 ", " \-\^\-4to1 -Performs a downsampling of ratio 2:1 (22 kHz) or 4:1 (11 kHz) -on the output stream, respectively. Saves some CPU cycles, but -at least the 4:1 ratio sounds ugly. +Performs a downsampling of ratio 2:1 (22 kHz from 44.1 kHz) or 4:1 (11 kHz) +on the output stream, respectively. Saves some CPU cycles, but of course throws away +the high frequencies, as the decoder does not bother producing them. .TP .BR \-\-pitch\ \fIvalue -Set hardware pitch (speedup/down, 0 is neutral; 0.05 is 5%). This changes the output sampling rate, so it only works in the range your audio system/hardware supports. +Set a pitch change (speedup/down, 0 is neutral; 0.05 is 5% speedup). When not enforcing an +output rate, this changes the output sampling rate, so it only works in the range your audio +system/hardware supports. When you combine this with a fixed output rate, it modifies a +software resampling ratio instead. 
.TP .BR \-\-8bit Forces 8bit output diff --git a/src/audio.c b/src/audio.c index 9fc0386f..92db786f 100644 --- a/src/audio.c +++ b/src/audio.c @@ -47,6 +47,7 @@ static syn123_handle *sh = NULL; static struct mpg123_fmt outfmt = { .encoding=0, .rate=0, .channels=0 }; static int outch = 0; // currently used number of output channels + // A convoluted way to say outch*4, for semantic clarity. #define RESAMPLE_FRAMESIZE(ch) ((ch)*MPG123_SAMPLESIZE(MPG123_ENC_FLOAT_32)) #define OUTPUT_FRAMESIZE(ch) ((ch)*MPG123_SAMPLESIZE(outfmt.encoding)) @@ -61,20 +62,28 @@ static size_t resample_block = 0; // 1152*48/44.1*2*4 = 10032 ... let's go 16K. // This should work for final output data, too. // We'll loop over pieces if the buffer size is not enough for upsampling. -static size_t resample_bytes = 1<<14; +static size_t resample_bytes = 1<<16; int do_resample = 0; int do_resample_now = 0; // really apply resampler for current stream. +/* Quick-shot paired table setup with remembering search in it. + this is for storing pairs of output sampling rate and decoding + sampling rate. 
*/ +struct ratepair { long a; long b; }; +static long *outrates = NULL; +static struct ratepair *unpitch = NULL; + + static int audio_capabilities(out123_handle *ao, mpg123_handle *mh); +#define CLEAN_POINTER(p, func) if(p) func(p); p = NULL; void audio_cleanup(void) { - if(sh) - syn123_del(sh); - if(resample_outbuf) - free(resample_outbuf); - if(resample_buffer) - free(resample_buffer); + CLEAN_POINTER(outrates, free) + CLEAN_POINTER(unpitch, free) + CLEAN_POINTER(sh, syn123_del) + CLEAN_POINTER(resample_outbuf, free) + CLEAN_POINTER(resample_buffer, free) } int audio_setup(out123_handle *ao, mpg123_handle *mh) @@ -96,8 +105,8 @@ int audio_setup(out123_handle *ao, mpg123_handle *mh) merror("Cannot initialize syn123: %s\n", syn123_strerror(err)); return -1; } - resample_buffer = malloc(resample_bytes); - resample_outbuf = malloc(resample_bytes); + resample_buffer = malloc(resample_bytes*10); + resample_outbuf = malloc(resample_bytes*10); if(!resample_buffer || !resample_outbuf) return -1; } @@ -106,9 +115,9 @@ int audio_setup(out123_handle *ao, mpg123_handle *mh) int audio_prepare(out123_handle *ao, long rate, int channels, int encoding) { - mdebug( "audio_prepare %ld Hz / %ld Hz, %i ch, enc %i" - , rate, outfmt.rate, channels, encoding ); - if(do_resample && rate == outfmt.rate) + mdebug( "audio_prepare %ld Hz / %ld Hz, %i ch, enc %s" + , rate, outfmt.rate, channels, out123_enc_name(encoding) ); + if(do_resample && param.pitch == 0. && rate == outfmt.rate) { do_resample_now = 0; debug("disabled resampler for native rate"); @@ -116,11 +125,11 @@ int audio_prepare(out123_handle *ao, long rate, int channels, int encoding) { do_resample_now = 1; // Smooth option could be considered once pitching is implemented with the - // resampler.The exiting state might fit the coming data if this is two + // resampler.The existing state might fit the coming data if this is two // seamless tracks. 
If not, it's jut the first few samples that differ // significantly depending on which data went through the resampler // previously. - int err = syn123_setup_resample( sh, rate, outfmt.rate, channels + int err = syn123_setup_resample( sh, pitch_rate(rate), outfmt.rate, channels , (param.resample < 2), 0 ); if(err) { @@ -135,14 +144,17 @@ int audio_prepare(out123_handle *ao, long rate, int channels, int encoding) ? RESAMPLE_FRAMESIZE(channels) : OUTPUT_FRAMESIZE(channels) ); // Minimum amount of input samples to fill the buffer. - resample_block = syn123_resample_fillcount(rate, outfmt.rate, frames); + resample_block = syn123_resample_fillcount(pitch_rate(rate), outfmt.rate, frames); if(!resample_block) return -1; // WTF? No comment. - mdebug("resampler setup %ld -> %ld, block %zu", rate, outfmt.rate, resample_block); + mdebug("resampler setup %ld -> %ld, block %zu", pitch_rate(rate), outfmt.rate, resample_block); rate = outfmt.rate; encoding = outfmt.encoding; - } - return out123_start(ao, pitch_rate(rate), channels, encoding); + } else if(outfmt.rate) + rate = outfmt.rate; // That's pitching with NtoM. + else + rate = pitch_rate(rate); // That's plain hardware pitching. + return out123_start(ao, rate, channels, encoding); } // Loop over blocks with the resampler, think about intflag. @@ -216,7 +228,7 @@ static void capline(mpg123_handle *mh, long rate, struct mpg123_fmt *outfmt) const int *encs; size_t num_encs; mpg123_encodings(&encs, &num_encs); - fprintf(stderr," %5ld |", pitch_rate(outfmt ? outfmt->rate : rate)); + fprintf(stderr," %5ld |", outfmt ? outfmt->rate : rate); for(enci=0; enci 1) + for(int ch=MPG123_MONO; ch<=MPG123_STEREO; ++ch) + if(outfmt.channels & ch) + fprintf(stderr, "Note: output format %li Hz, %s, %s\n" + , outfmt.rate, ch==MPG123_MONO ? "mono" : "stereo" + , encname ? encname : "???" ); } - /* Lots of preparation of rate lists. */ - rlimit = ntom_rate > 0 ? 
num_rates+1 : num_rates; - outrates = malloc(sizeof(*rates)*rlimit); - unpitch = malloc(sizeof(*unpitch)*rlimit); - if(!outrates || !unpitch) + // Either enable or disable rate forcing, with ntom_rate non-zero or not. + if(mpg123_param(mh, MPG123_FORCE_RATE, ntom_rate, 0) != MPG123_OK) { - error("DOOM"); + merror("Cannot force NtoM rate: %s", mpg123_strerror(mh)); return -1; } - for(ri = 0; ri 0) - { - int fi; - int unpitch_i = 0; - if(param.verbose > 1 && outfmts[0].encoding > 0) + // Only that one rate is enforced. Nothing else needs to be checked. + // For pitching, ntom_rate has been adjusted. The output uses outfmt.rate. + // Need to tell mpg123 about the forced rate to make it work. + for(int ch=1; ch<=2; ++ch) { - const char *encname = out123_enc_name(outfmts[0].encoding); - fprintf(stderr, "Note: default format %li Hz, %i channels, %s\n" - , outfmts[0].rate, outfmts[0].channels - , encname ? encname : "???" ); - } - for(fi=1; fi 2) fprintf( stderr - , "Note: output support for %li Hz, %i channels: 0x%x\n" - , outfmts[fi].rate, outfmts[fi].channels, outfmts[fi].encoding ); + , "Note: output support for %li Hz, %s: 0x%x\n" + , outfmt.rate, ch==MPG123_MONO ? "mono" : "stereo", fmts ); if(force_fmt) - { /* Filter for forced encoding. */ - if((fmts & force_fmt) == force_fmt) - fmts = force_fmt; - else /* Nothing else! */ - fmts = 0; - } - // Support the resampler or native playback. Condition for the resampler - // to work is decoding to float and keeping a channel count compatible - // with configured output (in a case that might differ for various encodings). - long decode_rate = brate(unpitch, outfmts[fi].rate, rlimit, &unpitch_i); - if(do_resample && decode_rate != outfmt.rate) - { - fmts = 0; - // Only enable float outupt for resampler if needed and channel - // count supported for real output format. 
- if((outfmts[fi].channels & outfmt.channels) == outfmts[fi].channels) - fmts = MPG123_ENC_FLOAT_32; - // Also, be smart and let the internal downsampling work for small output - // rates. If target is 22050, decoding to 44100 and 48000 is not sensible, - // rather do 22050 or 24000. We have a factor of 4 to play with. - // So any input file, with max rate of 48000, can be decoded down to - // 12000 at least, actually saving computing time, if not in the decoder, - // then in the resampler. - // Every rate above 12000 can be halved to still get a valid MPEG rate. - // Output of 12001 Hz needs decoding to 16000 up to 23999, but not more. - // My resampler does not care about very small resampling steps; the less - // samples, the less work. - if(decode_rate > 12000 && decode_rate > outfmt.rate*2) - fmts = 0; - } - mpg123_format(mh, decode_rate, outfmts[fi].channels, fmts); + fmts = ((fmts & force_fmt) == force_fmt) ? force_fmt : 0; + mpg123_format(mh, ntom_rate, ch, fmts); } + } else if(do_resample) + { + // Support any decoding rate with float output for the resampler and also + // direct decoding to configured output format. + // One twist: Disable high rates with signal that the resampler will throw + // away anyway. This includes pitch. 22040 Hz output rate with pitch 0.5 + // still wants the full 44100 Hz input data, as original signal up to + // 22040 Hz will be heard as up to 11020 Hz. So we want pitch_rate() + // to be above outfmt.rate. Final resampling ratio not above 2. + for(ri=0; ri 12000 && pitch_rate(rates[ri]) > outfmt.rate*2) + break; + int fmt = (param.pitch == 0. && rates[ri] == outfmt.rate) + ? outfmt.encoding + : MPG123_ENC_FLOAT_32; + mpg123_format(mh, rates[ri], outfmt.channels, fmt); + } + } else + { + // Finally, the old style, direct decoding to possibly pitched output. 
+ if(!outrates) + outrates = malloc(sizeof(*rates)*num_rates); + if(!unpitch) + unpitch = malloc(sizeof(*unpitch)*num_rates); + if(!outrates || !unpitch) + { + CLEAN_POINTER(outrates, free) + CLEAN_POINTER(unpitch, free) + error("DOOM"); + return -1; + } + for(ri = 0; ri 0) + { + int fi; + int unpitch_i = 0; + if(param.verbose > 1 && outfmts[0].encoding > 0) + { + const char *encname = out123_enc_name(outfmts[0].encoding); + fprintf(stderr, "Note: default format %li Hz, %i channels, %s\n" + , outfmts[0].rate, outfmts[0].channels + , encname ? encname : "???" ); + } + for(fi=1; fi 2) + fprintf( stderr + , "Note: output support for %li Hz, %i channels: 0x%x\n" + , outfmts[fi].rate, outfmts[fi].channels, outfmts[fi].encoding ); + if(force_fmt) + fmts = ((fmts & force_fmt) == force_fmt) ? force_fmt : 0; + decode_rate = brate(unpitch, outfmts[fi].rate, num_rates, &unpitch_i); + mpg123_format(mh, decode_rate, outfmts[fi].channels, fmts); + } + } + free(outfmts); } - free(outfmts); - free(unpitch); if(param.verbose > 1) print_capabilities(ao, mh); @@ -534,7 +582,6 @@ int set_pitch(mpg123_handle *fr, out123_handle *ao, double new_pitch) } param.pitch = new_pitch; - if(param.pitch < -0.99) param.pitch = -0.99; if(channels == 1) smode = MPG123_MONO; if(channels == 2) smode = MPG123_STEREO; @@ -552,7 +599,7 @@ int set_pitch(mpg123_handle *fr, out123_handle *ao, double new_pitch) param.pitch = old_pitch; audio_capabilities(ao, fr); } - return out123_start(ao, pitch_rate(rate), channels, format); + return audio_prepare(ao, rate, channels, format); } int set_mute(out123_handle *ao, int mutestate) diff --git a/src/audio.h b/src/audio.h index 8fb6d015..18c46253 100644 --- a/src/audio.h +++ b/src/audio.h @@ -21,6 +21,8 @@ #include "out123.h" #define pitch_rate(rate) (param.pitch == 0 ? (rate) : (long) ((param.pitch+1.0)*(rate))) +#define bpitch_rate(rate) (param.pitch == 0 ? 
(rate) : (long) ((rate)/(param.pitch+1.0))) + void audio_cleanup(void); int audio_setup(out123_handle *ao, mpg123_handle *mh); diff --git a/src/mpg123.c b/src/mpg123.c index dfe0d0da..371df95b 100644 --- a/src/mpg123.c +++ b/src/mpg123.c @@ -1124,7 +1124,6 @@ int main(int sys_argc, char ** sys_argv) if(dnow != 0) param.cpu = (dnow == SET_3DNOW) ? "3dnow" : "i586"; #endif if(param.cpu != NULL && (!strcmp(param.cpu, "auto") || !strcmp(param.cpu, ""))) param.cpu = NULL; - long ntom_rate = param.resample ? 0 : param.force_rate; if(!( MPG123_OK == (result = mpg123_par(mp, MPG123_VERBOSE, param.verbose, 0)) && ++libpar && MPG123_OK == (result = mpg123_par(mp, MPG123_FLAGS, param.flags, 0)) @@ -1133,8 +1132,6 @@ int main(int sys_argc, char ** sys_argv) && ++libpar && MPG123_OK == (result = mpg123_par(mp, MPG123_RVA, param.rva, 0)) && ++libpar - && MPG123_OK == (result = mpg123_par(mp, MPG123_FORCE_RATE, ntom_rate, 0)) - && ++libpar && MPG123_OK == (result = mpg123_par(mp, MPG123_DOWNSPEED, param.halfspeed, 0)) && ++libpar && MPG123_OK == (result = mpg123_par(mp, MPG123_UPSPEED, param.doublespeed, 0)) @@ -1632,11 +1629,11 @@ static void long_usage(int err) fprintf(o," -m --mono --mix mix stereo to mono\n"); fprintf(o," --stereo duplicate mono channel\n"); fprintf(o," -r --rate force a specific audio output rate\n"); - fprintf(o," --resample choose resampling mode for forced rate:\n" + fprintf(o," --resample choose resampling mode for forced rate:\n" " NtoM, dirty, fine (default)\n"); fprintf(o," -2 --2to1 2:1 downsampling\n"); fprintf(o," -4 --4to1 4:1 downsampling\n"); - fprintf(o," --pitch set hardware pitch (speedup/down, 0 is neutral; 0.05 is 5%%)\n"); + fprintf(o," --pitch set pitch (speedup/down, 0 is neutral; 0.05 is 5%%)\n"); fprintf(o," --8bit force 8 bit output\n"); fprintf(o," --float force floating point output (internal precision)\n"); fprintf(o," -e --encoding force a specific encoding (%s)\n"