1
0
mirror of http://mpg123.de/trunk/.git synced 2025-10-28 02:55:29 +03:00
Files
mpg123/src/optimize.h
thor 61dd602f71 ALIGN(16) -> ALIGNED(16), as there is already such a macro on MacOSX.
git-svn-id: svn://scm.orgis.org/mpg123/trunk@877 35dc7657-300d-0410-a2e5-dc2837fedb53
2007-08-01 15:28:59 +00:00

334 lines
12 KiB
C

/*
optimize: get a grip on the different optimizations
copyright 2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Thomas Orgis, taking from mpg123.[hc]
For building mpg123 with one optimization only, you have to choose exactly one of
OPT_GENERIC (generic C code for everyone)
OPT_I386 (Intel i386)
OPT_I486 (...)
OPT_I586 (Intel Pentium)
OPT_I586_DITHER (Intel Pentium with dithering/noise shaping for enhanced quality)
OPT_MMX (Intel Pentium and compatibles with MMX, fast, but not the best accuracy)
OPT_3DNOW (AMD 3DNow!, K6-2/3, Athlon, compatibles...)
OPT_ALTIVEC (Motorola/IBM PPC with AltiVec under MacOSX)
(OPT_SSE and OPT_3DNOWEXT are handled further down in this file as well; they are still crude hacks.)
Alternatively, define OPT_MULTI and give a combination which makes sense (do not include i486, do not mix AltiVec and x86).
I still have to examine how all of this interacts with REAL_IS_FIXED.
*/
/* This header gets included from mpg123.h, which in turn includes config.h. */
/* ALIGNED(a) marks a declaration as a-byte aligned when CCALIGN is defined, and is a no-op otherwise. */
#ifndef CCALIGN
#define ALIGNED(a)
#else
#define ALIGNED(a) __attribute__((aligned(a)))
#endif
/* The optimizations only cover the synth1to1 mode and the dct36 function. */
/* func_synth and func_synth_mono are needed in set_synth_functions regardless of optimizations. */
typedef int (*func_synth)(real *, int, unsigned char *, int *);
typedef int (*func_synth_mono)(real *, unsigned char *, int *);
/* Pointer types for the replaceable transforms and table setup routines. */
typedef void (*func_dct36)(real *, real *, real *, real *, real *);
typedef void (*func_dct64)(real *, real *, real *);
typedef void (*func_make_decode_tables)(long);
typedef real (*func_init_layer3_gainpow2)(int);
typedef real *(*func_init_layer2_table)(real *, double);
/* Signature of the i586 assembler synth: like func_synth, but without the trailing int pointer. */
typedef int (*func_synth_pent)(real *, int, unsigned char *);
/* last headaches about getting mmx hardcode out */
real init_layer3_gainpow2(int i);
real *init_layer2_table(real *table, double m);
/* NOTE(review): func_make_decode_tables takes long while this takes scale_t - confirm both are the same type */
void make_decode_tables(scale_t scaleval);
/* dct36 is internal to layer3.c unless a 3dnow flavour replaces it */
void dct36(real *, real *, real *, real *, real *);

/*
Default targets of the opt_* names.
The optimization sections further down #undef and redirect them where needed:
opt_dct36 in the 3dnow sections, the other four in the MMX-based sections.
*/
#define opt_dct36 dct36
#define opt_make_decode_tables make_decode_tables
#define opt_decwin decwin
#define opt_init_layer3_gainpow2 init_layer3_gainpow2
#define opt_init_layer2_table init_layer2_table
/*
OPT_GENERIC: generic C code.
Declares the generic dct64 and the complete synth_1to1 family.
In a single-optimization build (OPT_MULTI undefined) the opt_* names are
mapped directly onto these functions.
*/
#ifdef OPT_GENERIC
#define PENTIUM_FALLBACK
void dct64(real *,real *,real *);
/* synth_1to1 used to be declared twice in a row; one prototype is sufficient */
int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt);
int synth_1to1_8bit (real *,int,unsigned char *,int *);
int synth_1to1_mono (real *,unsigned char *,int *);
int synth_1to1_mono2stereo (real *,unsigned char *,int *);
int synth_1to1_8bit_mono (real *,unsigned char *,int *);
int synth_1to1_8bit_mono2stereo (real *,unsigned char *,int *);
#ifndef OPT_MULTI
#define opt_dct64 dct64
#define opt_synth_1to1 synth_1to1
#define opt_synth_1to1_mono synth_1to1_mono
#define opt_synth_1to1_mono2stereo synth_1to1_mono2stereo
#define opt_synth_1to1_8bit synth_1to1_8bit
#define opt_synth_1to1_8bit_mono synth_1to1_8bit_mono
#define opt_synth_1to1_8bit_mono2stereo synth_1to1_8bit_mono2stereo
#endif
#endif
/* i486 is special: it switches on OPT_I386 as well, so the i386 section that follows applies too */
#if defined(OPT_I486)
#define OPT_I386
#define FIR_BUFFER_SIZE 128
int synth_1to1_486(real *bandPtr, int channel, unsigned char *out, int nb_blocks);
void dct64_i486(int *a, int *b, real *c); /* not used generally */
#endif
/*
OPT_I386 (also active when OPT_I486 defined it just above).
Marks the build as x86 and, without OPT_MULTI, makes synth_1to1_i386 the synth.
*/
#if defined(OPT_I386)
#define PENTIUM_FALLBACK
#define OPT_X86
int synth_1to1_i386(real *bandPtr, int channel, unsigned char *out, int *pnt);
#if !defined(OPT_MULTI)
#define opt_synth_1to1 synth_1to1_i386
#endif
#endif
/*
OPT_I586: Pentium flavour.
synth_1to1_i586 is the regular entry point, synth_1to1_i586_asm the assembler routine
reachable through opt_synth_1to1_i586_asm.
*/
#if defined(OPT_I586)
#define PENTIUM_FALLBACK
#define OPT_PENTIUM
#define OPT_X86
int synth_1to1_i586(real *bandPtr, int channel, unsigned char *out, int *pnt);
int synth_1to1_i586_asm(real *, int, unsigned char *);
#if !defined(OPT_MULTI)
#define opt_synth_1to1 synth_1to1_i586
#define opt_synth_1to1_i586_asm synth_1to1_i586_asm
#endif
#endif
/*
OPT_I586_DITHER: same entry point as OPT_I586 (synth_1to1_i586), but
opt_synth_1to1_i586_asm resolves to the dithering assembler routine.
*/
#if defined(OPT_I586_DITHER)
#define PENTIUM_FALLBACK
#define OPT_PENTIUM
#define OPT_X86
int synth_1to1_i586(real *bandPtr, int channel, unsigned char *out, int *pnt);
int synth_1to1_i586_asm_dither(real *, int, unsigned char *);
#if !defined(OPT_MULTI)
#define opt_synth_1to1 synth_1to1_i586
#define opt_synth_1to1_i586_asm synth_1to1_i586_asm_dither
#endif
#endif
/* That one has by far the most ugly hacks to make it cooperative. */
/*
OPT_MMX: declares the MMX variants of the table setup, dct64 and synth routines.
OPT_MMXORSSE enables the table/decwin members of struct_opts and their opt_* redirections
in the OPT_MULTI section; OPT_X86 enables the shared x86 declarations further down.
*/
#ifdef OPT_MMX
#define OPT_MMXORSSE
#define OPT_X86
real init_layer3_gainpow2_mmx(int i);
real* init_layer2_table_mmx(real *table, double m);
/* I think one can optimize storage here with the normal decwin */
extern real decwin_mmx[512+32];
void dct64_mmx(real *,real *,real *);
int synth_1to1_mmx(real *bandPtr,int channel,unsigned char *out,int *pnt);
void make_decode_tables_mmx(long scaleval); /* tabinit_mmx.s */
#ifndef OPT_MULTI
/* single-optimization build: drop the generic defaults and point every opt_* name at the MMX variant */
#undef opt_decwin
#define opt_decwin decwin_mmx
#define opt_dct64 dct64_mmx
#define opt_synth_1to1 synth_1to1_mmx
#undef opt_make_decode_tables
#define opt_make_decode_tables make_decode_tables_mmx
#undef opt_init_layer3_gainpow2
#define opt_init_layer3_gainpow2 init_layer3_gainpow2_mmx
#undef opt_init_layer2_table
#define opt_init_layer2_table init_layer2_table_mmx
/* OPT_MMX_ONLY suppresses the extern declarations of pnts and decwin that follow the MMX-based sections */
#define OPT_MMX_ONLY
#endif
#endif
/* first crude hack into our source */
/*
OPT_SSE: reuses the MMX table setup, decwin_mmx and dct64_mmx, and adds
dct64_sse plus synth_1to1_sse on top.
OPT_MPLAYER enables the mpl_dct64 member / opt_mpl_dct64 macro in the OPT_MULTI section.
*/
#ifdef OPT_SSE
#define OPT_MMXORSSE
#define OPT_MPLAYER
#define OPT_X86
real init_layer3_gainpow2_mmx(int i);
real* init_layer2_table_mmx(real *table, double m);
/* I think one can optimize storage here with the normal decwin */
extern real decwin_mmx[512+32];
void dct64_mmx(real *,real *,real *);
void dct64_sse(real *,real *,real *);
int synth_1to1_sse(real *bandPtr,int channel,unsigned char *out,int *pnt);
void make_decode_tables_mmx(long scaleval); /* tabinit_mmx.s */
/* ugly! */
extern func_dct64 mpl_dct64;
#ifndef OPT_MULTI
/* single-optimization build: opt_mpl_dct64 gets the SSE dct64, while opt_dct64 stays on the MMX one */
#define opt_mpl_dct64 dct64_sse
#undef opt_decwin
#define opt_decwin decwin_mmx
#define opt_dct64 dct64_mmx /* dct64_sse is silent in downsampling modes */
#define opt_synth_1to1 synth_1to1_sse
#undef opt_make_decode_tables
#define opt_make_decode_tables make_decode_tables_mmx
#undef opt_init_layer3_gainpow2
#define opt_init_layer3_gainpow2 init_layer3_gainpow2_mmx
#undef opt_init_layer2_table
#define opt_init_layer2_table init_layer2_table_mmx
/* defined although this is the SSE build: it suppresses the extern declarations of pnts and decwin further down */
#define OPT_MMX_ONLY /* watch out! */
#endif
#endif
/* first crude hack into our source */
/*
OPT_3DNOWEXT: reuses the MMX table setup, decwin_mmx and dct64_mmx, adds
dct64_3dnowext and dct36_3dnowext, and uses synth_1to1_sse as its synth.
OPT_MPLAYER enables the mpl_dct64 member / opt_mpl_dct64 macro in the OPT_MULTI section.
*/
#ifdef OPT_3DNOWEXT
#define OPT_MMXORSSE
#define OPT_MPLAYER
#define OPT_X86
real init_layer3_gainpow2_mmx(int i);
real* init_layer2_table_mmx(real *table, double m);
/* I think one can optimize storage here with the normal decwin */
extern real decwin_mmx[512+32];
void dct64_mmx(real *,real *,real *);
void dct64_3dnowext(real *,real *,real *);
void dct36_3dnowext(real *,real *,real *,real *,real *);
int synth_1to1_sse(real *bandPtr,int channel,unsigned char *out,int *pnt);
void make_decode_tables_mmx(long scaleval); /* tabinit_mmx.s */
/* ugly! */
extern func_dct64 mpl_dct64;
#ifndef OPT_MULTI
/* single-optimization build: opt_mpl_dct64 and opt_dct36 get the 3DNowExt routines, opt_dct64 stays on the MMX one */
#define opt_mpl_dct64 dct64_3dnowext
#undef opt_dct36
#define opt_dct36 dct36_3dnowext
#undef opt_decwin
#define opt_decwin decwin_mmx
/* NOTE(review): the remark on the next line mentions dct64_sse and looks copied from the SSE section - confirm it also holds for dct64_3dnowext */
#define opt_dct64 dct64_mmx /* dct64_sse is silent in downsampling modes */
#define opt_synth_1to1 synth_1to1_sse
#undef opt_make_decode_tables
#define opt_make_decode_tables make_decode_tables_mmx
#undef opt_init_layer3_gainpow2
#define opt_init_layer3_gainpow2 init_layer3_gainpow2_mmx
#undef opt_init_layer2_table
#define opt_init_layer2_table init_layer2_table_mmx
/* suppresses the extern declarations of pnts and decwin further down */
#define OPT_MMX_ONLY /* watch out! */
#endif
#endif
/* The generic tables are only declared when no MMX-based single-optimization build defined OPT_MMX_ONLY. */
#if !defined(OPT_MMX_ONLY)
extern real decwin[512 + 32];
extern real *pnts[5];
#endif
/* 3dnow used to use synth_1to1_i586 for mono / 8bit conversion - was that intentional? */
/* The pentium code is skipped here for now ... until that turns out to be a bad idea. */
#if defined(OPT_3DNOW)
#define K6_FALLBACK /* a fallback for 3DNowExt */
#define OPT_X86
void dct36_3dnow(real *, real *, real *, real *, real *);
int synth_1to1_3dnow(real *, int, unsigned char *, int *);
#if !defined(OPT_MULTI)
/* single-optimization build: replace the default dct36 and select the 3dnow synth */
#undef opt_dct36
#define opt_dct36 dct36_3dnow
#define opt_synth_1to1 synth_1to1_3dnow
#endif
#endif
/*
Shared x86 section, active whenever one of the x86 optimizations defined OPT_X86.
Declares the CPU identification helpers and the i386 dct64 / mono / 8bit routines.
In a single-optimization build the mono and 8bit opt_* names always resolve to the
i386 variants, and opt_dct64 falls back to dct64_i386 unless an earlier section set it.
*/
#ifdef OPT_X86
/* these have to be merged back into one! */
/* (void) makes these real prototypes; an empty () leaves the arguments unchecked before C23 */
unsigned int getcpuid(void);
unsigned int getextcpuflags(void);
unsigned int getstdcpuflags(void);
unsigned int getstd2cpuflags(void);
void dct64_i386(real *,real *,real *);
int synth_1to1_mono_i386(real *,unsigned char *,int *);
int synth_1to1_mono2stereo_i386(real *,unsigned char *,int *);
int synth_1to1_8bit_i386(real *,int,unsigned char *,int *);
int synth_1to1_8bit_mono_i386(real *,unsigned char *,int *);
int synth_1to1_8bit_mono2stereo_i386(real *,unsigned char *,int *);
#ifndef OPT_MULTI
#ifndef opt_dct64
#define opt_dct64 dct64_i386 /* default one even for 3dnow and i486 in decode_2to1, decode_ntom */
#endif
#define opt_synth_1to1_mono synth_1to1_mono_i386
#define opt_synth_1to1_mono2stereo synth_1to1_mono2stereo_i386
#define opt_synth_1to1_8bit synth_1to1_8bit_i386
#define opt_synth_1to1_8bit_mono synth_1to1_8bit_mono_i386
#define opt_synth_1to1_8bit_mono2stereo synth_1to1_8bit_mono2stereo_i386
#endif
#endif
/*
OPT_ALTIVEC: AltiVec variants of dct64 and of the complete synth_1to1 family.
Without OPT_MULTI every opt_* synth/dct64 name maps onto its _altivec counterpart.
*/
#if defined(OPT_ALTIVEC)
void dct64_altivec(real *out0, real *out1, real *samples);
int synth_1to1_altivec(real *, int, unsigned char *, int *);
int synth_1to1_mono_altivec(real *, unsigned char *, int *);
int synth_1to1_mono2stereo_altivec(real *, unsigned char *, int *);
int synth_1to1_8bit_altivec(real *, int, unsigned char *, int *);
int synth_1to1_8bit_mono_altivec(real *, unsigned char *, int *);
int synth_1to1_8bit_mono2stereo_altivec(real *, unsigned char *, int *);
#if !defined(OPT_MULTI)
#define opt_dct64 dct64_altivec
#define opt_synth_1to1 synth_1to1_altivec
#define opt_synth_1to1_mono synth_1to1_mono_altivec
#define opt_synth_1to1_mono2stereo synth_1to1_mono2stereo_altivec
#define opt_synth_1to1_8bit synth_1to1_8bit_altivec
#define opt_synth_1to1_8bit_mono synth_1to1_8bit_mono_altivec
#define opt_synth_1to1_8bit_mono2stereo synth_1to1_8bit_mono2stereo_altivec
#endif
#endif
/* used for multi opt mode and the single 3dnow mode to have the old 3dnow test flag still working */
/* Both take no arguments; (void) turns the declarations into real prototypes so the compiler checks calls. */
void test_cpu_flags(void);
void list_cpu_opt(void);
#ifdef OPT_MULTI
/* Takes no arguments; declared with (void) so that it is a real prototype. */
/* NOTE(review): presumably fills cpu_opts; the meaning of the return value is not visible in this header. */
int set_cpu_opt(void);
/* a simple global struct to hold the decoding function pointers, could be localized later if really wanted */
/*
Members inside an #ifdef only exist when the corresponding optimization is compiled in.
In OPT_MULTI builds the opt_* macros access these members through the global cpu_opts.
*/
typedef struct
{
/* the six synth variants: stereo, mono, mono2stereo, each in a plain and an 8bit flavour */
func_synth synth_1to1;
func_synth_mono synth_1to1_mono;
func_synth_mono synth_1to1_mono2stereo;
func_synth synth_1to1_8bit;
func_synth_mono synth_1to1_8bit_mono;
func_synth_mono synth_1to1_8bit_mono2stereo;
#ifdef OPT_PENTIUM
/* assembler synth of the i586 / i586_dither optimizations */
func_synth_pent synth_1to1_i586_asm;
#endif
#ifdef OPT_MMXORSSE
real *decwin; /* ugly... needed to get mmx together with folks*/
/* table setup routines that the MMX-based optimizations replace */
func_make_decode_tables make_decode_tables;
func_init_layer3_gainpow2 init_layer3_gainpow2;
func_init_layer2_table init_layer2_table;
#endif
#ifdef OPT_3DNOW
/* NOTE(review): only present with OPT_3DNOW, although OPT_3DNOWEXT declares a dct36 replacement as well - confirm this is intended */
func_dct36 dct36;
#endif
func_dct64 dct64;
#ifdef OPT_MPLAYER
/* second dct64 slot, used by the SSE and 3DNowExt optimizations */
func_dct64 mpl_dct64;
#endif
} struct_opts;
/* the single global instance; defined elsewhere */
extern struct_opts cpu_opts;
/*
OPT_MULTI redirections: every opt_* name resolves to the matching member of the
global cpu_opts, so the implementation used is whatever cpu_opts currently holds.
All expansions are parenthesized; opt_decwin used to be the only one that was not.
*/
#define opt_synth_1to1 (cpu_opts.synth_1to1)
#define opt_synth_1to1_mono (cpu_opts.synth_1to1_mono)
#define opt_synth_1to1_mono2stereo (cpu_opts.synth_1to1_mono2stereo)
#define opt_synth_1to1_8bit (cpu_opts.synth_1to1_8bit)
#define opt_synth_1to1_8bit_mono (cpu_opts.synth_1to1_8bit_mono)
#define opt_synth_1to1_8bit_mono2stereo (cpu_opts.synth_1to1_8bit_mono2stereo)
#ifdef OPT_PENTIUM
#define opt_synth_1to1_i586_asm (cpu_opts.synth_1to1_i586_asm)
#endif
#ifdef OPT_MMXORSSE
/* these four have generic defaults defined near the top of the file, hence the #undef first */
#undef opt_make_decode_tables
#define opt_make_decode_tables (cpu_opts.make_decode_tables)
#undef opt_decwin
#define opt_decwin (cpu_opts.decwin)
#undef opt_init_layer3_gainpow2
#define opt_init_layer3_gainpow2 (cpu_opts.init_layer3_gainpow2)
#undef opt_init_layer2_table
#define opt_init_layer2_table (cpu_opts.init_layer2_table)
#endif
#ifdef OPT_3DNOW
#undef opt_dct36
#define opt_dct36 (cpu_opts.dct36)
#endif
#define opt_dct64 (cpu_opts.dct64)
#ifdef OPT_MPLAYER
#define opt_mpl_dct64 (cpu_opts.mpl_dct64)
#endif
#endif