Moving in the library source from the mpg123lib branch.

git-svn-id: svn://scm.orgis.org/mpg123/trunk@998 35dc7657-300d-0410-a2e5-dc2837fedb53
2025-10-26 15:31:15 +03:00 · 2007-10-14 22:32:17 +00:00
parent 0061205665
commit 78a4864a32
64 changed files with 84662 additions and 0 deletions
--- a/src/libmpg123/Makefile.am
+++ b/src/libmpg123/Makefile.am
@@ -0,0 +1,90 @@
 ## Makefile.am: produce Makefile.in from this
 ## copyright by the mpg123 project - free software under the terms of the LGPL 2.1
 ## see COPYING and AUTHORS files in distribution or http://mpg123.org
 ## initially written by Nicholas J. Humfrey
 #AM_CFLAGS = @AUDIO_CFLAGS@
 #AM_LDFLAGS = 
 EXTRA_DIST =
 CLEANFILES = *.a
 ## The library is declared as a libtool library (libmpg123.la); the lines for a
 ## plain static archive (libmpg123.a) are kept below, commented out.
 #lib_LIBRARIES = libmpg123.a
 lib_LTLIBRARIES = libmpg123.la
 ## Header listed for installation by this Makefile.am.
 include_HEADERS = mpg123.h
 #libmpg123_a_LIBADD = @DECODER_OBJ@
 #libmpg123_a_DEPENDENCIES = @DECODER_OBJ@
 ## Link flags: the version info comes from the @LIBMPG123_VERSION@ substitution
 ## and the exported symbols are restricted to those listed in libmpg123.sym.
 libmpg123_la_LDFLAGS = -no-undefined -version-info @LIBMPG123_VERSION@ -export-symbols libmpg123.sym
 ## @DECODER_LOBJ@ is filled in at configure time; presumably it names the libtool
 ## objects built from the EXTRA_libmpg123_la_SOURCES chosen for the target
 ## (TODO confirm against configure.ac).
 libmpg123_la_LIBADD = @DECODER_LOBJ@
 libmpg123_la_DEPENDENCIES = @DECODER_LOBJ@
 ## Sources that are always part of the library.
 libmpg123_la_SOURCES = \
 	parse.c \
 	parse.h \
 	frame.c \
 	format.c \
 	frame.h \
 	reader.h \
 	debug.h \
 	decode.h \
 	decode_2to1.c \
 	decode_4to1.c \
 	decode_ntom.c \
 	equalizer.c \
 	huffman.h \
 	icy.c \
 	icy.h \
 	id3.c \
 	id3.h \
 	true.h \
 	l2tables.h \
 	layer1.c \
 	layer2.c \
 	layer3.c \
 	getbits.h \
 	optimize.h \
 	optimize.c \
 	readers.c \
 	tabinit.c \
 	stringbuf.c \
 	libmpg123.c \
 	mpg123.h \
 	mpg123lib_intern.h \
 	mangle.h \
 	getcpuflags.h \
 	libmpg123.sym
 ## Sources that are declared here but not listed in libmpg123_la_SOURCES:
 ## the generic and the CPU-specific DCT/synthesis variants.
 EXTRA_libmpg123_la_SOURCES = \
 	dct36_3dnowext.S \
 	dct36_3dnow.S \
 	dct64_3dnowext.S \
 	dct64_3dnow.S \
 	dct64_altivec.c \
 	dct64.c \
 	dct64_i386.c \
 	dct64_mmx.S \
 	dct64_sse.S \
 	decode_3dnowext.S \
 	decode_3dnow.S \
 	decode_altivec.c \
 	decode.c \
 	decode_i386.c \
 	decode_i586_dither.S \
 	decode_i586.S \
 	decode_mmx.S \
 	decode_sse3d.h \
 	decode_sse.S \
 	dnoise.c \
 	equalizer_3dnow.S \
 	tabinit_mmx.S \
 	getcpuflags.S
 ## Suffix rule for .S -> .o: run the C preprocessor into a temporary "<source>.s"
 ## next to the source, assemble that file, and delete it if assembling succeeded.
 # explicit preprocessing since mingw32 does not honor the big .S
 .S.o:
 	$(CPP) $(CPPFLAGS) $< > $<.s
 	$(CCAS) $(CCASFLAGS) -c -o $@ $<.s && rm $<.s
--- a/src/libmpg123/dct36_3dnow.S
+++ b/src/libmpg123/dct36_3dnow.S
@@ -0,0 +1,503 @@
 /*
 	dct36_3dnow.S: Replacement of dct36() with AMD's 3DNow! SIMD operations support
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Syuuhei Kashiyama
 	This code based 'dct36_3dnow.s' by Syuuhei Kashiyama
 	<squash@mb.kcom.ne.jp>,only two types of changes have been made:
 	- remove PREFETCH instruction for speedup
 	- change function name for support 3DNow! automatic detect
 	You can find Kashiyama's original 3dnow! support patch
 	(for mpg123-0.59o) at
 	http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
 	by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
 	                   <kim@comtec.co.jp>               - after  1.Apr.1999
 	Replacement of dct36() with AMD's 3DNow! SIMD operations support
 	Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
 	The author of this program disclaim whole expressed or implied
 	warranties with regard to this program, and in no event shall the
 	author of this program liable to whatever resulted from the use of
 	this program. Use it at your own risk.
 */
 #include "mangle.h"
 	.globl ASM_NAME(dct36_3dnow)
 /*	.type	 ASM_NAME(dct36_3dnow),@function */
 /*
 	dct36_3dnow: takes five pointer arguments from the stack.
 	  arg1  8(%ebp) -> %eax  "in": 18 floats, read and modified in place
 	  arg2 12(%ebp) -> %esi  18 floats (byte offsets 0..68), only read; added to the strided results
 	  arg3 16(%ebp) -> %ecx  18 floats (byte offsets 0..68), written
 	  arg4 20(%ebp) -> %edx  36 float factors (byte offsets 0..140), only read
 	  arg5 24(%ebp) -> %ebx  18 floats written 128 bytes apart (byte offsets 0..2176)
 	NOTE(review): presumably the counterpart of the C routine
 	dct36(inbuf, o1, o2, wintab, tsbuf) - confirm against the caller.
 	%esi and %ebx are saved and restored; %eax, %ecx, %edx and all MMX registers are clobbered.
 	Each pf* instruction works on two packed single-precision floats, so the
 	element indices in the comments below assume 4-byte floats.
 */
 ASM_NAME(dct36_3dnow):
 	/* Prologue: frame pointer, 120 bytes of (unused) stack space, callee-saved registers. */
 	pushl %ebp
 	movl %esp,%ebp
 	subl $120,%esp
 	pushl %esi
 	pushl %ebx
 	/* Fetch the five stack arguments. */
 	movl 8(%ebp),%eax
 	movl 12(%ebp),%esi
 	movl 16(%ebp),%ecx
 	movl 20(%ebp),%edx
 	movl 24(%ebp),%ebx
 	/* %ebp-128 is where %esp already points (120 reserved + two 4-byte pushes). */
 	leal -128(%ebp),%esp
 	femms
 	/* Step 1: in[i] += in[i-1] for i = 1..17, two elements per pfadd;
 	   every sum uses the predecessor value from before this step. */
 	movq (%eax),%mm0
 	movq 4(%eax),%mm1
 	pfadd %mm1,%mm0
 	movq %mm0,4(%eax)
 	psrlq $32,%mm1
 	movq 12(%eax),%mm2
 	punpckldq %mm2,%mm1
 	pfadd %mm2,%mm1
 	movq %mm1,12(%eax)
 	psrlq $32,%mm2
 	movq 20(%eax),%mm3
 	punpckldq %mm3,%mm2
 	pfadd %mm3,%mm2
 	movq %mm2,20(%eax)
 	psrlq $32,%mm3
 	movq 28(%eax),%mm4
 	punpckldq %mm4,%mm3
 	pfadd %mm4,%mm3
 	movq %mm3,28(%eax)
 	psrlq $32,%mm4
 	movq 36(%eax),%mm5
 	punpckldq %mm5,%mm4
 	pfadd %mm5,%mm4
 	movq %mm4,36(%eax)
 	psrlq $32,%mm5
 	movq 44(%eax),%mm6
 	punpckldq %mm6,%mm5
 	pfadd %mm6,%mm5
 	movq %mm5,44(%eax)
 	psrlq $32,%mm6
 	movq 52(%eax),%mm7
 	punpckldq %mm7,%mm6
 	pfadd %mm7,%mm6
 	movq %mm6,52(%eax)
 	psrlq $32,%mm7
 	movq 60(%eax),%mm0
 	punpckldq %mm0,%mm7
 	pfadd %mm0,%mm7
 	movq %mm7,60(%eax)
 	psrlq $32,%mm0
 	movd 68(%eax),%mm1
 	pfadd %mm1,%mm0
 	movd %mm0,68(%eax)
 	/* Step 2: in[i] += in[i-2] for odd i = 3..17, again using the values
 	   the odd elements had before this step. */
 	movd 4(%eax),%mm0
 	movd 12(%eax),%mm1
 	punpckldq %mm1,%mm0
 	punpckldq 20(%eax),%mm1
 	pfadd %mm1,%mm0
 	movd %mm0,12(%eax)
 	psrlq $32,%mm0
 	movd %mm0,20(%eax)
 	psrlq $32,%mm1
 	movd 28(%eax),%mm2
 	punpckldq %mm2,%mm1
 	punpckldq 36(%eax),%mm2
 	pfadd %mm2,%mm1
 	movd %mm1,28(%eax)
 	psrlq $32,%mm1
 	movd %mm1,36(%eax)
 	psrlq $32,%mm2
 	movd 44(%eax),%mm3
 	punpckldq %mm3,%mm2
 	punpckldq 52(%eax),%mm3
 	pfadd %mm3,%mm2
 	movd %mm2,44(%eax)
 	psrlq $32,%mm2
 	movd %mm2,52(%eax)
 	psrlq $32,%mm3
 	movd 60(%eax),%mm4
 	punpckldq %mm4,%mm3
 	punpckldq 68(%eax),%mm4
 	pfadd %mm4,%mm3
 	movd %mm3,60(%eax)
 	psrlq $32,%mm3
 	movd %mm3,68(%eax)
 	/* mm0 = in[6..7] * COS9[3] and mm1 = in[12..13] * COS9[6];
 	   neither register is written again, groups 1, 3 and 4 reuse them. */
 	movq 24(%eax),%mm0
 	movq 48(%eax),%mm1
 	movd ASM_NAME(COS9)+12,%mm2
 	punpckldq %mm2,%mm2
 	movd ASM_NAME(COS9)+24,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm2,%mm0
 	pfmul %mm3,%mm1
 	/* mm7 = { 1.0, 0.0 } (integer 1 converted by pi2fd), %eax preserved around it.
 	   Each output stage copies mm7 and fills the high lane from tfcos36,
 	   giving the factor pair { 1.0, tfcos36[k] }. */
 	pushl %eax
 	movl $1,%eax
 	movd %eax,%mm7
 	pi2fd %mm7,%mm7
 	popl %eax
 	/* Group 1:
 	   mm2 = in[2..3]*COS9[1] + mm0 + in[10..11]*COS9[5] + in[14..15]*COS9[7]
 	   mm3 = in[0..1] + in[4..5]*COS9[2] + in[8..9]*COS9[4] + mm1 + in[16..17]*COS9[8] */
 	movq 8(%eax),%mm2
 	movd ASM_NAME(COS9)+4,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm3,%mm2
 	pfadd %mm0,%mm2
 	movq 40(%eax),%mm3
 	movd ASM_NAME(COS9)+20,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfadd %mm3,%mm2
 	movq 56(%eax),%mm3
 	movd ASM_NAME(COS9)+28,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfadd %mm3,%mm2
 	movq (%eax),%mm3
 	movq 16(%eax),%mm4
 	movd ASM_NAME(COS9)+8,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	movq 32(%eax),%mm4
 	movd ASM_NAME(COS9)+16,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	pfadd %mm1,%mm3
 	movq 64(%eax),%mm4
 	movd ASM_NAME(COS9)+32,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	/* Output stage on t = (mm2 + mm3) * { 1.0, tfcos36[0] };
 	   s = t.lo + t.hi (pfacc), d = t.lo - t.hi:
 	     36(%ecx) = s * 108(%edx)           32(%ecx) = s * 104(%edx)
 	   1024(%ebx) = d * 32(%edx) + 32(%esi)
 	   1152(%ebx) = d * 36(%edx) + 36(%esi)
 	   All later output stages follow this pattern with other offsets. */
 	movq %mm2,%mm4
 	pfadd %mm3,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+0,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 108(%edx),%mm6
 	punpckldq 104(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,36(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,32(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 32(%edx),%mm6
 	punpckldq 36(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 32(%esi),%mm6
 	punpckldq 36(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,1024(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1152(%ebx)
 	/* Output stage on (mm3 - mm2) * { 1.0, tfcos36[8] }:
 	   writes 68(%ecx), 0(%ecx), 0(%ebx) and 2176(%ebx). */
 	movq %mm3,%mm4
 	pfsub %mm2,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+32,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 140(%edx),%mm6
 	punpckldq 72(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,68(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,0(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 0(%edx),%mm6
 	punpckldq 68(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 0(%esi),%mm6
 	punpckldq 68(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,0(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,2176(%ebx)
 	/* Group 2:
 	   mm2 = (in[2..3] - in[10..11] - in[14..15]) * COS9[3]
 	   mm3 = (in[4..5] - in[8..9] - in[16..17]) * COS9[6] - in[12..13] + in[0..1] */
 	movq 8(%eax),%mm2
 	movq 40(%eax),%mm3
 	pfsub %mm3,%mm2
 	movq 56(%eax),%mm3
 	pfsub %mm3,%mm2
 	movd ASM_NAME(COS9)+12,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm3,%mm2
 	movq 16(%eax),%mm3
 	movq 32(%eax),%mm4
 	pfsub %mm4,%mm3
 	movq 64(%eax),%mm4
 	pfsub %mm4,%mm3
 	movd ASM_NAME(COS9)+24,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	movq 48(%eax),%mm4
 	pfsub %mm4,%mm3
 	movq (%eax),%mm4
 	pfadd %mm4,%mm3
 	/* Output stage on (mm2 + mm3) * { 1.0, tfcos36[1] }:
 	   writes 40(%ecx), 28(%ecx), 896(%ebx) and 1280(%ebx). */
 	movq %mm2,%mm4
 	pfadd %mm3,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+4,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 112(%edx),%mm6
 	punpckldq 100(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,40(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,28(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 28(%edx),%mm6
 	punpckldq 40(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 28(%esi),%mm6
 	punpckldq 40(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,896(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1280(%ebx)
 	/* Output stage on (mm3 - mm2) * { 1.0, tfcos36[7] }:
 	   writes 64(%ecx), 4(%ecx), 128(%ebx) and 2048(%ebx). */
 	movq %mm3,%mm4
 	pfsub %mm2,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+28,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 136(%edx),%mm6
 	punpckldq 76(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,64(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,4(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 4(%edx),%mm6
 	punpckldq 64(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 4(%esi),%mm6
 	punpckldq 64(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,128(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,2048(%ebx)
 	/* Group 3:
 	   mm2 = in[2..3]*COS9[5] - mm0 - in[10..11]*COS9[7] + in[14..15]*COS9[1]
 	   mm3 = in[0..1] - in[4..5]*COS9[8] - in[8..9]*COS9[2] + mm1 + in[16..17]*COS9[4] */
 	movq 8(%eax),%mm2
 	movd ASM_NAME(COS9)+20,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm3,%mm2
 	pfsub %mm0,%mm2
 	movq 40(%eax),%mm3
 	movd ASM_NAME(COS9)+28,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfsub %mm3,%mm2
 	movq 56(%eax),%mm3
 	movd ASM_NAME(COS9)+4,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfadd %mm3,%mm2
 	movq (%eax),%mm3
 	movq 16(%eax),%mm4
 	movd ASM_NAME(COS9)+32,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfsub %mm4,%mm3
 	movq 32(%eax),%mm4
 	movd ASM_NAME(COS9)+8,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfsub %mm4,%mm3
 	pfadd %mm1,%mm3
 	movq 64(%eax),%mm4
 	movd ASM_NAME(COS9)+16,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	/* Output stage on (mm2 + mm3) * { 1.0, tfcos36[2] }:
 	   writes 44(%ecx), 24(%ecx), 768(%ebx) and 1408(%ebx). */
 	movq %mm2,%mm4
 	pfadd %mm3,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+8,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 116(%edx),%mm6
 	punpckldq 96(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,44(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,24(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 24(%edx),%mm6
 	punpckldq 44(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 24(%esi),%mm6
 	punpckldq 44(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,768(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1408(%ebx)
 	/* Output stage on (mm3 - mm2) * { 1.0, tfcos36[6] }:
 	   writes 60(%ecx), 8(%ecx), 256(%ebx) and 1920(%ebx). */
 	movq %mm3,%mm4
 	pfsub %mm2,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+24,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 132(%edx),%mm6
 	punpckldq 80(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,60(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,8(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 8(%edx),%mm6
 	punpckldq 60(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 8(%esi),%mm6
 	punpckldq 60(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,256(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1920(%ebx)
 	/* Group 4:
 	   mm2 = in[2..3]*COS9[7] - mm0 + in[10..11]*COS9[1] - in[14..15]*COS9[5]
 	   mm3 = in[0..1] - in[4..5]*COS9[4] + in[8..9]*COS9[8] + mm1 - in[16..17]*COS9[2] */
 	movq 8(%eax),%mm2
 	movd ASM_NAME(COS9)+28,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm3,%mm2
 	pfsub %mm0,%mm2
 	movq 40(%eax),%mm3
 	movd ASM_NAME(COS9)+4,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfadd %mm3,%mm2
 	movq 56(%eax),%mm3
 	movd ASM_NAME(COS9)+20,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfsub %mm3,%mm2
 	movq (%eax),%mm3
 	movq 16(%eax),%mm4
 	movd ASM_NAME(COS9)+16,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfsub %mm4,%mm3
 	movq 32(%eax),%mm4
 	movd ASM_NAME(COS9)+32,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	pfadd %mm1,%mm3
 	movq 64(%eax),%mm4
 	movd ASM_NAME(COS9)+8,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfsub %mm4,%mm3
 	/* Output stage on (mm2 + mm3) * { 1.0, tfcos36[3] }:
 	   writes 48(%ecx), 20(%ecx), 640(%ebx) and 1536(%ebx). */
 	movq %mm2,%mm4
 	pfadd %mm3,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+12,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 120(%edx),%mm6
 	punpckldq 92(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,48(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,20(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 20(%edx),%mm6
 	punpckldq 48(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 20(%esi),%mm6
 	punpckldq 48(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,640(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1536(%ebx)
 	/* Output stage on (mm3 - mm2) * { 1.0, tfcos36[5] }:
 	   writes 56(%ecx), 12(%ecx), 384(%ebx) and 1792(%ebx). */
 	movq %mm3,%mm4
 	pfsub %mm2,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+20,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 128(%edx),%mm6
 	punpckldq 84(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,56(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,12(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 12(%edx),%mm6
 	punpckldq 56(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 12(%esi),%mm6
 	punpckldq 56(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,384(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1792(%ebx)
 	/* Group 5 (single stage):
 	   mm4 = in[0..1] - in[4..5] + in[8..9] - in[12..13] + in[16..17] */
 	movq (%eax),%mm4
 	movq 16(%eax),%mm3
 	pfsub %mm3,%mm4
 	movq 32(%eax),%mm3
 	pfadd %mm3,%mm4
 	movq 48(%eax),%mm3
 	pfsub %mm3,%mm4
 	movq 64(%eax),%mm3
 	pfadd %mm3,%mm4
 	/* Output stage on mm4 * { 1.0, tfcos36[4] }:
 	   writes 52(%ecx), 16(%ecx), 512(%ebx) and 1664(%ebx). */
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+16,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 124(%edx),%mm6
 	punpckldq 88(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,52(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,16(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 16(%edx),%mm6
 	punpckldq 52(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 16(%esi),%mm6
 	punpckldq 52(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,512(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1664(%ebx)
 	/* Leave MMX/3DNow! state, restore the saved registers and the caller frame. */
 	femms
 	popl %ebx
 	popl %esi
 	movl %ebp,%esp
 	popl %ebp
 	ret
--- a/src/libmpg123/dct36_3dnowext.S
+++ b/src/libmpg123/dct36_3dnowext.S
@@ -0,0 +1,510 @@
 /*
 	dct36_3dnowext: extended 3DNow optimized DCT36
 	copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	Transformed back into standalone asm, with help of
 	gcc -S -DHAVE_CONFIG_H -I.  -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing  -DREAL_IS_FLOAT -c -o dct36_3dnowext.{S,c}
 	MPlayer comment follows.
 */
 /*
 * dct36_3dnow.c - 3DNow! optimized dct36()
 *
 * This code based 'dct36_3dnow.s' by Syuuhei Kashiyama
 * <squash@mb.kcom.ne.jp>, only two types of changes have been made:
 *
 * - removed PREFETCH instruction for speedup
 * - changed function name for support 3DNow! automatic detection
 *
 * You can find Kashiyama's original 3dnow! support patch
 * (for mpg123-0.59o) at
 * http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
 *
 * by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
 *                    <kim@comtec.co.jp>               - after  1.Apr.1999
 *
 * Modified for use with MPlayer, for details see the changelog at
 * http://svn.mplayerhq.hu/mplayer/trunk/
 * $Id: dct36_3dnow.c 18786 2006-06-22 13:34:00Z diego $
 *
 * Original disclaimer:
 *  The author of this program disclaim whole expressed or implied
 *  warranties with regard to this program, and in no event shall the
 *  author of this program liable to whatever resulted from the use of
 *  this program. Use it at your own risk.
 *
 * 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi
 */
 #include "mangle.h"
 	.text
 	ALIGN32,,31
 .globl ASM_NAME(dct36_3dnowext)
 	/* .type	ASM_NAME(dct36_3dnowext), @function */
 /*
 	dct36_3dnowext: takes five pointer arguments from the stack.
 	  arg1  8(%ebp) -> %eax  "in": 18 floats, read and modified in place
 	  arg2 12(%ebp) -> %esi  18 floats (byte offsets 0..68), only read; added to the strided results
 	  arg3 16(%ebp) -> %ecx  18 floats (byte offsets 0..68), written
 	  arg4 20(%ebp) -> %edx  36 float factors (byte offsets 0..140), only read
 	  arg5 24(%ebp) -> %ebx  18 floats written 128 bytes apart (byte offsets 0..2176)
 	NOTE(review): presumably the counterpart of the C routine
 	dct36(inbuf, o1, o2, wintab, tsbuf) - confirm against the caller.
 	%esi and %ebx are saved and restored; %eax, %ecx, %edx and all MMX registers are clobbered.
 	Each pf* instruction works on two packed single-precision floats, so the
 	element indices in the comments below assume 4-byte floats.
 	The only instruction beyond the base 3DNow! set is the pswapd in the first output stage.
 */
 ASM_NAME(dct36_3dnowext):
 	/* Prologue: frame pointer and callee-saved registers, then the five stack arguments. */
 	pushl	%ebp
 	movl	%esp, %ebp
 	pushl	%esi
 	pushl	%ebx
 	movl	8(%ebp), %eax
 	movl	12(%ebp), %esi
 	movl	16(%ebp), %ecx
 	movl	20(%ebp), %edx
 	movl	24(%ebp), %ebx
 #APP
 	/* Step 1: in[i] += in[i-1] for i = 1..17, two elements per pfadd;
 	   every sum uses the predecessor value from before this step. */
 	movq (%eax),%mm0
 	movq 4(%eax),%mm1
 	pfadd %mm1,%mm0
 	movq %mm0,4(%eax)
 	psrlq $32,%mm1
 	movq 12(%eax),%mm2
 	punpckldq %mm2,%mm1
 	pfadd %mm2,%mm1
 	movq %mm1,12(%eax)
 	psrlq $32,%mm2
 	movq 20(%eax),%mm3
 	punpckldq %mm3,%mm2
 	pfadd %mm3,%mm2
 	movq %mm2,20(%eax)
 	psrlq $32,%mm3
 	movq 28(%eax),%mm4
 	punpckldq %mm4,%mm3
 	pfadd %mm4,%mm3
 	movq %mm3,28(%eax)
 	psrlq $32,%mm4
 	movq 36(%eax),%mm5
 	punpckldq %mm5,%mm4
 	pfadd %mm5,%mm4
 	movq %mm4,36(%eax)
 	psrlq $32,%mm5
 	movq 44(%eax),%mm6
 	punpckldq %mm6,%mm5
 	pfadd %mm6,%mm5
 	movq %mm5,44(%eax)
 	psrlq $32,%mm6
 	movq 52(%eax),%mm7
 	punpckldq %mm7,%mm6
 	pfadd %mm7,%mm6
 	movq %mm6,52(%eax)
 	psrlq $32,%mm7
 	movq 60(%eax),%mm0
 	punpckldq %mm0,%mm7
 	pfadd %mm0,%mm7
 	movq %mm7,60(%eax)
 	psrlq $32,%mm0
 	movd 68(%eax),%mm1
 	pfadd %mm1,%mm0
 	movd %mm0,68(%eax)
 	/* Step 2: in[i] += in[i-2] for odd i = 3..17, again using the values
 	   the odd elements had before this step. */
 	movd 4(%eax),%mm0
 	movd 12(%eax),%mm1
 	punpckldq %mm1,%mm0
 	punpckldq 20(%eax),%mm1
 	pfadd %mm1,%mm0
 	movd %mm0,12(%eax)
 	psrlq $32,%mm0
 	movd %mm0,20(%eax)
 	psrlq $32,%mm1
 	movd 28(%eax),%mm2
 	punpckldq %mm2,%mm1
 	punpckldq 36(%eax),%mm2
 	pfadd %mm2,%mm1
 	movd %mm1,28(%eax)
 	psrlq $32,%mm1
 	movd %mm1,36(%eax)
 	psrlq $32,%mm2
 	movd 44(%eax),%mm3
 	punpckldq %mm3,%mm2
 	punpckldq 52(%eax),%mm3
 	pfadd %mm3,%mm2
 	movd %mm2,44(%eax)
 	psrlq $32,%mm2
 	movd %mm2,52(%eax)
 	psrlq $32,%mm3
 	movd 60(%eax),%mm4
 	punpckldq %mm4,%mm3
 	punpckldq 68(%eax),%mm4
 	pfadd %mm4,%mm3
 	movd %mm3,60(%eax)
 	psrlq $32,%mm3
 	movd %mm3,68(%eax)
 	/* mm0 = in[6..7] * COS9[3] and mm1 = in[12..13] * COS9[6];
 	   neither register is written again, groups 1, 3 and 4 reuse them. */
 	movq 24(%eax),%mm0
 	movq 48(%eax),%mm1
 	movd ASM_NAME(COS9)+12,%mm2
 	punpckldq %mm2,%mm2
 	movd ASM_NAME(COS9)+24,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm2,%mm0
 	pfmul %mm3,%mm1
 	/* mm7 = { 1.0, 0.0 } (integer 1 converted by pi2fd), %eax preserved around it.
 	   Each output stage copies mm7 and fills the high lane from tfcos36,
 	   giving the factor pair { 1.0, tfcos36[k] }. */
 	pushl %eax
 	movl $1,%eax
 	movd %eax,%mm7
 	pi2fd %mm7,%mm7
 	popl %eax
 	/* Group 1:
 	   mm2 = in[2..3]*COS9[1] + mm0 + in[10..11]*COS9[5] + in[14..15]*COS9[7]
 	   mm3 = in[0..1] + in[4..5]*COS9[2] + in[8..9]*COS9[4] + mm1 + in[16..17]*COS9[8] */
 	movq 8(%eax),%mm2
 	movd ASM_NAME(COS9)+4,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm3,%mm2
 	pfadd %mm0,%mm2
 	movq 40(%eax),%mm3
 	movd ASM_NAME(COS9)+20,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfadd %mm3,%mm2
 	movq 56(%eax),%mm3
 	movd ASM_NAME(COS9)+28,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfadd %mm3,%mm2
 	movq (%eax),%mm3
 	movq 16(%eax),%mm4
 	movd ASM_NAME(COS9)+8,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	movq 32(%eax),%mm4
 	movd ASM_NAME(COS9)+16,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	pfadd %mm1,%mm3
 	movq 64(%eax),%mm4
 	movd ASM_NAME(COS9)+32,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	/* Output stage on t = (mm2 + mm3) * { 1.0, tfcos36[0] };
 	   s = t.lo + t.hi (pfacc), d = t.lo - t.hi:
 	     32(%ecx) = s * 104(%edx)           36(%ecx) = s * 108(%edx)
 	   1024(%ebx) = d * 32(%edx) + 32(%esi)
 	   1152(%ebx) = d * 36(%edx) + 36(%esi)
 	   pswapd exchanges the two lanes so that one movq stores both %ecx results.
 	   All later output stages follow this pattern with other offsets, but
 	   store the two %ecx results separately with movd/psrlq/movd. */
 	movq %mm2,%mm4
 	pfadd %mm3,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+0,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 108(%edx),%mm6
 	punpckldq 104(%edx),%mm6
 	pfmul %mm6,%mm5
 	pswapd %mm5,%mm5
 	movq %mm5,32(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 32(%edx),%mm6
 	punpckldq 36(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 32(%esi),%mm6
 	punpckldq 36(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,1024(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1152(%ebx)
 	/* Output stage on (mm3 - mm2) * { 1.0, tfcos36[8] }:
 	   writes 68(%ecx), 0(%ecx), 0(%ebx) and 2176(%ebx). */
 	movq %mm3,%mm4
 	pfsub %mm2,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+32,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 140(%edx),%mm6
 	punpckldq 72(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,68(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,0(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 0(%edx),%mm6
 	punpckldq 68(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 0(%esi),%mm6
 	punpckldq 68(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,0(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,2176(%ebx)
 	/* Group 2:
 	   mm2 = (in[2..3] - in[10..11] - in[14..15]) * COS9[3]
 	   mm3 = (in[4..5] - in[8..9] - in[16..17]) * COS9[6] - in[12..13] + in[0..1] */
 	movq 8(%eax),%mm2
 	movq 40(%eax),%mm3
 	pfsub %mm3,%mm2
 	movq 56(%eax),%mm3
 	pfsub %mm3,%mm2
 	movd ASM_NAME(COS9)+12,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm3,%mm2
 	movq 16(%eax),%mm3
 	movq 32(%eax),%mm4
 	pfsub %mm4,%mm3
 	movq 64(%eax),%mm4
 	pfsub %mm4,%mm3
 	movd ASM_NAME(COS9)+24,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	movq 48(%eax),%mm4
 	pfsub %mm4,%mm3
 	movq (%eax),%mm4
 	pfadd %mm4,%mm3
 	/* Output stage on (mm2 + mm3) * { 1.0, tfcos36[1] }:
 	   writes 40(%ecx), 28(%ecx), 896(%ebx) and 1280(%ebx). */
 	movq %mm2,%mm4
 	pfadd %mm3,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+4,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 112(%edx),%mm6
 	punpckldq 100(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,40(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,28(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 28(%edx),%mm6
 	punpckldq 40(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 28(%esi),%mm6
 	punpckldq 40(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,896(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1280(%ebx)
 	/* Output stage on (mm3 - mm2) * { 1.0, tfcos36[7] }:
 	   writes 64(%ecx), 4(%ecx), 128(%ebx) and 2048(%ebx). */
 	movq %mm3,%mm4
 	pfsub %mm2,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+28,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 136(%edx),%mm6
 	punpckldq 76(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,64(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,4(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 4(%edx),%mm6
 	punpckldq 64(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 4(%esi),%mm6
 	punpckldq 64(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,128(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,2048(%ebx)
 	/* Group 3:
 	   mm2 = in[2..3]*COS9[5] - mm0 - in[10..11]*COS9[7] + in[14..15]*COS9[1]
 	   mm3 = in[0..1] - in[4..5]*COS9[8] - in[8..9]*COS9[2] + mm1 + in[16..17]*COS9[4] */
 	movq 8(%eax),%mm2
 	movd ASM_NAME(COS9)+20,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm3,%mm2
 	pfsub %mm0,%mm2
 	movq 40(%eax),%mm3
 	movd ASM_NAME(COS9)+28,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfsub %mm3,%mm2
 	movq 56(%eax),%mm3
 	movd ASM_NAME(COS9)+4,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfadd %mm3,%mm2
 	movq (%eax),%mm3
 	movq 16(%eax),%mm4
 	movd ASM_NAME(COS9)+32,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfsub %mm4,%mm3
 	movq 32(%eax),%mm4
 	movd ASM_NAME(COS9)+8,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfsub %mm4,%mm3
 	pfadd %mm1,%mm3
 	movq 64(%eax),%mm4
 	movd ASM_NAME(COS9)+16,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	/* Output stage on (mm2 + mm3) * { 1.0, tfcos36[2] }:
 	   writes 44(%ecx), 24(%ecx), 768(%ebx) and 1408(%ebx). */
 	movq %mm2,%mm4
 	pfadd %mm3,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+8,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 116(%edx),%mm6
 	punpckldq 96(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,44(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,24(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 24(%edx),%mm6
 	punpckldq 44(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 24(%esi),%mm6
 	punpckldq 44(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,768(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1408(%ebx)
 	/* Output stage on (mm3 - mm2) * { 1.0, tfcos36[6] }:
 	   writes 60(%ecx), 8(%ecx), 256(%ebx) and 1920(%ebx). */
 	movq %mm3,%mm4
 	pfsub %mm2,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+24,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 132(%edx),%mm6
 	punpckldq 80(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,60(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,8(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 8(%edx),%mm6
 	punpckldq 60(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 8(%esi),%mm6
 	punpckldq 60(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,256(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1920(%ebx)
 	/* Group 4:
 	   mm2 = in[2..3]*COS9[7] - mm0 + in[10..11]*COS9[1] - in[14..15]*COS9[5]
 	   mm3 = in[0..1] - in[4..5]*COS9[4] + in[8..9]*COS9[8] + mm1 - in[16..17]*COS9[2] */
 	movq 8(%eax),%mm2
 	movd ASM_NAME(COS9)+28,%mm3
 	punpckldq %mm3,%mm3
 	pfmul %mm3,%mm2
 	pfsub %mm0,%mm2
 	movq 40(%eax),%mm3
 	movd ASM_NAME(COS9)+4,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfadd %mm3,%mm2
 	movq 56(%eax),%mm3
 	movd ASM_NAME(COS9)+20,%mm4
 	punpckldq %mm4,%mm4
 	pfmul %mm4,%mm3
 	pfsub %mm3,%mm2
 	movq (%eax),%mm3
 	movq 16(%eax),%mm4
 	movd ASM_NAME(COS9)+16,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfsub %mm4,%mm3
 	movq 32(%eax),%mm4
 	movd ASM_NAME(COS9)+32,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfadd %mm4,%mm3
 	pfadd %mm1,%mm3
 	movq 64(%eax),%mm4
 	movd ASM_NAME(COS9)+8,%mm5
 	punpckldq %mm5,%mm5
 	pfmul %mm5,%mm4
 	pfsub %mm4,%mm3
 	/* Output stage on (mm2 + mm3) * { 1.0, tfcos36[3] }:
 	   writes 48(%ecx), 20(%ecx), 640(%ebx) and 1536(%ebx). */
 	movq %mm2,%mm4
 	pfadd %mm3,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+12,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 120(%edx),%mm6
 	punpckldq 92(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,48(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,20(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 20(%edx),%mm6
 	punpckldq 48(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 20(%esi),%mm6
 	punpckldq 48(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,640(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1536(%ebx)
 	/* Output stage on (mm3 - mm2) * { 1.0, tfcos36[5] }:
 	   writes 56(%ecx), 12(%ecx), 384(%ebx) and 1792(%ebx). */
 	movq %mm3,%mm4
 	pfsub %mm2,%mm4
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+20,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 128(%edx),%mm6
 	punpckldq 84(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,56(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,12(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 12(%edx),%mm6
 	punpckldq 56(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 12(%esi),%mm6
 	punpckldq 56(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,384(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1792(%ebx)
 	/* Group 5 (single stage):
 	   mm4 = in[0..1] - in[4..5] + in[8..9] - in[12..13] + in[16..17] */
 	movq (%eax),%mm4
 	movq 16(%eax),%mm3
 	pfsub %mm3,%mm4
 	movq 32(%eax),%mm3
 	pfadd %mm3,%mm4
 	movq 48(%eax),%mm3
 	pfsub %mm3,%mm4
 	movq 64(%eax),%mm3
 	pfadd %mm3,%mm4
 	/* Output stage on mm4 * { 1.0, tfcos36[4] }:
 	   writes 52(%ecx), 16(%ecx), 512(%ebx) and 1664(%ebx). */
 	movq %mm7,%mm5
 	punpckldq ASM_NAME(tfcos36)+16,%mm5
 	pfmul %mm5,%mm4
 	movq %mm4,%mm5
 	pfacc %mm5,%mm5
 	movd 124(%edx),%mm6
 	punpckldq 88(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd %mm5,52(%ecx)
 	psrlq $32,%mm5
 	movd %mm5,16(%ecx)
 	movq %mm4,%mm6
 	punpckldq %mm6,%mm5
 	pfsub %mm6,%mm5
 	punpckhdq %mm5,%mm5
 	movd 16(%edx),%mm6
 	punpckldq 52(%edx),%mm6
 	pfmul %mm6,%mm5
 	movd 16(%esi),%mm6
 	punpckldq 52(%esi),%mm6
 	pfadd %mm6,%mm5
 	movd %mm5,512(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,1664(%ebx)
 	/* Leave MMX/3DNow! state before returning. */
 	femms
 #NO_APP
 	/* Epilogue: restore the saved registers and the caller frame. */
 	popl	%ebx
 	popl	%esi
 	leave
 	ret
 	/* .size	ASM_NAME(dct36_3dnowext), .-ASM_NAME(dct36_3dnowext) */
--- a/src/libmpg123/dct64.c
+++ b/src/libmpg123/dct64.c
@@ -0,0 +1,174 @@
 /*
 	dct64.c: DCT64, the plain C version
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 /*
 * Discrete Cosine Transform (DCT) for subband synthesis
 *
 * -funroll-loops (for gcc) will remove the loops for better performance
 * using loops in the source-code enhances readability
 *
 *
 * TODO: write an optimized version for the down-sampling modes
 *       (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero)
 */
 #include "mpg123lib_intern.h"
 void dct64(real *out0,real *out1,real *samples)
 {
 	/*
 	 * Plain C DCT64.
 	 *
 	 * samples: 32 input values (samples[0..31] are read).
 	 * out0:    receives 17 results at indices 0x10*16 down to 0x10*0.
 	 * out1:    receives 16 results at indices 0x10*0 up to 0x10*15.
 	 *
 	 * The cosine factors come from the external tables pnts[0..4], of which
 	 * 16, 8, 4, 2 and 1 entries are used respectively.
 	 */
 	/* Work area: the five passes alternate between the lower and upper 32 entries. */
 	real bufs[64];
 	/* Distance between two consecutive results in out0 / out1. */
 	const int ostep = 0x10;
 	/* Stage A: five butterfly passes (sums, and differences scaled by a cosine factor). */
 	{
 		real *lo, *hi, *dst, *cs;
 		int n, rep;
 		/* Pass 1: samples[0..31] -> bufs[0..31], factors pnts[0][15..0]. */
 		lo  = samples;
 		hi  = samples + 32;
 		dst = bufs;
 		cs  = pnts[0] + 16;
 		for(n = 0; n < 16; ++n)
 			*dst++ = (*lo++ + *--hi);
 		for(n = 0; n < 16; ++n)
 			*dst++ = REAL_MUL((*--hi - *lo++), *--cs);
 		/* Pass 2: bufs[0..31] -> bufs[32..63] (dst simply continues), two halves of 16;
 		   the second half subtracts in the opposite order. */
 		lo = bufs;
 		hi = bufs + 16;
 		cs = pnts[1] + 8;
 		for(n = 0; n < 8; ++n)
 			*dst++ = (*lo++ + *--hi);
 		for(n = 0; n < 8; ++n)
 			*dst++ = REAL_MUL((*--hi - *lo++), *--cs);
 		hi += 32;
 		cs += 8;	/* rewind to walk pnts[1][7..0] a second time */
 		for(n = 0; n < 8; ++n)
 			*dst++ = (*lo++ + *--hi);
 		for(n = 0; n < 8; ++n)
 			*dst++ = REAL_MUL((*lo++ - *--hi), *--cs);
 		hi += 32;
 		/* Pass 3: bufs[32..63] (lo continues) -> bufs[0..31], groups of 8, factors pnts[2][3..0]. */
 		dst = bufs;
 		cs  = pnts[2];
 		hi  = lo + 8;
 		for(rep = 0; rep < 2; ++rep)
 		{
 			for(n = 0; n < 4; ++n)
 				*dst++ = (*lo++ + *--hi);
 			for(n = 3; n >= 0; --n)
 				*dst++ = REAL_MUL((*--hi - *lo++), cs[n]);
 			hi += 16;
 			for(n = 0; n < 4; ++n)
 				*dst++ = (*lo++ + *--hi);
 			for(n = 3; n >= 0; --n)
 				*dst++ = REAL_MUL((*lo++ - *--hi), cs[n]);
 			hi += 16;
 		}
 		/* Pass 4: bufs[0..31] -> bufs[32..63] (dst continues), groups of 4, factors pnts[3][1..0]. */
 		lo = bufs;
 		cs = pnts[3];
 		hi = lo + 4;
 		for(rep = 0; rep < 4; ++rep)
 		{
 			*dst++ = (*lo++ + *--hi);
 			*dst++ = (*lo++ + *--hi);
 			*dst++ = REAL_MUL((*--hi - *lo++), cs[1]);
 			*dst++ = REAL_MUL((*--hi - *lo++), cs[0]);
 			hi += 8;
 			*dst++ = (*lo++ + *--hi);
 			*dst++ = (*lo++ + *--hi);
 			*dst++ = REAL_MUL((*lo++ - *--hi), cs[1]);
 			*dst++ = REAL_MUL((*lo++ - *--hi), cs[0]);
 			hi += 8;
 		}
 		/* Pass 5: bufs[32..63] (lo continues) -> bufs[0..31], pairs, single factor pnts[4][0]. */
 		dst = bufs;
 		cs  = pnts[4];
 		for(rep = 0; rep < 8; ++rep)
 		{
 			real a, b;
 			a = *lo++; b = *lo++;
 			*dst++ = (a + b);
 			*dst++ = REAL_MUL((a - b), (*cs));
 			a = *lo++; b = *lo++;
 			*dst++ = (a + b);
 			*dst++ = REAL_MUL((b - a), (*cs));
 		}
 	}
 	/* Stage B: in-place accumulation inside bufs[0..31], in groups of 4, 8 and 16.
 	   The statement order inside each group matters: later sums read earlier results. */
 	{
 		real *g;
 		for(g = bufs; g < bufs + 32; g += 4)
 			g[2] += g[3];
 		for(g = bufs; g < bufs + 32; g += 8)
 		{
 			g[4] += g[6];
 			g[6] += g[5];
 			g[5] += g[7];
 		}
 		for(g = bufs; g < bufs + 32; g += 16)
 		{
 			g[8]  += g[12];
 			g[12] += g[10];
 			g[10] += g[14];
 			g[14] += g[9];
 			g[9]  += g[13];
 			g[13] += g[11];
 			g[11] += g[15];
 		}
 	}
 	/* Stage C: scatter to out0 (descending positions) ... */
 	out0[ostep*16] = bufs[0];
 	out0[ostep*15] = bufs[16+0]  + bufs[16+8];
 	out0[ostep*14] = bufs[8];
 	out0[ostep*13] = bufs[16+8]  + bufs[16+4];
 	out0[ostep*12] = bufs[4];
 	out0[ostep*11] = bufs[16+4]  + bufs[16+12];
 	out0[ostep*10] = bufs[12];
 	out0[ostep* 9] = bufs[16+12] + bufs[16+2];
 	out0[ostep* 8] = bufs[2];
 	out0[ostep* 7] = bufs[16+2]  + bufs[16+10];
 	out0[ostep* 6] = bufs[10];
 	out0[ostep* 5] = bufs[16+10] + bufs[16+6];
 	out0[ostep* 4] = bufs[6];
 	out0[ostep* 3] = bufs[16+6]  + bufs[16+14];
 	out0[ostep* 2] = bufs[14];
 	out0[ostep* 1] = bufs[16+14] + bufs[16+1];
 	out0[ostep* 0] = bufs[1];
 	/* ... and to out1 (ascending positions); bufs[1] goes to both. */
 	out1[ostep* 0] = bufs[1];
 	out1[ostep* 1] = bufs[16+1]  + bufs[16+9];
 	out1[ostep* 2] = bufs[9];
 	out1[ostep* 3] = bufs[16+9]  + bufs[16+5];
 	out1[ostep* 4] = bufs[5];
 	out1[ostep* 5] = bufs[16+5]  + bufs[16+13];
 	out1[ostep* 6] = bufs[13];
 	out1[ostep* 7] = bufs[16+13] + bufs[16+3];
 	out1[ostep* 8] = bufs[3];
 	out1[ostep* 9] = bufs[16+3]  + bufs[16+11];
 	out1[ostep*10] = bufs[11];
 	out1[ostep*11] = bufs[16+11] + bufs[16+7];
 	out1[ostep*12] = bufs[7];
 	out1[ostep*13] = bufs[16+7]  + bufs[16+15];
 	out1[ostep*14] = bufs[15];
 	out1[ostep*15] = bufs[16+15];
 }
--- a/src/libmpg123/dct64_3dnow.S
+++ b/src/libmpg123/dct64_3dnow.S
@@ -0,0 +1,711 @@
 /*
 	dct64_3dnow.s: Replacement of dct64() with AMD's 3DNow! SIMD operations support
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Syuuhei Kashiyama
 	Original "license" statement:
 	The author of this program disclaim whole expressed or implied
 	warranties with regard to this program, and in no event shall the
 	author of this program liable to whatever resulted from the use of
 	this program. Use it at your own risk.
 */
 #include "mangle.h"
 	.globl ASM_NAME(dct64_3dnow)
 /*	.type	 ASM_NAME(dct64_3dnow),@function */
 /*
 	dct64_3dnow: 32-point DCT butterfly network using 3DNow! packed floats.
 	Three pointer arguments are read from the stack (cdecl layout):
 	  1st -> %ebp, 2nd -> %edx : outputs; one 32-bit value is stored at every
 	                              multiple of 64 bytes (0..1024 via %ebp,
 	                              0..960 via %edx)
 	  3rd -> %edi              : input; bytes 0..127 are read (32 dwords)
 	NOTE(review): presumably these are out0, out1 and samples of the C
 	dct64() this routine replaces -- confirm against the prototype.
 	A 256-byte scratch area is reserved on the stack and used as two
 	128-byte halves (%ebx = first half, %esi = second half); the phases
 	ping-pong between them.
 	The cosine tables are reached through the pointer array pnts
 	(pnts+0 .. pnts+16, i.e. five 32-bit pointers).
 	Both femms instructions (entry and exit) are commented out, so this
 	routine does not reset the MMX/x87 state itself.
 */
 ASM_NAME(dct64_3dnow):
 	/* reserve the 256-byte scratch area, then save the callee-saved registers */
 	subl $256,%esp
 	pushl %ebp
 	pushl %edi
 	pushl %esi
 	pushl %ebx
 	/* 16 bytes of saved registers sit below the scratch area */
 	leal 16(%esp),%ebx
 	/* return address is at 272(%esp) now, the arguments follow it */
 	movl 284(%esp),%edi
 	movl 276(%esp),%ebp
 	movl 280(%esp),%edx
 	leal 128(%ebx),%esi
 	/* femms */
 	/* 1 */
 	/* Phase 1: input (%edi) -> first scratch half (%ebx), table pnts[0].
 	   For each pair i,i+1:  buf[i]    = in[i] + in[31-i]
 	                         buf[31-i] = (in[i] - in[31-i]) * cos[i]
 	   The mirrored pair is loaded swapped (movd + punpckldq) and the
 	   products are stored swapped (movd, psrlq $32, movd). */
 	movl ASM_NAME(pnts),%eax
 	movq 0(%edi),%mm0
        movq %mm0,%mm1
 	movd 124(%edi),%mm2
 	punpckldq 120(%edi),%mm2
 	movq 0(%eax),%mm3
 	pfadd %mm2,%mm0
 	movq %mm0,0(%ebx)
 	pfsub %mm2,%mm1
 	pfmul %mm3,%mm1
 	movd %mm1,124(%ebx)
 	psrlq $32,%mm1
 	movd %mm1,120(%ebx)
 	movq 8(%edi),%mm4
 	movq %mm4,%mm5
 	movd 116(%edi),%mm6
 	punpckldq 112(%edi),%mm6
 	movq 8(%eax),%mm7
 	pfadd %mm6,%mm4
 	movq %mm4,8(%ebx)
 	pfsub %mm6,%mm5
 	pfmul %mm7,%mm5
 	movd %mm5,116(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,112(%ebx)
 	movq 16(%edi),%mm0
 	movq %mm0,%mm1
 	movd 108(%edi),%mm2
 	punpckldq 104(%edi),%mm2
 	movq 16(%eax),%mm3
 	pfadd %mm2,%mm0
 	movq %mm0,16(%ebx)
 	pfsub %mm2,%mm1
 	pfmul %mm3,%mm1
 	movd %mm1,108(%ebx)
 	psrlq $32,%mm1
 	movd %mm1,104(%ebx)
 	movq 24(%edi),%mm4
 	movq %mm4,%mm5
 	movd 100(%edi),%mm6
 	punpckldq 96(%edi),%mm6
 	movq 24(%eax),%mm7
 	pfadd %mm6,%mm4
 	movq %mm4,24(%ebx)
 	pfsub %mm6,%mm5
 	pfmul %mm7,%mm5
 	movd %mm5,100(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,96(%ebx)
 	movq 32(%edi),%mm0
 	movq %mm0,%mm1
 	movd 92(%edi),%mm2
 	punpckldq 88(%edi),%mm2
 	movq 32(%eax),%mm3
 	pfadd %mm2,%mm0
 	movq %mm0,32(%ebx)
 	pfsub %mm2,%mm1
 	pfmul %mm3,%mm1
 	movd %mm1,92(%ebx)
 	psrlq $32,%mm1
 	movd %mm1,88(%ebx)
 	movq 40(%edi),%mm4
 	movq %mm4,%mm5
 	movd 84(%edi),%mm6
 	punpckldq 80(%edi),%mm6
 	movq 40(%eax),%mm7
 	pfadd %mm6,%mm4
 	movq %mm4,40(%ebx)
 	pfsub %mm6,%mm5
 	pfmul %mm7,%mm5
 	movd %mm5,84(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,80(%ebx)
 	movq 48(%edi),%mm0
 	movq %mm0,%mm1
 	movd 76(%edi),%mm2
 	punpckldq 72(%edi),%mm2
 	movq 48(%eax),%mm3
 	pfadd %mm2,%mm0
 	movq %mm0,48(%ebx)
 	pfsub %mm2,%mm1
 	pfmul %mm3,%mm1
 	movd %mm1,76(%ebx)
 	psrlq $32,%mm1
 	movd %mm1,72(%ebx)
 	movq 56(%edi),%mm4
 	movq %mm4,%mm5
 	movd 68(%edi),%mm6
 	punpckldq 64(%edi),%mm6
 	movq 56(%eax),%mm7
 	pfadd %mm6,%mm4
 	movq %mm4,56(%ebx)
 	pfsub %mm6,%mm5
 	pfmul %mm7,%mm5
 	movd %mm5,68(%ebx)
 	psrlq $32,%mm5
 	movd %mm5,64(%ebx)
 	/* 2 */
 	/* Phase 2: first half (%ebx) -> second half (%esi), table pnts[1].
 	   Same butterfly on each group of 16 values.  The group at bytes
 	   64..127 uses pfsubr (source minus destination), so its difference
 	   is taken with the opposite sign; it reuses the cosine pair loaded
 	   for the matching lower group.  The "a,b" comments give the dword
 	   indices of the two pairs being combined. */
 	movl ASM_NAME(pnts)+4,%eax
 	/* 0,14 */
 	movq 0(%ebx),%mm0
 	movq %mm0,%mm1
 	movd 60(%ebx),%mm2
 	punpckldq 56(%ebx),%mm2
 	movq 0(%eax),%mm3
 	pfadd %mm2,%mm0
 	movq %mm0,0(%esi)
 	pfsub %mm2,%mm1
 	pfmul %mm3,%mm1
 	movd %mm1,60(%esi)
 	psrlq $32,%mm1
 	movd %mm1,56(%esi)
 	/* 16,30 */
 	movq 64(%ebx),%mm0
 	movq %mm0,%mm1
 	movd 124(%ebx),%mm2
 	punpckldq 120(%ebx),%mm2
 	pfadd %mm2,%mm0
 	movq %mm0,64(%esi)
 	pfsubr %mm2,%mm1
 	pfmul %mm3,%mm1
 	movd %mm1,124(%esi)
 	psrlq $32,%mm1
 	movd %mm1,120(%esi)
 	/* 2,12 */
 	movq 8(%ebx),%mm4
 	movq %mm4,%mm5
 	movd 52(%ebx),%mm6
 	punpckldq 48(%ebx),%mm6
 	movq 8(%eax),%mm7
 	pfadd %mm6,%mm4
 	movq %mm4,8(%esi)
 	pfsub %mm6,%mm5
 	pfmul %mm7,%mm5
 	movd %mm5,52(%esi)
 	psrlq $32,%mm5
 	movd %mm5,48(%esi)
 	/* 18,28 */
 	movq 72(%ebx),%mm4
 	movq %mm4,%mm5
 	movd 116(%ebx),%mm6
 	punpckldq 112(%ebx),%mm6
 	pfadd %mm6,%mm4
 	movq %mm4,72(%esi)
 	pfsubr %mm6,%mm5
 	pfmul %mm7,%mm5
 	movd %mm5,116(%esi)
 	psrlq $32,%mm5
 	movd %mm5,112(%esi)
 	/* 4,10 */
 	movq 16(%ebx),%mm0
 	movq %mm0,%mm1
 	movd 44(%ebx),%mm2
 	punpckldq 40(%ebx),%mm2
 	movq 16(%eax),%mm3
 	pfadd %mm2,%mm0
 	movq %mm0,16(%esi)
 	pfsub %mm2,%mm1
 	pfmul %mm3,%mm1
 	movd %mm1,44(%esi)
 	psrlq $32,%mm1
 	movd %mm1,40(%esi)
 	/* 20,26 */
 	movq 80(%ebx),%mm0
 	movq %mm0,%mm1
 	movd 108(%ebx),%mm2
 	punpckldq 104(%ebx),%mm2
 	pfadd %mm2,%mm0
 	movq %mm0,80(%esi)
 	pfsubr %mm2,%mm1
 	pfmul %mm3,%mm1
 	movd %mm1,108(%esi)
 	psrlq $32,%mm1
 	movd %mm1,104(%esi)
 	/* 6,8 */
 	movq 24(%ebx),%mm4
 	movq %mm4,%mm5
 	movd 36(%ebx),%mm6
 	punpckldq 32(%ebx),%mm6
 	movq 24(%eax),%mm7
 	pfadd %mm6,%mm4
 	movq %mm4,24(%esi)
 	pfsub %mm6,%mm5
 	pfmul %mm7,%mm5
 	movd %mm5,36(%esi)
 	psrlq $32,%mm5
 	movd %mm5,32(%esi)
 	/* 22,24 */
 	movq 88(%ebx),%mm4
 	movq %mm4,%mm5
 	movd 100(%ebx),%mm6
 	punpckldq 96(%ebx),%mm6
 	pfadd %mm6,%mm4
 	movq %mm4,88(%esi)
 	pfsubr %mm6,%mm5
 	pfmul %mm7,%mm5
 	movd %mm5,100(%esi)
 	psrlq $32,%mm5
 	movd %mm5,96(%esi)
 	/* 3 */
 	/* Phase 3: second half (%esi) -> first half (%ebx), table pnts[2].
 	   %mm0/%mm1 hold the four cosines for the whole phase.  Butterflies
 	   over groups of 8 values; the groups at bytes 32.. and 96.. use
 	   pfsubr (reversed difference). */
 	movl ASM_NAME(pnts)+8,%eax
 	movq 0(%eax),%mm0
 	movq 8(%eax),%mm1
 	/* 0,6 */
 	movq 0(%esi),%mm2
 	movq %mm2,%mm3
 	movd 28(%esi),%mm4
 	punpckldq 24(%esi),%mm4
 	pfadd %mm4,%mm2
 	pfsub %mm4,%mm3
 	pfmul %mm0,%mm3
 	movq %mm2,0(%ebx)
 	movd %mm3,28(%ebx)
 	psrlq $32,%mm3
 	movd %mm3,24(%ebx)
 	/* 2,4 */
 	movq 8(%esi),%mm5
 	movq %mm5,%mm6
 	movd 20(%esi),%mm7
 	punpckldq 16(%esi),%mm7
 	pfadd %mm7,%mm5
 	pfsub %mm7,%mm6
 	pfmul %mm1,%mm6
 	movq %mm5,8(%ebx)
 	movd %mm6,20(%ebx)
 	psrlq $32,%mm6
 	movd %mm6,16(%ebx)
 	/* 8,14 */
 	movq 32(%esi),%mm2
 	movq %mm2,%mm3
 	movd 60(%esi),%mm4
 	punpckldq 56(%esi),%mm4
 	pfadd %mm4,%mm2
 	pfsubr %mm4,%mm3
 	pfmul %mm0,%mm3
 	movq %mm2,32(%ebx)
 	movd %mm3,60(%ebx)
 	psrlq $32,%mm3
 	movd %mm3,56(%ebx)
 	/* 10,12 */
 	movq 40(%esi),%mm5
 	movq %mm5,%mm6
 	movd 52(%esi),%mm7
 	punpckldq 48(%esi),%mm7
 	pfadd %mm7,%mm5
 	pfsubr %mm7,%mm6
 	pfmul %mm1,%mm6
 	movq %mm5,40(%ebx)
 	movd %mm6,52(%ebx)
 	psrlq $32,%mm6
 	movd %mm6,48(%ebx)
 	/* 16,22 */
 	movq 64(%esi),%mm2
 	movq %mm2,%mm3
 	movd 92(%esi),%mm4
 	punpckldq 88(%esi),%mm4
 	pfadd %mm4,%mm2
 	pfsub %mm4,%mm3
 	pfmul %mm0,%mm3
 	movq %mm2,64(%ebx)
 	movd %mm3,92(%ebx)
 	psrlq $32,%mm3
 	movd %mm3,88(%ebx)
 	/*  18,20 */
 	movq 72(%esi),%mm5
 	movq %mm5,%mm6
 	movd 84(%esi),%mm7
 	punpckldq 80(%esi),%mm7
 	pfadd %mm7,%mm5
 	pfsub %mm7,%mm6
 	pfmul %mm1,%mm6
 	movq %mm5,72(%ebx)
 	movd %mm6,84(%ebx)
 	psrlq $32,%mm6
 	movd %mm6,80(%ebx)
 	/*  24,30 */
 	movq 96(%esi),%mm2
 	movq %mm2,%mm3
 	movd 124(%esi),%mm4
 	punpckldq 120(%esi),%mm4
 	pfadd %mm4,%mm2
 	pfsubr %mm4,%mm3
 	pfmul %mm0,%mm3
 	movq %mm2,96(%ebx)
 	movd %mm3,124(%ebx)
 	psrlq $32,%mm3
 	movd %mm3,120(%ebx)
 	/*  26,28 */
 	movq 104(%esi),%mm5
 	movq %mm5,%mm6
 	movd 116(%esi),%mm7
 	punpckldq 112(%esi),%mm7
 	pfadd %mm7,%mm5
 	pfsubr %mm7,%mm6
 	pfmul %mm1,%mm6
 	movq %mm5,104(%ebx)
 	movd %mm6,116(%ebx)
 	psrlq $32,%mm6
 	movd %mm6,112(%ebx)
 	/*  4 */
 	/* Phase 4: first half (%ebx) -> second half (%esi), table pnts[3].
 	   %mm0 holds the two cosines.  Butterflies over groups of 4 values;
 	   the comment before each group is its first dword index.  Groups
 	   alternate between pfsub and pfsubr. */
 	movl ASM_NAME(pnts)+12,%eax
 	movq 0(%eax),%mm0
 	/*  0 */
 	movq 0(%ebx),%mm1
 	movq %mm1,%mm2
 	movd 12(%ebx),%mm3
 	punpckldq 8(%ebx),%mm3
 	pfadd %mm3,%mm1
 	pfsub %mm3,%mm2
 	pfmul %mm0,%mm2
 	movq %mm1,0(%esi)
 	movd %mm2,12(%esi)
 	psrlq $32,%mm2
 	movd %mm2,8(%esi)
 	/*  4 */
 	movq 16(%ebx),%mm4
 	movq %mm4,%mm5
 	movd 28(%ebx),%mm6
 	punpckldq 24(%ebx),%mm6
 	pfadd %mm6,%mm4
 	pfsubr %mm6,%mm5
 	pfmul %mm0,%mm5
 	movq %mm4,16(%esi)
 	movd %mm5,28(%esi)
 	psrlq $32,%mm5
 	movd %mm5,24(%esi)
 	/*  8 */
 	movq 32(%ebx),%mm1
 	movq %mm1,%mm2
 	movd 44(%ebx),%mm3
 	punpckldq 40(%ebx),%mm3
 	pfadd %mm3,%mm1
 	pfsub %mm3,%mm2
 	pfmul %mm0,%mm2
 	movq %mm1,32(%esi)
 	movd %mm2,44(%esi)
 	psrlq $32,%mm2
 	movd %mm2,40(%esi)
 	/*  12 */
 	movq 48(%ebx),%mm4
 	movq %mm4,%mm5
 	movd 60(%ebx),%mm6
 	punpckldq 56(%ebx),%mm6
 	pfadd %mm6,%mm4
 	pfsubr %mm6,%mm5
 	pfmul %mm0,%mm5
 	movq %mm4,48(%esi)
 	movd %mm5,60(%esi)
 	psrlq $32,%mm5
 	movd %mm5,56(%esi)
 	/*  16 */
 	movq 64(%ebx),%mm1
 	movq %mm1,%mm2
 	movd 76(%ebx),%mm3
 	punpckldq 72(%ebx),%mm3
 	pfadd %mm3,%mm1
 	pfsub %mm3,%mm2
 	pfmul %mm0,%mm2
 	movq %mm1,64(%esi)
 	movd %mm2,76(%esi)
 	psrlq $32,%mm2
 	movd %mm2,72(%esi)
 	/*  20 */
 	movq 80(%ebx),%mm4
 	movq %mm4,%mm5
 	movd 92(%ebx),%mm6
 	punpckldq 88(%ebx),%mm6
 	pfadd %mm6,%mm4
 	pfsubr %mm6,%mm5
 	pfmul %mm0,%mm5
 	movq %mm4,80(%esi)
 	movd %mm5,92(%esi)
 	psrlq $32,%mm5
 	movd %mm5,88(%esi)
 	/*  24 */
 	movq 96(%ebx),%mm1
 	movq %mm1,%mm2
 	movd 108(%ebx),%mm3
 	punpckldq 104(%ebx),%mm3
 	pfadd %mm3,%mm1
 	pfsub %mm3,%mm2
 	pfmul %mm0,%mm2
 	movq %mm1,96(%esi)
 	movd %mm2,108(%esi)
 	psrlq $32,%mm2
 	movd %mm2,104(%esi)
 	/*  28 */
 	movq 112(%ebx),%mm4
 	movq %mm4,%mm5
 	movd 124(%ebx),%mm6
 	punpckldq 120(%ebx),%mm6
 	pfadd %mm6,%mm4
 	pfsubr %mm6,%mm5
 	pfmul %mm0,%mm5
 	movq %mm4,112(%esi)
 	movd %mm5,124(%esi)
 	psrlq $32,%mm5
 	movd %mm5,120(%esi)
 	/*  5 */
 	/* Phase 5: second half (%esi) -> first half (%ebx), table pnts[4].
 	   Constants are built from integers with pi2fd:
 	     %mm0 = ( 1.0 | -1.0 ),  %mm1 = ( 1.0 | c ),  c = first entry of pnts[4].
 	   For a pair (a,b): pfmul %mm0 gives (a,-b), pfacc then yields
 	   (a+b, a-b) and pfmul %mm1 scales the difference by c.
 	   For the pair that follows, the extra pfmul %mm0 turns the
 	   difference into (b-a), and psrlq/pfacc adds that product onto the
 	   pair's sum.  In the groups labelled 4, 12, 20 and 28 the two
 	   result registers are additionally cross-added (pfadd) before
 	   being stored. */
 	movl $-1,%eax
 	movd %eax,%mm1
 	movl $1,%eax
 	/*  L | H */
 	movd %eax,%mm0	
 	punpckldq %mm1,%mm0
 	/*  1.0 | -1.0 */
 	pi2fd %mm0,%mm0	
 	movd %eax,%mm1
 	pi2fd %mm1,%mm1
 	movl ASM_NAME(pnts)+16,%eax
 	movd 0(%eax),%mm2
 	/*  1.0 | cos0 */
 	punpckldq %mm2,%mm1
 	/*  0 */
 	movq 0(%esi),%mm2
 	movq %mm2,%mm3
 	pfmul %mm0,%mm3
 	pfacc %mm3,%mm2
 	pfmul %mm1,%mm2
 	movq %mm2,0(%ebx)
 	movq 8(%esi),%mm4
 	movq %mm4,%mm5
 	pfmul %mm0,%mm5
 	pfacc %mm5,%mm4
 	pfmul %mm0,%mm4
 	pfmul %mm1,%mm4
 	movq %mm4,%mm5
 	psrlq $32,%mm5
 	pfacc %mm5,%mm4
 	movq %mm4,8(%ebx)
 	/*  4 */
 	movq 16(%esi),%mm2
 	movq %mm2,%mm3
 	pfmul %mm0,%mm3
 	pfacc %mm3,%mm2 
 	pfmul %mm1,%mm2
 	movq 24(%esi),%mm4
 	movq %mm4,%mm5
 	pfmul %mm0,%mm5
 	pfacc %mm5,%mm4
 	pfmul %mm0,%mm4
 	pfmul %mm1,%mm4
 	movq %mm4,%mm5
 	psrlq $32,%mm5
 	pfacc %mm5,%mm4
 	movq %mm2,%mm3
 	psrlq $32,%mm3
 	pfadd %mm4,%mm2
 	pfadd %mm3,%mm4
 	movq %mm2,16(%ebx)
 	movq %mm4,24(%ebx)
 	/*  8 */
 	movq 32(%esi),%mm2
 	movq %mm2,%mm3
 	pfmul %mm0,%mm3
 	pfacc %mm3,%mm2
 	pfmul %mm1,%mm2
 	movq %mm2,32(%ebx)
 	movq 40(%esi),%mm4
 	movq %mm4,%mm5
 	pfmul %mm0,%mm5
 	pfacc %mm5,%mm4
 	pfmul %mm0,%mm4
 	pfmul %mm1,%mm4
 	movq %mm4,%mm5
 	psrlq $32,%mm5
 	pfacc %mm5,%mm4
 	movq %mm4,40(%ebx)
 	/*  12 */
 	movq 48(%esi),%mm2
 	movq %mm2,%mm3
 	pfmul %mm0,%mm3
 	pfacc %mm3,%mm2 
 	pfmul %mm1,%mm2
 	movq 56(%esi),%mm4
 	movq %mm4,%mm5
 	pfmul %mm0,%mm5
 	pfacc %mm5,%mm4
 	pfmul %mm0,%mm4
 	pfmul %mm1,%mm4
 	movq %mm4,%mm5
 	psrlq $32,%mm5
 	pfacc %mm5,%mm4
 	movq %mm2,%mm3
 	psrlq $32,%mm3
 	pfadd %mm4,%mm2
 	pfadd %mm3,%mm4
 	movq %mm2,48(%ebx)
 	movq %mm4,56(%ebx)
 	/*  16 */
 	movq 64(%esi),%mm2
 	movq %mm2,%mm3
 	pfmul %mm0,%mm3
 	pfacc %mm3,%mm2
 	pfmul %mm1,%mm2
 	movq %mm2,64(%ebx)
 	movq 72(%esi),%mm4
 	movq %mm4,%mm5
 	pfmul %mm0,%mm5
 	pfacc %mm5,%mm4
 	pfmul %mm0,%mm4
 	pfmul %mm1,%mm4
 	movq %mm4,%mm5
 	psrlq $32,%mm5
 	pfacc %mm5,%mm4
 	movq %mm4,72(%ebx)
 	/*  20 */
 	movq 80(%esi),%mm2
 	movq %mm2,%mm3
 	pfmul %mm0,%mm3
 	pfacc %mm3,%mm2 
 	pfmul %mm1,%mm2
 	movq 88(%esi),%mm4
 	movq %mm4,%mm5
 	pfmul %mm0,%mm5
 	pfacc %mm5,%mm4
 	pfmul %mm0,%mm4
 	pfmul %mm1,%mm4
 	movq %mm4,%mm5
 	psrlq $32,%mm5
 	pfacc %mm5,%mm4
 	movq %mm2,%mm3
 	psrlq $32,%mm3
 	pfadd %mm4,%mm2
 	pfadd %mm3,%mm4
 	movq %mm2,80(%ebx)
 	movq %mm4,88(%ebx)
 	/*  24 */
 	movq 96(%esi),%mm2
 	movq %mm2,%mm3
 	pfmul %mm0,%mm3
 	pfacc %mm3,%mm2
 	pfmul %mm1,%mm2
 	movq %mm2,96(%ebx)
 	movq 104(%esi),%mm4
 	movq %mm4,%mm5
 	pfmul %mm0,%mm5
 	pfacc %mm5,%mm4
 	pfmul %mm0,%mm4
 	pfmul %mm1,%mm4
 	movq %mm4,%mm5
 	psrlq $32,%mm5
 	pfacc %mm5,%mm4
 	movq %mm4,104(%ebx)
 	/*  28 */
 	movq 112(%esi),%mm2
 	movq %mm2,%mm3
 	pfmul %mm0,%mm3
 	pfacc %mm3,%mm2 
 	pfmul %mm1,%mm2
 	movq 120(%esi),%mm4
 	movq %mm4,%mm5
 	pfmul %mm0,%mm5
 	pfacc %mm5,%mm4
 	pfmul %mm0,%mm4
 	pfmul %mm1,%mm4
 	movq %mm4,%mm5
 	psrlq $32,%mm5
 	pfacc %mm5,%mm4
 	movq %mm2,%mm3
 	psrlq $32,%mm3
 	pfadd %mm4,%mm2
 	pfadd %mm3,%mm4
 	movq %mm2,112(%ebx)
 	movq %mm4,120(%ebx)
 	/*  Phase6 */
 	/* Scatter the first scratch half to the two outputs, doing the
 	   remaining additions on the way.  Bytes 0..31 are copied as plain
 	   dwords through %eax; note that dword 1 (4(%ebx)) goes to offset 0
 	   of both outputs.  Bytes 32..127 are combined with pfadd; after each
 	   sum the low dword goes to the %ebp output and the high dword
 	   (psrlq $32) to the %edx output. */
 	movl 0(%ebx),%eax
 	movl %eax,1024(%ebp)
 	movl 4(%ebx),%eax
 	movl %eax,0(%ebp)
 	movl %eax,0(%edx)
 	movl 8(%ebx),%eax
 	movl %eax,512(%ebp)
 	movl 12(%ebx),%eax
 	movl %eax,512(%edx)
 	movl 16(%ebx),%eax
 	movl %eax,768(%ebp)
 	movl 20(%ebx),%eax
 	movl %eax,256(%edx)
 	movl 24(%ebx),%eax
 	movl %eax,256(%ebp)
 	movl 28(%ebx),%eax
 	movl %eax,768(%edx)
 	movq 32(%ebx),%mm0
 	movq 48(%ebx),%mm1
 	pfadd %mm1,%mm0
 	movd %mm0,896(%ebp)
 	psrlq $32,%mm0
 	movd %mm0,128(%edx)
 	movq 40(%ebx),%mm2
 	pfadd %mm2,%mm1
 	movd %mm1,640(%ebp)
 	psrlq $32,%mm1
 	movd %mm1,384(%edx)
 	movq 56(%ebx),%mm3
 	pfadd %mm3,%mm2
 	movd %mm2,384(%ebp)
        psrlq $32,%mm2
 	movd %mm2,640(%edx)
 	/* movd zero-fills the high half, so only the low lane of %mm3 changes */
 	movd 36(%ebx),%mm4
 	pfadd %mm4,%mm3
 	movd %mm3,128(%ebp)
 	psrlq $32,%mm3
 	movd %mm3,896(%edx)
 	movq 96(%ebx),%mm0
 	movq 64(%ebx),%mm1
 	movq 112(%ebx),%mm2
        pfadd %mm2,%mm0
 	movq %mm0,%mm3
 	pfadd %mm1,%mm3
 	movd %mm3,960(%ebp)
 	psrlq $32,%mm3
 	movd %mm3,64(%edx)
 	movq 80(%ebx),%mm1
 	pfadd %mm1,%mm0
 	movd %mm0,832(%ebp)
        psrlq $32,%mm0
 	movd %mm0,192(%edx)
 	movq 104(%ebx),%mm3
 	pfadd %mm3,%mm2
 	movq %mm2,%mm4
 	pfadd %mm1,%mm4
 	movd %mm4,704(%ebp)
 	psrlq $32,%mm4
 	movd %mm4,320(%edx)
 	movq 72(%ebx),%mm1
 	pfadd %mm1,%mm2
 	movd %mm2,576(%ebp)
 	psrlq $32,%mm2
 	movd %mm2,448(%edx)
 	movq 120(%ebx),%mm4
 	pfadd %mm4,%mm3
 	movq %mm3,%mm5
 	pfadd %mm1,%mm5
 	movd %mm5,448(%ebp)
 	psrlq $32,%mm5
 	movd %mm5,576(%edx)
 	movq 88(%ebx),%mm1
 	pfadd %mm1,%mm3
 	movd %mm3,320(%ebp)
 	psrlq $32,%mm3
 	movd %mm3,704(%edx)
 	movd 100(%ebx),%mm5
 	pfadd %mm5,%mm4
 	movq %mm4,%mm6
 	pfadd %mm1,%mm6
 	movd %mm6,192(%ebp)
 	psrlq $32,%mm6
 	movd %mm6,832(%edx)	
 	movd 68(%ebx),%mm1
 	pfadd %mm1,%mm4
 	movd %mm4,64(%ebp)
 	psrlq $32,%mm4
 	movd %mm4,960(%edx)
 	/*  femms */
 	/* restore the saved registers in reverse push order, drop the scratch area */
        popl %ebx
 	popl %esi
 	popl %edi
        popl %ebp
 	addl $256,%esp
        ret
--- a/src/libmpg123/dct64_3dnowext.S
+++ b/src/libmpg123/dct64_3dnowext.S
@@ -0,0 +1,712 @@
 /*
 	dct64_3dnowext: extended 3DNow optimized DCT64
 	copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	Transformed back into standalone asm, with help of
 	gcc -S -DHAVE_CONFIG_H -I.  -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing  -DREAL_IS_FLOAT -c -o dct64_3dnowext.{S,c}
 	MPlayer comment follows.
 */
 /*
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 * Partial 3dnowex-DSP! optimization by Nick Kurshev
 *
 * TODO: optimize scalar 3dnow! code
 * Warning: Phases 7 & 8 are not tested
 */
 #include "mangle.h"
 	.data
 	ALIGN4
 	/* .type	plus_1f, @object
 	   .size	plus_1f, 4  */
 /* 1.0f as an IEEE-754 single (0x3F800000); dct64_3dnowext loads it into
    the low dword of %mm6. */
 plus_1f:
 	.long	1065353216
 	ALIGN8
 	/* .type	x_plus_minus_3dnow, @object
 	   .size	x_plus_minus_3dnow, 8 */
 /* 64-bit XOR mask: low dword 0, high dword 0x80000000 (the float sign bit).
    dct64_3dnowext applies it with pxor, which negates only the upper float
    of an MMX register. */
 x_plus_minus_3dnow:
 	.long	0
 	.long	-2147483648
 	.text
 	ALIGN32,,31
 .globl ASM_NAME(dct64_3dnowext)
 	/* .type	ASM_NAME(dct64_3dnowext), @function */
 /*
 	dct64_3dnowext: 32-point DCT butterfly network using 3DNow! plus the
 	extended 3DNow! instructions pswapd and pf2iw.
 	Three pointer arguments are read from the stack (cdecl, %ebp frame):
 	   8(%ebp) -> %esi, 12(%ebp) -> %edi : outputs
 	  16(%ebp) -> %eax                   : input; bytes 0..127 are read
 	NOTE(review): presumably out0, out1 and samples as in the C dct64()
 	-- confirm against the prototype.
 	%ebx points at the flat cosine table costab_mmxsse (byte offsets
 	0..123 are used).  The 256 bytes reserved on the stack form two
 	128-byte scratch halves, %ecx = first and %edx = second, and the
 	phases ping-pong between them.
 	Two output stages follow the butterflies: one storing 32-bit values
 	at multiples of 64 bytes, and one (.L01) converting with pf2iw and
 	storing 16-bit values at multiples of 32 bytes.  See the review note
 	at the jnz that selects between them.
 */
 ASM_NAME(dct64_3dnowext):
 	pushl	%ebp
 	movl	%esp, %ebp
 	pushl	%edi
 	pushl	%esi
 	pushl	%ebx
 	subl	$256, %esp
 #APP
 	movl 16(%ebp),%eax
 	/* 12 bytes of saved registers + 256 bytes scratch = 268, so
 	   -268(%ebp) equals %esp: %ecx = first half, %edx = second half */
 	leal 128+-268(%ebp),%edx
 	movl 8(%ebp),%esi
 	movl 12(%ebp),%edi
 	movl $ASM_NAME(costab_mmxsse),%ebx
 	leal -268(%ebp),%ecx
 	/* Phase 1: input (%eax) -> %edx, cosines at 0..56(%ebx).
 	   Two register pairs are processed in parallel.  pswapd loads the
 	   mirrored pair with its dwords exchanged; for each i:
 	     buf[i]    = in[i] + in[31-i]
 	     buf[31-i] = (in[i] - in[31-i]) * cos[i]
 	   The products are swapped back (pswapd) before being stored. */
 	movq	(%eax), %mm0
 	movq	8(%eax), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	120(%eax), %mm1
 	pswapd	112(%eax), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, (%edx)
 	movq	%mm4, 8(%edx)
 	pfsub	%mm1, %mm3
 	pfsub	%mm5, %mm7
 	pfmul	(%ebx), %mm3
 	pfmul	8(%ebx), %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 120(%edx)
 	movq	%mm7, 112(%edx)
 	movq	16(%eax), %mm0
 	movq	24(%eax), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	104(%eax), %mm1
 	pswapd	96(%eax), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 16(%edx)
 	movq	%mm4, 24(%edx)
 	pfsub	%mm1, %mm3
 	pfsub	%mm5, %mm7
 	pfmul	16(%ebx), %mm3
 	pfmul	24(%ebx), %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 104(%edx)
 	movq	%mm7, 96(%edx)
 	movq	32(%eax), %mm0
 	movq	40(%eax), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	88(%eax), %mm1
 	pswapd	80(%eax), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 32(%edx)
 	movq	%mm4, 40(%edx)
 	pfsub	%mm1, %mm3
 	pfsub	%mm5, %mm7
 	pfmul	32(%ebx), %mm3
 	pfmul	40(%ebx), %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 88(%edx)
 	movq	%mm7, 80(%edx)
 	movq	48(%eax), %mm0
 	movq	56(%eax), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	72(%eax), %mm1
 	pswapd	64(%eax), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 48(%edx)
 	movq	%mm4, 56(%edx)
 	pfsub	%mm1, %mm3
 	pfsub	%mm5, %mm7
 	pfmul	48(%ebx), %mm3
 	pfmul	56(%ebx), %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 72(%edx)
 	movq	%mm7, 64(%edx)
 	/* Phase 2: %edx -> %ecx, cosines at 64..88(%ebx).  Butterflies over
 	   groups of 16 values; the group at bytes 64..127 uses pfsubr
 	   (source minus destination), i.e. the reversed difference. */
 	movq	(%edx), %mm0
 	movq	8(%edx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	56(%edx), %mm1
 	pswapd	48(%edx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, (%ecx)
 	movq	%mm4, 8(%ecx)
 	pfsub	%mm1, %mm3
 	pfsub	%mm5, %mm7
 	pfmul	64(%ebx), %mm3
 	pfmul	72(%ebx), %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 56(%ecx)
 	movq	%mm7, 48(%ecx)
 	movq	16(%edx), %mm0
 	movq	24(%edx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	40(%edx), %mm1
 	pswapd	32(%edx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 16(%ecx)
 	movq	%mm4, 24(%ecx)
 	pfsub	%mm1, %mm3
 	pfsub	%mm5, %mm7
 	pfmul	80(%ebx), %mm3
 	pfmul	88(%ebx), %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 40(%ecx)
 	movq	%mm7, 32(%ecx)
 	movq	64(%edx), %mm0
 	movq	72(%edx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	120(%edx), %mm1
 	pswapd	112(%edx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 64(%ecx)
 	movq	%mm4, 72(%ecx)
 	pfsubr	%mm1, %mm3
 	pfsubr	%mm5, %mm7
 	pfmul	64(%ebx), %mm3
 	pfmul	72(%ebx), %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 120(%ecx)
 	movq	%mm7, 112(%ecx)
 	movq	80(%edx), %mm0
 	movq	88(%edx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	104(%edx), %mm1
 	pswapd	96(%edx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 80(%ecx)
 	movq	%mm4, 88(%ecx)
 	pfsubr	%mm1, %mm3
 	pfsubr	%mm5, %mm7
 	pfmul	80(%ebx), %mm3
 	pfmul	88(%ebx), %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 104(%ecx)
 	movq	%mm7, 96(%ecx)
 	/* Phase 3: %ecx -> %edx.  %mm2/%mm6 keep the four cosines from
 	   96(%ebx) and 104(%ebx) for the whole phase.  Butterflies over
 	   groups of 8 values, alternating pfsub / pfsubr per group. */
 	movq	96(%ebx), %mm2
 	movq	104(%ebx), %mm6
 	movq	(%ecx), %mm0
 	movq	8(%ecx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	24(%ecx), %mm1
 	pswapd	16(%ecx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, (%edx)
 	movq	%mm4, 8(%edx)
 	pfsub	%mm1, %mm3
 	pfsub	%mm5, %mm7
 	pfmul	%mm2, %mm3
 	pfmul	%mm6, %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 24(%edx)
 	movq	%mm7, 16(%edx)
 	movq	32(%ecx), %mm0
 	movq	40(%ecx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	56(%ecx), %mm1
 	pswapd	48(%ecx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 32(%edx)
 	movq	%mm4, 40(%edx)
 	pfsubr	%mm1, %mm3
 	pfsubr	%mm5, %mm7
 	pfmul	%mm2, %mm3
 	pfmul	%mm6, %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 56(%edx)
 	movq	%mm7, 48(%edx)
 	movq	64(%ecx), %mm0
 	movq	72(%ecx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	88(%ecx), %mm1
 	pswapd	80(%ecx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 64(%edx)
 	movq	%mm4, 72(%edx)
 	pfsub	%mm1, %mm3
 	pfsub	%mm5, %mm7
 	pfmul	%mm2, %mm3
 	pfmul	%mm6, %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 88(%edx)
 	movq	%mm7, 80(%edx)
 	movq	96(%ecx), %mm0
 	movq	104(%ecx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	120(%ecx), %mm1
 	pswapd	112(%ecx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 96(%edx)
 	movq	%mm4, 104(%edx)
 	pfsubr	%mm1, %mm3
 	pfsubr	%mm5, %mm7
 	pfmul	%mm2, %mm3
 	pfmul	%mm6, %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 120(%edx)
 	movq	%mm7, 112(%edx)
 	/* Phase 4: %edx -> %ecx.  %mm2 keeps the two cosines from
 	   112(%ebx).  Butterflies over groups of 4 values, two groups per
 	   step: the first with pfsub, the second with pfsubr. */
 	movq	112(%ebx), %mm2
 	movq	(%edx), %mm0
 	movq	16(%edx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	8(%edx), %mm1
 	pswapd	24(%edx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, (%ecx)
 	movq	%mm4, 16(%ecx)
 	pfsub	%mm1, %mm3
 	pfsubr	%mm5, %mm7
 	pfmul	%mm2, %mm3
 	pfmul	%mm2, %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 8(%ecx)
 	movq	%mm7, 24(%ecx)
 	movq	32(%edx), %mm0
 	movq	48(%edx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	40(%edx), %mm1
 	pswapd	56(%edx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 32(%ecx)
 	movq	%mm4, 48(%ecx)
 	pfsub	%mm1, %mm3
 	pfsubr	%mm5, %mm7
 	pfmul	%mm2, %mm3
 	pfmul	%mm2, %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 40(%ecx)
 	movq	%mm7, 56(%ecx)
 	movq	64(%edx), %mm0
 	movq	80(%edx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	72(%edx), %mm1
 	pswapd	88(%edx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 64(%ecx)
 	movq	%mm4, 80(%ecx)
 	pfsub	%mm1, %mm3
 	pfsubr	%mm5, %mm7
 	pfmul	%mm2, %mm3
 	pfmul	%mm2, %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 72(%ecx)
 	movq	%mm7, 88(%ecx)
 	movq	96(%edx), %mm0
 	movq	112(%edx), %mm4
 	movq	%mm0, %mm3
 	movq	%mm4, %mm7
 	pswapd	104(%edx), %mm1
 	pswapd	120(%edx), %mm5
 	pfadd	%mm1, %mm0
 	pfadd	%mm5, %mm4
 	movq	%mm0, 96(%ecx)
 	movq	%mm4, 112(%ecx)
 	pfsub	%mm1, %mm3
 	pfsubr	%mm5, %mm7
 	pfmul	%mm2, %mm3
 	pfmul	%mm2, %mm7
 	pswapd	%mm3, %mm3
 	pswapd	%mm7, %mm7
 	movq	%mm3, 104(%ecx)
 	movq	%mm7, 120(%ecx)
 	/* Phase 5: %ecx -> %edx for scratch bytes 32..127 (bytes 0..31 stay
 	   in %ecx and are consumed directly by the output stage).
 	     %mm6 = ( 1.0 | c ), c = the dword at 120(%ebx)
 	     %mm7 = sign mask for the high dword
 	   For a pair (a,b): pxor %mm7 gives (a,-b), pfacc then yields
 	   (a+b, a-b) and pfmul %mm6 scales the difference by c.
 	   The rest is written scalar-style: movd loads one value, the
 	   64-bit memory operands of pfadd/pfsub/pfmul also touch the high
 	   lane, and movd / punpckldq pick up only the low lane of each
 	   result.
 	   NOTE(review): the high-lane results look like don't-cares --
 	   confirm before restructuring. */
 	movd	plus_1f, %mm6
 	punpckldq 120(%ebx), %mm6
 	movq	x_plus_minus_3dnow, %mm7
 	movq	32(%ecx), %mm0
 	movq	64(%ecx), %mm2
 	movq	%mm0, %mm1
 	movq	%mm2, %mm3
 	pxor	%mm7, %mm1
 	pxor	%mm7, %mm3
 	pfacc	%mm1, %mm0
 	pfacc	%mm3, %mm2
 	pfmul	%mm6, %mm0
 	pfmul	%mm6, %mm2
 	movq	%mm0, 32(%edx)
 	movq	%mm2, 64(%edx)
 	/* dwords 10/11 and 18/19 are handled together: low lane from the
 	   group at byte 40, high lane from the group at byte 72 */
 	movd	44(%ecx), %mm0
 	movd	40(%ecx), %mm2
 	movd	120(%ebx), %mm3
 	punpckldq 76(%ecx), %mm0
 	punpckldq 72(%ecx), %mm2
 	punpckldq %mm3, %mm3
 	movq	%mm0, %mm4
 	movq	%mm2, %mm5
 	pfsub	%mm2, %mm0
 	pfmul	%mm3, %mm0
 	movq	%mm0, %mm1
 	pfadd	%mm5, %mm0
 	pfadd	%mm4, %mm0
 	movq	%mm0, %mm2
 	punpckldq %mm1, %mm0
 	punpckhdq %mm1, %mm2
 	movq	%mm0, 40(%edx)
 	movq	%mm2, 72(%edx)
 	movd   48(%ecx), %mm3
 	movd   60(%ecx), %mm2
 	pfsub  52(%ecx), %mm3
 	pfsub  56(%ecx), %mm2
 	pfmul 120(%ebx), %mm3
 	pfmul 120(%ebx), %mm2
 	movq	%mm2, %mm1
 	pfadd  56(%ecx), %mm1
 	pfadd  60(%ecx), %mm1
 	movq	%mm1, %mm0
 	pfadd  48(%ecx), %mm0
 	pfadd  52(%ecx), %mm0
 	pfadd	%mm3, %mm1
 	punpckldq %mm2, %mm1
 	pfadd	%mm3, %mm2
 	punpckldq %mm2, %mm0
 	movq	%mm1, 56(%edx)
 	movq	%mm0, 48(%edx)
 	movd   92(%ecx), %mm1
 	pfsub  88(%ecx), %mm1
 	pfmul 120(%ebx), %mm1
 	movd   %mm1, 92(%edx)
 	pfadd  92(%ecx), %mm1
 	pfadd  88(%ecx), %mm1
 	movq   %mm1, %mm0
 	pfadd  80(%ecx), %mm0
 	pfadd  84(%ecx), %mm0
 	movd   %mm0, 80(%edx)
 	movd   80(%ecx), %mm0
 	pfsub  84(%ecx), %mm0
 	pfmul 120(%ebx), %mm0
 	pfadd  %mm0, %mm1
 	pfadd  92(%edx), %mm0
 	punpckldq %mm1, %mm0
 	/* unaligned 8-byte store covering dwords 21 and 22 */
 	movq   %mm0, 84(%edx)
 	movq	96(%ecx), %mm0
 	movq	%mm0, %mm1
 	pxor	%mm7, %mm1
 	pfacc	%mm1, %mm0
 	pfmul	%mm6, %mm0
 	movq	%mm0, 96(%edx)
 	movd  108(%ecx), %mm0
 	pfsub 104(%ecx), %mm0
 	pfmul 120(%ebx), %mm0
 	movd  %mm0, 108(%edx)
 	pfadd 104(%ecx), %mm0
 	pfadd 108(%ecx), %mm0
 	movd  %mm0, 104(%edx)
 	movd  124(%ecx), %mm1
 	pfsub 120(%ecx), %mm1
 	pfmul 120(%ebx), %mm1
 	movd  %mm1, 124(%edx)
 	pfadd 120(%ecx), %mm1
 	pfadd 124(%ecx), %mm1
 	movq  %mm1, %mm0
 	pfadd 112(%ecx), %mm0
 	pfadd 116(%ecx), %mm0
 	movd  %mm0, 112(%edx)
 	movd  112(%ecx), %mm0
 	pfsub 116(%ecx), %mm0
 	pfmul 120(%ebx), %mm0
 	pfadd %mm0,%mm1
 	/* this 8-byte read at 124(%edx) extends 4 bytes past the scratch
 	   area, into the saved-register slot above it on the stack */
 	pfadd 124(%edx), %mm0
 	punpckldq %mm1, %mm0
 	/* unaligned 8-byte store covering dwords 29 and 30 */
 	movq  %mm0, 116(%edx)
 	/* NOTE(review): every instruction between `subl $256, %esp` and
 	   this point is a movl/leal or an MMX/3DNow! operation, none of
 	   which writes EFLAGS.  ZF therefore still reflects that subl
 	   (whose result is non-zero for any real stack pointer), which
 	   would make this branch always taken and the 32-bit output stage
 	   below unreachable.  Confirm which output format callers expect
 	   before touching it. */
 	jnz .L01
 	/* Output stage A: 32-bit stores.  %esi receives one dword at every
 	   multiple of 64 bytes from 0 to 1024, %edi from 0 to 960.  Scratch
 	   bytes 0..31 come from %ecx (their last butterfly is done here),
 	   bytes 32..127 from %edx. */
 	movd      (%ecx), %mm0
 	pfadd    4(%ecx), %mm0
 	movd     %mm0, 1024(%esi)
 	movd      (%ecx), %mm0
 	pfsub    4(%ecx), %mm0
 	pfmul  120(%ebx), %mm0
 	movd      %mm0, (%esi)
 	movd      %mm0, (%edi)
 	movd   12(%ecx), %mm0
 	pfsub   8(%ecx), %mm0
 	pfmul 120(%ebx), %mm0
 	movd    %mm0, 512(%edi)
 	pfadd   12(%ecx), %mm0
 	pfadd   8(%ecx), %mm0
 	movd    %mm0, 512(%esi)
 	movd   16(%ecx), %mm0
 	pfsub  20(%ecx), %mm0
 	pfmul 120(%ebx), %mm0
 	movq	%mm0, %mm3
 	movd   28(%ecx), %mm0
 	pfsub  24(%ecx), %mm0
 	pfmul 120(%ebx), %mm0
 	movd    %mm0, 768(%edi)
 	movq	%mm0, %mm2
 	pfadd  24(%ecx), %mm0
 	pfadd  28(%ecx), %mm0
 	movq	%mm0, %mm1
 	pfadd  16(%ecx), %mm0
 	pfadd  20(%ecx), %mm0
 	movd   %mm0, 768(%esi)
 	pfadd  %mm3, %mm1
 	movd   %mm1, 256(%esi)
 	pfadd  %mm3, %mm2
 	movd   %mm2, 256(%edi)
 	/* from here on: low lane -> %esi output, high lane (psrlq $32) ->
 	   %edi output */
 	movq   32(%edx), %mm0
 	movq   48(%edx), %mm1
 	pfadd  48(%edx), %mm0
 	pfadd  40(%edx), %mm1
 	movd   %mm0, 896(%esi)
 	movd   %mm1, 640(%esi)
 	psrlq  $32, %mm0
 	psrlq  $32, %mm1
 	movd   %mm0, 128(%edi)
 	movd   %mm1, 384(%edi)
 	movd   40(%edx), %mm0
 	pfadd  56(%edx), %mm0
 	movd   %mm0, 384(%esi)
 	movd   56(%edx), %mm0
 	pfadd  36(%edx), %mm0
 	movd   %mm0, 128(%esi)
 	movd   60(%edx), %mm0
 	movd   %mm0, 896(%edi)
 	pfadd  44(%edx), %mm0
 	movd   %mm0, 640(%edi)
 	movq   96(%edx), %mm0
 	movq   112(%edx), %mm2
 	movq   104(%edx), %mm4
 	pfadd  112(%edx), %mm0
 	pfadd  104(%edx), %mm2
 	pfadd  120(%edx), %mm4
 	movq   %mm0, %mm1
 	movq   %mm2, %mm3
 	movq   %mm4, %mm5
 	pfadd  64(%edx), %mm0
 	pfadd  80(%edx), %mm2
 	pfadd  72(%edx), %mm4
 	movd   %mm0, 960(%esi)
 	movd   %mm2, 704(%esi)
 	movd   %mm4, 448(%esi)
 	psrlq  $32, %mm0
 	psrlq  $32, %mm2
 	psrlq  $32, %mm4
 	movd   %mm0, 64(%edi)
 	movd   %mm2, 320(%edi)
 	movd   %mm4, 576(%edi)
 	pfadd  80(%edx), %mm1
 	pfadd  72(%edx), %mm3
 	pfadd  88(%edx), %mm5
 	movd   %mm1, 832(%esi)
 	movd   %mm3, 576(%esi)
 	movd   %mm5, 320(%esi)
 	psrlq  $32, %mm1
 	psrlq  $32, %mm3
 	psrlq  $32, %mm5
 	movd   %mm1, 192(%edi)
 	movd   %mm3, 448(%edi)
 	movd   %mm5, 704(%edi)
 	movd   120(%edx), %mm0
 	pfadd  100(%edx), %mm0
 	movq   %mm0, %mm1
 	pfadd  88(%edx), %mm0
 	movd   %mm0, 192(%esi)
 	pfadd  68(%edx), %mm1
 	movd   %mm1, 64(%esi)
 	movd  124(%edx), %mm0
 	movd  %mm0, 960(%edi)
 	pfadd  92(%edx), %mm0
 	movd  %mm0, 832(%edi)
 	jmp	.L_bye
 	/* Output stage B: same sums as stage A, but every result goes
 	   through pf2iw (packed float -> integer word, sign-extended) and
 	   only the low 16 bits are stored with movw.  %esi receives one
 	   word at every multiple of 32 bytes from 0 to 512; %edi at 32..480
 	   here, plus offset 0 via the final movsw. */
 .L01:	
 	movq	(%ecx), %mm0
 	movq	%mm0, %mm1
 	pxor    %mm7, %mm1
 	pfacc	%mm1, %mm0
 	pfmul	%mm6, %mm0
 	pf2iw	%mm0, %mm0
 	movd	%mm0, %eax
 	movw    %ax, 512(%esi)
 	psrlq	$32, %mm0
 	movd	%mm0, %eax
 	movw    %ax, (%esi)
 	movd    12(%ecx), %mm0
 	pfsub    8(%ecx), %mm0
 	pfmul  120(%ebx), %mm0
 	/* %mm7 (the sign mask) is no longer needed and is reused as a temporary */
 	pf2iw    %mm0, %mm7
 	movd	 %mm7, %eax
 	movw     %ax, 256(%edi)
 	pfadd   12(%ecx), %mm0
 	pfadd    8(%ecx), %mm0
 	pf2iw    %mm0, %mm0
 	movd	 %mm0, %eax
 	movw     %ax, 256(%esi)
 	movd   16(%ecx), %mm3
 	pfsub  20(%ecx), %mm3
 	pfmul  120(%ebx), %mm3
 	/* this copy is overwritten by the movd on the next line */
 	movq   %mm3, %mm2
 	movd   28(%ecx), %mm2
 	pfsub  24(%ecx), %mm2
 	pfmul 120(%ebx), %mm2
 	movq   %mm2, %mm1
 	pf2iw  %mm2, %mm7
 	movd   %mm7, %eax
 	movw   %ax, 384(%edi)
 	pfadd  24(%ecx), %mm1
 	pfadd  28(%ecx), %mm1
 	movq   %mm1, %mm0
 	pfadd  16(%ecx), %mm0
 	pfadd  20(%ecx), %mm0
 	pf2iw  %mm0, %mm0
 	movd   %mm0, %eax
 	movw   %ax, 384(%esi)
 	pfadd  %mm3, %mm1
 	pf2iw  %mm1, %mm1
 	movd   %mm1, %eax
 	movw   %ax, 128(%esi)
 	pfadd  %mm3, %mm2
 	pf2iw  %mm2, %mm2
 	movd   %mm2, %eax
 	movw   %ax, 128(%edi)
 	movq    32(%edx), %mm0
 	movq    48(%edx), %mm1
 	pfadd   48(%edx), %mm0
 	pfadd   40(%edx), %mm1
 	pf2iw   %mm0, %mm0
 	pf2iw   %mm1, %mm1
 	movd	%mm0, %eax
 	/* %ecx is reused as an integer temporary from here on; the first
 	   scratch half is not read again */
 	movd	%mm1, %ecx
 	movw    %ax, 448(%esi)
 	movw    %cx, 320(%esi)
 	psrlq   $32, %mm0
 	psrlq   $32, %mm1
 	movd	%mm0, %eax
 	movd	%mm1, %ecx
 	movw    %ax, 64(%edi)
 	movw    %cx, 192(%edi)
 	movd   40(%edx), %mm3
 	movd   56(%edx), %mm4
 	movd   60(%edx), %mm0
 	movd   44(%edx), %mm2
 	movd  120(%edx), %mm5
 	punpckldq %mm4, %mm3
 	punpckldq 124(%edx), %mm0
 	pfadd 100(%edx), %mm5
 	punpckldq 36(%edx), %mm4
 	punpckldq 92(%edx), %mm2
 	movq  %mm5, %mm6
 	pfadd  %mm4, %mm3
 	pf2iw  %mm0, %mm1
 	pf2iw  %mm3, %mm3
 	pfadd  88(%edx), %mm5
 	movd   %mm1, %eax
 	movd   %mm3, %ecx
 	movw   %ax, 448(%edi)
 	movw   %cx, 192(%esi)
 	pf2iw  %mm5, %mm5
 	psrlq  $32, %mm1
 	psrlq  $32, %mm3
 	/* %ebx is reused as an integer temporary from here on; the cosine
 	   table is not read again (the epilogue restores the saved %ebx) */
 	movd   %mm5, %ebx
 	movd   %mm1, %eax
 	movd   %mm3, %ecx
 	movw   %bx, 96(%esi)
 	movw   %ax, 480(%edi)
 	movw   %cx, 64(%esi)
 	pfadd  %mm2, %mm0
 	pf2iw  %mm0, %mm0
 	movd   %mm0, %eax
 	pfadd  68(%edx), %mm6
 	movw   %ax, 320(%edi)
 	psrlq  $32, %mm0
 	pf2iw  %mm6, %mm6
 	movd   %mm0, %eax
 	movd   %mm6, %ebx
 	movw   %ax, 416(%edi)
 	movw   %bx, 32(%esi)
 	movq   96(%edx), %mm0
 	movq  112(%edx), %mm2
 	movq  104(%edx), %mm4
 	pfadd %mm2, %mm0
 	pfadd %mm4, %mm2
 	pfadd 120(%edx), %mm4
 	movq  %mm0, %mm1
 	movq  %mm2, %mm3
 	movq  %mm4, %mm5
 	pfadd  64(%edx), %mm0
 	pfadd  80(%edx), %mm2
 	pfadd  72(%edx), %mm4
 	pf2iw  %mm0, %mm0
 	pf2iw  %mm2, %mm2
 	pf2iw  %mm4, %mm4
 	movd   %mm0, %eax
 	movd   %mm2, %ecx
 	movd   %mm4, %ebx
 	movw   %ax, 480(%esi)
 	movw   %cx, 352(%esi)
 	movw   %bx, 224(%esi)
 	psrlq  $32, %mm0
 	psrlq  $32, %mm2
 	psrlq  $32, %mm4
 	movd   %mm0, %eax
 	movd   %mm2, %ecx
 	movd   %mm4, %ebx
 	movw   %ax, 32(%edi)
 	movw   %cx, 160(%edi)
 	movw   %bx, 288(%edi)
 	pfadd  80(%edx), %mm1
 	pfadd  72(%edx), %mm3
 	pfadd  88(%edx), %mm5
 	pf2iw  %mm1, %mm1
 	pf2iw  %mm3, %mm3
 	pf2iw  %mm5, %mm5
 	movd   %mm1, %eax
 	movd   %mm3, %ecx
 	movd   %mm5, %ebx
 	movw   %ax, 416(%esi)
 	movw   %cx, 288(%esi)
 	movw   %bx, 160(%esi)
 	psrlq  $32, %mm1
 	psrlq  $32, %mm3
 	psrlq  $32, %mm5
 	movd   %mm1, %eax
 	movd   %mm3, %ecx
 	movd   %mm5, %ebx
 	movw   %ax, 96(%edi)
 	movw   %cx, 224(%edi)
 	movw   %bx, 352(%edi)
 	/* copies the 16-bit word at (%esi) to (%edi): offset 0 of the second
 	   output gets the same value as offset 0 of the first */
 	movsw
 .L_bye:
 	/* leave the MMX/3DNow! state so that x87 code can run again */
 	femms
 #NO_APP
 	/* drop the scratch area, restore the saved registers in reverse push order */
 	addl	$256, %esp
 	popl	%ebx
 	popl	%esi
 	popl	%edi
 	leave
 	ret
 	/* .size	ASM_NAME(dct64_3dnowext), .-ASM_NAME(dct64_3dnowext) */
--- a/src/libmpg123/dct64_altivec.c
+++ b/src/libmpg123/dct64_altivec.c
@@ -0,0 +1,325 @@
 /*
 	dct64_altivec.c: Discrete Cosine Transform (DCT) for Altivec
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 	altivec optimization by tmkk
 */
 /*
 * Discrete Cosine Transform (DCT) for subband synthesis
 *
 * -funroll-loops (for gcc) will remove the loops for better performance;
 * using loops in the source code enhances readability
 *
 *
 * TODO: write an optimized version for the down-sampling modes
 *       (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero)
 */
 #include "mpg123lib_intern.h"
 #ifndef __APPLE__
 #include <altivec.h>
 #endif
 /*
  * dct64_altivec: AltiVec version of the DCT64 used by the subband synthesis.
  *
  * out0, out1  result buffers, written with a stride of 0x10 elements:
  *             out0[0x10*16] down to out0[0] and out1[0] up to out1[0x10*15]
  * samples     input; bytes 0..127 relative to this pointer are loaded
  *             (32 values, assuming real is a 4-byte float -- TODO confirm)
  *
  * The first inner block does the multiply/add passes with vector
  * instructions, using the coefficient tables pnts[0]..pnts[4], and stores
  * the results to the local array bufs[]. The second inner block and the
  * output assignments work on bufs[0..31] with plain scalar code.
  */
 void dct64_altivec(real *out0,real *out1,real *samples)
 {
  /* scratch area, 16-byte aligned; filled by the vec_st calls below */
  real __attribute__ ((aligned (16))) bufs[64];
 	{
 		register real *b1,*costab;
 		vector unsigned char vinvert,vperm1,vperm2,vperm3,vperm4;
 		vector float v1,v2,v3,v4,v5,v6,v7,v8;
 		vector float vbs1,vbs2,vbs3,vbs4,vbs5,vbs6,vbs7,vbs8;
 		vector float vbs9,vbs10,vbs11,vbs12,vbs13,vbs14,vbs15,vbs16;
 		vector float vzero;
 		b1 = samples;
 		costab = pnts[0];
 		/* x XOR x yields all-zero bits whatever vzero held before;
 		   note that vzero is read here before it was ever assigned */
 		vzero = vec_xor(vzero,vzero);
 		/* vinvert: byte pattern that reverses the order of the four 32-bit words */
 #ifdef __APPLE__
 		vinvert = (vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3);
 #else
 		vinvert = (vector unsigned char){12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
 #endif
 		/* vperm1: alignment mask derived from the address of b1, used to pick
 		   four consecutive values out of two adjacent aligned loads.
 		   vperm2: the same selection with the word order reversed. */
 		vperm1 = vec_lvsl(0,b1);
 		vperm2 = vec_perm(vperm1,vperm1,vinvert);
 		v1 = vec_ld(0,b1);
 		v2 = vec_ld(16,b1);
 		v3 = vec_ld(112,b1);
 		/* offset 127 is the last byte of the 128-byte input window
 		   (NOTE(review): vec_ld is documented to ignore the low four address
 		   bits, so this should fetch the aligned block holding that byte) */
 		v4 = vec_ld(127,b1);
 		/* pass 1: sums and differences of mirrored input values */
 		v5 = vec_perm(v1,v2,vperm1); /* b1[0,1,2,3] */
 		v6 = vec_perm(v3,v4,vperm2); /* b1[31,30,29,28] */
 		vbs1 = vec_add(v5,v6);
 		vbs8 = vec_sub(v5,v6);
 		v1 = vec_ld(32,b1);
 		v4 = vec_ld(96,b1);
 		v5 = vec_perm(v2,v1,vperm1); /* b1[4,5,6,7] */
 		v6 = vec_perm(v4,v3,vperm2); /* b1[27,26,25,24] */
 		vbs2 = vec_add(v5,v6);
 		vbs7 = vec_sub(v5,v6);
 		v2 = vec_ld(48,b1);
 		v3 = vec_ld(80,b1);
 		v5 = vec_perm(v1,v2,vperm1); /* b1[8,9,10,11] */
 		v6 = vec_perm(v3,v4,vperm2); /* b1[23,22,21,20] */
 		vbs3 = vec_add(v5,v6);
 		vbs6 = vec_sub(v5,v6);
 		v1 = vec_ld(64,b1);
 		v5 = vec_perm(v2,v1,vperm1); /* b1[12,13,14,15] */
 		v6 = vec_perm(v1,v3,vperm2); /* b1[19,18,17,16] */
 		vbs4 = vec_add(v5,v6);
 		vbs5 = vec_sub(v5,v6);
 		/* scale the four difference vectors with the 16 values of pnts[0] */
 		v1 = vec_ld(0,costab);
 		vbs8 = vec_madd(vbs8,v1,vzero);
 		v2 = vec_ld(16,costab);
 		vbs7 = vec_madd(vbs7,v2,vzero);
 		v3 = vec_ld(32,costab);
 		vbs6 = vec_madd(vbs6,v3,vzero);
 		v4 = vec_ld(48,costab);
 		vbs5 = vec_madd(vbs5,v4,vzero);
 		vbs6 = vec_perm(vbs6,vbs6,vinvert);
 		vbs5 = vec_perm(vbs5,vbs5,vinvert);
 		/* pass 2: coefficients from pnts[1] (8 values, two vector loads) */
 		costab = pnts[1];
 		v1 = vec_perm(vbs4,vbs4,vinvert);
 		vbs9 = vec_add(vbs1,v1);
 		v3 = vec_sub(vbs1,v1);
 		v5 = vec_ld(0,costab);
 		v2 = vec_perm(vbs3,vbs3,vinvert);
 		vbs10 = vec_add(vbs2,v2);
 		v4 = vec_sub(vbs2,v2);
 		v6 = vec_ld(16,costab);
 		vbs12 = vec_madd(v3,v5,vzero);
 		vbs11 = vec_madd(v4,v6,vzero);
 		v7 = vec_sub(vbs7,vbs6);
 		v8 = vec_sub(vbs8,vbs5);
 		vbs13 = vec_add(vbs5,vbs8);
 		vbs14 = vec_add(vbs6,vbs7);
 		vbs15 = vec_madd(v7,v6,vzero);
 		vbs16 = vec_madd(v8,v5,vzero);
 		/* pass 3: coefficients from pnts[2] (one vector load) */
 		costab = pnts[2];
 		v1 = vec_perm(vbs10,vbs10,vinvert);
 		v5 = vec_perm(vbs14,vbs14,vinvert);
 		vbs1 = vec_add(v1,vbs9);
 		vbs5 = vec_add(v5,vbs13);
 		v2 = vec_sub(vbs9,v1);
 		v6 = vec_sub(vbs13,v5);
 		v3 = vec_ld(0,costab);
 		vbs11 = vec_perm(vbs11,vbs11,vinvert);
 		vbs15 = vec_perm(vbs15,vbs15,vinvert);
 		vbs3 = vec_add(vbs11,vbs12);
 		vbs7 = vec_add(vbs15,vbs16);
 		v4 = vec_sub(vbs12,vbs11);
 		v7 = vec_sub(vbs16,vbs15);
 		vbs2 = vec_madd(v2,v3,vzero);
 		vbs4 = vec_madd(v4,v3,vzero);
 		vbs6 = vec_madd(v6,v3,vzero);
 		vbs8 = vec_madd(v7,v3,vzero);
 		vbs2 = vec_perm(vbs2,vbs2,vinvert);
 		vbs4 = vec_perm(vbs4,vbs4,vinvert);
 		vbs6 = vec_perm(vbs6,vbs6,vinvert);
 		vbs8 = vec_perm(vbs8,vbs8,vinvert);
 		/* pass 4: the first two values of pnts[3], interleaved as (c0,c1,c0,c1);
 		   vperm1..vperm4 regroup halves of two vectors for this pass */
 		costab = pnts[3];
 #ifdef __APPLE__
 		vperm1 = (vector unsigned char)(0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
 		vperm2 = (vector unsigned char)(12,13,14,15,8,9,10,11,28,29,30,31,24,25,26,27);
 		vperm3 = (vector unsigned char)(0,1,2,3,4,5,6,7,20,21,22,23,16,17,18,19);
 #else
 		vperm1 = (vector unsigned char){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23};
 		vperm2 = (vector unsigned char){12,13,14,15,8,9,10,11,28,29,30,31,24,25,26,27};
 		vperm3 = (vector unsigned char){0,1,2,3,4,5,6,7,20,21,22,23,16,17,18,19};
 #endif
 		/* vperm4 is vperm3 with every byte index raised by 8 */
 		vperm4 = vec_add(vperm3,vec_splat_u8(8));
 		v1 = vec_ld(0,costab);
 		v2 = vec_splat(v1,0);
 		v3 = vec_splat(v1,1);
 		v1 = vec_mergeh(v2,v3);
 		v2 = vec_perm(vbs1,vbs3,vperm1);
 		v3 = vec_perm(vbs2,vbs4,vperm1);
 		v4 = vec_perm(vbs1,vbs3,vperm2);
 		v5 = vec_perm(vbs2,vbs4,vperm2);
 		v6 = vec_sub(v2,v4);
 		v7 = vec_sub(v3,v5);
 		v2 = vec_add(v2,v4);
 		v3 = vec_add(v3,v5);
 		v4 = vec_madd(v6,v1,vzero);
 		v5 = vec_nmsub(v7,v1,vzero);
 		vbs9 = vec_perm(v2,v4,vperm3);
 		vbs11 = vec_perm(v2,v4,vperm4);
 		vbs10 = vec_perm(v3,v5,vperm3);
 		vbs12 = vec_perm(v3,v5,vperm4);
 		v2 = vec_perm(vbs5,vbs7,vperm1);
 		v3 = vec_perm(vbs6,vbs8,vperm1);
 		v4 = vec_perm(vbs5,vbs7,vperm2);
 		v5 = vec_perm(vbs6,vbs8,vperm2);
 		v6 = vec_sub(v2,v4);
 		v7 = vec_sub(v3,v5);
 		v2 = vec_add(v2,v4);
 		v3 = vec_add(v3,v5);
 		v4 = vec_madd(v6,v1,vzero);
 		v5 = vec_nmsub(v7,v1,vzero);
 		vbs13 = vec_perm(v2,v4,vperm3);
 		vbs15 = vec_perm(v2,v4,vperm4);
 		vbs14 = vec_perm(v3,v5,vperm3);
 		vbs16 = vec_perm(v3,v5,vperm4);
 		/* pass 5: the single coefficient pnts[4][0], applied with
 		   alternating sign (c,-c,c,-c) */
 		costab = pnts[4];
 		v1 = vec_lde(0,costab);
 #ifdef __APPLE__
 		v2 = (vector float)(1.0f,-1.0f,1.0f,-1.0f);
 #else
 		v2 = (vector float){1.0f,-1.0f,1.0f,-1.0f};
 #endif
 		v3 = vec_splat(v1,0);
 		v1 = vec_madd(v2,v3,vzero);
 		v2 = vec_mergeh(vbs9,vbs10);
 		v3 = vec_mergel(vbs9,vbs10);
 		v4 = vec_mergeh(vbs11,vbs12);
 		v5 = vec_mergel(vbs11,vbs12);
 		v6 = vec_mergeh(v2,v3);
 		v7 = vec_mergel(v2,v3);
 		v2 = vec_mergeh(v4,v5);
 		v3 = vec_mergel(v4,v5); 
 		v4 = vec_sub(v6,v7);
 		v5 = vec_sub(v2,v3);
 		v6 = vec_add(v6,v7);
 		v7 = vec_add(v2,v3);
 		v2 = vec_madd(v4,v1,vzero);
 		v3 = vec_madd(v5,v1,vzero);
 		vbs1 = vec_mergeh(v6,v2);
 		vbs2 = vec_mergel(v6,v2);
 		vbs3 = vec_mergeh(v7,v3);
 		vbs4 = vec_mergel(v7,v3);
 		v2 = vec_mergeh(vbs13,vbs14);
 		v3 = vec_mergel(vbs13,vbs14);
 		v4 = vec_mergeh(vbs15,vbs16);
 		v5 = vec_mergel(vbs15,vbs16);
 		v6 = vec_mergeh(v2,v3);
 		v7 = vec_mergel(v2,v3);
 		v2 = vec_mergeh(v4,v5);
 		v3 = vec_mergel(v4,v5); 
 		v4 = vec_sub(v6,v7);
 		v5 = vec_sub(v2,v3);
 		v6 = vec_add(v6,v7);
 		v7 = vec_add(v2,v3);
 		v2 = vec_madd(v4,v1,vzero);
 		v3 = vec_madd(v5,v1,vzero);
 		vbs5 = vec_mergeh(v6,v2);
 		vbs6 = vec_mergel(v6,v2);
 		vbs7 = vec_mergeh(v7,v3);
 		vbs8 = vec_mergel(v7,v3);
 		/* store all sixteen vectors at byte offsets 0..240 of bufs;
 		   vbs1..vbs8 (offsets 0..112) hold the pass 5 results */
 		vec_st(vbs1,0,bufs);
 		vec_st(vbs2,16,bufs);
 		vec_st(vbs3,32,bufs);
 		vec_st(vbs4,48,bufs);
 		vec_st(vbs5,64,bufs);
 		vec_st(vbs6,80,bufs);
 		vec_st(vbs7,96,bufs);
 		vec_st(vbs8,112,bufs);
 		vec_st(vbs9,128,bufs);
 		vec_st(vbs10,144,bufs);
 		vec_st(vbs11,160,bufs);
 		vec_st(vbs12,176,bufs);
 		vec_st(vbs13,192,bufs);
 		vec_st(vbs14,208,bufs);
 		vec_st(vbs15,224,bufs);
 		vec_st(vbs16,240,bufs);
 	}
 /* scalar in-place sums over bufs[0..31], in groups of 4, then 8, then 16 */
 {
  register real *b1;
  register int i;
  for(b1=bufs,i=8;i;i--,b1+=4)
    b1[2] += b1[3];
  for(b1=bufs,i=4;i;i--,b1+=8)
  {
    b1[4] += b1[6];
    b1[6] += b1[5];
    b1[5] += b1[7];
  }
  for(b1=bufs,i=2;i;i--,b1+=16)
  {
    b1[8]  += b1[12];
    b1[12] += b1[10];
    b1[10] += b1[14];
    b1[14] += b1[9];
    b1[9]  += b1[13];
    b1[13] += b1[11];
    b1[11] += b1[15];
  }
 }
  /* output: out0 is written from index 0x10*16 down to 0; even slots copy one
     value of bufs[0..15], odd slots add two neighbours of bufs[16..31] */
  out0[0x10*16] = bufs[0];
  out0[0x10*15] = bufs[16+0]  + bufs[16+8];
  out0[0x10*14] = bufs[8];
  out0[0x10*13] = bufs[16+8]  + bufs[16+4];
  out0[0x10*12] = bufs[4];
  out0[0x10*11] = bufs[16+4]  + bufs[16+12];
  out0[0x10*10] = bufs[12];
  out0[0x10* 9] = bufs[16+12] + bufs[16+2];
  out0[0x10* 8] = bufs[2];
  out0[0x10* 7] = bufs[16+2]  + bufs[16+10];
  out0[0x10* 6] = bufs[10];
  out0[0x10* 5] = bufs[16+10] + bufs[16+6];
  out0[0x10* 4] = bufs[6];
  out0[0x10* 3] = bufs[16+6]  + bufs[16+14];
  out0[0x10* 2] = bufs[14];
  out0[0x10* 1] = bufs[16+14] + bufs[16+1];
  out0[0x10* 0] = bufs[1];
  /* out1 is written from index 0 upwards; slot 0 repeats bufs[1] */
  out1[0x10* 0] = bufs[1];
  out1[0x10* 1] = bufs[16+1]  + bufs[16+9];
  out1[0x10* 2] = bufs[9];
  out1[0x10* 3] = bufs[16+9]  + bufs[16+5];
  out1[0x10* 4] = bufs[5];
  out1[0x10* 5] = bufs[16+5]  + bufs[16+13];
  out1[0x10* 6] = bufs[13];
  out1[0x10* 7] = bufs[16+13] + bufs[16+3];
  out1[0x10* 8] = bufs[3];
  out1[0x10* 9] = bufs[16+3]  + bufs[16+11];
  out1[0x10*10] = bufs[11];
  out1[0x10*11] = bufs[16+11] + bufs[16+7];
  out1[0x10*12] = bufs[7];
  out1[0x10*13] = bufs[16+7]  + bufs[16+15];
  out1[0x10*14] = bufs[15];
  out1[0x10*15] = bufs[16+15];
 }
--- a/src/libmpg123/dct64_i386.c
+++ b/src/libmpg123/dct64_i386.c
@@ -0,0 +1,336 @@
 /*
 	dct64_i386.c: DCT64, a C variant for i386
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 /*
 * Discrete Cosine Transform (DCT) for subband synthesis,
 * optimized for machines with no auto-increment.
 * The performance is highly compiler dependent. Maybe
 * the dct64.c version for 'normal' processors is faster,
 * even on Intel processors.
 */
 #include "mpg123lib_intern.h"
 /*
  * dct64_1: worker behind dct64_i386.
  *
  * Runs five butterfly passes over the 32 input samples, alternating between
  * the two scratch areas b1 and b2 (0x20 reals each), then writes the result
  * to out0/out1 with a stride of 0x10 elements. Every pass pairs element
  * "lo" with its mirror "hi" inside a group: the sum goes to the lo slot, the
  * difference times a coefficient from pnts[] goes to the hi slot. In every
  * second group of passes 2 to 5 the difference is taken as hi-lo instead of
  * lo-hi.
  *
  * out0     written at indices 0x10*16 down to 0
  * out1     written at indices 0 up to 0x10*15
  * b1, b2   scratch; expected not to overlap each other or samples, as in the
  *          only caller in this file
  * samples  32 input values
  */
 static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples)
 {
  int k;    /* position inside the current group */
  int base; /* index of the first element of the current group */
  real *tab;
  /* pass 1 (pnts[0]): one group of 32, samples -> b1 */
  tab = pnts[0];
  for(k = 0; k < 0x10; k++)
  {
   b1[k]      = samples[k] + samples[0x1F-k];
   b1[0x1F-k] = (samples[k] - samples[0x1F-k]) * tab[k];
  }
  /* pass 2 (pnts[1]): two groups of 16, b1 -> b2 */
  tab = pnts[1];
  for(k = 0; k < 8; k++)
  {
   b2[k]      = b1[k] + b1[0x0F-k];
   b2[0x0F-k] = (b1[k] - b1[0x0F-k]) * tab[k];
  }
  for(k = 0; k < 8; k++)
  {
   b2[0x10+k] = b1[0x10+k] + b1[0x1F-k];
   b2[0x1F-k] = (b1[0x1F-k] - b1[0x10+k]) * tab[k];
  }
  /* pass 3 (pnts[2]): four groups of 8, b2 -> b1 */
  tab = pnts[2];
  for(base = 0; base < 0x20; base += 8)
  {
   for(k = 0; k < 4; k++)
   {
    const int lo = base + k;
    const int hi = base + 7 - k;
    b1[lo] = b2[lo] + b2[hi];
    if(base & 8)
    {
     b1[hi] = (b2[hi] - b2[lo]) * tab[k];
    }
    else
    {
     b1[hi] = (b2[lo] - b2[hi]) * tab[k];
    }
   }
  }
  /* pass 4 (pnts[3][0], pnts[3][1]): eight groups of 4, b1 -> b2 */
  {
   const real outer = pnts[3][0];
   const real inner = pnts[3][1];
   for(base = 0; base < 0x20; base += 4)
   {
    b2[base]   = b1[base] + b1[base+3];
    if(base & 4)
    {
     b2[base+3] = (b1[base+3] - b1[base]) * outer;
    }
    else
    {
     b2[base+3] = (b1[base] - b1[base+3]) * outer;
    }
    b2[base+1] = b1[base+1] + b1[base+2];
    if(base & 4)
    {
     b2[base+2] = (b1[base+2] - b1[base+1]) * inner;
    }
    else
    {
     b2[base+2] = (b1[base+1] - b1[base+2]) * inner;
    }
   }
  }
  /* pass 5 (pnts[4][0]): pairs, b2 -> b1, plus the sums inside each block of 8 */
  {
   const real c = pnts[4][0];
   for(base = 0; base < 0x20; base += 8)
   {
    real *d = b1 + base;
    real *s = b2 + base;
    d[0] = s[0] + s[1];
    d[1] = (s[0] - s[1]) * c;
    d[2] = s[2] + s[3];
    d[3] = (s[3] - s[2]) * c;
    d[2] += d[3];
    d[4] = s[4] + s[5];
    d[5] = (s[4] - s[5]) * c;
    d[6] = s[6] + s[7];
    d[7] = (s[7] - s[6]) * c;
    d[6] += d[7];
    d[4] += d[6];
    d[6] += d[5];
    d[5] += d[7];
   }
  }
  /* outputs at multiples of 4: straight copies from b1[0..7] */
  out0[0x10*16] = b1[0x00];
  out0[0x10*12] = b1[0x04];
  out0[0x10* 8] = b1[0x02];
  out0[0x10* 4] = b1[0x06];
  out0[0x10* 0] = b1[0x01];
  out1[0x10* 0] = b1[0x01];
  out1[0x10* 4] = b1[0x05];
  out1[0x10* 8] = b1[0x03];
  out1[0x10*12] = b1[0x07];
  /* remaining even outputs: sums of neighbours in the chain
     8, C, A, E, 9, D, B, F of b1 */
  out0[0x10*14] = b1[0x08] + b1[0x0C];
  out0[0x10*10] = b1[0x0C] + b1[0x0A];
  out0[0x10* 6] = b1[0x0A] + b1[0x0E];
  out0[0x10* 2] = b1[0x0E] + b1[0x09];
  out1[0x10* 2] = b1[0x09] + b1[0x0D];
  out1[0x10* 6] = b1[0x0D] + b1[0x0B];
  out1[0x10*10] = b1[0x0B] + b1[0x0F];
  out1[0x10*14] = b1[0x0F];
  /* odd outputs: a running pair sum from b1[0x18..0x1F] plus one
     element of b1[0x10..0x17] */
  {
   real pair;
   pair = b1[0x18] + b1[0x1C];
   out0[0x10*15] = pair + b1[0x10];
   out0[0x10*13] = pair + b1[0x14];
   pair = b1[0x1C] + b1[0x1A];
   out0[0x10*11] = pair + b1[0x14];
   out0[0x10* 9] = pair + b1[0x12];
   pair = b1[0x1A] + b1[0x1E];
   out0[0x10* 7] = pair + b1[0x12];
   out0[0x10* 5] = pair + b1[0x16];
   pair = b1[0x1E] + b1[0x19];
   out0[0x10* 3] = pair + b1[0x16];
   out0[0x10* 1] = pair + b1[0x11];
   pair = b1[0x19] + b1[0x1D];
   out1[0x10* 1] = pair + b1[0x11];
   out1[0x10* 3] = pair + b1[0x15];
   pair = b1[0x1D] + b1[0x1B];
   out1[0x10* 5] = pair + b1[0x15];
   out1[0x10* 7] = pair + b1[0x13];
   pair = b1[0x1B] + b1[0x1F];
   out1[0x10* 9] = pair + b1[0x13];
   out1[0x10*11] = pair + b1[0x17];
   out1[0x10*13] = b1[0x17] + b1[0x1F];
   out1[0x10*15] = b1[0x1F];
  }
 }
 /*
  * dct64_i386: entry point of the i386 C DCT64.
  * Provides 0x40 reals of local scratch space and passes its two halves to
  * dct64_1 as the b1 and b2 work areas. Doing the work in a separate function
  * is a trick to force GCC to use (new) registers for the b1,b2 pointers into
  * the scratch array.
  *
  * a, b  output buffers (out0, out1 of dct64_1)
  * c     the input samples
  */
 void dct64_i386(real *a,real *b,real *c)
 {
  real scratch[0x40];
  real *lower_half = scratch;
  real *upper_half = scratch + 0x20;
  dct64_1(a, b, lower_half, upper_half, c);
 }
--- a/src/libmpg123/dct64_i486.c
+++ b/src/libmpg123/dct64_i486.c
@@ -0,0 +1,342 @@
 /*
 	dct64_i486.c: DCT64, a plain C variant for i486
 	copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Fabrice Bellard
 */
 /* Discrete Cosine Transform (DCT) for subband synthesis.
 *
 * This code is optimized for 80486. It should be compiled with gcc
 * 2.7.2 or higher.
 *
 * Note: This code does not give the necessary accuracy. Moreover, no
 * overflow tests are done.
 *
 * (c) 1998 Fabrice Bellard.  
 */
 #include "mpg123lib_intern.h"
 /*
  * Integer coefficients COS_<pass>_<index> for the five butterfly passes.
  * They are used as the second operand of MUL/MULL below, which shift the
  * product right by 15 bits.
  */
 #define COS_0_0 16403
 #define COS_0_1 16563
 #define COS_0_2 16890
 #define COS_0_3 17401
 #define COS_0_4 18124
 #define COS_0_5 19101
 #define COS_0_6 20398
 #define COS_0_7 22112
 #define COS_0_8 24396
 #define COS_0_9 27503
 #define COS_0_10 31869
 #define COS_0_11 38320
 #define COS_0_12 48633
 #define COS_0_13 67429
 #define COS_0_14 111660
 #define COS_0_15 333906
 #define COS_1_0 16463
 #define COS_1_1 17121
 #define COS_1_2 18577
 #define COS_1_3 21195
 #define COS_1_4 25826
 #define COS_1_5 34756
 #define COS_1_6 56441
 #define COS_1_7 167154
 #define COS_2_0 16704
 #define COS_2_1 19704
 #define COS_2_2 29490
 #define COS_2_3 83981
 #define COS_3_0 17733
 #define COS_3_1 42813
 #define COS_4_0 23170
 /* Store expr at slot n of an output buffer; slots are FIR_BUFFER_SIZE elements apart. */
 #define SETOUT(out,n,expr) (out)[FIR_BUFFER_SIZE*(n)]=(expr)
 /* (a*b) >> 15 with the product formed in long long. */
 #define MULL(a,b) (((long long)(a)*(long long)(b)) >> 15)
 /*
  * (a*b) >> 15 with the product formed in the type of the operands.
  * If b has trailing zero bits (up to six), they are shifted out of b first
  * and the final shift is reduced by the same amount, which keeps the
  * intermediate product smaller. b is meant to be a constant so that the
  * compiler can pick the branch at compile time.
  * Both parameters are parenthesized at every use: with a bare b, an argument
  * such as 1 ^ 1 would be combined with the neighbouring & and >> operators
  * with the wrong precedence.
  */
 #define MUL(a,b) \
 (\
       ((!((b) & 0x3F)) ? (((a)*((b) >> 6)) >> 9) :\
       ((!((b) & 0x1F)) ? (((a)*((b) >> 5)) >> 10) :\
       ((!((b) & 0x0F)) ? (((a)*((b) >> 4)) >> 11) :\
       ((!((b) & 0x07)) ? (((a)*((b) >> 3)) >> 12) :\
       ((!((b) & 0x03)) ? (((a)*((b) >> 2)) >> 13) :\
       ((!((b) & 0x01)) ? (((a)*((b) >> 1)) >> 14) :\
                          (((a)*(b)) >> 15))))))))
 /*
  * dct64_1_486: integer DCT64 worker for the i486 decoder.
  *
  * b2 holds the 32 input values on entry; b1 is a second 32-int work area.
  * Five butterfly passes alternate between the two areas (b2->b1, b1->b2,
  * ...), each pairing an element "lo" with its mirror "hi" inside a group:
  * the lo slot receives the sum, the hi slot the difference multiplied by a
  * COS_* constant through MUL or MULL. The last part combines b1 in place
  * and writes the results to out0/out1 through SETOUT.
  *
  * The two helper macros below exist only for this function and are
  * undefined again after it.
  */
 /* dst[lo] = sum of the pair, dst[hi] = mulop(src[lo] - src[hi], coef) */
 #define BF486_FWD(dst,src,lo,hi,mulop,coef) \
  do { dst[lo] = src[lo] + src[hi]; dst[hi] = mulop((src[lo] - src[hi]),coef); } while(0)
 /* dst[lo] = sum of the pair, dst[hi] = mulop(src[hi] - src[lo], coef) */
 #define BF486_REV(dst,src,lo,hi,mulop,coef) \
  do { dst[lo] = src[lo] + src[hi]; dst[hi] = mulop((src[hi] - src[lo]),coef); } while(0)
 void dct64_1_486(int *out0,int *out1,int *b1,int *b2)
 {
  int base;
  /* pass 1: one group of 32, b2 -> b1 */
  BF486_FWD(b1,b2,0x00,0x1F,MUL ,COS_0_0);
  BF486_FWD(b1,b2,0x01,0x1E,MUL ,COS_0_1);
  BF486_FWD(b1,b2,0x02,0x1D,MUL ,COS_0_2);
  BF486_FWD(b1,b2,0x03,0x1C,MUL ,COS_0_3);
  BF486_FWD(b1,b2,0x04,0x1B,MUL ,COS_0_4);
  BF486_FWD(b1,b2,0x05,0x1A,MUL ,COS_0_5);
  BF486_FWD(b1,b2,0x06,0x19,MUL ,COS_0_6);
  BF486_FWD(b1,b2,0x07,0x18,MUL ,COS_0_7);
  BF486_FWD(b1,b2,0x08,0x17,MUL ,COS_0_8);
  BF486_FWD(b1,b2,0x09,0x16,MUL ,COS_0_9);
  BF486_FWD(b1,b2,0x0A,0x15,MUL ,COS_0_10);
  BF486_FWD(b1,b2,0x0B,0x14,MUL ,COS_0_11);
  BF486_FWD(b1,b2,0x0C,0x13,MUL ,COS_0_12);
  BF486_FWD(b1,b2,0x0D,0x12,MULL,COS_0_13);
  BF486_FWD(b1,b2,0x0E,0x11,MULL,COS_0_14);
  BF486_FWD(b1,b2,0x0F,0x10,MULL,COS_0_15);
  /* pass 2: two groups of 16, b1 -> b2 */
  BF486_FWD(b2,b1,0x00,0x0F,MUL ,COS_1_0);
  BF486_FWD(b2,b1,0x01,0x0E,MUL ,COS_1_1);
  BF486_FWD(b2,b1,0x02,0x0D,MUL ,COS_1_2);
  BF486_FWD(b2,b1,0x03,0x0C,MUL ,COS_1_3);
  BF486_FWD(b2,b1,0x04,0x0B,MUL ,COS_1_4);
  BF486_FWD(b2,b1,0x05,0x0A,MUL ,COS_1_5);
  BF486_FWD(b2,b1,0x06,0x09,MUL ,COS_1_6);
  BF486_FWD(b2,b1,0x07,0x08,MULL,COS_1_7);
  BF486_REV(b2,b1,0x10,0x1F,MUL ,COS_1_0);
  BF486_REV(b2,b1,0x11,0x1E,MUL ,COS_1_1);
  BF486_REV(b2,b1,0x12,0x1D,MUL ,COS_1_2);
  BF486_REV(b2,b1,0x13,0x1C,MUL ,COS_1_3);
  BF486_REV(b2,b1,0x14,0x1B,MUL ,COS_1_4);
  BF486_REV(b2,b1,0x15,0x1A,MUL ,COS_1_5);
  BF486_REV(b2,b1,0x16,0x19,MUL ,COS_1_6);
  BF486_REV(b2,b1,0x17,0x18,MULL,COS_1_7);
  /* pass 3: four groups of 8, b2 -> b1 */
  BF486_FWD(b1,b2,0x00,0x07,MUL ,COS_2_0);
  BF486_FWD(b1,b2,0x01,0x06,MUL ,COS_2_1);
  BF486_FWD(b1,b2,0x02,0x05,MUL ,COS_2_2);
  BF486_FWD(b1,b2,0x03,0x04,MULL,COS_2_3);
  BF486_REV(b1,b2,0x08,0x0F,MUL ,COS_2_0);
  BF486_REV(b1,b2,0x09,0x0E,MUL ,COS_2_1);
  BF486_REV(b1,b2,0x0A,0x0D,MUL ,COS_2_2);
  BF486_REV(b1,b2,0x0B,0x0C,MULL,COS_2_3);
  BF486_FWD(b1,b2,0x10,0x17,MUL ,COS_2_0);
  BF486_FWD(b1,b2,0x11,0x16,MUL ,COS_2_1);
  BF486_FWD(b1,b2,0x12,0x15,MUL ,COS_2_2);
  BF486_FWD(b1,b2,0x13,0x14,MULL,COS_2_3);
  BF486_REV(b1,b2,0x18,0x1F,MUL ,COS_2_0);
  BF486_REV(b1,b2,0x19,0x1E,MUL ,COS_2_1);
  BF486_REV(b1,b2,0x1A,0x1D,MUL ,COS_2_2);
  BF486_REV(b1,b2,0x1B,0x1C,MULL,COS_2_3);
  /* pass 4: eight groups of 4, b1 -> b2 */
  BF486_FWD(b2,b1,0x00,0x03,MUL,COS_3_0);
  BF486_FWD(b2,b1,0x01,0x02,MUL,COS_3_1);
  BF486_REV(b2,b1,0x04,0x07,MUL,COS_3_0);
  BF486_REV(b2,b1,0x05,0x06,MUL,COS_3_1);
  BF486_FWD(b2,b1,0x08,0x0B,MUL,COS_3_0);
  BF486_FWD(b2,b1,0x09,0x0A,MUL,COS_3_1);
  BF486_REV(b2,b1,0x0C,0x0F,MUL,COS_3_0);
  BF486_REV(b2,b1,0x0D,0x0E,MUL,COS_3_1);
  BF486_FWD(b2,b1,0x10,0x13,MUL,COS_3_0);
  BF486_FWD(b2,b1,0x11,0x12,MUL,COS_3_1);
  BF486_REV(b2,b1,0x14,0x17,MUL,COS_3_0);
  BF486_REV(b2,b1,0x15,0x16,MUL,COS_3_1);
  BF486_FWD(b2,b1,0x18,0x1B,MUL,COS_3_0);
  BF486_FWD(b2,b1,0x19,0x1A,MUL,COS_3_1);
  BF486_REV(b2,b1,0x1C,0x1F,MUL,COS_3_0);
  BF486_REV(b2,b1,0x1D,0x1E,MUL,COS_3_1);
  /* pass 5: sixteen pairs, b2 -> b1, alternating lo-hi and hi-lo */
  for(base = 0; base < 32; base += 4)
  {
   BF486_FWD(b1,b2,base,  base+1,MUL,COS_4_0);
   BF486_REV(b1,b2,base+2,base+3,MUL,COS_4_0);
  }
  /* in-place sums inside each block of 8 */
  for(base = 0; base < 32; base += 8)
  {
   b1[base+2] += b1[base+3];
   b1[base+6] += b1[base+7];
   b1[base+4] += b1[base+6];
   b1[base+6] += b1[base+5];
   b1[base+5] += b1[base+7];
  }
  /* output slots at multiples of 4: straight copies from b1[0..7] */
  SETOUT(out0,16,b1[0x00]);
  SETOUT(out0,12,b1[0x04]);
  SETOUT(out0, 8,b1[0x02]);
  SETOUT(out0, 4,b1[0x06]);
  SETOUT(out0, 0,b1[0x01]);
  SETOUT(out1, 0,b1[0x01]);
  SETOUT(out1, 4,b1[0x05]);
  SETOUT(out1, 8,b1[0x03]);
  SETOUT(out1,12,b1[0x07]);
  /* remaining even slots: each element of b1[8..15] absorbs its successor
     in the chain 8, C, A, E, 9, D, B, F before it is written out */
  b1[0x08] += b1[0x0C];
  SETOUT(out0,14,b1[0x08]);
  b1[0x0C] += b1[0x0A];
  SETOUT(out0,10,b1[0x0C]);
  b1[0x0A] += b1[0x0E];
  SETOUT(out0, 6,b1[0x0A]);
  b1[0x0E] += b1[0x09];
  SETOUT(out0, 2,b1[0x0E]);
  b1[0x09] += b1[0x0D];
  SETOUT(out1, 2,b1[0x09]);
  b1[0x0D] += b1[0x0B];
  SETOUT(out1, 6,b1[0x0D]);
  b1[0x0B] += b1[0x0F];
  SETOUT(out1,10,b1[0x0B]);
  SETOUT(out1,14,b1[0x0F]);
  /* odd slots: the same chain over b1[0x18..0x1F], each link added to one
     element of b1[0x10..0x17] */
  b1[0x18] += b1[0x1C];
  SETOUT(out0,15,b1[0x10] + b1[0x18]);
  SETOUT(out0,13,b1[0x18] + b1[0x14]);
  b1[0x1C] += b1[0x1A];
  SETOUT(out0,11,b1[0x14] + b1[0x1C]);
  SETOUT(out0, 9,b1[0x1C] + b1[0x12]);
  b1[0x1A] += b1[0x1E];
  SETOUT(out0, 7,b1[0x12] + b1[0x1A]);
  SETOUT(out0, 5,b1[0x1A] + b1[0x16]);
  b1[0x1E] += b1[0x19];
  SETOUT(out0, 3,b1[0x16] + b1[0x1E]);
  SETOUT(out0, 1,b1[0x1E] + b1[0x11]);
  b1[0x19] += b1[0x1D];
  SETOUT(out1, 1,b1[0x11] + b1[0x19]);
  SETOUT(out1, 3,b1[0x19] + b1[0x15]);
  b1[0x1D] += b1[0x1B];
  SETOUT(out1, 5,b1[0x15] + b1[0x1D]);
  SETOUT(out1, 7,b1[0x1D] + b1[0x13]);
  b1[0x1B] += b1[0x1F];
  SETOUT(out1, 9,b1[0x13] + b1[0x1B]);
  SETOUT(out1,11,b1[0x1B] + b1[0x17]);
  SETOUT(out1,13,b1[0x17] + b1[0x1F]);
  SETOUT(out1,15,b1[0x1F]);
 }
 #undef BF486_FWD
 #undef BF486_REV
 /*
  * dct64_i486: entry point of the integer DCT64 for the i486 decoder.
  *
  * Converts the 32 input samples to integers in bufs[0..31] and calls
  * dct64_1_486 with bufs as its input area (b2) and bufs+32 as its second
  * work area (b1). Doing the work in a separate function is a trick to force
  * GCC to use (new) registers for the b1,b2 pointers into bufs[].
  *
  * a, b     output buffers, passed on as out0 and out1
  * samples  32 input values
  */
 void dct64_i486(int *a,int *b,real *samples)
 {
  int bufs[64];
  int i;
 #ifdef REAL_IS_FIXED
 #define TOINT(a) ((a) * 32768 / (int)REAL_FACTOR)
  for(i=0;i<32;i++) {
    bufs[i]=TOINT(samples[i]);
  }
 #else
  /*
   * Float to integer conversion by magic number: the sample is added to a
   * large constant and the first int-sized word of the resulting double is
   * taken as the integer value.
   * NOTE(review): this relies on the IEEE-754 double layout and on the
   * low-order word coming first in memory, as on the i486 this file targets.
   *
   * The word is read through a union. The previous code stored the double
   * through a cast int pointer, which broke the aliasing rules, was not
   * aligned for a double, and spilled the second word into the following
   * element (bufs[32] on the last round). bufs[0..31] end up with the same
   * contents; bufs[32] is no longer touched here, and dct64_1_486 writes
   * its b1 area (bufs+32) before reading it.
   */
  union { double d; int w[2]; } cvt;
  register double const scale = ((65536.0 * 32) + 1) * 65536.0;
  for(i=0;i<32;i++) {
    cvt.d = scale + *samples++;
    bufs[i] = cvt.w[0];
  }
 #endif
  dct64_1_486(a,b,bufs+32,bufs);
 }
--- a/src/libmpg123/dct64_mmx.S
+++ b/src/libmpg123/dct64_mmx.S
@@ -0,0 +1,811 @@
 /*
 	dct64_mmx.s: MMX optimized DCT64
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by the mysterious higway (apparently)
 */
 #include "mangle.h"
 .text
 	ALIGN32
 /*
 	dct64_mmx / dct64_MMX: two entry labels, one body.
 	dct64_mmx zeroes %ecx and falls through into dct64_MMX; code entering at
 	dct64_MMX directly supplies its own %ecx.  The "orl %ecx,%ecx" below sets
 	ZF from that value and the "jnz .L01" after the last butterfly pass tests
 	it: %ecx == 0 continues into the fsts/fstps output stage, %ecx != 0 jumps
 	to the fist/fistp output stage at .L01.  Only movl/leal and x87
 	instructions sit between the orl and the jnz; none of them write EFLAGS.

 	After the three pushes and the 256-byte frame the stack arguments are at
 	272/276/280(%esp) (12 + 256 + 4 bytes of return address):
 	  272(%esp) -> %esi, 276(%esp) -> %edi   output pointers (written later)
 	  280(%esp) -> %eax                      32 input floats (only read)
 	Other registers:
 	  %ebx = costab_mmxsse
 	  %edx = %esp+128, %ecx = %esp           the two 128-byte halves of the
 	                                         frame the passes alternate between

 	Pass 1 (input -> %edx buffer), float indices, i = 0..15:
 	  edx[i]    = in[i] + in[31-i]
 	  edx[31-i] = (in[i] - in[31-i]) * costab[i]
 */
 .globl ASM_NAME(dct64_mmx)
 ASM_NAME(dct64_mmx):
 	xorl %ecx,%ecx
 .globl ASM_NAME(dct64_MMX)
 ASM_NAME(dct64_MMX):
 	pushl %ebx
 	pushl %esi
 	pushl %edi
 	subl $256,%esp
 	movl 280(%esp),%eax
 	flds     (%eax)
 	leal 128(%esp),%edx
 	fadds 124(%eax)
 	movl 272(%esp),%esi
 	fstps    (%edx)
 	movl 276(%esp),%edi
 	flds    4(%eax)
 	movl $ASM_NAME(costab_mmxsse),%ebx
 	fadds 120(%eax)
 	/* latch the entry value of %ecx into ZF before %ecx is reused as a pointer */
 	orl %ecx,%ecx
 	fstps   4(%edx)
 	flds     (%eax)
 	movl %esp,%ecx
 	fsubs 124(%eax)
 	fmuls    (%ebx)
 	fstps 124(%edx)
 	flds    4(%eax)
 	fsubs 120(%eax)
 	fmuls   4(%ebx)
 	fstps 120(%edx)
 	flds    8(%eax)
 	fadds 116(%eax)
 	fstps   8(%edx)
 	flds   12(%eax)
 	fadds 112(%eax)
 	fstps  12(%edx)
 	flds    8(%eax)
 	fsubs 116(%eax)
 	fmuls   8(%ebx)
 	fstps 116(%edx)
 	flds   12(%eax)
 	fsubs 112(%eax)
 	fmuls  12(%ebx)
 	fstps 112(%edx)
 	flds   16(%eax)
 	fadds 108(%eax)
 	fstps  16(%edx)
 	flds   20(%eax)
 	fadds 104(%eax)
 	fstps  20(%edx)
 	flds   16(%eax)
 	fsubs 108(%eax)
 	fmuls  16(%ebx)
 	fstps 108(%edx)
 	flds   20(%eax)
 	fsubs 104(%eax)
 	fmuls  20(%ebx)
 	fstps 104(%edx)
 	flds   24(%eax)
 	fadds 100(%eax)
 	fstps  24(%edx)
 	flds   28(%eax)
 	fadds  96(%eax)
 	fstps  28(%edx)
 	flds   24(%eax)
 	fsubs 100(%eax)
 	fmuls  24(%ebx)
 	fstps 100(%edx)
 	flds   28(%eax)
 	fsubs  96(%eax)
 	fmuls  28(%ebx)
 	fstps  96(%edx)
 	flds   32(%eax)
 	fadds  92(%eax)
 	fstps  32(%edx)
 	flds   36(%eax)
 	fadds  88(%eax)
 	fstps  36(%edx)
 	flds   32(%eax)
 	fsubs  92(%eax)
 	fmuls  32(%ebx)
 	fstps  92(%edx)
 	flds   36(%eax)
 	fsubs  88(%eax)
 	fmuls  36(%ebx)
 	fstps  88(%edx)
 	flds   40(%eax)
 	fadds  84(%eax)
 	fstps  40(%edx)
 	flds   44(%eax)
 	fadds  80(%eax)
 	fstps  44(%edx)
 	flds   40(%eax)
 	fsubs  84(%eax)
 	fmuls  40(%ebx)
 	fstps  84(%edx)
 	flds   44(%eax)
 	fsubs  80(%eax)
 	fmuls  44(%ebx)
 	fstps  80(%edx)
 	flds   48(%eax)
 	fadds  76(%eax)
 	fstps  48(%edx)
 	flds   52(%eax)
 	fadds  72(%eax)
 	fstps  52(%edx)
 	flds   48(%eax)
 	fsubs  76(%eax)
 	fmuls  48(%ebx)
 	fstps  76(%edx)
 	flds   52(%eax)
 	fsubs  72(%eax)
 	fmuls  52(%ebx)
 	fstps  72(%edx)
 	flds   56(%eax)
 	fadds  68(%eax)
 	fstps  56(%edx)
 	flds   60(%eax)
 	fadds  64(%eax)
 	fstps  60(%edx)
 	flds   56(%eax)
 	fsubs  68(%eax)
 	fmuls  56(%ebx)
 	fstps  68(%edx)
 	flds   60(%eax)
 	fsubs  64(%eax)
 	fmuls  60(%ebx)
 	fstps  64(%edx)
 	/*
 		Pass 2: %edx buffer -> %ecx buffer, using the table entries at byte
 		offsets 64..92 of %ebx (cos[16..23]).  Float indices, i = 0..7:
 		  ecx[i]    = edx[i]    + edx[15-i]
 		  ecx[15-i] = (edx[i]    - edx[15-i]) * cos[16+i]
 		  ecx[16+i] = edx[16+i] + edx[31-i]
 		  ecx[31-i] = (edx[31-i] - edx[16+i]) * cos[16+i]
 		The operand order of the subtraction is swapped in the upper half.
 	*/
 	flds     (%edx)
 	fadds  60(%edx)
 	fstps    (%ecx)
 	flds    4(%edx)
 	fadds  56(%edx)
 	fstps   4(%ecx)
 	flds     (%edx)
 	fsubs  60(%edx)
 	fmuls  64(%ebx)
 	fstps  60(%ecx)
 	flds    4(%edx)
 	fsubs  56(%edx)
 	fmuls  68(%ebx)
 	fstps  56(%ecx)
 	flds    8(%edx)
 	fadds  52(%edx)
 	fstps   8(%ecx)
 	flds   12(%edx)
 	fadds  48(%edx)
 	fstps  12(%ecx)
 	flds    8(%edx)
 	fsubs  52(%edx)
 	fmuls  72(%ebx)
 	fstps  52(%ecx)
 	flds   12(%edx)
 	fsubs  48(%edx)
 	fmuls  76(%ebx)
 	fstps  48(%ecx)
 	flds   16(%edx)
 	fadds  44(%edx)
 	fstps  16(%ecx)
 	flds   20(%edx)
 	fadds  40(%edx)
 	fstps  20(%ecx)
 	flds   16(%edx)
 	fsubs  44(%edx)
 	fmuls  80(%ebx)
 	fstps  44(%ecx)
 	flds   20(%edx)
 	fsubs  40(%edx)
 	fmuls  84(%ebx)
 	fstps  40(%ecx)
 	flds   24(%edx)
 	fadds  36(%edx)
 	fstps  24(%ecx)
 	flds   28(%edx)
 	fadds  32(%edx)
 	fstps  28(%ecx)
 	flds   24(%edx)
 	fsubs  36(%edx)
 	fmuls  88(%ebx)
 	fstps  36(%ecx)
 	flds   28(%edx)
 	fsubs  32(%edx)
 	fmuls  92(%ebx)
 	fstps  32(%ecx)
 	/* upper half: byte offsets 64..124 */
 	flds   64(%edx)
 	fadds 124(%edx)
 	fstps  64(%ecx)
 	flds   68(%edx)
 	fadds 120(%edx)
 	fstps  68(%ecx)
 	flds  124(%edx)
 	fsubs  64(%edx)
 	fmuls  64(%ebx)
 	fstps 124(%ecx)
 	flds  120(%edx)
 	fsubs  68(%edx)
 	fmuls  68(%ebx)
 	fstps 120(%ecx)
 	flds   72(%edx)
 	fadds 116(%edx)
 	fstps  72(%ecx)
 	flds   76(%edx)
 	fadds 112(%edx)
 	fstps  76(%ecx)
 	flds  116(%edx)
 	fsubs  72(%edx)
 	fmuls  72(%ebx)
 	fstps 116(%ecx)
 	flds  112(%edx)
 	fsubs  76(%edx)
 	fmuls  76(%ebx)
 	fstps 112(%ecx)
 	flds   80(%edx)
 	fadds 108(%edx)
 	fstps  80(%ecx)
 	flds   84(%edx)
 	fadds 104(%edx)
 	fstps  84(%ecx)
 	flds  108(%edx)
 	fsubs  80(%edx)
 	fmuls  80(%ebx)
 	fstps 108(%ecx)
 	flds  104(%edx)
 	fsubs  84(%edx)
 	fmuls  84(%ebx)
 	fstps 104(%ecx)
 	flds   88(%edx)
 	fadds 100(%edx)
 	fstps  88(%ecx)
 	flds   92(%edx)
 	fadds  96(%edx)
 	fstps  92(%ecx)
 	flds  100(%edx)
 	fsubs  88(%edx)
 	fmuls  88(%ebx)
 	fstps 100(%ecx)
 	flds   96(%edx)
 	fsubs  92(%edx)
 	fmuls  92(%ebx)
 	fstps  96(%ecx)
 	/*
 		Pass 3: %ecx buffer -> %edx buffer, using the table entries at byte
 		offsets 96..108 of %ebx (cos[24..27]).  The 32 floats are handled as
 		four groups of 8 (base = 0, 8, 16, 24), i = 0..3:
 		  edx[base+i]   = ecx[base+i] + ecx[base+7-i]
 		  edx[base+7-i] = (ecx[base+i] - ecx[base+7-i]) * cos[24+i]
 		with the two operands of the subtraction swapped in the second and
 		fourth group (byte offsets 32..60 and 96..124).
 	*/
 	flds     (%ecx)
 	fadds  28(%ecx)
 	fstps    (%edx)
 	flds     (%ecx)
 	fsubs  28(%ecx)
 	fmuls  96(%ebx)
 	fstps  28(%edx)
 	flds    4(%ecx)
 	fadds  24(%ecx)
 	fstps   4(%edx)
 	flds    4(%ecx)
 	fsubs  24(%ecx)
 	fmuls 100(%ebx)
 	fstps  24(%edx)
 	flds    8(%ecx)
 	fadds  20(%ecx)
 	fstps   8(%edx)
 	flds    8(%ecx)
 	fsubs  20(%ecx)
 	fmuls 104(%ebx)
 	fstps  20(%edx)
 	flds   12(%ecx)
 	fadds  16(%ecx)
 	fstps  12(%edx)
 	flds   12(%ecx)
 	fsubs  16(%ecx)
 	fmuls 108(%ebx)
 	fstps  16(%edx)
 	flds   32(%ecx)
 	fadds  60(%ecx)
 	fstps  32(%edx)
 	flds   60(%ecx)
 	fsubs  32(%ecx)
 	fmuls  96(%ebx)
 	fstps  60(%edx)
 	flds   36(%ecx)
 	fadds  56(%ecx)
 	fstps  36(%edx)
 	flds   56(%ecx)
 	fsubs  36(%ecx)
 	fmuls 100(%ebx)
 	fstps  56(%edx)
 	flds   40(%ecx)
 	fadds  52(%ecx)
 	fstps  40(%edx)
 	flds   52(%ecx)
 	fsubs  40(%ecx)
 	fmuls 104(%ebx)
 	fstps  52(%edx)
 	flds   44(%ecx)
 	fadds  48(%ecx)
 	fstps  44(%edx)
 	flds   48(%ecx)
 	fsubs  44(%ecx)
 	fmuls 108(%ebx)
 	fstps  48(%edx)
 	flds   64(%ecx)
 	fadds  92(%ecx)
 	fstps  64(%edx)
 	flds   64(%ecx)
 	fsubs  92(%ecx)
 	fmuls  96(%ebx)
 	fstps  92(%edx)
 	flds   68(%ecx)
 	fadds  88(%ecx)
 	fstps  68(%edx)
 	flds   68(%ecx)
 	fsubs  88(%ecx)
 	fmuls 100(%ebx)
 	fstps  88(%edx)
 	flds   72(%ecx)
 	fadds  84(%ecx)
 	fstps  72(%edx)
 	flds   72(%ecx)
 	fsubs  84(%ecx)
 	fmuls 104(%ebx)
 	fstps  84(%edx)
 	flds   76(%ecx)
 	fadds  80(%ecx)
 	fstps  76(%edx)
 	flds   76(%ecx)
 	fsubs  80(%ecx)
 	fmuls 108(%ebx)
 	fstps  80(%edx)
 	flds   96(%ecx)
 	fadds 124(%ecx)
 	fstps  96(%edx)
 	flds  124(%ecx)
 	fsubs  96(%ecx)
 	fmuls  96(%ebx)
 	fstps 124(%edx)
 	flds  100(%ecx)
 	fadds 120(%ecx)
 	fstps 100(%edx)
 	flds  120(%ecx)
 	fsubs 100(%ecx)
 	fmuls 100(%ebx)
 	fstps 120(%edx)
 	flds  104(%ecx)
 	fadds 116(%ecx)
 	fstps 104(%edx)
 	flds  116(%ecx)
 	fsubs 104(%ecx)
 	fmuls 104(%ebx)
 	fstps 116(%edx)
 	flds  108(%ecx)
 	fadds 112(%ecx)
 	fstps 108(%edx)
 	flds  112(%ecx)
 	fsubs 108(%ecx)
 	fmuls 108(%ebx)
 	fstps 112(%edx)
 	/*
 		Pass 4: %edx buffer -> %ecx buffer, using the table entries at byte
 		offsets 112 and 116 of %ebx (cos[28], cos[29]).  The 32 floats are
 		handled as eight groups of 4 (base = 0, 4, ..., 28), i = 0..1:
 		  ecx[base+i]   = edx[base+i] + edx[base+3-i]
 		  ecx[base+3-i] = (edx[base+i] - edx[base+3-i]) * cos[28+i]
 		with the two operands of the subtraction swapped in every second
 		group (byte offsets 16.., 48.., 80.., 112..).
 	*/
 	flds     (%edx)
 	fadds  12(%edx)
 	fstps    (%ecx)
 	flds     (%edx)
 	fsubs  12(%edx)
 	fmuls 112(%ebx)
 	fstps  12(%ecx)
 	flds    4(%edx)
 	fadds   8(%edx)
 	fstps   4(%ecx)
 	flds    4(%edx)
 	fsubs   8(%edx)
 	fmuls 116(%ebx)
 	fstps   8(%ecx)
 	flds   16(%edx)
 	fadds  28(%edx)
 	fstps  16(%ecx)
 	flds   28(%edx)
 	fsubs  16(%edx)
 	fmuls 112(%ebx)
 	fstps  28(%ecx)
 	flds   20(%edx)
 	fadds  24(%edx)
 	fstps  20(%ecx)
 	flds   24(%edx)
 	fsubs  20(%edx)
 	fmuls 116(%ebx)
 	fstps  24(%ecx)
 	flds   32(%edx)
 	fadds  44(%edx)
 	fstps  32(%ecx)
 	flds   32(%edx)
 	fsubs  44(%edx)
 	fmuls 112(%ebx)
 	fstps  44(%ecx)
 	flds   36(%edx)
 	fadds  40(%edx)
 	fstps  36(%ecx)
 	flds   36(%edx)
 	fsubs  40(%edx)
 	fmuls 116(%ebx)
 	fstps  40(%ecx)
 	flds   48(%edx)
 	fadds  60(%edx)
 	fstps  48(%ecx)
 	flds   60(%edx)
 	fsubs  48(%edx)
 	fmuls 112(%ebx)
 	fstps  60(%ecx)
 	flds   52(%edx)
 	fadds  56(%edx)
 	fstps  52(%ecx)
 	flds   56(%edx)
 	fsubs  52(%edx)
 	fmuls 116(%ebx)
 	fstps  56(%ecx)
 	flds   64(%edx)
 	fadds  76(%edx)
 	fstps  64(%ecx)
 	flds   64(%edx)
 	fsubs  76(%edx)
 	fmuls 112(%ebx)
 	fstps  76(%ecx)
 	flds   68(%edx)
 	fadds  72(%edx)
 	fstps  68(%ecx)
 	flds   68(%edx)
 	fsubs  72(%edx)
 	fmuls 116(%ebx)
 	fstps  72(%ecx)
 	flds   80(%edx)
 	fadds  92(%edx)
 	fstps  80(%ecx)
 	flds   92(%edx)
 	fsubs  80(%edx)
 	fmuls 112(%ebx)
 	fstps  92(%ecx)
 	flds   84(%edx)
 	fadds  88(%edx)
 	fstps  84(%ecx)
 	flds   88(%edx)
 	fsubs  84(%edx)
 	fmuls 116(%ebx)
 	fstps  88(%ecx)
 	flds   96(%edx)
 	fadds 108(%edx)
 	fstps  96(%ecx)
 	flds   96(%edx)
 	fsubs 108(%edx)
 	fmuls 112(%ebx)
 	fstps 108(%ecx)
 	flds  100(%edx)
 	fadds 104(%edx)
 	fstps 100(%ecx)
 	flds  100(%edx)
 	fsubs 104(%edx)
 	fmuls 116(%ebx)
 	fstps 104(%ecx)
 	flds  112(%edx)
 	fadds 124(%edx)
 	fstps 112(%ecx)
 	flds  124(%edx)
 	fsubs 112(%edx)
 	fmuls 112(%ebx)
 	fstps 124(%ecx)
 	flds  116(%edx)
 	fadds 120(%edx)
 	fstps 116(%ecx)
 	flds  120(%edx)
 	fsubs 116(%edx)
 	fmuls 116(%ebx)
 	fstps 120(%ecx)
 	/*
 		Pass 5: %ecx buffer -> %edx buffer, byte offsets 32..124 only, using
 		the single table entry at 120(%ebx).  Adjacent pairs are combined
 		(sum, and difference scaled by that entry); in addition several partial
 		sums are accumulated on the x87 stack (the "fld %st(0)" / fadd
 		sequences) before the values are stored.
 		Byte offsets 0..28 of the %ecx buffer are not processed here; both
 		output stages that follow read them directly from %ecx.
 	*/
 	flds   32(%ecx)
 	fadds  36(%ecx)
 	fstps  32(%edx)
 	flds   32(%ecx)
 	fsubs  36(%ecx)
 	fmuls 120(%ebx)
 	fstps  36(%edx)
 	flds   44(%ecx)
 	fsubs  40(%ecx)
 	fmuls 120(%ebx)
 	fsts   44(%edx)
 	fadds  40(%ecx)
 	fadds  44(%ecx)
 	fstps  40(%edx)
 	flds   48(%ecx)
 	fsubs  52(%ecx)
 	fmuls 120(%ebx)
 	flds   60(%ecx)
 	fsubs  56(%ecx)
 	fmuls 120(%ebx)
 	fld      %st(0)
 	fadds  56(%ecx)
 	fadds  60(%ecx)
 	fld      %st(0)
 	fadds  48(%ecx)
 	fadds  52(%ecx)
 	fstps  48(%edx)
 	fadd     %st(2)
 	fstps  56(%edx)
 	fsts   60(%edx)
 	faddp    %st(1)
 	fstps  52(%edx)
 	flds   64(%ecx)
 	fadds  68(%ecx)
 	fstps  64(%edx)
 	flds   64(%ecx)
 	fsubs  68(%ecx)
 	fmuls 120(%ebx)
 	fstps  68(%edx)
 	flds   76(%ecx)
 	fsubs  72(%ecx)
 	fmuls 120(%ebx)
 	fsts   76(%edx)
 	fadds  72(%ecx)
 	fadds  76(%ecx)
 	fstps  72(%edx)
 	flds   92(%ecx)
 	fsubs  88(%ecx)
 	fmuls 120(%ebx)
 	fsts   92(%edx)
 	fadds  92(%ecx)
 	fadds  88(%ecx)
 	fld      %st(0)
 	fadds  80(%ecx)
 	fadds  84(%ecx)
 	fstps  80(%edx)
 	flds   80(%ecx)
 	fsubs  84(%ecx)
 	fmuls 120(%ebx)
 	fadd  %st(0), %st(1)
 	fadds 92(%edx)
 	fstps 84(%edx)
 	fstps 88(%edx)
 	flds   96(%ecx)
 	fadds 100(%ecx)
 	fstps  96(%edx)
 	flds   96(%ecx)
 	fsubs 100(%ecx)
 	fmuls 120(%ebx)
 	fstps 100(%edx)
 	flds  108(%ecx)
 	fsubs 104(%ecx)
 	fmuls 120(%ebx)
 	fsts  108(%edx)
 	fadds 104(%ecx)
 	fadds 108(%ecx)
 	fstps 104(%edx)
 	flds  124(%ecx)
 	fsubs 120(%ecx)
 	fmuls 120(%ebx)
 	fsts  124(%edx)
 	fadds 120(%ecx)
 	fadds 124(%ecx)
 	fld      %st(0)
 	fadds 112(%ecx)
 	fadds 116(%ecx)
 	fstps 112(%edx)
 	flds  112(%ecx)
 	fsubs 116(%ecx)
 	fmuls 120(%ebx)
 	fadd  %st(0),%st(1)
 	fadds 124(%edx)
 	fstps 116(%edx)
 	fstps 120(%edx)
 	/*
 		Output stage selection.  ZF still holds the result of the
 		"orl %ecx,%ecx" executed near the function entry: a non-zero entry
 		%ecx branches to the fist/fistp stage at .L01, zero falls through
 		to the fsts/fstps stage below.
 	*/
 	jnz .L01
 	/*
 		Floating point output: results are stored as 32-bit floats to %esi
 		at byte offsets 0, 64, ..., 1024 and to %edi at byte offsets
 		0, 64, ..., 960 (a stride of 16 floats).  (%esi) and (%edi) receive
 		the same value.  The first part reads bytes 0..28 of the %ecx buffer,
 		the rest combines values of the %edx buffer (bytes 32..124).
 	*/
 	flds      (%ecx)
 	fadds    4(%ecx)
 	fstps 1024(%esi)
 	flds      (%ecx)
 	fsubs    4(%ecx)
 	fmuls  120(%ebx)
 	fsts      (%esi)
 	fstps     (%edi)
 	flds   12(%ecx)
 	fsubs   8(%ecx)
 	fmuls 120(%ebx)
 	fsts  512(%edi)
 	fadds  12(%ecx)
 	fadds   8(%ecx)
 	fstps 512(%esi)
 	flds   16(%ecx)
 	fsubs  20(%ecx)
 	fmuls 120(%ebx)
 	flds   28(%ecx)
 	fsubs  24(%ecx)
 	fmuls 120(%ebx)
 	fsts  768(%edi)
 	fld      %st(0)
 	fadds  24(%ecx)
 	fadds  28(%ecx)
 	fld      %st(0)
 	fadds  16(%ecx)
 	fadds  20(%ecx)
 	fstps 768(%esi)
 	fadd     %st(2)
 	fstps 256(%esi)
 	faddp    %st(1)
 	fstps 256(%edi)
 	flds   32(%edx)
 	fadds  48(%edx)
 	fstps 896(%esi)
 	flds   48(%edx)
 	fadds  40(%edx)
 	fstps 640(%esi)
 	flds   40(%edx)
 	fadds  56(%edx)
 	fstps 384(%esi)
 	flds   56(%edx)
 	fadds  36(%edx)
 	fstps 128(%esi)
 	flds   36(%edx)
 	fadds  52(%edx)
 	fstps 128(%edi)
 	flds   52(%edx)
 	fadds  44(%edx)
 	fstps 384(%edi)
 	flds   60(%edx)
 	fsts  896(%edi)
 	fadds  44(%edx)
 	fstps 640(%edi)
 	flds   96(%edx)
 	fadds 112(%edx)
 	fld      %st(0)
 	fadds  64(%edx)
 	fstps 960(%esi)
 	fadds  80(%edx)
 	fstps 832(%esi)
 	flds  112(%edx)
 	fadds 104(%edx)
 	fld      %st(0)
 	fadds  80(%edx)
 	fstps 704(%esi)
 	fadds  72(%edx)
 	fstps 576(%esi)
 	flds  104(%edx)
 	fadds 120(%edx)
 	fld      %st(0)
 	fadds  72(%edx)
 	fstps 448(%esi)
 	fadds  88(%edx)
 	fstps 320(%esi)
 	flds  120(%edx)
 	fadds 100(%edx)
 	fld      %st(0)
 	fadds  88(%edx)
 	fstps 192(%esi)
 	fadds  68(%edx)
 	fstps  64(%esi)
 	flds  100(%edx)
 	fadds 116(%edx)
 	fld      %st(0)
 	fadds  68(%edx)
 	fstps  64(%edi)
 	fadds  84(%edx)
 	fstps 192(%edi)
 	flds  116(%edx)
 	fadds 108(%edx)
 	fld      %st(0)
 	fadds  84(%edx)
 	fstps 320(%edi)
 	fadds  76(%edx)
 	fstps 448(%edi)
 	flds  108(%edx)
 	fadds 124(%edx)
 	fld      %st(0)
 	fadds  76(%edx)
 	fstps 576(%edi)
 	fadds  92(%edx)
 	fstps 704(%edi)
 	flds  124(%edx)
 	fsts  960(%edi)
 	fadds  92(%edx)
 	fstps 832(%edi)
 	/* epilogue: drop the 256-byte frame and restore the saved registers */
 	addl $256,%esp
 	popl %edi
 	popl %esi
 	popl %ebx
 	ret
 /*
 	.L01: integer output stage, reached when %ecx was non-zero on entry.
 	Same arithmetic as the floating point stage, but results are converted
 	with fist/fistp and stored to %esi at byte offsets 0, 32, ..., 512 and
 	to %edi at byte offsets 32, 64, ..., 480.
 	NOTE(review): fist/fistp carry no size suffix here, so the operand size
 	is whatever the assembler defaults to; the 32-byte stride and the 16-bit
 	movsw below suggest 16-bit stores are intended -- confirm.
 */
 .L01:	
 	flds      (%ecx)
 	fadds    4(%ecx)
 	fistp  512(%esi)
 	flds      (%ecx)
 	fsubs    4(%ecx)
 	fmuls  120(%ebx)
 	fistp     (%esi)
 	flds    12(%ecx)
 	fsubs    8(%ecx)
 	fmuls  120(%ebx)
 	fist   256(%edi)
 	fadds   12(%ecx)
 	fadds    8(%ecx)
 	fistp  256(%esi)
 	flds   16(%ecx)
 	fsubs  20(%ecx)
 	fmuls 120(%ebx)
 	flds   28(%ecx)
 	fsubs  24(%ecx)
 	fmuls 120(%ebx)
 	fist  384(%edi)
 	fld      %st(0)
 	fadds  24(%ecx)
 	fadds  28(%ecx)
 	fld      %st(0)
 	fadds  16(%ecx)
 	fadds  20(%ecx)
 	fistp  384(%esi)
 	fadd     %st(2)
 	fistp  128(%esi)
 	faddp    %st(1)
 	fistp  128(%edi)
 	flds    32(%edx)
 	fadds   48(%edx)
 	fistp  448(%esi)
 	flds   48(%edx)
 	fadds  40(%edx)
 	fistp 320(%esi)
 	flds   40(%edx)
 	fadds  56(%edx)
 	fistp 192(%esi)
 	flds   56(%edx)
 	fadds  36(%edx)
 	fistp  64(%esi)
 	flds   36(%edx)
 	fadds  52(%edx)
 	fistp  64(%edi)
 	flds   52(%edx)
 	fadds  44(%edx)
 	fistp 192(%edi)
 	flds   60(%edx)
 	fist   448(%edi)
 	fadds  44(%edx)
 	fistp 320(%edi)
 	flds   96(%edx)
 	fadds 112(%edx)
 	fld      %st(0)
 	fadds  64(%edx)
 	fistp 480(%esi)
 	fadds  80(%edx)
 	fistp 416(%esi)
 	flds  112(%edx)
 	fadds 104(%edx)
 	fld      %st(0)
 	fadds  80(%edx)
 	fistp 352(%esi)
 	fadds  72(%edx)
 	fistp 288(%esi)
 	flds  104(%edx)
 	fadds 120(%edx)
 	fld      %st(0)
 	fadds  72(%edx)
 	fistp 224(%esi)
 	fadds  88(%edx)
 	fistp 160(%esi)
 	flds  120(%edx)
 	fadds 100(%edx)
 	fld      %st(0)
 	fadds  88(%edx)
 	fistp  96(%esi)
 	fadds  68(%edx)
 	fistp  32(%esi)
 	flds  100(%edx)
 	fadds 116(%edx)
 	fld      %st(0)
 	fadds  68(%edx)
 	fistp  32(%edi)
 	fadds  84(%edx)
 	fistp  96(%edi)
 	flds  116(%edx)
 	fadds 108(%edx)
 	fld      %st(0)
 	fadds  84(%edx)
 	fistp 160(%edi)
 	fadds  76(%edx)
 	fistp 224(%edi)
 	flds  108(%edx)
 	fadds 124(%edx)
 	fld      %st(0)
 	fadds  76(%edx)
 	fistp 288(%edi)
 	fadds  92(%edx)
 	fistp 352(%edi)
 	flds  124(%edx)
 	fist  480(%edi)
 	fadds  92(%edx)
 	fistp 416(%edi)
 	/* copy the 16-bit word at (%esi) to (%edi); movsw also steps both
 	   registers, which does not matter since they are restored right below */
 	movsw
 	addl $256,%esp
 	popl %edi
 	popl %esi
 	popl %ebx
 	ret
--- a/src/libmpg123/dct64_sse.S
+++ b/src/libmpg123/dct64_sse.S
@@ -0,0 +1,557 @@
 /*
 	dct64_sse: MMX/SSE optimized dct64
 	copyright 2006-2007 by Zuxy Meng <zuxy.meng@gmail.com> / the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by the mysterious higway for MMX (apparently)
 	then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
 	Both have agreed to distribution under LGPL 2.1 .
 	Transformed back into standalone asm, with help of
 	gcc -S -DHAVE_CONFIG_H -I.  -march=pentium3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct64_sse.{S,c}
 	Original comment from MPlayer source follows:
 */
 /*
 * Discrete Cosine Transform (DCT) for SSE
 * based upon code from mp3lib/dct64.c, mp3lib/dct64_altivec.c
 * and mp3lib/dct64_MMX.c
 */
 #include "mangle.h"
 /*
 	Constants and scratch storage for dct64_sse.
 	  nnnn, ppnn, pnpn  16-byte masks of 0 / 0x80000000 words (-2147483648 is
 	                    the float sign bit); the code XORs them onto vectors
 	                    to negate selected lanes:
 	                      nnnn = all four lanes, ppnn = lanes 2 and 3,
 	                      pnpn = lanes 1 and 3
 	  one.4748          1065353216 == 0x3F800000, the bit pattern of 1.0f
 	  b1.4746, b2.4747  two 128-byte scratch buffers declared through the COMM
 	                    macro (not defined in this file; presumably from
 	                    mangle.h).
 	NOTE(review): b1/b2 are named objects with static storage rather than
 	stack space, so every call to dct64_sse works on the same two buffers;
 	that looks non-reentrant -- confirm before calling from several threads.
 */
 #ifndef __APPLE__
 	.section	.rodata
 #else
 	.data
 #endif
 	ALIGN16
 	/* .type	nnnn, @object
 	   .size	nnnn, 16 */
 nnnn:
 	.long	-2147483648
 	.long	-2147483648
 	.long	-2147483648
 	.long	-2147483648
 	ALIGN16
 	/* .type	ppnn, @object
 	   .size	ppnn, 16 */
 ppnn:
 	.long	0
 	.long	0
 	.long	-2147483648
 	.long	-2147483648
 	ALIGN16
 	/* .type	pnpn, @object
 	   .size	pnpn, 16 */
 pnpn:
 	.long	0
 	.long	-2147483648
 	.long	0
 	.long	-2147483648
 	ALIGN4
 	/* .type	one.4748, @object
 	   .size	one.4748, 4 */
 one.4748:
 	.long	1065353216
 	/* no .data ? */
 	/* .local	b2.4747 */
 	ALIGN16
 	COMM(b2.4747,128,16)
 	/* .local	b1.4746 */
 	ALIGN16
 	COMM(b1.4746,128,16)
 	/*
 		dct64_sse: entry and pass 1.
 		With the %ebp frame set up, the three stack arguments are loaded as
 		  8(%ebp)  -> %ecx, 12(%ebp) -> %ebx   output pointers (written in
 		                                        the final stage)
 		  16(%ebp) -> %eax                      32 input floats (only read)
 		%ebx is callee-saved and therefore pushed/popped.

 		"shufps $27, x, x" reverses the order of the four floats in x
 		(immediate 0x1B selects lanes 3,2,1,0).  Each of the four "cycles"
 		handles four floats from the front and four from the back of the
 		input, with lo = 0,16,32,48 and hi = 112,96,80,64 (byte offsets):
 		  b1[lo] = in[lo] + reverse(in[hi])
 		  b1[hi] = (reverse(in[lo]) - in[hi]) * reverse(costab[lo])
 		MOVUAPS is a macro that is not defined in this file (presumably in
 		mangle.h); it is used for the loads from the caller's input.
 	*/
 	.text
 	ALIGN16,,15
 .globl ASM_NAME(dct64_sse)
 	/* .type	ASM_NAME(dct64_sse), @function */
 ASM_NAME(dct64_sse):
 	pushl	%ebp
 	movl	%esp, %ebp
 	movl	16(%ebp), %eax
 	pushl	%ebx
 	movl	8(%ebp), %ecx
 #APP
 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 1 */
 	movaps    ASM_NAME(costab_mmxsse), %xmm3
 	shufps    $27, %xmm3, %xmm3
 	MOVUAPS    (%eax), %xmm1
 	movaps    %xmm1, %xmm4
 	MOVUAPS    112(%eax), %xmm2
 	shufps    $27, %xmm4, %xmm4
 	movaps    %xmm2, %xmm0
 	shufps    $27, %xmm0, %xmm0
 	addps     %xmm0, %xmm1
 	movaps    %xmm1, b1.4746
 	subps     %xmm2, %xmm4
 	mulps     %xmm3, %xmm4
 	movaps    %xmm4, b1.4746+112
 #NO_APP
 	movl	12(%ebp), %ebx
 #APP
 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 2 */
 	movaps    ASM_NAME(costab_mmxsse)+16, %xmm3
 	shufps    $27, %xmm3, %xmm3
 	MOVUAPS    16(%eax), %xmm1
 	movaps    %xmm1, %xmm4
 	MOVUAPS    96(%eax), %xmm2
 	shufps    $27, %xmm4, %xmm4
 	movaps    %xmm2, %xmm0
 	shufps    $27, %xmm0, %xmm0
 	addps     %xmm0, %xmm1
 	movaps    %xmm1, b1.4746+16
 	subps     %xmm2, %xmm4
 	mulps     %xmm3, %xmm4
 	movaps    %xmm4, b1.4746+96
 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 3 */
 	movaps    ASM_NAME(costab_mmxsse)+32, %xmm3
 	shufps    $27, %xmm3, %xmm3
 	MOVUAPS    32(%eax), %xmm1
 	movaps    %xmm1, %xmm4
 	MOVUAPS    80(%eax), %xmm2
 	shufps    $27, %xmm4, %xmm4
 	movaps    %xmm2, %xmm0
 	shufps    $27, %xmm0, %xmm0
 	addps     %xmm0, %xmm1
 	movaps    %xmm1, b1.4746+32
 	subps     %xmm2, %xmm4
 	mulps     %xmm3, %xmm4
 	movaps    %xmm4, b1.4746+80
 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 4 */
 	movaps    ASM_NAME(costab_mmxsse)+48, %xmm3
 	shufps    $27, %xmm3, %xmm3
 	MOVUAPS    48(%eax), %xmm1
 	movaps    %xmm1, %xmm4
 	MOVUAPS    64(%eax), %xmm2
 	shufps    $27, %xmm4, %xmm4
 	movaps    %xmm2, %xmm0
 	shufps    $27, %xmm0, %xmm0
 	addps     %xmm0, %xmm1
 	movaps    %xmm1, b1.4746+48
 	subps     %xmm2, %xmm4
 	mulps     %xmm3, %xmm4
 	movaps    %xmm4, b1.4746+64
 	/*
 		Pass 2: b1 -> b2, done separately for the lower (bytes 0..48) and the
 		upper (bytes 64..112) half of b1.  With base = 0 resp. 64:
 		  b2[base+0]  = b1[base+0]  + reverse(b1[base+48])
 		  b2[base+16] = b1[base+16] + reverse(b1[base+32])
 		  b2[base+32] = reverse(b1[base+16]) - b1[base+32]
 		  b2[base+48] = reverse(b1[base+0])  - b1[base+48]
 		The differences are scaled afterwards (see the comment further down).
 	*/
 	movaps    b1.4746, %xmm1
 	movaps    b1.4746+16, %xmm3
 	movaps    b1.4746+32, %xmm4
 	movaps    b1.4746+48, %xmm6
 	movaps    %xmm1, %xmm7
 	shufps    $27, %xmm7, %xmm7
 	movaps    %xmm3, %xmm5
 	shufps    $27, %xmm5, %xmm5
 	movaps    %xmm4, %xmm2
 	shufps    $27, %xmm2, %xmm2
 	movaps    %xmm6, %xmm0
 	shufps    $27, %xmm0, %xmm0
 	addps     %xmm0, %xmm1
 	movaps    %xmm1, b2.4747
 	addps     %xmm2, %xmm3
 	movaps    %xmm3, b2.4747+16
 	subps     %xmm4, %xmm5
 	movaps    %xmm5, b2.4747+32
 	subps     %xmm6, %xmm7
 	movaps    %xmm7, b2.4747+48
 	movaps    b1.4746+64, %xmm1
 	movaps    b1.4746+80, %xmm3
 	movaps    b1.4746+96, %xmm4
 	movaps    b1.4746+112, %xmm6
 	movaps    %xmm1, %xmm7
 	shufps    $27, %xmm7, %xmm7
 	movaps    %xmm3, %xmm5
 	shufps    $27, %xmm5, %xmm5
 	movaps    %xmm4, %xmm2
 	shufps    $27, %xmm2, %xmm2
 	movaps    %xmm6, %xmm0
 	shufps    $27, %xmm0, %xmm0
 	addps     %xmm0, %xmm1
 	movaps    %xmm1, b2.4747+64
 	addps     %xmm2, %xmm3
 	movaps    %xmm3, b2.4747+80
 	subps     %xmm4, %xmm5
 	movaps    %xmm5, b2.4747+96
 	subps     %xmm6, %xmm7
 	movaps    %xmm7, b2.4747+112
 	/*
 		Scale the pass-2 differences in place:
 		  b2[32] *= reverse(costab+80),  b2[48]  *= reverse(costab+64)
 		  b2[96]  = 0 - b2[96]  * reverse(costab+80)
 		  b2[112] = 0 - b2[112] * reverse(costab+64)
 		(%xmm6/%xmm7 are zeroed with xorps so the upper-half products come
 		out negated.)
 	*/
 	movaps    b2.4747+32, %xmm0
 	movaps    b2.4747+48, %xmm1
 	movaps    ASM_NAME(costab_mmxsse)+64, %xmm4
 	xorps     %xmm6, %xmm6
 	shufps    $27, %xmm4, %xmm4
 	mulps     %xmm4, %xmm1
 	movaps    ASM_NAME(costab_mmxsse)+80, %xmm2
 	xorps     %xmm7, %xmm7
 	shufps    $27, %xmm2, %xmm2
 	mulps     %xmm2, %xmm0
 	movaps    %xmm0, b2.4747+32
 	movaps    %xmm1, b2.4747+48
 	movaps    b2.4747+96, %xmm3
 	mulps     %xmm2, %xmm3
 	subps     %xmm3, %xmm6
 	movaps    %xmm6, b2.4747+96
 	movaps    b2.4747+112, %xmm5
 	mulps     %xmm4, %xmm5
 	subps     %xmm5, %xmm7
 	movaps    %xmm7, b2.4747+112
 	/*
 		Pass 3: b2 -> b1 in four groups of 32 bytes (k = 0, 32, 64, 96),
 		with %xmm0 = reverse(costab+96):
 		  b1[k]    = b2[k] + reverse(b2[k+16])
 		  b1[k+16] = ((reverse(b2[k]) - b2[k+16]) ^ %xmm6) * %xmm0
 		%xmm6 starts as a copy of nnnn and is XORed with nnnn before each
 		group, so it alternates 0, sign mask, 0, sign mask: the difference is
 		negated in the second and fourth group.
 	*/
 	movaps    ASM_NAME(costab_mmxsse)+96, %xmm0
 	shufps    $27, %xmm0, %xmm0
 	movaps    nnnn, %xmm5
 	movaps    %xmm5, %xmm6
 	movaps    b2.4747, %xmm2
 	movaps    b2.4747+16, %xmm3
 	movaps    %xmm2, %xmm4
 	xorps     %xmm5, %xmm6
 	shufps    $27, %xmm4, %xmm4
 	movaps    %xmm3, %xmm1
 	shufps    $27, %xmm1, %xmm1
 	addps     %xmm1, %xmm2
 	movaps    %xmm2, b1.4746
 	subps     %xmm3, %xmm4
 	xorps     %xmm6, %xmm4
 	mulps     %xmm0, %xmm4
 	movaps    %xmm4, b1.4746+16
 	movaps    b2.4747+32, %xmm2
 	movaps    b2.4747+48, %xmm3
 	movaps    %xmm2, %xmm4
 	xorps     %xmm5, %xmm6
 	shufps    $27, %xmm4, %xmm4
 	movaps    %xmm3, %xmm1
 	shufps    $27, %xmm1, %xmm1
 	addps     %xmm1, %xmm2
 	movaps    %xmm2, b1.4746+32
 	subps     %xmm3, %xmm4
 	xorps     %xmm6, %xmm4
 	mulps     %xmm0, %xmm4
 	movaps    %xmm4, b1.4746+48
 	movaps    b2.4747+64, %xmm2
 	movaps    b2.4747+80, %xmm3
 	movaps    %xmm2, %xmm4
 	xorps     %xmm5, %xmm6
 	shufps    $27, %xmm4, %xmm4
 	movaps    %xmm3, %xmm1
 	shufps    $27, %xmm1, %xmm1
 	addps     %xmm1, %xmm2
 	movaps    %xmm2, b1.4746+64
 	subps     %xmm3, %xmm4
 	xorps     %xmm6, %xmm4
 	mulps     %xmm0, %xmm4
 	movaps    %xmm4, b1.4746+80
 	movaps    b2.4747+96, %xmm2
 	movaps    b2.4747+112, %xmm3
 	movaps    %xmm2, %xmm4
 	xorps     %xmm5, %xmm6
 	shufps    $27, %xmm4, %xmm4
 	movaps    %xmm3, %xmm1
 	shufps    $27, %xmm1, %xmm1
 	addps     %xmm1, %xmm2
 	movaps    %xmm2, b1.4746+96
 	subps     %xmm3, %xmm4
 	xorps     %xmm6, %xmm4
 	mulps     %xmm0, %xmm4
 	movaps    %xmm4, b1.4746+112
 	/*
 		Pass 4: b1 -> b2, all eight 16-byte vectors.
 		The unpcklps sequence builds the multiplier
 		  %xmm0 = { 1.0, 1.0, costab[+116], costab[+112] }
 		from one.4748 and the two table entries; %xmm2 = ppnn negates lanes
 		2 and 3 of the shuffled copy before the add, so lanes 0/1 end up as
 		sums (times 1.0) and lanes 2/3 as scaled differences.  Even vectors
 		use the shufps $20 / $235 pair, odd vectors a plain reversal ($27).
 		%xmm1 keeps 1.0 for the next pass.
 	*/
 	movss     one.4748, %xmm1
 	movss     ASM_NAME(costab_mmxsse)+112, %xmm0
 	movaps    %xmm1, %xmm3
 	unpcklps  %xmm0, %xmm3
 	movss     ASM_NAME(costab_mmxsse)+116, %xmm2
 	movaps    %xmm1, %xmm0
 	unpcklps  %xmm2, %xmm0
 	unpcklps  %xmm3, %xmm0
 	movaps    ppnn, %xmm2
 	movaps    b1.4746, %xmm3
 	movaps    %xmm3, %xmm4
 	shufps    $20, %xmm4, %xmm4
 	shufps    $235, %xmm3, %xmm3
 	xorps     %xmm2, %xmm3
 	addps     %xmm3, %xmm4
 	mulps     %xmm0, %xmm4
 	movaps    %xmm4, b2.4747
 	movaps    b1.4746+16, %xmm6
 	movaps    %xmm6, %xmm5
 	shufps    $27, %xmm5, %xmm5
 	xorps     %xmm2, %xmm5
 	addps     %xmm5, %xmm6
 	mulps     %xmm0, %xmm6
 	movaps    %xmm6, b2.4747+16
 	movaps    b1.4746+32, %xmm3
 	movaps    %xmm3, %xmm4
 	shufps    $20, %xmm4, %xmm4
 	shufps    $235, %xmm3, %xmm3
 	xorps     %xmm2, %xmm3
 	addps     %xmm3, %xmm4
 	mulps     %xmm0, %xmm4
 	movaps    %xmm4, b2.4747+32
 	movaps    b1.4746+48, %xmm6
 	movaps    %xmm6, %xmm5
 	shufps    $27, %xmm5, %xmm5
 	xorps     %xmm2, %xmm5
 	addps     %xmm5, %xmm6
 	mulps     %xmm0, %xmm6
 	movaps    %xmm6, b2.4747+48
 	movaps    b1.4746+64, %xmm3
 	movaps    %xmm3, %xmm4
 	shufps    $20, %xmm4, %xmm4
 	shufps    $235, %xmm3, %xmm3
 	xorps     %xmm2, %xmm3
 	addps     %xmm3, %xmm4
 	mulps     %xmm0, %xmm4
 	movaps    %xmm4, b2.4747+64
 	movaps    b1.4746+80, %xmm6
 	movaps    %xmm6, %xmm5
 	shufps    $27, %xmm5, %xmm5
 	xorps     %xmm2, %xmm5
 	addps     %xmm5, %xmm6
 	mulps     %xmm0, %xmm6
 	movaps    %xmm6, b2.4747+80
 	movaps    b1.4746+96, %xmm3
 	movaps    %xmm3, %xmm4
 	shufps    $20, %xmm4, %xmm4
 	shufps    $235, %xmm3, %xmm3
 	xorps     %xmm2, %xmm3
 	addps     %xmm3, %xmm4
 	mulps     %xmm0, %xmm4
 	movaps    %xmm4, b2.4747+96
 	movaps    b1.4746+112, %xmm6
 	movaps    %xmm6, %xmm5
 	shufps    $27, %xmm5, %xmm5
 	xorps     %xmm2, %xmm5
 	addps     %xmm5, %xmm6
 	mulps     %xmm0, %xmm6
 	movaps    %xmm6, b2.4747+112
 	/*
 		Pass 5: b2 -> b1 for byte offsets 32..112 only (b2[0] and b2[16] are
 		read directly by the output stage).
 		Multiplier %xmm2 = { 1.0, c, 1.0, c } with c = costab[+120];
 		%xmm0 = pnpn negates lanes 1 and 3 of the shuffled copy, so each
 		adjacent pair becomes (sum, scaled difference).
 	*/
 	movss     ASM_NAME(costab_mmxsse)+120, %xmm0
 	movaps    %xmm1, %xmm2
 	movaps    %xmm0, %xmm7
 	unpcklps  %xmm1, %xmm2
 	unpcklps  %xmm0, %xmm7
 	movaps    pnpn, %xmm0
 	unpcklps  %xmm7, %xmm2
 	movaps    b2.4747+32, %xmm1
 	movaps    %xmm1, %xmm3
 	shufps    $224, %xmm3, %xmm3
 	shufps    $181, %xmm1, %xmm1
 	xorps     %xmm0, %xmm1
 	addps     %xmm1, %xmm3
 	mulps     %xmm2, %xmm3
 	movaps    %xmm3, b1.4746+32
 	movaps    b2.4747+48, %xmm4
 	movaps    %xmm4, %xmm5
 	shufps    $224, %xmm5, %xmm5
 	shufps    $181, %xmm4, %xmm4
 	xorps     %xmm0, %xmm4
 	addps     %xmm4, %xmm5
 	mulps     %xmm2, %xmm5
 	movaps    %xmm5, b1.4746+48
 	movaps    b2.4747+64, %xmm1
 	movaps    %xmm1, %xmm3
 	shufps    $224, %xmm3, %xmm3
 	shufps    $181, %xmm1, %xmm1
 	xorps     %xmm0, %xmm1
 	addps     %xmm1, %xmm3
 	mulps     %xmm2, %xmm3
 	movaps    %xmm3, b1.4746+64
 	movaps    b2.4747+80, %xmm4
 	movaps    %xmm4, %xmm5
 	shufps    $224, %xmm5, %xmm5
 	shufps    $181, %xmm4, %xmm4
 	xorps     %xmm0, %xmm4
 	addps     %xmm4, %xmm5
 	mulps     %xmm2, %xmm5
 	movaps    %xmm5, b1.4746+80
 	movaps    b2.4747+96, %xmm1
 	movaps    %xmm1, %xmm3
 	shufps    $224, %xmm3, %xmm3
 	shufps    $181, %xmm1, %xmm1
 	xorps     %xmm0, %xmm1
 	addps     %xmm1, %xmm3
 	mulps     %xmm2, %xmm3
 	movaps    %xmm3, b1.4746+96
 	movaps    b2.4747+112, %xmm4
 	movaps    %xmm4, %xmm5
 	shufps    $224, %xmm5, %xmm5
 	shufps    $181, %xmm4, %xmm4
 	xorps     %xmm0, %xmm4
 	addps     %xmm4, %xmm5
 	mulps     %xmm2, %xmm5
 	movaps    %xmm5, b1.4746+112
 /*
 	Scalar x87 additions performed in place on b1.  The same pattern is
 	applied three times, to byte offsets 40..60, 72..92 and 104..124:
 	neighbouring partial sums of pass 5 are added together and written back.
 	Interleaved with that, %edx is pointed at b1 and %eax at b2 for the
 	output stage that follows (%eax no longer holds the input pointer).
 */
 #NO_APP
 	flds	b1.4746+40
 	movl	$b1.4746, %edx
 	movl	$b2.4747, %eax
 	fadds	b1.4746+44
 	fstps	b1.4746+40
 	flds	b1.4746+56
 	fadds	b1.4746+60
 	flds	b1.4746+48
 	fadd	%st(1), %st
 	fstps	b1.4746+48
 	fadds	b1.4746+52
 	fstps	b1.4746+56
 	flds	b1.4746+52
 	fadds	b1.4746+60
 	fstps	b1.4746+52
 	flds	b1.4746+72
 	fadds	b1.4746+76
 	fstps	b1.4746+72
 	flds	b1.4746+88
 	fadds	b1.4746+92
 	flds	b1.4746+80
 	fadd	%st(1), %st
 	fstps	b1.4746+80
 	fadds	b1.4746+84
 	fstps	b1.4746+88
 	flds	b1.4746+84
 	fadds	b1.4746+92
 	fstps	b1.4746+84
 	flds	b1.4746+104
 	fadds	b1.4746+108
 	fstps	b1.4746+104
 	flds	b1.4746+120
 	fadds	b1.4746+124
 	flds	b1.4746+112
 	fadd	%st(1), %st
 	fstps	b1.4746+112
 	fadds	b1.4746+116
 	fstps	b1.4746+120
 	flds	b1.4746+116
 	fadds	b1.4746+124
 	fstps	b1.4746+116
 /*
 	Integer output stage.  The table entry costab[+120] is loaded once and
 	kept at the bottom of the x87 stack (referenced as %st(1)/%st(2) by the
 	fmul instructions) until "ffreep %st(0)" releases it at the end.
 	Results are converted with fist/fistp and stored to %ecx at byte offsets
 	0, 32, ..., 512 and to %ebx at byte offsets 32, 64, ..., 480; the first
 	part reads bytes 0..28 of b2 via %eax, the rest combines values of b1
 	via %edx.
 	NOTE(review): fist/fistp carry no size suffix, so the store size is the
 	assembler default; the 32-byte stride and the 16-bit copy at the end
 	suggest 16-bit stores are intended -- confirm.
 */
 #APP
 	flds       ASM_NAME(costab_mmxsse)+120
 	flds     (%eax)
 	fadds   4(%eax)
 	fistp 512(%ecx)
 	flds     (%eax)
 	fsubs   4(%eax)
 	fmul  %st(1)
 	fistp    (%ecx)
 	flds   12(%eax)
 	fsubs   8(%eax)
 	fmul  %st(1)
 	fist  256(%ebx)
 	fadds  12(%eax)
 	fadds   8(%eax)
 	fistp 256(%ecx)
 	flds   16(%eax)
 	fsubs  20(%eax)
 	fmul  %st(1)
 	flds   28(%eax)
 	fsubs  24(%eax)
 	fmul  %st(2)
 	fist  384(%ebx)
 	fld   %st(0)
 	fadds  24(%eax)
 	fadds  28(%eax)
 	fld   %st(0)
 	fadds  16(%eax)
 	fadds  20(%eax)
 	fistp 384(%ecx)
 	fadd  %st(2)
 	fistp 128(%ecx)
 	faddp %st(1)
 	fistp 128(%ebx)
 	flds   32(%edx)
 	fadds  48(%edx)
 	fistp 448(%ecx)
 	flds   48(%edx)
 	fadds  40(%edx)
 	fistp 320(%ecx)
 	flds   40(%edx)
 	fadds  56(%edx)
 	fistp 192(%ecx)
 	flds   56(%edx)
 	fadds  36(%edx)
 	fistp  64(%ecx)
 	flds   36(%edx)
 	fadds  52(%edx)
 	fistp  64(%ebx)
 	flds   52(%edx)
 	fadds  44(%edx)
 	fistp 192(%ebx)
 	flds   60(%edx)
 	fist  448(%ebx)
 	fadds  44(%edx)
 	fistp 320(%ebx)
 	flds   96(%edx)
 	fadds 112(%edx)
 	fld   %st(0)
 	fadds  64(%edx)
 	fistp 480(%ecx)
 	fadds  80(%edx)
 	fistp 416(%ecx)
 	flds  112(%edx)
 	fadds 104(%edx)
 	fld   %st(0)
 	fadds  80(%edx)
 	fistp 352(%ecx)
 	fadds  72(%edx)
 	fistp 288(%ecx)
 	flds  104(%edx)
 	fadds 120(%edx)
 	fld   %st(0)
 	fadds  72(%edx)
 	fistp 224(%ecx)
 	fadds  88(%edx)
 	fistp 160(%ecx)
 	flds  120(%edx)
 	fadds 100(%edx)
 	fld   %st(0)
 	fadds  88(%edx)
 	fistp  96(%ecx)
 	fadds  68(%edx)
 	fistp  32(%ecx)
 	flds  100(%edx)
 	fadds 116(%edx)
 	fld   %st(0)
 	fadds  68(%edx)
 	fistp  32(%ebx)
 	fadds  84(%edx)
 	fistp  96(%ebx)
 	flds  116(%edx)
 	fadds 108(%edx)
 	fld   %st(0)
 	fadds  84(%edx)
 	fistp 160(%ebx)
 	fadds  76(%edx)
 	fistp 224(%ebx)
 	flds  108(%edx)
 	fadds 124(%edx)
 	fld   %st(0)
 	fadds  76(%edx)
 	fistp 288(%ebx)
 	fadds  92(%edx)
 	fistp 352(%ebx)
 	flds  124(%edx)
 	fist  480(%ebx)
 	fadds  92(%edx)
 	fistp 416(%ebx)
 	ffreep %st(0)
 #NO_APP
 	/* copy the 16-bit word at (%ecx) to (%ebx), then restore and return */
 	movzwl	(%ecx), %eax
 	movw	%ax, (%ebx)
 	popl	%ebx
 	popl	%ebp
 	ret
 	/* .size	ASM_NAME(dct64_sse), .-ASM_NAME(dct64_sse) */
--- a/src/libmpg123/debug.h
+++ b/src/libmpg123/debug.h
@@ -0,0 +1,96 @@
 /*
 	debug.h: 
 		if DEBUG defined: debugging macro fprintf wrappers
 		else: macros defined to do nothing
 	That saves typing #ifdef DEBUG all the time and still preserves
 	lean code without debugging.
 	public domain (or LGPL / GPL, if you like that more;-)
 	generated by debugdef.pl, which was
 	trivially written by Thomas Orgis <thomas@orgis.org>
 */
 #include "config.h"
 /*
 	I could do that with variadic macros available:
 	#define sdebug(me, s) fprintf(stderr, "[location] " s "\n")
 	#define debug(me, s, ...) fprintf(stderr, "[location] " s "\n", __VA_ARGS__)
 	Variadic macros are a C99 feature...
 	Now just predefining stuff non-variadic for up to 15 arguments.
 	It's cumbersome to have them all with different names, though...
 */
 /*
 	debug(s), debug1(s, a) ... debug15(s, a, ..., o):
 	print "[file:line] debug: <formatted message>\n" to stderr when DEBUG is
 	defined, expand to nothing otherwise.  s must be a string literal (it is
 	concatenated with the prefix), the digit in the name is the number of
 	format arguments.  Without DEBUG the arguments are not evaluated, so they
 	must not have side effects the program depends on.

 	<stdio.h> is included unconditionally: the warning and error macros of
 	this header expand to fprintf(stderr, ...) whether or not DEBUG is
 	defined, so the header has to bring stdio into scope by itself.
 */
 #include <stdio.h>
 #ifdef DEBUG
 #define debug(s) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__)
 #define debug1(s, a) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a)
 #define debug2(s, a, b) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b)
 #define debug3(s, a, b, c) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c)
 #define debug4(s, a, b, c, d) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d)
 #define debug5(s, a, b, c, d, e) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e)
 #define debug6(s, a, b, c, d, e, f) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f)
 #define debug7(s, a, b, c, d, e, f, g) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g)
 #define debug8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h)
 #define debug9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i)
 #define debug10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j)
 #define debug11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k)
 #define debug12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l)
 #define debug13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m)
 #define debug14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
 #define debug15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
 #else
 #define debug(s) 
 #define debug1(s, a) 
 #define debug2(s, a, b) 
 #define debug3(s, a, b, c) 
 #define debug4(s, a, b, c, d) 
 #define debug5(s, a, b, c, d, e) 
 #define debug6(s, a, b, c, d, e, f) 
 #define debug7(s, a, b, c, d, e, f, g) 
 #define debug8(s, a, b, c, d, e, f, g, h) 
 #define debug9(s, a, b, c, d, e, f, g, h, i) 
 #define debug10(s, a, b, c, d, e, f, g, h, i, j) 
 #define debug11(s, a, b, c, d, e, f, g, h, i, j, k) 
 #define debug12(s, a, b, c, d, e, f, g, h, i, j, k, l) 
 #define debug13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) 
 #define debug14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) 
 #define debug15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) 
 #endif
 /*
 	warning(s), warning1(s, a) ... warning15(s, a, ..., o):
 	print "[file:line] warning: <formatted message>\n" to stderr.
 	Unlike the debug macros these are defined the same way with and without
 	DEBUG, so they always expand to an fprintf(stderr, ...) call and need
 	<stdio.h> to be in scope where they are used.
 	s must be a string literal (it is concatenated with the prefix); the digit
 	in the macro name is the number of format arguments that follow.
 */
 #define warning(s) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__)
 #define warning1(s, a) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a)
 #define warning2(s, a, b) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b)
 #define warning3(s, a, b, c) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c)
 #define warning4(s, a, b, c, d) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d)
 #define warning5(s, a, b, c, d, e) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e)
 #define warning6(s, a, b, c, d, e, f) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f)
 #define warning7(s, a, b, c, d, e, f, g) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g)
 #define warning8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h)
 #define warning9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i)
 #define warning10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j)
 #define warning11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k)
 #define warning12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l)
 #define warning13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m)
 #define warning14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
 #define warning15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
 /*
 	error(s), error1(s, a) ... error15(s, a, ..., o):
 	print "[file:line] error: <formatted message>\n" to stderr.
 	Like the warning macros these are always active, independent of DEBUG,
 	and therefore need <stdio.h> to be in scope where they are used.  They
 	only print; they do not abort or change control flow.
 	s must be a string literal (it is concatenated with the prefix); the digit
 	in the macro name is the number of format arguments that follow.
 */
 #define error(s) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__)
 #define error1(s, a) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a)
 #define error2(s, a, b) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b)
 #define error3(s, a, b, c) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c)
 #define error4(s, a, b, c, d) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d)
 #define error5(s, a, b, c, d, e) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e)
 #define error6(s, a, b, c, d, e, f) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f)
 #define error7(s, a, b, c, d, e, f, g) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g)
 #define error8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h)
 #define error9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i)
 #define error10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j)
 #define error11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k)
 #define error12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l)
 #define error13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m)
 #define error14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
 #define error15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
--- a/src/libmpg123/decode.c
+++ b/src/libmpg123/decode.c
@@ -0,0 +1,246 @@
 /*
 	decode.c: decoding samples...
 	copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 #include "mpg123lib_intern.h"
 /* 8bit functions silenced for FLOATOUT */
 /*
 	synth_1to1_8bit: synthesize one channel via synth_1to1() into a local scratch
 	buffer, then write 32 converted 8 bit samples for that channel (every second
 	byte) into the real output buffer. The fill counter advances by 64 bytes only
 	when final is nonzero. Returns what synth_1to1() returned.
 */
 int synth_1to1_8bit(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
  sample_t scratch[64];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  const sample_t *src = scratch + channel;
  unsigned char *dst = real_data + channel + old_fill;
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_1to1(bandPtr, channel, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  for(k=0; k<32; k++, dst+=2, src+=2) {
 #ifdef FLOATOUT
    *dst = 0;
 #else
    *dst = fr->conv16to8[*src>>AUSHIFT];
 #endif
  }
  if(final) fr->buffer.fill = old_fill + 64;
  else fr->buffer.fill = old_fill;
  return clipped;
 }
 /*
 	synth_1to1_8bit_mono: synthesize channel 0 via synth_1to1() into a scratch
 	buffer and append 32 converted 8 bit samples to the output buffer
 	(fill grows by 32 bytes). Returns what synth_1to1() returned.
 */
 int synth_1to1_8bit_mono(real *bandPtr, mpg123_handle *fr)
 {
  sample_t scratch[64];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  const sample_t *src = scratch;
  unsigned char *dst = real_data + old_fill;
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_1to1(bandPtr,0, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  for(k=0; k<32; k++, dst++, src+=2) {
 #ifdef FLOATOUT
    *dst = 0;
 #else
    *dst = fr->conv16to8[*src>>AUSHIFT];
 #endif
  }
  fr->buffer.fill = old_fill + 32;
  return clipped;
 }
 /*
 	synth_1to1_8bit_mono2stereo: synthesize channel 0 via synth_1to1() into a
 	scratch buffer and append each of the 32 converted 8 bit samples twice
 	(fill grows by 64 bytes). Returns what synth_1to1() returned.
 */
 int synth_1to1_8bit_mono2stereo(real *bandPtr, mpg123_handle *fr)
 {
  sample_t scratch[64];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  const sample_t *src = scratch;
  unsigned char *dst = real_data + old_fill;
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_1to1(bandPtr, 0, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  for(k=0; k<32; k++, dst+=2, src+=2) {
 #ifdef FLOATOUT
    dst[0] = 0;
    dst[1] = 0;
 #else
    dst[0] = fr->conv16to8[*src>>AUSHIFT];
    dst[1] = dst[0]; /* same value for the second channel */
 #endif
  }
  fr->buffer.fill = old_fill + 64;
  return clipped;
 }
 /*
 	synth_1to1_mono: synthesize channel 0 via synth_1to1() into a scratch buffer
 	(which holds the samples at every second position) and append the 32 samples
 	densely to the output buffer. Returns what synth_1to1() returned.
 */
 int synth_1to1_mono(real *bandPtr, mpg123_handle *fr)
 {
  sample_t scratch[64];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  sample_t *dst = (sample_t *)(real_data + old_fill); /* next free spot in frame buffer */
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_1to1(bandPtr, 0, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  /* keep every second scratch sample */
  for(k=0; k<32; k++) dst[k] = scratch[2*k];
  fr->buffer.fill = old_fill + 32*sizeof(sample_t);
  return clipped;
 }
 /*
 	synth_1to1_mono2stereo: synthesize channel 0 directly into the output buffer
 	(final=1, so fill advances by 64 samples), then copy each freshly written
 	sample into the neighbouring slot of the second channel.
 	Returns what synth_1to1() returned.
 */
 int synth_1to1_mono2stereo(real *bandPtr, mpg123_handle *fr)
 {
  unsigned char *const base = fr->buffer.data;
  sample_t *pair;
  int k, clipped;
  clipped = synth_1to1(bandPtr,0,fr,1);
  /* step back to the start of the block that was just written */
  pair = (sample_t *)(base + (fr->buffer.fill - 64*sizeof(sample_t)));
  for(k=0; k<32; k++) pair[2*k+1] = pair[2*k];
  return clipped;
 }
 /*
 	synth_1to1: window/overlap step producing 32 output samples for one channel.
 	bandPtr: subband input handed to do_equalizer() (if enabled) and dct64().
 	channel: 0 uses fr->real_buffs[0] and advances the ring offset fr->bo[0];
 	         anything else uses fr->real_buffs[1] and writes one sample further in,
 	         so both channels interleave with a stride of 2 samples.
 	final:   if nonzero, fr->buffer.fill is advanced by 64 samples' worth of bytes.
 	Returns the clip counter maintained by WRITE_SAMPLE.
 */
 int synth_1to1(real *bandPtr,int channel,mpg123_handle *fr, int final)
 {
  static const int step = 2;
  sample_t *samples = (sample_t *) (fr->buffer.data+fr->buffer.fill);
  real *b0, **buf; /* (*buf)[0x110]; */
  int clip = 0; 
  int bo1;
  if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
  if(!channel) {
    /* the offset moves once per call pair: only the first channel steps it (mod 16) */
    fr->bo[0]--;
    fr->bo[0] &= 0xf;
    buf = fr->real_buffs[0];
  }
  else {
    samples++;
    buf = fr->real_buffs[1];
  }
  /* the parity of the offset selects which of the two buffers is read back as b0 */
  if(fr->bo[0] & 0x1) {
    b0 = buf[0];
    bo1 = fr->bo[0];
    dct64(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
  }
  else {
    b0 = buf[1];
    bo1 = fr->bo[0]+1;
    dct64(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
  }
  {
    register int j;
    real *window = opt_decwin(fr) + 16 - bo1;
    /* first 16 samples: 16 taps each with alternating sign; window advances 0x20 per sample */
    for (j=16;j;j--,window+=0x10,samples+=step)
    {
      real sum;
      sum  = REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      WRITE_SAMPLE(samples,sum,clip);
    }
    /* 17th sample: only the 8 even-indexed taps, all added */
    {
      real sum;
      sum  = REAL_MUL(window[0x0], b0[0x0]);
      sum += REAL_MUL(window[0x2], b0[0x2]);
      sum += REAL_MUL(window[0x4], b0[0x4]);
      sum += REAL_MUL(window[0x6], b0[0x6]);
      sum += REAL_MUL(window[0x8], b0[0x8]);
      sum += REAL_MUL(window[0xA], b0[0xA]);
      sum += REAL_MUL(window[0xC], b0[0xC]);
      sum += REAL_MUL(window[0xE], b0[0xE]);
      WRITE_SAMPLE(samples,sum,clip);
      b0-=0x10,window-=0x20,samples+=step;
    }
    window += bo1<<1;
    /* last 15 samples: b0 rows are walked backwards, the window is read in reverse, all taps subtracted */
    for (j=15;j;j--,b0-=0x20,window-=0x10,samples+=step)
    {
      real sum;
      sum = -REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      WRITE_SAMPLE(samples,sum,clip);
    }
  }
  if(final) fr->buffer.fill += 64*sizeof(sample_t);
  return clip;
 }
--- a/src/libmpg123/decode.h
+++ b/src/libmpg123/decode.h
@@ -0,0 +1,67 @@
 /*
 	decode.h: common definitions for decode functions
 	copyright 2007 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Thomas Orgis, taking WRITE_SAMPLE from decode.c
 */
 #ifndef MPG123_DECODE_H
 #define MPG123_DECODE_H
 /*
 	sample_t is the type of one decoded output sample: float with FLOATOUT, short otherwise.
 	WRITE_SAMPLE(samples,sum,clip) stores sum through the pointer samples.
 	Without FLOATOUT the value is clamped to [-0x8000, 0x7fff] and clip is incremented
 	on each clamped sample; with FLOATOUT sum is stored as is and clip is not touched.
 	Caution: the integer variant expands to a bare if/else-if/else chain (not wrapped in
 	do { } while(0)) and evaluates sum more than once, so use it only as a full statement
 	inside braces and with side-effect-free arguments.
 */
 #ifdef FLOATOUT
 #define WRITE_SAMPLE(samples,sum,clip) *(samples) = sum
 #define sample_t float
 #else
 #define WRITE_SAMPLE(samples,sum,clip) \
  if( (sum) > REAL_PLUS_32767) { *(samples) = 0x7fff; (clip)++; } \
  else if( (sum) < REAL_MINUS_32768) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = REAL_TO_SHORT(sum); }
 #define sample_t short
 #endif
 #define NTOM_MAX 8          /* maximum allowed factor for upsampling */
 #define NTOM_MAX_FREQ 96000 /* maximum frequency to upsample to / downsample from */
 #define NTOM_MUL (32768)
 /* synth_1to1 in optimize.h, one should also use opts for these here... */
 /* 2to1 downsampling synths: plain, 8 bit, mono and mono-to-stereo variants */
 int synth_2to1 (real *,int, mpg123_handle*, int);
 int synth_2to1_8bit (real *,int, mpg123_handle *,int);
 int synth_2to1_mono (real *, mpg123_handle *);
 int synth_2to1_mono2stereo (real *, mpg123_handle *);
 int synth_2to1_8bit_mono (real *, mpg123_handle *);
 int synth_2to1_8bit_mono2stereo (real *, mpg123_handle *);
 /* 4to1 downsampling synths, same set of variants */
 int synth_4to1 (real *,int, mpg123_handle*, int);
 int synth_4to1_8bit (real *,int, mpg123_handle *,int);
 int synth_4to1_mono (real *, mpg123_handle *);
 int synth_4to1_mono2stereo (real *, mpg123_handle *);
 int synth_4to1_8bit_mono (real *, mpg123_handle *);
 int synth_4to1_8bit_mono2stereo (real *, mpg123_handle *);
 /* ntom (flexible rate) synths, same set of variants */
 int synth_ntom (real *,int, mpg123_handle*, int);
 int synth_ntom_8bit (real *,int, mpg123_handle *,int);
 int synth_ntom_mono (real *, mpg123_handle *);
 int synth_ntom_mono2stereo (real *, mpg123_handle *);
 int synth_ntom_8bit_mono (real *, mpg123_handle *);
 int synth_ntom_8bit_mono2stereo (real *, mpg123_handle *);
 int synth_ntom_set_step(mpg123_handle *fr); /* prepare ntom decoding */
 unsigned long ntom_val(mpg123_handle *fr, off_t frame); /* compute ntom_val for frame offset */
 off_t ntom_frmouts(mpg123_handle *fr, off_t frame);
 off_t ntom_ins2outs(mpg123_handle *fr, off_t ins);
 off_t ntom_frameoff(mpg123_handle *fr, off_t soff);
 /* layer setup and per-frame decoding entry points */
 void init_layer3(void);
 void init_layer3_stuff(mpg123_handle *fr);
 void init_layer2(void);
 void init_layer2_stuff(mpg123_handle *fr);
 int make_conv16to8_table(mpg123_handle *fr);
 int do_layer3(mpg123_handle *fr);
 int do_layer2(mpg123_handle *fr);
 int do_layer1(mpg123_handle *fr);
 void do_equalizer(real *bandPtr,int channel, real equalizer[2][32]);
 #endif
--- a/src/libmpg123/decode_2to1.c
+++ b/src/libmpg123/decode_2to1.c
@@ -0,0 +1,248 @@
 /*
 	decode_2to1.c: ...with 2to1 downsampling
 	copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 #include "mpg123lib_intern.h"
 /*
 	synth_2to1_8bit: synthesize one channel via synth_2to1() into a local scratch
 	buffer, then write 16 converted 8 bit samples for that channel (every second
 	byte) into the real output buffer. The fill counter advances by 32 bytes only
 	when final is nonzero. Returns what synth_2to1() returned.
 */
 int synth_2to1_8bit(real *bandPtr, int channel, mpg123_handle *fr, int final)
 {
  sample_t scratch[32];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  const sample_t *src = scratch + channel;
  unsigned char *dst = real_data + channel + old_fill;
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_2to1(bandPtr,channel, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  for(k=0; k<16; k++, dst+=2, src+=2) {
 #ifdef FLOATOUT
    *dst = 0;
 #else
    *dst = fr->conv16to8[*src>>AUSHIFT];
 #endif
  }
  if(final) fr->buffer.fill = old_fill + 32;
  else fr->buffer.fill = old_fill;
  return clipped;
 }
 /*
 	synth_2to1_8bit_mono: synthesize channel 0 via synth_2to1() into a scratch
 	buffer and append 16 converted 8 bit samples to the output buffer
 	(fill grows by 16 bytes). Returns what synth_2to1() returned.
 */
 int synth_2to1_8bit_mono(real *bandPtr, mpg123_handle *fr)
 {
  sample_t scratch[32];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  const sample_t *src = scratch;
  unsigned char *dst = real_data + old_fill;
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_2to1(bandPtr, 0, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  for(k=0; k<16; k++, dst++, src+=2) {
 #ifdef FLOATOUT
    *dst = 0;
 #else
    *dst = fr->conv16to8[*src>>AUSHIFT];
 #endif
  }
  fr->buffer.fill = old_fill + 16;
  return clipped;
 }
 /*
 	synth_2to1_8bit_mono2stereo: synthesize channel 0 via synth_2to1() into a
 	scratch buffer and append each of the 16 converted 8 bit samples twice
 	(fill grows by 32 bytes). Returns what synth_2to1() returned.
 */
 int synth_2to1_8bit_mono2stereo(real *bandPtr, mpg123_handle *fr)
 {
  sample_t scratch[32];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  const sample_t *src = scratch;
  unsigned char *dst = real_data + old_fill;
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_2to1(bandPtr,0, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  for(k=0; k<16; k++, dst+=2, src+=2) {
 #ifdef FLOATOUT
    dst[0] = 0;
    dst[1] = 0;
 #else
    dst[0] = fr->conv16to8[*src>>AUSHIFT];
    dst[1] = dst[0]; /* same value for the second channel */
 #endif
  }
  fr->buffer.fill = old_fill + 32;
  return clipped;
 }
 /*
 	synth_2to1_mono: synthesize channel 0 via synth_2to1() into a scratch buffer
 	(which holds the samples at every second position) and append the 16 samples
 	densely to the output buffer. Returns what synth_2to1() returned.
 */
 int synth_2to1_mono(real *bandPtr, mpg123_handle *fr)
 {
  sample_t scratch[32];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  sample_t *dst = (sample_t *)(real_data + old_fill);
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_2to1(bandPtr, 0, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  /* keep every second scratch sample */
  for(k=0; k<16; k++) dst[k] = scratch[2*k];
  fr->buffer.fill = old_fill + 16*sizeof(sample_t);
  return clipped;
 }
 /*
 	synth_2to1_mono2stereo: synthesize channel 0 directly into the output buffer
 	(final=1, so fill advances by 32 samples), then copy each freshly written
 	sample into the neighbouring slot of the second channel.
 	Returns what synth_2to1() returned.
 */
 int synth_2to1_mono2stereo(real *bandPtr, mpg123_handle *fr)
 {
  unsigned char *const base = fr->buffer.data;
  sample_t *pair;
  int k, clipped;
  clipped = synth_2to1(bandPtr,0, fr, 1);
  /* step back to the start of the block that was just written */
  pair = (sample_t *)(base + (fr->buffer.fill - 32*sizeof(sample_t)));
  for(k=0; k<16; k++) pair[2*k+1] = pair[2*k];
  return clipped;
 }
 /*
 	synth_2to1: window/overlap step with 2:1 downsampling, producing 16 output
 	samples for one channel (every second sample of the full-rate synthesis).
 	bandPtr: subband input handed to do_equalizer() (if enabled) and the dct64 routine.
 	channel: 0 uses fr->real_buffs[0] and advances the ring offset fr->bo[0];
 	         anything else uses fr->real_buffs[1] and writes one sample further in,
 	         so both channels interleave with a stride of 2 samples.
 	final:   if nonzero, fr->buffer.fill is advanced by 32 samples' worth of bytes.
 	Returns the clip counter maintained by WRITE_SAMPLE.
 	All products go through REAL_MUL, like in synth_1to1, so that the routine stays
 	correct when real is not a plain floating point type.
 */
 int synth_2to1(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
  static const int step = 2;
  sample_t *samples = (sample_t *) (fr->buffer.data + fr->buffer.fill);
  real *b0, **buf; /* (*buf)[0x110]; */
  int clip = 0;
  int bo1;
  if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
  if(!channel) {
    /* only the first channel steps the offset (mod 16) */
    fr->bo[0]--;
    fr->bo[0] &= 0xf;
    buf = fr->real_buffs[0];
  }
  else {
    samples++;
    buf = fr->real_buffs[1];
  }
  /* the parity of the offset selects which of the two buffers is read back as b0 */
  if(fr->bo[0] & 0x1) {
    b0 = buf[0];
    bo1 = fr->bo[0];
    opt_dct64(fr)(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
  }
  else {
    b0 = buf[1];
    bo1 = fr->bo[0]+1;
    opt_dct64(fr)(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
  }
  {
    register int j;
    real *window = opt_decwin(fr) + 16 - bo1;
    /* first 8 samples: 16 taps with alternating sign, then skip one row of b0/window */
    for (j=8;j;j--,b0+=0x10,window+=0x30)
    {
      real sum;
      sum  = REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      WRITE_SAMPLE(samples,sum,clip); samples += step;
    }
    /* 9th sample: only the 8 even-indexed taps, all added */
    {
      real sum;
      sum  = REAL_MUL(window[0x0], b0[0x0]);
      sum += REAL_MUL(window[0x2], b0[0x2]);
      sum += REAL_MUL(window[0x4], b0[0x4]);
      sum += REAL_MUL(window[0x6], b0[0x6]);
      sum += REAL_MUL(window[0x8], b0[0x8]);
      sum += REAL_MUL(window[0xA], b0[0xA]);
      sum += REAL_MUL(window[0xC], b0[0xC]);
      sum += REAL_MUL(window[0xE], b0[0xE]);
      WRITE_SAMPLE(samples,sum,clip); samples += step;
      b0-=0x20,window-=0x40;
    }
    window += bo1<<1;
    /* last 7 samples: b0 rows walked backwards, window read in reverse, all taps subtracted */
    for (j=7;j;j--,b0-=0x30,window-=0x30)
    {
      real sum;
      sum = -REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      WRITE_SAMPLE(samples,sum,clip); samples += step;
    }
  }
  if(final) fr->buffer.fill += 32*sizeof(sample_t);
  return clip;
 }
--- a/src/libmpg123/decode_3dnow.S
+++ b/src/libmpg123/decode_3dnow.S
@@ -0,0 +1,280 @@
 /*
 	decode_3dnow.s - 3DNow! optimized synth_1to1()
 	copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Syuuhei Kashiyama
 	This code is based on 'decode_3dnow.s' by Syuuhei Kashiyama
 	<squash@mb.kcom.ne.jp>; only the following changes have been made:
 	- remove PREFETCH instruction for speedup
 	- change function name for support 3DNow! automatic detect
 	- femms moved to before 'call dct64_3dnow'
 	You can find Kashiyama's original 3dnow! support patch
 	(for mpg123-0.59o) at
 	http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
 	by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
                  	<kim@comtec.co.jp>               - after  1.Apr.1999
 	Replacement of synth_1to1() with AMD's 3DNow! SIMD operations support
 	Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
 	The author of this program disclaim whole expressed or implied
 	warranties with regard to this program, and in no event shall the
 	author of this program liable to whatever resulted from the use of
 	this program. Use it at your own risk.
 */
 #include "mangle.h"
 /*
 	synth_1to1_3dnow_asm: 3DNow! version of the 1to1 synth for one channel.
 	Arguments are taken from the stack (see the prototype and stack map below).
 	It updates *bo for channel 0, calls dct64_3dnow, then writes 16 + 1 + 15
 	16 bit samples to out with a stride of 4 bytes, saturating via packssdw.
 	NOTE(review): the value returned in eax comes from ebp, which is incremented once per
 	written sample and reloaded with 15 before the last loop; it does not look like a
 	count of actually clipped samples - confirm before relying on it.
 */
 .text
 .globl ASM_NAME(synth_1to1_3dnow_asm)
 /* int synth_1to1_3dnow_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin); */
 ASM_NAME(synth_1to1_3dnow_asm):
	subl $24,%esp
	pushl %ebp
	pushl %edi
	xorl %ebp,%ebp
	pushl %esi
	pushl %ebx
 /* stack old: 0=ebx 4=esi 8=edi 12=ebp 16,20,24,28,32,36=local 40=back 44=bandptr 48=channel 52=out 56=pnt */
 /* stack new: 0=ebx 4=esi 8=edi 12=ebp 16,20,24,28,32,36=local 40=back 44=bandptr 48=channel 52=out 56=buffs 60=bo 64=decwin */
 #define OUT     52(%esp)
 #define CHANNEL 48(%esp)
 #define BANDPTR 44(%esp)
 #define BUFFS   56(%esp)
 #define BO      60(%esp)
 #define DECWIN  64(%esp)
 #define LOCAL0  16(%esp)
 #define LOCAL1  20(%esp)
 #define LOCAL5  36(%esp)
	movl OUT,%esi
	movl %esi,LOCAL0 /* save buffer start (samples pointer) to another local var */
	movl CHANNEL,%ebx
	movl BO,%esi     /* bo address */
	movl (%esi),%edx /* bo value */
	femms
	testl %ebx,%ebx
	jne .L26
 /* if(!channel) */
	decl %edx   /* --bo */
	andl $15,%edx
	movl %edx,(%esi) /* save bo */
	movl BUFFS,%ecx
	jmp .L27
 .L26: /* if(channel) */
	addl $2,LOCAL0   /* samples++ */
	movl BUFFS,%ecx
	addl $2176,%ecx
 .L27:
 /* edx (and its lower end) still holds bo value */
	testb $1,%dl  /* bo & 0x1 */
	je .L28
 /* bo odd: LOCAL5 = bo, LOCAL1 = bo*4, ebx = start of the selected buffer */
	movl %edx,LOCAL5
	movl %ecx,%ebx
	movl BANDPTR,%esi
	movl %edx,%edi
	pushl %esi
	sall $2,%edi
	movl %ebx,%eax
	movl %edi,24(%esp) /* LOCAL1, actually */
	addl %edi,%eax
	pushl %eax
	movl %edx,%eax
	incl %eax
	andl $15,%eax
	leal 1088(,%eax,4),%eax
	addl %ebx,%eax
	pushl %eax
	call ASM_NAME(dct64_3dnow)
	addl $12,%esp
	jmp .L29
 .L28:
 /* bo even: LOCAL5 = bo+1, LOCAL1 = (bo+1)*4, ebx = buffer start + 1088 bytes */
	leal 1(%edx),%esi
	movl BANDPTR,%edi
	movl %esi,LOCAL5
	leal 1092(%ecx,%edx,4),%eax
	pushl %edi
	leal 1088(%ecx),%ebx
	pushl %eax
	sall $2,%esi
	leal (%ecx,%edx,4),%eax
	pushl %eax
	call ASM_NAME(dct64_3dnow)
	addl $12,%esp
	movl %esi,LOCAL1
 .L29:
 /* edx = decwin + 64 bytes - LOCAL1 (window start), edi = output pointer, ecx = 16 iterations */
	movl DECWIN,%edx
	addl $64,%edx
	movl $16,%ecx
	subl LOCAL1,%edx
	movl LOCAL0,%edi
	movq (%edx),%mm0
	movq (%ebx),%mm1
	ALIGN32
 .L33:
 /* first loop: 16 taps per sample processed as pairs; low minus high half gives the alternating-sign sum */
	movq 8(%edx),%mm3
	pfmul %mm1,%mm0
	movq 8(%ebx),%mm4
	movq 16(%edx),%mm5
	pfmul %mm4,%mm3
	movq 16(%ebx),%mm6
	pfadd %mm3,%mm0
	movq 24(%edx),%mm1
	pfmul %mm6,%mm5
	movq 24(%ebx),%mm2
	pfadd %mm5,%mm0
	movq 32(%edx),%mm3
	pfmul %mm2,%mm1
	movq 32(%ebx),%mm4
	pfadd %mm1,%mm0
	movq 40(%edx),%mm5
	pfmul %mm4,%mm3
	movq 40(%ebx),%mm6
	pfadd %mm3,%mm0
	movq 48(%edx),%mm1
	pfmul %mm6,%mm5
	movq 48(%ebx),%mm2
	pfadd %mm0,%mm5
	movq 56(%edx),%mm3
	pfmul %mm1,%mm2
	movq 56(%ebx),%mm4
	pfadd %mm5,%mm2
	addl $64,%ebx
	subl $-128,%edx
	movq (%edx),%mm0
	pfmul %mm4,%mm3
	movq (%ebx),%mm1
	pfadd %mm3,%mm2
	movq %mm2,%mm3
	psrlq $32,%mm3
	pfsub %mm3,%mm2
	incl %ebp
	pf2id %mm2,%mm2
	packssdw %mm2,%mm2
	movd %mm2,%eax
	movw %ax,0(%edi)
	addl $4,%edi
	decl %ecx
	jnz .L33
 /* middle sample: only the even-indexed taps (byte offsets 0,8,...,56), all added */
	movd (%ebx),%mm0
	movd (%edx),%mm1
	punpckldq 8(%ebx),%mm0
	punpckldq 8(%edx),%mm1
	movd 16(%ebx),%mm3
	movd 16(%edx),%mm4
	pfmul %mm1,%mm0
	punpckldq 24(%ebx),%mm3
	punpckldq 24(%edx),%mm4
	movd 32(%ebx),%mm5
	movd 32(%edx),%mm6
	pfmul %mm4,%mm3
	punpckldq 40(%ebx),%mm5
	punpckldq 40(%edx),%mm6
	pfadd %mm3,%mm0
	movd 48(%ebx),%mm1
	movd 48(%edx),%mm2
	pfmul %mm6,%mm5
	punpckldq 56(%ebx),%mm1
	punpckldq 56(%edx),%mm2
	pfadd %mm5,%mm0
	pfmul %mm2,%mm1
	pfadd %mm1,%mm0
	pfacc %mm1,%mm0
	pf2id %mm0,%mm0
	packssdw %mm0,%mm0
	movd %mm0,%eax
	movw %ax,0(%edi)
	incl %ebp
 /* set up the last loop: buffer pointer one row back, window = window - 128 + LOCAL5*8 bytes, 15 iterations */
	movl LOCAL5,%esi
	addl $-64,%ebx
	movl $15,%ebp
	addl $4,%edi
	leal -128(%edx,%esi,8),%edx
	movl $15,%ecx
	movd (%ebx),%mm0
	movd -4(%edx),%mm1
	punpckldq 4(%ebx),%mm0
	punpckldq -8(%edx),%mm1
	ALIGN32
 .L46:
 /* last loop: window read backwards, buffer rows walked backwards; the integer sum is negated before packing */
	movd 8(%ebx),%mm3
	movd -12(%edx),%mm4
	pfmul %mm1,%mm0
	punpckldq 12(%ebx),%mm3
	punpckldq -16(%edx),%mm4
	movd 16(%ebx),%mm5
	movd -20(%edx),%mm6
	pfmul %mm4,%mm3
	punpckldq 20(%ebx),%mm5
	punpckldq -24(%edx),%mm6
	pfadd %mm3,%mm0
	movd 24(%ebx),%mm1
	movd -28(%edx),%mm2
	pfmul %mm6,%mm5
	punpckldq 28(%ebx),%mm1
	punpckldq -32(%edx),%mm2
	pfadd %mm5,%mm0
	movd 32(%ebx),%mm3
	movd -36(%edx),%mm4
	pfmul %mm2,%mm1
	punpckldq 36(%ebx),%mm3
	punpckldq -40(%edx),%mm4
	pfadd %mm1,%mm0
	movd 40(%ebx),%mm5
	movd -44(%edx),%mm6
	pfmul %mm4,%mm3
	punpckldq 44(%ebx),%mm5
	punpckldq -48(%edx),%mm6
	pfadd %mm3,%mm0
	movd 48(%ebx),%mm1
	movd -52(%edx),%mm2
	pfmul %mm6,%mm5
	punpckldq 52(%ebx),%mm1
	punpckldq -56(%edx),%mm2
	pfadd %mm0,%mm5
	movd 56(%ebx),%mm3
	movd -60(%edx),%mm4
	pfmul %mm2,%mm1
	punpckldq 60(%ebx),%mm3
	punpckldq (%edx),%mm4
	pfadd %mm1,%mm5
	addl $-128,%edx
	addl $-64,%ebx
	movd (%ebx),%mm0
	movd -4(%edx),%mm1
	pfmul %mm4,%mm3
	punpckldq 4(%ebx),%mm0
	punpckldq -8(%edx),%mm1
	pfadd %mm5,%mm3
	pfacc %mm3,%mm3
	incl %ebp
	pf2id %mm3,%mm3
	movd %mm3,%eax
	negl %eax
	movd %eax,%mm3
	packssdw %mm3,%mm3
	movd %mm3,%eax
	movw %ax,(%edi)
	addl $4,%edi
	decl %ecx
	jnz .L46
 /* leave MMX/3DNow! state, return ebp in eax, restore callee-saved registers and the local area */
	femms
	movl %ebp,%eax
	popl %ebx
	popl %esi
	popl %edi
	popl %ebp
	addl $24,%esp
	ret
--- a/src/libmpg123/decode_3dnowext.S
+++ b/src/libmpg123/decode_3dnowext.S
@@ -0,0 +1,4 @@
 /*
 	decode_3dnowext.S: instantiates the shared synth template in decode_sse3d.h
 	under the name synth_1to1_3dnowext_asm, with dct64_3dnowext plugged in as MPL_DCT64.
 */
 #include "mangle.h"
 #define MPL_DCT64 ASM_NAME(dct64_3dnowext)
 #define SYNTH_NAME ASM_NAME(synth_1to1_3dnowext_asm)
 #include "decode_sse3d.h"
--- a/src/libmpg123/decode_4to1.c
+++ b/src/libmpg123/decode_4to1.c
@@ -0,0 +1,257 @@
 /*
 	decode_4to1.c: ...with 4to1 downsampling / decoding of every 4th sample
 	copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 	dunno why it sounds THIS annoying (maybe we should adapt the window?)
 	absolutely not optimized for this operation
 */
 #include "mpg123lib_intern.h"
 /*
 	synth_4to1_8bit: synthesize one channel via synth_4to1() into a local scratch
 	buffer, then write 8 converted 8 bit samples for that channel (every second
 	byte) into the real output buffer. The fill counter advances by 16 bytes only
 	when final is nonzero. Returns what synth_4to1() returned.
 */
 int synth_4to1_8bit(real *bandPtr, int channel, mpg123_handle *fr, int final)
 {
  sample_t scratch[16];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  const sample_t *src = scratch + channel;
  unsigned char *dst = real_data + channel + old_fill;
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_4to1(bandPtr,channel, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  for(k=0; k<8; k++, dst+=2, src+=2) {
 #ifdef FLOATOUT
    *dst = 0;
 #else
    *dst = fr->conv16to8[*src>>AUSHIFT];
 #endif
  }
  if(final) fr->buffer.fill = old_fill + 16;
  else fr->buffer.fill = old_fill;
  return clipped;
 }
 /*
 	synth_4to1_8bit_mono: synthesize channel 0 via synth_4to1() into a scratch
 	buffer and append 8 converted 8 bit samples to the output buffer
 	(fill grows by 8 bytes). Returns what synth_4to1() returned.
 */
 int synth_4to1_8bit_mono(real *bandPtr, mpg123_handle *fr)
 {
  sample_t scratch[16];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  const sample_t *src = scratch;
  unsigned char *dst = real_data + old_fill;
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_4to1(bandPtr, 0, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  for(k=0; k<8; k++, dst++, src+=2) {
 #ifdef FLOATOUT
    *dst = 0;
 #else
    *dst = fr->conv16to8[*src>>AUSHIFT];
 #endif
  }
  fr->buffer.fill = old_fill + 8;
  return clipped;
 }
 /*
 	synth_4to1_8bit_mono2stereo: synthesize channel 0 via synth_4to1() into a
 	scratch buffer and append each of the 8 converted 8 bit samples twice
 	(fill grows by 16 bytes). Returns what synth_4to1() returned.
 */
 int synth_4to1_8bit_mono2stereo(real *bandPtr, mpg123_handle *fr)
 {
  sample_t scratch[16];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  const sample_t *src = scratch;
  unsigned char *dst = real_data + old_fill;
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_4to1(bandPtr, 0, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  for(k=0; k<8; k++, dst+=2, src+=2) {
 #ifdef FLOATOUT
    dst[0] = 0;
    dst[1] = 0;
 #else
    dst[0] = fr->conv16to8[*src>>AUSHIFT];
    dst[1] = dst[0]; /* same value for the second channel */
 #endif
  }
  fr->buffer.fill = old_fill + 16;
  return clipped;
 }
 /*
 	synth_4to1_mono: synthesize channel 0 via synth_4to1() into a scratch buffer
 	(which holds the samples at every second position) and append the 8 samples
 	densely to the output buffer. Returns what synth_4to1() returned.
 */
 int synth_4to1_mono(real *bandPtr, mpg123_handle *fr)
 {
  sample_t scratch[16];
  unsigned char *const real_data = fr->buffer.data;
  const int old_fill = fr->buffer.fill;
  sample_t *dst = (sample_t *)(real_data + old_fill);
  int k, clipped;
  /* temporarily point the frame buffer at the scratch space for the decoder */
  fr->buffer.data = (unsigned char*) scratch;
  fr->buffer.fill = 0;
  clipped = synth_4to1(bandPtr, 0, fr, 0);
  fr->buffer.data = real_data; /* back to the real output buffer */
  /* keep every second scratch sample */
  for(k=0; k<8; k++) dst[k] = scratch[2*k];
  fr->buffer.fill = old_fill + 8*sizeof(sample_t);
  return clipped;
 }
 /*
 	synth_4to1_mono2stereo: synthesize channel 0 directly into the output buffer
 	(final=1, so fill advances by 16 samples), then copy each freshly written
 	sample into the neighbouring slot of the second channel.
 	Returns what synth_4to1() returned.
 */
 int synth_4to1_mono2stereo(real *bandPtr, mpg123_handle *fr)
 {
  unsigned char *const base = fr->buffer.data;
  sample_t *pair;
  int k, clipped;
  clipped = synth_4to1(bandPtr, 0, fr, 1);
  /* step back to the start of the block that was just written */
  pair = (sample_t *)(base + (fr->buffer.fill - 16*sizeof(sample_t)));
  for(k=0; k<8; k++) pair[2*k+1] = pair[2*k];
  return clipped;
 }
 /*
 	synth_4to1: window/overlap step with 4:1 downsampling, producing 8 output
 	samples for one channel (every fourth sample of the full-rate synthesis).
 	bandPtr: subband input handed to do_equalizer() (if enabled) and the dct64 routine.
 	channel: 0 uses fr->real_buffs[0] and advances the ring offset fr->bo[0];
 	         anything else uses fr->real_buffs[1] and writes one sample further in,
 	         so both channels interleave with a stride of 2 samples.
 	final:   if nonzero, fr->buffer.fill is advanced by 16 samples' worth of bytes.
 	Returns the clip counter maintained by WRITE_SAMPLE.
 	All products go through REAL_MUL, like in synth_1to1, so that the routine stays
 	correct when real is not a plain floating point type.
 */
 int synth_4to1(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
  static const int step = 2;
  sample_t *samples = (sample_t *) (fr->buffer.data + fr->buffer.fill);
  real *b0, **buf; /* (*buf)[0x110]; */
  int clip = 0;
  int bo1;
  if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
  if(!channel) {
    /* only the first channel steps the offset (mod 16) */
    fr->bo[0]--;
    fr->bo[0] &= 0xf;
    buf = fr->real_buffs[0];
  }
  else {
    samples++;
    buf = fr->real_buffs[1];
  }
  /* the parity of the offset selects which of the two buffers is read back as b0 */
  if(fr->bo[0] & 0x1) {
    b0 = buf[0];
    bo1 = fr->bo[0];
    opt_dct64(fr)(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
  }
  else {
    b0 = buf[1];
    bo1 = fr->bo[0]+1;
    opt_dct64(fr)(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
  }
  {
    register int j;
    real *window = opt_decwin(fr) + 16 - bo1;
    /* first 4 samples: 16 taps with alternating sign, then skip three rows of b0/window */
    for (j=4;j;j--,b0+=0x30,window+=0x70)
    {
      real sum;
      sum  = REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      sum += REAL_MUL(*window++, *b0++);
      sum -= REAL_MUL(*window++, *b0++);
      WRITE_SAMPLE(samples,sum,clip); samples += step;
    }
    /* 5th sample: only the 8 even-indexed taps, all added */
    {
      real sum;
      sum  = REAL_MUL(window[0x0], b0[0x0]);
      sum += REAL_MUL(window[0x2], b0[0x2]);
      sum += REAL_MUL(window[0x4], b0[0x4]);
      sum += REAL_MUL(window[0x6], b0[0x6]);
      sum += REAL_MUL(window[0x8], b0[0x8]);
      sum += REAL_MUL(window[0xA], b0[0xA]);
      sum += REAL_MUL(window[0xC], b0[0xC]);
      sum += REAL_MUL(window[0xE], b0[0xE]);
      WRITE_SAMPLE(samples,sum,clip); samples += step;
      b0-=0x40,window-=0x80;
    }
    window += bo1<<1;
    /* last 3 samples: b0 rows walked backwards, window read in reverse, all taps subtracted */
    for (j=3;j;j--,b0-=0x50,window-=0x70)
    {
      real sum;
      sum = -REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      sum -= REAL_MUL(*(--window), *b0++);
      WRITE_SAMPLE(samples,sum,clip); samples += step;
    }
  }
  if(final) fr->buffer.fill += 16*sizeof(sample_t);
  return clip;
 }
--- a/src/libmpg123/decode_altivec.c
+++ b/src/libmpg123/decode_altivec.c
@@ -0,0 +1,593 @@
 /*
 	decode_altivec.c: decoding samples with AltiVec...
 	copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 	altivec optimization by tmkk
 */
 #include "mpg123lib_intern.h"
 #ifndef __APPLE__
 #include <altivec.h>
 #endif
 /*
 	Store one synthesized value as a signed 16 bit sample with saturation.
 	samples: pointer to the destination short
 	sum:     the value to store (evaluated more than once, so pass a plain variable)
 	clip:    counter lvalue, incremented when the value had to be clamped
 	Wrapped in do { } while(0) so the macro is a single statement and is safe
 	in an unbraced if/else.
 */
 #define WRITE_SAMPLE(samples,sum,clip) \
  do { \
   if( (sum) > REAL_PLUS_32767) { *(samples) = 0x7fff; (clip)++; } \
   else if( (sum) < REAL_MINUS_32768) { *(samples) = -0x8000; (clip)++; } \
   else { *(samples) = REAL_TO_SHORT(sum); } \
  } while(0)
 /*
 	8 bit output for one channel of a stereo stream.
 	The 16 bit AltiVec synth is run into a scratch buffer, then the samples
 	of the requested channel are mapped through the 16-to-8 bit table into
 	every second byte of the real output buffer.
 	bandPtr: subband samples handed on to synth_1to1_altivec
 	channel: 0 or 1, selects the interleaved slot that is read and written
 	final:   when non-zero, the output fill is advanced by 64 bytes
 	Returns whatever synth_1to1_altivec returned (its clip count).
 	The conversion table is taken from the handle (fr->conv16to8), matching
 	the i386 variants of this function, instead of a file-global table.
 */
 int synth_1to1_8bit_altivec(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
  short samples_tmp[64];
  short *tmp1 = samples_tmp + channel;
  int i,ret;
  unsigned char *samples = fr->buffer.data;
  int pnt = fr->buffer.fill;
  /* Point the handle at the scratch buffer while the 16 bit synth runs. */
  fr->buffer.data = (unsigned char*) samples_tmp;
  fr->buffer.fill = 0;
  ret = synth_1to1_altivec(bandPtr, channel, fr, 0);
  fr->buffer.data = samples;
  samples += channel + pnt;
  for(i=0;i<32;i++) {
    *samples = fr->conv16to8[*tmp1>>AUSHIFT];
    samples += 2;
    tmp1 += 2;
  }
  fr->buffer.fill = pnt + (final ? 64 : 0 );
  return ret;
 }
 /*
 	8 bit mono output.
 	Channel 0 is synthesized as 16 bit into a scratch buffer; its 32 samples
 	are mapped through the 16-to-8 bit table and appended to the output
 	buffer, whose fill grows by 32 bytes.
 	Returns whatever synth_1to1_altivec returned (its clip count).
 	The conversion table is taken from the handle (fr->conv16to8), matching
 	the i386 variants of this function, instead of a file-global table.
 */
 int synth_1to1_8bit_mono_altivec(real *bandPtr, mpg123_handle *fr)
 {
  short samples_tmp[64];
  short *tmp1 = samples_tmp;
  int i,ret;
  unsigned char *samples = fr->buffer.data;
  int pnt = fr->buffer.fill;
  /* Point the handle at the scratch buffer while the 16 bit synth runs. */
  fr->buffer.data = (unsigned char*) samples_tmp;
  fr->buffer.fill = 0;
  ret = synth_1to1_altivec(bandPtr, 0, fr, 0);
  fr->buffer.data = samples;
  samples += pnt;
  for(i=0;i<32;i++) {
    *samples++ = fr->conv16to8[*tmp1>>AUSHIFT];
    tmp1 += 2; /* skip the unused second channel slot */
  }
  fr->buffer.fill = pnt + 32;
  return ret;
 }
 /*
 	8 bit output of a mono signal duplicated onto both stereo channels.
 	Channel 0 is synthesized as 16 bit into a scratch buffer; each of its 32
 	samples is mapped through the 16-to-8 bit table and written twice, so the
 	output fill grows by 64 bytes.
 	Returns whatever synth_1to1_altivec returned (its clip count).
 	The conversion table is taken from the handle (fr->conv16to8), matching
 	the i386 variants of this function, instead of a file-global table.
 */
 int synth_1to1_8bit_mono2stereo_altivec(real *bandPtr, mpg123_handle *fr)
 {
  short samples_tmp[64];
  short *tmp1 = samples_tmp;
  int i,ret;
  unsigned char *samples = fr->buffer.data;
  int pnt = fr->buffer.fill;
  /* Point the handle at the scratch buffer while the 16 bit synth runs. */
  fr->buffer.data = (unsigned char*) samples_tmp;
  fr->buffer.fill = 0;
  ret = synth_1to1_altivec(bandPtr, 0, fr, 0);
  fr->buffer.data = samples;
  samples += pnt;
  for(i=0;i<32;i++) {
    *samples++ = fr->conv16to8[*tmp1>>AUSHIFT];
    *samples++ = fr->conv16to8[*tmp1>>AUSHIFT];
    tmp1 += 2; /* skip the unused second channel slot */
  }
  fr->buffer.fill = pnt + 64;
  return ret;
 }
 /*
 	16 bit mono output.
 	Runs the stereo-interleaved synth for channel 0 into a scratch buffer and
 	copies every second short (32 samples) to the end of the output buffer.
 	The output fill grows by 64 bytes; the synth's return value is passed on.
 */
 int synth_1to1_mono_altivec(real *bandPtr, mpg123_handle *fr)
 {
 	short scratch[64];
 	unsigned char *out_base = fr->buffer.data;
 	int out_fill = fr->buffer.fill;
 	unsigned char *dst;
 	int n, clipped;
 	/* Redirect the handle's output to the scratch buffer for the synth call. */
 	fr->buffer.data = (unsigned char*) scratch;
 	fr->buffer.fill = 0;
 	clipped = synth_1to1_altivec(bandPtr, 0, fr, 0);
 	fr->buffer.data = out_base;
 	dst = out_base + out_fill;
 	for(n = 0; n < 32; n++)
 		*( (short *)(dst + 2*n) ) = scratch[2*n];
 	fr->buffer.fill = out_fill + 64;
 	return clipped;
 }
 /*
 	16 bit output of a mono signal duplicated onto both stereo channels.
 	The synth writes channel 0 straight into the output buffer (final=1, so
 	the fill is advanced by it); afterwards each of the 32 fresh samples is
 	copied into the neighbouring slot of the second channel.
 */
 int synth_1to1_mono2stereo_altivec(real *bandPtr, mpg123_handle *fr)
 {
 	unsigned char *base = fr->buffer.data; /* taken before the synth call */
 	short *pair;
 	int n, clipped;
 	clipped = synth_1to1_altivec(bandPtr, 0, fr, 1);
 	/* Step back over the 128 bytes that were just produced. */
 	pair = (short *)(base + (fr->buffer.fill - 128));
 	for(n = 32; n > 0; n--, pair += 2)
 		pair[1] = pair[0];
 	return clipped;
 }
 /*
 	AltiVec synthesis of 32 output samples for one channel.
 	bandPtr: subband samples, fed to dct64_altivec (after the equalizer if enabled)
 	channel: 0 or non-zero; selects the buffer pair and the interleaved output slot
 	fr:      decoder handle; output goes to fr->buffer.data + fr->buffer.fill
 	final:   when non-zero, fr->buffer.fill is advanced by 128 bytes
 	Returns the number of samples that were outside the 16 bit range.
 	Output is written as shorts with a stride of 2, i.e. interleaved with the
 	other channel, whose slots are preserved by read-modify-write stores.
 	The 32 samples are produced as 4x4 vector samples, 1 scalar sample,
 	3x4 vector samples and a final vector block of 3 samples.
 	NOTE(review): this function treats fr->bo as a scalar and uses
 	fr->areal_buffs and a global decwin, while synth_1to1_i386 in
 	decode_i386.c uses fr->bo[0], fr->real_buffs and opt_decwin(fr) --
 	confirm this file matches the current mpg123_handle layout.
 */
 int synth_1to1_altivec(real *bandPtr, int channel, mpg123_handle *fr, int final)
 {
  static const int step = 2;
  short *samples = (short *) (fr->buffer.data + fr->buffer.fill);
  real *b0, **buf;
  int clip = 0; 
  int bo1;
  if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
  /* Only channel 0 moves the buffer offset; the other channel reuses it and writes to the odd slots. */
  if(!channel) {
    fr->bo--;
    fr->bo &= 0xf;
    buf = fr->areal_buffs[0];
  }
  else {
    samples++;
    buf = fr->areal_buffs[1];
  }
  /* The parity of the offset decides which of the two buffers is read by the window stage below. */
  if(fr->bo & 0x1) {
    b0 = buf[0];
    bo1 = fr->bo;
    dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr);
  }
  else {
    b0 = buf[1];
    bo1 = fr->bo+1;
    dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr);
  }
  {
    register int j;
    real *window = decwin + 16 - bo1;
 		/* Aligned landing area for reading the vector clip counter back into scalar code. */
 		int __attribute__ ((aligned (16))) clip_tmp[4];
 		vector float v1,v2,v3,v4,v5,v6,v7,v8,v9;
 		vector unsigned char vperm1,vperm2,vperm3,vperm4,vperm5;
 		vector float vsum,vsum2,vsum3,vsum4,vmin,vmax;
 		vector signed int vclip;
 		vector signed short vsample1,vsample2;
 		/* x ^ x: zero the clip accumulator without loading a constant. */
 		vclip = vec_xor(vclip,vclip);
 		/* Clip thresholds, and the byte selector that interleaves 4 new shorts (bytes 0-7 of the first operand) with the existing odd shorts of the second operand. */
 #ifdef __APPLE__
 		vmax = (vector float)(32767.0f);
 		vmin = (vector float)(-32768.0f);
 		vperm5 = (vector unsigned char)(0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31);
 #else
 		vmax = (vector float){32767.0f,32767.0f,32767.0f,32767.0f};
 		vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
 		vperm5 = (vector unsigned char){0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31};
 #endif
 		/* Permute controls for the unaligned window loads and the unaligned sample load/store. */
 		vperm1 = vec_lvsl(0,window);
 		vperm3 = vec_lvsl(0,samples);
 		vperm4 = vec_lvsr(0,samples);
 		/* First half: 4 iterations, each producing 4 output samples (vsum..vsum4 hold one sample each). */
 		for (j=4;j;j--)
 		{
 			vsum = vec_xor(vsum,vsum);
 			vsum2 = vec_xor(vsum2,vsum2);
 			vsum3 = vec_xor(vsum3,vsum3);
 			vsum4 = vec_xor(vsum4,vsum4);
 			/* Sample 1: five aligned loads plus permutes give the 16 window floats starting at window. */
 			/* b0 is loaded with plain vec_ld; presumably it is 16-byte aligned -- TODO confirm. */
 			v1 = vec_ld(0,window);
 			v2 = vec_ld(16,window);
 			v3 = vec_ld(32,window);
 			v4 = vec_ld(48,window);
 			v5 = vec_ld(64,window);
 			v1 = vec_perm(v1,v2,vperm1);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v2,v3,vperm1);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v3,v4,vperm1);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v4,v5,vperm1);
 			v9 = vec_ld(48,b0);
 			vsum = vec_madd(v1,v6,vsum);
 			vsum = vec_madd(v2,v7,vsum);
 			vsum = vec_madd(v3,v8,vsum);
 			vsum = vec_madd(v4,v9,vsum);
 			window += 32;
 			b0 += 16;
 			/* Sample 2 */
 			v1 = vec_ld(0,window);
 			v2 = vec_ld(16,window);
 			v3 = vec_ld(32,window);
 			v4 = vec_ld(48,window);
 			v5 = vec_ld(64,window);
 			v1 = vec_perm(v1,v2,vperm1);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v2,v3,vperm1);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v3,v4,vperm1);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v4,v5,vperm1);
 			v9 = vec_ld(48,b0);
 			vsum2 = vec_madd(v1,v6,vsum2);
 			vsum2 = vec_madd(v2,v7,vsum2);
 			vsum2 = vec_madd(v3,v8,vsum2);
 			vsum2 = vec_madd(v4,v9,vsum2);
 			window += 32;
 			b0 += 16;
 			/* Sample 3 */
 			v1 = vec_ld(0,window);
 			v2 = vec_ld(16,window);
 			v3 = vec_ld(32,window);
 			v4 = vec_ld(48,window);
 			v5 = vec_ld(64,window);
 			v1 = vec_perm(v1,v2,vperm1);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v2,v3,vperm1);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v3,v4,vperm1);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v4,v5,vperm1);
 			v9 = vec_ld(48,b0);
 			vsum3 = vec_madd(v1,v6,vsum3);
 			vsum3 = vec_madd(v2,v7,vsum3);
 			vsum3 = vec_madd(v3,v8,vsum3);
 			vsum3 = vec_madd(v4,v9,vsum3);
 			window += 32;
 			b0 += 16;
 			/* Sample 4 */
 			v1 = vec_ld(0,window);
 			v2 = vec_ld(16,window);
 			v3 = vec_ld(32,window);
 			v4 = vec_ld(48,window);
 			v5 = vec_ld(64,window);
 			v1 = vec_perm(v1,v2,vperm1);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v2,v3,vperm1);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v3,v4,vperm1);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v4,v5,vperm1);
 			v9 = vec_ld(48,b0);
 			vsum4 = vec_madd(v1,v6,vsum4);
 			vsum4 = vec_madd(v2,v7,vsum4);
 			vsum4 = vec_madd(v3,v8,vsum4);
 			vsum4 = vec_madd(v4,v9,vsum4);
 			window += 32;
 			b0 += 16;
 			/* Transpose the four partial-sum vectors: v5..v8 end up holding lane 0..3 of all four samples. */
 			v1 = vec_mergeh(vsum,vsum3);
 			v2 = vec_mergeh(vsum2,vsum4);
 			v3 = vec_mergel(vsum,vsum3);
 			v4 = vec_mergel(vsum2,vsum4);
 			v5 = vec_mergeh(v1,v2);
 			v6 = vec_mergel(v1,v2);
 			v7 = vec_mergeh(v3,v4);
 			v8 = vec_mergel(v3,v4);
 			/* lane0 - lane1 + lane2 - lane3: even window taps are added, odd ones subtracted. */
 			vsum = vec_sub(v5,v6);
 			v9 = vec_sub(v7,v8);
 			vsum = vec_add(vsum,v9);
 			/* Convert to integers and build the out-of-range masks for the clip count. */
 			v3 = (vector float)vec_cts(vsum,0);
 			v1 = (vector float)vec_cmpgt(vsum,vmax);
 			v2 = (vector float)vec_cmplt(vsum,vmin);
 			/* Read-modify-write of the 16 output bytes at samples: the 4 new shorts go to the even slots, the odd slots keep their content. */
 			vsample1 = vec_ld(0,samples);
 			vsample2 = vec_ld(15,samples);
 			v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
 			v4 = (vector float)vec_perm(vsample1,vsample2,vperm3);
 			v5 = (vector float)vec_perm(v3,v4,vperm5);
 			v6 = (vector float)vec_perm(vsample2,vsample1,vperm3);
 			v7 = (vector float)vec_perm(v5,v6,vperm4);
 			v8 = (vector float)vec_perm(v6,v5,vperm4);
 			vec_st((vector signed short)v7,15,samples);
 			vec_st((vector signed short)v8,0,samples);
 			samples += 8;
 			/* Turn the all-ones compare masks into 0/1 and accumulate them into vclip. */
 #ifdef __APPLE__
 			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int)(31));
 			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int)(31));
 #else
 			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int){31,31,31,31});
 			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int){31,31,31,31});
 #endif
 			v5 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
 			vclip = vec_sums((vector signed int)v5,vclip);
 		}
 		/* Middle sample (the 17th): scalar code, only the even window taps contribute. */
 		{
 			real sum;
 			sum  = REAL_MUL(window[0x0], b0[0x0]);
 			sum += REAL_MUL(window[0x2], b0[0x2]);
 			sum += REAL_MUL(window[0x4], b0[0x4]);
 			sum += REAL_MUL(window[0x6], b0[0x6]);
 			sum += REAL_MUL(window[0x8], b0[0x8]);
 			sum += REAL_MUL(window[0xA], b0[0xA]);
 			sum += REAL_MUL(window[0xC], b0[0xC]);
 			sum += REAL_MUL(window[0xE], b0[0xE]);
 			WRITE_SAMPLE(samples,sum,clip);
 			b0-=0x10,window-=0x20,samples+=step;
 		}
 		/* Second half walks window and b0 backwards; the window floats are read in reverse order. */
 		window += bo1<<1;
 		vperm1 = vec_lvsl(0,window);
 		/* vperm2: the unaligned-load selector with its four words reversed, so a permute yields window[-1],window[-2],... */
 #ifdef __APPLE__
 		vperm2 = vec_perm(vperm1,vperm1,(vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3));
 #else
 		vperm2 = vec_perm(vperm1,vperm1,(vector unsigned char){12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3});
 #endif
 		/* The scalar sample moved the output pointer, so the store permutes are recomputed. */
 		vperm3 = vec_lvsl(0,samples);
 		vperm4 = vec_lvsr(0,samples);
 		/* 3 iterations of 4 samples each; vec_nmsub accumulates the negated products. */
 		for (j=3;j;j--)
 		{
 			vsum = vec_xor(vsum,vsum);
 			vsum2 = vec_xor(vsum2,vsum2);
 			vsum3 = vec_xor(vsum3,vsum3);
 			vsum4 = vec_xor(vsum4,vsum4);
 			/* Sample 1 */
 			v1 = vec_ld(-1,window);
 			v2 = vec_ld(-16,window);
 			v3 = vec_ld(-32,window);
 			v4 = vec_ld(-48,window);
 			v5 = vec_ld(-64,window);
 			v1 = vec_perm(v2,v1,vperm2);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v3,v2,vperm2);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v4,v3,vperm2);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v5,v4,vperm2);
 			v9 = vec_ld(48,b0);
 			vsum = vec_nmsub(v1,v6,vsum);
 			vsum = vec_nmsub(v2,v7,vsum);
 			vsum = vec_nmsub(v3,v8,vsum);
 			vsum = vec_nmsub(v4,v9,vsum);
 			window -= 32;
 			b0 -= 16;
 			/* Sample 2 */
 			v1 = vec_ld(0,window);
 			v2 = vec_ld(-16,window);
 			v3 = vec_ld(-32,window);
 			v4 = vec_ld(-48,window);
 			v5 = vec_ld(-64,window);
 			v1 = vec_perm(v2,v1,vperm2);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v3,v2,vperm2);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v4,v3,vperm2);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v5,v4,vperm2);
 			v9 = vec_ld(48,b0);
 			vsum2 = vec_nmsub(v1,v6,vsum2);
 			vsum2 = vec_nmsub(v2,v7,vsum2);
 			vsum2 = vec_nmsub(v3,v8,vsum2);
 			vsum2 = vec_nmsub(v4,v9,vsum2);
 			window -= 32;
 			b0 -= 16;
 			/* Sample 3 */
 			v1 = vec_ld(0,window);
 			v2 = vec_ld(-16,window);
 			v3 = vec_ld(-32,window);
 			v4 = vec_ld(-48,window);
 			v5 = vec_ld(-64,window);
 			v1 = vec_perm(v2,v1,vperm2);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v3,v2,vperm2);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v4,v3,vperm2);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v5,v4,vperm2);
 			v9 = vec_ld(48,b0);
 			vsum3 = vec_nmsub(v1,v6,vsum3);
 			vsum3 = vec_nmsub(v2,v7,vsum3);
 			vsum3 = vec_nmsub(v3,v8,vsum3);
 			vsum3 = vec_nmsub(v4,v9,vsum3);
 			window -= 32;
 			b0 -= 16;
 			/* Sample 4 */
 			v1 = vec_ld(0,window);
 			v2 = vec_ld(-16,window);
 			v3 = vec_ld(-32,window);
 			v4 = vec_ld(-48,window);
 			v5 = vec_ld(-64,window);
 			v1 = vec_perm(v2,v1,vperm2);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v3,v2,vperm2);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v4,v3,vperm2);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v5,v4,vperm2);
 			v9 = vec_ld(48,b0);
 			vsum4 = vec_nmsub(v1,v6,vsum4);
 			vsum4 = vec_nmsub(v2,v7,vsum4);
 			vsum4 = vec_nmsub(v3,v8,vsum4);
 			vsum4 = vec_nmsub(v4,v9,vsum4);
 			window -= 32;
 			b0 -= 16;
 			/* Transpose as in the first half, but here all four lanes are simply added. */
 			v1 = vec_mergeh(vsum,vsum3);
 			v2 = vec_mergeh(vsum2,vsum4);
 			v3 = vec_mergel(vsum,vsum3);
 			v4 = vec_mergel(vsum2,vsum4);
 			v5 = vec_mergeh(v1,v2);
 			v6 = vec_mergel(v1,v2);
 			v7 = vec_mergeh(v3,v4);
 			v8 = vec_mergel(v3,v4);
 			vsum = vec_add(v5,v6);
 			v9 = vec_add(v7,v8);
 			vsum = vec_add(vsum,v9);
 			v3 = (vector float)vec_cts(vsum,0);
 			v1 = (vector float)vec_cmpgt(vsum,vmax);
 			v2 = (vector float)vec_cmplt(vsum,vmin);
 			/* Interleaved read-modify-write store, same scheme as in the first half. */
 			vsample1 = vec_ld(0,samples);
 			vsample2 = vec_ld(15,samples);
 			v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
 			v4 = (vector float)vec_perm(vsample1,vsample2,vperm3);
 			v5 = (vector float)vec_perm(v3,v4,vperm5);
 			v6 = (vector float)vec_perm(vsample2,vsample1,vperm3);
 			v7 = (vector float)vec_perm(v5,v6,vperm4);
 			v8 = (vector float)vec_perm(v6,v5,vperm4);
 			vec_st((vector signed short)v7,15,samples);
 			vec_st((vector signed short)v8,0,samples);
 			samples += 8;
 #ifdef __APPLE__
 			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int)(31));
 			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int)(31));
 #else
 			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int){31,31,31,31});
 			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int){31,31,31,31});
 #endif
 			v5 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
 			vclip = vec_sums((vector signed int)v5,vclip);
 		}
 		/* Last block has only 3 samples: this selector keeps the existing bytes 12-15 (the slot of a 4th sample and its neighbour) instead of writing new data there. */
 #ifdef __APPLE__
 		vperm5 = (vector unsigned char)(0,1,18,19,2,3,22,23,4,5,26,27,28,29,30,31);
 #else
 		vperm5 = (vector unsigned char){0,1,18,19,2,3,22,23,4,5,26,27,28,29,30,31};
 #endif
 		{
 			vsum = vec_xor(vsum,vsum);
 			vsum2 = vec_xor(vsum2,vsum2);
 			vsum3 = vec_xor(vsum3,vsum3);
 			vsum4 = vec_xor(vsum4,vsum4);
 			/* Sample 1 */
 			v1 = vec_ld(-1,window);
 			v2 = vec_ld(-16,window);
 			v3 = vec_ld(-32,window);
 			v4 = vec_ld(-48,window);
 			v5 = vec_ld(-64,window);
 			v1 = vec_perm(v2,v1,vperm2);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v3,v2,vperm2);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v4,v3,vperm2);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v5,v4,vperm2);
 			v9 = vec_ld(48,b0);
 			vsum = vec_nmsub(v1,v6,vsum);
 			vsum = vec_nmsub(v2,v7,vsum);
 			vsum = vec_nmsub(v3,v8,vsum);
 			vsum = vec_nmsub(v4,v9,vsum);
 			window -= 32;
 			b0 -= 16;
 			/* Sample 2 */
 			v1 = vec_ld(0,window);
 			v2 = vec_ld(-16,window);
 			v3 = vec_ld(-32,window);
 			v4 = vec_ld(-48,window);
 			v5 = vec_ld(-64,window);
 			v1 = vec_perm(v2,v1,vperm2);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v3,v2,vperm2);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v4,v3,vperm2);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v5,v4,vperm2);
 			v9 = vec_ld(48,b0);
 			vsum2 = vec_nmsub(v1,v6,vsum2);
 			vsum2 = vec_nmsub(v2,v7,vsum2);
 			vsum2 = vec_nmsub(v3,v8,vsum2);
 			vsum2 = vec_nmsub(v4,v9,vsum2);
 			window -= 32;
 			b0 -= 16;
 			/* Sample 3 (the last one) */
 			v1 = vec_ld(0,window);
 			v2 = vec_ld(-16,window);
 			v3 = vec_ld(-32,window);
 			v4 = vec_ld(-48,window);
 			v5 = vec_ld(-64,window);
 			v1 = vec_perm(v2,v1,vperm2);
 			v6 = vec_ld(0,b0);
 			v2 = vec_perm(v3,v2,vperm2);
 			v7 = vec_ld(16,b0);
 			v3 = vec_perm(v4,v3,vperm2);
 			v8 = vec_ld(32,b0);
 			v4 = vec_perm(v5,v4,vperm2);
 			v9 = vec_ld(48,b0);
 			vsum3 = vec_nmsub(v1,v6,vsum3);
 			vsum3 = vec_nmsub(v2,v7,vsum3);
 			vsum3 = vec_nmsub(v3,v8,vsum3);
 			vsum3 = vec_nmsub(v4,v9,vsum3);
 			/* There is no fourth sum: vsum2 stands in for it, so lane 3 of the result duplicates sample 2. */
 			v1 = vec_mergeh(vsum,vsum3);
 			v2 = vec_mergeh(vsum2,vsum2);
 			v3 = vec_mergel(vsum,vsum3);
 			v4 = vec_mergel(vsum2,vsum2);
 			v5 = vec_mergeh(v1,v2);
 			v6 = vec_mergel(v1,v2);
 			v7 = vec_mergeh(v3,v4);
 			v8 = vec_mergel(v3,v4);
 			vsum = vec_add(v5,v6);
 			v9 = vec_add(v7,v8);
 			vsum = vec_add(vsum,v9);
 			v3 = (vector float)vec_cts(vsum,0);
 			v1 = (vector float)vec_cmpgt(vsum,vmax);
 			v2 = (vector float)vec_cmplt(vsum,vmin);
 			/* NOTE(review): this 16-byte read-modify-write starts 116 (channel 0) or 118 bytes into the frame and so reaches past its 128 bytes; check callers that pass a short[64] scratch buffer. */
 			vsample1 = vec_ld(0,samples);
 			vsample2 = vec_ld(15,samples);
 			v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
 			v4 = (vector float)vec_perm(vsample1,vsample2,vperm3);
 			v5 = (vector float)vec_perm(v3,v4,vperm5);
 			v6 = (vector float)vec_perm(vsample2,vsample1,vperm3);
 			v7 = (vector float)vec_perm(v5,v6,vperm4);
 			v8 = (vector float)vec_perm(v6,v5,vperm4);
 			vec_st((vector signed short)v7,15,samples);
 			vec_st((vector signed short)v8,0,samples);
 			samples += 6;
 			/* NOTE(review): the shift count 32 in lane 3 is presumably meant to drop the duplicated lane from the clip count, but vec_sr is documented to use only the low 5 bits of each count, which would make it a shift by 0 -- verify the resulting clip value. */
 #ifdef __APPLE__
 			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int)(31,31,31,32));
 			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int)(31,31,31,32));
 #else
 			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int){31,31,31,32});
 			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int){31,31,31,32});
 #endif
 			v5 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
 			vclip = vec_sums((vector signed int)v5,vclip);
 			/* vec_sums keeps its running total in element 3; add it to the scalar clip count. */
 			vec_st(vclip,0,clip_tmp);
 			clip += clip_tmp[3];
 		}
  }
  if(final) fr->buffer.fill += 128;
  return clip;
 }
--- a/src/libmpg123/decode_i386.c
+++ b/src/libmpg123/decode_i386.c
@@ -0,0 +1,295 @@
 /*
 	decode_i386.c: decode for i386 (really faster?)
 	copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 	slightly optimized for machines without autoincrement/decrement.
 	The performance is highly compiler dependent. Maybe
 	the decode.c version for 'normal' processors is faster
 	even for Intel processors.
 */
 #include "mpg123lib_intern.h"
 /*
 	8 bit output for one channel of a stereo stream.
 	The selected 16 bit synth fills a scratch buffer; the samples of the
 	requested channel are then mapped through fr->conv16to8 into every second
 	byte of the real output. The fill advances by 64 bytes only when final is
 	set. Returns the synth's return value.
 */
 int synth_1to1_8bit_i386(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
 	short pcm16[64];
 	unsigned char *dest_base = fr->buffer.data;
 	int dest_fill = fr->buffer.fill;
 	const short *src;
 	unsigned char *dst;
 	int n, clipped;
 	/* Let the synth write into the scratch buffer, then restore the real one. */
 	fr->buffer.data = (unsigned char*) pcm16;
 	fr->buffer.fill = 0;
 	clipped = opt_synth_1to1(fr)(bandPtr, channel, fr , 0);
 	fr->buffer.data = dest_base;
 	src = pcm16 + channel;
 	dst = dest_base + channel + dest_fill;
 	for(n = 0; n < 32; n++)
 		dst[2*n] = fr->conv16to8[src[2*n]>>AUSHIFT];
 	fr->buffer.fill = dest_fill + (final ? 64 : 0);
 	return clipped;
 }
 /*
 	8 bit mono output.
 	Channel 0 is synthesized as 16 bit into a scratch buffer, then its 32
 	samples are mapped through fr->conv16to8 and appended to the output.
 	The fill grows by 32 bytes. Returns the synth's return value.
 */
 int synth_1to1_8bit_mono_i386(real *bandPtr, mpg123_handle *fr)
 {
 	short pcm16[64];
 	unsigned char *dest_base = fr->buffer.data;
 	int dest_fill = fr->buffer.fill;
 	unsigned char *dst;
 	int n, clipped;
 	/* Let the synth write into the scratch buffer, then restore the real one. */
 	fr->buffer.data = (unsigned char*) pcm16;
 	fr->buffer.fill = 0;
 	clipped = opt_synth_1to1(fr)(bandPtr, 0, fr, 0);
 	fr->buffer.data = dest_base;
 	dst = dest_base + dest_fill;
 	/* Only the even slots of the interleaved scratch data belong to channel 0. */
 	for(n = 0; n < 32; n++)
 		dst[n] = fr->conv16to8[pcm16[2*n]>>AUSHIFT];
 	fr->buffer.fill = dest_fill + 32;
 	return clipped;
 }
 /*
 	8 bit output of a mono signal duplicated onto both stereo channels.
 	Channel 0 is synthesized as 16 bit into a scratch buffer; every sample is
 	mapped through fr->conv16to8 and stored twice. The fill grows by 64 bytes.
 	Returns the synth's return value.
 */
 int synth_1to1_8bit_mono2stereo_i386(real *bandPtr, mpg123_handle *fr)
 {
 	short pcm16[64];
 	unsigned char *dest_base = fr->buffer.data;
 	int dest_fill = fr->buffer.fill;
 	unsigned char *dst;
 	int n, clipped;
 	/* Let the synth write into the scratch buffer, then restore the real one. */
 	fr->buffer.data = (unsigned char*) pcm16;
 	fr->buffer.fill = 0;
 	clipped = opt_synth_1to1(fr)(bandPtr, 0, fr, 0);
 	fr->buffer.data = dest_base;
 	dst = dest_base + dest_fill;
 	for(n = 0; n < 32; n++)
 	{
 		dst[2*n]   = fr->conv16to8[pcm16[2*n]>>AUSHIFT];
 		dst[2*n+1] = fr->conv16to8[pcm16[2*n]>>AUSHIFT];
 	}
 	fr->buffer.fill = dest_fill + 64;
 	return clipped;
 }
 /*
 	16 bit mono output.
 	Runs the selected synth for channel 0 into a scratch buffer and copies
 	every second short (32 samples) to the end of the output buffer.
 	The fill grows by 64 bytes. Returns the synth's return value.
 */
 int synth_1to1_mono_i386(real *bandPtr, mpg123_handle *fr)
 {
 	short scratch[64];
 	unsigned char *out_base = fr->buffer.data;
 	int out_fill = fr->buffer.fill;
 	unsigned char *dst;
 	int n, clipped;
 	/* Let the synth write into the scratch buffer, then restore the real one. */
 	fr->buffer.data = (unsigned char*) scratch;
 	fr->buffer.fill = 0;
 	clipped = opt_synth_1to1(fr)(bandPtr, 0, fr, 0);
 	fr->buffer.data = out_base;
 	dst = out_base + out_fill;
 	for(n = 0; n < 32; n++)
 		*( (short *)(dst + 2*n) ) = scratch[2*n];
 	fr->buffer.fill = out_fill + 64;
 	return clipped;
 }
 /*
 	16 bit output of a mono signal duplicated onto both stereo channels.
 	The synth writes channel 0 directly into the output buffer (final=1);
 	afterwards each of the 32 samples in the last 128 bytes is copied to the
 	adjacent slot of the second channel. Returns the synth's return value.
 */
 int synth_1to1_mono2stereo_i386(real *bandPtr, mpg123_handle *fr)
 {
 	unsigned char *base = fr->buffer.data; /* taken before the synth call */
 	short *pair;
 	int n, clipped;
 	clipped = opt_synth_1to1(fr)(bandPtr, 0, fr, 1);
 	/* Step back over the 128 bytes that were just produced. */
 	pair = (short *)(base + (fr->buffer.fill - 128));
 	for(n = 32; n > 0; n--, pair += 2)
 		pair[1] = pair[0];
 	return clipped;
 }
 /* needed for i386, i486 */
 #ifdef OPT_I386
 /*
 	Plain C synthesis of 32 samples for one channel, written with explicit
 	indices instead of auto-increment pointers.
 	bandPtr: subband samples, passed to dct64_i386 (after the equalizer if enabled)
 	channel: 0 moves the shared buffer offset fr->bo[0]; non-zero reuses it and
 	         writes to the odd output slots
 	final:   when non-zero, fr->buffer.fill is advanced by 128 bytes
 	Returns the clip counter maintained by WRITE_SAMPLE.
 	The sums are kept as one statement per tap so the order of the
 	accumulation is exactly the written one.
 */
 int synth_1to1_i386(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
 	static const int step = 2;
 	short *samples = (short *) (fr->buffer.data + fr->buffer.fill);
 	real **bufs;
 	real *band;
 	real *win;
 	int clip = 0;
 	int win_shift;
 	int line;
 	if(fr->have_eq_settings)
 		do_equalizer(bandPtr,channel,fr->equalizer);
 	if(channel) {
 		samples++;
 		bufs = fr->real_buffs[1];
 	}
 	else {
 		fr->bo[0] = (fr->bo[0] - 1) & 0xf;
 		bufs = fr->real_buffs[0];
 	}
 	/* The parity of the offset selects which buffer the window stage reads. */
 	if(fr->bo[0] & 0x1) {
 		band = bufs[0];
 		win_shift = fr->bo[0];
 		dct64_i386(bufs[1]+((fr->bo[0]+1)&0xf), bufs[0]+fr->bo[0], bandPtr);
 	}
 	else {
 		band = bufs[1];
 		win_shift = fr->bo[0]+1;
 		dct64_i386(bufs[0]+fr->bo[0], bufs[1]+fr->bo[0]+1, bandPtr);
 	}
 	win = opt_decwin(fr) + 16 - win_shift;
 	/* First 16 samples: alternating signs over 16 taps, walking forward. */
 	for(line = 0; line < 16; line++)
 	{
 		real sum;
 		sum  = win[0]  * band[0];
 		sum -= win[1]  * band[1];
 		sum += win[2]  * band[2];
 		sum -= win[3]  * band[3];
 		sum += win[4]  * band[4];
 		sum -= win[5]  * band[5];
 		sum += win[6]  * band[6];
 		sum -= win[7]  * band[7];
 		sum += win[8]  * band[8];
 		sum -= win[9]  * band[9];
 		sum += win[10] * band[10];
 		sum -= win[11] * band[11];
 		sum += win[12] * band[12];
 		sum -= win[13] * band[13];
 		sum += win[14] * band[14];
 		sum -= win[15] * band[15];
 		WRITE_SAMPLE(samples,sum,clip);
 		band += 16;
 		win += 32;
 		samples += step;
 	}
 	/* Middle sample: only the even taps contribute. */
 	{
 		real sum;
 		sum  = win[0]  * band[0];
 		sum += win[2]  * band[2];
 		sum += win[4]  * band[4];
 		sum += win[6]  * band[6];
 		sum += win[8]  * band[8];
 		sum += win[10] * band[10];
 		sum += win[12] * band[12];
 		sum += win[14] * band[14];
 		WRITE_SAMPLE(samples,sum,clip);
 		samples += step;
 		band -= 16;
 		win -= 32;
 	}
 	win += win_shift<<1;
 	/* Last 15 samples: all taps negated, window read backwards, walking back. */
 	for(line = 0; line < 15; line++)
 	{
 		real sum;
 		sum = -win[-1] * band[0];
 		sum -= win[-2]  * band[1];
 		sum -= win[-3]  * band[2];
 		sum -= win[-4]  * band[3];
 		sum -= win[-5]  * band[4];
 		sum -= win[-6]  * band[5];
 		sum -= win[-7]  * band[6];
 		sum -= win[-8]  * band[7];
 		sum -= win[-9]  * band[8];
 		sum -= win[-10] * band[9];
 		sum -= win[-11] * band[10];
 		sum -= win[-12] * band[11];
 		sum -= win[-13] * band[12];
 		sum -= win[-14] * band[13];
 		sum -= win[-15] * band[14];
 		/* The last tap reads win[0] (written as window[-0x0] originally), not win[-16]. */
 		sum -= win[0]   * band[15];
 		WRITE_SAMPLE(samples,sum,clip);
 		band -= 16;
 		win -= 32;
 		samples += step;
 	}
 	if(final)
 		fr->buffer.fill += 128;
 	return clip;
 }
 #endif
 #ifdef OPT_PENTIUM
 /*
 	C front end for the i586 assembly synth selected by
 	opt_synth_1to1_i586_asm(fr) (dithering or not).
 	Applies the equalizer if enabled, lets the assembly routine write at the
 	current end of the output buffer and advances the fill by 128 bytes when
 	final is set. Returns the routine's result cast to int.
 */
 int synth_1to1_i586(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
 	int clipped;
 	if(fr->have_eq_settings)
 		do_equalizer(bandPtr,channel,fr->equalizer);
 	/* The original author was unsure whether taking the return value through the function pointer like this is correct. */
 	clipped = (int) opt_synth_1to1_i586_asm(fr)(bandPtr, channel, fr->buffer.data+fr->buffer.fill, fr->rawbuffs, fr->bo, fr->decwin);
 	if(final)
 		fr->buffer.fill += 128;
 	return clipped;
 }
 #endif
 #ifdef OPT_3DNOW
 /*
 	C front end for the 3DNow! assembly synth.
 	Applies the 3DNow! equalizer if enabled, lets synth_1to1_3dnow_asm write
 	at the current end of the output buffer and advances the fill by 128
 	bytes when final is set. Returns the routine's result cast to int.
 */
 int synth_1to1_3dnow(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
 	int clipped;
 	if(fr->have_eq_settings)
 		do_equalizer_3dnow(bandPtr,channel,fr->equalizer);
 	/* The original author was unsure whether taking the return value like this is correct. */
 	clipped = (int) synth_1to1_3dnow_asm(bandPtr, channel, fr->buffer.data+fr->buffer.fill, fr->rawbuffs, fr->bo, fr->decwin);
 	if(final)
 		fr->buffer.fill += 128;
 	return clipped;
 }
 #endif
 #ifdef OPT_MMX
 /* wrapper for da interface */
 /*
 	Adapts the assembly routine synth_1to1_MMX to the common synth signature.
 	Applies the equalizer if enabled, has the routine write shorts at the
 	current end of the output buffer and advances the fill by 128 bytes when
 	final is set. Always returns 0 (no clip count is reported).
 */
 int synth_1to1_mmx(real *bandPtr, int channel, mpg123_handle *fr, int final)
 {
 	if(fr->have_eq_settings)
 		do_equalizer(bandPtr,channel,fr->equalizer);
 	/* The actual synthesis is done in assembly. */
 	synth_1to1_MMX(bandPtr, channel, (short*) (fr->buffer.data+fr->buffer.fill), (short *) fr->rawbuffs, fr->bo, fr->decwins);
 	if(final)
 		fr->buffer.fill += 128;
 	return 0;
 }
 #endif
 #ifdef OPT_SSE
 /*
 	Adapts the assembly routine synth_1to1_sse_asm to the common synth signature.
 	Applies the equalizer if enabled, has the routine write shorts at the
 	current end of the output buffer and advances the fill by 128 bytes when
 	final is set. Always returns 0 (no clip count is reported).
 */
 int synth_1to1_sse(real *bandPtr, int channel, mpg123_handle *fr, int final)
 {
 	if(fr->have_eq_settings)
 		do_equalizer(bandPtr,channel,fr->equalizer);
 	synth_1to1_sse_asm(bandPtr, channel, (short*) (fr->buffer.data+fr->buffer.fill), (short *) fr->rawbuffs, fr->bo, fr->decwins);
 	if(final)
 		fr->buffer.fill += 128;
 	return 0;
 }
 #endif
 #ifdef OPT_3DNOWEXT
 /*
 	Adapts the assembly routine synth_1to1_3dnowext_asm to the common synth signature.
 	Applies the equalizer if enabled, has the routine write shorts at the
 	current end of the output buffer and advances the fill by 128 bytes when
 	final is set. Always returns 0 (no clip count is reported).
 */
 int synth_1to1_3dnowext(real *bandPtr, int channel, mpg123_handle *fr, int final)
 {
 	if(fr->have_eq_settings)
 		do_equalizer(bandPtr,channel,fr->equalizer);
 	synth_1to1_3dnowext_asm(bandPtr, channel, (short*) (fr->buffer.data+fr->buffer.fill), (short *) fr->rawbuffs, fr->bo, fr->decwins);
 	if(final)
 		fr->buffer.fill += 128;
 	return 0;
 }
 #endif
--- a/src/libmpg123/decode_i486.c
+++ b/src/libmpg123/decode_i486.c
@@ -0,0 +1,252 @@
 /*
 	decode_i486.c: i486 decode
 	copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Fabrice Bellard
 	One has to see if the modification for non-static memory kills this optimization (cache locality?).
 */
 /* 
 * Subband Synthesis for MPEG Audio. 
 *
 * Version optimized for 80486 by using integer arithmetic,
 * multiplications by shift and add, and by increasing locality in
 * order to fit the 8KB L1 cache. This code should be compiled with gcc
 * 2.7.2 or higher.
 *
 * Note: this version does not guarantee good accuracy. The filter
 * coefficients are quantized to 14 bits.
 *
 * (c) 1998 Fabrice Bellard 
 */
 #include "mpg123lib_intern.h"
 /*
 	One 16 tap integer FIR producing a single output sample.
 	pos:     output sample index; the result goes to samples[2*pos]
 	c0..c15: filter coefficients, scaled by 2^14
 	Uses these names from the expansion site: b0 (int pointer to the 16
 	input values, advanced by FIR_BUFFER_SIZE afterwards), samples (short
 	output pointer) and clip (int counter).
 	The sum is rounded, scaled back by 14 bits and saturated to 16 bit; every
 	saturation is counted in clip so the synth can report clipping.
 	Wrapped in do { } while(0) so it behaves as a single statement.
 */
 #define FIR16_1(pos,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15) \
 do {\
  int sum;\
  sum=(c0)*b0[0]+(c1)*b0[1]+(c2)*b0[2]+(c3)*b0[3]+\
  (c4)*b0[4]+(c5)*b0[5]+(c6)*b0[6]+(c7)*b0[7]+\
  (c8)*b0[8]+(c9)*b0[9]+(c10)*b0[10]+(c11)*b0[11]+\
  (c12)*b0[12]+(c13)*b0[13]+(c14)*b0[14]+(c15)*b0[15];\
  sum=(sum+(1 << 13))>>14;\
  if (sum<-32768) { sum=-32768; clip++; }\
  else if (sum>32767) { sum=32767; clip++; }\
  samples[2*(pos)]=sum;\
  b0+=FIR_BUFFER_SIZE;\
 } while(0)
 /*
 	Two 16 tap integer FIRs over the same 16 input values, producing two
 	output samples while each input is loaded only once.
 	pos1, c0..c15: index and coefficients (scaled by 2^14) of the first sample
 	pos2, d0..d15: index and coefficients (scaled by 2^14) of the second sample
 	Uses these names from the expansion site: b0 (int pointer to the 16
 	input values, advanced by FIR_BUFFER_SIZE afterwards), samples (short
 	output pointer) and clip (int counter).
 	Both sums are rounded, scaled back by 14 bits and saturated to 16 bit;
 	every saturation is counted in clip so the synth can report clipping.
 	Wrapped in do { } while(0) so it behaves as a single statement.
 */
 #define FIR16_2(pos1,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,\
              pos2,d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15) \
 do {\
  int sum1,sum2,v;\
 \
  v=b0[0];\
  sum1=(c0)*v;\
  sum2=(d0)*v;\
  v=b0[1];\
  sum1+=(c1)*v;\
  sum2+=(d1)*v;\
  v=b0[2];\
  sum1+=(c2)*v;\
  sum2+=(d2)*v;\
  v=b0[3];\
  sum1+=(c3)*v;\
  sum2+=(d3)*v;\
  v=b0[4];\
  sum1+=(c4)*v;\
  sum2+=(d4)*v;\
  v=b0[5];\
  sum1+=(c5)*v;\
  sum2+=(d5)*v;\
  v=b0[6];\
  sum1+=(c6)*v;\
  sum2+=(d6)*v;\
  v=b0[7];\
  sum1+=(c7)*v;\
  sum2+=(d7)*v;\
  v=b0[8];\
  sum1+=(c8)*v;\
  sum2+=(d8)*v;\
  v=b0[9];\
  sum1+=(c9)*v;\
  sum2+=(d9)*v;\
  v=b0[10];\
  sum1+=(c10)*v;\
  sum2+=(d10)*v;\
  v=b0[11];\
  sum1+=(c11)*v;\
  sum2+=(d11)*v;\
  v=b0[12];\
  sum1+=(c12)*v;\
  sum2+=(d12)*v;\
  v=b0[13];\
  sum1+=(c13)*v;\
  sum2+=(d13)*v;\
  v=b0[14];\
  sum1+=(c14)*v;\
  sum2+=(d14)*v;\
  v=b0[15];\
  sum1+=(c15)*v;\
  sum2+=(d15)*v;\
 \
  sum1=(sum1+(1<<13))>>14;\
  sum2=(sum2+(1<<13))>>14;\
 \
  if (sum1<-32768) { sum1=-32768; clip++; }\
  else if (sum1>32767) { sum1=32767; clip++; }\
  samples[(pos1)*2]=sum1;\
 \
  if (sum2<-32768) { sum2=-32768; clip++; }\
  else if (sum2>32767) { sum2=32767; clip++; }\
  samples[(pos2)*2]=sum2;\
  b0+=FIR_BUFFER_SIZE;\
 } while(0)
 /*
 	Integer synthesis for the 486: processes nb_blocks blocks of 32 subband
 	samples for one channel.
 	bandPtr:   subband samples, 32 per block
 	channel:   selects the output slot (samples are interleaved) and the
 	           per-channel buffers fr->int_buffs[channel] / fr->bo[channel]
 	nb_blocks: number of blocks to run through dct64_i486 and the filter bank
 	Returns clip.
 	All blocks are first run through the DCT, then the filter bank is applied
 	in four passes over all blocks, each pass using one group of FIR
 	coefficient rows.
 	The names b0, samples and clip are used by the FIR16_* macros.
 */
 int synth_1to1_486(real *bandPtr, int channel, mpg123_handle *fr, int nb_blocks)
 {
 	short *samples = (short *) (fr->buffer.data+fr->buffer.fill);
 	int *b0,**buf;
 	int clip = 0;
 	int blk, pos;
 	int first_pos;
 	/* Output slot of this channel. */
 	samples += channel;
 	first_pos = fr->bo[channel];
 	buf = fr->int_buffs[channel];
 	/* DCT stage: one dct64 per block, each at the next FIR offset. */
 	pos = first_pos;
 	for(blk = nb_blocks; blk > 0; blk--)
 	{
 		pos++;
 		if(pos >= FIR_BUFFER_SIZE)
 		{
 			int half, row, tap;
 			/* End of the FIR buffers reached: copy the latest history to the start of each of the 17 rows. */
 			for(half = 0; half < 2; half++)
 			{
 				int *dst = &buf[half][0]+1;
 				int *src = dst+(FIR_BUFFER_SIZE-FIR_SIZE);
 				for(row = 0; row < 17; row++, dst += FIR_BUFFER_SIZE, src += FIR_BUFFER_SIZE)
 					for(tap = 0; tap < FIR_SIZE-1; tap++)
 						dst[tap] = src[tap];
 			}
 			/* Restart the offset right behind the copied history. */
 			fr->bo[channel] = FIR_SIZE;
 			pos = FIR_SIZE;
 		}
 		/* The parity of the offset decides which buffer is passed first. */
 		dct64_i486(buf[pos & 1]+pos, buf[(pos & 1) ^ 1]+pos, bandPtr);
 		bandPtr += 32;
 	}
 	fr->bo[channel] = pos;
 	/* filter bank: part 1 */
 	pos = first_pos;
 	for(blk = nb_blocks; blk > 0; blk--)
 	{
 		pos++;
 		if(pos >= FIR_BUFFER_SIZE)
 			pos = FIR_SIZE;
 		b0 = buf[(pos & 1) ^ 1] + pos - (FIR_SIZE-1);
 FIR16_1(0,-7,53,-114,509,-1288,1643,-9372,18759,9372,1643,1288,509,114,53,7,0);
 FIR16_2(1,-6,52,-100,515,-1197,1783,-8910,18748,9834,1489,1379,500,129,54,7,0,
 31,0,-7,54,-129,500,-1379,1489,-9834,18748,8910,1783,1197,515,100,52,6);
 FIR16_2(2,-6,50,-86,520,-1106,1910,-8447,18714,10294,1322,1469,488,145,55,8,0,
 30,0,-8,55,-145,488,-1469,1322,-10294,18714,8447,1910,1106,520,86,50,6);
 FIR16_2(3,-5,49,-73,521,-1015,2023,-7986,18657,10751,1140,1559,473,161,56,9,0,
 29,0,-9,56,-161,473,-1559,1140,-10751,18657,7986,2023,1015,521,73,49,5);
 		samples += 64;
 	}
 	samples -= 64*nb_blocks;
 	/* filter bank: part 2 */
 	pos = first_pos;
 	for(blk = nb_blocks; blk > 0; blk--)
 	{
 		pos++;
 		if(pos >= FIR_BUFFER_SIZE)
 			pos = FIR_SIZE;
 		b0 = buf[(pos & 1) ^ 1] + pos - (FIR_SIZE-1) + 4*FIR_BUFFER_SIZE;
 FIR16_2(4,-4,47,-61,521,-926,2123,-7528,18578,11205,944,1647,455,177,56,10,0,
 28,0,-10,56,-177,455,-1647,944,-11205,18578,7528,2123,926,521,61,47,4);
 FIR16_2(5,-4,45,-49,518,-837,2210,-7072,18477,11654,733,1733,434,194,57,11,0,
 27,0,-11,57,-194,434,-1733,733,-11654,18477,7072,2210,837,518,49,45,4);
 FIR16_2(6,-4,44,-38,514,-751,2284,-6620,18353,12097,509,1817,411,212,57,12,0,
 26,0,-12,57,-212,411,-1817,509,-12097,18353,6620,2284,751,514,38,44,4);
 FIR16_2(7,-3,42,-27,508,-665,2347,-6173,18208,12534,270,1899,383,229,56,13,0,
 25,0,-13,56,-229,383,-1899,270,-12534,18208,6173,2347,665,508,27,42,3);
 		samples += 64;
 	}
 	samples -= 64*nb_blocks;
 	/* filter bank: part 3 */
 	pos = first_pos;
 	for(blk = nb_blocks; blk > 0; blk--)
 	{
 		pos++;
 		if(pos >= FIR_BUFFER_SIZE)
 			pos = FIR_SIZE;
 		b0 = buf[(pos & 1) ^ 1] + pos - (FIR_SIZE-1) + 8*FIR_BUFFER_SIZE;
 FIR16_2(8,-3,40,-18,500,-582,2398,-5732,18042,12963,17,1977,353,247,56,14,0,
 24,0,-14,56,-247,353,-1977,17,-12963,18042,5732,2398,582,500,18,40,3);
 FIR16_2(9,-2,38,-9,490,-501,2437,-5297,17855,13383,-249,2052,320,266,55,15,0,
 23,0,-15,55,-266,320,-2052,-249,-13383,17855,5297,2437,501,490,9,38,2);
 FIR16_2(10,-2,36,0,479,-423,2465,-4869,17647,13794,-530,2122,282,284,53,17,0,
 22,0,-17,53,-284,282,-2122,-530,-13794,17647,4869,2465,423,479,0,36,2);
 FIR16_2(11,-2,34,7,467,-347,2483,-4449,17419,14194,-825,2188,242,302,52,18,0,
 21,0,-18,52,-302,242,-2188,-825,-14194,17419,4449,2483,347,467,-7,34,2);
 		samples += 64;
 	}
 	samples -= 64*nb_blocks;
 	/* filter bank: part 4 */
 	pos = first_pos;
 	for(blk = nb_blocks; blk > 0; blk--)
 	{
 		pos++;
 		if(pos >= FIR_BUFFER_SIZE)
 			pos = FIR_SIZE;
 		b0 = buf[(pos & 1) ^ 1] + pos - (FIR_SIZE-1) + 12*FIR_BUFFER_SIZE;
 FIR16_2(12,-2,33,14,454,-273,2491,-4038,17173,14583,-1133,2249,198,320,50,19,0,
 20,0,-19,50,-320,198,-2249,-1133,-14583,17173,4038,2491,273,454,-14,33,2);
 FIR16_2(13,-1,31,20,439,-203,2489,-3637,16907,14959,-1454,2304,151,339,47,21,-1,
 19,-1,-21,47,-339,151,-2304,-1454,-14959,16907,3637,2489,203,439,-20,31,1);
 FIR16_2(14,-1,29,26,424,-136,2479,-3245,16623,15322,-1788,2354,100,357,44,22,-1,
 18,-1,-22,44,-357,100,-2354,-1788,-15322,16623,3245,2479,136,424,-26,29,1);
 FIR16_2(15,-1,27,31,408,-72,2459,-2863,16322,15671,-2135,2396,46,374,40,24,-1,
 17,-1,-24,40,-374,46,-2396,-2135,-15671,16322,2863,2459,72,408,-31,27,1);
 FIR16_1(16,-1,0,36,0,-11,0,-2493,0,16004,0,2431,0,391,0,26,0);
 		samples += 64;
 	}
 	return clip;
 }
--- a/src/libmpg123/decode_i586.S
+++ b/src/libmpg123/decode_i586.S
@@ -0,0 +1,335 @@
 /*
 decode_i586: asm synth
 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 see COPYING and AUTHORS files in distribution or http://mpg123.org
 initially written by Stefan Bieschewski
 synth_1to1 works the same way as the c version of this
 file.  only two types of changes have been made:
 - reordered floating point instructions to
   prevent pipeline stalls
 - made WRITE_SAMPLE use integer instead of
   (slower) floating point
 all kinds of x86 processors should benefit from these
 modifications.
 useful sources of information on optimizing x86 code include:
     Intel Architecture Optimization Manual
     http://www.intel.com/design/pentium/manuals/242816.htm
     Cyrix 6x86 Instruction Set Summary
     ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
     AMD-K5 Processor Software Development
     http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
 Stefan Bieschewski <stb@acm.org>
 $Id: decode_i586.s 1 2004-09-18 13:30:08Z thomas $
 */
 #include "mangle.h"
 /* Constant data; on non-Apple targets the .data directive is immediately followed by a switch to .rodata. */
 .data
 #ifndef __APPLE__
 .section .rodata
 #endif
 	ALIGN8
 /* Two dwords, low one first: the 64 bit pattern 0x40dfffc000000000, which read as an IEEE-754 double is 32767.0. */
 .LC0:
 	.long 0x0,0x40dfffc0
 	ALIGN8
 /* The 64 bit pattern 0xc0e0000000000000, which read as an IEEE-754 double is -32768.0. */
 .LC1:
 	.long 0x0,0xc0e00000
 	ALIGN8
 /* NOTE(review): neither .LC0 nor .LC1 is referenced by the synth routine that follows (it clips with integer compares);
    presumably leftovers from a floating point WRITE_SAMPLE - confirm before removing. */
 .text
 /*
 	int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
 	Runs dct64_i386 on bandPtr and then applies the window in decwin, writing 32 clipped 16 bit samples
 	to out, one every 4 bytes (16 from the first loop, 1 in between, 15 from the last loop).
 	channel == 0: *bo is decremented modulo 16 and written back, buffs is used as is.
 	channel != 0: *bo is used unchanged, out is advanced by 2 bytes and buffs by 2176 bytes.
 	All window and buffer values are read as 32 bit floats (flds/fmuls).
 	Return value (%eax): number of samples that had to be clipped.
 	Registers after the prologue: %esi = output pointer, %edi = clip counter, %ebx = dct64 output
 	being windowed, %ecx = window pointer, %ebp = bo at first and loop counter later.
 */
 .globl ASM_NAME(synth_1to1_i586_asm)
 ASM_NAME(synth_1to1_i586_asm):
 /* reserve three local dwords; only the one at 16(%esp) is actually used (it holds bo1) */
 	subl $12,%esp
 	pushl %ebp
 	pushl %edi
 	pushl %esi
 	pushl %ebx
 /* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16,20,24=local, 28=back, 32=bandPtr, 36=channel, 40=out, 44=buffs, 48=bo, 52=decwin */
 	movl 32(%esp),%eax /* *bandPtr */
 	movl 40(%esp),%esi /* *out */
 	movl 48(%esp),%edi /* *bo */
 	movl (%edi),%ebp   /* store bo value in ebp */
 	xorl %edi,%edi
 	cmpl %edi,36(%esp)
 	jne .L48           /* if(!channel) */
 	decl %ebp          /* bo-- */
 	andl $15,%ebp      /* bo &= 0xf */
 	movl 48(%esp),	%edi /* *bo */
 	movl %ebp,(%edi)   /* write back bo */
 	xorl %edi,%edi     /* restore %edi to 0; it's used later */
 	movl 44(%esp),%ecx /* use buffs */
 	jmp .L49
 .L48: /* if(channel) use buffs+2176 */
 	addl $2,%esi       /* second channel: output starts 2 bytes later */
 	movl 44(%esp),%ecx /* *buffs */
 	addl $2176,%ecx
 .L49:
 /* Prepare the three dct64 arguments depending on the parity of bo.
    %ecx is the channel's buffer; it consists of two halves 1088 bytes apart. */
 	testl $1,%ebp
 	je .L50
 /* bo odd: window the first half, bo1 = bo */
 	movl %ecx,%ebx
 	movl %ebp,16(%esp)
 	pushl %eax         /* third argument: bandPtr */
 	movl 20(%esp),%edx /* the local again (one push shifted it from 16 to 20) */
 	leal (%ebx,%edx,4),%eax
 	pushl %eax         /* second argument: first half + bo floats */
 	movl 24(%esp),%eax /* the local again (two pushes) */
 	incl %eax
 	andl $15,%eax
 	leal 1088(,%eax,4),%eax
 	addl %ebx,%eax     /* first argument: second half + ((bo+1)&15) floats */
 	jmp .L74
 .L50:
 /* bo even: window the second half, bo1 = bo+1 */
 	leal 1088(%ecx),%ebx
 	leal 1(%ebp),%edx
 	movl %edx,16(%esp)
 	pushl %eax         /* third argument: bandPtr */
 	leal 1092(%ecx,%ebp,4),%eax
 	pushl %eax         /* second argument: second half + (bo+1) floats */
 	leal (%ecx,%ebp,4),%eax /* first argument: first half + bo floats */
 .L74:
 	pushl %eax
 	call ASM_NAME(dct64_i386)
 	addl $12,%esp
 /* stack now back on track */
 /* %ecx = decwin + 64 bytes - 4*bo1, i.e. the window shifted back by bo1 floats; 16 rounds follow */
 	movl 16(%esp),%edx
 	leal 0(,%edx,4),%edx
 	movl 52(%esp),%eax /* decwin */
 	addl $64,%eax
 	movl %eax,%ecx
 	subl %edx,%ecx
 	movl $16,%ebp
 .L55:
 /* First loop: each round multiplies 16 window floats with 16 buffer floats and combines the
    products with alternating fsubrp/faddp; loads are interleaved with the arithmetic (see file header). */
 	flds (%ecx)
 	fmuls (%ebx)
 	flds 4(%ecx)
 	fmuls 4(%ebx)
 	fxch %st(1)
 	flds 8(%ecx)
 	fmuls 8(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 12(%ecx)
 	fmuls 12(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 16(%ecx)
 	fmuls 16(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 20(%ecx)
 	fmuls 20(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 24(%ecx)
 	fmuls 24(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 28(%ecx)
 	fmuls 28(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 32(%ecx)
 	fmuls 32(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 36(%ecx)
 	fmuls 36(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 40(%ecx)
 	fmuls 40(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 44(%ecx)
 	fmuls 44(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 48(%ecx)
 	fmuls 48(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 52(%ecx)
 	fmuls 52(%ebx)
 	fxch %st(2)         
 	faddp %st,%st(1)
 	flds 56(%ecx)
 	fmuls 56(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 60(%ecx)
 	fmuls 60(%ebx)
 	fxch %st(2)
 /* make room for one dword; fistpl stores the sum there as integer and popl fetches it, leaving the stack as before */
 	subl $4,%esp
 	faddp %st,%st(1)
 	fxch %st(1)
 	fsubrp %st,%st(1)
 	fistpl (%esp)
 	popl %eax
 /* clip to the 16 bit range; every clipped sample increments the counter in %edi */
 	cmpl $32767,%eax
 	jg 1f
 	cmpl $-32768,%eax
 	jl 2f
 	movw %ax,(%esi)
 	jmp 4f
 1:	movw $32767,(%esi)
 	jmp 3f
 2:	movw $-32768,(%esi)
 3:	incl %edi
 4:
 .L54:
 /* next round: buffer +16 floats, window +32 floats (subl of -128 adds 128), output +4 bytes */
 	addl $64,%ebx
 	subl $-128,%ecx
 	addl $4,%esi
 	decl %ebp
 	jnz .L55
 /* Single sample in the middle: only every second float (offsets 0,8,...,56) is used and all 8 products are added. */
 	flds (%ecx)
 	fmuls (%ebx)
 	flds 8(%ecx)
 	fmuls 8(%ebx)
 	flds 16(%ecx)
 	fmuls 16(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 24(%ecx)
 	fmuls 24(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 32(%ecx)
 	fmuls 32(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 40(%ecx)
 	fmuls 40(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 48(%ecx)
 	fmuls 48(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 56(%ecx)
 	fmuls 56(%ebx)
 	fxch %st(2)
 	subl $4,%esp
 	faddp %st,%st(1)
 	fxch %st(1)
 	faddp %st,%st(1)
 	fistpl (%esp)
 	popl %eax
 	cmpl $32767,%eax
 	jg 1f
 	cmpl $-32768,%eax
 	jl 2f
 	movw %ax,(%esi)
 	jmp 4f
 1:	movw $32767,(%esi)
 	jmp 3f
 2:	movw $-32768,(%esi)
 3:	incl %edi
 4:
 .L62:
 /* Set up the last loop: buffer steps back 16 floats, window pointer becomes %ecx - 128 + 8*bo1, 15 rounds. */
 	addl $-64,%ebx
 	addl $4,%esi
 	movl 16(%esp),%edx
 	leal -128(%ecx,%edx,8),%ecx
 	movl $15,%ebp
 .L68:
 /* Last loop: the window is read backwards (-4, -8, ..., -60, then 0) against ascending buffer floats;
    the first product is negated (fchs) and all further products are subtracted. */
 	flds -4(%ecx)
 	fchs
 	fmuls (%ebx)
 	flds -8(%ecx)
 	fmuls 4(%ebx)
 	fxch %st(1)
 	flds -12(%ecx)
 	fmuls 8(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -16(%ecx)
 	fmuls 12(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -20(%ecx)
 	fmuls 16(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -24(%ecx)
 	fmuls 20(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -28(%ecx)
 	fmuls 24(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -32(%ecx)
 	fmuls 28(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -36(%ecx)
 	fmuls 32(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -40(%ecx)
 	fmuls 36(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -44(%ecx)
 	fmuls 40(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -48(%ecx)
 	fmuls 44(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -52(%ecx)
 	fmuls 48(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -56(%ecx)
 	fmuls 52(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -60(%ecx)
 	fmuls 56(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds (%ecx)
 	fmuls 60(%ebx)
 	fxch %st(2)
 	subl $4,%esp
 	fsubrp %st,%st(1)
 	fxch %st(1)
 	fsubrp %st,%st(1)
 	fistpl (%esp)
 	popl %eax
 	cmpl $32767,%eax
 	jg 1f
 	cmpl $-32768,%eax
 	jl 2f
 	movw %ax,(%esi)
 	jmp 4f
 1:	movw $32767,(%esi)
 	jmp 3f
 2:	movw $-32768,(%esi)
 3:	incl %edi
 4:
 .L67:
 /* next round: buffer -16 floats, window -32 floats, output +4 bytes */
 	addl $-64,%ebx
 	addl $-128,%ecx
 	addl $4,%esi
 	decl %ebp
 	jnz .L68
 /* return the clip count, restore the saved registers and drop the locals */
 	movl %edi,%eax
 	popl %ebx
 	popl %esi
 	popl %edi
 	popl %ebp
 	addl $12,%esp
 	ret
--- a/src/libmpg123/decode_i586_dither.S
+++ b/src/libmpg123/decode_i586_dither.S
@@ -0,0 +1,368 @@
 /*
 	decode_i586_dither: asm synth with dither noise
 	copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Stefan Bieschewski as decode_i586.s without dither
 	This version uses "circular" 64k dither noise.
 	(Patch by Adrian <adrian.bacon@xs4all.nl>)
 	Thomas learned something about assembler and the stack while making this one thread safe (removing static data).
 */
 #include "mangle.h"
 /* Constant data; on non-Apple targets the .data directive is immediately followed by a switch to .rodata. */
 .data
 #ifndef __APPLE__
 		.section	.rodata
 #endif
 	ALIGN8
 /* Two dwords, low one first: the 64 bit pattern 0x40dfffc000000000, which read as an IEEE-754 double is 32767.0. */
 .LC0:
 	.long 0x0,0x40dfffc0
 	ALIGN8
 /* The 64 bit pattern 0xc0e0000000000000, which read as an IEEE-754 double is -32768.0. */
 .LC1:
 	.long 0x0,0xc0e00000
 	ALIGN8
 /* NOTE(review): neither .LC0 nor .LC1 is referenced by the synth routine that follows - confirm before removing. */
 .text
 /*
 	int synth_1to1_i586_asm_dither(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int bo_and_ditherindex[2], real *decwin);
 	Runs dct64_i386 on bandPtr, applies the window in decwin and writes 32 clipped 16 bit samples
 	to out, one every 4 bytes. Before a sample is rounded to an integer, one float from the
 	dithernoise table is added to it.
 	bo_and_ditherindex[0]: bo; decremented modulo 16 and stored back when channel == 0.
 	bo_and_ditherindex[1]: byte offset into dithernoise; copied to a local on entry, advanced by 4
 	(wrapping via the mask 0x3fffc) for every sample and stored back before returning.
 	channel != 0: out is advanced by 2 bytes, buffs by 2176 bytes, and the dither offset is rewound
 	by 128 bytes (32 floats) first so that both channels get the same noise values.
 	Always returns 0: %edi is needed as dither pointer, so clipped samples are not counted here.
 */
 .globl ASM_NAME(synth_1to1_i586_asm_dither)
 ASM_NAME(synth_1to1_i586_asm_dither):
 	subl $16,%esp
 	pushl %ebp
 	pushl %edi
 	pushl %esi
 	pushl %ebx
 /* stack: 0(%esp)=%ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo 56=decwin */
 #define BANDPTR 36(%esp)
 #define CHANNEL 40(%esp)
 #define OUT     44(%esp)
 #define BUFFS   48(%esp)
 #define BO      52(%esp)
 #define DECWIN  56(%esp)
 /* LOC0 holds bo1. LOC1 and LOC2 are only used as names for the very same slot while one respectively two dct64 arguments are pushed. */
 #define LOC0    16(%esp)
 #define LOC1    20(%esp)
 #define LOC2    24(%esp)
 #define DITHERINDEX  28(%esp)
 /* Between "subl $4,%esp" and the matching "popl %eax" there is one extra dword on the stack,
    so the dither index local is 4 bytes further away. Using plain DITHERINDEX there would hit
    the unused (and uninitialized) local at offset 24 instead. */
 #define DITHERINDEX2 32(%esp)
 	movl BANDPTR,%eax
 	movl OUT,%esi
 	movl BO, %ebx
 	movl (%ebx),%ebp    /* get bo value */
 	movl 4(%ebx),%edi;  /* get the ditherindex behind bo */
 	movl %edi,DITHERINDEX
 	xorl %edi,%edi
 	cmpl %edi,CHANNEL
 	jne .L48
 	decl %ebp
 	andl $15,%ebp
 	movl %ebp,(%ebx)   /* save bo back */
 	movl BUFFS,%ecx
 	jmp .L49
 .L48:
 /*       In stereo mode , "rewind" dither pointer 32 samples , so 2nd channel */
 /*       has same dither values. Tested OK for mono and stereo MP2 and MP3 */
 	subl $128,DITHERINDEX /* better move to %edi for the two calculations? */
 	andl $0x0003fffc,DITHERINDEX
 	addl $2,%esi
 	movl BUFFS,%ecx
 	addl $2176,%ecx
 .L49:
 /* now the call of dct64 is prepared, stuff pushed to the stack, but soon after it's removed again */
 /* %ecx is the channel's buffer; it consists of two halves 1088 bytes apart, the parity of bo selects the roles */
 	testl $1,%ebp
 	je .L50
 /* bo odd: window the first half, bo1 = bo */
 	movl %ecx,%ebx
 	movl %ebp,LOC0
 	pushl %eax          /* third argument: bandPtr */
 	movl LOC1,%edx      /* bo1 again, seen through one push */
 	leal (%ebx,%edx,4),%eax
 	pushl %eax          /* second argument: first half + bo floats */
 	movl LOC2,%eax      /* bo1 again, seen through two pushes */
 	incl %eax
 	andl $15,%eax
 	leal 1088(,%eax,4),%eax
 	addl %ebx,%eax      /* first argument: second half + ((bo+1)&15) floats */
 	jmp .L74
 .L50:
 /* bo even: window the second half, bo1 = bo+1 */
 	leal 1088(%ecx),%ebx
 	leal 1(%ebp),%edx
 	movl %edx,LOC0
 	pushl %eax          /* third argument: bandPtr */
 	leal 1092(%ecx,%ebp,4),%eax
 	pushl %eax          /* second argument: second half + (bo+1) floats */
 	leal (%ecx,%ebp,4),%eax /* first argument: first half + bo floats */
 .L74:
 	pushl %eax
 	call ASM_NAME(dct64_i386)
 	addl $12,%esp
 /* Now removed the parameters.
   stack: 0(%esp)=%ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo */
 /* %ecx = decwin + 64 bytes - 4*bo1, i.e. the window shifted back by bo1 floats; 16 rounds follow */
 	movl LOC0,%edx
 	leal 0(,%edx,4),%edx
 	/* movl $ASM_NAME(decwin)+64,%eax */
 	movl DECWIN,%eax
 	addl $64,%eax
 	movl %eax,%ecx
 	subl %edx,%ecx
 	movl $16,%ebp
 .L55:
 /* First loop: 16 window floats times 16 buffer floats per round, products combined with alternating fsubrp/faddp. */
 	flds (%ecx)
 	fmuls (%ebx)
 	flds 4(%ecx)
 	fmuls 4(%ebx)
 	fxch %st(1)
 	flds 8(%ecx)
 	fmuls 8(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 12(%ecx)
 	fmuls 12(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 16(%ecx)
 	fmuls 16(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 20(%ecx)
 	fmuls 20(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 24(%ecx)
 	fmuls 24(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 28(%ecx)
 	fmuls 28(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 32(%ecx)
 	fmuls 32(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 36(%ecx)
 	fmuls 36(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 40(%ecx)
 	fmuls 40(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 44(%ecx)
 	fmuls 44(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 48(%ecx)
 	fmuls 48(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 52(%ecx)
 	fmuls 52(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 56(%ecx)
 	fmuls 56(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds 60(%ecx)
 	fmuls 60(%ebx)
 	fxch %st(2)
 /* from here until popl the stack is shifted by one dword: use DITHERINDEX2 */
 	subl $4,%esp
 	faddp %st,%st(1)
 	fxch %st(1)
 	fsubrp %st,%st(1)
 /* advance the circular dither offset by one float and add that noise value to the sample */
 	addl $4,DITHERINDEX2
 	andl $0x0003fffc,DITHERINDEX2
 	movl $ASM_NAME(dithernoise),%edi
 	addl DITHERINDEX2,%edi
 	fadds (%edi)       /* explicit 32 bit float operand; noise offsets are 4 byte steps */
 /* fistpl and popl as a unit keep the stack unchanged */
 	fistpl (%esp)
 	popl %eax
 /* clip to the 16 bit range */
 	cmpl $32767,%eax
 	jg 1f
 	cmpl $-32768,%eax
 	jl 2f
 	movw %ax,(%esi)
 	jmp 4f
 1:	movw $32767,(%esi)
 	jmp 3f
 2:	movw $-32768,(%esi)
 3:
 /*	incl %edi */
 4:
 .L54:
 /* next round: buffer +16 floats, window +32 floats (subl of -128 adds 128), output +4 bytes */
 	addl $64,%ebx
 	subl $-128,%ecx
 	addl $4,%esi
 	decl %ebp
 	jnz .L55
 /* Single sample in the middle: only every second float (offsets 0,8,...,56) is used and all 8 products are added. */
 	flds (%ecx)
 	fmuls (%ebx)
 	flds 8(%ecx)
 	fmuls 8(%ebx)
 	flds 16(%ecx)
 	fmuls 16(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 24(%ecx)
 	fmuls 24(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 32(%ecx)
 	fmuls 32(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 40(%ecx)
 	fmuls 40(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 48(%ecx)
 	fmuls 48(%ebx)
 	fxch %st(2)
 	faddp %st,%st(1)
 	flds 56(%ecx)
 	fmuls 56(%ebx)
 	fxch %st(2)
 /* stack shifted by one dword again: DITHERINDEX2 */
 	subl $4,%esp
 	faddp %st,%st(1)
 	fxch %st(1)
 	faddp %st,%st(1)
 	addl $4,DITHERINDEX2
 	andl $0x0003fffc,DITHERINDEX2
 	movl $ASM_NAME(dithernoise),%edi
 	addl DITHERINDEX2,%edi
 	fadds (%edi)
 /* fistpl and popl as a unit keep the stack unchanged */
 	fistpl (%esp)
 	popl %eax
 	cmpl $32767,%eax
 	jg 1f
 	cmpl $-32768,%eax
 	jl 2f
 	movw %ax,(%esi)
 	jmp 4f
 1:	movw $32767,(%esi)
 	jmp 3f
 2:	movw $-32768,(%esi)
 3:
 /*	incl %edi */
 4:
 .L62:
 /* Set up the last loop: buffer steps back 16 floats, window pointer becomes %ecx - 128 + 8*bo1, 15 rounds. */
 	addl $-64,%ebx
 	addl $4,%esi
 	movl LOC0,%edx
 	leal -128(%ecx,%edx,8),%ecx
 	movl $15,%ebp
 .L68:
 /* Last loop: the window is read backwards (-4, -8, ..., -60, then 0) against ascending buffer floats;
    the first product is negated (fchs) and all further products are subtracted. */
 	flds -4(%ecx)
 	fchs
 	fmuls (%ebx)
 	flds -8(%ecx)
 	fmuls 4(%ebx)
 	fxch %st(1)
 	flds -12(%ecx)
 	fmuls 8(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -16(%ecx)
 	fmuls 12(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -20(%ecx)
 	fmuls 16(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -24(%ecx)
 	fmuls 20(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -28(%ecx)
 	fmuls 24(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -32(%ecx)
 	fmuls 28(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -36(%ecx)
 	fmuls 32(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -40(%ecx)
 	fmuls 36(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -44(%ecx)
 	fmuls 40(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -48(%ecx)
 	fmuls 44(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -52(%ecx)
 	fmuls 48(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -56(%ecx)
 	fmuls 52(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds -60(%ecx)
 	fmuls 56(%ebx)
 	fxch %st(2)
 	fsubrp %st,%st(1)
 	flds (%ecx)
 	fmuls 60(%ebx)
 	fxch %st(2)
 /* stack shifted by one dword again: DITHERINDEX2 */
 	subl $4,%esp
 	fsubrp %st,%st(1)
 	fxch %st(1)
 	fsubrp %st,%st(1)
 	addl $4,DITHERINDEX2
 	andl $0x0003fffc,DITHERINDEX2
 	movl $ASM_NAME(dithernoise),%edi
 	addl DITHERINDEX2,%edi
 	fadds (%edi)
 /* fistpl and popl as a unit keep the stack unchanged */
 	fistpl (%esp)
 	popl %eax
 	cmpl $32767,%eax
 	jg 1f
 	cmpl $-32768,%eax
 	jl 2f
 	movw %ax,(%esi)
 	jmp 4f
 1:	movw $32767,(%esi)
 	jmp 3f
 2:	movw $-32768,(%esi)
 3:
 /*	incl %edi */
 4:
 .L67:
 /* next round: buffer -16 floats, window -32 floats, output +4 bytes */
 	addl $-64,%ebx
 	addl $-128,%ecx
 	addl $4,%esi
 	decl %ebp
 	jnz .L68
 /* return ipv edi 0 in eax */
 	movl $0,%eax
 /* save ditherindex (stack is unshifted here, so the plain DITHERINDEX is the right slot) */
 	movl BO,%ebx
 	movl DITHERINDEX,%esi
 	movl %esi,4(%ebx);
 /* stack: 0=ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo */
 	popl %ebx
 	popl %esi
 	popl %edi
 	popl %ebp
 	addl $16,%esp
 /* The stack must be now: 0=back 4=bandptr 8=channel 12=out 16=buffs 20=bo */
 	ret
--- a/src/libmpg123/decode_mmx.S
+++ b/src/libmpg123/decode_mmx.S
@@ -0,0 +1,125 @@
 /*
 	decode_MMX.s: MMX optimized synth
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by the mysterious higway (apparently)
 Thomas' words about a note:
 Initially, I found the note "this code comes under GPL" in this file.
 After asking Michael about legal status of the MMX files, he said that he got them without any comment and thus I believe that the GPL comment was made by Michael, since he made mpg123 GPL at some time - and marked some files that way, but not all.
 Based on that thought, I now consider this file along with the other parts of higway's MMX optimization to be licensed under LGPL 2.1 by Michael's decision.
 */
 #include "mangle.h"
 .text
 .globl ASM_NAME(synth_1to1_MMX)
 /* int synth_1to1_MMX(real *bandPtr, int channel, short *out, short *buffs, int *bo, float *decwins); */
 /*
 	Runs dct64_MMX and then produces 32 output samples (17 in the first loop, 15 in the second),
 	one 16 bit value every 4 bytes starting at out + 2*channel.
 	Buffer and window data are processed as packed 16 bit words (pmaddwd).
 	NOTE(review): that holds for decwins as well although the prototype above calls it float* - confirm against the caller.
 	NOTE(review): %eax is not set to a dedicated return value; after the loops it still holds the last sample
 	moved out of %mm0 - confirm that callers ignore the declared int result.
 */
 ASM_NAME(synth_1to1_MMX):
        pushl %ebp
        pushl %edi
        pushl %esi
        pushl %ebx
 /* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16=back, 20=bandPtr, 24=channel, 28=out, 32=buffs, 36=bo, 40=decwins */
 /* %ecx = channel, %edi = out + 2*channel, %edx = bo pointer, %esi = buffs, %eax = *bo, %ebx = 15 (mask) */
        movl 24(%esp),%ecx
        movl 28(%esp),%edi
        movl $15,%ebx
        movl 36(%esp),%edx
        leal (%edi,%ecx,2),%edi
 	decl %ecx
        movl 32(%esp),%esi
        movl (%edx),%eax
 /* %ecx is channel-1 now: for channel 1 the following update is skipped, otherwise
    bo = (bo-1) & 15 is stored back and the buffer 1088 bytes further on is used */
        jecxz .L1
        decl %eax
        andl %ebx,%eax
        leal 1088(%esi),%esi
        movl %eax,(%edx)
 .L1:
 /* prepare the two buffer arguments for dct64; entries are 2 bytes each, the two halves are 544 bytes apart */
        leal (%esi,%eax,2),%edx
        movl %eax,%ebp
        incl %eax
        pushl 20(%esp)             /* third argument: bandPtr */
        andl %ebx,%eax
        leal 544(%esi,%eax,2),%ecx
        incl %ebx                  /* %ebx = 16 */
 /* if (bo+1)&15 is even: swap the two arguments, use bo+1 as window shift and window the second half */
 	testl $1, %eax
 	jnz .L2
        xchgl %edx,%ecx
 	incl %ebp
        leal 544(%esi),%esi
 .L2:
        pushl %edx
        pushl %ecx
        call ASM_NAME(dct64_MMX)
        addl $12,%esp
 /* stack like before, pushed 3, incremented again */
 /* loop count %ecx = %ebx+1 (17 when dct64_MMX preserved %ebx); window = decwins + 2*(16 - shift) bytes.
    %eax is saved around the address computation. */
 	leal 1(%ebx), %ecx
        subl %ebp,%ebx
 	pushl %eax
 	movl 44(%esp),%eax /* decwins */
 	leal (%eax,%ebx,2), %edx
 	popl %eax
 .L3:
 /* 16 word products (4 pmaddwd of 4 words each) are summed, shifted right by 13 bits (arithmetic),
    saturated to 16 bit and stored; then buffer +32 bytes, window +64 bytes, output +4 bytes */
        movq  (%edx),%mm0
        pmaddwd (%esi),%mm0
        movq  8(%edx),%mm1
        pmaddwd 8(%esi),%mm1
        movq  16(%edx),%mm2
        pmaddwd 16(%esi),%mm2
        movq  24(%edx),%mm3
        pmaddwd 24(%esi),%mm3
        paddd %mm1,%mm0
        paddd %mm2,%mm0
        paddd %mm3,%mm0
        movq  %mm0,%mm1
        psrlq $32,%mm1
        paddd %mm1,%mm0
        psrad $13,%mm0
        packssdw %mm0,%mm0
        movd %mm0,%eax
 	movw %ax, (%edi)
        leal 32(%esi),%esi
        leal 64(%edx),%edx
        leal 4(%edi),%edi
        loop .L3
 /* second loop, 15 rounds: the buffer now walks backwards (-32 bytes per round) while the window keeps advancing */
        subl $64,%esi
        movl $15,%ecx
 .L4:
        movq  (%edx),%mm0
        pmaddwd (%esi),%mm0
        movq  8(%edx),%mm1
        pmaddwd 8(%esi),%mm1
        movq  16(%edx),%mm2
        pmaddwd 16(%esi),%mm2
        movq  24(%edx),%mm3
        pmaddwd 24(%esi),%mm3
        paddd %mm1,%mm0
        paddd %mm2,%mm0
        paddd %mm3,%mm0
        movq  %mm0,%mm1
        psrlq $32,%mm1
        paddd %mm0,%mm1
        psrad $13,%mm1
        packssdw %mm1,%mm1
 /* negate with saturation: %mm0 is cleared, then the sample is subtracted from it */
        psubd %mm0,%mm0
        psubsw %mm1,%mm0
        movd %mm0,%eax
 	movw %ax,(%edi)
        subl $32,%esi
        addl $64,%edx
        leal 4(%edi),%edi
        loop .L4
 /* leave MMX state so that x87 code can run again, restore the saved registers */
 	emms
        popl %ebx
        popl %esi
        popl %edi
        popl %ebp
        ret
--- a/src/libmpg123/decode_mmxsse.S
+++ b/src/libmpg123/decode_mmxsse.S
@@ -0,0 +1,278 @@
 /*
 	decode_mmxsse: Synth for SSE and extended 3DNow (yeah, the name is a relic)
 	copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by the mysterious higway for MMX (apparently)
 	then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
 	Both have agreed to distribution under LGPL 2.1 .
 	Transformed back into standalone asm, with help of
 	gcc -S -DHAVE_CONFIG_H -I.  -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c}
 	Original comment from MPlayer source follows:
 */
 /*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
 */
 #include "mangle.h"
 /* Exported table of 31 dwords (124 bytes, as the commented-out .size says).
    NOTE(review): the values look like IEEE-754 single precision bit patterns (the first one is 0x3F002785,
    about 0.5006); presumably the cosine table for the dct64 routines - confirm. It is not used in this file's synth. */
 .globl ASM_NAME(costab_mmxsse)
 	.data
 	ALIGN16
 	/* .type	ASM_NAME(costab_mmxsse), @object
 	   .size	ASM_NAME(costab_mmxsse), 124 */
 ASM_NAME(costab_mmxsse):
 	.long	1056974725
 	.long	1057056395
 	.long	1057223771
 	.long	1057485416
 	.long	1057855544
 	.long	1058356026
 	.long	1059019886
 	.long	1059897405
 	.long	1061067246
 	.long	1062657950
 	.long	1064892987
 	.long	1066774581
 	.long	1069414683
 	.long	1073984175
 	.long	1079645762
 	.long	1092815430
 	.long	1057005197
 	.long	1057342072
 	.long	1058087743
 	.long	1059427869
 	.long	1061799040
 	.long	1065862217
 	.long	1071413542
 	.long	1084439708
 	.long	1057128951
 	.long	1058664893
 	.long	1063675095
 	.long	1076102863
 	.long	1057655764
 	.long	1067924853
 	.long	1060439283
 	ALIGN8
 	/* .type	one_null, @object
 	   .size	one_null, 8 */
 /* 64 bit mask 0xFFFF0000FFFF0000: keeps the upper 16 bits of each dword (the samples already present in the output) */
 one_null:
 	.long	-65536
 	.long	-65536
 	ALIGN8
 	/* .type	null_one, @object
 	.size	null_one, 8 */
 /* 64 bit mask 0x0000FFFF0000FFFF: keeps the lower 16 bits of each dword (the freshly computed samples) */
 null_one:
 	.long	65535
 	.long	65535
 	/* .local	temp */
 /* One shared dword of scratch storage; the synth below keeps its window shift (bo or bo+1) in it.
    NOTE(review): being a single global, it is shared by all callers of the routine. */
 	COMM(temp,4,4)
 	.text
 	ALIGN16,,15
 	/* void synth_1to1_sse_s(real *bandPtr, int channel, short *samples, short *buffs, int *bo) */
 	/*
 	   Runs the dct64 routine reached through the pointer variable mpl_dct64 and then produces 32 output
 	   samples, one 16 bit value every 4 bytes starting at samples + 2*channel:
 	   8 rounds of two samples, one single sample, 7 rounds of two samples and a final single sample.
 	   Unlike the plain MMX synth the window is not a parameter: the global symbol decwins is used,
 	   and the window shift is kept in the global temp instead of a register.
 	   Buffer and window data are processed as packed 16 bit words (pmaddwd).
 	*/
 .globl ASM_NAME(synth_1to1_sse_s)
 	/* .type	ASM_NAME(synth_1to1_sse_s), @function */
 ASM_NAME(synth_1to1_sse_s):
 	pushl	%ebp
 	movl	%esp, %ebp
 	pushl	%edi
 	pushl	%esi
 	pushl	%ebx
 #APP
 /* frame: 8(%ebp)=bandPtr 12=channel 16=samples 20=buffs 24=bo */
 /* %ecx = channel, %edi = samples + 2*channel, %edx = bo pointer, %esi = buffs, %eax = *bo, %ebx = 15 (mask) */
 	movl 12(%ebp),%ecx
 	movl 16(%ebp),%edi
 	movl $15,%ebx
 	movl 24(%ebp),%edx
 	leal (%edi,%ecx,2),%edi
 	decl %ecx
 	movl 20(%ebp),%esi
 	movl (%edx),%eax
 /* %ecx is channel-1 now: for channel 1 the following update is skipped, otherwise
    bo = (bo-1) & 15 is stored back and the buffer 1088 bytes further on is used */
 	jecxz .L01
 	decl %eax
 	andl %ebx,%eax
 	leal 1088(%esi),%esi
 	movl %eax,(%edx)
 	.L01:
 /* prepare the two buffer arguments for dct64; entries are 2 bytes each, the two halves are 544 bytes apart */
 	leal (%esi,%eax,2),%edx
 	movl %eax,temp
 	incl %eax
 	andl %ebx,%eax
 	leal 544(%esi,%eax,2),%ecx
 	incl %ebx
 /* if (bo+1)&15 is even: swap the two arguments, use bo+1 as window shift and window the second half */
 	testl $1, %eax
 	jnz .L02
 	xchgl %edx,%ecx
 	incl temp
 	leal 544(%esi),%esi
 	.L02:
 	emms
 	pushl 8(%ebp)
 	pushl %edx
 	pushl %ecx
 	call *ASM_NAME(mpl_dct64)
 	addl $12, %esp
 /* %ecx = %ebx+1 (17 when the dct64 routine preserved %ebx); the count is saved on the stack and halved
    for the two-samples-per-round loop. Window = decwins + 2*(16 - shift) bytes. */
 	leal 1(%ebx), %ecx
 	subl temp,%ebx
 	pushl %ecx
 	leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx
 	shrl $1, %ecx
 	ALIGN16
 	.L03:
 /* two samples per round: %mm0..%mm3 accumulate the first, %mm4..%mm7 the second (window +64, buffer +32 bytes) */
 	movq  (%edx),%mm0
 	movq  64(%edx),%mm4
 	pmaddwd (%esi),%mm0
 	pmaddwd 32(%esi),%mm4
 	movq  8(%edx),%mm1
 	movq  72(%edx),%mm5
 	pmaddwd 8(%esi),%mm1
 	pmaddwd 40(%esi),%mm5
 	movq  16(%edx),%mm2
 	movq  80(%edx),%mm6
 	pmaddwd 16(%esi),%mm2
 	pmaddwd 48(%esi),%mm6
 	movq  24(%edx),%mm3
 	movq  88(%edx),%mm7
 	pmaddwd 24(%esi),%mm3
 	pmaddwd 56(%esi),%mm7
 	paddd %mm1,%mm0
 	paddd %mm5,%mm4
 	paddd %mm2,%mm0
 	paddd %mm6,%mm4
 	paddd %mm3,%mm0
 	paddd %mm7,%mm4
 	movq  %mm0,%mm1
 	movq  %mm4,%mm5
 	psrlq $32,%mm1
 	psrlq $32,%mm5
 	paddd %mm1,%mm0
 	paddd %mm5,%mm4
 	psrad $13,%mm0
 	psrad $13,%mm4
 	packssdw %mm0,%mm0
 	packssdw %mm4,%mm4
 /* merge into the output: read 8 bytes, keep the upper 16 bits of each dword (one_null),
    put the two new samples into the lower 16 bits (null_one) and write the 8 bytes back */
 	movq	(%edi), %mm1
 	punpckldq %mm4, %mm0
 	pand   one_null, %mm1
 	pand   null_one, %mm0
 	por    %mm0, %mm1
 	movq   %mm1,(%edi)
 	leal 64(%esi),%esi
 	leal 128(%edx),%edx
 	leal 8(%edi),%edi
 	decl %ecx
 	jnz  .L03
 /* if the saved count was odd, one more single sample is computed and stored with a plain 16 bit write */
 	popl %ecx
 	andl $1, %ecx
 	jecxz .next_loop
 	movq  (%edx),%mm0
 	pmaddwd (%esi),%mm0
 	movq  8(%edx),%mm1
 	pmaddwd 8(%esi),%mm1
 	movq  16(%edx),%mm2
 	pmaddwd 16(%esi),%mm2
 	movq  24(%edx),%mm3
 	pmaddwd 24(%esi),%mm3
 	paddd %mm1,%mm0
 	paddd %mm2,%mm0
 	paddd %mm3,%mm0
 	movq  %mm0,%mm1
 	psrlq $32,%mm1
 	paddd %mm1,%mm0
 	psrad $13,%mm0
 	packssdw %mm0,%mm0
 	movd %mm0,%eax
 	movw %ax, (%edi)
 	leal 32(%esi),%esi
 	leal 64(%edx),%edx
 	leal 4(%edi),%edi
 	.next_loop:
 /* second part: the buffer walks backwards (second sample of a pair at -32 bytes) and results are negated; 7 double rounds */
 	subl $64,%esi
 	movl $7,%ecx
 	ALIGN16
 	.L04:
 	movq  (%edx),%mm0
 	movq  64(%edx),%mm4
 	pmaddwd (%esi),%mm0
 	pmaddwd -32(%esi),%mm4
 	movq  8(%edx),%mm1
 	movq  72(%edx),%mm5
 	pmaddwd 8(%esi),%mm1
 	pmaddwd -24(%esi),%mm5
 	movq  16(%edx),%mm2
 	movq  80(%edx),%mm6
 	pmaddwd 16(%esi),%mm2
 	pmaddwd -16(%esi),%mm6
 	movq  24(%edx),%mm3
 	movq  88(%edx),%mm7
 	pmaddwd 24(%esi),%mm3
 	pmaddwd -8(%esi),%mm7
 	paddd %mm1,%mm0
 	paddd %mm5,%mm4
 	paddd %mm2,%mm0
 	paddd %mm6,%mm4
 	paddd %mm3,%mm0
 	paddd %mm7,%mm4
 	movq  %mm0,%mm1
 	movq  %mm4,%mm5
 	psrlq $32,%mm1
 	psrlq $32,%mm5
 	paddd %mm0,%mm1
 	paddd %mm4,%mm5
 	psrad $13,%mm1
 	psrad $13,%mm5
 	packssdw %mm1,%mm1
 	packssdw %mm5,%mm5
 /* negate with saturation: clear %mm0/%mm4, then subtract the samples from them */
 	psubd %mm0,%mm0
 	psubd %mm4,%mm4
 	psubsw %mm1,%mm0
 	psubsw %mm5,%mm4
 	movq	(%edi), %mm1
 	punpckldq %mm4, %mm0
 	pand   one_null, %mm1
 	pand   null_one, %mm0
 	por    %mm0, %mm1
 	movq   %mm1,(%edi)
 	subl $64,%esi
 	addl $128,%edx
 	leal 8(%edi),%edi
 	decl %ecx
 	jnz  .L04
 /* final single (negated) sample */
 	movq  (%edx),%mm0
 	pmaddwd (%esi),%mm0
 	movq  8(%edx),%mm1
 	pmaddwd 8(%esi),%mm1
 	movq  16(%edx),%mm2
 	pmaddwd 16(%esi),%mm2
 	movq  24(%edx),%mm3
 	pmaddwd 24(%esi),%mm3
 	paddd %mm1,%mm0
 	paddd %mm2,%mm0
 	paddd %mm3,%mm0
 	movq  %mm0,%mm1
 	psrlq $32,%mm1
 	paddd %mm0,%mm1
 	psrad $13,%mm1
 	packssdw %mm1,%mm1
 	psubd %mm0,%mm0
 	psubsw %mm1,%mm0
 	movd %mm0,%eax
 	movw %ax,(%edi)
 	emms
 #NO_APP
 	popl	%ebx
 	popl	%esi
 	popl	%edi
 	popl	%ebp
 	ret
 	/* .size	ASM_NAME(synth_1to1_sse_s), .-ASM_NAME(synth_1to1_sse_s) */
--- a/src/libmpg123/decode_ntom.c
+++ b/src/libmpg123/decode_ntom.c
@@ -0,0 +1,401 @@
 /*
 	decode_ntom.c: N->M down/up sampling. Not optimized for speed.
 	copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 #include "mpg123lib_intern.h"
 /*
 	Compute the resampling step for the N->M synth from the stream's rate (frame_freq) and the
 	output rate (fr->af.rate) and initialize both ntom counters for the current frame (fr->num).
 	The step is output rate / input rate, scaled by NTOM_MUL.
 	Returns 0 on success; on rates outside 1..NTOM_MAX_FREQ or a ratio above 1:NTOM_MAX it sets
 	fr->err to MPG123_BAD_RATE and returns -1.
 */
 int synth_ntom_set_step(mpg123_handle *fr)
 {
 	long m,n;
 	m = frame_freq(fr);
 	n = fr->af.rate;
 	if(VERBOSE2)
 		fprintf(stderr,"Init rate converter: %ld->%ld\n",m,n);
 	if(n > NTOM_MAX_FREQ || m > NTOM_MAX_FREQ || m <= 0 || n <= 0) {
 		if(NOQUIET) error("NtoM converter: illegal rates");
 		fr->err = MPG123_BAD_RATE;
 		return -1;
 	}
 	/* Both rates are positive here. Do the scaling in unsigned long: n*NTOM_MUL as signed long
 	   could overflow where long has 32 bits, and signed overflow is undefined. */
 	fr->ntom_step = ((unsigned long) n * NTOM_MUL) / (unsigned long) m;
 	if(fr->ntom_step > (unsigned long)NTOM_MAX*NTOM_MUL) {
 		/* Report the very limit that was checked against, not a hardcoded factor. */
 		if(NOQUIET) error3("max. 1:%i conversion allowed (%lu vs %lu)!", NTOM_MAX, fr->ntom_step, (unsigned long)NTOM_MAX*NTOM_MUL);
 		fr->err = MPG123_BAD_RATE;
 		return -1;
 	}
 	fr->ntom_val[0] = fr->ntom_val[1] = ntom_val(fr, fr->num);
 	return 0;
 }
 /*
 	Compute the ntom counter value at the beginning of the given frame: NTOM_MUL/2 for frame 0,
 	advanced by spf(fr)*fr->ntom_step per frame and reduced modulo NTOM_MUL.
 	With SAFE_NTOM the frames are walked one by one, reducing after each step, so that no big
 	sample offset gets multiplied (a 32bit off_t could overflow too easily); this also mimics
 	what the decoder does. Without it, a single formula over the whole offset is used.
 */
 unsigned long ntom_val(mpg123_handle *fr, off_t frame)
 {
 	off_t counter;
 #ifdef SAFE_NTOM /* Step through the frames, keeping the counter small all the time. */
 	off_t togo = frame;
 	counter = NTOM_MUL>>1; /* value for frame 0 */
 	while(togo > 0) /* one step per frame before the wanted one */
 	{
 		counter += spf(fr)*fr->ntom_step;
 		counter %= NTOM_MUL;
 		--togo;
 	}
 #else /* One computation on the overall sample offset. */
 	counter  = (NTOM_MUL>>1) + spf(fr)*frame*fr->ntom_step;
 	counter %= NTOM_MUL;
 #endif
 	return (unsigned long) counter;
 }
 /* Load both ntom counters with the value belonging to frame num, the next frame expected to be decoded.
    That keeps the output consistent across seeks. */
 void ntom_set_ntom(mpg123_handle *fr, off_t num)
 {
 	unsigned long start = ntom_val(fr, num);
 	fr->ntom_val[0] = start;
 	fr->ntom_val[1] = fr->ntom_val[0];
 }
 /* Convert frame offset to unadjusted output sample offset:
    the number of output samples the ntom synth produces for all frames before the given one.
    With SAFE_NTOM the frames are walked one by one (returning 0 for frame <= 0),
    otherwise one formula on the total input sample count is used. */
 off_t ntom_frmouts(mpg123_handle *fr, off_t frame)
 {
 #ifdef SAFE_NTOM
 	off_t f; /* frame counter of the loop below; it was not declared, breaking SAFE_NTOM builds */
 #endif
 	off_t soff = 0;
 	off_t ntm = ntom_val(fr,0);
 #ifdef SAFE_NTOM
 	if(frame <= 0) return 0;
 	for(f=0; f<frame; ++f)
 	{
 		ntm  += spf(fr)*fr->ntom_step;
 		soff += ntm/NTOM_MUL;
 		ntm  -= (ntm/NTOM_MUL)*NTOM_MUL;
 	}
 #else
 	/* Keep the computation in signed off_t; mixing in the unsigned step could turn the whole
 	   expression unsigned and make a negative frame offset come out as a huge positive number. */
 	soff = (ntm + frame*(off_t)spf(fr)*(off_t)fr->ntom_step)/(off_t)NTOM_MUL;
 #endif
 	return soff;
 }
 /* Convert input samples to unadjusted output samples.
    With SAFE_NTOM the input is consumed in blocks of at most one frame (spf) to keep the
    intermediate values small, returning 0 for ins <= 0; otherwise one formula is used. */
 off_t ntom_ins2outs(mpg123_handle *fr, off_t ins)
 {
 	off_t soff = 0;
 	off_t ntm = ntom_val(fr,0);
 #ifdef SAFE_NTOM
 	{
 		off_t block = spf(fr);
 		if(ins <= 0) return 0;
 		do
 		{
 			off_t nowblock = ins > block ? block : ins;
 			ntm  += nowblock*fr->ntom_step;
 			soff += ntm/NTOM_MUL;
 			ntm  -= (ntm/NTOM_MUL)*NTOM_MUL;
 			ins -= nowblock;
 		} while(ins > 0);
 	}
 #else
 	/* Keep the computation in signed off_t; mixing in the unsigned step could turn the whole
 	   expression unsigned and make a negative sample count come out as a huge positive number. */
 	soff = (ntm + ins*(off_t)fr->ntom_step)/(off_t)NTOM_MUL;
 #endif
 	return soff;
 }
 /* Determine frame offset from unadjusted output sample offset:
    the frame that contains output sample soff.
    With SAFE_NTOM the frames are walked until their accumulated output exceeds soff
    (returning 0 for soff <= 0); otherwise the conversion formula is inverted directly. */
 off_t ntom_frameoff(mpg123_handle *fr, off_t soff)
 {
 	off_t ioff = 0; /* frames or samples */
 	off_t ntm = ntom_val(fr,0);
 #ifdef SAFE_NTOM
 	if(soff <= 0) return 0;
 	for(ioff=0; 1; ++ioff)
 	{
 		ntm  += spf(fr)*fr->ntom_step;
 		if(ntm/NTOM_MUL > soff) break;
 		soff -= ntm/NTOM_MUL;
 		ntm  -= (ntm/NTOM_MUL)*NTOM_MUL;
 	}
 	return ioff;
 #else
 	/* The numerator is negative for small soff (ntm starts at half of NTOM_MUL). Dividing it by the
 	   unsigned step could convert it to a huge unsigned value first, so force signed off_t arithmetic. */
 	ioff = (soff*(off_t)NTOM_MUL-ntm)/(off_t)fr->ntom_step;
 	return ioff/(off_t)spf(fr);
 #endif
 }
 /* Now to the actual decoding/synth functions... */
 /* 8bit output for one channel of an interleaved stereo stream:
    synth_ntom renders into a temporary buffer, then every second value of that (starting at the channel's slot)
    is converted via the conv16to8 table into every second byte of the real output buffer.
    The fill of the output buffer only advances when final is set. Returns synth_ntom's clip count. */
 int synth_ntom_8bit(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
 	sample_t scratch[8*64];
 	unsigned char *out_data = fr->buffer.data;
 	int old_fill = fr->buffer.fill;
 	sample_t *src;
 	unsigned char *dst;
 	int clipped;
 	int k;
 	/* Let synth_ntom work on the scratch space, then put the real buffer back. */
 	fr->buffer.data = (unsigned char*) scratch;
 	fr->buffer.fill = 0;
 	clipped = synth_ntom(bandPtr, channel, fr, 1);
 	fr->buffer.data = out_data;
 	src = scratch + channel;
 	dst = out_data + channel + old_fill;
 	/* buffer.fill now counts the bytes in scratch; a quarter of that is the number of samples per channel. */
 	for(k=0; k<(fr->buffer.fill>>2); ++k)
 	{
 #ifdef FLOATOUT
 		dst[2*k] = 0;
 #else
 		dst[2*k] = fr->conv16to8[src[2*k]>>AUSHIFT];
 #endif
 	}
 	if(final) fr->buffer.fill = old_fill + (fr->buffer.fill>>1);
 	else fr->buffer.fill = old_fill;
 	return clipped;
 }
 /* 8bit mono output: synth_ntom renders channel 0 into a temporary buffer, then every second value of that
    is converted via the conv16to8 table and appended byte by byte to the real output buffer.
    Returns synth_ntom's clip count. */
 int synth_ntom_8bit_mono(real *bandPtr, mpg123_handle *fr)
 {
 	sample_t scratch[8*64];
 	unsigned char *out_data = fr->buffer.data;
 	int old_fill = fr->buffer.fill;
 	unsigned char *dst;
 	int clipped;
 	int k;
 	/* Let synth_ntom work on the scratch space, then put the real buffer back. */
 	fr->buffer.data = (unsigned char*) scratch;
 	fr->buffer.fill = 0;
 	clipped = synth_ntom(bandPtr, 0, fr, 1);
 	fr->buffer.data = out_data;
 	dst = out_data + old_fill;
 	for(k=0; k<(fr->buffer.fill>>2); ++k)
 	{
 #ifdef FLOATOUT
 		dst[k] = 0;
 #else
 		dst[k] = fr->conv16to8[scratch[2*k]>>AUSHIFT];
 #endif
 	}
 	/* One output byte per sample. */
 	fr->buffer.fill = old_fill + (fr->buffer.fill>>2);
 	return clipped;
 }
 /* 8bit mono-to-stereo output: synth_ntom renders channel 0 into a temporary buffer, then every second value
    of that is converted via the conv16to8 table and written twice (left and right) to the real output buffer.
    Returns synth_ntom's clip count. */
 int synth_ntom_8bit_mono2stereo(real *bandPtr, mpg123_handle *fr)
 {
 	sample_t scratch[8*64];
 	unsigned char *out_data = fr->buffer.data;
 	int old_fill = fr->buffer.fill;
 	unsigned char *dst;
 	int clipped;
 	int k;
 	/* Let synth_ntom work on the scratch space, then put the real buffer back. */
 	fr->buffer.data = (unsigned char*) scratch;
 	fr->buffer.fill = 0;
 	clipped = synth_ntom(bandPtr, 0, fr, 1);
 	fr->buffer.data = out_data;
 	dst = out_data + old_fill;
 	for(k=0; k<(fr->buffer.fill>>2); ++k)
 	{
 #ifdef FLOATOUT
 		dst[2*k]   = 0;
 		dst[2*k+1] = 0;
 #else
 		dst[2*k]   = fr->conv16to8[scratch[2*k]>>AUSHIFT];
 		dst[2*k+1] = fr->conv16to8[scratch[2*k]>>AUSHIFT];
 #endif
 	}
 	/* Two output bytes per sample. */
 	fr->buffer.fill = old_fill + (fr->buffer.fill>>1);
 	return clipped;
 }
 /* Mono output in sample_t format: synth_ntom renders channel 0 into a temporary buffer,
    then every second value of that is appended to the real output buffer.
    Returns synth_ntom's clip count. */
 int synth_ntom_mono(real *bandPtr, mpg123_handle *fr)
 {
 	sample_t scratch[8*64];
 	unsigned char *out_data = fr->buffer.data;
 	int old_fill = fr->buffer.fill;
 	sample_t *dst;
 	int clipped;
 	int k;
 	/* Let synth_ntom work on the scratch space, then put the real buffer back. */
 	fr->buffer.data = (unsigned char*) scratch;
 	fr->buffer.fill = 0;
 	clipped = synth_ntom(bandPtr, 0, fr, 1);
 	fr->buffer.data = out_data;
 	dst = (sample_t *)(out_data + old_fill);
 	for(k=0; k<(fr->buffer.fill>>2); ++k)
 		dst[k] = scratch[2*k];
 	fr->buffer.fill = old_fill + (fr->buffer.fill>>2)*sizeof(sample_t);
 	return clipped;
 }
 /* Mono-to-stereo output: synth_ntom renders channel 0 directly into the output buffer (advancing its fill),
    then each freshly written first sample of a pair is copied into the second slot next to it.
    Returns synth_ntom's clip count. */
 int synth_ntom_mono2stereo(real *bandPtr, mpg123_handle *fr)
 {
 	int start = fr->buffer.fill;
 	sample_t *pairs = (sample_t *)(fr->buffer.data + start);
 	int clipped = synth_ntom(bandPtr, 0, fr, 1);
 	int k;
 	for(k=0; k<((fr->buffer.fill-start)>>2); ++k)
 		pairs[2*k+1] = pairs[2*k];
 	return clipped;
 }
 /*
 	The N->M resampling synth for one channel.
 	bandPtr: subband values that go through the equalizer (if set) and dct64.
 	channel: 0 advances the shared buffer offset fr->bo[0] and restarts from fr->ntom_val[0];
 	         any other value writes to the second slot of each output pair and continues from fr->ntom_val[1].
 	final:   if nonzero, fr->buffer.fill is updated to the end of the written data.
 	For each of the 32 window positions the counter ntom is advanced by fr->ntom_step;
 	for every full NTOM_MUL reached, the position's sum is written once (so it may be
 	written zero, one or several times). The counter is stored in fr->ntom_val[channel] at the end.
 	Returns the clip count as maintained by WRITE_SAMPLE.
 */
 int synth_ntom(real *bandPtr,int channel, mpg123_handle *fr, int final)
 {
  /* distance between two samples of the same channel in the interleaved output */
  static const int step = 2;
  sample_t *samples = (sample_t *) (fr->buffer.data + fr->buffer.fill);
  real *b0, **buf; /* (*buf)[0x110]; */
  int clip = 0; 
  int bo1;
  int ntom;
  if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
  if(!channel) {
    fr->bo[0]--;
    fr->bo[0] &= 0xf;
    buf = fr->real_buffs[0];
    /* first channel: start from the frame's initial counter and remember it for the other channel */
    ntom = fr->ntom_val[1] = fr->ntom_val[0];
  }
  else {
    samples++;
    buf = fr->real_buffs[1];
    ntom = fr->ntom_val[1];
  }
  /* The parity of bo decides which of the two buffers is windowed (b0) and how the dct64 outputs are placed. */
  if(fr->bo[0] & 0x1) {
    b0 = buf[0];
    bo1 = fr->bo[0];
    opt_dct64(fr)(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
  }
  else {
    b0 = buf[1];
    bo1 = fr->bo[0]+1;
    opt_dct64(fr)(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
  }
  {
    register int j;
    real *window = opt_decwin(fr) + 16 - bo1;
    /* First 16 positions: window and b0 both advance by 16 inside the body (or in the skip branch),
       the loop header adds another 16 to window. */
    for (j=16;j;j--,window+=0x10)
    {
      real sum;
      ntom += fr->ntom_step;
      /* no output sample due for this position: just move the pointers along */
      if(ntom < NTOM_MUL) {
        window += 16;
        b0 += 16;
        continue;
      }
      sum  = *window++ * *b0++;
      sum -= *window++ * *b0++;
      sum += *window++ * *b0++;
      sum -= *window++ * *b0++;
      sum += *window++ * *b0++;
      sum -= *window++ * *b0++;
      sum += *window++ * *b0++;
      sum -= *window++ * *b0++;
      sum += *window++ * *b0++;
      sum -= *window++ * *b0++;
      sum += *window++ * *b0++;
      sum -= *window++ * *b0++;
      sum += *window++ * *b0++;
      sum -= *window++ * *b0++;
      sum += *window++ * *b0++;
      sum -= *window++ * *b0++;
      /* write the same sum once for every full NTOM_MUL in the counter */
      while(ntom >= NTOM_MUL) {
        WRITE_SAMPLE(samples,sum,clip);
        samples += step;
        ntom -= NTOM_MUL;
      }
    }
    /* Middle position: only the even-indexed values contribute; pointers are not moved here. */
    ntom += fr->ntom_step;
    if(ntom >= NTOM_MUL)
    {
      real sum;
      sum  = window[0x0] * b0[0x0];
      sum += window[0x2] * b0[0x2];
      sum += window[0x4] * b0[0x4];
      sum += window[0x6] * b0[0x6];
      sum += window[0x8] * b0[0x8];
      sum += window[0xA] * b0[0xA];
      sum += window[0xC] * b0[0xC];
      sum += window[0xE] * b0[0xE];
      while(ntom >= NTOM_MUL) {
        WRITE_SAMPLE(samples,sum,clip);
        samples += step;
        ntom -= NTOM_MUL;
      }
    }
    /* Reposition for the last 15 positions: b0 goes back through the buffer in steps of 16
       (+16 in the body, -32 in the loop header) while the window is read backwards. */
    b0-=0x10,window-=0x20;
    window += bo1<<1;
    for (j=15;j;j--,b0-=0x20,window-=0x10)
    {
      real sum;
      ntom += fr->ntom_step;
      /* no output sample due: move the pointers as the 16 products would have done */
      if(ntom < NTOM_MUL) {
        window -= 16;
        b0 += 16;
        continue;
      }
      sum = -*(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      sum -= *(--window) * *b0++;
      while(ntom >= NTOM_MUL) {
        WRITE_SAMPLE(samples,sum,clip);
        samples += step;
        ntom -= NTOM_MUL;
      }
    }
  }
  /* keep the counter for the next call on this channel */
  fr->ntom_val[channel] = ntom;
  /* the second channel started one sample into the buffer; the subtracted 2 bytes undo that
     (NOTE(review): this presumes a 2 byte sample_t - confirm for other output formats) */
  if(final) fr->buffer.fill = ((unsigned char *) samples - fr->buffer.data - (channel ? 2 : 0));
  return clip;
 }
--- a/src/libmpg123/decode_sse.S
+++ b/src/libmpg123/decode_sse.S
@@ -0,0 +1,4 @@
 /* SSE instantiation of the synth template in decode_sse3d.h. */
 #include "mangle.h"
 /* The DCT64 routine the template calls. */
 #define MPL_DCT64 ASM_NAME(dct64_sse)
 /* The global symbol the template defines. */
 #define SYNTH_NAME ASM_NAME(synth_1to1_sse_asm)
 #include "decode_sse3d.h"
--- a/src/libmpg123/decode_sse3d.h
+++ b/src/libmpg123/decode_sse3d.h
@@ -0,0 +1,247 @@
 /*
 	decode_sse3d: Synth for SSE and extended 3DNow (yeah, the name is a relic)
 	copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by the mysterious higway for MMX (apparently)
 	then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
 	Both have agreed to distribution under LGPL 2.1 .
 	Transformed back into standalone asm, with help of
 	gcc -S -DHAVE_CONFIG_H -I.  -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c}
 	The difference between SSE and 3DNowExt is the dct64 function and the synth function name.
 	This template here uses the SYNTH_NAME and MPL_DCT64 macros for this - see decode_sse.S and decode_3dnowext.S...
 	That's not memory efficient since there's doubled code, but it's easier than giving another function pointer.
 	Maybe I'll change it in future, but now I need something that works.
 	Original comment from MPlayer source follows:
 */
 /*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
 */
 #include "mangle.h"
 	.data
 	ALIGN8
 /* 64 bit mask 0xFFFF0000FFFF0000: keeps the upper 16 bits of each dword. */
 one_null:
 	.long	-65536
 	.long	-65536
 	ALIGN8
 /* 64 bit mask 0x0000FFFF0000FFFF: keeps the lower 16 bits of each dword. */
 null_one:
 	.long	65535
 	.long	65535
 	.text
 	ALIGN16,,15
 	/* void SYNTH_NAME(real *bandPtr, int channel, short *samples, short *buffs, int *bo, float *decwins) */
 /*
 	Synth template: runs MPL_DCT64 on bandPtr and then writes 32 16bit samples
 	(16 + 1 + 14 + 1) to the output at samples + 2*channel bytes, one sample every 4 bytes.
 	Only the low 16 bits of each output dword are replaced in the two-sample loops;
 	the upper 16 bits are kept (presumably the other channel's sample -- confirm with the caller).
 */
 .globl SYNTH_NAME
 SYNTH_NAME:
 	pushl	%ebp
 /* stack:0=ebp 4=back 8=bandptr 12=channel 16=samples 20=buffs 24=bo 28=decwins */
 	movl	%esp, %ebp
 /* Now the old stack addresses are preserved via %ebp. */
 	subl  $4,%esp /* What has been called temp before. */
 	pushl	%edi
 	pushl	%esi
 	pushl	%ebx
 /* The scratch slot reserved above; only valid while %esp is at this level (no extra pushes pending). */
 #define TEMP 12(%esp)
 #APP
 	/* ecx = channel, edi = samples, ebx = 15 (mask), edx = bo, esi = buffs, eax = *bo */
 	movl 12(%ebp),%ecx
 	movl 16(%ebp),%edi
 	movl $15,%ebx
 	movl 24(%ebp),%edx
 	/* Output pointer: samples + channel*2 bytes. */
 	leal (%edi,%ecx,2),%edi
 	decl %ecx
 	movl 20(%ebp),%esi
 	movl (%edx),%eax
 	/* channel == 1: keep *bo and the buffer base as they are. */
 	jecxz .L01
 	/* Otherwise: *bo = (*bo - 1) & 15 and use the buffer 1088 bytes further on. */
 	decl %eax
 	andl %ebx,%eax
 	leal 1088(%esi),%esi
 	movl %eax,(%edx)
 	.L01:
 	/* Work out the two DCT64 output pointers (edx, ecx) from the bo value; TEMP keeps the window offset. */
 	leal (%esi,%eax,2),%edx
 	movl %eax,TEMP
 	incl %eax
 	andl %ebx,%eax
 	leal 544(%esi,%eax,2),%ecx
 	incl %ebx
 	testl $1, %eax
 	jnz .L02
 	/* (bo+1)&15 is even: swap the two pointers, bump the window offset, use the second 544 byte half. */
 	xchgl %edx,%ecx
 	incl TEMP
 	leal 544(%esi),%esi
 	.L02:
 	emms
 	/* MPL_DCT64(ecx, edx, bandPtr), cdecl: arguments pushed right to left, caller cleans up. */
 	pushl 8(%ebp)
 	pushl %edx
 	pushl %ecx
 	call MPL_DCT64
 	addl $12, %esp
 	/* ecx = 17 (ebx is 16 here), ebx = 16 - TEMP */
 	leal 1(%ebx), %ecx
 	subl TEMP,%ebx
 	pushl %ecx
 	/* leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx */
 	/* edx = decwins + 2*ebx bytes: start of the window data */
 	movl 28(%ebp),%ecx
 	leal (%ecx,%ebx,2), %edx
 	movl (%esp),%ecx /* restore, but leave value on stack */
 	/* 17 >> 1 = 8 passes, two samples per pass */
 	shrl $1, %ecx
 	ALIGN16
 	.L03:
 	/* Two dot products of 16 shorts each: window at edx / edx+64 with buffer at esi / esi+32. */
 	movq  (%edx),%mm0
 	movq  64(%edx),%mm4
 	pmaddwd (%esi),%mm0
 	pmaddwd 32(%esi),%mm4
 	movq  8(%edx),%mm1
 	movq  72(%edx),%mm5
 	pmaddwd 8(%esi),%mm1
 	pmaddwd 40(%esi),%mm5
 	movq  16(%edx),%mm2
 	movq  80(%edx),%mm6
 	pmaddwd 16(%esi),%mm2
 	pmaddwd 48(%esi),%mm6
 	movq  24(%edx),%mm3
 	movq  88(%edx),%mm7
 	pmaddwd 24(%esi),%mm3
 	pmaddwd 56(%esi),%mm7
 	paddd %mm1,%mm0
 	paddd %mm5,%mm4
 	paddd %mm2,%mm0
 	paddd %mm6,%mm4
 	paddd %mm3,%mm0
 	paddd %mm7,%mm4
 	/* Horizontal add of the two dword halves, scale down by 13 bits, saturate to 16 bit. */
 	movq  %mm0,%mm1
 	movq  %mm4,%mm5
 	psrlq $32,%mm1
 	psrlq $32,%mm5
 	paddd %mm1,%mm0
 	paddd %mm5,%mm4
 	psrad $13,%mm0
 	psrad $13,%mm4
 	packssdw %mm0,%mm0
 	packssdw %mm4,%mm4
 	/* Merge the two new samples into the low words of the 8 output bytes, keeping the high words. */
 	movq	(%edi), %mm1
 	punpckldq %mm4, %mm0
 	pand   one_null, %mm1
 	pand   null_one, %mm0
 	por    %mm0, %mm1
 	movq   %mm1,(%edi)
 	leal 64(%esi),%esi
 	leal 128(%edx),%edx
 	leal 8(%edi),%edi
 	decl %ecx
 	jnz  .L03
 	/* The pushed count is 17, so its low bit is set: one single sample follows the paired loop. */
 	popl %ecx
 	andl $1, %ecx
 	jecxz .next_loop
 	movq  (%edx),%mm0
 	pmaddwd (%esi),%mm0
 	movq  8(%edx),%mm1
 	pmaddwd 8(%esi),%mm1
 	movq  16(%edx),%mm2
 	pmaddwd 16(%esi),%mm2
 	movq  24(%edx),%mm3
 	pmaddwd 24(%esi),%mm3
 	paddd %mm1,%mm0
 	paddd %mm2,%mm0
 	paddd %mm3,%mm0
 	movq  %mm0,%mm1
 	psrlq $32,%mm1
 	paddd %mm1,%mm0
 	psrad $13,%mm0
 	packssdw %mm0,%mm0
 	movd %mm0,%eax
 	/* 16 bit store: only this sample's slot is written. */
 	movw %ax, (%edi)
 	leal 32(%esi),%esi
 	leal 64(%edx),%edx
 	leal 4(%edi),%edi
 	.next_loop:
 	/* Second half: the buffer is walked backwards (esi decreasing) and the sums are negated. */
 	subl $64,%esi
 	movl $7,%ecx
 	ALIGN16
 	.L04:
 	movq  (%edx),%mm0
 	movq  64(%edx),%mm4
 	pmaddwd (%esi),%mm0
 	pmaddwd -32(%esi),%mm4
 	movq  8(%edx),%mm1
 	movq  72(%edx),%mm5
 	pmaddwd 8(%esi),%mm1
 	pmaddwd -24(%esi),%mm5
 	movq  16(%edx),%mm2
 	movq  80(%edx),%mm6
 	pmaddwd 16(%esi),%mm2
 	pmaddwd -16(%esi),%mm6
 	movq  24(%edx),%mm3
 	movq  88(%edx),%mm7
 	pmaddwd 24(%esi),%mm3
 	pmaddwd -8(%esi),%mm7
 	paddd %mm1,%mm0
 	paddd %mm5,%mm4
 	paddd %mm2,%mm0
 	paddd %mm6,%mm4
 	paddd %mm3,%mm0
 	paddd %mm7,%mm4
 	movq  %mm0,%mm1
 	movq  %mm4,%mm5
 	psrlq $32,%mm1
 	psrlq $32,%mm5
 	paddd %mm0,%mm1
 	paddd %mm4,%mm5
 	psrad $13,%mm1
 	psrad $13,%mm5
 	packssdw %mm1,%mm1
 	packssdw %mm5,%mm5
 	/* Negate with saturation: clear mm0/mm4, then 0 - value. */
 	psubd %mm0,%mm0
 	psubd %mm4,%mm4
 	psubsw %mm1,%mm0
 	psubsw %mm5,%mm4
 	movq	(%edi), %mm1
 	punpckldq %mm4, %mm0
 	pand   one_null, %mm1
 	pand   null_one, %mm0
 	por    %mm0, %mm1
 	movq   %mm1,(%edi)
 	subl $64,%esi
 	addl $128,%edx
 	leal 8(%edi),%edi
 	decl %ecx
 	jnz  .L04
 	/* Last single sample, negated like the ones in the loop above. */
 	movq  (%edx),%mm0
 	pmaddwd (%esi),%mm0
 	movq  8(%edx),%mm1
 	pmaddwd 8(%esi),%mm1
 	movq  16(%edx),%mm2
 	pmaddwd 16(%esi),%mm2
 	movq  24(%edx),%mm3
 	pmaddwd 24(%esi),%mm3
 	paddd %mm1,%mm0
 	paddd %mm2,%mm0
 	paddd %mm3,%mm0
 	movq  %mm0,%mm1
 	psrlq $32,%mm1
 	paddd %mm0,%mm1
 	psrad $13,%mm1
 	packssdw %mm1,%mm1
 	psubd %mm0,%mm0
 	psubsw %mm1,%mm0
 	movd %mm0,%eax
 	movw %ax,(%edi)
 	/* Leave MMX state before returning. */
 	emms
 #NO_APP
 	/* Undo the prologue: saved registers, the TEMP slot, then the frame pointer. */
 	popl	%ebx
 	popl	%esi
 	popl	%edi
 	addl $4,%esp
 	popl	%ebp
 	ret
--- a/src/libmpg123/dnoise.c
+++ b/src/libmpg123/dnoise.c
--- a/src/libmpg123/equalizer.c
+++ b/src/libmpg123/equalizer.c
@@ -0,0 +1,17 @@
 /*
 	equalizer.c: equalizer settings
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 #include "mpg123lib_intern.h"
 /* Scale each of the 32 subband values in bandPtr by the equalizer factor of the given channel. */
 void do_equalizer(real *bandPtr,int channel, real equalizer[2][32])
 {
 	real *factor = equalizer[channel];
 	int band = 0;
 	while(band < 32)
 	{
 		bandPtr[band] = REAL_MUL(bandPtr[band], factor[band]);
 		++band;
 	}
 }
--- a/src/libmpg123/equalizer_3dnow.S
+++ b/src/libmpg123/equalizer_3dnow.S
@@ -0,0 +1,68 @@
 /*
 	equalizer_3dnow: 3DNow! optimized do_equalizer()
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by KIMURA Takuhiro
 */
 #include "mangle.h"
 .text
 	ALIGN4
 .globl ASM_NAME(do_equalizer_3dnow)
 /*	.type	 ASM_NAME(do_equalizer_3dnow),@function */
 /* void do_equalizer(real *bandPtr,int channel, real equalizer[2][32]); */
 /*
 	Multiplies the 32 floats at bandPtr by equalizer[channel][0..31], two floats per pfmul.
 	The loop runs twice, handling 16 floats (64 bytes) of both arrays per pass.
 */
 ASM_NAME(do_equalizer_3dnow):
 	pushl %esi
 	pushl %ebx
 	/* bandPtr */
 	movl 12(%esp),%ebx
 	/* channel */
 	movl 16(%esp),%ecx
 	/* edx: byte offset into bandPtr */
 	xorl %edx,%edx
 	/* equalizer */
 	movl 20(%esp),%esi
 	/* ecx: byte offset of the channel's row, channel*128 == channel*32*4 */
 	sall $7,%ecx
 	ALIGN4
 .L9:
 	movq (%ebx,%edx),%mm0
 	pfmul (%esi,%ecx),%mm0
 	movq 8(%ebx,%edx),%mm1
 	pfmul 8(%esi,%ecx),%mm1
 	movq %mm0,(%ebx,%edx)
 	movq 16(%ebx,%edx),%mm0
 	pfmul 16(%esi,%ecx),%mm0
 	movq %mm1,8(%ebx,%edx)
 	movq 24(%ebx,%edx),%mm1
 	pfmul 24(%esi,%ecx),%mm1
 	movq %mm0,16(%ebx,%edx)
 	movq 32(%ebx,%edx),%mm0
 	pfmul 32(%esi,%ecx),%mm0
 	movq %mm1,24(%ebx,%edx)
 	movq 40(%ebx,%edx),%mm1
 	pfmul 40(%esi,%ecx),%mm1
 	movq %mm0,32(%ebx,%edx)
 	movq 48(%ebx,%edx),%mm0
 	pfmul 48(%esi,%ecx),%mm0
 	movq %mm1,40(%ebx,%edx)
 	movq 56(%ebx,%edx),%mm1
 	pfmul 56(%esi,%ecx),%mm1
 	movq %mm0,48(%ebx,%edx)
 	movq %mm1,56(%ebx,%edx)
 	/* Both offsets advance by the 64 bytes consumed in this pass. */
 	/* The equalizer offset used to advance by only 32, which paired bands 16..31 with factors 8..23. */
 	addl $64,%edx
 	addl $64,%ecx
 	/* Continue while the band offset is still inside the 128 byte band array. */
 	cmpl $124,%edx
 	jle .L9
 	ALIGN4
 	popl %ebx
 	popl %esi
 	ret
--- a/src/libmpg123/format.c
+++ b/src/libmpg123/format.c
@@ -0,0 +1,195 @@
 #include "mpg123lib_intern.h"
 /* static int chans[NUM_CHANNELS] = { 1 , 2 }; */
 /* Sampling rates in Hz; rate2num() returns the position in this table as the rate index for audio_caps. */
 const long mpg123_rates[MPG123_RATES] = /* only the standard rates */
 {
 	 8000, 11025, 12000, 
 	16000, 22050, 24000,
 	32000, 44100, 48000,
 };
 /* Output encodings in the order used to index audio_caps. */
 /* frame_output_format() treats indices 0 and 1 as the 16bit group and 2 onwards as the 8bit group. */
 const int mpg123_encodings[MPG123_ENCODINGS] =
 {
 	MPG123_ENC_SIGNED_16, 
 	MPG123_ENC_UNSIGNED_16,
 	MPG123_ENC_UNSIGNED_8,
 	MPG123_ENC_SIGNED_8,
 	MPG123_ENC_ULAW_8,
 	MPG123_ENC_ALAW_8
 };
 /*	char audio_caps[NUM_CHANNELS][MPG123_RATES+1][MPG123_ENCODINGS]; */
 /* Map a sampling rate to its audio_caps rate index.
    Standard rates give their position in mpg123_rates, the configured forced rate gives MPG123_RATES,
    anything else gives -1. */
 static int rate2num(mpg123_handle *fr, long r)
 {
 	int idx = 0;
 	while(idx < MPG123_RATES)
 	{
 		if(mpg123_rates[idx] == r) return idx;
 		++idx;
 	}
 	/* Not a standard rate: it only counts when it is the (non-zero) forced rate. */
 	if(fr->p.force_rate == 0 || fr->p.force_rate != r) return -1;
 	return MPG123_RATES;
 }
 /* Look for an enabled encoding with index in [f0, f2) for the channel count and rate in nf.
    On success the encoding is stored in nf->encoding and 1 is returned, otherwise 0. */
 static int cap_fit(mpg123_handle *fr, struct audioformat *nf, int f0, int f2)
 {
 	int enc;
 	int chan_idx = nf->channels-1;
 	int rate_idx = rate2num(fr, nf->rate);
 	/* A rate without an index cannot match anything. */
 	if(rate_idx < 0) return 0;
 	for(enc = f0; enc < f2; ++enc)
 	{
 		if(!fr->p.audio_caps[chan_idx][rate_idx][enc]) continue;
 		nf->encoding = mpg123_encodings[enc];
 		return 1;
 	}
 	return 0;
 }
 /* Try cap_fit() with the frame rate (after down_sample), then half and a quarter of it.
    Returns 1 on the first match; on failure 0, with nf->rate left at the last rate tried. */
 static int freq_fit(mpg123_handle *fr, struct audioformat *nf, int f0, int f2)
 {
 	int halvings;
 	nf->rate = frame_freq(fr)>>fr->p.down_sample;
 	for(halvings = 0; ; ++halvings)
 	{
 		if(cap_fit(fr,nf,f0,f2)) return 1;
 		if(halvings == 2) break; /* full, half and quarter rate all failed */
 		nf->rate >>= 1;
 	}
 	return 0;
 }
 /* match constraints against supported audio formats, store possible setup in frame
  return: -1: error; 0: no format change; 1: format change
  On error fr->err is set to MPG123_BAD_OUTFORMAT.
  Search order: wanted channel count with 16bit, then 8bit encodings; after that the same with the
  other channel count, unless the wanted one is forced by flag. */
 int frame_output_format(mpg123_handle *fr)
 {
 	struct audioformat nf;
 	int f0=0;
 	mpg123_pars *p = &fr->p;
 	/* initialize new format, encoding comes later */
 	nf.channels = fr->stereo;
 	/* With f0 == 2 the "16bit" searches below cover the empty range [2,2) and never match. */
 	if(p->flags & MPG123_FORCE_8BIT) f0 = 2; /* skip the 16bit encodings */
 	/* force stereo is stronger */
 	if(p->flags & MPG123_FORCE_MONO)   nf.channels = 1;
 	if(p->flags & MPG123_FORCE_STEREO) nf.channels = 2;
 	/* A forced rate is the only rate tried; no halving as in freq_fit(). */
 	if(p->force_rate)
 	{
 		nf.rate = p->force_rate;
 		if(cap_fit(fr,&nf,f0,2)) goto end;            /* 16bit encodings */
 		if(cap_fit(fr,&nf,2,MPG123_ENCODINGS)) goto end; /*  8bit encodings */
 		/* try again with different stereoness */
 		if(nf.channels == 2 && !(p->flags & MPG123_FORCE_STEREO)) nf.channels = 1;
 		else if(nf.channels == 1 && !(p->flags & MPG123_FORCE_MONO)) nf.channels = 2;
 		if(cap_fit(fr,&nf,f0,2)) goto end;            /* 16bit encodings */
 		if(cap_fit(fr,&nf,2,MPG123_ENCODINGS)) goto end; /*  8bit encodings */
 		if(NOQUIET)
 		error3( "Unable to set up output format! Constraints: %s%s%liHz.",
 		        ( p->flags & MPG123_FORCE_STEREO ? "stereo, " :
 		          (p->flags & MPG123_FORCE_MONO ? "mono, " : "") ),
 		        (p->flags & MPG123_FORCE_8BIT ? "8bit, " : ""),
 		        p->force_rate );
 /*		if(NOQUIET && p->verbose <= 1) print_capabilities(fr); */
 		fr->err = MPG123_BAD_OUTFORMAT;
 		return -1;
 	}
 	/* No forced rate: freq_fit() tries the frame rate and its half and quarter. */
 	if(freq_fit(fr, &nf, f0, 2)) goto end; /* try rates with 16bit */
 	if(freq_fit(fr, &nf,  2, MPG123_ENCODINGS)) goto end; /* ... 8bit */
 	/* try again with different stereoness */
 	if(nf.channels == 2 && !(p->flags & MPG123_FORCE_STEREO)) nf.channels = 1;
 	else if(nf.channels == 1 && !(p->flags & MPG123_FORCE_MONO)) nf.channels = 2;
 	if(freq_fit(fr, &nf, f0, 2)) goto end; /* try rates with 16bit */
 	if(freq_fit(fr, &nf,  2, MPG123_ENCODINGS)) goto end; /* ... 8bit */
 	/* Here is the _bad_ end. */
 	if(NOQUIET)
 	error5( "Unable to set up output format! Constraints: %s%s%li, %li or %liHz.",
 	        ( p->flags & MPG123_FORCE_STEREO ? "stereo, " :
 	          (p->flags & MPG123_FORCE_MONO ? "mono, "  : "") ),
 	        (p->flags & MPG123_FORCE_8BIT  ? "8bit, " : ""),
 	        frame_freq(fr),  frame_freq(fr)>>1, frame_freq(fr)>>2 );
 /*	if(NOQUIET && p->verbose <= 1) print_capabilities(fr); */
 	fr->err = MPG123_BAD_OUTFORMAT;
 	return -1;
 end: /* Here is the _good_ end. */
 	/* we had a successful match, now see if there's a change */
 	if(nf.rate == fr->af.rate && nf.channels == fr->af.channels && nf.encoding == fr->af.encoding)
 	return 0; /* the same format as before */
 	else /* a new format */
 	{
 		fr->af.rate = nf.rate;
 		fr->af.channels = nf.channels;
 		fr->af.encoding = nf.encoding;
 		return 1;
 	}
 }
 /* Report the current output format of the handle.
    Each of rate, channels and encoding may be NULL if the caller does not want that value.
    Returns MPG123_ERR for a NULL handle, MPG123_OK otherwise. */
 int mpg123_getformat(mpg123_handle *mh, long *rate, int *channels, int *encoding)
 {
 	if(mh == NULL) return MPG123_ERR;
 	/* Only write through the pointers that were actually provided. */
 	if(rate != NULL) *rate = mh->af.rate;
 	if(channels != NULL) *channels = mh->af.channels;
 	if(encoding != NULL) *encoding = mh->af.encoding;
 	return MPG123_OK;
 }
 /* Disable every output format of the handle by zeroing its audio_caps table.
    Returns MPG123_ERR for a NULL handle, MPG123_OK otherwise. */
 int mpg123_format_none(mpg123_handle *mh)
 {
 	if(mh != NULL)
 	{
 		memset(mh->p.audio_caps, 0, sizeof(mh->p.audio_caps));
 		return MPG123_OK;
 	}
 	return MPG123_ERR;
 }
 /* Enable every output format of the handle by setting each byte of its audio_caps table to 1.
    Returns MPG123_ERR for a NULL handle, MPG123_OK otherwise. */
 int mpg123_format_all(mpg123_handle *mh)
 {
 	if(mh != NULL)
 	{
 		memset(mh->p.audio_caps, 1, sizeof(mh->p.audio_caps));
 		return MPG123_OK;
 	}
 	return MPG123_ERR;
 }
 /* Enable the given encodings for one rate index and the given channel configurations.
    ratei: index into mpg123_rates, or a negative value for the special (forced rate) slot.
    channels: bit combination of MPG123_MONO and MPG123_STEREO.
    encodings: bit combination of encodings; every table entry sharing a bit gets enabled.
    Returns MPG123_OK, or MPG123_ERR for a NULL handle, and for bad channels or rate with mh->err set. */
 int mpg123_format(mpg123_handle *mh, int ratei, int channels, int encodings)
 {
 	int ie, ic;
 	int ch[2] = {0, 1};
 	/* Same guard as the other API entry points: nothing to write an error code into. */
 	if(mh == NULL) return MPG123_ERR;
 	if(!(channels & (MPG123_MONO|MPG123_STEREO)))
 	{
 		mh->err = MPG123_BAD_CHANNEL;
 		return MPG123_ERR;
 	}
 	if(!(channels & MPG123_STEREO)) ch[1] = 0;     /* {0,0} */
 	else if(!(channels & MPG123_MONO)) ch[0] = 1; /* {1,1} */
 	if(ratei >= MPG123_RATES)
 	{
 		mh->err = MPG123_BAD_RATE;
 		return MPG123_ERR;
 	}
 	if(ratei < 0) ratei = MPG123_RATES; /* the special one */
 	/* now match the encodings */
 	for(ic = 0; ic < 2; ++ic)
 	{
 		for(ie = 0; ie < MPG123_ENCODINGS; ++ie)
 		if(mpg123_encodings[ie] & encodings) mh->p.audio_caps[ch[ic]][ratei][ie] = 1;
 		if(ch[0] == ch[1]) break; /* no need to do it again */
 	}
 	return MPG123_OK;
 }
 /* Tell which channel configurations are enabled for a rate index and encoding index.
    A negative ratei selects the special (forced rate) slot.
    Returns a bit combination of MPG123_MONO and MPG123_STEREO; 0 for a NULL handle or an index out of range. */
 int mpg123_format_support(mpg123_handle *mh, int ratei, int enci)
 {
 	int supported = 0;
 	int rate_slot;
 	if(mh == NULL) return 0;
 	if(ratei >= MPG123_RATES) return 0;
 	if(enci < 0 || enci >= MPG123_ENCODINGS) return 0;
 	rate_slot = (ratei < 0) ? MPG123_RATES /* the special one */ : ratei;
 	if(mh->p.audio_caps[0][rate_slot][enci]) supported |= MPG123_MONO;
 	if(mh->p.audio_caps[1][rate_slot][enci]) supported |= MPG123_STEREO;
 	return supported;
 }
--- a/src/libmpg123/frame.c
+++ b/src/libmpg123/frame.c
@@ -0,0 +1,920 @@
 #include "mpg123lib_intern.h"
 #include "getcpuflags.h"
 /* How many frames before the first wanted frame the ignore range starts (see fr->ignoreframe). */
 #define IGNORESHIFT 2
 /* that's doubled in decode_ntom.c */
 #define NTOM_MUL (32768)
 /* Round pointer p up to the next multiple of alignment bytes and cast the result to type*.
    A pointer that is already aligned is returned unchanged.
    The whole expansion is parenthesized so that it is safe inside larger expressions;
    note that p and alignment are evaluated more than once. */
 #define aligned_pointer(p,type,alignment) \
 	( (((char*)(p)-(char*)NULL) % (alignment)) \
 	? (type*)((char*)(p) + (alignment) - (((char*)(p)-(char*)NULL) % (alignment))) \
 	: (type*)(p) )
 /* Put the default values into the parameter fields listed here. */
 void frame_default_pars(mpg123_pars *mp)
 {
 	/* Output: full scale, no rate forcing or downsampling, no RVA. */
 	mp->outscale = MAXOUTBURST;
 	mp->force_rate = 0;
 	mp->down_sample = 0;
 	mp->rva = 0;
 	/* Playback speed tweaks are off. */
 	mp->halfspeed = 0;
 	mp->doublespeed = 0;
 	/* The remaining settings: no flags, silent, no ICY interval. */
 	mp->flags = 0;
 	mp->verbose = 0;
 	mp->icy_interval = 0;
 }
 /* Initialize a handle with default parameters; same as frame_init_par() with a NULL parameter set. */
 void frame_init(mpg123_handle *fr)
 {
 	frame_init_par(fr, NULL);
 }
 /* First-time initialization of a handle.
    All buffer pointers are set to NULL (nothing is allocated here), decoder choice and ntom state
    get their defaults, and the parameters are copied from mp or, when mp is NULL, set to defaults. */
 void frame_init_par(mpg123_handle *fr, mpg123_pars *mp)
 {
 	fr->fresh = 1;
 	fr->new_format = 0;
 	/* No buffers yet; frame_outbuffer() and frame_buffers() allocate them. */
 	fr->own_buffer = FALSE;
 	fr->buffer.data = NULL;
 	fr->rawbuffs = NULL;
 	fr->rawdecwin = NULL;
 	fr->conv16to8_buf = NULL;
 	/* Start with the build's default decoder; mmx, sse and dreidnowext form the mmxsse class. */
 	fr->cpu_opts.type = defopt;
 	fr->cpu_opts.class = (defopt == mmx || defopt == sse || defopt == dreidnowext) ? mmxsse : normal;
 	/* these two look unnecessary, check guarantee for synth_ntom_set_step (in control_generic, even)! */
 	fr->ntom_val[0] = NTOM_MUL>>1;
 	fr->ntom_val[1] = NTOM_MUL>>1;
 	fr->ntom_step = NTOM_MUL;
 	/* unnecessary: fr->buffer.size = fr->buffer.fill = 0; */
 	/* -1 makes the first do_rva() call differ from any new scale value. */
 	fr->lastscale = -1;
 	mpg123_reset_eq(fr);
 	fr->rd = NULL;
 	init_icy(&fr->icy);
 	init_id3(fr);
 	/* frame_outbuffer is missing... */
 	/* frame_buffers is missing... that one needs cpu opt setting! */
 	/* after these... frame_reset is needed before starting full decode */
 	fr->af.encoding = 0;
 	fr->af.rate = 0;
 	fr->af.channels = 0;
 	fr->icy.data = NULL;
 	fr->icy.interval = 0;
 	fr->icy.next = 0;
 	fr->to_decode = FALSE;
 	fr->to_ignore = FALSE;
 	fr->decoder_change = 1;
 	fr->err = MPG123_OK;
 	/* Enable all formats first; a provided mp overwrites the whole parameter struct afterwards, */
 	/* while frame_default_pars() only sets the fields it lists. */
 	mpg123_format_all(fr);
 	if(mp == NULL) frame_default_pars(&fr->p);
 	else memcpy(&fr->p, mp, sizeof(struct mpg123_pars_struct));
 }
 mpg123_pars *mpg123_new_pars(int *error)
 {
 	mpg123_pars *mp = malloc(sizeof(struct mpg123_pars_struct));
 	if(mp != NULL){ frame_default_pars(mp); if(error != NULL) *error = MPG123_OK; }
 	else if(error != NULL) *error = MPG123_OUT_OF_MEM;
 	return mp;
 }
 /* Release a parameter set from mpg123_new_pars(); NULL is accepted and ignored. */
 void mpg123_delete_pars(mpg123_pars* mp)
 {
 	/* free() itself does nothing for a NULL pointer. */
 	free(mp);
 }
 /* Reset the equalizer: all 32 bands of both channels get factor 1.0 and have_eq_settings is cleared.
    Returns MPG123_ERR for a NULL handle, MPG123_OK otherwise. */
 int mpg123_reset_eq(mpg123_handle *mh)
 {
 	int i;
 	if(mh == NULL) return MPG123_ERR;
 	mh->have_eq_settings = 0;
 	for(i=0; i < 32; ++i) mh->equalizer[0][i] = mh->equalizer[1][i] = DOUBLE_TO_REAL(1.0);
 	return MPG123_OK;
 }
 /* Make sure the handle owns an output buffer of mpg123_safe_buffer()*AUDIOBUFSIZE bytes.
    An owned buffer of the right size is reused, one of another size is replaced; a buffer
    that is not owned is dropped without freeing. The fill is reset to 0.
    Returns 0 on success, -1 with fr->err = MPG123_OUT_OF_MEM when the allocation fails. */
 int frame_outbuffer(mpg123_handle *fr)
 {
 	size_t wanted = mpg123_safe_buffer()*AUDIOBUFSIZE;
 	if(!fr->own_buffer)
 	{
 		/* Not ours: just forget the pointer. */
 		fr->buffer.data = NULL;
 	}
 	else if(fr->buffer.data != NULL && fr->buffer.size != wanted)
 	{
 		/* Ours, but of the wrong size. */
 		free(fr->buffer.data);
 		fr->buffer.data = NULL;
 	}
 	fr->buffer.size = wanted;
 	if(fr->buffer.data == NULL)
 	{
 		fr->buffer.data = (unsigned char*) malloc(fr->buffer.size);
 		if(fr->buffer.data == NULL)
 		{
 			fr->err = MPG123_OUT_OF_MEM;
 			return -1;
 		}
 	}
 	fr->own_buffer = TRUE;
 	fr->buffer.fill = 0;
 	return 0;
 }
 /* Use a caller-provided output buffer instead of the handle's own one.
    data must not be NULL and size must be at least mpg123_safe_buffer(); otherwise
    mh->err is set to MPG123_BAD_BUFFER. A buffer owned by the handle is freed.
    The handle does not take ownership of data.
    Returns MPG123_OK, or MPG123_ERR on a NULL handle or a bad buffer. */
 int mpg123_replace_buffer(mpg123_handle *mh, unsigned char *data, size_t size)
 {
 	/* Without a handle there is nowhere to store an error code. */
 	if(mh == NULL) return MPG123_ERR;
 	if(data == NULL || size < mpg123_safe_buffer())
 	{
 		mh->err = MPG123_BAD_BUFFER;
 		return MPG123_ERR;
 	}
 	if(mh->own_buffer && mh->buffer.data != NULL) free(mh->buffer.data);
 	mh->own_buffer = FALSE;
 	mh->buffer.data = data;
 	mh->buffer.size = size;
 	mh->buffer.fill = 0;
 	return MPG123_OK;
 }
 /* Allocate the synth work buffers (rawbuffs) and the decode window (rawdecwin) for the decoder
    type in fr->cpu_opts, set up the typed pointers into them and clear the state via
    frame_buffers_reset().
    Returns 0 on success, -1 when an allocation fails (fr->err is not set here). */
 int frame_buffers(mpg123_handle *fr)
 {
 	int buffssize = 0;
 	debug1("frame %p buffer", (void*)fr);
 /*
 	the used-to-be-static buffer of the synth functions, has some subtly different types/sizes
 	2to1, 4to1, ntom, generic, i386: real[2][2][0x110]
 	mmx, sse: short[2][2][0x110]
 	i586(_dither): 4352 bytes; int/long[2][2][0x110]
 	i486: int[2][2][17*FIR_BUFFER_SIZE]
 	altivec: static real __attribute__ ((aligned (16))) buffs[4][4][0x110]
 	Huh, altivec looks like fun. Well, let it be large... then, the 16 byte alignment seems to be implicit on MacOSX malloc anyway.
 	Let's make a reasonable attempt to allocate enough memory...
 	Keep in mind: biggest ones are i486 and altivec (mutually exclusive!), then follows i586 and normal real.
 	mmx/sse use short but also real for resampling.
 	Thus, minimum is 2*2*0x110*sizeof(real).
 */
 	if(fr->cpu_opts.type == altivec) buffssize = 4*4*0x110*sizeof(real);
 #ifdef OPT_I486
 	else if(fr->cpu_opts.type == ivier) buffssize = 2*2*17*FIR_BUFFER_SIZE*sizeof(int);
 #endif
 	else if(fr->cpu_opts.type == ifuenf || fr->cpu_opts.type == ifuenf_dither || fr->cpu_opts.type == dreidnow)
 	buffssize = 2*2*0x110*4; /* don't rely on type real, we need 4352 bytes */
 	/* Never go below the size the generic real-valued synths need. */
 	if(2*2*0x110*sizeof(real) > buffssize)
 	buffssize = 2*2*0x110*sizeof(real);
 	/* Reuse an existing buffer only when its size matches. */
 	if(fr->rawbuffs != NULL && fr->rawbuffss != buffssize)
 	{
 		free(fr->rawbuffs);
 		fr->rawbuffs = NULL;
 	}
 	if(fr->rawbuffs == NULL) fr->rawbuffs = (unsigned char*) malloc(buffssize);
 	if(fr->rawbuffs == NULL) return -1;
 	fr->rawbuffss = buffssize;
 	/* The short and real views both start at the beginning of rawbuffs, i.e. they overlap. */
 	fr->short_buffs[0][0] = (short*) fr->rawbuffs;
 	fr->short_buffs[0][1] = fr->short_buffs[0][0] + 0x110;
 	fr->short_buffs[1][0] = fr->short_buffs[0][1] + 0x110;
 	fr->short_buffs[1][1] = fr->short_buffs[1][0] + 0x110;
 	fr->real_buffs[0][0] = (real*) fr->rawbuffs;
 	fr->real_buffs[0][1] = fr->real_buffs[0][0] + 0x110;
 	fr->real_buffs[1][0] = fr->real_buffs[0][1] + 0x110;
 	fr->real_buffs[1][1] = fr->real_buffs[1][0] + 0x110;
 #ifdef OPT_I486
 	if(fr->cpu_opts.type == ivier)
 	{
 		fr->int_buffs[0][0] = (int*) fr->rawbuffs;
 		fr->int_buffs[0][1] = fr->int_buffs[0][0] + 17*FIR_BUFFER_SIZE;
 		fr->int_buffs[1][0] = fr->int_buffs[0][1] + 17*FIR_BUFFER_SIZE;
 		fr->int_buffs[1][1] = fr->int_buffs[1][0] + 17*FIR_BUFFER_SIZE;
 	}
 #endif
 #ifdef OPT_ALTIVEC
 	if(fr->cpu_opts.type == altivec)
 	{
 		int i,j;
 		fr->areal_buffs[0][0] = (real*) fr->rawbuffs;
 		for(i=0; i<4; ++i) for(j=0; j<4; ++j)
 		fr->areal_buffs[i][j] = fr->areal_buffs[0][0] + (i*4+j)*0x110;
 	}
 #endif
 	/* now the different decwins... all of the same size, actually */
 	/* The MMX ones want 32byte alignment, which I'll try to ensure manually */
 	{
 		int decwin_size = (512+32)*sizeof(real);
 		/* The window memory is always released and allocated anew, unlike rawbuffs. */
 		if(fr->rawdecwin != NULL) free(fr->rawdecwin);
 #ifdef OPT_MMXORSSE
 #ifdef OPT_MULTI
 		if(fr->cpu_opts.class == mmxsse)
 		{
 #endif
 			/* decwin_mmx will share, decwins will be appended ... sizeof(float)==4 */
 			if(decwin_size < (512+32)*4) decwin_size = (512+32)*4;
 			decwin_size += (512+32)*4 + 32; /* the second window + alignment zone */
 			/* (512+32)*4/32 == 2176/32 == 68, so one decwin block retains alignment */
 #ifdef OPT_MULTI
 		}
 #endif
 #endif
 		fr->rawdecwin = (unsigned char*) malloc(decwin_size);
 		if(fr->rawdecwin == NULL) return -1;
 		fr->decwin = (real*) fr->rawdecwin;
 #ifdef OPT_MMXORSSE
 #ifdef OPT_MULTI
 		if(fr->cpu_opts.class == mmxsse)
 		{
 #endif
 			/* align decwin, assign that to decwin_mmx, append decwins */
 			/* I need to add to decwin what is missing to the next full 32 byte -- also I want to make gcc -pedantic happy... */
 			fr->decwin = aligned_pointer(fr->rawdecwin,real,32);
 			debug1("aligned decwin: %p", (void*)fr->decwin);
 			fr->decwin_mmx = (float*)fr->decwin;
 			fr->decwins = fr->decwin_mmx+512+32;
 #ifdef OPT_MULTI
 		}
 		else debug("no decwins/decwin_mmx for that class");
 #endif
 #endif
 	}
 	frame_buffers_reset(fr);
 	debug1("frame %p buffer done", (void*)fr);
 	return 0;
 }
 /* Clear the decoding state kept in the handle's buffers: bitstream space, ssave, the synth
    buffers and the hybrid blocks; the output buffer fill is set to 0. Always returns 0. */
 int frame_buffers_reset(mpg123_handle *fr)
 {
 	/* Bitstream buffers: wipe them and point both current and old buffer at bsspace[1]. */
 	/* Wondering: could it be actually _wanted_ to retain buffer contents over different files? (special gapless / cut stuff) */
 	memset(fr->bsspace, 0, 2*(MAXFRAMESIZE+512));
 	fr->bsnum = 0;
 	fr->bsbuf = fr->bsspace[1];
 	fr->bsbufold = fr->bsbuf;
 	memset(fr->ssave, 0, 34);
 	/* Synth and hybrid state. */
 	memset(fr->rawbuffs, 0, fr->rawbuffss);
 	memset(fr->hybrid_block, 0, sizeof(real)*2*2*SBLIMIT*SSLIMIT);
 	fr->hybrid_blc[0] = fr->hybrid_blc[1] = 0;
 	/* Not totally, but quite, sure that decwin(s) doesn't need cleaning. */
 	/* hm, reset buffer fill... did we do a flush? */
 	fr->buffer.fill = 0;
 	return 0;
 }
 /* Drop any stored ICY data and zero the ICY interval and next counters. */
 void frame_icy_reset(mpg123_handle* fr)
 {
 	/* free() accepts NULL, so no check is needed. */
 	free(fr->icy.data);
 	fr->icy.data = NULL;
 	fr->icy.next = 0;
 	fr->icy.interval = 0;
 }
 /* Prepare the handle for a new track.
    Clears the decoding buffers (frame_buffers_reset(), which needs frame_buffers() to have run),
    drops ICY and ID3 data and resets the per-track counters and positions.
    Nothing is allocated here. Always returns 0. */
 int frame_reset(mpg123_handle* fr)
 {
 	frame_buffers_reset(fr);
 	frame_icy_reset(fr);
 	fr->metaflags = 0;
 	fr->outblock = mpg123_safe_buffer();
 	fr->num = -1;
 	fr->clip = 0;
 	fr->oldhead = 0;
 	fr->firsthead = 0;
 	fr->vbr = MPG123_CBR;
 	fr->abr_rate = 0;
 	fr->track_frames = 0;
 	fr->mean_frames = 0;
 	fr->mean_framesize = 0;
 	/* -1 here makes the next do_rva() see a changed scale. */
 	fr->lastscale = -1;
 	/* RVA: level -1 is what get_rva() treats as "no data" for that slot. */
 	fr->rva.level[0] = -1;
 	fr->rva.level[1] = -1;
 	fr->rva.gain[0] = 0;
 	fr->rva.gain[1] = 0;
 	fr->rva.peak[0] = 0;
 	fr->rva.peak[1] = 0;
 	/* Empty seek index, one frame per entry. */
 	fr->index.fill = 0;
 	fr->index.step = 1;
 	fr->fsizeold = 0;
 	fr->do_recover = 0;
 	/* Play from frame 0; lastframe -1 is used as "no end frame set". */
 	fr->firstframe = 0;
 	fr->ignoreframe = fr->firstframe-IGNORESHIFT;
 	fr->lastframe = -1;
 	fr->fresh = 1;
 	fr->new_format = 0;
 #ifdef GAPLESS
 	frame_gapless_init(fr,0,0);
 	fr->lastoff = 0;
 	fr->firstoff = 0;
 #endif
 	fr->bo[0] = 1; /* the usual bo */
 	fr->bo[1] = 0; /* ditherindex */
 #ifdef OPT_I486
 	/* Builds with OPT_I486 overwrite both values set just above. */
 	fr->bo[0] = fr->bo[1] = FIR_SIZE-1;
 #endif
 	reset_id3(fr);
 	reset_icy(&fr->icy);
 	fr->halfphase = 0; /* here or indeed only on first-time init? */
 	fr->to_decode = FALSE;
 	return 0;
 }
 /* Free the synth buffers, the decode window and the 16to8 conversion table and
    set their pointers to NULL. The output buffer is not touched here. */
 void frame_free_buffers(mpg123_handle *fr)
 {
 	/* free() ignores NULL pointers, so unallocated buffers are fine. */
 	free(fr->rawbuffs);
 	free(fr->rawdecwin);
 	free(fr->conv16to8_buf);
 	fr->rawbuffs = NULL;
 	fr->rawdecwin = NULL;
 	fr->conv16to8_buf = NULL;
 }
 /* Release everything the handle holds: the output buffer (only if owned), the decoder
    buffers and the ID3 and ICY data. */
 void frame_exit(mpg123_handle *fr)
 {
 	if(fr->own_buffer)
 	{
 		/* free() ignores a NULL pointer. */
 		free(fr->buffer.data);
 	}
 	fr->buffer.data = NULL;
 	frame_free_buffers(fr);
 	exit_id3(fr);
 	clear_icy(&fr->icy);
 }
 /* Print the seek index to out, one line per entry:
    "[entry] frame: position (+difference to previous entry)".
    Returns MPG123_ERR for a NULL handle or, with fr->err = MPG123_ERR_NULL, a NULL stream;
    MPG123_OK otherwise. */
 int mpg123_print_index(mpg123_handle *fr, FILE* out)
 {
 	size_t c;
 	if(fr == NULL) return MPG123_ERR;
 	if(out == NULL)
 	{
 		fr->err = MPG123_ERR_NULL;
 		return MPG123_ERR;
 	}
 	for(c=0; c < fr->index.fill;++c)
 	{
 		/* Cast the whole product: casting only c leaves the argument with the type of the */
 		/* multiplication, which need not be the unsigned long that %lu expects. */
 		fprintf(out, "[%lu] %lu: %li (+%li)\n",
 		        (unsigned long) c,
 		        (unsigned long) (c*fr->index.step),
 		        (long) fr->index.data[c],
 		        (long) (c ? fr->index.data[c]-fr->index.data[c-1] : 0));
 	}
 	return MPG123_OK;
 }
 /* Fill mi with the properties of the current frame header stored in the handle.
    Returns MPG123_ERR for a NULL handle or (with mh->err = MPG123_ERR_NULL) a NULL mi,
    MPG123_OK otherwise. */
 int mpg123_info(mpg123_handle *mh, struct mpg123_frameinfo *mi)
 {
 	if(mh == NULL) return MPG123_ERR;
 	if(mi == NULL)
 	{
 		mh->err = MPG123_ERR_NULL;
 		return MPG123_ERR;
 	}
 	/* MPEG version: 2.5 wins over the lsf flag. */
 	if(mh->mpeg25) mi->version = MPG123_2_5;
 	else if(mh->lsf) mi->version = MPG123_2_0;
 	else mi->version = MPG123_1_0;
 	mi->layer = mh->lay;
 	mi->rate = frame_freq(mh);
 	/* Channel mode; any other value leaves mi->mode untouched. */
 	switch(mh->mode)
 	{
 		case 0:
 			mi->mode = MPG123_M_STEREO;
 		break;
 		case 1:
 			mi->mode = MPG123_M_JOINT;
 		break;
 		case 2:
 			mi->mode = MPG123_M_DUAL;
 		break;
 		case 3:
 			mi->mode = MPG123_M_MONO;
 		break;
 		default: error("That mode cannot be!");
 	}
 	mi->mode_ext = mh->mode_ext;
 	mi->framesize = mh->framesize+4; /* Include header. */
 	mi->emphasis = mh->emphasis;
 	/* Header bits collected into one flag field. */
 	mi->flags = 0;
 	if(mh->error_protection) mi->flags |= MPG123_CRC;
 	if(mh->copyright) mi->flags |= MPG123_COPYRIGHT;
 	if(mh->extension) mi->flags |= MPG123_PRIVATE;
 	if(mh->original) mi->flags |= MPG123_ORIGINAL;
 	/* Bitrate information. */
 	mi->bitrate  = frame_bitrate(mh);
 	mi->abr_rate = mh->abr_rate;
 	mi->vbr = mh->vbr;
 	return MPG123_OK;
 }
 /*
 	Look up the index entry to start from when seeking to want_frame.
 	Entry number want_frame/step is used, or the last entry when that is past the filled part.
 	The frame number of the chosen entry goes to *get_frame and its stored position is returned;
 	with an empty index both are 0 (start of the file).
 	The caller then steps from there to just before the wanted frame with read_frame.
 	Do not care about the stuff that was in buffer but not played back;
 	everything that left the decoder is counted as played.
 	Decide if you want low latency reaction and accurate timing info or stable long-time playback with buffer!
 */
 off_t frame_index_find(mpg123_handle *fr, off_t want_frame, off_t* get_frame)
 {
 	off_t start_pos = 0;
 	*get_frame = 0;
 	if(fr->index.fill)
 	{
 		/* Entry number n describes frame n*step. */
 		size_t entry = want_frame/fr->index.step;
 		if(entry >= fr->index.fill)
 		{
 			entry = fr->index.fill - 1;
 		}
 		start_pos = fr->index.data[entry];
 		*get_frame = entry*fr->index.step;
 	}
 	debug2("index: 0x%lx for frame %li", (unsigned long)start_pos, (long) *get_frame);
 	return start_pos;
 }
 /* Convert a count of input samples to output samples for the current down_sample mode:
    modes 0 to 2 shift right by the mode, mode 3 asks ntom_ins2outs().
    Any other mode logs an error and gives 0. */
 off_t frame_ins2outs(mpg123_handle *fr, off_t ins)
 {
 	if(fr->down_sample >= 0 && fr->down_sample <= 2)
 	{
 		return ins>>fr->down_sample;
 	}
 	if(fr->down_sample == 3)
 	{
 		return ntom_ins2outs(fr, ins);
 	}
 	error("Bad down_sample ... should not be possible!!");
 	return 0;
 }
 /* Output sample count of num frames for the current down_sample mode:
    modes 0 to 2 use (spf(fr)>>mode) samples per frame, mode 3 asks ntom_frmouts().
    Any other mode logs an error and gives 0. */
 off_t frame_outs(mpg123_handle *fr, off_t num)
 {
 	if(fr->down_sample >= 0 && fr->down_sample <= 2)
 	{
 		return (spf(fr)>>fr->down_sample)*num;
 	}
 	if(fr->down_sample == 3)
 	{
 		return ntom_frmouts(fr, num);
 	}
 	error("Bad down_sample ... should not be possible!!");
 	return 0;
 }
 /* Frame number that contains output sample offset outs for the current down_sample mode:
    modes 0 to 2 divide by (spf(fr)>>mode), mode 3 asks ntom_frameoff().
    Any other mode logs an error and gives 0. */
 off_t frame_offset(mpg123_handle *fr, off_t outs)
 {
 	if(fr->down_sample >= 0 && fr->down_sample <= 2)
 	{
 		return outs/(spf(fr)>>fr->down_sample);
 	}
 	if(fr->down_sample == 3)
 	{
 		return ntom_frameoff(fr, outs);
 	}
 	error("Bad down_sample ... should not be possible!!");
 	return 0;
 }
 #ifdef GAPLESS
 /* Store the gapless range [b, e), given in _input_ samples.
    The output sample positions are zeroed here; frame_gapless_realinit() computes them. */
 void frame_gapless_init(mpg123_handle *fr, off_t b, off_t e)
 {
 	fr->begin_s = b;
 	fr->end_s = e;
 	/* These will get proper values later, from above plus resampling info. */
 	fr->begin_os = 0;
 	fr->end_os = 0;
 	/* off_t need not have the size of long: cast to match the format, as the other debug calls do. */
 	debug2("frame_gapless_init: from %li to %li samples", (long) fr->begin_s, (long) fr->end_s);
 }
 /* Compute the gapless range in output samples from the stored input sample range,
    using the current down_sample setting via frame_ins2outs(). */
 void frame_gapless_realinit(mpg123_handle *fr)
 {
 	fr->begin_os = frame_ins2outs(fr, fr->begin_s);
 	fr->end_os   = frame_ins2outs(fr, fr->end_s);
 	/* off_t need not have the size of long: cast to match the format, as the other debug calls do. */
 	debug2("frame_gapless_realinit: from %li to %li samples", (long) fr->begin_os, (long) fr->end_os);
 }
 #endif
 /* The frame seek... This is not simply the seek to fe*spf(fr) samples in output because we think of _input_ frames here.
   Seek to frame offset 1 may be just seek to 200 samples offset in output since the beginning of first frame is delay/padding.
   Hm, is that right? OK for the padding stuff, but actually, should the decoder delay be better totally hidden or not?
   With gapless, even the whole frame position could be advanced further than requested (since Homey don't play dat).
   Sets fr->firstframe and fr->ignoreframe, plus firstoff, lastframe and lastoff in GAPLESS builds. */
 void frame_set_frameseek(mpg123_handle *fr, off_t fe)
 {
 	fr->firstframe = fe;
 #ifdef GAPLESS
 	if(fr->p.flags & MPG123_GAPLESS)
 	{
 		/* Take care of the beginning... */
 		off_t beg_f = frame_offset(fr, fr->begin_os);
 		/* A target at or before the gapless start is moved to the start frame, with the sample offset inside it. */
 		if(fe <= beg_f)
 		{
 			fr->firstframe = beg_f;
 			fr->firstoff   = fr->begin_os - frame_outs(fr, beg_f);
 		}
 		else fr->firstoff = 0;
 		/* The end is set once for a track at least, on the frame_set_frameseek called in get_next_frame() */
 		/* Note that lastframe keeps its previous value when end_os is not positive. */
 		if(fr->end_os > 0)
 		{
 			fr->lastframe  = frame_offset(fr,fr->end_os);
 			fr->lastoff    = fr->end_os - frame_outs(fr, fr->lastframe);
 		} else fr->lastoff = 0;
 	} else { fr->firstoff = fr->lastoff = 0; fr->lastframe = -1; }
 #endif
 	/* For layer 3 the ignore range starts IGNORESHIFT frames before the first wanted frame. */
 	fr->ignoreframe = fr->lay == 3 ? fr->firstframe-IGNORESHIFT : fr->firstframe;
 #ifdef GAPLESS
 	debug5("frame_set_frameseek: begin at %li frames and %li samples, end at %li and %li; ignore from %li",
 	       (long) fr->firstframe, (long) fr->firstoff,
 	       (long) fr->lastframe,  (long) fr->lastoff, (long) fr->ignoreframe);
 #else
 	debug3("frame_set_frameseek: begin at %li frames, end at %li; ignore from %li",
 	       (long) fr->firstframe, (long) fr->lastframe, (long) fr->ignoreframe);
 #endif
 }
 /* Sample accurate seek prepare for decoder. */
 /* This gets unadjusted output samples and takes resampling into account */
 /* Sets fr->firstframe to the frame containing sp and fr->ignoreframe accordingly. */
 /* The sample offset inside that frame (firstoff) is only stored in GAPLESS builds. */
 void frame_set_seek(mpg123_handle *fr, off_t sp)
 {
 	fr->firstframe = frame_offset(fr, sp);
 	/* For layer 3 the ignore range starts IGNORESHIFT frames before the first wanted frame. */
 	fr->ignoreframe = fr->lay == 3 ? fr->firstframe-IGNORESHIFT : fr->firstframe;
 #ifdef GAPLESS /* The sample offset is used for non-gapless mode, too! */
 	fr->firstoff = sp - frame_outs(fr, fr->firstframe);
 	debug5("frame_set_seek: begin at %li frames and %li samples, end at %li and %li; ignore from %li",
 	       (long) fr->firstframe, (long) fr->firstoff,
 	       (long) fr->lastframe,  (long) fr->lastoff, (long) fr->ignoreframe);
 #else
 	debug3("frame_set_seek: begin at %li frames, end at %li; ignore from %li",
 	       (long) fr->firstframe, (long) fr->lastframe, (long) fr->ignoreframe);
 #endif
 }
 /* Unadjusted output sample position of the current seek target:
    the start of firstframe, plus firstoff in GAPLESS builds. */
 off_t frame_tell_seek(mpg123_handle *fr)
 {
 	off_t target = frame_outs(fr, fr->firstframe);
 #ifdef GAPLESS
 	target = target + fr->firstoff;
 #endif
 	return target;
 }
 /* to vanish */
 /* Store the given output format (encoding, channel count, rate) in the handle as it is. */
 void frame_outformat(mpg123_handle *fr, int format, int channels, long rate)
 {
 	fr->af.rate = rate;
 	fr->af.channels = channels;
 	fr->af.encoding = format;
 }
 /* set synth functions for current frame, optimizations handled by opt_* macros */
 /* Picks fr->synth and fr->synth_mono by down_sample mode, 8bit output and channel count, */
 /* and builds the 16to8 conversion table when 8bit output is selected. */
 /* Returns 0 on success, -1 when make_conv16to8_table() fails. */
 int set_synth_functions(mpg123_handle *fr)
 {
 	/* Used as table index without a range check; the tables have entries 0 to 3. */
 	int ds = fr->down_sample;
 	int p8=0;
 	/* NOTE(review): both tables are static and their 1to1 entries are rewritten on every call, */
 	/* so they are shared by all handles -- confirm that concurrent calls cannot happen. */
 	/* Index order: [8bit?][down_sample] */
 	static func_synth funcs[2][4] = { 
 		{ NULL,
 		  synth_2to1,
 		  synth_4to1,
 		  synth_ntom } ,
 		{ NULL,
 		  synth_2to1_8bit,
 		  synth_4to1_8bit,
 		  synth_ntom_8bit } 
 	};
 	/* Index order: [0: two output channels, 1: otherwise][8bit?][down_sample] */
 	static func_synth_mono funcs_mono[2][2][4] = {    
 		{ { NULL ,
 		    synth_2to1_mono2stereo ,
 		    synth_4to1_mono2stereo ,
 		    synth_ntom_mono2stereo } ,
 		  { NULL ,
 		    synth_2to1_8bit_mono2stereo ,
 		    synth_4to1_8bit_mono2stereo ,
 		    synth_ntom_8bit_mono2stereo } } ,
 		{ { NULL ,
 		    synth_2to1_mono ,
 		    synth_4to1_mono ,
 		    synth_ntom_mono } ,
 		  { NULL ,
 		    synth_2to1_8bit_mono ,
 		    synth_4to1_8bit_mono ,
 		    synth_ntom_8bit_mono } }
 	};
 	/* possibly non-constant entries filled here */
 	funcs[0][0] = (func_synth) opt_synth_1to1(fr);
 	funcs[1][0] = (func_synth) opt_synth_1to1_8bit(fr);
 	funcs_mono[0][0][0] = (func_synth_mono) opt_synth_1to1_mono2stereo(fr);
 	funcs_mono[0][1][0] = (func_synth_mono) opt_synth_1to1_8bit_mono2stereo(fr);
 	funcs_mono[1][0][0] = (func_synth_mono) opt_synth_1to1_mono(fr);
 	funcs_mono[1][1][0] = (func_synth_mono) opt_synth_1to1_8bit_mono(fr);
 	if(MPG123_ENC_8(fr->af.encoding)) p8 = 1;
 	fr->synth = funcs[p8][ds];
 	fr->synth_mono = funcs_mono[fr->af.channels==2 ? 0 : 1][p8][ds];
 	if(p8)
 	{
 		if(make_conv16to8_table(fr) != 0)
 		{
 			/* it's a bit more work to get proper error propagation up */
 			return -1;
 		}
 	}
 	return 0;
 }
 /* Change the volume relative to the current setting: the new factor is change plus the
    present outscale/MAXOUTBURST, handed on to mpg123_volume().
    Returns MPG123_ERR for a NULL handle, else the result of mpg123_volume(). */
 int mpg123_volume_change(mpg123_handle *mh, double change)
 {
 	if(mh != NULL)
 	{
 		return mpg123_volume(mh, change + (double) mh->p.outscale / MAXOUTBURST);
 	}
 	return MPG123_ERR;
 }
 /*
 	Set the base volume as a factor of MAXOUTBURST and re-apply the scaling
 	via do_rva(). A negative vol leaves the stored base volume untouched, but
 	do_rva() is still run.
 	Returns MPG123_ERR for a NULL handle, MPG123_OK otherwise.
 */
 int mpg123_volume(mpg123_handle *mh, double vol)
 {
 	if(mh == NULL)
 	{
 		return MPG123_ERR;
 	}
 	if(vol >= 0)
 	{
 		mh->p.outscale = (double) MAXOUTBURST * vol;
 	}
 	do_rva(mh);
 	return MPG123_OK;
 }
 /*
 	Look up the RVA (relative volume adjustment) values that apply to the handle.
 	With fr->p.rva == 2 the second slot is preferred if its level is set
 	(level != -1), otherwise the first slot is used.
 	peak and gain are optional outputs; they receive -1 and 0 when no RVA
 	value is available or RVA is switched off.
 	Returns 1 if RVA values were found, 0 otherwise.
 */
 static int get_rva(mpg123_handle *fr, double *peak, double *gain)
 {
 	double found_peak = -1;
 	double found_gain = 0;
 	int found = 0;
 	if(fr->p.rva)
 	{
 		/* Should one assume a zero RVA as no RVA? */
 		int slot = (fr->p.rva == 2 && fr->rva.level[1] != -1) ? 1 : 0;
 		if(fr->rva.level[slot] != -1)
 		{
 			found_peak = fr->rva.peak[slot];
 			found_gain = fr->rva.gain[slot];
 			found = 1; /* Success. */
 		}
 	}
 	if(peak != NULL) *peak = found_peak;
 	if(gain != NULL) *gain = found_gain;
 	return found;
 }
 /*
 	Adjust the volume, taking both fr->p.outscale and the RVA values into account.
 	The RVA gain (in dB) is turned into a linear factor, the resulting scale is
 	limited so that peak*scale does not exceed MAXOUTBURST, and the decode
 	tables are rebuilt if the scale differs from fr->lastscale.
 */
 void do_rva(mpg123_handle *fr)
 {
 	double rva_peak = 0;
 	double rva_gain = 0;
 	double factor = 1;
 	scale_t scale;
 	if(get_rva(fr, &rva_peak, &rva_gain))
 	{
 		if(NOQUIET && fr->p.verbose > 1) fprintf(stderr, "Note: doing RVA with gain %f\n", rva_gain);
 		factor = pow(10,rva_gain/20);
 	}
 	scale = fr->p.outscale*factor;
 	/* get_rva() reports a peak of -1 when none is known; the product is compared as is */
 	if((rva_peak*scale) > MAXOUTBURST)
 	{
 		scale = (scale_t) ((double) MAXOUTBURST/rva_peak);
 		warning2("limiting scale value to %li to prevent clipping with indicated peak factor of %f", scale, rva_peak);
 	}
 	/* nothing to do if the scale did not change; the first setting is forced with fr->lastscale < 0 */
 	if(scale == fr->lastscale) return;

 	debug3("changing scale value from %li to %li (peak estimated to %li)", fr->lastscale != -1 ? fr->lastscale : fr->p.outscale, scale, (long) (scale*rva_peak));
 	fr->lastscale = scale;
 	opt_make_decode_tables(fr); /* the actual work */
 }
 /*
 	Query the volume settings; every output pointer is optional.
 	base:   the configured volume (mh->p.outscale relative to MAXOUTBURST)
 	really: the scale last applied (mh->lastscale relative to MAXOUTBURST)
 	rva_db: the RVA gain as reported by get_rva() (0 if none applies)
 	Returns MPG123_ERR for a NULL handle, MPG123_OK otherwise.
 */
 int mpg123_getvolume(mpg123_handle *mh, double *base, double *really, double *rva_db)
 {
 	if(mh == NULL)
 	{
 		return MPG123_ERR;
 	}
 	if(base != NULL)
 	{
 		*base = (double)mh->p.outscale/MAXOUTBURST;
 	}
 	if(really != NULL)
 	{
 		*really = (double)mh->lastscale/MAXOUTBURST;
 	}
 	get_rva(mh, NULL, rva_db);
 	return MPG123_OK;
 }
 /*
 	Choose the decoder optimization for a handle.

 	cpu: name of the wanted decoder (compared case-insensitively); NULL, an
 	     empty string or "auto" lets the function pick the first decoder that
 	     is compiled in and passes its checks.

 	Without OPT_MULTI there is exactly one decoder in the build; the call only
 	succeeds if that one is requested (or auto is chosen).
 	With OPT_MULTI the candidates are tried in a fixed order and the function
 	pointers in fr->cpu_opts are filled for the first match. The x86 decoders
 	behind cpu_i586() additionally check the flags in the global cpu_flags.

 	Returns 1 if a decoder has been set, 0 otherwise.
 */
 int  frame_cpu_opt(mpg123_handle *fr, const char* cpu)
 {
 	const char* chosen = ""; /* the chosen decoder opt as string */
 	int auto_choose = 0;
 	int done = 0;
 	if(   (cpu == NULL)
 	   || (cpu[0] == 0)
 	   || !strcasecmp(cpu, "auto") )
 	auto_choose = 1;
 #ifndef OPT_MULTI
 	{
 		char **sd = mpg123_decoders(); /* this contains _one_ decoder */
 		if(!auto_choose && strcasecmp(cpu, sd[0])) done = 0;
 		else
 		{
 			chosen = sd[0];
 			done = 1;
 		}
 	}
 #else
 	/* covers any i386+ cpu; they actually differ only in the synth_1to1 function... */
 	#ifdef OPT_X86
 	/* defaults that the MMX/SSE/3DNow branches below may override */
 	#ifdef OPT_MMXORSSE
 	fr->cpu_opts.make_decode_tables   = make_decode_tables;
 	fr->cpu_opts.init_layer3_gainpow2 = init_layer3_gainpow2;
 	fr->cpu_opts.init_layer2_table    = init_layer2_table;
 	#endif
 	#ifdef OPT_3DNOW
 	fr->cpu_opts.dct36 = dct36;
 	#endif
 	#ifdef OPT_3DNOWEXT
 	fr->cpu_opts.dct36 = dct36;
 	#endif
 	if(cpu_i586(cpu_flags))
 	{
 		debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext);
 		#ifdef OPT_3DNOWEXT
 		if(   !done && (auto_choose || !strcasecmp(cpu, "3dnowext"))
 		   && cpu_3dnow(cpu_flags)
 		   && cpu_3dnowext(cpu_flags)
 		   && cpu_mmx(cpu_flags) )
 		{
 			int go = 1;
 			if(fr->p.force_rate)
 			{
 				#if defined(K6_FALLBACK) || defined(PENTIUM_FALLBACK)
 				if(!auto_choose){ if(NOQUIET) error("I refuse to choose 3DNowExt as this will screw up with forced rate!"); }
 				else if(VERBOSE) fprintf(stderr, "Note: Not choosing 3DNowExt because flexible rate not supported.\n");
 				go = 0;
 				#else
 				if(NOQUIET) error("You will hear some awful sound because of flexible rate being chosen with 3DNowExt decoder!");
 				#endif
 			}
 			if(go){ /* temporary hack for flexible rate bug, not going indent this - fix it instead! */
 			chosen = "3DNowExt";
 			fr->cpu_opts.type = dreidnowext;
 			fr->cpu_opts.class = mmxsse;
 			fr->cpu_opts.dct36 = dct36_3dnowext;
 			fr->cpu_opts.synth_1to1 = synth_1to1_3dnowext;
 			fr->cpu_opts.dct64 = dct64_mmx; /* only use the 3dnow version in the synth_1to1_sse */
 			fr->cpu_opts.make_decode_tables   = make_decode_tables_mmx;
 			fr->cpu_opts.init_layer3_gainpow2 = init_layer3_gainpow2_mmx;
 			fr->cpu_opts.init_layer2_table    = init_layer2_table_mmx;
 			fr->cpu_opts.mpl_dct64 = dct64_3dnowext;
 			done = 1;
 			}
 		}
 		#endif
 		#ifdef OPT_SSE
 		if(   !done && (auto_choose || !strcasecmp(cpu, "sse"))
 		   && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) )
 		{
 			int go = 1;
 			if(fr->p.force_rate)
 			{
 				#ifdef PENTIUM_FALLBACK
 				if(!auto_choose){ if(NOQUIET) error("I refuse to choose SSE as this will screw up with forced rate!"); }
 				else if(VERBOSE) fprintf(stderr, "Note: Not choosing SSE because flexible rate not supported.\n");
 				go = 0;
 				#else
 				if(NOQUIET) error("You will hear some awful sound because of flexible rate being chosen with SSE decoder!");
 				#endif
 			}
 			if(go){ /* temporary hack for flexible rate bug, not going indent this - fix it instead! */
 			chosen = "SSE";
 			fr->cpu_opts.type = sse;
 			fr->cpu_opts.class = mmxsse;
 			fr->cpu_opts.synth_1to1 = synth_1to1_sse;
 			fr->cpu_opts.dct64 = dct64_mmx; /* only use the sse version in the synth_1to1_sse */
 			fr->cpu_opts.make_decode_tables   = make_decode_tables_mmx;
 			fr->cpu_opts.init_layer3_gainpow2 = init_layer3_gainpow2_mmx;
 			fr->cpu_opts.init_layer2_table    = init_layer2_table_mmx;
 			fr->cpu_opts.mpl_dct64 = dct64_sse;
 			done = 1;
 			}
 		}
 		#endif
 		#ifdef OPT_3DNOW
 		fr->cpu_opts.dct36 = dct36;
 		/* TODO: make autodetection for _all_ x86 optimizations (maybe just for i586+ and keep separate 486 build?) */
 		/* check cpuflags bit 31 (3DNow!) and 23 (MMX) */
 		if(    !done && (auto_choose || !strcasecmp(cpu, "3dnow"))
 		    && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) )
 		{
 			chosen = "3DNow";
 			fr->cpu_opts.type = dreidnow;
 			fr->cpu_opts.dct36 = dct36_3dnow; /* 3DNow! optimized dct36() */
 			fr->cpu_opts.synth_1to1 = synth_1to1_3dnow;
 			fr->cpu_opts.dct64 = dct64_i386; /* use the 3dnow one? */
 			done = 1;
 		}
 		#endif
 		#ifdef OPT_MMX
 		if(   !done && (auto_choose || !strcasecmp(cpu, "mmx"))
 		   && cpu_mmx(cpu_flags) )
 		{
 			int go = 1;
 			if(fr->p.force_rate)
 			{
 				#ifdef PENTIUM_FALLBACK
 				if(!auto_choose){ if(NOQUIET) error("I refuse to choose MMX as this will screw up with forced rate!"); }
 				else if(VERBOSE) fprintf(stderr, "Note: Not choosing MMX because flexible rate not supported.\n");
 				go = 0;
 				#else
 				/* honour the quiet setting like the 3DNowExt and SSE branches do */
 				if(NOQUIET) error("You will hear some awful sound because of flexible rate being chosen with MMX decoder!");
 				#endif
 			}
 			if(go){ /* temporary hack for flexible rate bug, not going indent this - fix it instead! */
 			chosen = "MMX";
 			fr->cpu_opts.type = mmx;
 			fr->cpu_opts.class = mmxsse;
 			fr->cpu_opts.synth_1to1 = synth_1to1_mmx;
 			fr->cpu_opts.dct64 = dct64_mmx;
 			fr->cpu_opts.make_decode_tables   = make_decode_tables_mmx;
 			fr->cpu_opts.init_layer3_gainpow2 = init_layer3_gainpow2_mmx;
 			fr->cpu_opts.init_layer2_table    = init_layer2_table_mmx;
 			done = 1;
 			}
 		}
 		#endif
 		#ifdef OPT_I586
 		if(!done && (auto_choose || !strcasecmp(cpu, "i586")))
 		{
 			chosen = "i586/pentium";
 			fr->cpu_opts.type = ifuenf;
 			fr->cpu_opts.synth_1to1 = synth_1to1_i586;
 			fr->cpu_opts.synth_1to1_i586_asm = synth_1to1_i586_asm;
 			fr->cpu_opts.dct64 = dct64_i386;
 			done = 1;
 		}
 		#endif
 		#ifdef OPT_I586_DITHER
 		if(!done && (auto_choose || !strcasecmp(cpu, "i586_dither")))
 		{
 			chosen = "dithered i586/pentium";
 			fr->cpu_opts.type = ifuenf_dither;
 			fr->cpu_opts.synth_1to1 = synth_1to1_i586;
 			fr->cpu_opts.dct64 = dct64_i386;
 			fr->cpu_opts.synth_1to1_i586_asm = synth_1to1_i586_asm_dither;
 			done = 1;
 		}
 		#endif
 	}
 	#ifdef OPT_I486 /* that won't cooperate nicely in multi opt mode - forcing i486 in layer3.c */
 	if(!done && (auto_choose || !strcasecmp(cpu, "i486")))
 	{
 		chosen = "i486";
 		fr->cpu_opts.type = ivier;
 		fr->cpu_opts.synth_1to1 = synth_1to1_i386; /* i486 function is special */
 		fr->cpu_opts.dct64 = dct64_i386;
 		done = 1;
 	}
 	#endif
 	#ifdef OPT_I386
 	if(!done && (auto_choose || !strcasecmp(cpu, "i386")))
 	{
 		chosen = "i386";
 		fr->cpu_opts.type = idrei;
 		fr->cpu_opts.synth_1to1 = synth_1to1_i386;
 		fr->cpu_opts.dct64 = dct64_i386;
 		done = 1;
 	}
 	#endif
 	if(done) /* set common x86 functions */
 	{
 		fr->cpu_opts.synth_1to1_mono = synth_1to1_mono_i386;
 		fr->cpu_opts.synth_1to1_mono2stereo = synth_1to1_mono2stereo_i386;
 		fr->cpu_opts.synth_1to1_8bit = synth_1to1_8bit_i386;
 		fr->cpu_opts.synth_1to1_8bit_mono = synth_1to1_8bit_mono_i386;
 		fr->cpu_opts.synth_1to1_8bit_mono2stereo = synth_1to1_8bit_mono2stereo_i386;
 	}
 	#endif /* OPT_X86 */
 	#ifdef OPT_ALTIVEC
 	if(!done && (auto_choose || !strcasecmp(cpu, "altivec")))
 	{
 		chosen = "AltiVec";
 		fr->cpu_opts.type = altivec;
 		fr->cpu_opts.dct64 = dct64_altivec;
 		fr->cpu_opts.synth_1to1 = synth_1to1_altivec;
 		fr->cpu_opts.synth_1to1_mono = synth_1to1_mono_altivec;
 		fr->cpu_opts.synth_1to1_mono2stereo = synth_1to1_mono2stereo_altivec;
 		fr->cpu_opts.synth_1to1_8bit = synth_1to1_8bit_altivec;
 		fr->cpu_opts.synth_1to1_8bit_mono = synth_1to1_8bit_mono_altivec;
 		fr->cpu_opts.synth_1to1_8bit_mono2stereo = synth_1to1_8bit_mono2stereo_altivec;
 		done = 1;
 	}
 	#endif
 	#ifdef OPT_GENERIC
 	if(!done && (auto_choose || !strcasecmp(cpu, "generic")))
 	{
 		chosen = "generic";
 		fr->cpu_opts.type = generic;
 		fr->cpu_opts.dct64 = dct64;
 		fr->cpu_opts.synth_1to1 = synth_1to1;
 		fr->cpu_opts.synth_1to1_mono = synth_1to1_mono;
 		fr->cpu_opts.synth_1to1_mono2stereo = synth_1to1_mono2stereo;
 		fr->cpu_opts.synth_1to1_8bit = synth_1to1_8bit;
 		fr->cpu_opts.synth_1to1_8bit_mono = synth_1to1_8bit_mono;
 		fr->cpu_opts.synth_1to1_8bit_mono2stereo = synth_1to1_8bit_mono2stereo;
 		done = 1;
 	}
 	#endif
 #endif
 	if(done)
 	{
 		if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);
 		return 1;
 	}
 	else
 	{
 		if(NOQUIET) error("Could not set optimization!");
 		return 0;
 	}
 }
 enum optdec dectype(const char* decoder)
 {
 	if(decoder == NULL) return nodec;
 	if(!strcasecmp(decoder, "3dnowext"))    return dreidnowext;
 	if(!strcasecmp(decoder, "3dnow"))       return dreidnow;
 	if(!strcasecmp(decoder, "sse"))         return sse;
 	if(!strcasecmp(decoder, "mmx"))         return mmx;
 	if(!strcasecmp(decoder, "generic"))     return generic;
 	if(!strcasecmp(decoder, "altivec"))     return altivec;
 	if(!strcasecmp(decoder, "i386"))        return idrei;
 	if(!strcasecmp(decoder, "i486"))        return ivier;
 	if(!strcasecmp(decoder, "i586"))        return ifuenf;
 	if(!strcasecmp(decoder, "i586_dither")) return ifuenf_dither;
 	return nodec;
 }
--- a/src/libmpg123/frame.h
+++ b/src/libmpg123/frame.h
@@ -0,0 +1,375 @@
 #ifndef MPG123_FRAME_H
 #define MPG123_FRAME_H
 #include "mpg123.h"
 #include "id3.h"
 #include "icy.h"
 #include "reader.h"
 #include <stdio.h>
 /* max = 1728 */
 /* NOTE(review): the defined size is twice the maximum stated above; the reason is not visible here. */
 #define MAXFRAMESIZE 3456
 /* need the definite optimization flags here */
 /*
 	Each OPT_* decoder switch set by the build pulls in the helper flags below:
 	OPT_X86          any x86 decoder is present
 	PENTIUM_FALLBACK defined together with the i386/i486/i586 decoders
 	K6_FALLBACK      defined together with the 3DNow decoder
 	OPT_PENTIUM      one of the i586 decoders is present
 	OPT_MMXORSSE     MMX, SSE or 3DNowExt decoder is present
 	OPT_MPLAYER      SSE or 3DNowExt decoder is present
 	OPT_MMX_ONLY     an MMX/SSE/3DNowExt decoder without OPT_MULTI
 */
 /* i486 implies the i386 flags and defines the FIR buffer sizes */
 #ifdef OPT_I486
 #define OPT_I386
 #define FIR_BUFFER_SIZE  128
 #define FIR_SIZE 16
 #endif
 #ifdef OPT_I386
 #define PENTIUM_FALLBACK
 #define OPT_X86
 #endif
 #ifdef OPT_I586
 #define PENTIUM_FALLBACK
 #define OPT_PENTIUM
 #define OPT_X86
 #endif
 #ifdef OPT_I586_DITHER
 #define PENTIUM_FALLBACK
 #define OPT_PENTIUM
 #define OPT_X86
 #endif
 #ifdef OPT_MMX
 #define OPT_MMXORSSE
 #define OPT_X86
 #ifndef OPT_MULTI
 #define OPT_MMX_ONLY
 #endif
 #endif
 #ifdef OPT_SSE
 #define OPT_MMXORSSE
 #define OPT_MPLAYER
 #define OPT_X86
 #ifndef OPT_MULTI
 #define OPT_MMX_ONLY
 #endif
 #endif
 #ifdef OPT_3DNOWEXT
 #define OPT_MMXORSSE
 #define OPT_MPLAYER
 #define OPT_X86
 #ifndef OPT_MULTI
 #define OPT_MMX_ONLY
 #endif
 #endif
 #ifdef OPT_3DNOW
 #define K6_FALLBACK
 #define OPT_X86
 #endif
 /* One entry of an allocation table; mpg123_handle_struct keeps a pointer to such a table in its alloc member. */
 /* NOTE(review): the meaning of bits and d is not visible in this header -- confirm against the layer 2 code. */
 struct al_table
 {
  short bits;
  short d;
 };
 /* Frame index with room for INDEX_SIZE offsets; searched through frame_index_find(). */
 /* NOTE(review): presumably data[] holds stream positions of every step-th frame -- confirm with the code that fills it. */
 struct frame_index
 {
 	off_t data[INDEX_SIZE];
 	size_t fill; /* presumably the number of used entries in data -- TODO confirm */
 	off_t step;
 };
 /* the output buffer, used to be pcm_sample, pcm_point and audiobufsize */
 struct outbuffer
 {
 	unsigned char *data; /* start of the buffer memory */
 	unsigned char *p; /* read pointer  */
 	size_t fill; /* fill from read pointer */
 	size_t size; /* that's actually more like a safe size, after we have more than that, flush it */
 };
 /* Output audio format of a handle (member af); filled by frame_outformat(). */
 struct audioformat
 {
 	int encoding; /* tested with MPG123_ENC_8() to detect 8 bit output */
 	int channels; /* 2 selects the mono2stereo synth, anything else the mono synth */
 	long rate;
 };
 /*
 	Decoder types. Names that would start with a digit are spelled out in German,
 	matching the strings understood by dectype():
 	idrei = "i386", ivier = "i486", ifuenf = "i586", ifuenf_dither = "i586_dither",
 	dreidnow = "3dnow", dreidnowext = "3dnowext".
 */
 enum optdec { nodec=0, generic, idrei, ivier, ifuenf, ifuenf_dither, mmx, dreidnow, dreidnowext, altivec, sse };
 /* Decoder class; frame_cpu_opt() sets mmxsse for the MMX, SSE and 3DNowExt decoders. */
 enum optcla { nocla=0, normal, mmxsse };
 /* User-settable parameters of a handle (embedded in mpg123_handle_struct as member p). */
 struct mpg123_pars_struct
 {
 	int verbose;    /* verbose level */
 	long flags; /* combination of above */
 	long force_rate; /* non-zero: a forced (flexible) output rate was requested, see frame_cpu_opt() */
 	int down_sample;
 	int rva; /* (which) rva to do: 0: nothing, 1: radio/mix/track 2: album/audiophile */
 	long halfspeed;
 	long doublespeed;
 #define NUM_CHANNELS 2
 	char audio_caps[NUM_CHANNELS][MPG123_RATES+1][MPG123_ENCODINGS];
 /*	long start_frame; */ /* frame offset to begin with */
 /*	long frame_number;*/ /* number of frames to decode */
 	long icy_interval;
 	scale_t outscale; /* base volume; mpg123_volume() stores MAXOUTBURST * vol here */
 };
 /* There is a lot to condense here... many ints can be merged as flags; though the main space is still consumed by buffers. */
 /*
 	The decoder handle: decoder scratch buffers and tables, the function
 	pointers of the chosen optimization (cpu_opts), header fields of the
 	current frame, bitstream reader state, RVA data, input and output buffers,
 	seek limits, reader, parameters and metadata.
 */
 struct mpg123_handle_struct
 {
 	int fresh; /* to be moved into flags */
 	int new_format;
 	real hybrid_block[2][2][SBLIMIT*SSLIMIT];
 	int hybrid_blc[2];
 	/* the scratch vars for the decoders, sometimes real, sometimes short... sometimes int/long */
 	short *short_buffs[2][2];
 	real *real_buffs[2][2];
 	unsigned char *rawbuffs;
 	int rawbuffss;
 	int bo[2]; /* i486 and dither need a second value */
 	unsigned char* rawdecwin; /* the block with all decwins */
 	real *decwin; /* _the_ decode table */
 #ifdef OPT_MMXORSSE
 	/* I am not really sure that I need both of them... used in assembler */
 	float *decwin_mmx;
 	float *decwins;
 #endif
 	int have_eq_settings;
 	real equalizer[2][32];
 	/* for halfspeed mode */
 	unsigned char ssave[34];
 	int halfphase;
 	/* a raw buffer and a pointer into the middle for signed short conversion, only allocated on demand */
 	unsigned char *conv16to8_buf;
 	unsigned char *conv16to8;
 	/* There's some possible memory saving for stuff that is not _really_ dynamic. */
 	/* layer3 */
 	int longLimit[9][23];
 	int shortLimit[9][14];
 	real gainpow2[256+118+4]; /* not really dynamic, just different for mmx */
 	/* layer2 */
 	real muls[27][64];	/* also used by layer 1 */
 	/* decode_ntom */
 	unsigned long ntom_val[2];
 	unsigned long ntom_step;
 	/* special i486 fun */
 #ifdef OPT_I486
 	int *int_buffs[2][2];
 #endif
 	/* special altivec... */
 #ifdef OPT_ALTIVEC
 	real *areal_buffs[4][4];
 #endif
 	/* Functions of the chosen decoder optimization, filled by frame_cpu_opt().
 	   The pointers only exist in OPT_MULTI builds; type and class are always there. */
 	struct
 	{
 #ifdef OPT_MULTI
 		int (*synth_1to1)(real *,int, mpg123_handle *,int );
 		int (*synth_1to1_mono)(real *, mpg123_handle *);
 		int (*synth_1to1_mono2stereo)(real *, mpg123_handle *);
 		int (*synth_1to1_8bit)(real *,int, mpg123_handle *,int );
 		int (*synth_1to1_8bit_mono)(real *, mpg123_handle *);
 		int (*synth_1to1_8bit_mono2stereo)(real *, mpg123_handle *);
 #ifdef OPT_PENTIUM
 		int (*synth_1to1_i586_asm)(real *,int,unsigned char *, unsigned char *, int *, real *decwin);
 #endif
 #ifdef OPT_MMXORSSE
 		void (*make_decode_tables)(mpg123_handle *fr);
 		real (*init_layer3_gainpow2)(mpg123_handle*, int);
 		real* (*init_layer2_table)(mpg123_handle*, real*, double);
 #endif
 #ifdef OPT_3DNOW
 		void (*dct36)(real *,real *,real *,real *,real *);
 #endif
 		void (*dct64)(real *,real *,real *);
 #ifdef OPT_MPLAYER
 		void (*mpl_dct64)(real *,real *,real *);
 #endif
 #endif
 		enum optdec type;
 		enum optcla class;
 	} cpu_opts;
 	int verbose;    /* 0: nothing, 1: just print chosen decoder, 2: be verbose */
 	/* mpg123_handle */
 	const struct al_table *alloc;
 	/* could use types from optimize.h */
 	/* the synth routines for the current format, chosen by set_synth_functions() */
 	int (*synth)(real *,int, mpg123_handle*, int);
 	int (*synth_mono)(real *, mpg123_handle*);
 	int stereo; /* I _think_ 1 for mono and 2 for stereo */
 	int jsbound;
 	/* values for the member single */
 #define SINGLE_STEREO -1
 #define SINGLE_LEFT    0
 #define SINGLE_RIGHT   1
 #define SINGLE_MIX     3
 	int single;
 	int II_sblimit;
 	int down_sample_sblimit;
 	int lsf; /* 0: MPEG 1.0; 1: MPEG 2.0/2.5 -- both used as bool and array index! */
 	int mpeg25;
 	int down_sample; /* index into the synth tables: 0: 1to1, 1: 2to1, 2: 4to1, 3: ntom */
 	int header_change;
 	int lay; /* MPEG layer (1, 2 or 3), see the spf() macro */
 	int (*do_layer)(mpg123_handle *);
 	int error_protection;
 	int bitrate_index;
 	int sampling_frequency;
 	int padding;
 	int extension;
 	int mode;
 	int mode_ext;
 	int copyright;
 	int original;
 	int emphasis;
 	int framesize; /* computed framesize */
 	enum mpg123_vbr vbr; /* 1 if variable bitrate was detected */
 	off_t num; /* frame offset ... */
 	/* bitstream info; bsi */
 	int bitindex;
 	unsigned char *wordpointer;
 	/* temporary storage for getbits stuff */
 	unsigned long ultmp;
 	unsigned char uctmp;
 	/* rva data, used in common.c, set in id3.c */
 	scale_t lastscale; /* the scale last applied by do_rva(); a negative value forces the first setting */
 	struct
 	{
 		int level[2]; /* -1: no value in this slot */
 		float gain[2];
 		float peak[2];
 	} rva;
 	int do_recover;
 	/* input data */
 	off_t track_frames;
 	double mean_framesize;
 	off_t mean_frames;
 	int fsizeold;
 	int ssize;
 	unsigned char bsspace[2][MAXFRAMESIZE+512]; /* MAXFRAMESIZE */
 	unsigned char *bsbuf;
 	unsigned char *bsbufold;
 	int bsnum;
 	unsigned long oldhead;
 	unsigned long firsthead;
 	int abr_rate;
 	struct frame_index index;
 	/* output data */
 	struct outbuffer buffer;
 	struct audioformat af;
 	int own_buffer;
 	size_t outblock; /* number of bytes that this frame produces (upper bound) */
 	int to_decode;   /* this frame holds data to be decoded */
 	int to_ignore;   /* the same, somehow */
 	off_t firstframe;  /* start decoding from here */
 	off_t lastframe;   /* last frame to decode (for gapless or num_frames limit) */
 	off_t ignoreframe; /* frames to decode but discard before firstframe */
 #ifdef GAPLESS
 	off_t firstoff; /* number of samples to ignore from firstframe */
 	off_t lastoff;  /* number of samples to use from lastframe */
 	off_t begin_s;  /* overall begin offset in samples */
 	off_t begin_os;
 	off_t end_s;    /* overall end offset in samples */
 	off_t end_os;
 #endif
 	unsigned int crc;
 	struct reader *rd; /* pointer to the reading functions */
 	struct reader_data rdat; /* reader data and state info */
 	struct mpg123_pars_struct p;
 	int err;
 	int decoder_change;
 	int delayed_change;
 	long clip;
 	/* the meta crap */
 	int metaflags;
 	unsigned char id3buf[128];
 	mpg123_id3v2 id3v2;
 	struct icy_meta icy;
 };
 /* generic init, does not include dynamic buffers */
 void frame_init(mpg123_handle *fr);
 void frame_init_par(mpg123_handle *fr, mpg123_pars *mp);
 /* output buffer and format */
 int  frame_outbuffer(mpg123_handle *fr);
 int  frame_output_format(mpg123_handle *fr);
 int frame_buffers(mpg123_handle *fr); /* various decoder buffers, needed once */
 int frame_reset(mpg123_handle* fr);   /* reset for next track */
 int frame_buffers_reset(mpg123_handle *fr);
 void frame_exit(mpg123_handle *fr);   /* end, free all buffers */
 int mpg123_print_index(mpg123_handle *fr, FILE* out);
 off_t frame_index_find(mpg123_handle *fr, off_t want_frame, off_t* get_frame);
 /* choose the decoder by name (NULL, "" or "auto": automatic); returns 1 on success, 0 on failure */
 int frame_cpu_opt(mpg123_handle *fr, const char* cpu);
 /* decoder name to enum value; nodec for NULL or unknown names */
 enum optdec dectype(const char* decoder);
 /* set fr->synth and fr->synth_mono for the current output format; 0 on success, -1 on error */
 int set_synth_functions(mpg123_handle *fr);
 /* NOTE(review): no definition of do_volume() is visible next to do_rva() -- check whether this prototype is still needed. */
 void do_volume(mpg123_handle *fr, double factor);
 /* NOTE(review): do_rva() is declared a second time near the end of this header. */
 void do_rva(mpg123_handle *fr);
 /* samples per frame ...

              Layer I   Layer II   Layer III
 MPEG-1         384      1152       1152
 MPEG-2 LSF     384      1152        576
 MPEG 2.5       384      1152        576

 The macro evaluates fr more than once; pass an expression without side effects.
 */
 #define spf(fr) ((fr)->lay == 1 ? 384 : ((fr)->lay==2 ? 1152 : ((fr)->lsf || (fr)->mpeg25 ? 576 : 1152)))
 #ifdef GAPLESS
 /* well, I take that one for granted... at least layer3 */
 #define DECODER_DELAY 529
 /* still fine-tuning the "real music" window... see read_frame */
 #define GAP_SHIFT 0
 void frame_gapless_init(mpg123_handle *fr, off_t b, off_t e);
 void frame_gapless_realinit(mpg123_handle *fr);
 /*void frame_gapless_position(mpg123_handle* fr);
 void frame_gapless_bytify(mpg123_handle *fr);
 void frame_gapless_ignore(mpg123_handle *fr, off_t frames);*/
 /* void frame_gapless_buffercheck(mpg123_handle *fr); */
 #endif
 /*
 	Seeking core functions:
 	- convert input sample offset to output sample offset
 	- convert frame offset to output sample offset
 	- get leading frame offset for output sample offset
 	The offsets are "unadjusted"/internal; resampling is being taken care of.
 */
 off_t frame_ins2outs(mpg123_handle *fr, off_t ins);
 off_t frame_outs(mpg123_handle *fr, off_t num);
 off_t frame_offset(mpg123_handle *fr, off_t outs);
 void frame_set_frameseek(mpg123_handle *fr, off_t fe);
 void frame_set_seek(mpg123_handle *fr, off_t sp);
 /* output sample offset of fr->firstframe (plus fr->firstoff with GAPLESS), unadjusted */
 off_t frame_tell_seek(mpg123_handle *fr);
 /* adjust volume to current outscale and rva values if wanted */
 /* NOTE(review): repeats the do_rva() prototype given earlier in this header. */
 void do_rva(mpg123_handle *fr);
 #endif
--- a/src/libmpg123/getbits.c
+++ b/src/libmpg123/getbits.c
@@ -0,0 +1,135 @@
 /*
 	getbits
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 #include "mpg123app.h"
 #include "common.h"
 /* Disabled debugging aid: complain on stderr if reading size more bits would reach fsizeold bytes or beyond in bsbuf. */
 #if 0
 static void check_buffer_range(int size)
 {
 	/* byte position after the read, relative to the start of the frame buffer */
 	int pos = (bsi.wordpointer-bsbuf) + (size>>3);
 	if( pos >= fsizeold) {
 		fprintf(stderr,"Pointer out of range (%d,%d)!\n",pos,fsizeold);
 	}
 }
 #endif
 /* Step the global bit reader (bsi) back by number_of_bits bits. */
 void backbits(int number_of_bits)
 {
 	/* bit position relative to the current byte; negative after stepping back */
 	int position = bsi.bitindex - number_of_bits;
 	bsi.wordpointer += (position>>3);
 	bsi.bitindex     = position & 0x7;
 }
 /* Number of bits from the current bit position up to the next byte boundary (0..7). */
 int getbitoffset(void)
 {
 	int negated = -bsi.bitindex;
 	return negated & 0x7;
 }
 /* Return the byte under the read pointer and advance by one byte; the bit index is left alone. */
 int getbyte(void)
 {
 	int value;
 #ifdef DEBUG_GETBITS
 	if(bsi.bitindex != 0)
 	{
 		fprintf(stderr,"getbyte called unsynched!\n");
 	}
 #endif
 	value = *bsi.wordpointer;
 	bsi.wordpointer++;
 	return value;
 }
 /*
 	Read number_of_bits bits from the global bit reader (bsi) and advance it.
 	The value is cut out of a 24 bit window built from the next three bytes;
 	a request for zero bits returns 0 without touching the stream.
 */
 unsigned int getbits(int number_of_bits)
 {
 	unsigned long window;
 #ifdef DEBUG_GETBITS
 	fprintf(stderr,"g%d",number_of_bits);
 #endif
 	if(number_of_bits == 0)
 	{
 		return 0;
 	}
 #if 0
 	check_buffer_range(number_of_bits+bsi.bitindex);
 #endif
 	/* three bytes, most significant first */
 	window = ((unsigned long) bsi.wordpointer[0] << 16)
 	       | ((unsigned long) bsi.wordpointer[1] << 8)
 	       |  (unsigned long) bsi.wordpointer[2];
 	/* drop the bits already consumed from the first byte, keep 24 bits */
 	window = (window << bsi.bitindex) & 0xffffff;
 	/* the wanted bits are the topmost ones of the window */
 	window >>= (24-number_of_bits);
 	bsi.bitindex += number_of_bits;
 	bsi.wordpointer += (bsi.bitindex>>3);
 	bsi.bitindex &= 7;
 #ifdef DEBUG_GETBITS
 	fprintf(stderr,":%lx ",window);
 #endif
 	return window;
 }
 /*
 	Variant of getbits() that builds its window from the next two bytes only
 	and has no special case for zero bits.
 */
 unsigned int getbits_fast(int number_of_bits)
 {
 	unsigned int window;
 #ifdef DEBUG_GETBITS
 	fprintf(stderr,"g%d",number_of_bits);
 #endif
 #if 0
 	check_buffer_range(number_of_bits+bsi.bitindex);
 #endif
 	/* remaining bits of the first byte, aligned to the top of an 8 bit value */
 	window  = (unsigned char) (bsi.wordpointer[0] << bsi.bitindex);
 	/* fill up from below with the leading bits of the second byte */
 	window |= ((unsigned int) bsi.wordpointer[1]<<bsi.bitindex)>>8;
 	/* move the wanted bits above bit 7, then drop the rest */
 	window  = (window << number_of_bits) >> 8;
 	bsi.bitindex += number_of_bits;
 	bsi.wordpointer += (bsi.bitindex>>3);
 	bsi.bitindex &= 7;
 #ifdef DEBUG_GETBITS
 	fprintf(stderr,":%x ",window);
 #endif
 	return window;
 }
 /* Read a single bit from the global bit reader (bsi) and advance it by one bit. */
 unsigned int get1bit(void)
 {
 	unsigned char shifted;
 #ifdef DEBUG_GETBITS
 	fprintf(stderr,"g%d",1);
 #endif
 #if 0
 	check_buffer_range(1+bsi.bitindex);
 #endif
 	/* shift the wanted bit into the top position of the byte */
 	shifted = *bsi.wordpointer << bsi.bitindex;
 	bsi.bitindex++;
 	bsi.wordpointer += (bsi.bitindex>>3);
 	bsi.bitindex &= 7;
 #ifdef DEBUG_GETBITS
 	fprintf(stderr,":%d ",shifted>>7);
 #endif
 	return shifted>>7;
 }
--- a/src/libmpg123/getbits.h
+++ b/src/libmpg123/getbits.h
@@ -0,0 +1,49 @@
 /*
 	getbits
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 #ifndef _MPG123_GETBITS_H_
 #define _MPG123_GETBITS_H_
 /* that's the same code as getbits.c but with defines to
  force inlining

  All macros work on a handle pointer fr with the members bitindex,
  wordpointer, ultmp and uctmp. The arguments are fully parenthesized, so
  expressions like &handle or a+b are safe, but they are still evaluated
  several times: do not pass expressions with side effects. */

 /* step back by nob bits */
 #define backbits(fr,nob) ((void)( \
  (fr)->bitindex    -= (nob), \
  (fr)->wordpointer += ((fr)->bitindex>>3), \
  (fr)->bitindex    &= 0x7 ))
 /* number of bits up to the next byte boundary */
 #define getbitoffset(fr) ((-(fr)->bitindex)&0x7)
 /* read the byte under the read pointer and advance by one byte */
 #define getbyte(fr)      (*(fr)->wordpointer++)
 /* read nob bits out of a 24 bit window built from the next three bytes */
 #define getbits(fr, nob) ( \
  (fr)->ultmp = (fr)->wordpointer[0], (fr)->ultmp <<= 8, (fr)->ultmp |= (fr)->wordpointer[1], \
  (fr)->ultmp <<= 8, (fr)->ultmp |= (fr)->wordpointer[2], (fr)->ultmp <<= (fr)->bitindex, \
  (fr)->ultmp &= 0xffffff, (fr)->bitindex += (nob), \
  (fr)->ultmp >>= (24-(nob)), (fr)->wordpointer += ((fr)->bitindex>>3), \
  (fr)->bitindex &= 7,(fr)->ultmp)
 /* advance by nob bits; as before, fr->ultmp ends up holding the new bit index */
 #define skipbits(fr, nob) ((fr)->ultmp = ( \
  (fr)->ultmp = (fr)->wordpointer[0], (fr)->ultmp <<= 8, (fr)->ultmp |= (fr)->wordpointer[1], \
  (fr)->ultmp <<= 8, (fr)->ultmp |= (fr)->wordpointer[2], (fr)->ultmp <<= (fr)->bitindex, \
  (fr)->ultmp &= 0xffffff, (fr)->bitindex += (nob), \
  (fr)->ultmp >>= (24-(nob)), (fr)->wordpointer += ((fr)->bitindex>>3), \
  (fr)->bitindex &= 7 ))
 /* read nob bits using only the next two bytes */
 #define getbits_fast(fr, nob) ( \
  (fr)->ultmp = (unsigned char) ((fr)->wordpointer[0] << (fr)->bitindex), \
  (fr)->ultmp |= ((unsigned long) (fr)->wordpointer[1]<<(fr)->bitindex)>>8, \
  (fr)->ultmp <<= (nob), (fr)->ultmp >>= 8, \
  (fr)->bitindex += (nob), (fr)->wordpointer += ((fr)->bitindex>>3), \
  (fr)->bitindex &= 7, (fr)->ultmp )
 /* read a single bit */
 #define get1bit(fr) ( \
  (fr)->uctmp = *(fr)->wordpointer << (fr)->bitindex, (fr)->bitindex++, \
  (fr)->wordpointer += ((fr)->bitindex>>3), (fr)->bitindex &= 7, (fr)->uctmp>>7 )
 #endif
--- a/src/libmpg123/getcpuflags.S
+++ b/src/libmpg123/getcpuflags.S
@@ -0,0 +1,79 @@
 /*
 	getcpuflags: get cpuflags for ia32
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by KIMURA Takuhiro (for 3DNow!)
 	extended for general use by Thomas Orgis
 	 extern unsigned int getcpuflags(struct cpuflags*)
 	or just 
 	 extern unsigned int getcpuflags(unsigned int*)
 	where there is memory for 4 ints
 	 -> returns the first set of idflags (basic cpu family info),
 	    with the idflags, stdflags, std2flags, extflags written to the parameter
 	 -> returns 0x00000000 (CPUID instruction not supported)
 */
 #include "mangle.h"
 .text
 	ALIGN4
 .globl ASM_NAME(getcpuflags)
 /*	.type ASM_NAME(getcpuflags),@function */
 /*
 	unsigned int getcpuflags(struct cpuflags* cf)
 	Stores four 32 bit values at the pointer given as the only stack argument:
 	 0: %eax of cpuid(1)           (id flags)
 	 4: %ecx of cpuid(1)           (standard flags)
 	 8: %edx of cpuid(1)           (standard flags, part 2)
 	12: %edx of cpuid(0x80000001)  (extended flags), 0 if that level does not exist
 	Returns the id flags in %eax; everything is zero if CPUID is not available.
 	%ebx, %ecx, %edx and %esi are preserved.
 */
 ASM_NAME(getcpuflags):
 	pushl %ebp
 	movl %esp,%ebp
 	pushl %edx
 	pushl %ecx
 	pushl %ebx
 	pushl %esi
 /* get the int pointer for storing the flags */
 	movl 8(%ebp), %esi
 /* now save the flags and do a check for cpuid availability */
 	pushfl
 	pushfl
 	popl %eax
 	movl %eax,%ebx
 /* toggle the ID bit (bit 21 of EFLAGS)... */
 	xorl $0x00200000,%eax
 	pushl %eax
 	popfl
 /* ...and read back the flags to see if it is understood */
 	pushfl
 	popl %eax
 /* restore the original flags saved by the first pushfl */
 	popfl
 	cmpl %ebx,%eax
 	je .Lnocpuid
 /* extended flags stay zero unless the extended level is really there */
 	movl $0, 12(%esi)
 /* ask for the highest supported extended level first... */
 	movl $0x80000000,%eax
 	cpuid
 /* ...and skip the extended query if 0x80000001 is not covered */
 	cmpl $0x80000001,%eax
 	jb .Lnoextended
 /* now get the extended info */
 	movl $0x80000001,%eax
 	cpuid
 	movl %edx,12(%esi)
 .Lnoextended:
 /* then the other ones, called last to get the id flags in %eax for ret */
 	movl $0x00000001,%eax
 	cpuid
 	movl %eax, (%esi)
 	movl %ecx, 4(%esi)
 	movl %edx, 8(%esi)
 	jmp .Lend
 	ALIGN4
 .Lnocpuid:
 /* error: set everything to zero */
 	movl $0, %eax
 	movl $0, (%esi)
 	movl $0, 4(%esi)
 	movl $0, 8(%esi)
 	movl $0, 12(%esi)
 	ALIGN4
 .Lend:
 /* return value are the id flags, still stored in %eax */
 	popl %esi
 	popl %ebx
 	popl %ecx
 	popl %edx
 	movl %ebp,%esp
 	popl %ebp
 	ret
--- a/src/libmpg123/getcpuflags.h
+++ b/src/libmpg123/getcpuflags.h
@@ -0,0 +1,39 @@
 #ifndef MPG123_H_GETCPUFLAGS
 #define MPG123_H_GETCPUFLAGS
 /* standard level flags part 1 */
 #define FLAG_SSE3      0x00000001
 /* standard level flags part 2 */
 #define FLAG2_MMX       0x00800000
 #define FLAG2_SSE       0x02000000
 #define FLAG2_SSE2      0x04000000
 #define FLAG2_FPU       0x00000001
 /* cpuid extended level 1 (AMD) */
 #define XFLAG_MMX      0x00800000
 #define XFLAG_3DNOW    0x80000000
 #define XFLAG_3DNOWEXT 0x40000000
 /* The four values filled in by getcpuflags(), in the order they are stored. */
 struct cpuflags
 {
 	unsigned int id;   /* id flags; bits 8-11 are used as the cpu family */
 	unsigned int std;  /* standard flags part 1 (FLAG_*) */
 	unsigned int std2; /* standard flags part 2 (FLAG2_*) */
 	unsigned int ext;  /* extended flags (XFLAG_*) */
 };
 extern struct cpuflags cpu_flags;
 /* Fill *cf and return the id flags; 0 if CPUID is not supported. */
 unsigned int getcpuflags(struct cpuflags* cf);
 /*
 	The test macros take a struct cpuflags value (not a pointer). The argument
 	is parenthesized, so expressions like *ptr work as well.
 */
 /* checks the family */
 /* NOTE(review): family 0 (also the result without CPUID) counts as i586 here -- confirm that this is intended. */
 #define cpu_i586(s) ( (((s).id & 0xf00)>>8) == 0 || (((s).id & 0xf00)>>8) > 4 )
 /* checking some flags... */
 #define cpu_fpu(s) (FLAG2_FPU & (s).std2)
 #define cpu_mmx(s) (FLAG2_MMX & (s).std2 || XFLAG_MMX & (s).ext)
 #define cpu_3dnow(s) (XFLAG_3DNOW & (s).ext)
 #define cpu_3dnowext(s) (XFLAG_3DNOWEXT & (s).ext)
 #define cpu_sse(s) (FLAG2_SSE & (s).std2)
 #define cpu_sse2(s) (FLAG2_SSE2 & (s).std2)
 #define cpu_sse3(s) (FLAG_SSE3 & (s).std)
 #endif
--- a/src/libmpg123/huffman.h
+++ b/src/libmpg123/huffman.h
@@ -0,0 +1,340 @@
 /*
 	huffman.h: huffman tables ... recalculated to work with optimized decoder scheme (MH)
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 	probably we could save a few bytes of memory, because the 
 	smaller tables are often the part of a bigger table
 */
 #ifndef _MPG123_HUFFMAN_H_
 #define _MPG123_HUFFMAN_H_
 /*
 	One selectable Huffman table: the tree data plus its linbits value.
 	NOTE(review): linbits is presumably the number of extra raw bits read for
 	escaped values -- confirm against the layer 3 decoder.
 */
 struct newhuff
 {
 	unsigned int linbits; /* per-table linbits setting */
 	short *table;         /* points to one of the static tabN arrays */
 };
 /* Trivial single-entry table; used by ht[0], ht[4] and ht[14]. */
 static short tab0[] = 
 { 
   0
 };
 /* Tree data for ht[1]. The order of the entries is significant. */
 static short tab1[] =
 {
  -5,  -3,  -1,  17,   1,  16,   0
 };
 /* Tree data for ht[2]. The order of the entries is significant. */
 static short tab2[] =
 {
 -15, -11,  -9,  -5,  -3,  -1,  34,   2,  18,  -1,  33,  32,  17,  -1,   1,
  16,   0
 };
 /* Tree data for ht[3]. The order of the entries is significant. */
 static short tab3[] =
 {
 -13, -11,  -9,  -5,  -3,  -1,  34,   2,  18,  -1,  33,  32,  16,  17,  -1,
   1,   0
 };
 /* Tree data for ht[5]. The order of the entries is significant. */
 static short tab5[] =
 {
 -29, -25, -23, -15,  -7,  -5,  -3,  -1,  51,  35,  50,  49,  -3,  -1,  19,
   3,  -1,  48,  34,  -3,  -1,  18,  33,  -1,   2,  32,  17,  -1,   1,  16,
   0
 };
 /* Tree data for ht[6]. The order of the entries is significant. */
 static short tab6[] =
 {
 -25, -19, -13,  -9,  -5,  -3,  -1,  51,   3,  35,  -1,  50,  48,  -1,  19,
  49,  -3,  -1,  34,   2,  18,  -3,  -1,  33,  32,   1,  -1,  17,  -1,  16,
   0
 };
 /* Tree data for ht[7]. The order of the entries is significant. */
 static short tab7[] =
 {
 -69, -65, -57, -39, -29, -17, -11,  -7,  -3,  -1,  85,  69,  -1,  84,  83,
  -1,  53,  68,  -3,  -1,  37,  82,  21,  -5,  -1,  81,  -1,   5,  52,  -1,
  80,  -1,  67,  51,  -5,  -3,  -1,  36,  66,  20,  -1,  65,  64, -11,  -7,
  -3,  -1,   4,  35,  -1,  50,   3,  -1,  19,  49,  -3,  -1,  48,  34,  18,
  -5,  -1,  33,  -1,   2,  32,  17,  -1,   1,  16,   0
 };
 /* Tree data for ht[8]. The order of the entries is significant. */
 static short tab8[] =
 {
 -65, -63, -59, -45, -31, -19, -13,  -7,  -5,  -3,  -1,  85,  84,  69,  83,
  -3,  -1,  53,  68,  37,  -3,  -1,  82,   5,  21,  -5,  -1,  81,  -1,  52,
  67,  -3,  -1,  80,  51,  36,  -5,  -3,  -1,  66,  20,  65,  -3,  -1,   4,
  64,  -1,  35,  50,  -9,  -7,  -3,  -1,  19,  49,  -1,   3,  48,  34,  -1,
   2,  32,  -1,  18,  33,  17,  -3,  -1,   1,  16,   0
 };
 /* Tree data for ht[9]. The order of the entries is significant. */
 static short tab9[] =
 {
 -63, -53, -41, -29, -19, -11,  -5,  -3,  -1,  85,  69,  53,  -1,  83,  -1,
  84,   5,  -3,  -1,  68,  37,  -1,  82,  21,  -3,  -1,  81,  52,  -1,  67,
  -1,  80,   4,  -7,  -3,  -1,  36,  66,  -1,  51,  64,  -1,  20,  65,  -5,
  -3,  -1,  35,  50,  19,  -1,  49,  -1,   3,  48,  -5,  -3,  -1,  34,   2,
  18,  -1,  33,  32,  -3,  -1,  17,   1,  -1,  16,   0
 };
 /* Tree data for ht[10]. The order of the entries is significant. */
 static short tab10[] =
 {
 -125,-121,-111, -83, -55, -35, -21, -13,  -7,  -3,  -1, 119, 103,  -1, 118,
  87,  -3,  -1, 117, 102,  71,  -3,  -1, 116,  86,  -1, 101,  55,  -9,  -3,
  -1, 115,  70,  -3,  -1,  85,  84,  99,  -1,  39, 114, -11,  -5,  -3,  -1,
 100,   7, 112,  -1,  98,  -1,  69,  53,  -5,  -1,   6,  -1,  83,  68,  23,
 -17,  -5,  -1, 113,  -1,  54,  38,  -5,  -3,  -1,  37,  82,  21,  -1,  81,
  -1,  52,  67,  -3,  -1,  22,  97,  -1,  96,  -1,   5,  80, -19, -11,  -7,
  -3,  -1,  36,  66,  -1,  51,   4,  -1,  20,  65,  -3,  -1,  64,  35,  -1,
  50,   3,  -3,  -1,  19,  49,  -1,  48,  34,  -7,  -3,  -1,  18,  33,  -1,
   2,  32,  17,  -1,   1,  16,   0
 };
 /* Tree data for ht[11]. The order of the entries is significant. */
 static short tab11[] =
 {
 -121,-113, -89, -59, -43, -27, -17,  -7,  -3,  -1, 119, 103,  -1, 118, 117,
  -3,  -1, 102,  71,  -1, 116,  -1,  87,  85,  -5,  -3,  -1,  86, 101,  55,
  -1, 115,  70,  -9,  -7,  -3,  -1,  69,  84,  -1,  53,  83,  39,  -1, 114,
  -1, 100,   7,  -5,  -1, 113,  -1,  23, 112,  -3,  -1,  54,  99,  -1,  96,
  -1,  68,  37, -13,  -7,  -5,  -3,  -1,  82,   5,  21,  98,  -3,  -1,  38,
   6,  22,  -5,  -1,  97,  -1,  81,  52,  -5,  -1,  80,  -1,  67,  51,  -1,
  36,  66, -15, -11,  -7,  -3,  -1,  20,  65,  -1,   4,  64,  -1,  35,  50,
  -1,  19,  49,  -5,  -3,  -1,   3,  48,  34,  33,  -5,  -1,  18,  -1,   2,
  32,  17,  -3,  -1,   1,  16,   0
 };
 /* Tree data for ht[12]. The order of the entries is significant. */
 static short tab12[] =
 {
 -115, -99, -73, -45, -27, -17,  -9,  -5,  -3,  -1, 119, 103, 118,  -1,  87,
 117,  -3,  -1, 102,  71,  -1, 116, 101,  -3,  -1,  86,  55,  -3,  -1, 115,
  85,  39,  -7,  -3,  -1, 114,  70,  -1, 100,  23,  -5,  -1, 113,  -1,   7,
 112,  -1,  54,  99, -13,  -9,  -3,  -1,  69,  84,  -1,  68,  -1,   6,   5,
  -1,  38,  98,  -5,  -1,  97,  -1,  22,  96,  -3,  -1,  53,  83,  -1,  37,
  82, -17,  -7,  -3,  -1,  21,  81,  -1,  52,  67,  -5,  -3,  -1,  80,   4,
  36,  -1,  66,  20,  -3,  -1,  51,  65,  -1,  35,  50, -11,  -7,  -5,  -3,
  -1,  64,   3,  48,  19,  -1,  49,  34,  -1,  18,  33,  -7,  -5,  -3,  -1,
   2,  32,   0,  17,  -1,   1,  16
 };
 /* Tree data for ht[13]. The order of the entries is significant. */
 static short tab13[] =
 {
 -509,-503,-475,-405,-333,-265,-205,-153,-115, -83, -53, -35, -21, -13,  -9,
  -7,  -5,  -3,  -1, 254, 252, 253, 237, 255,  -1, 239, 223,  -3,  -1, 238,
 207,  -1, 222, 191,  -9,  -3,  -1, 251, 206,  -1, 220,  -1, 175, 233,  -1,
 236, 221,  -9,  -5,  -3,  -1, 250, 205, 190,  -1, 235, 159,  -3,  -1, 249,
 234,  -1, 189, 219, -17,  -9,  -3,  -1, 143, 248,  -1, 204,  -1, 174, 158,
  -5,  -1, 142,  -1, 127, 126, 247,  -5,  -1, 218,  -1, 173, 188,  -3,  -1,
 203, 246, 111, -15,  -7,  -3,  -1, 232,  95,  -1, 157, 217,  -3,  -1, 245,
 231,  -1, 172, 187,  -9,  -3,  -1,  79, 244,  -3,  -1, 202, 230, 243,  -1,
  63,  -1, 141, 216, -21,  -9,  -3,  -1,  47, 242,  -3,  -1, 110, 156,  15,
  -5,  -3,  -1, 201,  94, 171,  -3,  -1, 125, 215,  78, -11,  -5,  -3,  -1,
 200, 214,  62,  -1, 185,  -1, 155, 170,  -1,  31, 241, -23, -13,  -5,  -1,
 240,  -1, 186, 229,  -3,  -1, 228, 140,  -1, 109, 227,  -5,  -1, 226,  -1,
  46,  14,  -1,  30, 225, -15,  -7,  -3,  -1, 224,  93,  -1, 213, 124,  -3,
  -1, 199,  77,  -1, 139, 184,  -7,  -3,  -1, 212, 154,  -1, 169, 108,  -1,
 198,  61, -37, -21,  -9,  -5,  -3,  -1, 211, 123,  45,  -1, 210,  29,  -5,
  -1, 183,  -1,  92, 197,  -3,  -1, 153, 122, 195,  -7,  -5,  -3,  -1, 167,
 151,  75, 209,  -3,  -1,  13, 208,  -1, 138, 168, -11,  -7,  -3,  -1,  76,
 196,  -1, 107, 182,  -1,  60,  44,  -3,  -1, 194,  91,  -3,  -1, 181, 137,
  28, -43, -23, -11,  -5,  -1, 193,  -1, 152,  12,  -1, 192,  -1, 180, 106,
  -5,  -3,  -1, 166, 121,  59,  -1, 179,  -1, 136,  90, -11,  -5,  -1,  43,
  -1, 165, 105,  -1, 164,  -1, 120, 135,  -5,  -1, 148,  -1, 119, 118, 178,
 -11,  -3,  -1,  27, 177,  -3,  -1,  11, 176,  -1, 150,  74,  -7,  -3,  -1,
  58, 163,  -1,  89, 149,  -1,  42, 162, -47, -23,  -9,  -3,  -1,  26, 161,
  -3,  -1,  10, 104, 160,  -5,  -3,  -1, 134,  73, 147,  -3,  -1,  57,  88,
  -1, 133, 103,  -9,  -3,  -1,  41, 146,  -3,  -1,  87, 117,  56,  -5,  -1,
 131,  -1, 102,  71,  -3,  -1, 116,  86,  -1, 101, 115, -11,  -3,  -1,  25,
 145,  -3,  -1,   9, 144,  -1,  72, 132,  -7,  -5,  -1, 114,  -1,  70, 100,
  40,  -1, 130,  24, -41, -27, -11,  -5,  -3,  -1,  55,  39,  23,  -1, 113,
  -1,  85,   7,  -7,  -3,  -1, 112,  54,  -1,  99,  69,  -3,  -1,  84,  38,
  -1,  98,  53,  -5,  -1, 129,  -1,   8, 128,  -3,  -1,  22,  97,  -1,   6,
  96, -13,  -9,  -5,  -3,  -1,  83,  68,  37,  -1,  82,   5,  -1,  21,  81,
  -7,  -3,  -1,  52,  67,  -1,  80,  36,  -3,  -1,  66,  51,  20, -19, -11,
  -5,  -1,  65,  -1,   4,  64,  -3,  -1,  35,  50,  19,  -3,  -1,  49,   3,
  -1,  48,  34,  -3,  -1,  18,  33,  -1,   2,  32,  -3,  -1,  17,   1,  16,
   0
 };
 /* Tree data for ht[15]. The order of the entries is significant. */
 static short tab15[] =
 {
 -495,-445,-355,-263,-183,-115, -77, -43, -27, -13,  -7,  -3,  -1, 255, 239,
  -1, 254, 223,  -1, 238,  -1, 253, 207,  -7,  -3,  -1, 252, 222,  -1, 237,
 191,  -1, 251,  -1, 206, 236,  -7,  -3,  -1, 221, 175,  -1, 250, 190,  -3,
  -1, 235, 205,  -1, 220, 159, -15,  -7,  -3,  -1, 249, 234,  -1, 189, 219,
  -3,  -1, 143, 248,  -1, 204, 158,  -7,  -3,  -1, 233, 127,  -1, 247, 173,
  -3,  -1, 218, 188,  -1, 111,  -1, 174,  15, -19, -11,  -3,  -1, 203, 246,
  -3,  -1, 142, 232,  -1,  95, 157,  -3,  -1, 245, 126,  -1, 231, 172,  -9,
  -3,  -1, 202, 187,  -3,  -1, 217, 141,  79,  -3,  -1, 244,  63,  -1, 243,
 216, -33, -17,  -9,  -3,  -1, 230,  47,  -1, 242,  -1, 110, 240,  -3,  -1,
  31, 241,  -1, 156, 201,  -7,  -3,  -1,  94, 171,  -1, 186, 229,  -3,  -1,
 125, 215,  -1,  78, 228, -15,  -7,  -3,  -1, 140, 200,  -1,  62, 109,  -3,
  -1, 214, 227,  -1, 155, 185,  -7,  -3,  -1,  46, 170,  -1, 226,  30,  -5,
  -1, 225,  -1,  14, 224,  -1,  93, 213, -45, -25, -13,  -7,  -3,  -1, 124,
 199,  -1,  77, 139,  -1, 212,  -1, 184, 154,  -7,  -3,  -1, 169, 108,  -1,
 198,  61,  -1, 211, 210,  -9,  -5,  -3,  -1,  45,  13,  29,  -1, 123, 183,
  -5,  -1, 209,  -1,  92, 208,  -1, 197, 138, -17,  -7,  -3,  -1, 168,  76,
  -1, 196, 107,  -5,  -1, 182,  -1, 153,  12,  -1,  60, 195,  -9,  -3,  -1,
 122, 167,  -1, 166,  -1, 192,  11,  -1, 194,  -1,  44,  91, -55, -29, -15,
  -7,  -3,  -1, 181,  28,  -1, 137, 152,  -3,  -1, 193,  75,  -1, 180, 106,
  -5,  -3,  -1,  59, 121, 179,  -3,  -1, 151, 136,  -1,  43,  90, -11,  -5,
  -1, 178,  -1, 165,  27,  -1, 177,  -1, 176, 105,  -7,  -3,  -1, 150,  74,
  -1, 164, 120,  -3,  -1, 135,  58, 163, -17,  -7,  -3,  -1,  89, 149,  -1,
  42, 162,  -3,  -1,  26, 161,  -3,  -1,  10, 160, 104,  -7,  -3,  -1, 134,
  73,  -1, 148,  57,  -5,  -1, 147,  -1, 119,   9,  -1,  88, 133, -53, -29,
 -13,  -7,  -3,  -1,  41, 103,  -1, 118, 146,  -1, 145,  -1,  25, 144,  -7,
  -3,  -1,  72, 132,  -1,  87, 117,  -3,  -1,  56, 131,  -1, 102,  71,  -7,
  -3,  -1,  40, 130,  -1,  24, 129,  -7,  -3,  -1, 116,   8,  -1, 128,  86,
  -3,  -1, 101,  55,  -1, 115,  70, -17,  -7,  -3,  -1,  39, 114,  -1, 100,
  23,  -3,  -1,  85, 113,  -3,  -1,   7, 112,  54,  -7,  -3,  -1,  99,  69,
  -1,  84,  38,  -3,  -1,  98,  22,  -3,  -1,   6,  96,  53, -33, -19,  -9,
  -5,  -1,  97,  -1,  83,  68,  -1,  37,  82,  -3,  -1,  21,  81,  -3,  -1,
   5,  80,  52,  -7,  -3,  -1,  67,  36,  -1,  66,  51,  -1,  65,  -1,  20,
   4,  -9,  -3,  -1,  35,  50,  -3,  -1,  64,   3,  19,  -3,  -1,  49,  48,
  34,  -9,  -7,  -3,  -1,  18,  33,  -1,   2,  32,  17,  -3,  -1,   1,  16,
   0
 };
 /* Tree data shared by ht[16] to ht[23], which differ only in linbits. Entry order is significant. */
 static short tab16[] =
 {
 -509,-503,-461,-323,-103, -37, -27, -15,  -7,  -3,  -1, 239, 254,  -1, 223,
 253,  -3,  -1, 207, 252,  -1, 191, 251,  -5,  -1, 175,  -1, 250, 159,  -3,
  -1, 249, 248, 143,  -7,  -3,  -1, 127, 247,  -1, 111, 246, 255,  -9,  -5,
  -3,  -1,  95, 245,  79,  -1, 244, 243, -53,  -1, 240,  -1,  63, -29, -19,
 -13,  -7,  -5,  -1, 206,  -1, 236, 221, 222,  -1, 233,  -1, 234, 217,  -1,
 238,  -1, 237, 235,  -3,  -1, 190, 205,  -3,  -1, 220, 219, 174, -11,  -5,
  -1, 204,  -1, 173, 218,  -3,  -1, 126, 172, 202,  -5,  -3,  -1, 201, 125,
  94, 189, 242, -93,  -5,  -3,  -1,  47,  15,  31,  -1, 241, -49, -25, -13,
  -5,  -1, 158,  -1, 188, 203,  -3,  -1, 142, 232,  -1, 157, 231,  -7,  -3,
  -1, 187, 141,  -1, 216, 110,  -1, 230, 156, -13,  -7,  -3,  -1, 171, 186,
  -1, 229, 215,  -1,  78,  -1, 228, 140,  -3,  -1, 200,  62,  -1, 109,  -1,
 214, 155, -19, -11,  -5,  -3,  -1, 185, 170, 225,  -1, 212,  -1, 184, 169,
  -5,  -1, 123,  -1, 183, 208, 227,  -7,  -3,  -1,  14, 224,  -1,  93, 213,
  -3,  -1, 124, 199,  -1,  77, 139, -75, -45, -27, -13,  -7,  -3,  -1, 154,
 108,  -1, 198,  61,  -3,  -1,  92, 197,  13,  -7,  -3,  -1, 138, 168,  -1,
 153,  76,  -3,  -1, 182, 122,  60, -11,  -5,  -3,  -1,  91, 137,  28,  -1,
 192,  -1, 152, 121,  -1, 226,  -1,  46,  30, -15,  -7,  -3,  -1, 211,  45,
  -1, 210, 209,  -5,  -1,  59,  -1, 151, 136,  29,  -7,  -3,  -1, 196, 107,
  -1, 195, 167,  -1,  44,  -1, 194, 181, -23, -13,  -7,  -3,  -1, 193,  12,
  -1,  75, 180,  -3,  -1, 106, 166, 179,  -5,  -3,  -1,  90, 165,  43,  -1,
 178,  27, -13,  -5,  -1, 177,  -1,  11, 176,  -3,  -1, 105, 150,  -1,  74,
 164,  -5,  -3,  -1, 120, 135, 163,  -3,  -1,  58,  89,  42, -97, -57, -33,
 -19, -11,  -5,  -3,  -1, 149, 104, 161,  -3,  -1, 134, 119, 148,  -5,  -3,
  -1,  73,  87, 103, 162,  -5,  -1,  26,  -1,  10, 160,  -3,  -1,  57, 147,
  -1,  88, 133,  -9,  -3,  -1,  41, 146,  -3,  -1, 118,   9,  25,  -5,  -1,
 145,  -1, 144,  72,  -3,  -1, 132, 117,  -1,  56, 131, -21, -11,  -5,  -3,
  -1, 102,  40, 130,  -3,  -1,  71, 116,  24,  -3,  -1, 129, 128,  -3,  -1,
   8,  86,  55,  -9,  -5,  -1, 115,  -1, 101,  70,  -1,  39, 114,  -5,  -3,
  -1, 100,  85,   7,  23, -23, -13,  -5,  -1, 113,  -1, 112,  54,  -3,  -1,
  99,  69,  -1,  84,  38,  -3,  -1,  98,  22,  -1,  97,  -1,   6,  96,  -9,
  -5,  -1,  83,  -1,  53,  68,  -1,  37,  82,  -1,  81,  -1,  21,   5, -33,
 -23, -13,  -7,  -3,  -1,  52,  67,  -1,  80,  36,  -3,  -1,  66,  51,  20,
  -5,  -1,  65,  -1,   4,  64,  -1,  35,  50,  -3,  -1,  19,  49,  -3,  -1,
   3,  48,  34,  -3,  -1,  18,  33,  -1,   2,  32,  -3,  -1,  17,   1,  16,
   0
 };
 /* Tree data shared by ht[24] to ht[31], which differ only in linbits. Entry order is significant. */
 static short tab24[] =
 {
 -451,-117, -43, -25, -15,  -7,  -3,  -1, 239, 254,  -1, 223, 253,  -3,  -1,
 207, 252,  -1, 191, 251,  -5,  -1, 250,  -1, 175, 159,  -1, 249, 248,  -9,
  -5,  -3,  -1, 143, 127, 247,  -1, 111, 246,  -3,  -1,  95, 245,  -1,  79,
 244, -71,  -7,  -3,  -1,  63, 243,  -1,  47, 242,  -5,  -1, 241,  -1,  31,
 240, -25,  -9,  -1,  15,  -3,  -1, 238, 222,  -1, 237, 206,  -7,  -3,  -1,
 236, 221,  -1, 190, 235,  -3,  -1, 205, 220,  -1, 174, 234, -15,  -7,  -3,
  -1, 189, 219,  -1, 204, 158,  -3,  -1, 233, 173,  -1, 218, 188,  -7,  -3,
  -1, 203, 142,  -1, 232, 157,  -3,  -1, 217, 126,  -1, 231, 172, 255,-235,
 -143, -77, -45, -25, -15,  -7,  -3,  -1, 202, 187,  -1, 141, 216,  -5,  -3,
  -1,  14, 224,  13, 230,  -5,  -3,  -1, 110, 156, 201,  -1,  94, 186,  -9,
  -5,  -1, 229,  -1, 171, 125,  -1, 215, 228,  -3,  -1, 140, 200,  -3,  -1,
  78,  46,  62, -15,  -7,  -3,  -1, 109, 214,  -1, 227, 155,  -3,  -1, 185,
 170,  -1, 226,  30,  -7,  -3,  -1, 225,  93,  -1, 213, 124,  -3,  -1, 199,
  77,  -1, 139, 184, -31, -15,  -7,  -3,  -1, 212, 154,  -1, 169, 108,  -3,
  -1, 198,  61,  -1, 211,  45,  -7,  -3,  -1, 210,  29,  -1, 123, 183,  -3,
  -1, 209,  92,  -1, 197, 138, -17,  -7,  -3,  -1, 168, 153,  -1,  76, 196,
  -3,  -1, 107, 182,  -3,  -1, 208,  12,  60,  -7,  -3,  -1, 195, 122,  -1,
 167,  44,  -3,  -1, 194,  91,  -1, 181,  28, -57, -35, -19,  -7,  -3,  -1,
 137, 152,  -1, 193,  75,  -5,  -3,  -1, 192,  11,  59,  -3,  -1, 176,  10,
  26,  -5,  -1, 180,  -1, 106, 166,  -3,  -1, 121, 151,  -3,  -1, 160,   9,
 144,  -9,  -3,  -1, 179, 136,  -3,  -1,  43,  90, 178,  -7,  -3,  -1, 165,
  27,  -1, 177, 105,  -1, 150, 164, -17,  -9,  -5,  -3,  -1,  74, 120, 135,
  -1,  58, 163,  -3,  -1,  89, 149,  -1,  42, 162,  -7,  -3,  -1, 161, 104,
  -1, 134, 119,  -3,  -1,  73, 148,  -1,  57, 147, -63, -31, -15,  -7,  -3,
  -1,  88, 133,  -1,  41, 103,  -3,  -1, 118, 146,  -1,  25, 145,  -7,  -3,
  -1,  72, 132,  -1,  87, 117,  -3,  -1,  56, 131,  -1, 102,  40, -17,  -7,
  -3,  -1, 130,  24,  -1,  71, 116,  -5,  -1, 129,  -1,   8, 128,  -1,  86,
 101,  -7,  -5,  -1,  23,  -1,   7, 112, 115,  -3,  -1,  55,  39, 114, -15,
  -7,  -3,  -1,  70, 100,  -1,  85, 113,  -3,  -1,  54,  99,  -1,  69,  84,
  -7,  -3,  -1,  38,  98,  -1,  22,  97,  -5,  -3,  -1,   6,  96,  53,  -1,
  83,  68, -51, -37, -23, -15,  -9,  -3,  -1,  37,  82,  -1,  21,  -1,   5,
  80,  -1,  81,  -1,  52,  67,  -3,  -1,  36,  66,  -1,  51,  20,  -9,  -5,
  -1,  65,  -1,   4,  64,  -1,  35,  50,  -1,  19,  49,  -7,  -5,  -3,  -1,
   3,  48,  34,  18,  -1,  33,  -1,   2,  32,  -3,  -1,  17,   1,  -1,  16,
   0
 };
 /* Tree data for htc[0]. The order of the entries is significant. */
 static short tab_c0[] =
 {
 -29, -21, -13,  -7,  -3,  -1,  11,  15,  -1,  13,  14,  -3,  -1,   7,   5,
   9,  -3,  -1,   6,   3,  -1,  10,  12,  -3,  -1,   2,   1,  -1,   4,   8,
   0
 };
 /* Tree data for htc[1]. The order of the entries is significant. */
 static short tab_c1[] =
 {
 -15,  -7,  -3,  -1,  15,  14,  -1,  13,  12,  -3,  -1,  11,  10,  -1,   9,
   8,  -7,  -3,  -1,   7,   6,  -1,   5,   4,  -3,  -1,   3,   2,  -1,   1,
   0
 };
 /*
 	The 32 selectable tables, in table-number order: each entry is { linbits, tree data }.
 	Entries 0, 4 and 14 point at the trivial tab0; 16-23 share tab16 and 24-31
 	share tab24, differing only in linbits.
 	NOTE(review): the number in the leading comment of each row is presumably the
 	dimension of the code table -- confirm against the format specification.
 */
 static struct newhuff ht[] = 
 {
 { /* 0 */ 0 , tab0  } ,
 { /* 2 */ 0 , tab1  } ,
 { /* 3 */ 0 , tab2  } ,
 { /* 3 */ 0 , tab3  } ,
 { /* 0 */ 0 , tab0  } ,
 { /* 4 */ 0 , tab5  } ,
 { /* 4 */ 0 , tab6  } ,
 { /* 6 */ 0 , tab7  } ,
 { /* 6 */ 0 , tab8  } ,
 { /* 6 */ 0 , tab9  } ,
 { /* 8 */ 0 , tab10 } ,
 { /* 8 */ 0 , tab11 } ,
 { /* 8 */ 0 , tab12 } ,
 { /* 16 */ 0 , tab13 } ,
 { /* 0  */ 0 , tab0  } ,
 { /* 16 */ 0 , tab15 } ,
 { /* 16 */ 1 , tab16 } ,
 { /* 16 */ 2 , tab16 } ,
 { /* 16 */ 3 , tab16 } ,
 { /* 16 */ 4 , tab16 } ,
 { /* 16 */ 6 , tab16 } ,
 { /* 16 */ 8 , tab16 } ,
 { /* 16 */ 10, tab16 } ,
 { /* 16 */ 13, tab16 } ,
 { /* 16 */ 4 , tab24 } ,
 { /* 16 */ 5 , tab24 } ,
 { /* 16 */ 6 , tab24 } ,
 { /* 16 */ 7 , tab24 } ,
 { /* 16 */ 8 , tab24 } ,
 { /* 16 */ 9 , tab24 } ,
 { /* 16 */ 11, tab24 } ,
 { /* 16 */ 13, tab24 }
 };
 /*
 	The two "c" tables (tab_c0, tab_c1), both with linbits 0.
 	NOTE(review): presumably the tables for the count1 (quadruple) region -- confirm in the decoder.
 */
 static struct newhuff htc[] = 
 {
 { /* 1 , 1 , */ 0 , tab_c0 } ,
 { /* 1 , 1 , */ 0 , tab_c1 }
 };
 #endif
--- a/src/libmpg123/icy.c
+++ b/src/libmpg123/icy.c
@@ -0,0 +1,25 @@
 #include "icy.h"
 #include <stdlib.h>
 /*
 	Mark the ICY state as holding no metadata.
 	Only the data pointer is touched; interval and next keep their values.
 	Nothing is freed here, so use clear_icy() if data may point to allocated memory.
 */
 void init_icy(struct icy_meta *icy)
 {
 	icy->data = NULL; /* no metadata buffer attached */
 }
 /*
 	Release the metadata buffer (if any) and put the state back to "no data".
 	interval and next are left untouched.
 */
 void clear_icy(struct icy_meta *icy)
 {
 	/* free(NULL) is defined to do nothing, so no guard is needed */
 	free(icy->data);
 	init_icy(icy);
 }
 /*
 	Bring the ICY state back to its initial condition, freeing stored metadata.
 	The data pointer is NULL afterwards.
 */
 void reset_icy(struct icy_meta *icy)
 {
 	clear_icy(icy); /* drops the buffer ... */
 	init_icy(icy);  /* ... and (again) marks the state as empty */
 }
 /*void set_icy(struct icy_meta *icy, char* new_data)
 {
 	if(icy->data) free(icy->data);
 	icy->data = new_data;
 	icy->changed = 1;
 }*/
--- a/src/libmpg123/icy.h
+++ b/src/libmpg123/icy.h
@@ -0,0 +1,25 @@
 /*
 	icy: support for SHOUTcast ICY meta info, an attempt to keep it organized
 	copyright 2006-7 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Thomas Orgis and modelled after patch by Honza
 */
 #ifndef MPG123_ICY_H
 #define MPG123_ICY_H
 #include <sys/types.h>
 #include "mpg123.h"
 /* Storage for ICY meta data belonging to one stream. */
 struct icy_meta
 {
 	char *data;     /* meta data buffer or NULL; released via free() in clear_icy() */
 	off_t interval; /* NOTE(review): presumably the distance between meta data blocks in the stream -- confirm */
 	off_t next;     /* NOTE(review): presumably the count until the next meta data block -- confirm */
 };
 /* set the data pointer to NULL without freeing anything */
 void init_icy(struct icy_meta *);
 /* free the data and set the pointer to NULL */
 void clear_icy(struct icy_meta *);
 /* clear_icy() followed by init_icy() */
 void reset_icy(struct icy_meta *);
 #endif
--- a/src/libmpg123/id3.c
+++ b/src/libmpg123/id3.c
@@ -0,0 +1,601 @@
 #include "mpg123lib_intern.h"
 #include "id3.h"
 /* UTF support definitions */
 /* Common signature of the converters: take len bytes from source and store the result in sb. */
 typedef void (*text_converter)(mpg123_string *sb, unsigned char* source, size_t len);
 static void convert_latin1  (mpg123_string *sb, unsigned char* source, size_t len);
 /* Takes the additional str_be argument, so it does not fit the text_converter type itself. */
 static void convert_utf16   (mpg123_string *sb, unsigned char* source, size_t len, int str_be);
 static void convert_utf16bom(mpg123_string *sb, unsigned char* source, size_t len);
 static void convert_utf16be (mpg123_string *sb, unsigned char* source, size_t len);
 static void convert_utf8    (mpg123_string *sb, unsigned char* source, size_t len);
 /*
 	Converter dispatch, indexed by the text encoding byte of a frame (0 to 3).
 	The order must stay in sync with encoding_widths.
 */
 static const text_converter text_converters[4] = 
 {
 	convert_latin1,
 	convert_utf16bom,
 	convert_utf16be,
 	convert_utf8
 };
 /* Byte width of one code unit per text encoding, same index as text_converters. */
 const int encoding_widths[4] =
 {
 	1, /* 0: latin1 */
 	2, /* 1: UTF-16 with BOM */
 	2, /* 2: UTF-16 big endian */
 	1  /* 3: UTF-8 */
 };
 /* the code starts here... */
 /*
 	Prepare the ID3v2 storage of a handle for first use:
 	version 0 (no tag) and every text field run through mpg123_init_string().
 */
 void init_id3(mpg123_handle *fr)
 {
 	/* nothing there */
 	fr->id3v2.version = 0;

 	mpg123_init_string(&fr->id3v2.title);
 	mpg123_init_string(&fr->id3v2.artist);
 	mpg123_init_string(&fr->id3v2.album);
 	mpg123_init_string(&fr->id3v2.year);
 	mpg123_init_string(&fr->id3v2.comment);
 	mpg123_init_string(&fr->id3v2.genre);
 }
 /*
 	Counterpart to init_id3(): hand every ID3v2 text field to mpg123_free_string().
 	The version field is not changed.
 */
 void exit_id3(mpg123_handle *fr)
 {
 	mpg123_free_string(&fr->id3v2.title);
 	mpg123_free_string(&fr->id3v2.artist);
 	mpg123_free_string(&fr->id3v2.album);
 	mpg123_free_string(&fr->id3v2.year);
 	mpg123_free_string(&fr->id3v2.comment);
 	mpg123_free_string(&fr->id3v2.genre);
 }
 /*
 	Forget the stored ID3v2 info without giving up the string buffers:
 	version goes back to 0 and the fill of each text field is set to 0.
 */
 void reset_id3(mpg123_handle *fr)
 {
 	/* no tag known anymore */
 	fr->id3v2.version = 0;

 	/* empty the strings; nothing is freed here */
 	fr->id3v2.title.fill   = 0;
 	fr->id3v2.artist.fill  = 0;
 	fr->id3v2.album.fill   = 0;
 	fr->id3v2.year.fill    = 0;
 	fr->id3v2.comment.fill = 0;
 	fr->id3v2.genre.fill   = 0;
 }
 /*
 	Store any text in UTF8 encoding; preserve the zero string separator (I don't need strlen for the total size).
 	ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values.
 	So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though).

 	sb:          destination string, filled by the converter for the detected encoding
 	source:      frame text data, the first byte being the encoding byte
 	source_size: number of bytes in source, including the encoding byte
 	Nothing is done (besides a debug message) when source_size is 0.
 */
 void store_id3_text(mpg123_string *sb, char *source, size_t source_size)
 {
 	int encoding;
 	int bwidth;
 	if(!source_size)
 	{
 		debug("Empty id3 data!");
 		return;
 	}
 	/*
 		Read the encoding as unsigned byte: plain char may be signed, and a negative
 		value would slip through the >3 check below and index the tables out of bounds.
 	*/
 	encoding = (unsigned char) source[0];
 	++source;
 	--source_size;
 	debug1("encoding: %i", encoding);
 	/* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16.
 	   UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */
 	if(encoding > 3)
 	{
 		warning1("Unknown text encoding %d, assuming ISO8859-1 - I will probably screw a bit up!", encoding);
 		encoding = 0;
 	}
 	bwidth = encoding_widths[encoding];
 	if(source_size % bwidth)
 	{
 		/* Uh. The size (already without the encoding byte) is no multiple of the code unit width: drop the excess. */
 		warning2("Weird tag size %d for encoding %d - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding);
 		source_size -= source_size % bwidth;
 	}
 	text_converters[encoding](sb, (unsigned char*)source, source_size);
 	if(sb->size) debug1("UTF-8 string (the first one): %s", sb->p);
 	else error("unable to convert string to UTF-8 (out of memory, junk input?)!");
 }
 /*
 	trying to parse ID3v2.3 and ID3v2.4 tags...
 	returns:  0 = read-error... or so... soft issue... ok... somehow...
 	         ... = illegal ID3 header; maybe extended to mean unparseable (too new) header in future
 	          1 = somehow ok...
 	         ...or READER_MORE...
 */
 int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes)
 {
 	#define UNSYNC_FLAG 128
 	#define EXTHEAD_FLAG 64
 	#define EXP_FLAG 32
 	#define FOOTER_FLAG 16
 	#define UNKNOWN_FLAGS 15 /* 00001111*/
 	unsigned char buf[6];
 	unsigned long length=0;
 	unsigned char flags = 0;
 	int ret = 1;
 	int ret2;
 	unsigned char* tagdata = NULL;
 	unsigned char major = first4bytes & 0xff;
 	debug1("ID3v2: major tag version: %i", major);
 	if(major == 0xff) return 0; /* used to be -1 */
 	if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */
 	return ret2;
 	if(buf[0] == 0xff) /* major version, will never be 0xff */
 	return 0; /* used to be -1 */
 	/* second new byte are some nice flags, if these are invalid skip the whole thing */
 	flags = buf[1];
 	debug1("ID3v2: flags 0x%08x", flags);
 	/* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */
 	#define synchsafe_to_long(buf,res) \
 	( \
 		(((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \
 		(res =  (((unsigned long) (buf)[0]) << 21) \
 		     | (((unsigned long) (buf)[1]) << 14) \
 		     | (((unsigned long) (buf)[2]) << 7) \
 		     |  ((unsigned long) (buf)[3]) \
 		,1) \
 	)
 	/* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */
 	#define bytes_to_long(buf,res) \
 	( \
 		major == 3 ? \
 		(res =  (((unsigned long) (buf)[0]) << 24) \
 		     | (((unsigned long) (buf)[1]) << 16) \
 		     | (((unsigned long) (buf)[2]) << 8) \
 		     |  ((unsigned long) (buf)[3]) \
 		,1) : synchsafe_to_long(buf,res) \
 	)
 	/* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number  */
 	/* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */
 	if(!synchsafe_to_long(buf+2,length)) return -1;
 	debug1("ID3v2: tag data length %lu", length);
 	if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length);
 	/* skip if unknown version/scary flags, parse otherwise */
 	if((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 3))
 	{
 		/* going to skip because there are unknown flags set */
 		warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags);
 		if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! */
 		ret = ret2;
 	}
 	else
 	{
 		fr->id3v2.version = major;
 		/* try to interpret that beast */
 		if((tagdata = (unsigned char*) malloc(length+1)) != NULL)
 		{
 			debug("ID3v2: analysing frames...");
 			if((ret2 = fr->rd->read_frame_body(fr,tagdata,length)) > 0)
 			{
 				unsigned long tagpos = 0;
 				debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6);
 				/* going to apply strlen for strings inside frames, make sure that it doesn't overflow! */
 				tagdata[length] = 0;
 				if(flags & EXTHEAD_FLAG)
 				{
 					debug("ID3v2: skipping extended header");
 					if(!bytes_to_long(tagdata, tagpos)) ret = -1;
 				}
 				if(ret > 0)
 				{
 					char id[5];
 					unsigned long framesize;
 					unsigned long fflags; /* need 16 bits, actually */
 					id[4] = 0;
 					/* pos now advanced after ext head, now a frame has to follow */
 					while(tagpos < length-10) /* I want to read at least a full header */
 					{
 						int i = 0;
 						unsigned long pos = tagpos;
 						/* level 1,2,3 - 0 is info from lame/info tag! */
 						/* rva tags with ascending significance, then general frames */
 						#define KNOWN_FRAMES 8
 						const char frame_type[KNOWN_FRAMES][5] = { "COMM", "TXXX", "RVA2", "TPE1", "TALB", "TIT2", "TYER", "TCON" };
 						enum { egal = -1, comment, extra, rva2, artist, album, title, year, genre } tt = egal;
 						/* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */
 						for(; i< 4; ++i) if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58))
 						                     || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) )
 						{
 							debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]);
 							ret = 0; /* used to be -1 */
 							break;
 						}
 						if(ret > 0)
 						{
 							/* 4 bytes id */
 							strncpy(id, (char*) tagdata+pos, 4);
 							pos += 4;
 							/* size as 32 bits */
 							if(!bytes_to_long(tagdata+pos, framesize))
 							{
 								ret = -1;
 								error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id);
 								break;
 							}
 							if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize);
 							tagpos += 10 + framesize; /* the important advancement in whole tag */
 							pos += 4;
 							fflags = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]);
 							pos += 2;
 							/* for sanity, after full parsing tagpos should be == pos */
 							/* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */
 							/* %0abc0000 %0h00kmnp */
 							#define BAD_FFLAGS (unsigned long) 36784
 							#define PRES_TAG_FFLAG 16384
 							#define PRES_FILE_FFLAG 8192
 							#define READ_ONLY_FFLAG 4096
 							#define GROUP_FFLAG 64
 							#define COMPR_FFLAG 8
 							#define ENCR_FFLAG 4
 							#define UNSYNC_FFLAG 2
 							#define DATLEN_FFLAG 1
 							/* shall not or want not handle these */
 							if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG))
 							{
 								warning("ID3v2: skipping invalid/unsupported frame");
 								continue;
 							}
 							for(i = 0; i < KNOWN_FRAMES; ++i)
 							if(!strncmp(frame_type[i], id, 4)){ tt = i; break; }
 							if(tt != egal)
 							{
 								int rva_mode = -1; /* mix / album */
 								unsigned long realsize = framesize;
 								unsigned char* realdata = tagdata+pos;
 								if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG))
 								{
 									unsigned long ipos = 0;
 									unsigned long opos = 0;
 									debug("Id3v2: going to de-unsync the frame data");
 									/* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */
 									/* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */
 									/* standard mandates that de-unsync should always be safe if flag is set */
 									realdata = (unsigned char*) malloc(framesize); /* will need <= bytes */
 									if(realdata == NULL)
 									{
 										error("ID3v2: unable to allocate working buffer for de-unsync");
 										continue;
 									}
 									/* now going byte per byte through the data... */
 									realdata[0] = tagdata[pos];
 									opos = 1;
 									for(ipos = pos+1; ipos < pos+framesize; ++ipos)
 									{
 										if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff)))
 										{
 											realdata[opos++] = tagdata[ipos];
 										}
 									}
 									realsize = opos;
 									debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize);
 								}
 								pos = 0; /* now at the beginning again... */
 								switch(tt)
 								{
 									case comment: /* a comment that perhaps is a RVA / fr->rva.ALBUM/AUDIOPHILE / fr->rva.MIX/RADIO one */
 									{
 										/* Text encoding          $xx */
 										/* Language               $xx xx xx */
 										/* policy about encodings: do not care for now here */
 										/* if(realdata[0] == 0)  */
 										{
 											/* don't care about language */
 											pos = 4;
 											if(   !strcasecmp((char*)realdata+pos, "rva")
 											   || !strcasecmp((char*)realdata+pos, "fr->rva.mix")
 											   || !strcasecmp((char*)realdata+pos, "fr->rva.radio"))
 											rva_mode = 0;
 											else if(   !strcasecmp((char*)realdata+pos, "fr->rva.album")
 											        || !strcasecmp((char*)realdata+pos, "fr->rva.audiophile")
 											        || !strcasecmp((char*)realdata+pos, "fr->rva.user"))
 											rva_mode = 1;
 											if((rva_mode > -1) && (fr->rva.level[rva_mode] <= tt+1))
 											{
 												char* comstr;
 												size_t comsize = realsize-4-(strlen((char*)realdata+pos)+1);
 												if(VERBOSE3) fprintf(stderr, "Note: evaluating %s data for RVA\n", realdata+pos);
 												if((comstr = (char*) malloc(comsize+1)) != NULL)
 												{
 													memcpy(comstr,realdata+realsize-comsize, comsize);
 													comstr[comsize] = 0;
 													/* hm, what about utf16 here? */
 													fr->rva.gain[rva_mode] = atof(comstr);
 													if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
 													fr->rva.peak[rva_mode] = 0;
 													fr->rva.level[rva_mode] = tt+1;
 													free(comstr);
 												}
 												else error("could not allocate memory for rva comment interpretation");
 											}
 											else
 											{
 												if(!strcasecmp((char*)realdata+pos, ""))
 												{
 													/* only add general comments */
 													realdata[pos] = realdata[pos-4]; /* the encoding field copied */
 													store_id3_text(&fr->id3v2.comment, (char*)realdata+pos, realsize-4);
 												}
 											}
 										}
 									}
 									break;
 									case extra: /* perhaps foobar2000's work */
 									{
 										/* Text encoding          $xx */
 										/* unicode would hurt in string comparison... */
 										if(realdata[0] == 0)
 										{
 											int is_peak = 0;
 											pos = 1;
 											if(!strncasecmp((char*)realdata+pos, "replaygain_track_",17))
 											{
 												debug("ID3v2: track gain/peak");
 												rva_mode = 0;
 												if(!strcasecmp((char*)realdata+pos, "replaygain_track_peak")) is_peak = 1;
 												else if(strcasecmp((char*)realdata+pos, "replaygain_track_gain")) rva_mode = -1;
 											}
 											else
 											if(!strncasecmp((char*)realdata+pos, "replaygain_album_",17))
 											{
 												debug("ID3v2: album gain/peak");
 												rva_mode = 1;
 												if(!strcasecmp((char*)realdata+pos, "replaygain_album_peak")) is_peak = 1;
 												else if(strcasecmp((char*)realdata+pos, "replaygain_album_gain")) rva_mode = -1;
 											}
 											if((rva_mode > -1) && (fr->rva.level[rva_mode] <= tt+1))
 											{
 												char* comstr;
 												size_t comsize = realsize-1-(strlen((char*)realdata+pos)+1);
 												if(VERBOSE3) fprintf(stderr, "Note: evaluating %s data for RVA\n", realdata+pos);
 												if((comstr = (char*) malloc(comsize+1)) != NULL)
 												{
 													memcpy(comstr,realdata+realsize-comsize, comsize);
 													comstr[comsize] = 0;
 													if(is_peak)
 													{
 														fr->rva.peak[rva_mode] = atof(comstr);
 														if(VERBOSE3) fprintf(stderr, "Note: RVA peak %fdB\n", fr->rva.peak[rva_mode]);
 													}
 													else
 													{
 														fr->rva.gain[rva_mode] = atof(comstr);
 														if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]);
 													}
 													fr->rva.level[rva_mode] = tt+1;
 													free(comstr);
 												}
 												else error("could not allocate memory for rva comment interpretation");
 											}
 										}
 									}
 									break;
 									case rva2: /* "the" RVA tag */
 									{
 										#ifdef HAVE_INTTYPES_H
 										/* starts with null-terminated identification */
 										if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata);
 										/* default: some individual value, mix mode */
 										rva_mode = 0;
 										if( !strncasecmp((char*)realdata, "album", 5)
 										    || !strncasecmp((char*)realdata, "audiophile", 10)
 										    || !strncasecmp((char*)realdata, "user", 4))
 										rva_mode = 1;
 										if(fr->rva.level[rva_mode] <= tt+1)
 										{
 											pos += strlen((char*) realdata) + 1;
 											if(realdata[pos] == 1)
 											{
 												++pos;
 												/* only handle master channel */
 												debug("ID3v2: it is for the master channel");
 												/* two bytes adjustment, one byte for bits representing peak - n bytes for peak */
 												/* 16 bit signed integer = dB * 512 */
 												/* we already assume short being 16 bit */
 												fr->rva.gain[rva_mode] = (float) ((((short) realdata[pos]) << 8) | ((short) realdata[pos+1])) / 512;
 												pos += 2;
 												if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
 												/* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */
 												fr->rva.peak[rva_mode] = 0;
 												fr->rva.level[rva_mode] = tt+1;
 											}
 										}
 										#else
 										warning("ID3v2: Cannot parse RVA2 value because I don't have a guaranteed 16 bit signed integer type");
 										#endif
 									}
 									break;
 									/* non-rva metainfo, simply store... */
 									case artist:
 										debug("ID3v2: parsing artist info");
 										store_id3_text(&fr->id3v2.artist, (char*) realdata, realsize);
 									break;
 									case album:
 										debug("ID3v2: parsing album info");
 										store_id3_text(&fr->id3v2.album, (char*) realdata, realsize);
 									break;
 									case title:
 										debug("ID3v2: parsing title info");
 										store_id3_text(&fr->id3v2.title, (char*) realdata, realsize);
 									break;
 									case year:
 										debug("ID3v2: parsing year info");
 										store_id3_text(&fr->id3v2.year, (char*) realdata, realsize);
 									break;
 									case genre:
 										debug("ID3v2: parsing genre info");
 										store_id3_text(&fr->id3v2.genre, (char*) realdata, realsize);
 									break;
 									default: error1("ID3v2: unknown frame type %i", tt);
 								}
 								if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) free(realdata);
 							}
 							#undef BAD_FFLAGS
 							#undef PRES_TAG_FFLAG
 							#undef PRES_FILE_FFLAG
 							#undef READ_ONLY_FFLAG
 							#undef GROUP_FFLAG
 							#undef COMPR_FFLAG
 							#undef ENCR_FFLAG
 							#undef UNSYNC_FFLAG
 							#undef DATLEN_FFLAG
 						}
 						else break;
 						#undef KNOWN_FRAMES
 					}
 				}
 			}
 			else
 			{
 				error("ID3v2: Duh, not able to read ID3v2 tag data.");
 				ret = ret2;
 			}
 			free(tagdata);
 		}
 		else
 		{
 			error1("ID3v2Arrg! Unable to allocate %lu bytes for interpreting ID3v2 data - trying to skip instead.", length);
 			if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) ret = ret2; /* will not store data in backbuff! */
 			else ret = 0;
 		}
 	}
 	/* skip footer if present */
 	if((ret > 0) && (flags & FOOTER_FLAG) && ((ret2 = fr->rd->skip_bytes(fr,length)) < 0)) ret = ret2;
 	return ret;
 	#undef UNSYNC_FLAG
 	#undef EXTHEAD_FLAG
 	#undef EXP_FLAG
 	#undef FOOTER_FLAG
 	#undef UNKOWN_FLAGS
 }
 /*
 	Convert l bytes of ISO-8859-1 text at s into UTF-8, stored zero-terminated in sb.
 	Bytes below 0x80 are copied, all others become a two-byte sequence.
 	If the string cannot be resized, sb is freed and nothing is stored.
 */
 static void convert_latin1(mpg123_string *sb, unsigned char* s, size_t l)
 {
 	size_t utf8_len = l; /* grows by one for every non-ASCII byte */
 	size_t in;
 	size_t out = 0;
 	unsigned char *dest;
 	for(in=0; in<l; ++in) utf8_len += (s[in] >= 0x80);
 	debug1("UTF-8 length: %lu", (unsigned long)utf8_len);
 	/* Room for the text plus the terminating zero. */
 	if(!mpg123_resize_string(sb, utf8_len+1))
 	{
 		mpg123_free_string(sb);
 		return;
 	}
 	dest = (unsigned char*) sb->p;
 	for(in=0; in<l; ++in)
 	{
 		const unsigned char c = s[in];
 		if(c >= 0x80)
 		{
 			/* upper two bits go into the lead byte, lower six into the continuation byte */
 			dest[out++] = 0xc0 | (c>>6);
 			dest[out++] = 0x80 | (c & 0x3f);
 		}
 		else dest[out++] = c;
 	}
 	sb->p[utf8_len] = 0;
 	sb->fill = utf8_len+1;
 }
 /* Merge lead surrogate f and trail surrogate s into the code point (0x10000 to 0x10ffff) they encode. */
 #define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 )
 /* UTF-8 byte count for code point x; FULLPOINT never exceeds 0x10ffff, so 4 is the maximum here. */
 #define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4)))
 /*
 	Convert l bytes of UTF-16 text at s (without BOM) into UTF-8, stored zero-terminated in sb.
 	str_be != 0 means big-endian input, 0 means little-endian.
 	A trailing odd byte is ignored. Conversion stops silently before a lead surrogate
 	(0xd800-0xdbff) that is not followed by a trail surrogate (0xdc00-0xdfff).
 	A trail surrogate without preceding lead is not detected; it is encoded like any other 16 bit value.
 	If the string cannot be resized, sb is freed and nothing is stored.
 */
 static void convert_utf16(mpg123_string *sb, unsigned char* s, size_t l, int str_be)
 {
 	size_t i;
 	size_t n; /* count of input bytes that really get converted, always even */
 	unsigned char *p;
 	size_t length = 0; /* the resulting UTF-8 length */
 	size_t high = 0;
 	size_t low  = 1;
 	if(!str_be) /* little-endian */
 	{
 		high = 1; /* The second byte is the high byte. */
 		low  = 0; /* The first byte is the low byte. */
 	}
 	/* Only whole 16 bit units; computed this way to avoid the size_t underflow of l-1 for l == 0. */
 	n = l - (l % 2);
 	/* first: get length, check for errors -- stop at first one */
 	for(i=0; i < n; i+=2)
 	{
 		unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low];
 		/* The top six bits identify a surrogate; a looser mask also catches normal characters like U+FFFD. */
 		if((point & 0xfc00) == 0xd800) /* lead surrogate */
 		{
 			unsigned long second = (i+3 < l) ? ((unsigned long) s[i+2+high]<<8) + s[i+2+low] : 0;
 			if((second & 0xfc00) == 0xdc00) /* good... */
 			{
 				point = FULLPOINT(point,second);
 				length += UTF8LEN(point); /* possibly 4 bytes */
 				i+=2; /* We overstepped one word. */
 			}
 			else /* if no valid pair, break here */
 			{
 				n = i; /* Forget the half pair, END! */
 				break;
 			}
 		}
 		else length += UTF8LEN(point); /* 1,2 or 3 bytes */
 	}
 	if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
 	/* Now really convert, skip checks as these have been done just before. */
 	p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
 	for(i=0; i < n; i+=2)
 	{
 		unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low];
 		if((codepoint & 0xfc00) == 0xd800) /* lead surrogate, trail already verified in the first pass */
 		{
 			unsigned long second = ((unsigned long) s[i+2+high]<<8) + s[i+2+low];
 			codepoint = FULLPOINT(codepoint,second);
 			i+=2; /* We overstepped one word. */
 		}
 		if(codepoint < 0x80) *p++ = (unsigned char) codepoint;
 		else if(codepoint < 0x800)
 		{
 			*p++ = 0xc0 | (codepoint>>6);
 			*p++ = 0x80 | (codepoint & 0x3f);
 		}
 		else if(codepoint < 0x10000)
 		{
 			*p++ = 0xe0 | (codepoint>>12);
 			*p++ = 0x80 | ((codepoint>>6) & 0x3f);
 			*p++ = 0x80 | (codepoint & 0x3f);
 		}
 		else if (codepoint < 0x200000)
 		{
 			*p++ = 0xf0 | (codepoint>>18);
 			*p++ = 0x80 | ((codepoint>>12) & 0x3f);
 			*p++ = 0x80 | ((codepoint>>6) & 0x3f);
 			*p++ = 0x80 | (codepoint & 0x3f);
 		} /* ignore bigger ones (that are not possible here anyway) */
 	}
 	sb->p[sb->size-1] = 0; /* paranoia... */
 	sb->fill = sb->size;
 }
 #undef UTF8LEN
 #undef FULLPOINT
 /* Convert len bytes of UTF-16 text without BOM to UTF-8 in sb, reading it as big-endian. */
 static void convert_utf16be(mpg123_string *sb, unsigned char* source, size_t len)
 {
 	const int big_endian = 1; /* the str_be argument of convert_utf16() */
 	convert_utf16(sb, source, len, big_endian);
 }
 /*
 	Convert len bytes of UTF-16 text that should start with a byte order mark to UTF-8 in sb.
 	0xff 0xfe selects little-endian, 0xfe 0xff big-endian; the mark itself is not converted.
 	Without a recognizable mark, nothing is skipped and the text is read as big-endian,
 	so that the first character is not lost.
 	If less than one complete 16 bit unit is available, sb is freed and nothing is stored.
 */
 static void convert_utf16bom(mpg123_string *sb, unsigned char* source, size_t len)
 {
 	int big_endian = 1; /* the default when there is no BOM */
 	if(len < 2){ mpg123_free_string(sb); return; }
 	if(source[0] == 0xff && source[1] == 0xfe) /* Little-endian */
 	{
 		big_endian = 0;
 		source += 2;
 		len    -= 2;
 	}
 	else if(source[0] == 0xfe && source[1] == 0xff) /* Big-endian */
 	{
 		source += 2;
 		len    -= 2;
 	}
 	/* Nothing but the BOM (or a lone byte after it): do not hand a zero length to the converter. */
 	if(len < 2){ mpg123_free_string(sb); return; }
 	convert_utf16(sb, source, len, big_endian);
 }
 /*
 	Store len bytes of UTF-8 text from source in sb, adding a terminating zero.
 	The data is copied as it is; if the string cannot be resized, sb is freed instead.
 */
 static void convert_utf8(mpg123_string *sb, unsigned char* source, size_t len)
 {
 	if(!mpg123_resize_string(sb, len+1))
 	{
 		mpg123_free_string(sb);
 		return;
 	}
 	memcpy(sb->p, source, len);
 	sb->p[len] = 0;
 	sb->fill = len+1;
 }
--- a/src/libmpg123/id3.h
+++ b/src/libmpg123/id3.h
@@ -0,0 +1,12 @@
 /* id3.h: prototypes of the ID3 handling routines working on a mpg123_handle */
 #ifndef MPG123_ID3_H
 #define MPG123_ID3_H
 /* really need it _here_! */
 /* NOTE(review): presumably needed for the mpg123_handle type used below -- confirm */
 #include "frame.h"
 /* NOTE(review): judging by the names, setup / teardown / reset of the ID3 data in the handle -- confirm against the definitions */
 void init_id3(mpg123_handle *fr);
 void exit_id3(mpg123_handle *fr);
 void reset_id3(mpg123_handle *fr);
 /*
 	Parse (or skip) an ID3v2 tag from the reader of fr.
 	NOTE(review): first4bytes presumably holds the first four bytes of the tag header already read by the caller;
 	the return value looks like < 0 for reader errors, 0 for a skipped tag and > 0 for success -- confirm against the full definition.
 */
 int  parse_new_id3(mpg123_handle *fr, unsigned long first4bytes);
 #endif
--- a/src/libmpg123/l2tables.h
+++ b/src/libmpg123/l2tables.h
@@ -0,0 +1,164 @@
 /*
 	l2tables.h: Layer 2 Alloc tables
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 	most other tables are calculated on program start (which is (of course)	not ISO-conform)
 	Layer-3 huffman table is in huffman.h
 */
 #ifndef _MPG123_L2TABLES_H_
 #define _MPG123_L2TABLES_H_
 /*
 	Layer 2 allocation table 0, used with a subband limit of 27 (see II_select_table() in layer2.c).
 	Layout as walked by II_step_one()/II_step_two(): one group of entries per subband.
 	The "bits" member of a group's first entry is the width of the allocation index in the stream,
 	and the group has 1<<bits entries. The entry selected by a non-zero allocation index gives the
 	number of bits to read ("bits") and "d": negative d is added to each sample read,
 	d of 3, 5 or 9 selects the matching grouping table.
 	(The initializers apparently are {bits, d}; struct al_table is declared elsewhere.)
 	Here: 11 groups of 16, 12 groups of 8, 4 groups of 4 entries.
 */
 const struct al_table alloc_0[] = {
 	{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
 	{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
 	{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
 	{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
 	{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
 	{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767} };
 /*
 	Layer 2 allocation table 1, used with a subband limit of 30 (see II_select_table() in layer2.c).
 	One group of entries per subband; the "bits" member of a group's first entry is the width of the
 	allocation index and the group has 1<<bits entries.
 	Here: 11 groups of 16, 12 groups of 8, 7 groups of 4 entries.
 */
 const struct al_table alloc_1[] = {
 	{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
 	{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
 	{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
 	{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
 	{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
 	{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 	{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767},
 	{2,0},{5,3},{7,5},{16,-32767} };
 /*
 	Layer 2 allocation table 2, used with a subband limit of 8 (see II_select_table() in layer2.c).
 	One group of entries per subband; the "bits" member of a group's first entry is the width of the
 	allocation index and the group has 1<<bits entries.
 	Here: 2 groups of 16, 6 groups of 8 entries.
 */
 const struct al_table alloc_2[] = {
 	{4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},
 	{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},
 	{4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},
 	{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63} };
 /*
 	Layer 2 allocation table 3, used with a subband limit of 12 (see II_select_table() in layer2.c).
 	One group of entries per subband; the "bits" member of a group's first entry is the width of the
 	allocation index and the group has 1<<bits entries.
 	Here: 2 groups of 16, 10 groups of 8 entries.
 */
 const struct al_table alloc_3[] = {
 	{4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},
 	{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},
 	{4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},
 	{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63} };
 /*
 	Layer 2 allocation table 4, used with a subband limit of 30; II_select_table() in layer2.c
 	picks it when fr->sampling_frequency >= 3.
 	One group of entries per subband; the "bits" member of a group's first entry is the width of the
 	allocation index and the group has 1<<bits entries.
 	Here: 4 groups of 16, 7 groups of 8, 19 groups of 4 entries.
 */
 const struct al_table alloc_4[] = {
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 		{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 		{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 		{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},
 	{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
 		{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
 	{2,0},{5,3},{7,5},{10,9},
    {2,0},{5,3},{7,5},{10,9},
    {2,0},{5,3},{7,5},{10,9},
    {2,0},{5,3},{7,5},{10,9},
    {2,0},{5,3},{7,5},{10,9},
    {2,0},{5,3},{7,5},{10,9},
    {2,0},{5,3},{7,5},{10,9},
    {2,0},{5,3},{7,5},{10,9},
    {2,0},{5,3},{7,5},{10,9}  };
 #endif
--- a/src/libmpg123/layer1.c
+++ b/src/libmpg123/layer1.c
@@ -0,0 +1,153 @@
 /*
 	layer1.c: the layer 1 decoder
 	copyright 1995-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 	may have a few bugs after last optimization ... 
 */
 #include "mpg123lib_intern.h"
 #include "getbits.h"
 /*
 	Layer 1, step one: read the bit allocation (4 bits each) and, for every non-zero
 	allocation, the scale factor index (6 bits each) from the bit stream of fr.
 	With two channels, subbands below fr->jsbound carry one allocation per channel,
 	the others a single allocation but two scale factor indices.
 	Both balloc and scale_index are filled linearly, in bit stream order.
 */
 void I_step_one(unsigned int balloc[], unsigned int scale_index[2][SBLIMIT],mpg123_handle *fr)
 {
  unsigned int *scf = (unsigned int *) scale_index;
  int sb;
  if(fr->stereo != 2)
  {
    /* one channel: one allocation per subband */
    for(sb=0; sb<SBLIMIT; ++sb)
      balloc[sb] = getbits(fr, 4);
    for(sb=0; sb<SBLIMIT; ++sb)
    {
      if(balloc[sb])
        *scf++ = getbits(fr, 6);
    }
  }
  else
  {
    const int jsbound = fr->jsbound;
    unsigned int *alloc = balloc;
    /* first all allocations ... */
    for(sb=0; sb<jsbound; ++sb)
    {
      alloc[0] = getbits(fr, 4);
      alloc[1] = getbits(fr, 4);
      alloc += 2;
    }
    for(sb=jsbound; sb<SBLIMIT; ++sb)
    {
      *alloc = getbits(fr, 4);
      ++alloc;
    }
    /* ... then the scale factor indices, walking the allocations in the same order */
    alloc = balloc;
    for(sb=0; sb<jsbound; ++sb)
    {
      if(alloc[0])
        *scf++ = getbits(fr, 6);
      if(alloc[1])
        *scf++ = getbits(fr, 6);
      alloc += 2;
    }
    for(sb=jsbound; sb<SBLIMIT; ++sb)
    {
      if(*alloc)
      {
        *scf++ = getbits(fr, 6);
        *scf++ = getbits(fr, 6);
      }
      ++alloc;
    }
  }
 }
 /*
 	Layer 1, step two: read one sample per subband (and channel) and dequantize it.
 	balloc and scale_index are the linear lists filled by I_step_one(); a subband with
 	allocation n (non-zero) has a sample of n+1 bits in the stream, the result is
 	(sample - 2^n + 1) * fr->muls[n+1][scale factor index], zero for unallocated subbands.
 	With two channels, subbands from fr->jsbound on share one sample between the channels,
 	each channel using its own scale factor index.
 	Subbands from fr->down_sample_sblimit up to 32 are cleared at the end.
 */
 void I_step_two(real fraction[2][SBLIMIT],unsigned int balloc[2*SBLIMIT],
 	unsigned int scale_index[2][SBLIMIT],mpg123_handle *fr)
 {
  int i,n;
  int smpb[2*SBLIMIT]; /* values: 0-65535 */
  int *sample;
  register unsigned int *ba;
  register unsigned int *sca = (unsigned int *) scale_index;
  if(fr->stereo == 2) {
    int jsbound = fr->jsbound;
    register real *f0 = fraction[0];
    register real *f1 = fraction[1];
    /* first pass: fetch all raw sample values in stream order */
    ba = balloc;
    for (sample=smpb,i=0;i<jsbound;i++)  {
      if ((n = *ba++))
        *sample++ = getbits(fr, n+1);
      if ((n = *ba++))
        *sample++ = getbits(fr, n+1);
    }
    for (i=jsbound;i<SBLIMIT;i++)
      if ((n = *ba++))
        *sample++ = getbits(fr, n+1);
    /* second pass: dequantize; -(1<<n) instead of (-1)<<n, as left-shifting a negative value is undefined */
    ba = balloc;
    for (sample=smpb,i=0;i<jsbound;i++) {
      if((n=*ba++))
        *f0++ = (real) ( (-(1<<n)) + (*sample++) + 1) * fr->muls[n+1][*sca++];
      else
        *f0++ = 0.0;
      if((n=*ba++))
        *f1++ = (real) ( (-(1<<n)) + (*sample++) + 1) * fr->muls[n+1][*sca++];
      else
        *f1++ = 0.0;
    }
    for (i=jsbound;i<SBLIMIT;i++) {
      if ((n=*ba++)) {
        /* one shared sample, separate scale factors for the two channels */
        real samp = ( (-(1<<n)) + (*sample++) + 1);
        *f0++ = samp * fr->muls[n+1][*sca++];
        *f1++ = samp * fr->muls[n+1][*sca++];
      }
      else
        *f0++ = *f1++ = 0.0;
    }
    for(i=fr->down_sample_sblimit;i<32;i++)
      fraction[0][i] = fraction[1][i] = 0.0;
  }
  else {
    register real *f0 = fraction[0];
    ba = balloc;
    for (sample=smpb,i=0;i<SBLIMIT;i++)
      if ((n = *ba++))
        *sample++ = getbits(fr, n+1);
    ba = balloc;
    for (sample=smpb,i=0;i<SBLIMIT;i++) {
      if((n=*ba++))
        *f0++ = (real) ( (-(1<<n)) + (*sample++) + 1) * fr->muls[n+1][*sca++];
      else
        *f0++ = 0.0;
    }
    for(i=fr->down_sample_sblimit;i<32;i++)
      fraction[0][i] = 0.0;
  }
 }
 /*
 	Decode one layer 1 frame: set fr->jsbound, read allocation and scale factors once,
 	then dequantize and synthesize SCALE_BLOCK sample blocks.
 	Returns the sum of the values returned by the synth calls (the clip count).
 */
 int do_layer1(mpg123_handle *fr)
 {
  unsigned int balloc[2*SBLIMIT];
  unsigned int scale_index[2][SBLIMIT];
  real aligned(16) fraction[2][SBLIMIT];
  int clip = 0;
  int block;
  int single = fr->single;
  if(fr->mode == MPG_MD_JOINT_STEREO)
    fr->jsbound = (fr->mode_ext<<2)+4;
  else
    fr->jsbound = 32;
  /* I don't see mixing handled here */
  if(fr->stereo == 1 || single == SINGLE_MIX)
    single = SINGLE_LEFT;
  I_step_one(balloc,scale_index,fr);
  for(block=0; block<SCALE_BLOCK; ++block)
  {
    I_step_two(fraction,balloc,scale_index,fr);
    if(single == SINGLE_STEREO)
    {
      clip += (fr->synth)( (real *) fraction[0], 0, fr, 0);
      clip += (fr->synth)( (real *) fraction[1], 1, fr, 1);
    }
    else
    {
      clip += (fr->synth_mono)( (real *) fraction[single], fr);
    }
  }
  return clip;
 }
--- a/src/libmpg123/layer2.c
+++ b/src/libmpg123/layer2.c
@@ -0,0 +1,335 @@
 /*
 	layer2.c: the layer 2 decoder, root of mpg123
 	copyright 1994-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 	mpg123 started as mp2 decoder a long time ago...
 */
 #include "mpg123lib_intern.h"
 #include "l2tables.h"
 #include "getbits.h"
 /* Lookup tables for grouped samples, filled by init_layer2(): three values per code. */
 static int grp_3tab[32 * 3] = { 0, };   /* used: 27 */
 static int grp_5tab[128 * 3] = { 0, };  /* used: 125 */
 static int grp_9tab[1024 * 3] = { 0, }; /* used: 729 */
 /* Base multipliers; entry k seeds the table fr->muls[k] in init_layer2_stuff(). */
 static const double mulmul[27] =
 {
 	0.0 , -2.0/3.0 , 2.0/3.0 ,
 	2.0/7.0 , 2.0/15.0 , 2.0/31.0, 2.0/63.0 , 2.0/127.0 , 2.0/255.0 ,
 	2.0/511.0 , 2.0/1023.0 , 2.0/2047.0 , 2.0/4095.0 , 2.0/8191.0 ,
 	2.0/16383.0 , 2.0/32767.0 , 2.0/65535.0 ,
 	-4.0/5.0 , -2.0/5.0 , 2.0/5.0, 4.0/5.0 ,
 	-8.0/9.0 , -4.0/9.0 , -2.0/9.0 , 2.0/9.0 , 4.0/9.0 , 8.0/9.0
 };
 /*
 	Fill the three grouping tables: a code is taken as a three-digit number in base 3, 5 or 9,
 	and each digit (least significant first) is translated via base[] into the value stored.
 */
 void init_layer2(void)
 {
  const int base[3][9] = {
     { 1 , 0, 2 , } ,
     { 17, 18, 0 , 19, 20 , } ,
     { 21, 1, 22, 23, 0, 24, 25, 2, 26 } };
  const int tablen[3] = { 3 , 5 , 9 };
  int *tables[3] = { grp_3tab , grp_5tab , grp_9tab };
  int t;
  for(t=0; t<3; ++t)
  {
    const int len = tablen[t];
    const int codes = len*len*len;
    int *triple = tables[t];
    int code;
    for(code=0; code<codes; ++code)
    {
      triple[0] = base[t][code % len];
      triple[1] = base[t][(code / len) % len];
      triple[2] = base[t][code / (len*len)];
      triple += 3;
    }
  }
 }
 /*
 	Fill the 27 multiplier tables fr->muls[k] from mulmul[k] using the table routine
 	selected by opt_init_layer2_table(fr), and put a zero after the entries it wrote.
 */
 void init_layer2_stuff(mpg123_handle *fr)
 {
  int k;
  for(k=0;k<27;k++)
  {
    /* the table routine returns the position after its last entry */
    real *end = opt_init_layer2_table(fr)(fr, fr->muls[k], mulmul[k]);
    *end = 0.0;
  }
 }
 /*
 	Write 63 entries m * 2^(j/3) to table, with j running from 3 down to -59.
 	fr is not used here. Returns the position after the last entry written.
 */
 real* init_layer2_table(mpg123_handle *fr, real *table, double m)
 {
 	int i;
 	for(i=0; i<63; ++i)
 	table[i] = m * pow(2.0,(double) (3-i) / 3.0);
 	return table+63;
 }
 #ifdef OPT_MMXORSSE
 /*
 	Like init_layer2_table(), but the entries are scaled by 16384 when fr->p.down_sample is zero.
 	Returns the position after the last of the 63 entries written.
 */
 real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m)
 {
 	/* scaling by a power of two up front gives the same products as scaling each entry */
 	const double factor = fr->p.down_sample ? m : 16384 * m;
 	int i;
 	for(i=0; i<63; ++i)
 	table[i] = factor * pow(2.0,(double) (3-i) / 3.0);
 	return table+63;
 }
 #endif
 /*
 	Layer 2, step one: read bit allocation, scale factor selection info and scale factors.
 	bit_alloc gets one allocation index per subband and channel (with two channels, subbands
 	from fr->jsbound on get the same value for both), scale gets three scale factor indices
 	for every non-zero allocation. The widths of the allocation indices come from fr->alloc.
 */
 void II_step_one(unsigned int *bit_alloc,int *scale,mpg123_handle *fr)
 {
    const int second_channel = fr->stereo-1; /* 0 with one channel, 1 with two */
    const int sblimit = fr->II_sblimit;
    const int jsbound = fr->jsbound;
    const int alloc_count = fr->II_sblimit<<second_channel;
    const struct al_table *alloc1 = fr->alloc;
    unsigned int scfsi_buf[64];
    unsigned int *scfsi = scfsi_buf;
    unsigned int *bita = bit_alloc;
    int i,step;
    if(second_channel)
    {
      /* separate allocations for both channels */
      for(i=jsbound; i; --i)
      {
        step = alloc1->bits;
        bita[0] = (char) getbits(fr, step);
        bita[1] = (char) getbits(fr, step);
        bita += 2;
        alloc1 += (1<<step);
      }
      /* one allocation, stored for both channels */
      for(i=sblimit-jsbound; i; --i)
      {
        step = alloc1->bits;
        bita[0] = (char) getbits(fr, step);
        bita[1] = bita[0];
        bita += 2;
        alloc1 += (1<<step);
      }
    }
    else /* mono */
    {
      for(i=sblimit; i; --i)
      {
        step = alloc1->bits;
        *bita = (char) getbits(fr, step);
        ++bita;
        alloc1 += (1<<step);
      }
    }
    /* two bits of selection info for every allocated subband/channel */
    for(i=0; i<alloc_count; ++i)
    {
      if(bit_alloc[i])
        *scfsi++ = (char) getbits_fast(fr, 2);
    }
    /* the selection info tells which of the three scale factors are transmitted */
    scfsi = scfsi_buf;
    for(i=0; i<alloc_count; ++i)
    {
      int sc;
      if(!bit_alloc[i])
        continue;
      switch(*scfsi++)
      {
        case 0: /* all three transmitted */
          scale[0] = getbits_fast(fr, 6);
          scale[1] = getbits_fast(fr, 6);
          scale[2] = getbits_fast(fr, 6);
          break;
        case 1: /* first two share one */
          sc = getbits_fast(fr, 6);
          scale[0] = sc;
          scale[1] = sc;
          scale[2] = getbits_fast(fr, 6);
          break;
        case 2: /* one for all */
          sc = getbits_fast(fr, 6);
          scale[0] = sc;
          scale[1] = sc;
          scale[2] = sc;
          break;
        default: /* case 3: last two share one */
          scale[0] = getbits_fast(fr, 6);
          sc = getbits_fast(fr, 6);
          scale[1] = sc;
          scale[2] = sc;
          break;
      }
      scale += 3;
    }
 }
 /*
 	Layer 2, step two: read and dequantize three samples per subband and channel.
 	bit_alloc and scale are the lists filled by II_step_one(); x1 (0, 1 or 2) selects which
 	of the three scale factors of a subband applies (do_layer2() passes i>>2 for block i).
 	Results go to fraction[channel][0..2][subband]; fraction[.][3] is not touched here.
 */
 void II_step_two(unsigned int *bit_alloc,real fraction[2][4][SBLIMIT],int *scale,mpg123_handle *fr,int x1)
 {
    int i,j,k,ba;
    int stereo = fr->stereo;
    int sblimit = fr->II_sblimit;
    int jsbound = fr->jsbound;
    const struct al_table *alloc2,*alloc1 = fr->alloc;
    unsigned int *bita=bit_alloc;
    int d1,step;
    /* Subbands below jsbound: every channel has its own allocation, samples and scale factors. */
    /* alloc1 points to the table group of the current subband; a group has 1<<step entries. */
    for (i=0;i<jsbound;i++,alloc1+=(1<<step))
    {
      step = alloc1->bits;
      for (j=0;j<stereo;j++)
      {
        if ( (ba=*bita++) ) 
        {
          /* the allocation index selects the entry with sample width k and offset/grouping d1 */
          k=(alloc2 = alloc1+ba)->bits;
          if( (d1=alloc2->d) < 0) 
          {
            /* negative d: three separate samples of k bits each, d added before scaling */
            real cm=fr->muls[k][scale[x1]];
            fraction[j][0][i] = ((real) ((int)getbits(fr, k) + d1)) * cm;
            fraction[j][1][i] = ((real) ((int)getbits(fr, k) + d1)) * cm;
            fraction[j][2][i] = ((real) ((int)getbits(fr, k) + d1)) * cm;
          }        
          else 
          {
            /* d of 3, 5 or 9: one k bit code for all three samples, resolved via the grouping table */
            const int *table[] = { 0,0,0,grp_3tab,0,grp_5tab,0,0,0,grp_9tab };
            unsigned int idx,*tab,m=scale[x1];
            idx = (unsigned int) getbits(fr, k);
            tab = (unsigned int *) (table[d1] + idx + idx + idx);
            /* the table values choose the row of fr->muls, the scale factor index the column */
            fraction[j][0][i] = fr->muls[*tab++][m];
            fraction[j][1][i] = fr->muls[*tab++][m];
            fraction[j][2][i] = fr->muls[*tab][m];  
          }
          scale+=3;
        }
        else
          fraction[j][0][i] = fraction[j][1][i] = fraction[j][2][i] = 0.0;
      }
    }
    /* Subbands from jsbound on: one set of samples for both channels, scaled per channel. */
    for (i=jsbound;i<sblimit;i++,alloc1+=(1<<step))
    {
      step = alloc1->bits;
      bita++;	/* channel 1 and channel 2 bitalloc are the same */
      if ( (ba=*bita++) )
      {
        k=(alloc2 = alloc1+ba)->bits;
        if( (d1=alloc2->d) < 0)
        {
          real cm;
          /* scale[x1+3] is the scale factor of the second channel */
          cm=fr->muls[k][scale[x1+3]];
          /* fraction[0] first holds the raw value, fraction[1] gets it scaled for the second channel */
          fraction[1][0][i] = (fraction[0][0][i] = (real) ((int)getbits(fr, k) + d1) ) * cm;
          fraction[1][1][i] = (fraction[0][1][i] = (real) ((int)getbits(fr, k) + d1) ) * cm;
          fraction[1][2][i] = (fraction[0][2][i] = (real) ((int)getbits(fr, k) + d1) ) * cm;
          /* now scale the first channel with its own factor */
          cm=fr->muls[k][scale[x1]];
          fraction[0][0][i] *= cm; fraction[0][1][i] *= cm; fraction[0][2][i] *= cm;
        }
        else
        {
          const int *table[] = { 0,0,0,grp_3tab,0,grp_5tab,0,0,0,grp_9tab };
          unsigned int idx,*tab,m1,m2;
          m1 = scale[x1]; m2 = scale[x1+3];
          idx = (unsigned int) getbits(fr, k);
          tab = (unsigned int *) (table[d1] + idx + idx + idx);
          fraction[0][0][i] = fr->muls[*tab][m1]; fraction[1][0][i] = fr->muls[*tab++][m2];
          fraction[0][1][i] = fr->muls[*tab][m1]; fraction[1][1][i] = fr->muls[*tab++][m2];
          fraction[0][2][i] = fr->muls[*tab][m1]; fraction[1][2][i] = fr->muls[*tab][m2];
        }
        /* skip the scale factors of both channels */
        scale+=6;
      }
      else {
        fraction[0][0][i] = fraction[0][1][i] = fraction[0][2][i] =
        fraction[1][0][i] = fraction[1][1][i] = fraction[1][2][i] = 0.0;
      }
 /* 
    should we use individual scalefac for channel 2 or
    is the current way the right one , where we just copy channel 1 to
    channel 2 ?? 
    The current 'strange' thing is, that we throw away the scalefac
    values for the second channel ...!!
 -> changed .. now we use the scalefac values of channel one !! 
 */
    }
    /* Clear all subbands above the smaller of the table's subband limit and the downsampling limit. */
    if(sblimit > (fr->down_sample_sblimit) )
      sblimit = fr->down_sample_sblimit;
    for(i=sblimit;i<SBLIMIT;i++)
      for (j=0;j<stereo;j++)
        fraction[j][0][i] = fraction[j][1][i] = fraction[j][2][i] = 0.0;
 }
 /*
 	Choose the layer 2 allocation table and its subband limit for the current frame and
 	store them in fr->alloc and fr->II_sblimit. For sampling frequency indices below 3 the
 	choice depends on frequency, channel count and bitrate index; otherwise table 4 is taken.
 */
 static void II_select_table(mpg123_handle *fr)
 {
  /* indexed by [sampling frequency][2 - channels][bitrate index] */
  const int translate[3][2][16] =
   { { { 0,2,2,2,2,2,2,0,0,0,1,1,1,1,1,0 } ,
       { 0,2,2,0,0,0,1,1,1,1,1,1,1,1,1,0 } } ,
     { { 0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0 } ,
       { 0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0 } } ,
     { { 0,3,3,3,3,3,3,0,0,0,1,1,1,1,1,0 } ,
       { 0,3,3,0,0,0,1,1,1,1,1,1,1,1,1,0 } } };
  const struct al_table *tables[5] =
       { alloc_0, alloc_1, alloc_2, alloc_3 , alloc_4 };
  const int sblims[5] = { 27 , 30 , 8, 12 , 30 };
  /* The default is for sampling_frequency >= 3. Or equivalent: (fr->lsf == 1) */
  int table = 4;
  if(fr->sampling_frequency < 3)
    table = translate[fr->sampling_frequency][2-fr->stereo][fr->bitrate_index];
  fr->alloc      = tables[table];
  fr->II_sblimit = sblims[table];
 }
 /*
 	Decode one layer 2 frame: select the allocation table, set fr->jsbound (limited to
 	fr->II_sblimit), read allocation and scale factors once, then dequantize and synthesize
 	SCALE_BLOCK blocks of three sample sets each.
 	Returns the sum of the values returned by the synth calls (the clip count).
 */
 int do_layer2(mpg123_handle *fr)
 {
  real aligned(16) fraction[2][4][SBLIMIT]; /* pick_table clears unused subbands */
  unsigned int bit_alloc[64];
  int scale[192];
  int clip = 0;
  int block,part;
  int single = fr->single;
  II_select_table(fr);
  if(fr->mode == MPG_MD_JOINT_STEREO)
    fr->jsbound = (fr->mode_ext<<2)+4;
  else
    fr->jsbound = fr->II_sblimit;
  if(fr->jsbound > fr->II_sblimit)
  {
 	  fprintf(stderr, "Truncating stereo boundary to sideband limit.\n");
 	  fr->jsbound = fr->II_sblimit;
  }
  /* also, mix not really handled */
  if(fr->stereo == 1 || single == SINGLE_MIX)
    single = SINGLE_LEFT;
  II_step_one(bit_alloc, scale, fr);
  for(block=0; block<SCALE_BLOCK; ++block)
  {
    /* every four blocks the next of the three scale factors takes over */
    II_step_two(bit_alloc,fraction,scale,fr,block>>2);
    for(part=0; part<3; ++part)
    {
      if(single == SINGLE_STEREO)
      {
        clip += (fr->synth) (fraction[0][part], 0, fr, 0);
        clip += (fr->synth) (fraction[1][part], 1, fr, 1);
      }
      else
      {
        clip += (fr->synth_mono) (fraction[single][part], fr);
      }
    }
  }
  return clip;
 }
--- a/src/libmpg123/layer3.c
+++ b/src/libmpg123/layer3.c
--- a/src/libmpg123/libmpg123.c
+++ b/src/libmpg123/libmpg123.c
@@ -0,0 +1,840 @@
 #include "mpg123lib_intern.h"
 #include "getbits.h"
 /*
 	SAMPLE_ADJUST subtracts mh->begin_os from a sample position when the MPG123_GAPLESS
 	flag is set on the handle; SAMPLE_UNADJUST adds it back. Without GAPLESS support
 	compiled in, both are the identity.
 	Note: both macros use a variable named mh from the surrounding scope, it is not a macro argument.
 */
 #ifdef GAPLESS
 #define SAMPLE_ADJUST(x)   ((x) - ((mh->p.flags & MPG123_GAPLESS) ? mh->begin_os : 0))
 #define SAMPLE_UNADJUST(x) ((x) + ((mh->p.flags & MPG123_GAPLESS) ? mh->begin_os : 0))
 #else
 #define SAMPLE_ADJUST(x)   (x)
 #define SAMPLE_UNADJUST(x) (x)
 #endif
 /* Frame number handed to the seek code: mh->ignoreframe, or 0 if that is negative. */
 #define SEEKFRAME(mh) ((mh)->ignoreframe < 0 ? 0 : (mh)->ignoreframe)
 /* Set to 1 by mpg123_init(); mpg123_parnew() reports MPG123_NOT_INITIALIZED while it is 0. */
 static int initialized = 0;
 #ifdef GAPLESS
 /*
 	Take the buffer after a frame decode (strictly: it is the data from frame fr->num!) and cut samples out.
 	fr->buffer.fill may then be smaller than before...
 */
 /* Trim the freshly decoded output in fr->buffer: drop leading samples on the first wanted frame
    and cap the amount of data on the last wanted frame. Both trims are one-shot (offsets are zeroed). */
 static void frame_buffercheck(mpg123_handle *fr)
 {
 	/* The first interesting frame: Skip some leading samples. */
 	if(fr->firstoff && fr->num == fr->firstframe)
 	{
 		off_t byteoff = samples_to_bytes(fr, fr->firstoff);
 		if(fr->buffer.fill > byteoff)
 		{
 			/* fill is reduced first; the memmove below relies on the already reduced value. */
 			fr->buffer.fill -= byteoff;
 			/* buffer.p != buffer.data only for own buffer */
 			/* NOTE(review): the message says buf[1] but index 2 is printed -- confirm which is intended. */
 			debug6("cutting %li samples/%li bytes on begin, own_buffer=%i at %p=%p, buf[1]=%i",
 			        (long)fr->firstoff, (long)byteoff, fr->own_buffer, (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]);
 			/* Own buffer: just advance the read pointer. Otherwise: shift the remaining data to the front. */
 			if(fr->own_buffer) fr->buffer.p = fr->buffer.data + byteoff;
 			else memmove(fr->buffer.data, fr->buffer.data + byteoff, fr->buffer.fill);
 			debug3("done cutting, buffer at %p =? %p, buf[1]=%i",
 			        (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]);
 		}
 		/* The whole frame lies before the wanted offset: nothing remains. */
 		else fr->buffer.fill = 0;
 		fr->firstoff = 0; /* Only enter here once... when you seek, firstoff should be reset. */
 	}
 	/* The last interesting (planned) frame: Only use some leading samples. */
 	if(fr->lastoff && fr->num == fr->lastframe)
 	{
 		off_t byteoff = samples_to_bytes(fr, fr->lastoff);
 		/* Only ever shrinks the fill; a shorter buffer is left alone. */
 		if(fr->buffer.fill > byteoff)
 		{
 			fr->buffer.fill = byteoff;
 		}
 		fr->lastoff = 0; /* Only enter here once... when you seek, lastoff should be reset. */
 	}
 }
 #endif
 /* One-time library setup: checks basic type sizes, builds the shared decoder tables
    and marks the library as initialized. Returns MPG123_OK or MPG123_BAD_TYPES. */
 int mpg123_init(void)
 {
 	/* The code needs a 2-byte short and a long of at least 4 bytes. */
 	if(!((sizeof(short) == 2) && (sizeof(long) >= 4)))
 	{
 		return MPG123_BAD_TYPES;
 	}
 	init_layer2(); /* inits also shared tables with layer1 */
 	init_layer3();
 #ifndef OPT_MMX_ONLY
 	prepare_decode_tables();
 #endif
 	check_decoders();
 	initialized = 1;
 	return MPG123_OK;
 }
 /* Counterpart to mpg123_init(); currently has no effect. */
 void mpg123_exit(void)
 {
 	/* nothing yet, but something later perhaps */
 	if(!initialized)
 	{
 		return;
 	}
 }
 /* create a new handle with specified decoder, decoder can be "", "auto" or NULL for auto-detection */
 mpg123_handle *mpg123_new(const char* decoder, int *error)
 {
 	return mpg123_parnew(NULL, decoder, error);
 }
 /*
 	Create a handle, optionally with initial parameters mp to override defaults.
 	Returns the new handle or NULL; if error is not NULL it receives MPG123_OK or the reason
 	(MPG123_NOT_INITIALIZED, MPG123_OUT_OF_MEM, MPG123_BAD_DECODER, MPG123_NO_BUFFERS).
 */
 mpg123_handle *mpg123_parnew(mpg123_pars *mp, const char* decoder, int *error)
 {
 	int status = MPG123_OK;
 	mpg123_handle *handle = NULL;
 	if(!initialized)
 	{
 		status = MPG123_NOT_INITIALIZED;
 		goto finish;
 	}
 	handle = (mpg123_handle*) malloc(sizeof(mpg123_handle));
 	if(handle == NULL)
 	{
 		status = MPG123_OUT_OF_MEM;
 		goto finish;
 	}
 	frame_init_par(handle, mp);
 	debug("cpu opt setting");
 	if(frame_cpu_opt(handle, decoder) != 1)
 	{
 		status = MPG123_BAD_DECODER;
 		goto discard;
 	}
 	if((frame_outbuffer(handle) != 0) || (frame_buffers(handle) != 0))
 	{
 		status = MPG123_NO_BUFFERS;
 		goto discard;
 	}
 	opt_make_decode_tables(handle);
 	handle->decoder_change = 1;
 	/* happening on frame change instead:
 	init_layer3_stuff(fr);
 	init_layer2_stuff(fr); */
 	goto finish;
 discard:
 	/* Setup failed after allocation: release frame resources and the struct itself. */
 	frame_exit(handle);
 	free(handle);
 	handle = NULL;
 finish:
 	if(error != NULL) *error = status;
 	return handle;
 }
 /*
 	Switch the handle to the decoder named by the given string.
 	Returns MPG123_OK (also when that decoder is already active) or MPG123_ERR with mh->err set.
 */
 int mpg123_decoder(mpg123_handle *mh, const char* decoder)
 {
 	enum optdec wanted = dectype(decoder);
 	int failure = MPG123_OK;
 	if(mh == NULL) return MPG123_ERR;
 	if(wanted == nodec)
 	{
 		mh->err = MPG123_BAD_DECODER;
 		return MPG123_ERR;
 	}
 	/* Nothing to do if the requested decoder is the current one. */
 	if(wanted == mh->cpu_opts.type) return MPG123_OK;
 	/* Now really change. */
 	debug("cpu opt setting");
 	if(frame_cpu_opt(mh, decoder) != 1) failure = MPG123_BAD_DECODER;
 	/* New buffers for decoder are created in frame_buffers() */
 	else if((frame_outbuffer(mh) != 0) || (frame_buffers(mh) != 0)) failure = MPG123_NO_BUFFERS;
 	if(failure != MPG123_OK)
 	{
 		mh->err = failure;
 		frame_exit(mh);
 		return MPG123_ERR;
 	}
 	opt_make_decode_tables(mh);
 	mh->decoder_change = 1;
 	return MPG123_OK;
 }
 /* Set a parameter on a handle via mpg123_par(); on failure the specific code is stored
    in mh->err and MPG123_ERR is returned. */
 int mpg123_param(mpg123_handle *mh, int key, long val, double fval)
 {
 	int status;
 	if(mh == NULL) return MPG123_ERR;
 	status = mpg123_par(&mh->p, key, val, fval);
 	if(status == MPG123_OK) return MPG123_OK;
 	mh->err = status;
 	return MPG123_ERR;
 }
 /*
 	Set one parameter in a parameter set.
 	key selects the parameter, val carries integer values, fval the floating point value
 	(only used for MPG123_OUTSCALE in FLOATOUT builds).
 	Returns MPG123_OK or a specific error code (MPG123_BAD_PARAM for an unknown key,
 	MPG123_BAD_RATE, MPG123_BAD_RVA, MPG123_NO_GAPLESS).
 */
 int mpg123_par(mpg123_pars *mp, int key, long val, double fval)
 {
 	int ret = MPG123_OK;
 	switch(key)
 	{
 		case MPG123_VERBOSE:
 			mp->verbose = val;
 		break;
 		case MPG123_FLAGS:
 #ifndef GAPLESS
 			if(val & MPG123_GAPLESS) ret = MPG123_NO_GAPLESS;
 			else
 #endif
 			mp->flags = val;
 			debug1("set flags to 0x%lx", (unsigned long) mp->flags);
 		break;
 		case MPG123_ADD_FLAGS:
 #ifndef GAPLESS
 			/* Same rule as for MPG123_FLAGS: gapless cannot be switched on in a build without it. */
 			if(val & MPG123_GAPLESS) ret = MPG123_NO_GAPLESS;
 			else
 #endif
 			mp->flags |= val;
 		break;
 		case MPG123_FORCE_RATE: /* should this trigger something? */
 			if(val > 96000) ret = MPG123_BAD_RATE;
 			else mp->force_rate = val < 0 ? 0 : val; /* >0 means enable, 0 disable */
 		break;
 		case MPG123_DOWN_SAMPLE:
 			if(val < 0 || val > 2) ret = MPG123_BAD_RATE;
 			else mp->down_sample = (int)val;
 		break;
 		case MPG123_RVA:
 			if(val < 0 || val > MPG123_RVA_MAX) ret = MPG123_BAD_RVA;
 			else mp->rva = (int)val;
 		break;
 		/* Negative values for the following three are stored as 0. */
 		case MPG123_DOWNSPEED:
 			mp->halfspeed = val < 0 ? 0 : val;
 		break;
 		case MPG123_UPSPEED:
 			mp->doublespeed = val < 0 ? 0 : val;
 		break;
 		case MPG123_ICY_INTERVAL:
 			mp->icy_interval = val > 0 ? val : 0;
 		break;
 		case MPG123_OUTSCALE:
 #ifdef FLOATOUT
 			mp->outscale = fval;
 #else
 			mp->outscale = val;
 #endif
 		break;
 		default:
 			ret = MPG123_BAD_PARAM;
 	}
 	return ret;
 }
 /* Read a parameter from a handle via mpg123_getpar(); on failure the specific code is stored
    in mh->err and MPG123_ERR is returned. */
 int mpg123_getparam(mpg123_handle *mh, int key, long *val, double *fval)
 {
 	int status;
 	if(mh == NULL) return MPG123_ERR;
 	status = mpg123_getpar(&mh->p, key, val, fval);
 	if(status == MPG123_OK) return MPG123_OK;
 	mh->err = status;
 	return MPG123_ERR;
 }
 /*
 	Read one parameter from a parameter set.
 	The value is written to *val (or to *fval for MPG123_OUTSCALE in FLOATOUT builds);
 	a NULL destination is silently skipped.
 	Returns 0 (MPG123_OK) or MPG123_BAD_PARAM for an unknown key.
 */
 int mpg123_getpar(mpg123_pars *mp, int key, long *val, double *fval)
 {
 	switch(key)
 	{
 		case MPG123_VERBOSE:
 			if(val != NULL) *val = mp->verbose;
 			return MPG123_OK;
 		/* Both flag keys read the same field. */
 		case MPG123_FLAGS:
 		case MPG123_ADD_FLAGS:
 			if(val != NULL) *val = mp->flags;
 			return MPG123_OK;
 		case MPG123_FORCE_RATE:
 			if(val != NULL) *val = mp->force_rate;
 			return MPG123_OK;
 		case MPG123_DOWN_SAMPLE:
 			if(val != NULL) *val = mp->down_sample;
 			return MPG123_OK;
 		case MPG123_RVA:
 			if(val != NULL) *val = mp->rva;
 			return MPG123_OK;
 		case MPG123_DOWNSPEED:
 			if(val != NULL) *val = mp->halfspeed;
 			return MPG123_OK;
 		case MPG123_UPSPEED:
 			if(val != NULL) *val = mp->doublespeed;
 			return MPG123_OK;
 		case MPG123_ICY_INTERVAL:
 			if(val != NULL) *val = (long)mp->icy_interval;
 			return MPG123_OK;
 		case MPG123_OUTSCALE:
 #ifdef FLOATOUT
 			if(fval != NULL) *fval = mp->outscale;
 #else
 			if(val != NULL) *val = mp->outscale;
 #endif
 			return MPG123_OK;
 		default:
 			break;
 	}
 	return MPG123_BAD_PARAM;
 }
 /*
 	Set the equalizer value of one band (0..31) for the left, right or both channels.
 	Returns MPG123_OK, or MPG123_ERR with mh->err set to MPG123_BAD_BAND / MPG123_BAD_CHANNEL.
 */
 int mpg123_eq(mpg123_handle *mh, int channel, int band, double val)
 {
 	int left, right;
 	if(mh == NULL) return MPG123_ERR;
 	if(band < 0 || band > 31)
 	{
 		mh->err = MPG123_BAD_BAND;
 		return MPG123_ERR;
 	}
 	/* Exactly three channel values are accepted: left, right, or both OR-ed together. */
 	left  = (channel == MPG123_LEFT)  || (channel == (MPG123_LEFT|MPG123_RIGHT));
 	right = (channel == MPG123_RIGHT) || (channel == (MPG123_LEFT|MPG123_RIGHT));
 	if(!left && !right)
 	{
 		mh->err=MPG123_BAD_CHANNEL;
 		return MPG123_ERR;
 	}
 	if(right) mh->equalizer[1][band] = DOUBLE_TO_REAL(val);
 	if(left)  mh->equalizer[0][band] = DOUBLE_TO_REAL(val);
 	mh->have_eq_settings = TRUE;
 	return MPG123_OK;
 }
 /* Open a stream from a file path (plain file access, no http!).
    Closes whatever the handle had open before and resets the frame state.
    Returns MPG123_ERR for a NULL handle, otherwise the result of open_stream(). */
 int mpg123_open(mpg123_handle *mh, char *path)
 {
 	/* Without this, a NULL handle would reach frame_reset() and open_stream(). */
 	if(mh == NULL) return MPG123_ERR;
 	mpg123_close(mh);
 	frame_reset(mh);
 	return open_stream(mh, path, -1);
 }
 /* Open a stream from an already opened file descriptor.
    Closes whatever the handle had open before and resets the frame state.
    Returns MPG123_ERR for a NULL handle, otherwise the result of open_stream(). */
 int mpg123_open_fd(mpg123_handle *mh, int fd)
 {
 	/* Without this, a NULL handle would reach frame_reset() and open_stream(). */
 	if(mh == NULL) return MPG123_ERR;
 	mpg123_close(mh);
 	frame_reset(mh);
 	return open_stream(mh, NULL, fd);
 }
 /* Prepare the handle for input fed by the caller (see mpg123_decode()).
    Closes whatever the handle had open before and resets the frame state.
    Returns MPG123_ERR for a NULL handle, otherwise the result of open_feed(). */
 int mpg123_open_feed(mpg123_handle *mh)
 {
 	/* Without this, a NULL handle would reach frame_reset() and open_feed(). */
 	if(mh == NULL) return MPG123_ERR;
 	mpg123_close(mh);
 	frame_reset(mh);
 	return open_feed(mh);
 }
 /*
 	Bring the decoder setup in line with the current output format mh->af:
 	down-sampling mode, subband limit, output block size, channel selection, synth functions.
 	Returns 0 on success, -1 if the ntom step or the synth functions cannot be set up.
 */
 int decode_update(mpg123_handle *mh)
 {
 	long native_rate = frame_freq(mh);
 	int shift;
 	debug("updating decoder structure");
 	/* Output rate equal to native rate, half or quarter of it gives mode 0, 1 or 2;
 	   anything else ends the loop with 3: flexible (fixed) rate. */
 	for(shift = 0; shift < 3; ++shift)
 	{
 		if(mh->af.rate == native_rate>>shift) break;
 	}
 	mh->down_sample = shift;
 	if(mh->down_sample == 3)
 	{
 		if(synth_ntom_set_step(mh) != 0) return -1;
 		if(frame_freq(mh) > mh->af.rate)
 		{
 			mh->down_sample_sblimit = SBLIMIT * mh->af.rate;
 			mh->down_sample_sblimit /= frame_freq(mh);
 		}
 		else mh->down_sample_sblimit = SBLIMIT;
 		mh->outblock = sizeof(sample_t) * mh->af.channels *
 		               ( ( NTOM_MUL-1+spf(mh)
 		                   * (((size_t)NTOM_MUL*mh->af.rate)/frame_freq(mh))
 		                 )/NTOM_MUL );
 	}
 	else
 	{
 		mh->down_sample_sblimit = SBLIMIT>>(mh->down_sample);
 		/* With downsampling I get less samples per frame */
 		mh->outblock = sizeof(sample_t)*mh->af.channels*(spf(mh)>>mh->down_sample);
 	}
 	/* Forced mono takes its channel choice from the flag bits, otherwise the channel count decides. */
 	if(mh->p.flags & MPG123_FORCE_MONO)
 	{
 		mh->single = (mh->p.flags & MPG123_FORCE_MONO)-1;
 	}
 	else
 	{
 		mh->single = (mh->af.channels == 1) ? SINGLE_MIX : SINGLE_STEREO;
 	}
 	if(set_synth_functions(mh) != 0) return -1;
 	init_layer3_stuff(mh);
 	init_layer2_stuff(mh);
 	do_rva(mh);
 	return 0;
 }
 /* Buffer size in bytes that mpg123_outblock() reports when it has no handle to ask. */
 size_t mpg123_safe_buffer()
 {
 	size_t bytes = sizeof(sample_t);
 	bytes *= 2;
 	bytes *= 1152;
 	bytes *= NTOM_MAX;
 	return bytes;
 }
 /* Output block size of the handle; without a handle, the value of mpg123_safe_buffer(). */
 size_t mpg123_outblock(mpg123_handle *mh)
 {
 	if(mh == NULL) return mpg123_safe_buffer();
 	return mh->outblock;
 }
 /*
 	Read frames until one at or after mh->firstframe is available for decoding.
 	Frames in the range [ignoreframe, firstframe) are decoded and their output thrown away.
 	On a decoder or big header change, the output format and decoder setup are refreshed.
 	Returns MPG123_OK, MPG123_DONE, MPG123_NEED_MORE or MPG123_ERR
 	(or a negative code from the recursive call in the GAPLESS section).
 */
 static int get_next_frame(mpg123_handle *mh)
 {
 	/* Remember a pending decoder change; a big header change inside the loop also sets this. */
 	int change = mh->decoder_change;
 	do
 	{
 		int b;
 		/* Decode & discard some frame(s) before beginning. */
 		if(mh->to_ignore && mh->num < mh->firstframe && mh->num >= mh->ignoreframe)
 		{
 			debug1("ignoring frame %li", (long)mh->num);
 			/* Decoder structure must be current! decode_update has been called before... */
 			(mh->do_layer)(mh); mh->buffer.fill = 0;
 			mh->to_ignore = mh->to_decode = FALSE;
 		}
 		/* Read new frame data; possibly breaking out here for MPG123_NEED_MORE. */
 		debug("read frame");
 		mh->to_decode = FALSE;
 		b = read_frame(mh); /* That sets to_decode only if a full frame was read. */
 		debug3("read of frame %li returned %i (to_decode=%i)", mh->num, b, mh->to_decode);
 		if(b == MPG123_NEED_MORE) return MPG123_NEED_MORE; /* need another call with data */
 		else if(b <= 0)
 		{
 			/* More sophisticated error control? */
 			if(b==0 || mh->rdat.filepos == mh->rdat.filelen)
 			{ /* We simply reached the end. */
 				/* The frame count of the track is known now. */
 				mh->track_frames = mh->num + 1;
 				return MPG123_DONE;
 			}
 			else return MPG123_ERR; /* Some real error. */
 		}
 		/* Now, there should be new data to decode ... and also possibly new stream properties */
 		if(mh->header_change > 1)
 		{
 			debug("big header change");
 			change = 1;
 		}
 	} while(mh->num < mh->firstframe);
 	/* When we start actually using the CRC, this could move into the loop... */
 	/* A question of semantics ... should I fold start_frame and frame_number into firstframe/lastframe? */
 	/* Past the last wanted frame: report the end without decoding. */
 	if(mh->lastframe >= 0 && mh->num > mh->lastframe)
 	{
 		mh->to_decode = mh->to_ignore = FALSE;
 		return MPG123_DONE;
 	}
 	if(change)
 	{
 		int b = frame_output_format(mh); /* Select the new output format based on given constraints. */
 		if(b < 0) return MPG123_ERR; /* not nice to fail here... perhaps once should add possibility to repeat this step */
 		if(decode_update(mh) < 0) return MPG123_ERR; /* ditto... */
 		mh->decoder_change = 0;
 		if(b == 1) mh->new_format = 1; /* Store for later... */
 #ifdef GAPLESS
 		/* First frame of a fresh track: set up gapless offsets once. */
 		if(mh->fresh)
 		{
 			b=0;
 			/* Prepare offsets for gapless decoding. */
 			frame_gapless_realinit(mh);
 			frame_set_frameseek(mh, mh->num);
 			mh->fresh = 0;
 			/* Could this possibly happen? With a real big gapless offset... */
 			if(mh->num < mh->firstframe) b = get_next_frame(mh);
 			if(b < 0) return b; /* Could be error, need for more, new format... */
 		}
 #endif
 	}
 	return MPG123_OK;
 }
 /*
 	Put _one_ decoded frame into the frame structure's buffer, accessible at the location stored in <audio>, with <bytes> bytes available.
 	The buffer contents will be lost on next call to mpg123_decode_frame.
 	MPG123_OK -- successfully decoded the frame, you get your output data
 	MPG123_DONE -- This is it. End.
 	MPG123_ERR -- some error occurred...
 	MPG123_NEW_FORMAT -- new frame was read, it results in changed output format -> will be decoded on next call
 	MPG123_NEED_MORE  -- that should not happen as this function is intended for in-library stream reader but if you force it...
 	MPG123_NO_SPACE   -- not enough space in buffer for safe decoding, also should not happen
 	num will be updated to the last decoded frame number (may possibly _not_ increase, p.ex. when format changed).
 */
 /*
 	Decode one frame into the handle's own buffer.
 	num, audio and bytes are output locations for the frame number, the data pointer and the
 	byte count; each of them may be NULL if the caller does not need that value.
 	Returns MPG123_OK after a decoded frame, MPG123_NEW_FORMAT before the first frame of a new
 	output format, MPG123_NO_SPACE if the buffer is smaller than one output block,
 	MPG123_ERR for a NULL handle, or the negative code from fetching the next frame.
 */
 int mpg123_decode_frame(mpg123_handle *mh, off_t *num, unsigned char **audio, size_t *bytes)
 {
 	if(mh == NULL) return MPG123_ERR;
 	if(mh->buffer.size < mh->outblock) return MPG123_NO_SPACE;
 	mh->buffer.fill = 0; /* always start fresh */
 	if(bytes != NULL) *bytes = 0;
 	while(TRUE)
 	{
 		/* decode if possible */
 		if(mh->to_decode)
 		{
 			/* Announce a format change first; the frame itself is decoded on the next call. */
 			if(mh->new_format)
 			{
 				mh->new_format = 0;
 				return MPG123_NEW_FORMAT;
 			}
 			if(num != NULL) *num = mh->num;
 			debug("decoding");
 			mh->clip += (mh->do_layer)(mh);
 			mh->to_decode = mh->to_ignore = FALSE;
 			mh->buffer.p = mh->buffer.data;
 #ifdef GAPLESS
 			/* This checks for individual samples to skip, for gapless mode or sample-accurate seek. */
 			frame_buffercheck(mh);
 #endif
 			if(audio != NULL) *audio = mh->buffer.p;
 			if(bytes != NULL) *bytes = mh->buffer.fill;
 			return MPG123_OK;
 		}
 		else
 		{
 			int b = get_next_frame(mh);
 			if(b < 0) return b;
 			debug1("got next frame, %i", mh->to_decode);
 		}
 	}
 	return MPG123_ERR;
 }
 /* Decode into the caller's buffer without feeding new input: mpg123_decode() with no input memory. */
 ssize_t mpg123_read(mpg123_handle *mh, unsigned char *out, size_t size, size_t *done)
 {
 	unsigned char *no_input = NULL;
 	return mpg123_decode(mh, no_input, 0, out, size, done);
 }
 /*
 	The old picture:
 	while(1) {
 		len = read(0,buf,16384);
 		if(len <= 0)
 			break;
 		ret = decodeMP3(&mp,buf,len,out,8192,&size);
 		while(ret == MP3_OK) {
 			write(1,out,size);
 			ret = decodeMP3(&mp,NULL,0,out,8192,&size);
 		}
 	}
 */
 /*
 	Optionally feed inmemsize bytes of input, then decode and copy up to outmemsize bytes
 	of output into outmemory.
 	done receives the number of bytes stored in outmemory; it may be NULL if the caller
 	does not want the count.
 	Returns MPG123_OK when the output buffer got filled, MPG123_NEW_FORMAT before the first frame
 	of a new output format, MPG123_NO_SPACE if the internal buffer cannot take another frame,
 	MPG123_ERR for a NULL handle or failed feeding, or the negative code from fetching
 	the next frame (end of data, need for more input, ...).
 */
 int mpg123_decode(mpg123_handle *mh,unsigned char *inmemory, size_t inmemsize, unsigned char *outmemory, size_t outmemsize, size_t *done)
 {
 	int ret = MPG123_OK;
 	size_t done_dummy = 0; /* Takes the byte count when the caller passed done == NULL. */
 	if(done == NULL) done = &done_dummy;
 	*done = 0;
 	if(mh == NULL) return MPG123_ERR;
 	if(inmemsize > 0)
 	{
 		if(feed_more(mh, inmemory, inmemsize) == -1) return MPG123_ERR;
 	}
 	while(ret == MPG123_OK)
 	{
 		debug3("decode loop, fill %i (%li vs. %li)", mh->buffer.fill, (long)mh->num, (long)mh->firstframe);
 		/* Decode a frame that has been read before.
 		   This only happens when buffer is empty! */
 		if(mh->to_decode)
 		{
 			if(mh->new_format)
 			{
 				mh->new_format = 0;
 				return MPG123_NEW_FORMAT;
 			}
 			if(mh->buffer.size - mh->buffer.fill < mh->outblock) return MPG123_NO_SPACE;
 			mh->clip += (mh->do_layer)(mh);
 			mh->to_decode = mh->to_ignore = FALSE;
 			mh->buffer.p = mh->buffer.data;
 			debug2("decoded frame %li, got %li samples in buffer", mh->num, mh->buffer.fill / (samples_to_bytes(mh, 1)));
 #ifdef GAPLESS
 			frame_buffercheck(mh); /* Seek & gapless. */
 #endif
 		}
 		if(mh->buffer.fill) /* Copy (part of) the decoded data to the caller's buffer. */
 		{
 			/* get what is needed - or just what is there */
 			int a = mh->buffer.fill > (outmemsize - *done) ? outmemsize - *done : mh->buffer.fill;
 			debug4("buffer fill: %i; copying %i (%i - %i)", mh->buffer.fill, a, outmemsize, *done);
 			memcpy(outmemory, mh->buffer.p, a);
 			/* less data in frame buffer, less needed, output pointer increase, more data given... */
 			mh->buffer.fill -= a;
 			outmemory  += a;
 			*done += a;
 			mh->buffer.p += a;
 			/* Caller's buffer is full: stop here, the rest stays in the internal buffer. */
 			if(!(outmemsize > *done)) return ret;
 		}
 		else /* If we didn't have data, get a new frame. */
 		{
 			int b = get_next_frame(mh);
 			if(b < 0) return b;
 		}
 	}
 	return ret;
 }
 /* Return the clip counter accumulated by the decode calls and reset it to zero; 0 for a NULL handle. */
 long mpg123_clip(mpg123_handle *mh)
 {
 	long total;
 	if(mh == NULL) return 0;
 	total = mh->clip;
 	mh->clip = 0;
 	return total;
 }
 /*
 	Now, where are we? We need to know the last decoded frame... and what's left of it in buffer.
 	The current frame number can mean the last decoded frame or the to-be-decoded frame.
 	If mh->to_decode, then mh->num frames have been decoded, the frame mh->num now coming next.
 	If not, we have the possibility of mh->num+1 frames being decoded or nothing at all.
 	Then, there is firstframe...when we didn't reach it yet, then the next data will come from there.
 	mh->num starts with -1
 */
 /*
 	Report the current position, passed through SAMPLE_ADJUST.
 	On a fresh track the first frame is fetched to get the basic info.
 	Returns MPG123_ERR for a NULL handle or the negative code from fetching that frame.
 */
 off_t mpg123_tell(mpg123_handle *mh)
 {
 	if(mh == NULL) return MPG123_ERR;
 	if(mh->fresh && !mh->to_decode)
 	{
 		/* Fresh track, need first frame for basic info. */
 		int status = get_next_frame(mh);
 		if(status < 0) return status;
 	}
 	/* Now we have all the info at hand. */
 	debug5("tell: %li/%i first %li firstoff %li buffer %lu", (long)mh->num, mh->to_decode, (long)mh->firstframe, (long)mh->firstoff, (unsigned long)mh->buffer.fill);
 	/* Not yet at (or just about to decode) the first wanted frame: the seek target is the position. */
 	if((mh->num < mh->firstframe) || (mh->num == mh->firstframe && mh->to_decode))
 	{
 		return SAMPLE_ADJUST(frame_tell_seek(mh));
 	}
 	/* A frame waiting for decode counts mh->num frames as done, otherwise mh->num+1;
 	   what is still buffered is subtracted.
 	   NOTE(review): buffer.fill is compared against samples_to_bytes() results elsewhere in this file,
 	   so it looks like a byte count subtracted from a sample count here -- confirm units. */
 	return SAMPLE_ADJUST(frame_outs(mh, mh->to_decode ? mh->num : mh->num+1) - mh->buffer.fill);
 }
 /* Report the current frame number: the first wanted frame if not reached yet, the current frame
    while it waits for decoding or still has buffered output, otherwise the one after it. */
 off_t mpg123_tellframe(mpg123_handle *mh)
 {
 	if(mh == NULL) return MPG123_ERR;
 	if(mh->num < mh->firstframe) return mh->firstframe;
 	/* Consider firstoff? */
 	if(mh->to_decode || mh->buffer.fill) return mh->num;
 	return mh->num + 1;
 }
 /*
 	Position the reader at the frame given by SEEKFRAME(mh), dropping buffered output.
 	Skips the reader seek when the wanted frame is the current one (waiting for decode)
 	or the next one to be read.
 	Returns MPG123_OK (0) or the negative result of the reader's seek_frame().
 */
 static int do_the_seek(mpg123_handle *mh)
 {
 	off_t target = SEEKFRAME(mh);
 	int result;
 	mh->buffer.fill = 0;
 	if(mh->num < mh->firstframe) mh->to_decode = FALSE;
 	/* Already holding the wanted frame, undecoded. */
 	if(mh->to_decode && mh->num == target) return MPG123_OK;
 	/* The next frame read is the wanted one. */
 	if(mh->num == target-1)
 	{
 		mh->to_decode = FALSE;
 		return MPG123_OK;
 	}
 	/*frame_buffers_reset(mh);*/
 	result = mh->rd->seek_frame(mh, target);
 	if(result < 0) return result;
 	/* Only mh->to_ignore is TRUE. */
 	if(mh->num < mh->firstframe) mh->to_decode = FALSE;
 	return 0;
 }
 /*
 	Seek to a sample position, relative to start, current position or end (whence).
 	Negative targets are clamped to 0.
 	Returns the resulting position from mpg123_tell(), or a negative value on error
 	(mh->err is set to MPG123_NO_SEEK_FROM_END or MPG123_BAD_WHENCE where applicable).
 */
 off_t mpg123_seek(mpg123_handle *mh, off_t sampleoff, int whence)
 {
 	int status;
 	off_t target = mpg123_tell(mh); /* adjusted samples */
 	debug1("pos=%li", (long)target);
 	if(target < 0) return target; /* mh == NULL is covered in mpg123_tell() */
 	if(whence == SEEK_CUR)
 	{
 		target += sampleoff;
 	}
 	else if(whence == SEEK_SET)
 	{
 		target = sampleoff;
 	}
 	else if(whence == SEEK_END)
 	{
 		/* Needs a known end; the offset is counted backwards from there. */
 #ifdef GAPLESS
 		if(mh->end_os >= 0) target = SAMPLE_ADJUST(mh->end_os) - sampleoff;
 #else
 		if(mh->track_frames > 0) target = SAMPLE_ADJUST(frame_outs(mh, mh->track_frames)) - sampleoff;
 #endif
 		else
 		{
 			mh->err = MPG123_NO_SEEK_FROM_END;
 			return MPG123_ERR;
 		}
 	}
 	else
 	{
 		mh->err = MPG123_BAD_WHENCE;
 		return MPG123_ERR;
 	}
 	if(target < 0) target = 0;
 	/* target now holds the wanted sample offset in adjusted samples */
 	frame_set_seek(mh, SAMPLE_UNADJUST(target));
 	status = do_the_seek(mh);
 	if(status < 0) return status;
 	return mpg123_tell(mh);
 }
 /*
 	A bit more tricky... libmpg123 does not do the seeking itself.
 	All it can do is to ignore frames until the wanted one is there.
 	The caller doesn't know where a specific frame starts and mpg123 also only knows the general region after it scanned the file.
 	Well, it is tricky...
 */
 /*
 	Seek for feeder mode: compute the target like mpg123_seek() and tell the caller via
 	*input_offset from which input position to continue feeding.
 	Returns the resulting position from mpg123_tell(), or a negative value on error
 	(mh->err is set to MPG123_NO_SEEK_FROM_END or MPG123_BAD_WHENCE where applicable).
 	input_offset is written unconditionally, so it must not be NULL.
 */
 off_t mpg123_feedseek(mpg123_handle *mh, off_t sampleoff, int whence, off_t *input_offset)
 {
 	off_t pos = mpg123_tell(mh); /* adjusted samples */
 	debug3("seek from %li to %li (whence=%i)", (long)pos, (long)sampleoff, whence);
 	if(pos < 0) return pos; /* mh == NULL is covered in mpg123_tell() */
 	switch(whence)
 	{
 		case SEEK_CUR: pos += sampleoff; break;
 		case SEEK_SET: pos  = sampleoff; break;
 		case SEEK_END:
 			/* Needs a known end; the offset is counted backwards from there. */
 #ifdef GAPLESS
 			if(mh->end_os >= 0) pos = SAMPLE_ADJUST(mh->end_os) - sampleoff;
 #else
 			if(mh->track_frames > 0) pos = SAMPLE_ADJUST(frame_outs(mh, mh->track_frames)) - sampleoff;
 #endif
 			else
 			{
 				mh->err = MPG123_NO_SEEK_FROM_END;
 				return MPG123_ERR;
 			}
 		break;
 		default: mh->err = MPG123_BAD_WHENCE; return MPG123_ERR;
 	}
 	if(pos < 0) pos = 0;
 	frame_set_seek(mh, SAMPLE_UNADJUST(pos));
 	/* From here on pos is a frame number, not a sample offset. */
 	pos = SEEKFRAME(mh);
 	mh->buffer.fill = 0;
 	/* Shortcuts without modifying input stream. */
 	*input_offset = mh->rdat.firstpos + mh->rdat.filelen;
 	if(mh->num < mh->firstframe) mh->to_decode = FALSE;
 	/* Wanted frame is the current undecoded one, or the next one to be read. */
 	if(mh->num == pos && mh->to_decode) goto feedseekend;
 	if(mh->num == pos-1) goto feedseekend;
 	/* Whole way. */
 	/* frame_index_find() gets &pos and may change it; the frame counter is set from pos afterwards. */
 	*input_offset = feed_set_pos(mh, frame_index_find(mh, SEEKFRAME(mh), &pos));
 	/* NOTE(review): mh->num is changed before the error check below -- confirm that is intended. */
 	mh->num = pos-1; /* The next read frame will have num = pos. */
 	if(*input_offset < 0) return MPG123_ERR;
 feedseekend:
 	return mpg123_tell(mh);
 }
 /*
 	Seek to a frame number, relative to start, current frame or end (whence).
 	The target is clamped to 0 at the low end and to the track length (if known) at the high end.
 	Returns the resulting frame from mpg123_tellframe(), or a negative value on error
 	(mh->err is set to MPG123_NO_SEEK_FROM_END or MPG123_BAD_WHENCE where applicable).
 */
 off_t mpg123_seek_frame(mpg123_handle *mh, off_t offset, int whence)
 {
 	off_t target = 0;
 	int status;
 	if(mh == NULL) return MPG123_ERR;
 	if(mh->fresh && !mh->to_decode)
 	{
 		/* Fresh track, need first frame for basic info. */
 		status = get_next_frame(mh);
 		if(status < 0) return status;
 	}
 	/* Could play games here with to_decode... */
 	target = mh->num;
 	if(whence == SEEK_CUR)
 	{
 		target += offset;
 	}
 	else if(whence == SEEK_SET)
 	{
 		target = offset;
 	}
 	else if(whence == SEEK_END)
 	{
 		/* Only possible once the number of frames in the track is known. */
 		if(mh->track_frames > 0) target = mh->track_frames - offset;
 		else
 		{
 			mh->err = MPG123_NO_SEEK_FROM_END;
 			return MPG123_ERR;
 		}
 	}
 	else
 	{
 		mh->err = MPG123_BAD_WHENCE;
 		return MPG123_ERR;
 	}
 	if(target < 0) target = 0;
 	/* Hm, do we need to seek right past the end? */
 	else if(mh->track_frames > 0 && target >= mh->track_frames) target = mh->track_frames;
 	frame_set_frameseek(mh, target);
 	status = do_the_seek(mh);
 	if(status < 0) return status;
 	return mpg123_tellframe(mh);
 }
 /* Return the handle's metadata flags; 0 for a NULL handle. */
 int mpg123_meta_check(mpg123_handle *mh)
 {
 	if(mh == NULL) return 0;
 	return mh->metaflags;
 }
 /*
 	Hand out pointers to the ID3 data stored in the handle.
 	v1 and v2 may each be NULL; non-NULL ones are first set to NULL and then, if the handle
 	has ID3 data flagged, pointed at the stored data (v1 only if the reader saw an ID3 tag).
 	Clears the MPG123_NEW_ID3 flag when data was handed out.
 	Returns MPG123_OK, or MPG123_ERR for a NULL handle.
 */
 int mpg123_id3(mpg123_handle *mh, mpg123_id3v1 **v1, mpg123_id3v2 **v2)
 {
 	if(v1 != NULL) *v1 = NULL;
 	if(v2 != NULL) *v2 = NULL;
 	if(mh == NULL) return MPG123_ERR;
 	/* No ID3 data flagged: outputs stay NULL. */
 	if(!(mh->metaflags & MPG123_ID3)) return MPG123_OK;
 	if(v1 != NULL && (mh->rdat.flags & READER_ID3TAG))
 	{
 		*v1 = (mpg123_id3v1*) mh->id3buf;
 	}
 	if(v2 != NULL)
 	{
 		*v2 = &mh->id3v2;
 	}
 	mh->metaflags |= MPG123_ID3;
 	mh->metaflags &= ~MPG123_NEW_ID3;
 	return MPG123_OK;
 }
 /*
 	Hand out a pointer to the ICY metadata stored in the handle.
 	icy_meta may be NULL (like the outputs of mpg123_id3()); if not, it is set to NULL and then,
 	if the handle has ICY data flagged, pointed at mh->icy.data.
 	Clears the MPG123_NEW_ICY flag when ICY data is flagged.
 	Returns MPG123_OK, or MPG123_ERR for a NULL handle.
 */
 int mpg123_icy(mpg123_handle *mh, char **icy_meta)
 {
 	if(icy_meta != NULL) *icy_meta = NULL;
 	if(mh == NULL) return MPG123_ERR;
 	if(mh->metaflags & MPG123_ICY)
 	{
 		if(icy_meta != NULL) *icy_meta = mh->icy.data;
 		mh->metaflags |= MPG123_ICY;
 		mh->metaflags &= ~MPG123_NEW_ICY;
 	}
 	return MPG123_OK;
 }
 /* Close the handle's reader, if there is one with a close function, and forget it.
    Returns MPG123_OK, or MPG123_ERR for a NULL handle. */
 int mpg123_close(mpg123_handle *mh)
 {
 	if(mh == NULL) return MPG123_ERR;
 	if(mh->rd != NULL)
 	{
 		if(mh->rd->close != NULL) mh->rd->close(mh);
 	}
 	mh->rd = NULL;
 	return MPG123_OK;
 }
 /* Close and destroy a handle; a NULL handle is ignored. */
 void mpg123_delete(mpg123_handle *mh)
 {
 	if(mh == NULL) return;
 	mpg123_close(mh);
 	frame_exit(mh); /* free buffers in frame */
 	free(mh); /* free struct; cast? */
 }
 /* Error messages, indexed by error code (see mpg123_plain_strerror()).
    Every entry needs its trailing comma: adjacent string literals are merged into one entry,
    which would shift all following messages by one. */
 static const char *mpg123_error[] =
 {
 	"No error... (code 0)",
 	"Unable to set up output format! (code 1)",
 	"Invalid channel number specified. (code 2)",
 	"Invalid sample rate specified. (code 3)",
 	"Unable to allocate memory for 16 to 8 converter table! (code 4)",
 	"Bad parameter id! (code 5)",
 	"Bad buffer given -- invalid pointer or too small size. (code 6)",
 	"Out of memory -- some malloc() failed, (code 7)",
 	"You didn't initialize the library! (code 8)",
 	"Invalid decoder choice. (code 9)",
 	"Invalid mpg123 handle. (code 10)",
 	"Unable to initialize frame buffers (out of memory?)! (code 11)",
 	"Invalid RVA mode. (code 12)",
 	"This build doesn't support gapless decoding. (code 13)",
 	"Not enough buffer space. (code 14)",
 	"Incompatible numeric data types. (code 15)",
 	"Bad equalizer band. (code 16)",
 	"Null pointer given where valid storage address needed. (code 17)",
 	"Some problem reading the stream. (code 18)",
 	"Cannot seek from end (end is not known). (code 19)",
 	"Invalid \"whence\" for seek function. (code 20)"
 };
 /* Look up the message for an error code; codes outside the table get a generic message. */
 const char* mpg123_plain_strerror(int errcode)
 {
 	const size_t known = sizeof(mpg123_error)/sizeof(char*);
 	if(errcode < 0 || (size_t)errcode >= known)
 	{
 		return "I have no idea - an unknown error code!";
 	}
 	return mpg123_error[errcode];
 }
 /* Return the error code stored in the handle; MPG123_BAD_HANDLE for a NULL handle. */
 int mpg123_errcode(mpg123_handle *mh)
 {
 	return (mh != NULL) ? mh->err : MPG123_BAD_HANDLE;
 }
 /* Return the message for the error code stored in the handle. */
 const char* mpg123_strerror(mpg123_handle *mh)
 {
 	int code = mpg123_errcode(mh);
 	return mpg123_plain_strerror(code);
 }
--- a/src/libmpg123/libmpg123.sym
+++ b/src/libmpg123/libmpg123.sym
@@ -0,0 +1,58 @@
 mpg123_init
 mpg123_exit
 mpg123_new
 mpg123_parnew
 mpg123_delete
 mpg123_decoders
 mpg123_supported_decoders
 mpg123_decoder
 mpg123_plain_strerror
 mpg123_strerror
 mpg123_errcode
 mpg123_rates
 mpg123_encodings
 mpg123_format_none
 mpg123_format_all
 mpg123_format
 mpg123_format_support
 mpg123_getformat
 mpg123_param
 mpg123_getparam
 mpg123_new_pars
 mpg123_delete_pars
 mpg123_par
 mpg123_getpar
 mpg123_eq
 mpg123_reset_eq
 mpg123_volume
 mpg123_volume_change
 mpg123_getvolume
 mpg123_position
 mpg123_tpf
 mpg123_open
 mpg123_open_feed
 mpg123_open_fd
 mpg123_read
 mpg123_decode
 mpg123_decode_frame
 mpg123_clip
 mpg123_close
 mpg123_seek_frame
 mpg123_timeframe
 mpg123_print_index
 mpg123_seek
 mpg123_info
 mpg123_safe_buffer
 mpg123_outblock
 mpg123_replace_buffer
 mpg123_init_string
 mpg123_free_string
 mpg123_resize_string
 mpg123_copy_string
 mpg123_add_string
 mpg123_set_string
 mpg123_meta_check
 mpg123_id3
 mpg123_icy
 mpg123_tell
 mpg123_feedseek
--- a/src/libmpg123/mangle.h
+++ b/src/libmpg123/mangle.h
@@ -0,0 +1,57 @@
 /* mangle.h - This file has some CPP macros to deal with different symbol
 * mangling across binary formats.
 * (c)2002 by Felix Buenemann <atmosfear at users.sourceforge.net>
 * File licensed under the GPL, see http://www.fsf.org/ for more info.
 */
 /* ThOr: added the plain ASM_NAME
   Also this is getting more generic with the align stuff. */
 #ifndef __MANGLE_H
 #define __MANGLE_H
 #include "config.h"
 /* MOVUAPS: the aligned move (movaps) when CCALIGN is defined, the unaligned one (movups) otherwise. */
 #ifdef CCALIGN
 #define MOVUAPS movaps
 #else
 #define MOVUAPS movups
 #endif
 /* ALIGNn: .align directive for n bytes. With ASMALIGN_EXP the argument is the exponent
    (2^2 = 4 ... 2^5 = 32), otherwise the byte count itself. */
 #ifdef ASMALIGN_EXP
 #define ALIGN4  .align 2
 #define ALIGN8  .align 3
 #define ALIGN16 .align 4
 #define ALIGN32 .align 5
 #else
 #define ALIGN4  .align 4
 #define ALIGN8  .align 8
 #define ALIGN16 .align 16
 #define ALIGN32 .align 32
 #endif
 /* Symbol naming: the platforms listed below get an underscore prefix on symbol names. */
 /* Feel free to add more to the list, eg. a.out IMO */
 #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OS2__) || \
   (defined(__OpenBSD__) && !defined(__ELF__)) || defined(__APPLE__)
 #define MANGLE(a) "_" #a
 #define ASM_NAME(a) _##a
 #define ASM_VALUE(a) $_##a
 #else
 #define MANGLE(a) #a
 #define ASM_NAME(a) a
 /* NOTE(review): this variant yields string literals while the underscore variant above
    yields a plain token -- confirm that the difference is intended. */
 #define ASM_VALUE(a) "$" #a
 #endif
 /* COMM: .comm directive; the third argument is dropped on Cygwin, MinGW and Apple. */
 #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__APPLE__)
 #define COMM(a,b,c) .comm a,b
 #else
 #define COMM(a,b,c) .comm a,b,c
 #endif
 /* more hacks for macosx; no .bss ... */
 #ifdef __APPLE__
 #define BSS .data
 #else
 #define BSS .bss
 #endif
 #endif /* !__MANGLE_H */
--- a/src/libmpg123/mpg123.h
+++ b/src/libmpg123/mpg123.h
@@ -0,0 +1,326 @@
 #ifndef MPG123_LIB_H
 #define MPG123_LIB_H
 /* These aren't actually in use... seems to work without using libtool. */
 #ifdef BUILD_MPG123_DLL
 /* The dll exports. */
 #define EXPORT __declspec(dllexport)
 #else
 #ifdef LINK_MPG123_DLL
 /* The exe imports. */
 #define EXPORT __declspec(dllimport)
 #else
 /* Nothing on normal/UNIX builds */
 #define EXPORT
 #endif
 #endif
 #include <stdlib.h>
 #include <stdio.h>
 #include <sys/types.h> /* off_t and ssize_t, used in the prototypes below */
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* not decided... how anonymous should the handle be? */
 struct mpg123_handle_struct;
 struct mpg123_pars_struct;
 typedef struct mpg123_handle_struct mpg123_handle;
 typedef struct mpg123_pars_struct   mpg123_pars;
 /* non-threadsafe init/exit, call _once_ */
 EXPORT int  mpg123_init(void);
 EXPORT void mpg123_exit(void);
 /* Create a handle with optional choice of decoder (named by a string)
   and optional retrieval of an error code to feed to mpg123_plain_strerror().
   Optional means: Either or both of the parameters may be NULL.
   The handle creation is successful when a non-NULL pointer is returned. */
 EXPORT mpg123_handle *mpg123_new(const char* decoder, int *error);
 /* Create a handle with preset parameters. */
 EXPORT mpg123_handle *mpg123_parnew(mpg123_pars *mp, const char* decoder, int *error);
 /* Delete handle, mh is either a valid mpg123 handle or NULL. */
 EXPORT void mpg123_delete(mpg123_handle *mh);
 /* Return NULL-terminated array of generally available decoder names... */
 EXPORT char **mpg123_decoders();
 /* ...or just the actually supported (by CPU) decoders. */
 EXPORT char **mpg123_supported_decoders();
 EXPORT int mpg123_decoder(mpg123_handle *mh, const char* decoder);
 /* Error codes of the library. The values are implicit: MPG123_OK is 0 and
    every following code is one larger than the one before it. */
 enum mpg123_errors
 {
 	MPG123_OK = 0,
 	MPG123_BAD_OUTFORMAT,
 	MPG123_BAD_CHANNEL,
 	MPG123_BAD_RATE,
 	MPG123_ERR_16TO8TABLE,
 	MPG123_BAD_PARAM,
 	MPG123_BAD_BUFFER,
 	MPG123_OUT_OF_MEM,
 	MPG123_NOT_INITIALIZED,
 	MPG123_BAD_DECODER,
 	MPG123_BAD_HANDLE,
 	MPG123_NO_BUFFERS,
 	MPG123_BAD_RVA,
 	MPG123_NO_GAPLESS,
 	MPG123_NO_SPACE,
 	MPG123_BAD_TYPES,
 	MPG123_BAD_BAND,
 	MPG123_ERR_NULL,
 	MPG123_ERR_READER,
 	MPG123_NO_SEEK_FROM_END,
 	MPG123_BAD_WHENCE
 };
 /* Give string describing what error errcode means. */
 EXPORT const char* mpg123_plain_strerror(int errcode);
 /* Give string describing what error has occurred in the context of handle mh.
   When a function operating on an mpg123 handle returns MPG123_ERR, you should check for the actual reason via
   const char *errmsg = mpg123_strerror(mh)
   This function will catch mh == NULL and return the message for MPG123_BAD_HANDLE. */
 EXPORT const char* mpg123_strerror(mpg123_handle *mh);
 /* Return the plain errcode instead of a string. */
 EXPORT int         mpg123_errcode(mpg123_handle *mh);
 /* Output sample encodings: 16 or 8 bits, signed or unsigned... all flags fit into 8 bits, float/double are not yet standard and special anyway */
 #define MPG123_ENC_16     0x40 /* 0100 0000 */
 #define MPG123_ENC_SIGNED 0x80 /* 1000 0000 */
 /* True when f is an 8bit encoding, that is: when the 16bit flag is not set.
    This changes in case float output will be integrated in the normal library. */
 #define MPG123_ENC_8(f)   (!((f) & MPG123_ENC_16))
 #define MPG123_ENC_SIGNED_16    (MPG123_ENC_16|MPG123_ENC_SIGNED|0x10) /* 1101 0000 */
 #define MPG123_ENC_UNSIGNED_16  (MPG123_ENC_16|0x20)                   /* 0110 0000 */
 #define MPG123_ENC_UNSIGNED_8   0x01                                   /* 0000 0001 */
 #define MPG123_ENC_SIGNED_8     (MPG123_ENC_SIGNED|0x02)               /* 1000 0010 */
 #define MPG123_ENC_ULAW_8       0x04                                   /* 0000 0100 */
 #define MPG123_ENC_ALAW_8       0x08                                   /* 0000 1000 */
 /* All encodings combined (1111 1111).
    The list must not name MPG123_ENC_ANY itself: a macro is not expanded again
    inside its own replacement, so the name would stay behind as an undeclared
    identifier and every use would fail to compile. */
 #define MPG123_ENC_ANY ( MPG123_ENC_SIGNED_16  | MPG123_ENC_UNSIGNED_16 | \
                         MPG123_ENC_UNSIGNED_8 | MPG123_ENC_SIGNED_8    | \
                         MPG123_ENC_ULAW_8     | MPG123_ENC_ALAW_8 )
 /* They can be combined into one number to indicate mono and stereo... */
 #define MPG123_MONO   1
 #define MPG123_STEREO 2
 /* 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000 or _one_ custom rate <=96000 */
 #define MPG123_RATES     9 /* A future library version may not have less! */
 EXPORT extern const long mpg123_rates[MPG123_RATES];
 #define MPG123_ENCODINGS 6 /* A future library version may not have less! */
 EXPORT extern const int  mpg123_encodings[MPG123_ENCODINGS];
 /* Accept no output format at all, use before specifying supported formats with mpg123_format */
 EXPORT int mpg123_format_none(mpg123_handle *mh);
 /* Accept all formats (also any custom rate you may set) -- this is default. */
 EXPORT int mpg123_format_all(mpg123_handle *mh);
 /*
 	Setting audio format support in detail:
 	rateindex: Index in rates list...
 	Negative rate index chooses the custom one.
 	channels: combination of MPG123_STEREO and MPG123_MONO
 	encodings: combination of accepted encodings for rate and channels, e.g. MPG123_ENC_SIGNED_16|MPG123_ENC_ULAW_8
 */
 EXPORT int mpg123_format(mpg123_handle *mh, int rateindex, int channels, int encodings); /* 0 is good, -1 is error */
 /* Check if a specific format at a specific rate is supported.
   Returns 0 for no support (includes invalid parameters), MPG123_STEREO, MPG123_MONO or MPG123_STEREO|MPG123_MONO. */
 EXPORT int mpg123_format_support(mpg123_handle *mh, int ratei, int enci); /* Indices of rate and encoding! */
 /* Get the current output format. */
 EXPORT int mpg123_getformat(mpg123_handle *mh, long *rate, int *channels, int *encoding);
 /* Flag bits; combine them into the value given for MPG123_FLAGS or MPG123_ADD_FLAGS. */
 /* The three mono choices, followed by the mask that covers all of them. */
 #define MPG123_MONO_LEFT    0x01 /* 0000 0001 */
 #define MPG123_MONO_RIGHT   0x02 /* 0000 0010 */
 #define MPG123_MONO_MIX     0x04 /* 0000 0100 */
 #define MPG123_FORCE_MONO   0x07 /* 0000 0111 */
 #define MPG123_FORCE_STEREO 0x08 /* 0000 1000 */
 #define MPG123_FORCE_8BIT   0x10 /* 0001 0000 */
 #define MPG123_QUIET        0x20 /* 0010 0000 suppress any printouts (overrules verbose) */
 #define MPG123_GAPLESS      0x40 /* 0100 0000 flag always defined... */
 #define MPG123_NO_RESYNC    0x80 /* 1000 0000 disable resync stream after error */
 /* RVA choices */
 #define MPG123_RVA_OFF   0
 #define MPG123_RVA_MIX   1
 #define MPG123_RVA_ALBUM 2
 #define MPG123_RVA_MAX   MPG123_RVA_ALBUM
 /* Parameter keys, numbered from 0 in the order given here. */
 enum mpg123_parms
 {
 	/* set verbosity value for enabling messages to stderr, >= 0 makes sense */
 	MPG123_VERBOSE = 0,
 	/* set all flags, p.ex val = MPG123_GAPLESS|MPG123_MONO_MIX */
 	MPG123_FLAGS,
 	/* add some flags */
 	MPG123_ADD_FLAGS,
 	/* when value > 0, force output rate to that value */
 	MPG123_FORCE_RATE,
 	/* 0=native rate, 1=half rate, 2=quarter rate */
 	MPG123_DOWN_SAMPLE,
 	/* one of the RVA choices above */
 	MPG123_RVA,
 	/* play a frame <n> times */
 	MPG123_DOWNSPEED,
 	/* play every <n>th frame */
 	MPG123_UPSPEED,
 	/* start with this frame (skip frames before that) */
 	MPG123_START_FRAME,
 	/* decode only this number of frames */
 	MPG123_DECODE_FRAMES,
 	/* stream contains ICY metadata with this interval */
 	MPG123_ICY_INTERVAL,
 	/* the scale for output samples (amplitude) */
 	MPG123_OUTSCALE
 };
 /* This sets, for a specific handle, a specific parameter (key chosen from the above list), to the specified value.
   TODO: Assess the possibilities and troubles of changing parameters during playback. */
 EXPORT int mpg123_param   (mpg123_handle *mh, int key, long value, double fvalue);
 EXPORT int mpg123_getparam(mpg123_handle *mh, int key, long *val,  double *fval);
 /* Direct access to a parameter set without full handle around it. */
 EXPORT mpg123_pars *mpg123_new_pars(int *error);
 EXPORT void         mpg123_delete_pars(mpg123_pars* mp);
 EXPORT int mpg123_par   (mpg123_pars *mp, int key, long value, double fvalue);
 EXPORT int mpg123_getpar(mpg123_pars *mp, int key, long *val, double *fval);
 #define MPG123_LEFT  1
 #define MPG123_RIGHT 2
 /* Channel can be MPG123_LEFT, MPG123_RIGHT or MPG123_LEFT|MPG123_RIGHT for both.
   Band is an eq band from 0 to 31, val the (linear) factor. */
 EXPORT int mpg123_eq(mpg123_handle *mh, int channel, int band, double val);
 EXPORT int mpg123_reset_eq(mpg123_handle *mh); /* all back to 1 */
 /* Change output volume including the RVA setting, vol<0 just applies (a possibly changed) RVA setting. */
 EXPORT int mpg123_volume(mpg123_handle *mh, double vol);
 EXPORT int mpg123_volume_change(mpg123_handle *mh, double change);
 /* Return current volume setting, the actual value due to RVA, the RVA adjustment itself.
   It's all as double float value to abstract the sample format.
   Oh, and the volume values are linear factors / amplitudes  (not percent) and the RVA value is in decibel. */
 EXPORT int mpg123_getvolume(mpg123_handle *mh, double *base, double *really, double *rva_db);
 /* The current position in samples. On the next read, you'd get that sample. */
 EXPORT off_t mpg123_tell(mpg123_handle *mh);
 /* The next read will give you data from this frame. */
 EXPORT off_t mpg123_tellframe(mpg123_handle *mh);
 /* If possible, tell the full (expected) length of current track in samples. */
 EXPORT off_t mpg123_length(mpg123_handle *mh);
 /* Info about current and remaining frames/seconds.
   You provide an offset (in frames) from now and a number of output bytes served by mpg123 but not yet played.
   You get the projected current frame and seconds, as well as the remaining frames/seconds.
   This does _not_ care about skipped samples due to gapless playback. */
 EXPORT int mpg123_position( mpg123_handle *mh, off_t frame_offset, off_t buffered_bytes,
                            off_t *current_frame,     off_t *frames_left,
 /*                            off_t *current_samples,   off_t *samples_left ); */
 double *current_seconds, double *seconds_left);
 /* Time (seconds) per frame; <0 is error. */
 EXPORT double mpg123_tpf(mpg123_handle *mh);
 /* The open functions reset stuff and make a new, different stream possible - even if there isn't actually a resource involved like with open_feed. */
 EXPORT int mpg123_open     (mpg123_handle *mh, char *url); /* a file or http url */
 EXPORT int mpg123_open_feed(mpg123_handle *mh);            /* prepare for direct feeding */
 EXPORT int mpg123_open_fd  (mpg123_handle *mh, int fd);    /* use an already opened file descriptor */
 /* reading samples / triggering decoding, possible return values: */
 /* MPG123_OK on success */
 #define MPG123_ERR -1 /* in general, functions return that on error */
 /* special status values */
 #define MPG123_NEED_MORE  -10 /* For feed: "Feed me more!" */
 #define MPG123_NEW_FORMAT -11 /* Output format will be different on next call. */
 #define MPG123_DONE       -12 /* Track ended. */
 /* Read from stream and decode up to outmemsize bytes. Returns a code from above and the number of decoded bytes in *done. */
 EXPORT ssize_t mpg123_read(mpg123_handle *mh, unsigned char *outmemory, size_t outmemsize, size_t *done);
 /* Same as above but with feeding input data (when inmemory != NULL).
   This is very close to a drop-in replacement for old mpglib.
   When you give zero-sized output buffer the input will be parsed until decoded data is available.
   That enables you to get NEW_FORMAT (and query it) without taking decoded data. */
 EXPORT int mpg123_decode(mpg123_handle *mh, unsigned char *inmemory, size_t inmemsize, unsigned char *outmemory, size_t outmemsize, size_t *done);
 /* Decode only one frame (or read a frame and return after setting a new format), update num to latest decoded frame index. */
 EXPORT int mpg123_decode_frame(mpg123_handle *mh, off_t *num, unsigned char **audio, size_t *bytes);
 /* Get and reset the clip count. */
 EXPORT long mpg123_clip(mpg123_handle *mh);
 /* Well, what do you think? Closes the resource, if libmpg123 opened it. */
 EXPORT int mpg123_close(mpg123_handle *mh);
 /* The seek stuff needs more thought; it's going to be sample-accurate and I need a way for feeding.
   So: SEEK STUFF WILL CHANGE! */
 EXPORT off_t mpg123_timeframe(mpg123_handle *mh, double sec);
 EXPORT int mpg123_print_index(mpg123_handle *fr, FILE* out);
 /*
 	Seeking in MPEG files/streams: modelled after the standard fseek (or fseeko).
 	- set whence to SEEK_SET, SEEK_CUR or SEEK_END (not guaranteed to work for all streams, of course)
 	- returning resulting offset >= 0 or MPG123_ERR (-1)
 	mpg123_feedseek() gives also an input data offset that it expects to be present the next time data is fed to mpg123_decode().
 	Still wondering: long or off_t ??
 	Trying to code it so that no decoding happens during seek (but some pre-decoding may be needed after seek).
 	Sample-accurate seek depends on the gapless code being in effect.
 	Without that, we only get frame-accurate.
 */
 EXPORT off_t mpg123_seek      (mpg123_handle *mh, off_t sampleoff, int whence);
 EXPORT off_t mpg123_feedseek  (mpg123_handle *mh, off_t sampleoff, int whence, off_t *input_offset);
 /* in/output offset in MPEG frames instead of samples */
 EXPORT off_t mpg123_seek_frame(mpg123_handle *mh, off_t frameoff,  int whence);
 enum mpg123_vbr  { MPG123_CBR=0, MPG123_VBR, MPG123_ABR };
 /* Properties of an MPEG stream/frame.
    NOTE(review): presumably filled in by mpg123_info(), whose prototype takes a
    pointer to this struct -- confirm against the implementation. */
 struct mpg123_frameinfo
 {
 	enum {MPG123_1_0 = 0, MPG123_2_0, MPG123_2_5 } version;
 	int layer; /* Well... 1, 2 or 3  */
 	long rate; /* The sampling rate. */
 	/* "Stereo", "Joint-Stereo", "Dual-Channel", "Single-Channel" ... so mode != MPG123_M_MONO means two channels. */
 	enum { MPG123_M_STEREO=0, MPG123_M_JOINT, MPG123_M_DUAL, MPG123_M_MONO } mode;
 	int mode_ext;
 	int framesize;
 /* NOTE(review): these look like the bit values to test in the flags member below -- confirm. */
 #define MPG123_CRC       1
 #define MPG123_COPYRIGHT 2
 #define MPG123_PRIVATE   4
 #define MPG123_ORIGINAL  8
 	int flags;
 	int emphasis;
 	int bitrate;
 	int abr_rate;
 	enum mpg123_vbr vbr; /* MPG123_CBR, MPG123_VBR or MPG123_ABR */
 };
 EXPORT int mpg123_info(mpg123_handle *mh, struct mpg123_frameinfo *mi);
 /* Scan through file (if seekable) or just the first frame (without decoding, for non-seekable) and return various information.
   That could include format, length, padding, ID3, ... */
 /* int mpg123_scan(mpg123_handle *mh, struct mpg123_info *mi); */
 EXPORT size_t mpg123_safe_buffer(); /* Get the safe output buffer size for all cases (when you want to replace the internal buffer) */
 EXPORT size_t mpg123_outblock(mpg123_handle *mh); /* The max size of one frame's decoded output with current settings. */
 EXPORT int mpg123_replace_buffer(mpg123_handle *mh, unsigned char *data, size_t size);
 /* The ID3v1 tag, 128 bytes of fixed-width fields.
    Don't take anything for granted (like string termination)! */
 typedef struct
 {
 	char tag[3];         /* the classic intro, "TAG" */
 	char title[30];      /* title string */
 	char artist[30];     /* artist string */
 	char album[30];      /* album string */
 	char year[4];        /* year string */
 	char comment[30];    /* comment string */
 	unsigned char genre; /* genre code */
 } mpg123_id3v1;
 /* A safer string; the buffer can also hold a number of null-terminated strings. */
 typedef struct
 {
 	char *p;     /* the string data */
 	size_t size; /* raw number of bytes allocated for p */
 	size_t fill; /* number of bytes in use, the closing zero byte included */
 } mpg123_string;
 /* A little string library, it's not strictly mpeg decoding, but the functions are there. */
 EXPORT void mpg123_init_string  (mpg123_string* sb);
 EXPORT void mpg123_free_string  (mpg123_string* sb);
 /* returning 0 on error, 1 on success */
 EXPORT int  mpg123_resize_string(mpg123_string* sb, size_t news);
 EXPORT int  mpg123_copy_string  (mpg123_string* from, mpg123_string* to);
 EXPORT int  mpg123_add_string   (mpg123_string* sb, char* stuff);
 EXPORT int  mpg123_set_string   (mpg123_string* sb, char* stuff);
 typedef struct
 {
 	unsigned char version; /* 3 or 4 for ID3v2.3 or ID3v2.4 */
 	/* The ID3v2 text frames are allowed to contain multiple strings.
 	   So check for null bytes until you reach the mpg123_string fill.
 	   All text is encoded in UTF-8 */
 	mpg123_string title;
 	mpg123_string artist;
 	mpg123_string album;
 	mpg123_string year;    /* be ready for 20570! */
 	mpg123_string comment;
 	mpg123_string genre;   /* The genre string(s) may very well need postprocessing, esp. for ID3v2.3 . */
 } mpg123_id3v2;
 /* Query if there is (new) meta info, be it ID3 or ICY (or something new in future).
   The check function returns a combination of these flags: */
 #define MPG123_ID3     0x3 /* 0011 There is some ID3 info. Also matches 0010 or NEW_ID3. */
 #define MPG123_NEW_ID3 0x1 /* 0001 There is ID3 info that changed since last call to mpg123_id3. */
 #define MPG123_ICY     0xc /* 1100 There is some ICY info. Also matches 0100 or NEW_ICY.*/
 #define MPG123_NEW_ICY 0x4 /* 0100 There is ICY info that changed since last call to mpg123_icy. */
 EXPORT int mpg123_meta_check(mpg123_handle *mh); /* On error (no valid handle) just 0 is returned. */
 /* Point v1 and v2 to existing data structures which may change on any next read/decode function call.
   Return value is MPG123_OK or MPG123_ERR, v1 and/or v2 can be set to NULL when there is no corresponding data. */
 EXPORT int mpg123_id3(mpg123_handle *mh, mpg123_id3v1 **v1, mpg123_id3v2 **v2);
 EXPORT int mpg123_icy(mpg123_handle *mh, char **icy_meta); /* same for ICY meta string */
 /* missing various functions to change properties: RVA, equalizer */
 /* also: functions to access properties: RVA, equalizer... */
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/libmpg123/mpg123lib_intern.h
+++ b/src/libmpg123/mpg123lib_intern.h
@@ -0,0 +1,149 @@
 #ifndef MPG123_H_INTERN
 #define MPG123_H_INTERN
 #include "mpg123.h"
 #include "config.h"
 #include "debug.h"
 /* Seek code relies on GAPLESS, at least for now. */
 #ifndef GAPLESS
 #define GAPLESS
 #endif
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
 #define SKIP_JUNK 1
 /* should these really be here? */
 /* Windows builds: define MPG123_WIN32 (after clearing any earlier definition),
    supply the math constants M_PI and M_SQRT2, make sure REAL_IS_FLOAT is set
    and define NEW_DCT9. */
 #ifdef _WIN32	/* Win32 Additions By Tony Million */
 # undef MPG123_WIN32
 # define MPG123_WIN32
 # define M_PI       3.14159265358979323846
 # define M_SQRT2	1.41421356237309504880
 # ifndef REAL_IS_FLOAT
 #  define REAL_IS_FLOAT
 # endif
 # define NEW_DCT9
 #endif
 #ifdef SUNOS
 #define memmove(dst,src,size) bcopy(src,dst,size)
 #endif
 /* some stuff has to go back to mpg123.h */
 /* Choose the internal sample type "real" together with its scanf/printf formats:
    REAL_IS_FLOAT -> float, REAL_IS_LONG_DOUBLE -> long double,
    REAL_IS_FIXED -> fixed point stored in a long, none of them -> double. */
 #ifdef REAL_IS_FLOAT
 #  define real float
 #  define REAL_SCANF "%f"
 #  define REAL_PRINTF "%f"
 #elif defined(REAL_IS_LONG_DOUBLE)
 #  define real long double
 #  define REAL_SCANF "%Lf"
 #  define REAL_PRINTF "%Lf"
 #elif defined(REAL_IS_FIXED)
 /* Fixed point with REAL_RADIX fractional bits; REAL_FACTOR is 2^REAL_RADIX as a double. */
 # define real long
 # define REAL_RADIX            15
 # define REAL_FACTOR           (32.0 * 1024.0)
 # define REAL_PLUS_32767       ( 32767 << REAL_RADIX )
 /* Scale by multiplication: left-shifting a negative value is undefined behaviour in C. */
 # define REAL_MINUS_32768      ( -32768 * (1 << REAL_RADIX) )
 # define DOUBLE_TO_REAL(x)     ((int)((x) * REAL_FACTOR))
 # define REAL_TO_SHORT(x)      ((x) >> REAL_RADIX)
 /* The product is formed in long long and then scaled back down by REAL_RADIX bits. */
 # define REAL_MUL(x, y)                (((long long)(x) * (long long)(y)) >> REAL_RADIX)
 #  define REAL_SCANF "%ld"
 #  define REAL_PRINTF "%ld"
 #else
 #  define real double
 #  define REAL_SCANF "%lf"
 #  define REAL_PRINTF "%f"
 #endif
 /* Defaults for the floating point variants: conversions are the identity,
    the limits are plain floating point constants and REAL_MUL is an ordinary product. */
 #ifndef DOUBLE_TO_REAL
 # define DOUBLE_TO_REAL(x)     (x)
 #endif
 #ifndef REAL_TO_SHORT
 # define REAL_TO_SHORT(x)      (x)
 #endif
 #ifndef REAL_PLUS_32767
 # define REAL_PLUS_32767       32767.0
 #endif
 #ifndef REAL_MINUS_32768
 /* Parenthesized so that the sign stays with the constant inside any expression. */
 # define REAL_MINUS_32768      (-32768.0)
 #endif
 #ifndef REAL_MUL
 # define REAL_MUL(x, y)                ((x) * (y))
 #endif
 /* used to be: AUDIOBUFSIZE = n*64 with n=1,2,3 ...
   now: factor on minimum frame buffer size (which takes upsampling into account) */
 #define		AUDIOBUFSIZE		2
 #include "true.h"
 #define         MAX_NAME_SIZE           81
 #define         SBLIMIT                 32
 #define         SCALE_BLOCK             12
 #define         SSLIMIT                 18
 /* Same as MPG_M_* */
 #define         MPG_MD_STEREO           0
 #define         MPG_MD_JOINT_STEREO     1
 #define         MPG_MD_DUAL_CHANNEL     2
 #define         MPG_MD_MONO             3
 /* float output only for generic decoder! */
 #ifdef FLOATOUT
 #define MAXOUTBURST 1.0
 #define scale_t double
 #else
 /* I suspect that 32767 would be a better idea here, but Michael put this in... */
 #define MAXOUTBURST 32768
 #define scale_t long
 #endif
 /* Pre Shift for 16 to 8 bit converter table */
 #define AUSHIFT (3)
 /* stuff that should be moved... */
 #include        <stdio.h>
 #include        <string.h>
 #include        <signal.h>
 #ifndef WIN32
 #include        <sys/signal.h>
 #include        <unistd.h>
 #endif
 /* want to support large files in future */
 #ifdef HAVE_SYS_TYPES_H
 	#include <sys/types.h>
 #endif
 /* Fallback: make off_t a macro for long when no macro of that name exists.
    NOTE(review): #ifndef only sees macros. Where <sys/types.h> provides off_t
    as a typedef the test still succeeds, and off_t means long from here on
    -- confirm that this is intended. */
 #ifndef off_t
 	#define off_t long
 #endif
 #include        <math.h>
 typedef unsigned char byte;
 #ifdef OS2
 #include <float.h>
 #endif
 #include "decode.h"
 #include "parse.h"
 #include "optimize.h"
 #include "frame.h"
 /* fr is a mpg123_handle* by convention here... */
 /* These macros take no argument: they read a variable named fr at the place
    where they are used. NOQUIET is true while MPG123_QUIET is not set in
    fr->p.flags; the VERBOSE variants additionally need fr->p.verbose to be
    non-zero, > 1 and > 2 respectively. */
 #define NOQUIET  (!(fr->p.flags & MPG123_QUIET))
 #define VERBOSE  (NOQUIET && fr->p.verbose)
 #define VERBOSE2 (NOQUIET && fr->p.verbose > 1)
 #define VERBOSE3 (NOQUIET && fr->p.verbose > 2)
 int decode_update(mpg123_handle *mh);
 #endif
--- a/src/libmpg123/optimize.c
+++ b/src/libmpg123/optimize.c
@@ -0,0 +1,144 @@
 /*
 	optimize: get a grip on the different optimizations
 	copyright 2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
 	Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
 */
 #include "mpg123lib_intern.h" /* includes optimize.h */
 /* Only multi-decoder builds (OPT_MULTI) keep CPU flags and a separate list
    of the decoders that the running machine supports. */
 #ifdef OPT_MULTI
 #include "getcpuflags.h"
 struct cpuflags cpu_flags;
 /* same number of entries as full list, but empty at beginning */
 /* One slot per compiled-in decoder plus the terminating NULL; check_decoders()
    writes the names into the leading slots. The slot for generic is reserved
    unconditionally here. */
 static char *mpg123_supported_decoder_list[] =
 {
 	#ifdef OPT_3DNOWEXT
 	NULL,
 	#endif
 	#ifdef OPT_SSE
 	NULL,
 	#endif
 	#ifdef OPT_3DNOW
 	NULL,
 	#endif
 	#ifdef OPT_MMX
 	NULL,
 	#endif
 	#ifdef OPT_I586
 	NULL,
 	#endif
 	#ifdef OPT_I586_DITHER
 	NULL,
 	#endif
 	#ifdef OPT_I486
 	NULL,
 	#endif
 	#ifdef OPT_I386
 	NULL,
 	#endif
 	#ifdef OPT_ALTIVEC
 	NULL,
 	#endif
 	NULL, /* generic */
 	NULL
 };
 #endif
 /* NULL-terminated list with the names of all decoders compiled into this
    build, one entry per OPT_* macro that is defined. Returned by
    mpg123_decoders(). */
 static char *mpg123_decoder_list[] =
 {
 	#ifdef OPT_3DNOWEXT
 	"3DNowExt",
 	#endif
 	#ifdef OPT_SSE
 	"SSE",
 	#endif
 	#ifdef OPT_3DNOW
 	"3DNow",
 	#endif
 	#ifdef OPT_MMX
 	"MMX",
 	#endif
 	#ifdef OPT_I586
 	"i586",
 	#endif
 	#ifdef OPT_I586_DITHER
 	"i586_dither",
 	#endif
 	#ifdef OPT_I486
 	"i486",
 	#endif
 	#ifdef OPT_I386
 	"i386",
 	#endif
 	#ifdef OPT_ALTIVEC
 	"AltiVec",
 	#endif
 	#ifdef OPT_GENERIC
 	"generic",
 	#endif
 	NULL
 };
 /* Fill mpg123_supported_decoder_list with the names of the decoders that can
    be used on this machine. Does nothing unless the build has OPT_MULTI.
    On x86 the CPU flags are fetched into cpu_flags first; the decoders that
    depend on them are only listed when the CPU passes the i586 check. */
 void check_decoders(void)
 {
 #ifdef OPT_MULTI
 	/* Next free slot. The list is all NULL initially, so the entry after the
 	   last name written terminates it. */
 	char **slot = mpg123_supported_decoder_list;
 #ifdef OPT_X86
 	getcpuflags(&cpu_flags);
 	if(cpu_i586(cpu_flags))
 	{
 		/* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
 		if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
 #ifdef OPT_3DNOWEXT
 		if(cpu_3dnowext(cpu_flags)) *slot++ = "3DNowExt";
 #endif
 #ifdef OPT_SSE
 		if(cpu_sse(cpu_flags)) *slot++ = "SSE";
 #endif
 #ifdef OPT_3DNOW
 		if(cpu_3dnow(cpu_flags)) *slot++ = "3DNow";
 #endif
 #ifdef OPT_MMX
 		if(cpu_mmx(cpu_flags)) *slot++ = "MMX";
 #endif
 #ifdef OPT_I586
 		*slot++ = "i586";
 #endif
 #ifdef OPT_I586_DITHER
 		*slot++ = "i586_dither";
 #endif
 	}
 #endif /* OPT_X86 */
 	/* just assume that the i486 build is run on a i486 cpu... */
 #ifdef OPT_I486
 	*slot++ = "i486";
 #endif
 #ifdef OPT_ALTIVEC
 	*slot++ = "AltiVec";
 #endif
 	/* every supported x86 can do i386, any cpu can do generic */
 #ifdef OPT_I386
 	*slot++ = "i386";
 #endif
 #ifdef OPT_GENERIC
 	*slot++ = "generic";
 #endif
 #endif /* OPT_MULTI */
 }
 /* Return the NULL-terminated list of all decoders compiled into the library. */
 char **mpg123_decoders()
 {
 	return mpg123_decoder_list;
 }
 /* Return the NULL-terminated list of decoders usable on this machine.
    Without OPT_MULTI this is the same list as mpg123_decoders() returns. */
 char **mpg123_supported_decoders()
 {
 #ifndef OPT_MULTI
 	return mpg123_decoder_list;
 #else
 	return mpg123_supported_decoder_list;
 #endif
 }
--- a/src/libmpg123/optimize.h
+++ b/src/libmpg123/optimize.h
@@ -0,0 +1,334 @@
 /*
 	optimize: get a grip on the different optimizations
 	copyright 2007 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Thomas Orgis, taking from mpg123.[hc]
 	for building mpg123 with one optimization only, you have to choose exclusively between
 	OPT_GENERIC (generic C code for everyone)
 	OPT_I386 (Intel i386)
 	OPT_I486 (...)
 	OPT_I586 (Intel Pentium)
 	OPT_I586_DITHER (Intel Pentium with dithering/noise shaping for enhanced quality)
 	OPT_MMX (Intel Pentium and compatibles with MMX, fast, but not the best accuracy)
 	OPT_3DNOW (AMD 3DNow!, K6-2/3, Athlon, compatibles...)
 	OPT_ALTIVEC (Motorola/IBM PPC with AltiVec under MacOSX)
 	or you define OPT_MULTI and give a combination which makes sense (do not include i486, do not mix altivec and x86).
 	I still have to examine the dynamics of this here together with REAL_IS_FIXED.
 */
 /* this is included in mpg123lib_intern.h, which includes config.h */
 #ifdef CCALIGN
 #define aligned(a) __attribute__((aligned(a)))
 #else
 #define aligned(a)
 #endif
 /* the optimizations only cover the synth1to1 mode and the dct36 function */
 /* the first two types are needed in set_synth_functions regardless of optimizations */
 typedef int (*func_synth)(real *,int, mpg123_handle *,int );
 typedef int (*func_synth_mono)(real *, mpg123_handle *);
 typedef void (*func_dct36)(real *,real *,real *,real *,real *);
 typedef	void (*func_dct64)(real *,real *,real *);
 typedef void (*func_make_decode_tables)(mpg123_handle*);
 typedef real (*func_init_layer3_gainpow2)(mpg123_handle*, int);
 typedef real* (*func_init_layer2_table)(mpg123_handle*, real*, double);
 typedef int (*func_synth_pent)(real *,int,unsigned char *);
 /* last headaches about getting mmx hardcode out */
 real init_layer3_gainpow2(mpg123_handle *fr, int i);
 real* init_layer2_table(mpg123_handle *fr, real *table, double m);
 void make_decode_tables(mpg123_handle *fr);
 void prepare_decode_tables(void); /* perhaps not best place here */
 /* only 3dnow replaces that one, it's internal to layer3.c otherwise */
 void dct36(real *,real *,real *,real *,real *);
 #define opt_dct36(fr) dct36
 /* only mmx replaces those */
 #define opt_make_decode_tables(fr) make_decode_tables(fr)
 #define opt_decwin(fr) (fr)->decwin
 #define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2
 #define opt_init_layer2_table(fr) init_layer2_table
 #ifdef OPT_GENERIC
 	#define PENTIUM_FALLBACK
 	void dct64(real *,real *,real *);
 	int synth_1to1(real *bandPtr,int channel, mpg123_handle *fr, int final);
 	int synth_1to1_8bit(real *bandPtr,int channel, mpg123_handle *fr, int final);
 	int synth_1to1_mono(real *, mpg123_handle *fr);
 	int synth_1to1_mono2stereo (real *, mpg123_handle *fr);
 	int synth_1to1_8bit_mono (real *, mpg123_handle *fr);
 	int synth_1to1_8bit_mono2stereo (real *, mpg123_handle *fr);
 	#ifndef OPT_MULTI
 	#define defopt generic
 	#define opt_dct64(fr) dct64
 	#define opt_synth_1to1(fr) synth_1to1
 	#define opt_synth_1to1_mono(fr) synth_1to1_mono
 	#define opt_synth_1to1_mono2stereo(fr) synth_1to1_mono2stereo
 	#define opt_synth_1to1_8bit(fr) synth_1to1_8bit
 	#define opt_synth_1to1_8bit_mono(fr) synth_1to1_8bit_mono
 	#define opt_synth_1to1_8bit_mono2stereo(fr) synth_1to1_8bit_mono2stereo
 	#endif
 #endif
 /* i486 is special */
 #ifdef OPT_I486
 #define OPT_I386
 #define defopt ivier
 	int synth_1to1_486(real *bandPtr, int channel, mpg123_handle *fr, int nb_blocks);
 	void dct64_i486(int *a,int *b,real *c); /* not used generally */
 #endif
 #ifdef OPT_I386
 	#define PENTIUM_FALLBACK
 	#define OPT_X86
 	int synth_1to1_i386(real *bandPtr, int channel, mpg123_handle *fr, int final);
 	#ifndef OPT_MULTI
 #ifndef defopt
 	#define defopt idrei
 #endif
 	#define opt_synth_1to1(fr) synth_1to1_i386
 	#endif
 #endif
 #ifdef OPT_I586
 	#define PENTIUM_FALLBACK
 	#define OPT_PENTIUM
 	#define OPT_X86
 	int synth_1to1_i586(real *bandPtr, int channel, mpg123_handle *fr, int final);
 	int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
 	#ifndef OPT_MULTI
 	#define defopt ifuenf
 	#define opt_synth_1to1(fr) synth_1to1_i586
 	#define opt_synth_1to1_i586_asm(fr) synth_1to1_i586_asm
 	#endif
 #endif
 #ifdef OPT_I586_DITHER
 	#define PENTIUM_FALLBACK
 	#define OPT_PENTIUM
 	#define OPT_X86
 	int synth_1to1_i586(real *bandPtr, int channel, mpg123_handle *fr, int final);
 	int synth_1to1_i586_asm_dither(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
 	#ifndef OPT_MULTI
 	#define defopt ifuenf_dither
 	#define opt_synth_1to1(fr) synth_1to1_i586
 	#define opt_synth_1to1_i586_asm(fr) synth_1to1_i586_asm_dither
 	#endif
 #endif
 /* That one has by far the most ugly hacks to make it cooperative. */
 /* MMX decoder: declarations of the MMX routines and, for a build with this
    single decoder (no OPT_MULTI), the binding of the opt_* hooks to them. */
 #ifdef OPT_MMX
 	#define OPT_MMXORSSE
 	#define OPT_X86
 	real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i);
 	real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m);
 	/* I think one can optimize storage here with the normal decwin */
 	extern real decwin_mmx[512+32];
 	void dct64_mmx(real *,real *,real *);
 	int synth_1to1_mmx(real *bandPtr, int channel, mpg123_handle *fr, int final);
 	void make_decode_tables_mmx(mpg123_handle *fr); /* tabinit_mmx.s */
 	void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); /* tabinit_mmx.s */
 	/* these are in asm, dct64 called directly there */
 	void dct64_MMX(short *a,short *b,real *c);
 	int synth_1to1_MMX(real *bandPtr, int channel, short *out, short *buffs, int *bo, float *decwins);
 	#ifndef OPT_MULTI
 	#define defopt mmx
 /*	#undef opt_decwin
 	#define opt_decwin(fr) decwin_mmx */
 	#define opt_dct64(fr) dct64_mmx
 	#define opt_synth_1to1(fr) synth_1to1_mmx
 	/* Replace the generic defaults defined further up in this header by the MMX variants. */
 	#undef opt_make_decode_tables
 	#define opt_make_decode_tables(fr) make_decode_tables_mmx(fr)
 	#undef opt_init_layer3_gainpow2
 	#define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2_mmx
 	#undef opt_init_layer2_table
 	#define opt_init_layer2_table(fr) init_layer2_table_mmx
 	/* Keeps the extern declarations of pnts and decwin further down from being made. */
 	#define OPT_MMX_ONLY
 	#endif
 #endif
 /* first crude hack into our source */
 #ifdef OPT_SSE
 	#define OPT_MMXORSSE
 	#define OPT_MPLAYER
 	#define OPT_X86
 	real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i);
 	real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m);
 	/* I think one can optimize storage here with the normal decwin */
 	extern real decwin_mmx[512+32];
 	void dct64_mmx(real *,real *,real *);
 	void dct64_sse(real *,real *,real *);
 	int synth_1to1_sse(real *bandPtr, int channel, mpg123_handle *fr, int final);
 	void synth_1to1_sse_asm(real *bandPtr, int channel, short *samples, short *buffs, int *bo, real *decwin);
 	void make_decode_tables_mmx(mpg123_handle *fr); /* tabinit_mmx.s */
 	void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); /* tabinit_mmx.s */
 	/* ugly! */
 	extern func_dct64 mpl_dct64;
 	#ifndef OPT_MULTI
 	#define defopt sse
 	#define opt_mpl_dct64(fr) dct64_sse
 /*	#undef opt_decwin
 	#define opt_decwin(fr) decwin_mmx */
 	#define opt_dct64(fr) dct64_mmx /* dct64_sse is silent in downsampling modes */
 	#define opt_synth_1to1(fr) synth_1to1_sse /* that will use dct64_sse */
 	#undef opt_make_decode_tables
 	#define opt_make_decode_tables(fr) make_decode_tables_mmx(fr)
 	#undef opt_init_layer3_gainpow2
 	#define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2_mmx
 	#undef opt_init_layer2_table
 	#define opt_init_layer2_table(fr) init_layer2_table_mmx
 	#define OPT_MMX_ONLY /* watch out! */
 	#endif
 #endif
 /* first crude hack into our source */
 /* Decoder selection for the 3DNowExt build (defopt dreidnowext). */
 #ifdef OPT_3DNOWEXT
 	#define OPT_MMXORSSE
 	#define OPT_MPLAYER
 	#define OPT_X86
 	/* Table setup helpers carrying the _mmx suffix; the preceding sse section declares the same ones. */
 	real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i);
 	real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m);
 	/* I think one can optimize storage here with the normal decwin */
 	extern real decwin_mmx[512+32];
 	void dct64_mmx(real *,real *,real *);
 	void dct64_3dnowext(real *,real *,real *);
 	void dct36_3dnowext(real *,real *,real *,real *,real *);
 	int synth_1to1_3dnowext(real *bandPtr, int channel, mpg123_handle *fr, int final);
 	void synth_1to1_3dnowext_asm(real *bandPtr, int channel, short *samples, short *buffs, int *bo, real *decwin);
 	void make_decode_tables_mmx(mpg123_handle *fr); /* tabinit_mmx.s */
 	void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); /* tabinit_mmx.s */
 	/* ugly! */
 	extern func_dct64 mpl_dct64;
 	/* Without OPT_MULTI the opt_* accessors resolve at compile time to the routines of this one decoder. */
 	#ifndef OPT_MULTI
 	#define defopt dreidnowext
 	#define opt_mpl_dct64(fr) dct64_3dnowext
 	#undef opt_dct36
 	#define opt_dct36(fr) dct36_3dnowext
 /*	#undef opt_decwin
 	#define opt_decwin(fr) decwin_mmx */
 	/* NOTE(review): the remark below was copied from the sse section; presumably dct64_3dnowext is meant here -- confirm. */
 	#define opt_dct64(fr) dct64_mmx /* dct64_sse is silent in downsampling modes */
 	#define opt_synth_1to1(fr) synth_1to1_3dnowext /* that will use dct64_3dnowext */
 	#undef opt_make_decode_tables
 	#define opt_make_decode_tables(fr) make_decode_tables_mmx(fr)
 	#undef opt_init_layer3_gainpow2
 	#define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2_mmx
 	#undef opt_init_layer2_table
 	#define opt_init_layer2_table(fr) init_layer2_table_mmx
 	#define OPT_MMX_ONLY /* watch out! */
 	#endif
 #endif
 /* The generic tables are only declared when no section above defined OPT_MMX_ONLY. */
 #ifndef OPT_MMX_ONLY
 extern real *pnts[5];
 extern real decwin[512+32];
 #endif
 /* Cosine table declared for every section that defines OPT_MPLAYER. */
 #ifdef OPT_MPLAYER
 extern const int costab_mmxsse[];
 #endif
 /* 3dnow used to use synth_1to1_i586 for mono / 8bit conversion - was that intentional? */
 /* I'm trying to skip the pentium code here ... until I see that that is indeed a bad idea */
 /* Decoder selection for the plain 3DNow build (defopt dreidnow): own dct36, equalizer and 1to1 synth. */
 #ifdef OPT_3DNOW
 	#define K6_FALLBACK /* a fallback for 3DNowExt */
 	#define OPT_X86
 	void dct36_3dnow(real *,real *,real *,real *,real *);
 	void do_equalizer_3dnow(real *bandPtr,int channel, real equalizer[2][32]);
 	int synth_1to1_3dnow(real *bandPtr, int channel, mpg123_handle *fr, int final);
 	int synth_1to1_3dnow_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
 	/* Single-decoder build: bind dct36 and the 1to1 synth directly; opt_dct64 is left to the OPT_X86 section below. */
 	#ifndef OPT_MULTI
 	#define defopt dreidnow
 	#undef opt_dct36
 	#define opt_dct36(fr) dct36_3dnow
 	#define opt_synth_1to1(fr) synth_1to1_3dnow
 	#endif
 #endif
 /* Declarations shared by every x86 decoder section above: CPU flag queries plus the i386 routines used for the mono and 8bit output variants. */
 #ifdef OPT_X86
 	/* these have to be merged back into one! */
 	unsigned int getcpuid();
 	unsigned int getextcpuflags();
 	unsigned int getstdcpuflags();
 	unsigned int getstd2cpuflags();
 	void dct64_i386(real *,real *,real *);
 	int synth_1to1_mono_i386(real *, mpg123_handle *fr);
 	int synth_1to1_mono2stereo_i386(real *, mpg123_handle *fr);
 	int synth_1to1_8bit_i386(real *,int, mpg123_handle *fr, int final);
 	int synth_1to1_8bit_mono_i386(real *, mpg123_handle *fr);
 	int synth_1to1_8bit_mono2stereo_i386(real *, mpg123_handle *fr);
 	#ifndef OPT_MULTI
 	/* Only fill in opt_dct64 when the specific decoder section did not already choose one. */
 	#ifndef opt_dct64
 	#define opt_dct64(fr) dct64_i386 /* default one even for 3dnow and i486 in decode_2to1, decode_ntom */
 	#endif
 	#define opt_synth_1to1_mono(fr) synth_1to1_mono_i386
 	#define opt_synth_1to1_mono2stereo(fr) synth_1to1_mono2stereo_i386
 	#define opt_synth_1to1_8bit(fr) synth_1to1_8bit_i386
 	#define opt_synth_1to1_8bit_mono(fr) synth_1to1_8bit_mono_i386
 	#define opt_synth_1to1_8bit_mono2stereo(fr) synth_1to1_8bit_mono2stereo_i386
 	#endif
 #endif
 /* Decoder selection for the AltiVec build (defopt altivec): own dct64 and a full set of 1to1 synth variants. */
 #ifdef OPT_ALTIVEC
 	void dct64_altivec(real *out0,real *out1,real *samples);
 	/* NOTE(review): these prototypes take (unsigned char *, int *) where the x86 synth prototypes above take an mpg123_handle -- confirm they match the definitions. */
 	int synth_1to1_altivec(real *,int,unsigned char *,int *);
 	int synth_1to1_mono_altivec(real *,unsigned char *,int *);
 	int synth_1to1_mono2stereo_altivec(real *,unsigned char *,int *);
 	int synth_1to1_8bit_altivec(real *,int,unsigned char *,int *);
 	int synth_1to1_8bit_mono_altivec(real *,unsigned char *,int *);
 	int synth_1to1_8bit_mono2stereo_altivec(real *,unsigned char *,int *);
 	#ifndef OPT_MULTI
 	#define defopt altivec
 	#define opt_dct64(fr) dct64_altivec
 	#define opt_synth_1to1(fr) synth_1to1_altivec
 	#define opt_synth_1to1_mono(fr) synth_1to1_mono_altivec
 	#define opt_synth_1to1_mono2stereo(fr) synth_1to1_mono2stereo_altivec
 	#define opt_synth_1to1_8bit(fr) synth_1to1_8bit_altivec
 	#define opt_synth_1to1_8bit_mono(fr) synth_1to1_8bit_mono_altivec
 	#define opt_synth_1to1_8bit_mono2stereo(fr) synth_1to1_8bit_mono2stereo_altivec
 	#endif
 #endif
 /* used for multi opt mode and the single 3dnow mode to have the old 3dnow test flag still working */
 void check_decoders(void);
 /* Multi-decoder build: every opt_* accessor becomes a lookup in the cpu_opts member of the handle passed as fr. */
 #ifdef OPT_MULTI
 	#ifdef OPT_X86
 	extern struct cpuflags cf;
 	#endif
 	#define defopt nodec
 	/* the decoding function pointers are read from (fr)->cpu_opts, i.e. per handle */
 	#define opt_synth_1to1(fr) ((fr)->cpu_opts.synth_1to1)
 	#define opt_synth_1to1_mono(fr) ((fr)->cpu_opts.synth_1to1_mono)
 	#define opt_synth_1to1_mono2stereo(fr) ((fr)->cpu_opts.synth_1to1_mono2stereo)
 	#define opt_synth_1to1_8bit(fr) ((fr)->cpu_opts.synth_1to1_8bit)
 	#define opt_synth_1to1_8bit_mono(fr) ((fr)->cpu_opts.synth_1to1_8bit_mono)
 	#define opt_synth_1to1_8bit_mono2stereo(fr) ((fr)->cpu_opts.synth_1to1_8bit_mono2stereo)
 	#ifdef OPT_PENTIUM
 	#define opt_synth_1to1_i586_asm(fr) ((fr)->cpu_opts.synth_1to1_i586_asm)
 	#endif
 	/* The table setup accessors are only redirected when a decoder with the _mmx table code is compiled in. */
 	#ifdef OPT_MMXORSSE
 	#undef opt_make_decode_tables
 	#define opt_make_decode_tables(fr) ((fr)->cpu_opts.make_decode_tables)(fr)
 /*	#undef opt_decwin
 	#define opt_decwin(fr) (fr)->cpu_opts.decwin */
 	#undef opt_init_layer3_gainpow2
 	#define opt_init_layer3_gainpow2(fr) ((fr)->cpu_opts.init_layer3_gainpow2)
 	#undef opt_init_layer2_table
 	#define opt_init_layer2_table(fr) ((fr)->cpu_opts.init_layer2_table)
 	#endif
 	#ifdef OPT_3DNOW
 	#undef opt_dct36
 	#define opt_dct36(fr) ((fr)->cpu_opts.dct36)
 	#endif
 	#define opt_dct64(fr) ((fr)->cpu_opts.dct64)
 	#ifdef OPT_MPLAYER
 	#define opt_mpl_dct64(fr) ((fr)->cpu_opts.mpl_dct64)
 	#endif
 #endif
--- a/src/libmpg123/parse.c
+++ b/src/libmpg123/parse.c
--- a/src/libmpg123/parse.h
+++ b/src/libmpg123/parse.h
@@ -0,0 +1,19 @@
 /* parse.h: prototypes of the frame parsing and position helper routines; each one operates on an mpg123_handle (type pulled in via frame.h). */
 #ifndef MPG123_PARSE_H
 #define MPG123_PARSE_H
 #include "frame.h"
 int read_frame_init(mpg123_handle* fr);
 int frame_bitrate(mpg123_handle *fr);
 long frame_freq(mpg123_handle *fr);
 int read_frame_recover(mpg123_handle* fr); /* dead? */
 /* Used by the stream reader's frame seek, which stops stepping forward as soon as this returns 0. */
 int read_frame(mpg123_handle *fr);
 void set_pointer(mpg123_handle *fr, long backstep);
 int position_info(mpg123_handle* fr, unsigned long no, long buffsize, unsigned long* frames_left, double* current_seconds, double* seconds_left);
 double compute_bpf(mpg123_handle *fr);
 long time_to_frame(mpg123_handle *fr, double seconds);
 int get_songlen(mpg123_handle *fr,int no);
 /* Conversions between sample counts and byte counts, both expressed as off_t. */
 off_t samples_to_bytes(mpg123_handle *fr , off_t s);
 off_t bytes_to_samples(mpg123_handle *fr , off_t b);
 #endif
--- a/src/libmpg123/reader.h
+++ b/src/libmpg123/reader.h
@@ -0,0 +1,72 @@
 #ifndef MPG123_READER_H
 #define MPG123_READER_H
 #include "config.h"
 #include "mpg123.h"
 /* One link in the feed reader's chain of input buffers; feed_more() appends a link for every block handed in. */
 struct buffy
 {
 	unsigned char *data; /* heap copy of the fed bytes, freed together with the link */
 	off_t size;          /* number of bytes in data */
 	struct buffy *next;  /* following link, NULL at the end of the chain */
 };
 /* Per-handle state of the active reader (position bookkeeping plus the input source). */
 struct reader_data
 {
 	off_t filelen; /* total file length or total buffer size */
 	off_t filepos; /* position in file or position in buffer chain */
 	int   filept;  /* file descriptor handed to read()/lseek()/close() by the stream readers */
 	int   flags;   /* combination of the READER_* flag bits (READER_FD_OPENED, READER_ID3TAG, ...) */
 	/* variables specific to feed reader */
 	off_t firstpos; /* the point of return on non-forget() */
 	struct buffy *buf;  /* first in buffer chain */
 };
 /* start to use off_t to properly do LFS in future ... used to be long */
 /*
 	Table of operations that makes up one reader implementation.
 	The entries of readers[] are initialized positionally, so the member order here must match those initializers.
 */
 struct reader
 {
 	int     (*init)           (mpg123_handle *);
 	void    (*close)          (mpg123_handle *);
 	ssize_t (*fullread)       (mpg123_handle *, unsigned char *, ssize_t);
 	int     (*head_read)      (mpg123_handle *, unsigned long *newhead);    /* succ: TRUE, else <= 0 (FALSE or READER_MORE) */
 	int     (*head_shift)     (mpg123_handle *, unsigned long *head);       /* succ: TRUE, else <= 0 (FALSE or READER_MORE) */
 	off_t   (*skip_bytes)     (mpg123_handle *, off_t len);                 /* succ: >=0, else error or READER_MORE         */
 	int     (*read_frame_body)(mpg123_handle *, unsigned char *, int size);
 	int     (*back_bytes)     (mpg123_handle *, off_t bytes);
 	int     (*seek_frame)     (mpg123_handle *, off_t num);
 	off_t   (*tell)           (mpg123_handle *);
 	void    (*rewind)         (mpg123_handle *);
 	void    (*forget)         (mpg123_handle *); /* NULL for the stream readers in readers[]; only the feed reader sets it */
 };
 /* Open a file by path or use an opened file descriptor. */
 /* With a NULL path the given descriptor is used and not closed by the reader later on. */
 int open_stream(mpg123_handle *, char *path, int fd);
 /* feed based operation has some specials */
 int open_feed(mpg123_handle *);
 /* externally called function, returns 0 on success, -1 on error */
 /* The count bytes at in are copied, so the caller keeps ownership of in. */
 int  feed_more(mpg123_handle *fr, unsigned char *in, long count);
 void feed_forget(mpg123_handle *fr);  /* forget the data that has been read (free some buffers) */
 off_t feed_set_pos(mpg123_handle *fr, off_t pos); /* Set position (inside available data if possible), return wanted byte offset of next feed. */
 /* Bits for reader_data.flags. */
 #define READER_FD_OPENED 0x1
 #define READER_ID3TAG    0x2
 #define READER_SEEKABLE  0x4
 #define READER_BUFFERED  0x8
 #define READER_MICROSEEK 0x10
 /* Indices into the readers[] table. */
 #define READER_STREAM 0
 #define READER_ICY_STREAM 1
 #define READER_FEED       2
 /* READERS is the number of table entries; the system stream reader only exists with READ_SYSTEM. */
 #ifdef READ_SYSTEM
 #define READER_SYSTEM 3
 #define READERS 4
 #else
 #define READERS 3
 #endif
 /* Return codes shared by the reader operations. */
 #define READER_ERROR -1
 #define READER_MORE  MPG123_NEED_MORE
 #endif
--- a/src/libmpg123/readers.c
+++ b/src/libmpg123/readers.c
@@ -0,0 +1,566 @@
 /*
 	readers.c: reading input data
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 #include <stdlib.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include "mpg123lib_intern.h"
 static off_t get_fileinfo(mpg123_handle *);
 /*
 	Stream based operation with ICY meta data.
 	Reads up to count bytes of audio data into buf and strips the metadata blocks that are interleaved into the stream every fr->icy.interval bytes.
 	A metadata block is one size byte (times 16 gives the length) followed by that many bytes of text; the most recent text is kept in fr->icy.data and MPG123_NEW_ICY is raised in fr->metaflags.
 	fr->rdat.filepos counts every byte taken from the descriptor, metadata included, and fr->icy.next is the filepos of the next size byte.
 	Returns the number of audio bytes stored (short at end of input) or READER_ERROR.
 */
 static ssize_t icy_fullread(mpg123_handle *fr, unsigned char *buf, ssize_t count)
 {
 	ssize_t ret,cnt;
 	cnt = 0;
 	/*
 		We check against READER_ID3TAG instead of rds->filelen >= 0 because if we got the ID3 TAG we know we have the end of the file.
 		If we don't have an ID3 TAG, then it is possible the file has grown since we started playing, so we want to keep reading from it if possible.
 	*/
 	if((fr->rdat.flags & READER_ID3TAG) && fr->rdat.filepos + count > fr->rdat.filelen) count = fr->rdat.filelen - fr->rdat.filepos;
 	while(cnt < count)
 	{
 		/* all icy code is inside this if block, everything else is the plain fullread we know */
 		/* Only the bytes still wanted decide if the boundary is crossed, not the whole request. */
 		if(fr->icy.interval && (fr->rdat.filepos+(count-cnt) > fr->icy.next))
 		{
 			unsigned char temp_buff;
 			size_t meta_size;
 			ssize_t cut_pos;
 			/* we are near icy-metaint boundary, read up to the boundary */
 			cut_pos = fr->icy.next - fr->rdat.filepos;
 			/* A negative distance means the bookkeeping is off; handing it to read() would turn it into a huge size. */
 			if(cut_pos < 0) return READER_ERROR;
 			if(cut_pos > 0)
 			{
 				/* Append after what is already collected instead of overwriting the start of buf. */
 				ret = read(fr->rdat.filept,buf+cnt,cut_pos);
 				if(ret < 0) return READER_ERROR;
 				if(ret == 0) break;
 				fr->rdat.filepos += ret;
 				cnt += ret;
 				/* Short read: the boundary is not reached yet, so the next byte is still audio. */
 				if(ret < cut_pos) continue;
 			}
 			/* now off to read icy data */
 			/* one byte icy-meta size (must be multiplied by 16 to get icy-meta length) */
 			ret = read(fr->rdat.filept,&temp_buff,1);
 			if(ret < 0) return READER_ERROR;
 			if(ret == 0) break;
 			debug2("got meta-size byte: %u, at filepos %li", temp_buff, (long)fr->rdat.filepos );
 			fr->rdat.filepos += ret; /* 1... */
 			if((meta_size = ((size_t) temp_buff) * 16))
 			{
 				/* we have got some metadata */
 				char *meta_buff;
 				meta_buff = (char*) malloc(meta_size+1);
 				if(meta_buff != NULL)
 				{
 					size_t got = 0;
 					/* read() may deliver the block in pieces, collect until complete or end of input */
 					while(got < meta_size)
 					{
 						ret = read(fr->rdat.filept,meta_buff+got,meta_size-got);
 						if(ret < 0)
 						{
 							free(meta_buff); /* not stored anywhere yet */
 							return READER_ERROR;
 						}
 						if(ret == 0) break;
 						got += ret;
 						fr->rdat.filepos += ret;
 					}
 					meta_buff[got] = 0; /* string paranoia */
 					if(fr->icy.data) free(fr->icy.data);
 					fr->icy.data = meta_buff;
 					fr->metaflags |= MPG123_NEW_ICY;
 					debug2("icy-meta: %s size: %d bytes", fr->icy.data, (int)meta_size);
 				}
 				else
 				{
 					/* Discard the block right here; going through fr->rd->skip_bytes can re-enter this function while fr->icy.next is still stale. */
 					unsigned char skip_buff[256];
 					size_t left = meta_size;
 					error1("cannot allocate memory for meta_buff (%lu bytes) ... trying to skip the metadata!", (unsigned long)meta_size);
 					while(left > 0)
 					{
 						ret = read(fr->rdat.filept,skip_buff,left < sizeof(skip_buff) ? left : sizeof(skip_buff));
 						if(ret < 0) return READER_ERROR;
 						if(ret == 0) break;
 						fr->rdat.filepos += ret;
 						left -= ret;
 					}
 				}
 			}
 			fr->icy.next = fr->rdat.filepos+fr->icy.interval;
 			/* Re-check the boundary before reading on; the rest of the request may span more than one interval. */
 			continue;
 		}
 		ret = read(fr->rdat.filept,buf+cnt,count-cnt);
 		if(ret < 0) return READER_ERROR;
 		if(ret == 0) break;
 		fr->rdat.filepos += ret;
 		cnt += ret;
 	}
 	/* debug1("done reading, got %li", (long)cnt); */
 	return cnt;
 }
 /*
 	Stream based operation without metadata handling.
 	Keeps calling read() on the descriptor until count bytes are collected or the input ends.
 	Returns the number of bytes stored in buf, or READER_ERROR when read() fails.
 */
 static ssize_t plain_fullread(mpg123_handle *fr,unsigned char *buf, ssize_t count)
 {
 	ssize_t done = 0;
 	/*
 		The limit is tied to READER_ID3TAG and not to a known file length: with a tag found, the end of the audio data is certain.
 		Without one the file may have grown since it was opened, so reading just continues as long as data arrives.
 	*/
 	if(fr->rdat.flags & READER_ID3TAG)
 	{
 		if(fr->rdat.filepos + count > fr->rdat.filelen) count = fr->rdat.filelen - fr->rdat.filepos;
 	}
 	while(done < count)
 	{
 		ssize_t got = read(fr->rdat.filept,buf+done,count-done);
 		if(got < 0) return READER_ERROR;
 		if(got == 0) break; /* end of input */
 		done += got;
 		fr->rdat.filepos += got;
 	}
 	return done;
 }
 /* lseek() on the reader's descriptor; on success the new offset is stored in rds->filepos and returned, any failure is reported as READER_ERROR. */
 static off_t stream_lseek(struct reader_data *rds, off_t pos, int whence)
 {
 	off_t reached = lseek(rds->filept, pos, whence);
 	if(reached < 0) return READER_ERROR; /* not the original value */
 	rds->filepos = reached;
 	return reached;
 }
 /*
 	Init for the descriptor based readers: determine the file length and mark the reader seekable when that worked.
 	An ID3v1 tag found in fr->id3buf additionally sets READER_ID3TAG and announces MPG123_NEW_ID3. Always returns 0.
 */
 static int default_init(mpg123_handle *fr)
 {
 	off_t length = get_fileinfo(fr);
 	fr->rdat.filelen = length;
 	fr->rdat.filepos = 0;
 	/* no usable length: leave the flags alone */
 	if(length < 0) return 0;
 	fr->rdat.flags |= READER_SEEKABLE;
 	if(strncmp((char*)fr->id3buf,"TAG",3) == 0)
 	{
 		fr->rdat.flags |= READER_ID3TAG;
 		fr->metaflags  |= MPG123_NEW_ID3;
 	}
 	return 0;
 }
 /* Close the descriptor, but only if open_stream() opened it itself (READER_FD_OPENED). */
 void stream_close(mpg123_handle *fr)
 {
 	if(!(fr->rdat.flags & READER_FD_OPENED)) return;
 	close(fr->rdat.filept);
 }
 /*
 	Step back the given number of bytes from the current position.
 	That can only work if the 'stream' is a real file that allows seeking.
 	Returns 0 on success, READER_ERROR when the seek failed.
 */
 static int stream_back_bytes(mpg123_handle *fr, off_t bytes)
 {
 	off_t reached = stream_lseek(&fr->rdat,-bytes,SEEK_CUR);
 	return (reached < 0) ? READER_ERROR : 0;
 }
 /*
 	Position the stream so that frame number newframe is the one read last.
 	Seeks to the offset frame_index_find() returns for newframe and then reads frames until fr->num reaches newframe.
 	Returns MPG123_OK after a successful seek (even when reading forward stopped early), READER_ERROR when the reader is not seekable or the seek failed.
 */
 static int stream_seek_frame(mpg123_handle *fr, off_t newframe)
 {
 	if(fr->rdat.flags & READER_SEEKABLE)
 	{
 		off_t preframe;
 		/* now seek to nearest leading index position and read from there until newframe is reached */
 		if(stream_lseek(&fr->rdat,frame_index_find(fr, newframe, &preframe),SEEK_SET) < 0)
 		return READER_ERROR;
 		/* off_t need not have the width of long, so convert explicitly for the %lu conversions */
 		debug2("going to %lu; just got %lu", (unsigned long)newframe, (unsigned long)preframe);
 		fr->num = preframe-1; /* Watch out! I am going to read preframe... fr->num should indicate the frame before! */
 		while(fr->num < newframe)
 		{
 			/* try to be non-fatal now... frameNum only gets advanced on success anyway */
 			/* NOTE(review): only a return value of 0 ends the loop; presumably read_frame() never reports failure with a negative value here -- confirm. */
 			if(!read_frame(fr)) break;
 		}
 		/* Now the wanted frame should be ready for decoding. */
 		debug1("arrived at %lu", (unsigned long)fr->num);
 		return MPG123_OK;
 	}
 	else return READER_ERROR; /* invalid, no seek happened */
 }
 /*
 	Fetch four bytes via the reader's fullread and combine them, first byte most significant, into *newhead.
 	Returns TRUE on success, FALSE when fewer than four bytes arrived, READER_MORE when the reader asks for more input.
 */
 static int generic_head_read(mpg123_handle *fr,unsigned long *newhead)
 {
 	unsigned char hbuf[4];
 	unsigned long word = 0;
 	int i;
 	int ret = fr->rd->fullread(fr,hbuf,4);
 	if(ret == READER_MORE) return ret;
 	if(ret != 4) return FALSE;
 	for(i=0; i<4; ++i) word = (word << 8) | (unsigned long) hbuf[i];
 	*newhead = word;
 	return TRUE;
 }
 /*
 	Slide the 32 bit header window one byte further: drop the oldest byte of *head and append one freshly read byte.
 	Returns TRUE on success, FALSE when no byte could be read, READER_MORE when the reader asks for more input.
 */
 static int generic_head_shift(mpg123_handle *fr,unsigned long *head)
 {
 	unsigned char fresh;
 	int ret = fr->rd->fullread(fr,&fresh,1);
 	if(ret == READER_MORE) return ret;
 	if(ret != 1) return FALSE;
 	/* the mask keeps the value at 32 bits where unsigned long is wider */
 	*head = ((*head << 8) | fresh) & 0xffffffff;
 	return TRUE;
 }
 /*
 	Advance the stream by len bytes.
 	Seekable input is repositioned with lseek (len may be negative there); other input is read and thrown away, which only works forwards.
 	Returns the reached position; negative values are errors. At end of input the position reached so far is returned.
 */
 static off_t stream_skip_bytes(mpg123_handle *fr,off_t len)
 {
 	if((fr->rdat.flags & READER_SEEKABLE) && (fr->rdat.filelen >= 0))
 	{
 		off_t ret = stream_lseek(&fr->rdat, len, SEEK_CUR);
 		return ret<0 ? READER_ERROR : ret;
 	}
 	else if(len >= 0)
 	{
 		unsigned char buf[1024]; /* ThOr: Compaq cxx complained and it makes sense to me... or should one do a cast? What for? */
 		ssize_t ret;
 		while (len > 0)
 		{
 			ssize_t num = len < (off_t)sizeof(buf) ? (ssize_t)len : (ssize_t)sizeof(buf);
 			ret = fr->rd->fullread(fr, buf, num);
 			if (ret < 0) return ret;
 			/* End of input: nothing more will arrive, so stop instead of spinning with len unchanged. */
 			if (ret == 0) break;
 			len -= ret;
 		}
 		return fr->rdat.filepos;
 	}
 	else return READER_ERROR;
 }
 /*
 	Read size bytes of frame data into buf through the reader's fullread.
 	Whatever part of buf was not filled is zeroed, so a truncated last frame ends up padded.
 	Returns the result of fullread: size on success, otherwise the short count or a negative code.
 */
 static int generic_read_frame_body(mpg123_handle *fr,unsigned char *buf, int size)
 {
 	long got = fr->rd->fullread(fr,buf,size);
 	if(got != size)
 	{
 		/* negative codes mean that nothing valid is in buf */
 		long valid = (got > 0) ? got : 0;
 		/* This allows partial frames at the end... do we really want to pad and decode these?! */
 		memset(buf+valid,0,size-valid);
 	}
 	return got;
 }
 /* Report the position counter the reader maintains in fr->rdat.filepos. */
 static off_t generic_tell(mpg123_handle *fr)
 {
 	return fr->rdat.filepos;
 }
 /* Seek the descriptor back to offset 0; a failing seek is not reported. */
 static void stream_rewind(mpg123_handle *fr)
 {
 	struct reader_data *rds = &fr->rdat;
 	stream_lseek(rds,0,SEEK_SET);
 }
 /*
 	Determine the length of the input, if fr->rdat.filept refers to something seekable.
 	The last 128 bytes are read into fr->id3buf; when they begin with "TAG" they are not counted as part of the length.
 	The descriptor is put back to the start afterwards, also on the failure paths that come after the first seek, so that a caller can still read the data from the beginning.
 	Returns the length or -1 when it cannot be determined.
 	... that is not totally safe...
 */
 static off_t get_fileinfo(mpg123_handle *fr)
 {
 	off_t len;
 	if((len=lseek(fr->rdat.filept,0,SEEK_END)) < 0)	return -1;
 	if(lseek(fr->rdat.filept,-128,SEEK_END) < 0
 	   || fr->rd->fullread(fr,(unsigned char *)fr->id3buf,128) != 128)
 	{
 		/* Seeking worked a moment ago (e.g. a file shorter than 128 bytes); do not leave the position at the end. */
 		(void) lseek(fr->rdat.filept,0,SEEK_SET);
 		return -1;
 	}
 	if(!strncmp((char*)fr->id3buf,"TAG",3))	len -= 128;
 	if(lseek(fr->rdat.filept,0,SEEK_SET) < 0)	return -1;
 	if(len <= 0)	return -1;
 	return len;
 }
 /* reader for input via manually provided buffers */
 /* Start with an empty buffer chain and zeroed positions; adds the READER_BUFFERED and READER_MICROSEEK flags. Always returns 0. */
 static int feed_init(mpg123_handle *fr)
 {
 	struct reader_data *rds = &fr->rdat;
 	rds->flags |= READER_BUFFERED | READER_MICROSEEK;
 	rds->buf = NULL;
 	rds->firstpos = 0;
 	rds->filepos = 0;
 	rds->filelen = 0;
 	return 0;
 }
 /* Release every buffer of the chain and put the feed reader back into its initial empty state. */
 static void feed_close(mpg123_handle *fr)
 {
 	struct buffy *node;
 	struct buffy *following;
 	for(node = fr->rdat.buf; node != NULL; node = following)
 	{
 		following = node->next; /* grab it before the node is gone */
 		free(node->data);
 		free(node);
 	}
 	feed_init(fr);
 }
 /*
 	Externally called function: append count bytes from in to the end of the feed reader's buffer chain.
 	The data is copied, the caller keeps ownership of in.
 	Returns 0 on success (an empty block is accepted and changes nothing), -1 on invalid arguments or when memory runs out.
 */
 int feed_more(mpg123_handle *fr, unsigned char *in, long count)
 {
 	/* the pointer to the pointer for the buffy after the end... */
 	struct buffy **b = &fr->rdat.buf;
 	debug("feed_more");
 	/* A negative count would become a huge size for malloc and memcpy. */
 	if(count < 0 || (in == NULL && count > 0)) return -1;
 	/* Nothing to store; also avoids depending on what malloc(0) returns. */
 	if(count == 0) return 0;
 	while(*b != NULL){ b = &(*b)->next; }
 	*b = (struct buffy*)malloc(sizeof(struct buffy));
 	if(*b == NULL) return -1;
 	(*b)->data = (unsigned char*)malloc(count);
 	if((*b)->data == NULL){ free(*b); *b = NULL; return -1; }
 	memcpy((*b)->data, in, count);
 	(*b)->size = count;
 	(*b)->next = NULL; /* Hurray, the new last buffer! */
 	fr->rdat.filelen += count;
 	debug3("feed_more: %p %luB filelen=%lu", (*b)->data, (unsigned long)(*b)->size, (unsigned long)fr->rdat.filelen);
 	return 0;
 }
 /*
 	fullread for the feed reader: copy count bytes, starting at fr->rdat.filepos, out of the buffer chain.
 	When less than count bytes are available, the position is reset to fr->rdat.firstpos and MPG123_NEED_MORE is returned.
 	Returns count on success, -1 when the chain ended before count bytes were copied.
 */
 static ssize_t feed_read(mpg123_handle *fr, unsigned char *out, ssize_t count)
 {
 	struct buffy *cur;
 	ssize_t copied = 0;
 	ssize_t base = 0; /* position in the chain at which cur begins */
 	if(fr->rdat.filelen - fr->rdat.filepos < count)
 	{
 		debug3("hit end, back to beginning (%li - %li < %li)", (long)fr->rdat.filelen, (long)fr->rdat.filepos, (long)count);
 		/* undo the previous reads, they get repeated once more data was fed */
 		fr->rdat.filepos = fr->rdat.firstpos;
 		return MPG123_NEED_MORE;
 	}
 	/* walk to the buffer that contains the current position */
 	for(cur = fr->rdat.buf; cur != NULL && (base + cur->size) <= fr->rdat.filepos; cur = cur->next)
 	{
 		base += cur->size;
 	}
 	/* copy buffer after buffer until the request is satisfied */
 	for(; copied < count && (cur != NULL); cur = cur->next)
 	{
 		ssize_t inner = fr->rdat.filepos - base; /* offset inside cur */
 		ssize_t take = count - copied;
 		if(take > cur->size - inner) take = cur->size - inner;
 		debug3("copying %liB from %p+%li",(long)take, cur->data, (long)inner);
 		memcpy(out+copied, cur->data+inner, take);
 		fr->rdat.filepos += take;
 		copied += take;
 		base += cur->size;
 	}
 	debug2("got %li bytes, pos advanced to %li", (long)copied, (long)fr->rdat.filepos);
 	return (copied == count) ? copied : -1; /* Anything else must be an error. */
 }
 /*
 	Move the feed position forward by len bytes.
 	Returns the reached position, READER_MORE when not enough data has been fed yet, READER_ERROR for a negative len.
 */
 static off_t feed_skip_bytes(mpg123_handle *fr,off_t len)
 {
 	if(len < 0) return READER_ERROR;
 	if(fr->rdat.filelen - fr->rdat.filepos < len) return READER_MORE;
 	fr->rdat.filepos += len;
 	return fr->rdat.filepos;
 }
 /*
 	Move the feed position back by the given number of bytes; a negative amount is handled as a forward skip.
 	Returns 0 on success, otherwise READER_ERROR or the code feed_skip_bytes() gave.
 */
 static int feed_back_bytes(mpg123_handle *fr, off_t bytes)
 {
 	if(bytes < 0)
 	{
 		off_t reached = feed_skip_bytes(fr, -bytes);
 		/* positions count as plain success, codes <= 0 are passed on */
 		return (reached > 0) ? 0 : reached;
 	}
 	/* cannot go back before the start of the stored data */
 	if(bytes > fr->rdat.filepos) return READER_ERROR;
 	fr->rdat.filepos -= bytes;
 	return 0;
 }
 /* Frame seeking is not offered by the feed reader: always READER_ERROR. */
 static int feed_seek_frame(mpg123_handle *fr, off_t num)
 {
 	return READER_ERROR;
 }
 /* Go back to the beginning of the stored data; the buffers themselves stay untouched. */
 void feed_rewind(mpg123_handle *fr)
 {
 	struct reader_data *rds = &fr->rdat;
 	rds->firstpos = 0;
 	rds->filepos  = 0;
 }
 /*
 	Drop all buffers at the head of the chain that lie completely before the current position.
 	filepos and filelen shrink by the size of each dropped buffer, and the remaining position becomes the new firstpos.
 */
 void feed_forget(mpg123_handle *fr)
 {
 	struct buffy *head = fr->rdat.buf;
 	if(head != NULL)
 	{
 		debug2("feed_forget: block %lu pos %lu", (unsigned long)head->size, (unsigned long)fr->rdat.filepos);
 	}
 	else
 	{
 		debug("forget with nothing there!");
 	}
 	/* a buffer is outdated as soon as the position is at or beyond its end */
 	while(head != NULL && fr->rdat.filepos >= head->size)
 	{
 		struct buffy *rest = head->next; /* NULL here simply ends the loop */
 		fr->rdat.filelen -= head->size;
 		fr->rdat.filepos -= head->size;
 		debug4("feed_forget: forgot %p with %lu, filepos=%lu, filelen=%lu", head->data, (unsigned long)head->size, (unsigned long)fr->rdat.filepos,  (unsigned long)fr->rdat.filelen);
 		free(head->data);
 		free(head);
 		head = rest;
 	}
 	fr->rdat.firstpos = fr->rdat.filepos;
 	fr->rdat.buf = head;
 }
 /*
 	Set the feed position to pos.
 	If pos lies inside [firstpos, firstpos+filelen) the stored data is kept and the offset behind it is returned as the place where the next feed should continue.
 	Otherwise all stored data is thrown away and pos itself is returned, expecting the next feed to start there.
 */
 off_t feed_set_pos(mpg123_handle *fr, off_t pos)
 {
 	off_t have_from = fr->rdat.firstpos;
 	off_t have_to   = fr->rdat.firstpos + fr->rdat.filelen;
 	if(pos < have_from || pos >= have_to)
 	{ /* Not in store: forget everything and wait for data from that position. */
 		feed_close(fr);
 		fr->rdat.filepos  = pos;
 		fr->rdat.firstpos = pos;
 		return pos;
 	}
 	/* We have the position! */
 	fr->rdat.filepos = pos - have_from;
 	return have_to;
 }
 /*****************************************************************
 * The table of reader implementations, indexed by READER_STREAM, READER_ICY_STREAM, READER_FEED (and READER_SYSTEM with READ_SYSTEM).
 * Each entry lists, in the member order of struct reader:
 * init, close, fullread, head_read, head_shift, skip_bytes, read_frame_body, back_bytes, seek_frame, tell, rewind, forget.
 */
 struct reader readers[] =
 {
 	/* READER_STREAM: plain file descriptor input */
 	{
 		default_init,
 		stream_close,
 		plain_fullread,
 		generic_head_read,
 		generic_head_shift,
 		stream_skip_bytes,
 		generic_read_frame_body,
 		stream_back_bytes,
 		stream_seek_frame,
 		generic_tell,
 		stream_rewind,
 		NULL
 	} ,
 	/* READER_ICY_STREAM: same as above, only fullread differs to filter out the ICY metadata */
 	{
 		default_init,
 		stream_close,
 		icy_fullread,
 		generic_head_read,
 		generic_head_shift,
 		stream_skip_bytes,
 		generic_read_frame_body,
 		stream_back_bytes,
 		stream_seek_frame,
 		generic_tell,
 		stream_rewind,
 		NULL
 	},
 	/* READER_FEED: input from buffers handed in via feed_more(); the only entry with a forget operation */
 	{
 		feed_init,
 		feed_close,
 		feed_read,
 		generic_head_read,
 		generic_head_shift,
 		feed_skip_bytes,
 		generic_read_frame_body,
 		feed_back_bytes,
 		feed_seek_frame,
 		generic_tell,
 		feed_rewind,
 		feed_forget
 	}
 /* buffer readers... can also be icy? nah, drop it... plain mpeg audio buffer reader */
 #ifdef READ_SYSTEM
 	/* READER_SYSTEM: only init and fullread are set statically here */
 	,{
 		system_init,
 		NULL,	/* filled in by system_init() */
 		fullread,
 		NULL,
 		NULL,
 		NULL,
 		NULL,
 		NULL,
 		NULL,
 		NULL,
 		NULL,
 		NULL,
 	} 
 #endif
 };
 /* Switch the handle to the feed reader and initialize it. Returns 0 on success, -1 when the reader's init failed. */
 int open_feed(mpg123_handle *fr)
 {
 	debug("feed reader");
 	clear_icy(&fr->icy);
 	/* flags start from scratch, the reader's init adds its own */
 	fr->rdat.flags = 0;
 	fr->rd = &readers[READER_FEED];
 	return (fr->rd->init(fr) < 0) ? -1 : 0;
 }
 /*
 	Attach a file or an already opened descriptor to the handle and select the matching stream reader.
 	bs_filenam: path of the file to open, or NULL to use fd as it is.
 	fd: descriptor used when bs_filenam is NULL; it is not marked READER_FD_OPENED, so stream_close() leaves it open.
 	The ICY reader is chosen when fr->p.icy_interval is positive, the plain stream reader otherwise.
 	Returns MPG123_OK on success, the negative result of open() when the file cannot be opened, -1 when the reader's init failed.
 */
 int open_stream(mpg123_handle *fr, char *bs_filenam, int fd)
 {
 	int filept_opened = 1;
 	int filept; /* descriptor of opened file/stream */
 	clear_icy(&fr->icy); /* can be done inside frame_clear ...? */
 	if(!bs_filenam) /* no file to open, got a descriptor (stdin) */
 	{
 		filept = fd;
 		filept_opened = 0; /* and don't try to close it... */
 	}
 	/* platforms that lack O_BINARY get a flag without effect */
 	#ifndef O_BINARY
 	#define O_BINARY (0)
 	#endif
 	else if((filept = open(bs_filenam, O_RDONLY|O_BINARY)) < 0) /* a plain old file to open... */
 	{
 		perror(bs_filenam);
 		return filept; /* error... */
 	}
 	/* now we have something behind filept and can init the reader */
 	fr->rdat.filelen = -1;
 	fr->rdat.filept  = filept;
 	fr->rdat.flags = 0;
 	if(filept_opened)	fr->rdat.flags |= READER_FD_OPENED;
 	if(fr->p.icy_interval > 0)
 	{
 		debug("ICY reader");
 		fr->icy.interval = fr->p.icy_interval;
 		/* the first metadata block is due after one full interval of data */
 		fr->icy.next = fr->icy.interval;
 		fr->rd = &readers[READER_ICY_STREAM];
 	}
 	else
 	{
 		fr->rd = &readers[READER_STREAM];
 		debug("stream reader");
 	}
 	if(fr->rd->init(fr) < 0) return -1;
 	return MPG123_OK;
 }
--- a/src/libmpg123/stringbuf.c
+++ b/src/libmpg123/stringbuf.c
@@ -0,0 +1,94 @@
 /*
 	stringbuf: mimicking a bit of C++ to more safely handle strings
 	copyright 2006-7 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Thomas Orgis
 */
 #include "config.h"
 #include "debug.h"
 #include "mpg123.h"
 #include <stdlib.h>
 #include <string.h>
 /* Put a string structure into the empty state: no memory attached, size and fill zero. Does not free anything. */
 void mpg123_init_string(mpg123_string* sb)
 {
 	sb->fill = 0;
 	sb->size = 0;
 	sb->p = NULL;
 }
 /* Release the memory of a string and leave the structure in the empty state. */
 void mpg123_free_string(mpg123_string* sb)
 {
 	/* free() accepts NULL, no check needed */
 	free(sb->p);
 	mpg123_init_string(sb);
 }
 /*
 	Change the allocated size of a string to new bytes; a size of 0 frees the memory and empties the string.
 	The fill counter is not adjusted here (except for being reset with size 0).
 	Returns 1 on success, 0 when the reallocation failed (the string is unchanged then).
 */
 int mpg123_resize_string(mpg123_string* sb, size_t new)
 {
 	char* grown;
 	debug3("resizing string pointer %p from %lu to %lu", (void*) sb->p, (unsigned long)sb->size, (unsigned long)new);
 	if(new == 0)
 	{
 		if(sb->size && sb->p != NULL) free(sb->p);
 		mpg123_init_string(sb);
 		return 1;
 	}
 	/* already at the wanted size: nothing to do */
 	if(sb->size == new) return 1;
 	debug("really!");
 	grown = (char*) realloc(sb->p, new*sizeof(char));
 	debug1("realloc returned %p", (void*) grown); 
 	if(grown == NULL) return 0;
 	sb->p = grown;
 	sb->size = new;
 	return 1;
 }
 /*
 	Make to a copy of the used part of from: to is resized to from->fill bytes and receives exactly these bytes.
 	An empty source leaves to empty; copying a string onto itself only trims its allocation.
 	Returns 1 on success, 0 when resizing failed.
 */
 int mpg123_copy_string(mpg123_string* from, mpg123_string* to)
 {
 	size_t len = from->fill;
 	if(!mpg123_resize_string(to, len)) return 0;
 	/* With nothing to copy to->p is NULL after the resize, and memcpy must not get NULL or overlapping areas. */
 	if(len > 0 && to->p != from->p) memcpy(to->p, from->p, len);
 	to->fill = len;
 	return 1;
 }
 /*
 	Append the zero terminated text stuff to the string; the stored fill always counts the terminating zero.
 	Returns 1 on success, 0 when the needed memory could not be obtained.
 */
 int mpg123_add_string(mpg123_string* sb, char* stuff)
 {
 	size_t addl = strlen(stuff)+1; /* the text and its terminator */
 	debug1("adding %s", stuff);
 	if(sb->fill == 0)
 	{
 		/* empty so far: the storage gets exactly the size of the new text */
 		if(!mpg123_resize_string(sb, addl)) return 0;
 		memcpy(sb->p, stuff, addl);
 		sb->fill = addl;
 	}
 	else
 	{
 		size_t tail = sb->fill-1; /* where the old terminator sits */
 		/* enlarge only if the present storage is too small */
 		if(sb->size < tail+addl && !mpg123_resize_string(sb, tail+addl)) return 0;
 		memcpy(sb->p+tail, stuff, addl);
 		sb->fill = tail+addl;
 	}
 	return 1;
 }
 /* Replace the content of the string with stuff: mark it empty, then append. Returns what mpg123_add_string() returns. */
 int mpg123_set_string(mpg123_string* sb, char* stuff)
 {
 	int ok;
 	sb->fill = 0; /* the old content is simply abandoned */
 	ok = mpg123_add_string(sb, stuff);
 	return ok;
 }
--- a/src/libmpg123/system.c
+++ b/src/libmpg123/system.c
@@ -0,0 +1,494 @@
 /*
 	system.c: system stream decoder (standalone)
 	copyright 1997-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 	grabs an audio stream inside a video/audio system stream
 	This Program outputs only the first audio stream to STDOUT
 	currently this is an external program. You must pipe
 	your streams file to this program and the output to 
 	the mpg123 player.  e.g: 
 	./system < my_system_stream.mpg | mpg123 -
 */
 #include <stdlib.h>
 #include <stdio.h>
 #include <errno.h>
 #include "mpg123app.h"
 /* Descriptor of the input; several raw read helpers use it directly instead of their own descriptor argument. */
 static int filept;
 /* Verbosity of the messages on stderr: warnings are printed when non-zero, values above 1 add dumps of the words read. */
 static int verbose = 1;
 /* 32 bit codes compared against the words read from the stream. */
 #define PACKET_START		0x000001ba
 #define STREAM_END		0x000001b9
 #define SYSTEM_STREAM		0x000001bb
 /* the following two types are not supported */
 #define AUDIO_STREAM		0x000001b8
 /* NOTE(review): same value as STREAM_END above -- confirm which one is intended. */
 #define VIDEO_STREAM		0x000001b9
 #define PADDING_STREAM		0x000001be
 #define RESERVED_STREAM		0x000001bc
 #define PRIVATE_STREAM_1	0x000001bd
 #define PRIVATE_STREAM_2	0x000001bf
 /* Forward declarations of the system_* reader operations. */
 static int system_back_frame(mpg123_handle *fr,int num);
 static int system_head_read(unsigned char *hbuf,unsigned long *newhead);
 static int system_head_shift(unsigned char *hbuf,unsigned long *head);
 static int system_skip_bytes(int len);
 static int system_read_frame_body(int size);
 static long system_tell(void);
 /* Values taken from a system header by system_raw_read_system_header(). */
 struct system_info {
 	unsigned long rate; /* 22 bit field from the first three header bytes, marker bits removed */
 	int num_audio;
 	int num_video;
 	int fixed;      /* the lock and flag members hold the masked header bit, i.e. zero or non-zero */
 	int csps;
 	int audio_lock;
 	int video_lock;
 };
 /* One stream entry of a system header. */
 struct stream_info {
 	int id;    /* stream id byte as found in the header */
 	int id1;   /* number of the stream within its type, the low bits of id */
 	int type;  /* 'A', 'V' or 'R', chosen from the high bits of id */
 	int size;
 	int scale;
 };
 /* Buffer and time stamp fields found at the start of a packet. */
 struct packet_info {
  int scale;
  int size;
  unsigned long dts;
  unsigned long pts;
 };
 /* The most recently parsed system header and its stream table. */
 struct system_info sys_info;
 struct stream_info str_info[64];
 /*
 	Read len bytes from descriptor f into buf, repeating read() until everything has arrived.
 	Returns len on success, -1 when read() failed, and the smaller number of bytes actually stored when the input ended early.
 	Callers compare the result against len, so an early end counts as failure for them.
 */
 static int my_read(int f,char *buf,int len)
 {
 	int len1 = 0;
 	while(len1 < len) {
 		ssize_t ret = read(f,buf+len1,len-len1);
 		if(ret < 0)
 			return -1;
 		/* End of input: without this check the loop would spin forever. */
 		if(ret == 0)
 			break;
 		len1 += (int)ret;
 	}
 	return len1;
 }
 /*
 	Read a 32 bit word, most significant byte first, from descriptor f into *head.
 	Returns 0 on success, -1 when the four bytes could not be read.
 */
 static int system_raw_read_head(int f,unsigned long *head)
 {
 	unsigned char buf[4];
 	if(my_read(f,(char*)buf,4) != 4) {
 		perror("read_head");
 		return -1;
 	}
 	/* Combine as unsigned long: shifting a byte >= 0x80 by 24 as int would overflow and sign-extend. */
 	*head = ((unsigned long)buf[0]<<24) | ((unsigned long)buf[1]<<16)
 	      | ((unsigned long)buf[2]<<8)  |  (unsigned long)buf[3];
 	if(verbose > 1)
 		fprintf(stderr,"head: %08lx\n",*head);
 	return 0;
 }
 /*
 	Read a 16 bit word, most significant byte first, from descriptor f into *word.
 	Returns 0 on success, -1 when the two bytes could not be read.
 */
 static int system_raw_read_word(int f,int *word)
 {
 	unsigned char pair[2];
 	if(my_read(f,pair,2) != 2)
 	{
 		perror("read_word");
 		return -1;
 	}
 	*word = (pair[0] << 8) | pair[1];
 	return 0;
 }
 /* Read exactly len bytes from descriptor f into buf. Returns 0 on success, -1 otherwise. */
 static int system_raw_read(int f,int len,unsigned char *buf)
 {
 	return (my_read(f,buf,len) == len) ? 0 : -1;
 }
 /*
 	Skip len bytes of input on descriptor f.
 	Uses lseek() where possible; on descriptors that cannot seek (ESPIPE) the bytes are read and thrown away.
 	Returns a non-negative value on success (the new offset after a seek, capped to the int range, or len after reading), a negative value on error or when the input ended before len bytes were skipped.
 */
 static int system_raw_skip(int f,int len)
 {
 	int left;
 	off_t pos = lseek(f,len,SEEK_CUR);
 	if(pos >= 0) {
 		/* Callers only test for < 0; keep a large offset from turning negative in the int. */
 		return pos > 0x7fffffff ? 0x7fffffff : (int)pos;
 	}
 	if(errno != ESPIPE)
 		return -1;
 	for(left = len; left > 0; ) {
 		char buf[1024];
 		ssize_t got = read(f,buf,left > 1024 ? 1024 : left);
 		if(got < 0)
 			return -1;
 		/* End of input before everything was skipped; looping on would never finish. */
 		if(got == 0)
 			return -1;
 		left -= (int)got;
 	}
 	return len;
 }
 /*
 	Assemble a time stamp value from the five bytes at buf.
 	The lowest bit of bytes 0, 2 and 4 is expected to be set (marker); a missing marker only triggers a warning.
 	NOTE(review): the shifts of the first three bytes are one less than a contiguous layout with buf[3]<<7 would need (bit 14 is written by two terms); they are kept as found -- confirm the intended bit layout.
 */
 static unsigned long system_raw_timer_value(unsigned char *buf)
 {
 	unsigned long val;
 	if(!(buf[0] & 0x1) || !(buf[2] & 0x1) || !(buf[4] & 0x1)) {
 		if(verbose)
 			fprintf(stderr,"Warning: missing marker in time stamp!\n");
 	}
 	/* Shift in unsigned long: 0xe << 28 does not fit into an int. */
 	val  = (unsigned long)(buf[0] & 0xe) << (29-1);
 	val |= (unsigned long)buf[1] << 21;
 	val |= (unsigned long)(buf[2] & 0xfe) << (14-1);
 	val |= (unsigned long)buf[3] << 7;
 	val |= (unsigned long)buf[4] >> 1;
 	return val;
 }
 /*
 	Read one packet: a 16 bit length, then that many bytes consisting of optional stuffing bytes (0xff), an optional buffer scale/size field, the time stamp field and the payload.
 	The header values are stored in *pi, the payload is written to descriptor 1 (standard output).
 	Returns 0 on success, -1 on read/write errors or a malformed header.
 	NOTE(review): the length word is read from the global filept while the data comes from fd -- presumably callers pass filept; confirm.
 */
 static int system_raw_read_packet_data(int fd,struct packet_info *pi)
 {
 	/* The length is a 16 bit value, so the buffer has to hold up to 65535 bytes. */
 	static unsigned char buf[65536];
 	int len;
 	int pos = 0;
 	int i;
 	if(system_raw_read_word(filept,&len) < 0)
 		return -1;
 	if(verbose > 1)
 		fprintf(stderr,"Stream video/audio len: %d\n",len);
 	if(len < 0 || len > (int)sizeof(buf))
 		return -1;
 	if(system_raw_read(fd,len,buf) < 0)
 		return -1;
 	/* at most 15 stuffing bytes are tolerated */
 	for(i=0;i<16;i++,pos++) {
 		if(buf[pos] != 0xff)
 			break;
 	}
 	if(i == 16) {
 		fprintf(stderr,"Ouch ... too much stuffing bytes!\n");
 		return -1;
 	}
 	if( (buf[pos] & 0xc0) == 0x40 ) {
 		pi->scale = (buf[pos] >> 5) & 0x1;
 		pi->size  = (buf[pos] & 0x1f) << 8;
 		pi->size |= buf[pos+1];
 		pos += 2;
 	}
 	switch( buf[pos] & 0xf0) {
 		case 0x00:
 			if(buf[pos] != 0x0f) {
 				fprintf(stderr,"Ouch ... illegal timer code!\n");
 				return -1;
 			}
 			pos++;
 			break;
 		case 0x20:
 			pi->pts = system_raw_timer_value(buf+pos);
 			pos += 5;
 			break;
 		case 0x30:
 			pi->pts = system_raw_timer_value(buf+pos);
 			pos += 5;
 			if( (buf[pos] & 0xf) != 0x10) {
 				if(verbose)
 					fprintf(stderr,"DTS should start with 0x1x!\n");
 			}
 			pi->dts = system_raw_timer_value(buf+pos);
 			pos += 5;
 			break;
 		default:
 			if(verbose)
 				fprintf(stderr,"Ouch ... illegal timer code!\n");
 			return -1;
 	}
 	/* A header longer than the packet would give write() a negative length. */
 	if(pos > len)
 		return -1;
 	/* hand the payload on, coping with partial writes */
 	while(pos < len) {
 		ssize_t put = write(1,buf+pos,len-pos);
 		if(put < 0)
 			return -1;
 		pos += (int)put;
 	}
 	return 0;
 }
 /*
 	Read the eight bytes following a pack start: five bytes that are folded into *clock and three bytes that form *rate.
 	The clock value is built as a base 256 number with byte 4 as the most significant digit; the rate has byte 5 as its most significant byte.
 	Returns 0 on success, -1 when the bytes could not be read.
 */
 static int system_raw_read_packet_info(int f,double *clock,unsigned long *rate)
 {
 	unsigned char raw[8];
 	int idx;
 	if(my_read(f,raw,8) != 8)
 	{
 		perror("read_packet_info");
 		return -1;
 	}
 	*clock = 0.0;
 	for(idx=4; idx>=0; idx--)
 	{
 		*clock = *clock * 256.0 + (double) raw[idx];
 	}
 	*rate = (raw[5]<<16) | (raw[6]<<8) | raw[7];
 	return 0;
 }
 /*
  * Read and decode a system header: the length word, 6 fixed bytes (rate,
  * stream counts, flags) and a list of 3-byte stream entries.
  * At most 6+48*3 bytes are decoded; the rest of a longer header is skipped.
  *
  * f:   descriptor to read from
  * ssi: receives rate, num_audio, num_video, fixed, csps and the lock flags
  * Returns 0 on success, -1 on read error or malformed header.
  */
 static int system_raw_read_system_header(int f,struct system_info *ssi) 
 {
 	unsigned char buf[6+48*3];
 	int rlen,len;
 	int i,cnt;
 	if(system_raw_read_word(f,&len) < 0)
 		return -1;
 	if(verbose > 1)
 		fprintf(stderr,"system len: %d\n",len);
 	/* The fixed part is 6 bytes; refuse to decode bytes that were never read. */
 	if(len < 6) {
 		if(verbose)
 			fprintf(stderr,"Oops .. short System header!\n");
 		return -1;
 	}
 	rlen = len;
 	if(len > (int) sizeof(buf)) {
 		if(verbose)
 			fprintf(stderr,"Oops .. large System header!\n");
 		rlen = (int) sizeof(buf);
 	}
 	if(my_read(f,buf,rlen) != rlen) {
 		perror("raw_read_system_header");
 		return -1;
 	}
 	/* Skip the part of the header that did not fit into buf. */
 	if(len > rlen) {
 		if(system_raw_skip(f,len-rlen) < 0)
 			return -1;
 	}
 	if(buf[5] != 0xff) {
 		if(verbose)
 			fprintf(stderr,"Warning: buf[5] !=0xff \n");
 	}
 	/* 24 bit field; top and bottom bit must be set, the 22 bits between are the rate. */
 	ssi->rate = (buf[0]<<16)+(buf[1]<<8)+buf[2];
 	if( (ssi->rate & 0x800001) != 0x800001) {
 		if(verbose)
 			fprintf(stderr,"System Header Byte 0: Missing bits\n");
 		return -1;
 	}
 	ssi->rate >>= 1;
 	ssi->rate &= 0x7fffff;
 	ssi->num_audio = buf[3] >> 2;
 	ssi->num_video = buf[4] & 0x1f;
 	ssi->fixed = buf[3] & 0x2;
 	ssi->csps  = buf[3] & 0x1;
 	ssi->audio_lock = buf[4] & 0x80;
 	ssi->video_lock = buf[4] & 0x40;
 	if(verbose)
 		fprintf(stderr,"Audio: %d Video: %d, Lock: %d/%d, fixed: %d, csps: %d\n",
 			ssi->num_audio,ssi->num_video,ssi->audio_lock?1:0,ssi->video_lock?1:0,
 			ssi->fixed?1:0,ssi->csps?1:0);
 	i = 6;
 	/*
 	 * NOTE(review): cnt is never advanced, so every entry overwrites
 	 * str_info[0]. The capacity of str_info is not visible from here, so
 	 * this is kept as it was - confirm and add a bounded cnt++.
 	 */
 	cnt = 0;
 	/* Only decode complete 3-byte entries. */
 	while( i + 3 <= rlen ) {
 		if( !(buf[i] & 0x80) || ((buf[i+1] & 0xc0) != 0xc0) ) {
 			fprintf(stderr,"system_raw_read_system_header byte %d,%d: bits not set!\n",i,i+1);
 			return -1;
 		}
 		str_info[cnt].id = buf[i];
 		/* 110xxxxx is tagged 'A', 1110xxxx 'V', anything else 'R'. */
 		if( (str_info[cnt].id & 0xe0) == 0xc0 ) {
 			str_info[cnt].type = 'A';
 			str_info[cnt].id1 = str_info[cnt].id & 0x1f;
 		}
 		else if((str_info[cnt].id & 0xf0) == 0xe0 ) {
 			str_info[cnt].type = 'V';
 			str_info[cnt].id1 = str_info[cnt].id & 0x0f;
 		}
 		else {
 			str_info[cnt].type = 'R';
 			str_info[cnt].id1 = str_info[cnt].id & 0x3f;
 		}
 		str_info[cnt].scale = buf[i+1] & 0x20;
 		str_info[cnt].size = ((buf[i+1] & 0x1f)<<8)+buf[i+2];
 		i += 3;
 		if(verbose)
 			fprintf(stderr,"ID: %#02x=%c%d, scale: %d, size %d\n",
 				str_info[cnt].id,str_info[cnt].type,str_info[cnt].id1,str_info[cnt].scale?1:0,str_info[cnt].size);
 	}
 	return 0;
 }
 /***************************************************
  * init system layer read functions 
  *
  * Installs the system_* callbacks in r (back_frame is set to NULL), then
  * walks the stream read from the global filept. The stream has to begin
  * with PACKET_START. After that every start code is dispatched: packet
  * info and system headers are decoded, the packet with code 0x000001c0 is
  * handed to system_raw_read_packet_data(), every other packet is skipped
  * using its length word.
  *
  * Returns 0 once STREAM_END has been read, -1 on a read error or when
  * something other than a 0x000001xx start code turns up.
  */
 int system_init(struct reader *r)
 {
 	unsigned long head;
 	double clk;
 	unsigned long rate;
 	int len;
 	r->back_frame = NULL;
 	r->head_read = system_head_read;
 	r->head_shift = system_head_shift;
 	r->skip_bytes = system_skip_bytes;
 	r->read_frame_body = system_read_frame_body;
 	r->tell = system_tell;
 	if(system_raw_read_head(filept,&head) < 0)
 		return -1;
 	if(head != PACKET_START) {
 		fprintf(stderr,"No PACKET_START found!\n");
 		return -1;
 	}
 	if(system_raw_read_packet_info(filept,&clk,&rate) < 0)
 		return -1;
 	for(;;) {
 		if(system_raw_read_head(filept,&head) < 0)
 			return -1;
 		if((head & 0xffffff00) != 0x00000100)
 			return -1;
 		switch(head) {
 			case PACKET_START:
 				if(system_raw_read_packet_info(filept,&clk,&rate) < 0)
 					return -1;
 				if(verbose > 1)
 					fprintf(stderr,"Packet Start\n");
 				break;
 			case STREAM_END:
 				if(verbose)
 					fprintf(stderr,"Stream End\n");
 				/* End of stream is the one successful way out of the loop. */
 				return 0;
 			case SYSTEM_STREAM:
 				if(system_raw_read_system_header(filept,&sys_info) < 0)
 					return -1; 
 				break;
 			default: {
 				/* 0x1bd..0x1ef are skipped silently, other codes get a warning first. */
 				int known = (head >= 0x000001bd && head < 0x000001f0);
 				if(head >= 0x000001c0 && head < 0x000001f0 && verbose > 1)
 					fprintf(stderr,"Stream ID %lu\n",head - 0x000001c0);
 				if(head == 0x000001c0) {
 					struct packet_info pi;
 					if(system_raw_read_packet_data(filept,&pi) < 0 )
 						return -1;
 					break;
 				}
 				if(!known && verbose)
 					fprintf(stderr,"unsupported head %8lx\n",head);
 				if(system_raw_read_word(filept,&len) < 0)
 					return -1;
 				if(!known && verbose)
 					fprintf(stderr,"Skipping: %d bytes\n",len);
 				if(system_raw_skip(filept,len) < 0)
 					return -1;
 				break;
 			}
 		}
 	}
 }
 /* Placeholder: ignores both arguments and always returns 0. */
 static int system_back_frame(mpg123_handle *fr,int num)
 {
 	(void) fr;
 	(void) num;
 	return 0;
 }
 /* Placeholder head_read callback: touches neither hbuf nor *newhead, always returns 0. */
 static int system_head_read(unsigned char *hbuf,unsigned long *newhead)
 {
 	(void) hbuf;
 	(void) newhead;
 	return 0;
 }
 /* Placeholder head_shift callback: touches neither hbuf nor *head, always returns 0. */
 static int system_head_shift(unsigned char *hbuf,unsigned long *head)
 {
 	(void) hbuf;
 	(void) head;
 	return 0;
 }
 /* Placeholder skip_bytes callback: skips nothing, always returns 0. */
 static int system_skip_bytes(int len)
 {
 	(void) len;
 	return 0;
 }
 /* Placeholder read_frame_body callback: reads nothing, always returns 0. */
 static int system_read_frame_body(int size)
 {
 	(void) size;
 	return 0;
 }
 /* Placeholder tell callback: always reports position 0. */
 static long system_tell(void)
 {
 	const long position = 0;
 	return position;
 }
 /* Reader instance handed to system_init() by the test driver below. */
 struct reader rd1;
 /*
  * Test driver: runs system_init() on file descriptor 0 and prints its
  * result to stderr. Exit status is 0 when system_init() returned 0,
  * 1 otherwise.
  */
 int main(void)
 {
 	int ret;
 	filept = 0;
 	ret = system_init(&rd1);
 	fprintf(stderr,"ret: %d\n",ret);
 	/* main has to return int; map the -1 of system_init to a portable status. */
 	return ret == 0 ? 0 : 1;
 }
--- a/src/libmpg123/tabinit.c
+++ b/src/libmpg123/tabinit.c
@@ -0,0 +1,181 @@
 /*
 	tabinit.c: initialize tables...
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by Michael Hipp
 */
 #include <stdlib.h>
 #include "mpg123lib_intern.h"
 #ifdef OPT_MMXORSSE
 /* 32 bit integer; i.e. "long" on x86, but int on x86_64... */
 const int aligned(32) costab_mmxsse[] =
 {
 	1056974725, 1057056395, 1057223771, 1057485416, 1057855544,
 	1058356026, 1059019886, 1059897405, 1061067246, 1062657950,
 	1064892987, 1066774581, 1069414683, 1073984175, 1079645762,
 	1092815430, 1057005197, 1057342072, 1058087743, 1059427869,
 	1061799040, 1065862217, 1071413542, 1084439708, 1057128951,
 	1058664893, 1063675095, 1076102863, 1057655764, 1067924853,
 	1060439283
 };
 #endif
 /* All optimizations share this code - with the exception of MMX */
 #ifndef OPT_MMX_ONLY
 /*
 	Five cosine factor tables with 16, 8, 4, 2 and 1 entries.
 	They start out zeroed (static storage) and are filled by prepare_decode_tables().
 */
 /* that altivec alignment part here should not hurt generic code, I hope */
 #ifdef OPT_ALTIVEC
 static real __attribute__ ((aligned (16))) cos64[16];
 static real __attribute__ ((aligned (16))) cos32[8];
 static real __attribute__ ((aligned (16))) cos16[4];
 static real __attribute__ ((aligned (16))) cos8[2];
 static real __attribute__ ((aligned (16))) cos4[1];
 #else
 static real cos64[16],cos32[8],cos16[4],cos8[2],cos4[1];
 #endif
 /* Non-static access to the five tables, largest first; pnts[i] has 16>>i entries. */
 real *pnts[] = { cos64,cos32,cos16,cos8,cos4 };
 /*
 	Integer base values for the decode window: 257 entries (index 0..256).
 	make_decode_tables() reads them from index 0 up to 256 and back down to 1,
 	scaling each by scaleval/65536.0 before storing it in fr->decwin.
 */
 static long intwinbase[] = {
     0,    -1,    -1,    -1,    -1,    -1,    -1,    -2,    -2,    -2,
    -2,    -3,    -3,    -4,    -4,    -5,    -5,    -6,    -7,    -7,
    -8,    -9,   -10,   -11,   -13,   -14,   -16,   -17,   -19,   -21,
   -24,   -26,   -29,   -31,   -35,   -38,   -41,   -45,   -49,   -53,
   -58,   -63,   -68,   -73,   -79,   -85,   -91,   -97,  -104,  -111,
  -117,  -125,  -132,  -139,  -147,  -154,  -161,  -169,  -176,  -183,
  -190,  -196,  -202,  -208,  -213,  -218,  -222,  -225,  -227,  -228,
  -228,  -227,  -224,  -221,  -215,  -208,  -200,  -189,  -177,  -163,
  -146,  -127,  -106,   -83,   -57,   -29,     2,    36,    72,   111,
   153,   197,   244,   294,   347,   401,   459,   519,   581,   645,
   711,   779,   848,   919,   991,  1064,  1137,  1210,  1283,  1356,
  1428,  1498,  1567,  1634,  1698,  1759,  1817,  1870,  1919,  1962,
  2001,  2032,  2057,  2075,  2085,  2087,  2080,  2063,  2037,  2000,
  1952,  1893,  1822,  1739,  1644,  1535,  1414,  1280,  1131,   970,
   794,   605,   402,   185,   -45,  -288,  -545,  -814, -1095, -1388,
 -1692, -2006, -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788,
 -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597, -7910, -8209,
 -8491, -8755, -8998, -9219, -9416, -9585, -9727, -9838, -9916, -9959,
 -9966, -9935, -9863, -9750, -9592, -9389, -9139, -8840, -8492, -8092,
 -7640, -7134, -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082,
   -70,   998,  2122,  3300,  4533,  5818,  7154,  8540,  9975, 11455,
 12980, 14548, 16155, 17799, 19478, 21189, 22929, 24694, 26482, 28289,
 30112, 31947, 33791, 35640, 37489, 39336, 41176, 43006, 44821, 46617,
 48390, 50137, 51853, 53534, 55178, 56778, 58333, 59838, 61289, 62684,
 64019, 65290, 66494, 67629, 68692, 69679, 70590, 71420, 72169, 72835,
 73415, 73908, 74313, 74630, 74856, 74992, 75038 };
 /*
 	Fill the five cosine tables reachable through pnts[].
 	Table number t has 16>>t entries; entry n is 1/(2*cos(pi*(2n+1)/(64>>t))).
 */
 void prepare_decode_tables()
 {
 	int level;
 	for(level=0; level<5; ++level)
 	{
 		real *table = pnts[level];
 		int entries = 0x10 >> level;
 		int divisor = 0x40 >> level;
 		int n;
 		for(n=0; n<entries; ++n)
 		{
 			table[n] = DOUBLE_TO_REAL(1.0 / (2.0 * cos(M_PI * ((double) n * 2.0 + 1.0) / (double) divisor)));
 		}
 	}
 }
 #endif
 #ifdef OPT_MMXORSSE
 /*
 	Let the assembly routine fill fr->decwin_mmx and fr->decwins.
 	The scale handed over is fr->lastscale, or fr->p.outscale while lastscale is negative.
 */
 void make_decode_tables_mmx(mpg123_handle *fr)
 {
 	debug("MMX decode tables");
 	if(fr->lastscale < 0)
 	{
 		make_decode_tables_mmx_asm(fr->p.outscale, fr->decwin_mmx, fr->decwins);
 	}
 	else
 	{
 		make_decode_tables_mmx_asm(fr->lastscale, fr->decwin_mmx, fr->decwins);
 	}
 	debug("MMX decode tables done");
 }
 #endif
 #ifndef OPT_MMX_ONLY
 /*
 	Build the decode window fr->decwin from intwinbase.
 	The scale is fr->lastscale, or fr->p.outscale while lastscale is negative.
 	512 steps: intwinbase is read from index 0 up to 256 and then back down
 	to 1. Each value is scaled by scaleval/65536.0 and stored twice, at idx
 	and idx+16, as long as idx is below 512+16. idx advances by 32 per step
 	and is pulled back by 1023 after every 32 steps; the sign of the scale
 	flips after every 64 steps.
 */
 void make_decode_tables(mpg123_handle *fr)
 {
 	int i;
 	int j = 0;
 	int idx = 0;
 	scale_t scaleval = -(fr->lastscale < 0 ? fr->p.outscale : fr->lastscale);
 	/* This is the generic routine; the message used to claim "MMX". */
 	debug("decode tables");
 	for(i=0;i<512;i++,idx+=32)
 	{
 		if(idx < 512+16)
 			fr->decwin[idx+16] = fr->decwin[idx] = DOUBLE_TO_REAL((double) intwinbase[j] / 65536.0 * (double) scaleval);
 		if(i % 32 == 31)
 			idx -= 1023;
 		if(i % 64 == 63)
 			scaleval = - scaleval;
 		/* Walk up for the first 256 steps, then back down. */
 		j += (i < 256) ? 1 : -1;
 	}
 	debug("decode tables done");
 }
 #endif
 /*
 	Build the 16 to 8 bit conversion table for the encoding in fr->af.encoding.
 	The 8192 byte buffer fr->conv16to8_buf is allocated on first use and
 	fr->conv16to8 points into its middle, so it is indexed with -4096..4095.
 	MPG123_ENC_ULAW_8 gets a logarithmic mapping, MPG123_ENC_SIGNED_8 gets i>>5,
 	MPG123_ENC_UNSIGNED_8 gets (i>>5)+128, any other encoding an all-zero table.
 	Returns 0 on success, -1 with fr->err = MPG123_ERR_16TO8TABLE when the
 	buffer cannot be allocated.
 */
 int make_conv16to8_table(mpg123_handle *fr)
 {
 	int i;
 	int mode = fr->af.encoding;
 	/*
 	 * ????: 8.0 is right but on SB cards '2.0' is a better value ???
 	 */
 	const double mul = 8.0;
 	if(!fr->conv16to8_buf)
 	{
 		fr->conv16to8_buf = malloc(8192);
 		if(!fr->conv16to8_buf)
 		{
 			fr->err = MPG123_ERR_16TO8TABLE;
 			if(NOQUIET) error("Can't allocate 16 to 8 converter table!");
 			return -1;
 		}
 		fr->conv16to8 = fr->conv16to8_buf + 4096;
 	}
 	if(mode == MPG123_ENC_ULAW_8)
 	{
 		double m=127.0 / log(256.0);
 		int c1;
 		for(i=-4096;i<4096;i++)
 		{
 			/* dunno whether this is a valid transformation rule ?!?!? */
 			if(i < 0)
 				c1 = 127 - (int) (log( 1.0 - 255.0 * (double) i*mul / 32768.0 ) * m);
 			else
 				c1 = 255 - (int) (log( 1.0 + 255.0 * (double) i*mul / 32768.0 ) * m);
 			/* Library code: keep quiet when the caller asked for it, like the allocation error above. */
 			if((c1 < 0 || c1 > 255) && NOQUIET)
 				fprintf(stderr,"Converror %d %d\n",i,c1);
 			if(c1 == 0)
 				c1 = 2;
 			fr->conv16to8[i] = (unsigned char) c1;
 		}
 	}
 	else if(mode == MPG123_ENC_SIGNED_8)
 	{
 		for(i=-4096;i<4096;i++)
 			fr->conv16to8[i] = i>>5;
 	}
 	else if(mode == MPG123_ENC_UNSIGNED_8)
 	{
 		for(i=-4096;i<4096;i++)
 			fr->conv16to8[i] = (i>>5)+128;
 	}
 	else
 	{
 		for(i=-4096;i<4096;i++)
 			fr->conv16to8[i] = 0;
 	}
 	return 0;
 }
--- a/src/libmpg123/tabinit_mmx.S
+++ b/src/libmpg123/tabinit_mmx.S
@@ -0,0 +1,175 @@
 /*
 	tabinit_mmx: make_decode_tables_mmx
 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
 	see COPYING and AUTHORS files in distribution or http://mpg123.org
 	initially written by the mysterious higway (apparently)
 */
 #include "mangle.h"
 .data
 	ALIGN32
 /*
 	Window base values as 16 bit words (loaded with movswl, stepped by 2 bytes).
 	Entries from byte offset 444 (index 222) on are stored reduced by 60000 so
 	that they fit into 16 bits; the code below adds 60000 back after loading
 	any word at or beyond intwinbase+444.
 */
 intwinbase:
 	.value      0,    -1,    -1,    -1,    -1,    -1,    -1,    -2
 	.value     -2,    -2,    -2,    -3,    -3,    -4,    -4,    -5
 	.value     -5,    -6,    -7,    -7,    -8,    -9,   -10,   -11
 	.value    -13,   -14,   -16,   -17,   -19,   -21,   -24,   -26
 	.value    -29,   -31,   -35,   -38,   -41,   -45,   -49,   -53
 	.value    -58,   -63,   -68,   -73,   -79,   -85,   -91,   -97
 	.value   -104,  -111,  -117,  -125,  -132,  -139,  -147,  -154
 	.value   -161,  -169,  -176,  -183,  -190,  -196,  -202,  -208
 	.value   -213,  -218,  -222,  -225,  -227,  -228,  -228,  -227
 	.value   -224,  -221,  -215,  -208,  -200,  -189,  -177,  -163
 	.value   -146,  -127,  -106,   -83,   -57,   -29,     2,    36
 	.value     72,   111,   153,   197,   244,   294,   347,   401
 	.value    459,   519,   581,   645,   711,   779,   848,   919
 	.value    991,  1064,  1137,  1210,  1283,  1356,  1428,  1498
 	.value   1567,  1634,  1698,  1759,  1817,  1870,  1919,  1962
 	.value   2001,  2032,  2057,  2075,  2085,  2087,  2080,  2063
 	.value   2037,  2000,  1952,  1893,  1822,  1739,  1644,  1535
 	.value   1414,  1280,  1131,   970,   794,   605,   402,   185
 	.value    -45,  -288,  -545,  -814, -1095, -1388, -1692, -2006
 	.value  -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788
 	.value  -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597
 	.value  -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585
 	.value  -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750
 	.value  -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134
 	.value  -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082
 	.value    -70,   998,  2122,  3300,  4533,  5818,  7154,  8540
 	.value   9975, 11455, 12980, 14548, 16155, 17799, 19478, 21189
 	.value  22929, 24694, 26482, 28289, 30112, 31947,-26209,-24360
 	.value -22511,-20664,-18824,-16994,-15179,-13383,-11610, -9863
 	.value  -8147, -6466, -4822, -3222, -1667,  -162,  1289,  2684
 	.value   4019,  5290,  6494,  7629,  8692,  9679, 10590, 11420
 	.value  12169, 12835, 13415, 13908, 14313, 14630, 14856, 14992
 	.value  15038
 /* Single precision divisor used by fdivs in the first pass. */
 intwindiv:
 	.long 0x47800000			# 65536.0
 .text
 	ALIGN32
 /* void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); */
 /*
 	Two passes over intwinbase, each walking from the first word up to index 256
 	and back down until the pointer is at intwinbase again.
 	Pass 1 (.L00-.L03) stores value/65536.0*scaleval as floats into decwin_mmx.
 	Pass 2 (.L05-.L12) stores (value*scaleval)>>17, clamped to +-32767, as
 	16 bit words into decwins.
 	Registers in both passes: ecx = destination index, ebx = step counter,
 	esi = 32 (index stride), edi = pointer into intwinbase.
 	The scaleval argument slot on the stack is negated in place.
 	NOTE(review): the prototype comment above says float *decwins, but pass 2
 	writes it with 16 bit movw stores - confirm the real type.
 */
 .globl ASM_NAME(make_decode_tables_mmx_asm)
 ASM_NAME(make_decode_tables_mmx_asm):
 	pushl %edi
 	pushl %esi
 	pushl %ebx
 /* stack: 0=ebx, 4=esi, 8=edi, 12=back, 16=scaleval, 20=decwin_mmx, 24=decwins */
 	xorl %ecx,%ecx
 	xorl %ebx,%ebx
 	movl $32,%esi
 	movl $intwinbase,%edi
 	negl 16(%esp)	/* scaleval */
 	pushl $2	/* intwinbase step */
 /* stack: 20=scaleval 24=decwin_mmx, 28=decwins */
 .L00:
 /* skip the store when the destination index is 528 or more */
 	cmpl $528,%ecx
 	jnc .L02
 	movswl (%edi),%eax
 /* words at or beyond offset 444 were stored reduced by 60000 */
 	cmpl $intwinbase+444,%edi
 	jc .L01
 	addl $60000,%eax
 .L01:
 	pushl %eax
 /* stack: 24=scaleval 28=decwin_mmx, 32=decwins */
 	fildl (%esp)
 	fdivs intwindiv
 	fimull 24(%esp) /* scaleval */
 /* eax used to be popped the line before... I'll just use it here a bit */
 	movl 28(%esp),%eax /* decwin_mmx */
 /* same float goes to index ecx and, 64 bytes further, index ecx+16 */
 	fsts    (%eax,%ecx,4)
 	fstps 64(%eax,%ecx,4)
 	popl %eax
 /* stack: 20=scaleval 24=decwin_mmx, 28=decwins */
 .L02:
 /* when the low 5 bits of the counter are all set: pull the index back by 1023 */
 	leal -1(%esi),%edx
 	and %ebx,%edx
 	cmp $31,%edx
 	jnz .L03
 	addl $-1023,%ecx
 /* and if bit 5 is set as well (counter % 64 == 63): flip the sign of scaleval */
 	test %esi,%ebx
 	jz  .L03
 	negl 20(%esp)
 .L03:
 	addl %esi,%ecx
 	addl (%esp),%edi
 	incl %ebx
 /* pointer back at the table start: this pass is done */
 	cmpl $intwinbase,%edi
 	jz .L04
 /* after 256 steps negate the step on the stack to walk back down */
 	cmp $256,%ebx
 	jnz .L00
 	negl (%esp)
 	jmp .L00
 .L04:
 /* reset index, counter and step for the second pass */
 	popl %eax
 	xorl %ecx,%ecx
 	xorl %ebx,%ebx
 	pushl $2 /* paired with popl above */
 .L05:
 	cmpl $528,%ecx
 	jnc .L11
 	movswl (%edi),%eax
 	cmpl $intwinbase+444,%edi
 	jc .L06
 	addl $60000,%eax
 .L06:
 /* edx:eax = value * scaleval (signed 64 bit), then eax = low 32 bits of that >> 17 */
 	cltd
 	imull 20(%esp)
 	shrdl $17,%edx,%eax
 /* clamp to [-32767, 32767]; the movl in between leaves the flags of cmpl for jle */
 	cmpl $32767,%eax
 	movl $1055,%edx
 	jle .L07
 	movl $32767,%eax
 	jmp .L08
 .L07:
 	cmpl $-32767,%eax
 	jge .L08
 	movl $-32767,%eax
 .L08:
 /* going to use ebx for decwins, watch the jumps */
 	pushl %ebx 
 /* stack: 24=scaleval 28=decwin_mmx, 32=decwins */
 	movl 32(%esp),%ebx
 /* for an index below 512 also store at word index 1055-ecx and 16 words before it */
 	cmpl $512,%ecx
 	jnc .L09
 	subl %ecx,%edx
 	movw %ax,(%ebx,%edx,2) /* decwins */
 	movw %ax,-32(%ebx,%edx,2)
 .L09:
 /* the value is negated for even indices before the stores at ecx and ecx+16 */
 	testl $1,%ecx
 	jnz .L10
 	negl %eax
 .L10:
 	movw %ax,(%ebx,%ecx,2)
 	movw %ax,32(%ebx,%ecx,2)
 	popl %ebx /* that has to match the pushl before */
 .L11:
 /* same index, sign and direction bookkeeping as .L02/.L03 in the first pass */
 	leal -1(%esi),%edx
 	and %ebx,%edx
 	cmp $31,%edx
 	jnz .L12
 	addl $-1023,%ecx
 	test %esi,%ebx
 	jz  .L12
 	negl 20(%esp)
 .L12:
 	addl %esi,%ecx
 	addl (%esp),%edi
 	incl %ebx
 	cmpl $intwinbase,%edi
 	jz .L13
 	cmp $256,%ebx
 	jnz .L05
 	negl (%esp)
 	jmp .L05
 .L13:
 /* drop the step value, restore the saved registers */
 	popl %eax
 	popl %ebx
 	popl %esi
 	popl %edi
 	ret
--- a/src/libmpg123/testcpu.c
+++ b/src/libmpg123/testcpu.c
@@ -0,0 +1,27 @@
 #include <stdio.h>
 #include "getcpuflags.h"
 /*
 	Print what getcpuflags() reports: the family taken from bits 8..11 of
 	flags.id, the three raw flag words, and the features the cpu_* checks find.
 	Always exits with status 0.
 */
 int main()
 {
 	struct cpuflags flags;
 	int fam;
 	if(!getcpuflags(&flags))
 	{
 		printf("CPU won't do cpuid (some old i386 or i486)\n");
 		return 0;
 	}
 	fam = (flags.id & 0xf00)>>8;
 	printf("family: %i\n", fam);
 	printf("stdcpuflags:  0x%08x\n", flags.std);
 	printf("std2cpuflags: 0x%08x\n", flags.std2);
 	printf("extcpuflags:  0x%08x\n", flags.ext);
 	if(!cpu_i586(flags))
 	{
 		printf("I guess you have some i486\n");
 		return 0;
 	}
 	printf("A i586 or better cpu with:");
 	if(cpu_mmx(flags))
 		printf(" mmx");
 	if(cpu_3dnow(flags))
 		printf(" 3dnow");
 	if(cpu_3dnowext(flags))
 		printf(" 3dnowext");
 	if(cpu_sse(flags))
 		printf(" sse");
 	if(cpu_sse2(flags))
 		printf(" sse2");
 	if(cpu_sse3(flags))
 		printf(" sse3");
 	printf("\n");
 	return 0;
 }
--- a/src/libmpg123/true.h
+++ b/src/libmpg123/true.h
@@ -0,0 +1,7 @@
 /* Include guard; this header only provides the integer constants FALSE and TRUE. */
 #ifndef MPG123_H_TRUE
 #define MPG123_H_TRUE
 #define FALSE 0
 #define TRUE  1
 #endif